2 # 巻別に標点を付けるためのファイルを生成する
5 # 21 July 11: part_wordnum, volume_wordnumの追加
8 require 'rexml/parsers/sax2parser'
9 require 'rexml/sax2listener'
# Input directory: the 'data' directory one level above this script's directory.
11 SOURCE_DIR = File.join(File.dirname(__FILE__), '..', 'data')
# Output directory: 'yml/swjz' one level above this script's directory.
# setup_outfile writes one '<chapter_id>.yml' file into it.
12 TARGET_DIR = File.join(File.dirname(__FILE__), '..', 'yml', 'swjz')
# Open handle on 'swjz.xml' inside SOURCE_DIR; it is passed to the SAX2 parser below.
13 SWJZ = File.new(File.join(SOURCE_DIR, 'swjz.xml'))
# Mix in the REXML SAX2 listener interface so an instance can be registered
# with parser.listen.
16 include REXML::SAX2Listener
# NOTE(review): the enclosing method header for the assignments below is not
# visible here -- presumably the constructor; confirm.
# Number interpolated into the "--- #<n>" separator lines written to the output file.
19 @doc_count = 0 # yaml documents in a file
# State flags: each is set to true by the matching before_* handler and back
# to false by the matching after_* handler.
25 @in_chaptertitle = false
28 @in_part_wordnum = false
29 @in_volume_wordnum = false
# SAX2 callback for an opening tag. Passes the qualified tag name (plus the
# attributes, for the handlers that take them) to the before_* handlers; each
# handler compares qname against its own tag name, so calling all of them for
# every tag is intentional. uri and localname are not used in the visible lines.
40 def start_element(uri, localname, qname, attributes)
42 before_chaptertitle(qname, attributes)
44 before_wordhead(qname, attributes)
45 before_explanation(qname)
46 before_duan_note(qname)
47 before_part_wordnum(qname)
48 before_volume_wordnum(qname)
# SAX2 callback for a closing tag. Passes the qualified tag name to the
# after_* handlers; each handler compares qname against its own tag name.
# uri and localname are not used in the visible lines.
50 def end_element(uri, localname, qname)
52 after_chaptertitle(qname)
55 after_explanation(qname)
56 after_duan_note(qname)
57 after_part_wordnum(qname)
58 after_volume_wordnum(qname)
# Offer the incoming text to the accumulators; each push_* method appends it
# only while its own @in_* flag is set.
# NOTE(review): the enclosing method header is not visible here -- presumably
# the SAX2 `characters` callback; confirm.
62 push_chaptertitle(text)
64 push_explanation(text)
# Print the final @sw_count and @word_count values to standard output.
# NOTE(review): the enclosing method header is not visible here -- presumably
# the SAX2 `end_document` callback; confirm.
69 printf("\nSW: %s\n", @sw_count)
70 printf("Word: %s\n", @word_count)
73 #--- Process elements ---
# Start-tag handler for <chapter>: acts only when qname is 'chapter'.
75 def before_chapter(qname)
76 if qname == 'chapter' then
# End-tag handler for <chapter>: clears the @in_chapter flag when the closing
# tag is 'chapter'; any other tag is ignored.
81 def after_chapter(qname)
82 @in_chapter = false if qname == 'chapter'
# Start-tag handler for <chaptertitle>. Only while @in_chapter is set: turns on
# chapter-title text collection and opens the output file named after the
# element's 'id' attribute (see setup_outfile).
85 def before_chaptertitle(qname, attributes)
86 if qname == 'chaptertitle' and @in_chapter then
87 @in_chaptertitle = true
88 chapter_id = attributes['id']
89 # printf("chapter_id: %s\n", chapter_id)
90 setup_outfile(chapter_id)
# End-tag handler for <chaptertitle>. While @in_chapter is set, writes a
# "chapter:" line to the output file holding the accumulated title text
# followed by the accumulated duan_note text, and turns chapter-title text
# collection off again.
93 def after_chaptertitle(qname)
94 if qname == 'chaptertitle' and @in_chapter then
95 @out.printf("chapter: %s\n", @chaptertitle + @duan_note)
97 @in_chaptertitle = false
# Appends text to @chaptertitle, but only while @in_chaptertitle is set;
# otherwise the text is ignored.
100 def push_chaptertitle(text)
101 @chaptertitle += text if @in_chaptertitle
# Start-tag handler for <shuowen>: writes a "--- #<@doc_count>" separator line
# followed by a "content:" key to the output file.
104 def before_shuowen(qname)
105 if qname == 'shuowen'
109 @out.printf("\n--- #%i\ncontent:\n", @doc_count) # sequence of sw's
# End-tag handler for <shuowen>: clears the @in_shuowen flag when the closing
# tag is 'shuowen'; any other tag is ignored.
112 def after_shuowen(qname)
113 @in_shuowen = false if qname == 'shuowen'
# Start-tag handler for <wordhead>: stores the element's 'id' and 'img'
# attributes in @wordhead_id and @position; after_wordhead writes them out.
116 def before_wordhead(qname, attributes)
117 if qname == 'wordhead' then
119 @wordhead_id = attributes['id']
120 @position = attributes['img']
# End-tag handler for <wordhead>: writes the collected head word to the output
# file as a "- word:" list item, followed by "position:" (omitted when
# @position is not set), "id:", and an opening "content:" key.
123 def after_wordhead(qname)
124 if qname == 'wordhead' then
125 @out.printf(" - word: %s\n", @wordhead)
126 @out.printf(" position: %s\n", @position) if @position
128 @out.printf(" id: %s\n", @wordhead_id)
131 @out.printf(" content:\n")
# Appends text to @wordhead, but only while @in_wordhead is set; otherwise the
# text is ignored.
139 def push_wordhead(text)
140 @wordhead += text if @in_wordhead
# Start-tag handler for <explanation>: sets the @in_explanation flag so that
# push_explanation starts collecting text.
143 def before_explanation(qname)
144 @in_explanation = true if qname == 'explanation'
# End-tag handler for <explanation>: writes the accumulated explanation text to
# the output file as an "- ex:" list item and clears the @in_explanation flag.
146 def after_explanation(qname)
147 if qname == 'explanation'
149 @out.printf(" - ex: %s\n", @explanation)
151 @in_explanation = false
# Appends text to @explanation, but only while @in_explanation is set;
# otherwise the text is ignored.
155 def push_explanation(text)
156 @explanation += text if @in_explanation
# Start-tag handler for <duan_note>: acts only when qname is 'duan_note'.
159 def before_duan_note(qname)
160 if qname == 'duan_note' then
# End-tag handler for <duan_note>: writes the accumulated note text to the
# output file as a "- dn:" list item and clears the @in_duan_note flag.
# Both visible branches emit the same "- dn:" line; the second one applies
# while a part_wordnum or volume_wordnum element is open.
# NOTE(review): the condition guarding the first write is not visible here;
# confirm how the two branches differ.
165 def after_duan_note(qname)
166 if qname == 'duan_note' then
168 @out.printf(" - dn: %s\n", @duan_note)
169 elsif @in_part_wordnum or @in_volume_wordnum then
170 @out.printf(" - dn: %s\n", @duan_note)
173 @in_duan_note = false
# Appends text to @duan_note, but only while @in_duan_note is set; otherwise
# the text is ignored.
176 def push_duan_note(text)
177 @duan_note += text if @in_duan_note
# Start-tag handler for <part_wordnum>: sets the @in_part_wordnum flag and
# writes a "--- #<@doc_count>" separator line followed by a "part:" key to the
# output file.
180 def before_part_wordnum(qname)
181 if qname == 'part_wordnum' then
182 @in_part_wordnum = true
185 @out.printf("\n--- #%i\npart:\n", @doc_count) # sequence number
# End-tag handler for <part_wordnum>: clears the @in_part_wordnum flag.
188 def after_part_wordnum(qname)
189 if qname == 'part_wordnum' then
190 @in_part_wordnum = false
# Start-tag handler for <volume_wordnum>: sets the @in_volume_wordnum flag and
# writes a "--- #<@doc_count>" separator line followed by a "volume:" key to
# the output file.
195 def before_volume_wordnum(qname)
196 if qname == 'volume_wordnum' then
197 @in_volume_wordnum = true
200 @out.printf("\n--- #%i\nvolume:\n", @doc_count) # sequence number
# End-tag handler for <volume_wordnum>: clears the @in_volume_wordnum flag.
203 def after_volume_wordnum(qname)
204 if qname == 'volume_wordnum' then
205 @in_volume_wordnum = false
210 #--- virtual element processing: wordnum ---
# Write the accumulated @wordnum text to the output file as an "- ex:" list
# item; nothing is written when the text is empty.
# NOTE(review): the enclosing method header is not visible here; confirm which
# handler this line belongs to.
215 @out.printf(" - ex: %s\n", @wordnum) unless @wordnum.empty?
# Appends text to @wordnum, but only while @in_wordnum is set; otherwise the
# text is ignored.
219 def push_wordnum(text)
220 @wordnum += text if @in_wordnum
# Opens '<chapter_id>.yml' inside TARGET_DIR in write mode ("w": created if
# missing, truncated if present) and makes it the current output stream @out.
# NOTE(review): no close of a previously opened @out is visible here; confirm
# that earlier chapter files are closed or flushed elsewhere.
224 def setup_outfile(chapter_id)
225 @out = File.open(File.join(TARGET_DIR, chapter_id + '.yml'), "w")
# Script entry: build a SAX2 parser over the SWJZ source file and register a
# SwjzCollector instance as its listener.
229 parser = REXML::Parsers::SAX2Parser.new SWJZ
230 listener = SwjzCollector.new
231 parser.listen(listener)