OSDN Git Service

change fig command to not use hash.
[otptools/otptools.git] / markupper.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3  
4 import sys
5 import os
6 import re
7 import codecs
8 import pickle
9
10 import HTMLTagFilter
11
# Wrap the standard streams with UTF-8 codecs: text read from stdin is
# decoded from UTF-8 and text written to stdout is encoded as UTF-8.
sys.stdin = codecs.getreader('utf_8')(sys.stdin)
sys.stdout = codecs.getwriter('utf_8')(sys.stdout)

# Lists handed to the HTMLTagFilter constructor below.
# NOTE(review): presumably alist is the allow list and dlist the deny
# list ("*" = everything else) -- confirm against HTMLTagFilter.
alist = ["a", "a:href", "a:name", "b", "br" ]
dlist = ["*"]

# Module-level tag filter.  Its only use visible in this file
# (inside Markupper._default_markup_rule) is commented out.
tag_filter = HTMLTagFilter.HTMLTagFilter(HTMLTagFilter.DENY_ALLOW, alist, dlist)
# NOTE(review): presumably the default path for Markupper.load_index /
# save_index; nothing visible in this file reads it.
path_to_index = "./_markup_index"
20
class _InputStream(object):
    """Base class for input streams that act as their own iterator."""

    def __init__(self):
        """Nothing to set up in the base class."""

    def __iter__(self):
        """Return this object as the iterator."""
        return self

    def next(self):
        """Iterator hook (Python 2 protocol); the base version returns None."""
        return None
33
34
35 class Markupper(object):
36     """
37     """
38     def __init__(self):
39         self._input_iter = None
40         self._index_past = {}
41         self._index = {}
42
43     def index_add(self, key, val):
44         """
45         Add key and value to index.
46
47         @param key:
48         @type key:
49
50         @param val:
51         @key val:
52         """
53         self._index[key] = val
54
55     def index(self, key):
56         """
57         Get index
58         """
59         return self._index[key]
60
61     def index_haskey(self, key):
62         return self._index.has_key(key)
63
64     def markup(self, input_iter, release="0"):
65         """
66         Do markup.
67
68         @param input_iter: iterator to use as input
69         @type input_iter: iterator
70         """
71         self.input_iter = input_iter
72         self._page_counter = 1
73         # alist = ["a", "a:href", "a:name", "b", "br" ]
74         # dlist = ["*"]
75         # tag_filter = HTMLTagFilter.HTMLTagFilter(HTMLTagFilter.DENY_ALLOW, alist, dlist)
76         self._release = release
77
78         self.index_add("figs", [])
79
80         anchor = ""
81         for line in self.input_iter:
82             line = self._default_markup_rule(line)
83             # head-of-line rules
84             if re.search(ur"^☆{{{$", line):
85                 self._inline(line)
86                 continue
87             elif re.search(ur"^☆comment\s{{{$", line):
88                 self._comment(line)
89                 continue
90             elif re.search(ur"^☆\*", line):
91                 self._anchor = re.sub(ur"^☆\*", "", line).strip()
92                 continue
93             elif re.search(ur"^・", line):
94                 self._ulist(line)
95                 continue
96             elif re.search(ur"^☆begin-column:", line):
97                 self._begin_column(line)
98                 continue
99             elif re.search(ur"^☆end-column", line):
100                 self._end_column(line)
101                 continue
102             elif re.search(ur"^☆space", line):
103                 self._space(line)
104                 continue
105             elif re.search(ur"^☆call_tables", line):
106                 self._call_tables(line)
107                 continue
108             elif re.search(ur"^●", line):
109                 if anchor != "":
110                     line = re.sub(ur"^●(.*)$", ur'<h4 id="%s">\1</h4>' % anchor, line)
111                     anchor = ""
112                 else:
113                     line = re.sub(ur"^●(.*)$", ur"<h4>\1</h4>", line)
114                 print line
115                 continue
116             elif re.search(ur"^○", line):
117                 if anchor != "":
118                     line = re.sub(ur"^○(.*)$", ur'<b id="%s">\1</b>' % anchor, line)
119                     anchor = ""
120                 else:
121                     line = re.sub(ur"^○(.*)$", ur"<b>\1</b>", line)
122                 print line
123                 continue
124             elif re.search(ur"^☆----", line):
125                 line = re.sub(ur"☆----.*-{0,1}", u"<hr>", line)
126                 print line
127                 continue
128             elif re.search(ur"^☆\+---", line):
129                 self._code(line)
130                 continue
131             elif re.search(ur"^☆表", line):
132                 self._table(line)
133                 continue
134             elif re.search(ur"^☆図", line):
135                 self._fig(line)
136                 continue
137             elif re.search(ur"^☆リスト", line):
138                 self._list(line)
139                 continue
140
141             if re.search(ur"^ ", line):
142                 line = "<p>" + line + "</p>"
143
144             if re.search(r"^\s*$", line):
145                 line = ""
146
147             print line
148
149         # end-of-loop
150
151
152     def load_index(self, path_to_index):
153         """
154         load index database.
155
156         @param path_to_index: index db's path
157         @type path_to_index: string
158         """
159         # load index
160         try:
161             index_file = open(path_to_index, "r")
162             self._index_past = pickle.load(index_file)
163             index_file.close()
164         except IOError:
165             sys.stderr.write("warn: cannot read index file,\n")
166
167     def save_index(self, path_to_index):
168         """
169         save index database.
170
171         @param path_to_index: index db's path
172         @type path_to_index: string
173         """
174         # save index
175         try:
176             index_file = open(path_to_index, "w")
177             pickle.dump(self._index, index_file)
178             index_file.close()
179         except IOError:
180             sys.stderr.write("warn: cannot write index file,\n")
181
182     def make_hashlist(self, path_to_hashfile):
183         """
184         create hash list.
185
186         @param path_to_hashfile: hashfile's path
187         @type path_to_hashfile: string
188         """
189         try:
190             file_img_hash = open(path_to_hashfile, "r")
191         except IOError:
192             sys.stderr.write("cannot open file: %s" % path_img_hash)
193             return None;
194
195         self.hashlist = {};
196         for line in file_img_hash:
197             splited = line.strip().split("\t", 2)
198             # hashlist's format: <hash> \t <filename>
199             self.hashlist[splited[1]] = splited[0]
200
201     def _call_tagles(self):
202         pass
203
204     def _default_markup_rule(self, line):
205         """
206         apply default markup rules.
207
208         @param line: string to apply markup
209         @type line: string
210         """
211         # line = re.sub(ur"&", ur"&amp", line)
212         # line = re.sub(ur"<", ur"&lt;", line)
213         # line = re.sub(ur">", ur"&gt;", line)
214
215         # apply filter
216         # line = tag_filter.apply(line)
217
218         line = re.sub(ur"[★*](表[0-9~]+)", ur"<b>\1</b>", line)
219         line = re.sub(ur"[★*](図[0-9~]+)", ur"<b>\1</b>", line)
220         line = re.sub(ur"[★*](リスト[0-9~]+)", ur"<b>\1</b>", line)
221         line = re.sub(ur"[★*]b\[(.*?)\]", ur"<b>\1</b>", line)
222         line = re.sub(ur"[★*]\[(\S*) (.*?)\]", r'<a href="\1">\2</a>', line)
223
224         # comment
225         if re.search(ur"^☆#", line):
226             line = ""
227
228         return line
229
230
231     def _ulist(self, line):
232         """Proccess ul"""
233         print "<ul>"
234         while re.search(ur"^・", line):
235             print re.sub(ur"^・(.*)$", ur"<li>\1</li>", line.strip())
236             line = self.input_iter.next()
237         print "</ul>\n"
238
239     def _begin_column(self, line):
240         """Proccess column"""
241         try:
242             str_title = re.search(ur"^☆begin-column:(.*)$", line).group(1)
243         except AttributeError:
244             str_title = ""
245
246         html = """<table bgcolor="#DDDDDD" border="0" cellpadding="6" width="95%%">
247     <tr><th>%s</th></tr>
248     <tr><td><span style="font-size: 85%%;">
249     """ % (str_title)
250         print html
251
252     def _end_column(self, line):
253         print """</span></td></tr>
254     </table>
255     """
256
257     def _list_start(self):
258         return "<pre>"
259
260     def _list_end(self):
261         return "</pre>"
262
263     def _list(self, line):
264         try:
265             str_title = re.search("^☆(リスト.*)$", line).group(1)
266         except AttributeError:
267             str_title = ""
268         print "<p><b>%s</b></p>" % (str_title)
269         print self._list_start(line)
270
271         for line in self.input_iter:
272             line = line.strip()
273             line = line.replace("&", "&amp;")
274             line = line.replace("<", "&lt;")
275             line = line.replace(">", "&gt;")
276             if line == "----":
277                 break
278             print line
279         print self._list_end()
280
281     def _code(self, line):
282         print self._list_start()
283
284         for line in self.input_iter:
285             #        line = line.strip()
286             line = line.replace("&", "&amp;")
287             line = line.replace("<", "&lt;")
288             line = line.replace(">", "&gt;")
289             line = line = re.sub(ur"[★*]b\[(.*?)]", ur"<b>\1</b>", line)
290
291             if re.search(ur"^☆\+---$", line):
292                 break
293             print line,
294         print self._list_end()
295
296
297     def _inline(self, line):
298         for line in self.input_iter:
299             #        line = line.strip()
300             if re.search(ur"^☆}}}", line):
301                 break
302             print line
303
304
305     def _comment(self, line):
306         for line in self.input_iter:
307             line = line.strip()
308             if re.search(ur"^☆}}}", line):
309                 break
310
311     def _space(self, line):
312         print "<br><br>"
313
314
315     def _fig_start(self, cap=""):
316         return """<table align="center" border="0" cellpadding="0" cellspacing="0">
317     <tr> <td valign="top" align="center">
318     """
319
320     def _fig_end(self, cap=""):
321         return """</td> </tr>
322     <tr> <td><span style="font-size: 80%%; font-weight: bold;">
323     %s
324     </span></td> </tr>
325     </table>
326     """ % (cap)
327
328     def _fig(self, line):
329         if self._release == 1:
330             self._fig_release(line)
331         else:
332             self._fig(line)
333
334
335     def _fig(self, line):
336         try:
337             str_title = re.search(ur"^☆(図.*)$", line).group(1)
338         except AttributeError:
339             str_title = ""
340         print self._fig_start()
341
342         line = self.input_iter.next()
343         imgname = ""
344         imgname_s = ""
345         hash = ""
346         hash_s = ""
347         match_o1 = re.search(ur"<([^,]*?)>", line)
348         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
349         if not match_o1 == None:
350             imgname = match_o1.group(1)
351             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", match_o1.group(1))
352         elif not match_o2 == None:
353             imgname = match_o1.group(1)
354             imgname_s = match_o1.group(2)
355
356         if not os.path.isfile(imgname_s):
357             imgname_s = imgname
358
359         print """<a href="%s">
360      <img src="%s" alt="%s">
361 </a>
362      """ % (imgname, imgname_s, str_title)
363         
364
365         dic = self.index("figs")
366         dic.append(imgname)
367         if imgname_s != "":
368             dic.append(imgname_s)
369
370         print self._fig_end(str_title);
371
372
373     def _fig_release(self, line):
374         try:
375             str_title = re.search(ur"^☆(図.*)$", line).group(1)
376         except AttributeError:
377             str_title = ""
378         print self._fig_start()
379
380         line = self.input_iter.next()
381         imgname = ""
382         imgname_s = ""
383         hash = ""
384         hash_s = ""
385         match_o1 = re.search(ur"<([^,]*?)>", line)
386         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
387         if not match_o1 == None:
388             imgname = match_o1.group(1)
389             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", match_o1.group(1))
390         elif not match_o2 == None:
391             imgname = match_o1.group(1)
392             imgname_s = match_o1.group(2)
393
394
395         hash = self.hashlist.get(imgname, "")
396         hash_s = self.hashlist.get(imgname_s, "")
397         if hash_s == "":
398             hash_s = hash
399
400         print """<a href="/blob.pl?id=%s">
401      <slash type="image" id="%s" title="%s">
402      </a>
403      """ % (hash, hash_s, str_title)
404         
405
406         dic = self.index("figs")
407         dic.append(imgname)
408         if imgname_s != "":
409             dic.append(imgname_s)
410
411         print self._fig_end(str_title);
412
413
414     def _table_start(self, cap):
415         return """<table align="center" border="1" width="90%%">
416     <caption><b>%s</b></caption>
417     """ % cap
418
419     def _table_end(self):
420         return "</table>\n"
421
    def _table(self, line):
        """
        Print a table and remember its HTML in the "tables" index entry.

        The text after the leading mark on the command line is the
        caption; the table mark followed by digits/capital letters is
        the name under which the HTML is indexed.  The following input
        lines, up to the first blank line, are the rows, with cells
        separated by tabs.  A row starting with the header mark is
        rendered with <th> cells, every other row with <td> cells.  An
        empty cell extends the span of a neighbouring cell (see the
        comments in the body).

        @param line: the table command line
        @type line: string
        """
        str_title = ""
        # Collects the same HTML that is printed, for the index.
        self._table_buf1 = ""

        try:
            str_title = re.search(ur"^☆(表.*)$", line).group(1)
            fig_name =  re.search(ur"^☆(表[0-9A-Z]+)", line).group(1)
        except AttributeError:
            # Either pattern failed to match: use empty caption and name.
            str_title = ""
            fig_name = ""
        print self._table_start(str_title)
        self._table_buf1 =  self._table_start(str_title)

        # First pass: read the rows into table_contents, a list of rows,
        # each a list of cell dicts with keys "tag", "item", "row"
        # (rowspan) and "col" (colspan).
        num_row = 0
        table_contents = []
        for line in self.input_iter:
            line = line.strip(" \n")
            # A blank line ends the table.
            if re.search(ur"^\s*$", line):
                break
            line = self._default_markup_rule(line)
            if re.search(ur"^〓", line):
                line = re.sub(ur"^〓", "", line)
                tag_mode = "th"
            else:
                tag_mode = "td"
            table_contents.append([])
            num_col = 0
            for item in line.split("\t"):
                if item == "":
                    if num_col == 0:
                        # Empty first cell: walk up the first column to
                        # the nearest non-empty cell and grow its rowspan.
                        # NOTE(review): num_row-n can go negative, which
                        # wraps to the end of the list; the search then
                        # appears to stop via IndexError -- confirm.
                        n = 1
                        try:
                            while table_contents[num_row-n][num_col]["item"] == "":
                                n += 1
                            table_contents[num_row-n][num_col]["row"] += 1
                        except IndexError:
                            pass
                    else:
                        # Empty cell elsewhere: walk left in this row to
                        # the nearest non-empty cell and grow its colspan.
                        n = 1
                        try:
                            while table_contents[num_row][num_col-n]["item"] == "":
                                n += 1
                            table_contents[num_row][num_col-n]["col"] += 1
                        except IndexError:
                            pass

                # Empty cells are stored too, so column positions stay
                # aligned; they are skipped when the rows are rendered.
                table_contents[num_row].append({"tag":tag_mode,"item":item,"row":1,"col":1})
                num_col = num_col + 1
            num_row = num_row + 1

        # Second pass: render each row, print it and add it to the buffer.
        for row_item in table_contents:
            line = "<tr>"
            for item in row_item:
                if item["item"] == "":
                    continue
                line = line + "<" + item["tag"]
                if not item["row"] == 1:
                    line = line + (' rowspan="%s"' % item["row"])
                if not item["col"] == 1:
                    line = line + (' colspan="%s"' % item["col"])
                line = line +  ">"
                line = line + item["item"]
                line = line + "</" + item["tag"] + ">"
            line = line + "</tr>\n"
            # The row already ends with a newline, so none is added.
            print line,
            self._table_buf1 = self._table_buf1 + line

        print self._table_end()
        self._table_buf1 =  self._table_buf1 + self._table_end()
        # Store the finished HTML under the table name, creating the
        # "tables" index entry on first use.
        if self.index_haskey("tables"):
            self.index("tables")[fig_name] = self._table_buf1
        else:
            self.index_add("tables", {fig_name:self._table_buf1})

503
504     def _call_tables(self, line):
505         try:
506             fig_name =  re.search(ur"^☆call_tables\((表[0-9A-Z]+)", line).group(1)
507         except AttributeError:
508             return
509         print self.index("tables")[fig_name]