OSDN Git Service

some changes...
[otptools/otptools.git] / markupper.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3  
4 import sys
5 import os
6 import re
7 import codecs
8 import pickle
9
10 import HTMLTagFilter
11 import deterfile
12
13 #sys.stdin = codecs.getreader('utf_8')(sys.stdin)
14 #sys.stdout = codecs.getwriter('utf_8')(sys.stdout)
15
# Tag names and tag:attribute pairs handed to the HTML tag filter as the
# "allow" list.
alist = [
    "a",
    "a:href",
    "a:name",
    "b",
    "br",
]
# "Deny" list for the same filter. NOTE(review): "*" presumably means
# "everything else" -- confirm against HTMLTagFilter.
dlist = [
    "*",
]

# Module-wide filter instance (its apply() call in Markupper is currently
# commented out).
tag_filter = HTMLTagFilter.HTMLTagFilter(
    HTMLTagFilter.DENY_ALLOW,
    alist,
    dlist,
)
# NOTE(review): presumably the default path for load_index()/save_index();
# no caller is visible in this file.
path_to_index = "./_markup_index"
21
class _InputStream(object):
    """
    Base class for input streams that are their own iterator.

    Subclasses override next() to return the next item and raise
    StopIteration when the input is exhausted.
    """

    def __init__(self):
        pass

    def __iter__(self):
        """Return the iterator, which is the stream itself."""
        return self

    def next(self):
        """
        Return the next item (Python 2 iterator protocol).

        The base class has no input, so it reports exhaustion at once
        instead of yielding None forever.
        """
        raise StopIteration

    def __next__(self):
        """Python 3 iterator protocol; delegates to next() so that
        subclasses only have to override one method."""
        return self.next()
34
35
36 class Markupper(object):
37     """
38     """
39     def __init__(self):
40         self._input_iter = None
41         self._index_past = {}
42         self._index = {}
43         self._image_dir = ""
44
45     def index_add(self, key, val):
46         """
47         Add key and value to index.
48
49         @param key:
50          @type key:
51
52         @param val:
53         @key val:
54         """
55         self._index[key] = val
56
57     def index(self, key):
58         """
59         Get index
60         """
61         return self._index[key]
62
63     def index_haskey(self, key):
64         return self._index.has_key(key)
65
66     def markup(self, input_iter, release="0"):
67         """
68         Do markup.
69
70         @param input_iter: iterator to use as input
71         @type input_iter: iterator
72         """
73         self.input_iter = input_iter
74         self._page_counter = 1
75         self._image_border = 0
76         # alist = ["a", "a:href", "a:name", "b", "br" ]
77         # dlist = ["*"]
78         # tag_filter = HTMLTagFilter.HTMLTagFilter(HTMLTagFilter.DENY_ALLOW, alist, dlist)
79         self._release = release
80
81         self.index_add("figs", [])
82
83         self._anchor = ""
84         for line in self.input_iter:
85             # line = self._default_markup_rule(line)
86             # head-of-line rules
87             if re.search(ur"^☆{{{$", line):
88                 self._inline(line)
89                 continue
90             elif re.search(ur"^☆image_dir:", line):
91                 self._image_dir = re.search(ur"^☆image_dir:\s*(.*)$", line).group(1)
92                 continue
93             elif re.search(ur"^☆image_border:\s(on|On|ON)", line):
94                 self._image_border = 1
95                 continue
96             elif re.search(ur"^☆comment\s{{{$", line):
97                 self._comment(line)
98                 continue
99             elif re.search(ur"^☆\*", line):
100                 self._anchor = re.sub(ur"^☆\*", "", line).strip()
101                 continue
102             elif re.search(ur"^☆clear\s+", line):
103                 self._clear(line)
104                 continue
105             elif re.search(ur"^・", line):
106                 self._ulist(line)
107                 continue
108             elif re.search(ur"^[0-9]\.", line):
109                 self._olist(line)
110                 continue
111             elif re.search(ur"^☆begin-column:", line):
112                 self._begin_column(line)
113                 continue
114             elif re.search(ur"^☆end-column", line):
115                 self._end_column(line)
116                 continue
117             elif re.search(ur"^☆space", line):
118                 self._space(line)
119                 continue
120             elif re.search(ur"^☆call_tables", line):
121                 self._call_tables(line)
122                 continue
123             elif re.search(ur"^●", line):
124                 self._head_l(line)
125                 continue
126             elif re.search(ur"^○", line):
127                 self._head_m(line)
128                 continue
129             elif re.search(ur"^☆----", line):
130                 self._newpage(line)
131                 continue
132             elif re.search(ur"^☆\+---", line):
133                 self._code(line)
134                 continue
135             elif re.search(ur"^☆表", line):
136                 self._table(line)
137                 continue
138             elif re.search(ur"^☆図", line):
139                 self._fig(line)
140                 continue
141             elif re.search(ur"^☆写真", line):
142                 self._photo(line)
143                 continue
144             elif re.search(ur"^☆リスト", line):
145                 self._list(line)
146                 continue
147             elif re.search(ur"^☆flow", line):
148                 self._flow(line)
149                 continue
150
151             if re.search(ur"^ ", line):
152                 self._paragraph(line)
153                 continue
154
155
156             if re.search(r"^\s*$", line):
157                 line = ""
158
159             line = line.strip()
160             print line
161
162         # end-of-loop
163
164     def _clear(self, line):
165         print """<div style="clear:left;"> </div>
166 """
167
168     def _head_l(self, line):
169         line = line.rstrip()
170         if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
171             self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
172             line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
173
174         line = self._default_markup_rule(line)
175         if self._anchor != "":
176             line = re.sub(ur"^●(.*)$", ur'<div id="%s"><h3>\1</h3></div>' % self._anchor, line)
177             self._anchor = ""
178         else:
179             line = re.sub(ur"^●(.*)$", ur"<h3>\1</h3>", line)
180         print line
181
182     def _head_m(self, line):
183         line = line.rstrip()
184         if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
185             self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
186             line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
187
188         line = self._default_markup_rule(line)
189         if self._anchor != "":
190             line = re.sub(ur"^○(.*)$", ur'<div id="%s"><h4>\1</h4></div>' % self._anchor, line)
191             self._anchor = ""
192         else:
193             line = re.sub(ur"^○(.*)$", ur"<h4>\1</h4>", line)
194         print line
195
196     def _paragraph(self, line):
197         line = self._default_markup_rule(line)
198         line = "<p>" + line + "</p>"
199         print line
200
201     def _newpage(self, line):
202         line = re.sub(ur"☆----.*-{0,1}", u"<hr>", line)
203         print line
204
205     def load_index(self, path_to_index):
206         """
207         load index database.
208
209         @param path_to_index: index db's path
210         @type path_to_index: string
211         """
212         # load index
213         try:
214             index_file = open(path_to_index, "r")
215             self._index_past = pickle.load(index_file)
216             index_file.close()
217         except IOError:
218             sys.stderr.write("warn: cannot read index file,\n")
219
220     def save_index(self, path_to_index):
221         """
222         save index database.
223
224         @param path_to_index: index db's path
225         @type path_to_index: string
226         """
227         # save index
228         try:
229             index_file = open(path_to_index, "w")
230             pickle.dump(self._index, index_file)
231             index_file.close()
232         except IOError:
233             sys.stderr.write("warn: cannot write index file,\n")
234
235     def make_hashlist(self, path_to_hashfile):
236         """
237         create hash list.
238
239         @param path_to_hashfile: hashfile's path
240         @type path_to_hashfile: string
241         """
242         try:
243             file_img_hash = open(path_to_hashfile, "r")
244         except IOError:
245             sys.stderr.write("cannot open file: %s" % path_img_hash)
246             return None;
247
248         self.hashlist = {};
249         for line in file_img_hash:
250             splited = line.strip().split("\t", 2)
251             # hashlist's format: <hash> \t <filename>
252             self.hashlist[splited[1]] = splited[0]
253
254     def _call_tagles(self):
255         pass
256
257     def _escape(self, line):
258         line = re.sub(ur"&", ur"&amp;", line)
259         line = re.sub(ur"<", ur"&lt;", line)
260         line = re.sub(ur">", ur"&gt;", line)
261         return line
262
263     def _default_markup_rule(self, line):
264         """
265         apply default markup rules.
266
267         @param line: string to apply markup
268         @type line: string
269         """
270         line = self._escape(line)
271
272         # apply filter
273         # line = tag_filter.apply(line)
274
275         line = re.sub(ur"[★*](表[0-9〜、]+)", ur"<b>\1</b>", line)
276         line = re.sub(ur"[★*](図[0-9〜、]+)", ur"<b>\1</b>", line)
277         line = re.sub(ur"[★*](写真[0-9〜、]+)", ur"<b>\1</b>", line)
278         line = re.sub(ur"[★*](リスト[0-9〜、]+)", ur"<b>\1</b>", line)
279         line = re.sub(ur"[★*](コラム[0-9〜、]+)", ur"<b>\1</b>", line)
280         line = re.sub(ur"[★*]b\[(.*?)\]", ur"<b>\1</b>", line)
281         line = re.sub(ur"[★*]b\{(.*?)\}", ur"<b>\1</b>", line)
282         line = re.sub(ur"[★*]\[(\S*) (.*?)\]", r'<a href="\1">\2</a>', line)
283         line = re.sub(ur"[★*]\[(\S*)\]", r'<a href="\1">\1</a>', line)
284
285         # comment
286         if re.search(ur"^☆#", line):
287             line = ""
288
289         return line
290
291
292     def _ulist(self, line):
293         """Proccess ul"""
294         print "<ul>"
295         while re.search(ur"^・", line):
296             line = self._default_markup_rule(line)
297             print re.sub(ur"^・(.*)$", ur"<li>\1</li>", line.strip())
298             line = self.input_iter.next()
299         print "</ul>\n"
300
301     def _olist(self, line):
302         """Proccess ul"""
303         print "<ol>"
304         while re.search(ur"^[0-9]+\.", line):
305             line = self._default_markup_rule(line)
306             print re.sub(ur"^[0-9]+\.(.*)$", ur"<li>\1</li>", line.strip())
307             line = self.input_iter.next()
308         print "</ol>\n"
309
310
311
312     def _begin_column(self, line):
313         """Proccess column"""
314         try:
315             str_title = re.search(ur"^☆begin-column:(.*)$", line).group(1)
316         except AttributeError:
317             str_title = ""
318
319         html = """<table bgcolor="#DDDDDD" border="0" cellpadding="6" width="95%%">
320     <tr><th>%s</th></tr>
321     <tr><td><span style="font-size: 85%%;">
322     """ % (str_title)
323         print html
324
325     def _end_column(self, line):
326         print """</span></td></tr>
327     </table>
328     """
329
330     def _list_start(self):
331         return "<pre>"
332
333     def _list_end(self):
334         return "</pre>"
335
336     def _list(self, line):
337         try:
338             str_title = re.search(ur"^☆(リスト.*)$", line).group(1)
339         except AttributeError:
340             str_title = ""
341         print "<p><b>%s</b></p>" % (str_title)
342         print self._list_start()
343
344         for line in self.input_iter:
345             line = line.strip("\n\r")
346             line = self._escape(line)
347             line = line = re.sub(ur"[★*]b\[(.*?)]", ur"<b>\1</b>", line)
348             line = line = re.sub(ur"[★*]b{(.*?)}", ur"<b>\1</b>", line)
349             if re.search(ur"""^☆\+---""", line):
350                 break
351             print line
352         print self._list_end()
353
354     def _code(self, line):
355         print self._list_start()
356
357         for line in self.input_iter:
358             line = self._escape(line)
359             line = line = re.sub(ur"[★*]b\[(.*?)]", ur"<b>\1</b>", line)
360             line = line = re.sub(ur"[★*]b{(.*?)}", ur"<b>\1</b>", line)
361
362             if re.search(ur"^☆\+---$", line):
363                 break
364             print line,
365         print self._list_end()
366
367     def _inline(self, line):
368         for line in self.input_iter:
369             line = line.strip()
370             if re.search(ur"^☆}}}", line):
371                 break
372             print line
373
374     def _comment(self, line):
375         for line in self.input_iter:
376             line = line.strip()
377             if re.search(ur"^☆}}}", line):
378                 break
379
380     def _space(self, line):
381         print "<br><br>"
382
383     def _flow(self, line):
384         down_arrow = "http://static.sourceforge.jp/crystal/22x22/actions/1downarrow.png"
385         flow_header = """<div style="text-align:center; border: 1px solid; background-color:#EFF2F0; width:90%; margin: 0 auto 1em;">
386 """
387         flow_title = """<div style="text-align:left; padding:4px 4px 4px 1em; margin-bottom: 1em; border-bottom: 1px solid; font-weight: bold; background-color:#BCD;">
388 %s
389 </div>"""
390         flow_footer = """</div>
391 """
392         flow_item = """<div>
393 %s
394 <p>%s</p>
395 </div>
396
397 """
398         arrow = '<div style="margin:1em auto;"><img src="%s"></div>\n' % (down_arrow,)
399
400         rex_title = re.compile(ur"^☆flow\s+(.*)$")
401         if rex_title.search(line):
402             title = rex_title.search(line).group(1)
403         else:
404             title = ""
405
406         rex_file = re.compile(ur"^([^:]*):(.*)$")
407         outputs = []
408         for line in self.input_iter:
409             if re.search(r"^\s*$", line):
410                 break
411             match = rex_file.search(line)
412             if match:
413                 file = os.path.join(self._image_dir, match.group(1))
414                 cap = self._default_markup_rule(match.group(2))
415             else:
416                 continue
417             fig = self._anchored_fig(file, cap)
418             outputs.append(flow_item % (fig, cap))
419
420         print flow_header
421         print flow_title % (title,)
422         print arrow.join(outputs)
423         print flow_footer
424         
425
426     def _fig_start(self, cap="", styles=[], width=0, height=0):
427         params = dict(style="", tablewidth="")
428         if width != 0:
429             params["style"] = "width:%d;" % (width,)
430             params["tablewidth"] = 'width="%d"' % (width,)
431
432         if "lfloat" in styles:
433             return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0" style="float:left; padding-left: 0.5em; %(style)s">
434     <tr> <td valign="top" align="center">
435     """ % params
436         elif "left" in styles:
437             return """<table %(tablewidth)s border="0" cellpadding="0" cellspacing="0" style="padding-left: 0.5em; %(style)s">
438     <tr> <td valign="top" align="center">
439     """ %params
440         else:
441             return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0">
442     <tr> <td valign="top" align="center">
443     """ % params
444
445     def _fig_end(self, cap="", styles=[]):
446         return """</td> </tr>
447     <tr> <td><span style="font-size: 80%%; font-weight: bold;">
448     %s
449     </span></td> </tr>
450     </table>
451     """ % (cap)
452
453     def _get_png_geom(self, filepath):
454         desc = deterfile.file(filepath)
455         try:
456             m = re.match(r"([0-9]+)\s*x\s*([0-9]+)", desc[1])
457         except IndexError:
458             err = ",".join(desc)
459             raise Exception("deterfile error: %s, file: %s . " % (err,filepath))
460         if m:
461             w = m.group(1)
462             h = m.group(2)
463             return (int(w), int(h))
464         else:
465             return None
466
467     def _fig(self, line):
468         try:
469             str_title = re.search(ur"^☆(図.*)$", line).group(1)
470         except AttributeError:
471             str_title = ""
472         if str_title.find(u"図*") == 0:
473             str_title = str_title.replace(u"図*", "")
474         line = self.input_iter.next()
475         styles = []
476         if line.find("@") == 0:
477             styles = line.strip().replace("@", "").split(",")
478             line = self.input_iter.next()
479
480         imgname = ""
481         imgname_s = ""
482         hash = ""
483         hash_s = ""
484         match_o1 = re.search(ur"<([^,]*?)>", line)
485         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
486         if not match_o1 == None:
487             imgname = match_o1.group(1)
488             imgname = os.path.join(self._image_dir, imgname)
489             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", imgname)
490         elif not match_o2 == None:
491             imgname = match_o2.group(1)
492             imgname = os.path.join(self._image_dir, imgname)
493             imgname_s = match_o2.group(2)
494             imgname_s = os.path.join(self._image_dir, imgname_s)
495
496         try:
497             geom = self._get_png_geom(imgname_s)
498         except Exception, e:
499             sys.stderr.write(str(e) + "\nline: " + line.encode("utf-8"))
500             sys.exit(-1)
501
502         if geom:
503             w = geom[0]
504             h = geom[1]
505             print self._fig_start("", styles, width=w, height=h)
506         else:
507             print self._fig_start("", styles)
508         print self._anchored_fig(imgname, str_title, imgname_s)
509         print self._fig_end(str_title, styles);
510
511         dic = self.index("figs")
512         dic.append(imgname)
513         if imgname_s != "":
514             dic.append(imgname_s)
515
516     def _photo(self, line):
517         try:
518             str_title = re.search(ur"^☆(写真.*)$", line).group(1)
519         except AttributeError:
520             str_title = ""
521         if str_title.find(u"写真*") == 0:
522             str_title = str_title.replace(u"写真*", "")
523         line = self.input_iter.next()
524         styles = []
525         if line.find("@") == 0:
526             styles = line.strip().replace("@", "").split(",")
527             line = self.input_iter.next()
528
529         imgname = ""
530         imgname_s = ""
531         hash = ""
532         hash_s = ""
533         match_o1 = re.search(ur"<([^,]*?)>", line)
534         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
535         if not match_o1 == None:
536             imgname = match_o1.group(1)
537             imgname = os.path.join(self._image_dir, imgname)
538             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", imgname)
539         elif not match_o2 == None:
540             imgname = match_o2.group(1)
541             imgname = os.path.join(self._image_dir, imgname)
542             imgname_s = match_o2.group(2)
543             imgname_s = os.path.join(self._image_dir, imgname_s)
544
545         geom = self._get_png_geom(imgname_s)
546         if geom:
547             w = geom[0]
548             h = geom[1]
549             print self._fig_start("", styles, width=w, height=h)
550         else:
551             print self._fig_start("", styles)
552         print self._anchored_fig(imgname, str_title, imgname_s)
553         print self._fig_end(str_title, styles);
554
555         dic = self.index("figs")
556         dic.append(imgname)
557         if imgname_s != "":
558             dic.append(imgname_s)
559
560         
561     def _anchored_fig(self, file, alt, file_s=""):
562
563         if file_s == "":
564             file_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", file)
565
566         if not os.path.isfile(file_s):
567             file_s = file
568
569         alt = re.sub(r"""<[A-Za-z0-9!/]+.*?>""", "", alt)
570
571         ret = """<a href="%s">
572   <img src="%s" alt="%s">
573 </a>
574 """ % (file, file_s, alt)
575
576
577         return ret
578         
579
580     def _fig_release(self, line):
581         try:
582             str_title = re.search(ur"^☆(図.*)$", line).group(1)
583         except AttributeError:
584             str_title = ""
585         print self._fig_start()
586
587         line = self.input_iter.next()
588         imgname = ""
589         imgname_s = ""
590         hash = ""
591         hash_s = ""
592         match_o1 = re.search(ur"<([^,]*?)>", line)
593         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
594         if not match_o1 == None:
595             imgname = match_o1.group(1)
596             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", match_o1.group(1))
597         elif not match_o2 == None:
598             imgname = match_o1.group(1)
599             imgname_s = match_o1.group(2)
600
601
602         hash = self.hashlist.get(imgname, "")
603         hash_s = self.hashlist.get(imgname_s, "")
604         if hash_s == "":
605             hash_s = hash
606
607         print """<a href="/blob.pl?id=%s">
608      <slash type="image" id="%s" title="%s">
609      </a>
610      """ % (hash, hash_s, str_title)
611         
612
613         dic = self.index("figs")
614         dic.append(imgname)
615         if imgname_s != "":
616             dic.append(imgname_s)
617
618         print self._fig_end(str_title);
619
620
621     def _table_start(self, cap):
622         return """<div style="width:90%%; margin-left:auto;margin-right:auto;"><table align="center" border="1" class="table" width="100%%">
623     <caption><b>%s</b></caption>
624     """ % cap
625
626     def _table_end(self, footnote=""):
627         return "</table>\n%s</div>\n" % (footnote,)
628
629     def _table(self, line):
630         str_title = ""
631         self._table_buf1 = ""
632
633         try:
634             str_title = re.search(ur"^☆(表.*)$", line).group(1)
635             fig_name =  re.search(ur"^☆(表[0-9A-Z]*)", line).group(1)
636         except AttributeError:
637             str_title = ""
638             fig_name = ""
639         if str_title.find(u"表*") == 0:
640             str_title = str_title.replace(u"表*", "")
641
642         print self._table_start(str_title)
643         self._table_buf1 =  self._table_start(str_title)
644
645         num_row = 0
646         table_contents = []
647         footnote = ""
648         for line in self.input_iter:
649             line = line.strip(" \n")
650             line = self._default_markup_rule(line)
651             if re.search(ur"^\s*$", line):
652                 break
653             if re.search(ur"^※", line):
654                 footnote = re.search(ur"^(※.*)$", line).group(1)
655                 break
656             if re.search(ur"^〓", line):
657                 line = re.sub(ur"^〓", "", line)
658                 tag_mode = "th"
659             else:
660                 tag_mode = "td"
661             table_contents.append([])
662             num_col = 0
663             for item in line.split("\t"):
664                 if item == "":
665                     if num_col == 0:
666                         n = 1
667                         try:
668                             while table_contents[num_row-n][num_col]["item"] == "":
669                                 n += 1
670                             table_contents[num_row-n][num_col]["row"] += 1
671                         except IndexError:
672                             pass
673                     else:
674                         n = 1
675                         try:
676                             while table_contents[num_row][num_col-n]["item"] == "":
677                                 n += 1
678                             table_contents[num_row][num_col-n]["col"] += 1
679                         except IndexError:
680                             pass
681                 if item == u"↓":
682                     n = 1
683                     try:
684                         while table_contents[num_row-n][num_col]["item"] == "":
685                             n += 1
686                         table_contents[num_row-n][num_col]["row"] += 1
687                         item = ""
688                     except IndexError:
689                         pass
690
691                 if re.search(r'^".*"$', item):
692                     item = re.search(r'^"(.*)"$', item).group(1)
693                     table_contents[num_row].append({"tag":"th","item":item,"row":1,"col":1})
694                 else:
695                     table_contents[num_row].append({"tag":tag_mode,"item":item,"row":1,"col":1})
696                 num_col = num_col + 1
697             num_row = num_row + 1
698
699         for row_item in table_contents:
700             line = "<tr>"
701             for item in row_item:
702                 if item["item"] == "":
703                     continue
704                 line = line + "<" + item["tag"]
705                 if not item["row"] == 1:
706                     line = line + (' rowspan="%s"' % item["row"])
707                 if not item["col"] == 1:
708                     line = line + (' colspan="%s"' % item["col"])
709                 line = line +  ">"
710                 line = line + item["item"]
711                 line = line + "</" + item["tag"] + ">"
712             line = line + "</tr>\n"
713             print line,
714             self._table_buf1 = self._table_buf1 + line
715
716             # line = "<tr><th>" + re.sub(ur"^〓", "", line) + "</th></tr>"
717             # line = line.replace("\t", "</th><th>")
718             # print line
719             # else:
720             # line = "<tr><td>" + line + "</td></tr>"
721             # line = line.replace("\t", "</td><td>")
722             # print line
723
724         print self._table_end(footnote)
725         self._table_buf1 =  self._table_buf1 + self._table_end()
726         if self.index_haskey("tables"):
727             self.index("tables")[fig_name] = self._table_buf1
728         else:
729             self.index_add("tables", {fig_name:self._table_buf1})
730
731     def _call_tables(self, line):
732         try:
733             fig_name =  re.search(ur"^☆call_tables\((表[0-9A-Z]+)", line).group(1)
734         except AttributeError:
735             return
736         print self.index("tables")[fig_name]