OSDN Git Service

create markupper2
[otptools/otptools.git] / markupper2.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 """markupper2.py - markupper v2."""
4
5 import sys
6 import os
7 import re
8 import codecs
9 import pickle
10
11 import deterfile
12
13 class MarkUpper2(object):
14     """
15     """
16     def __init__(self):
17         self._vars = {}
18         self._anchor = ""
19
20     def markup(self, input_iter):
21         """
22         Do markup.
23
24         @param input_iter: iterator to use as input
25         @type input_iter: iterator
26         """
27         self.input = input_iter
28
29         for line in self.input:
30             if re.search(ur"^☆{{{$", line):
31                 self._inline(line)
32                 continue
33             elif re.search(ur'^\set{([A-Za-z0-9_]+)\s*,\s*"([^"]*)"\s*}', line):
34                 g = re.search(ur'^\set{([A-Za-z0-9_]+)\s*,\s*"([^"]*)"\s*}', line)
35                 self._vars[g.group(1)] = g.group(2)
36                 continue
37             elif re.search(ur"^☆image_border:\s(on|On|ON)", line):
38                 self._image_border = 1
39                 continue
40             elif re.search(ur"^☆comment\s{{{$", line):
41                 self._comment(line)
42                 continue
43             elif re.search(ur"^☆\*", line):
44                 self._anchor = re.sub(ur"^☆\*", "", line).strip()
45                 continue
46             elif re.search(ur"^☆clear\s+", line):
47                 self._clear(line)
48                 continue
49             elif re.search(ur"^・", line):
50                 self._ulist(line)
51                 continue
52             elif re.search(ur"^[0-9]\.", line):
53                 self._olist(line)
54                 continue
55             elif re.search(ur"^☆begin-column:", line):
56                 self._begin_column(line)
57                 continue
58             elif re.search(ur"^☆end-column", line):
59                 self._end_column(line)
60                 continue
61             elif re.search(ur"^☆space", line):
62                 self._space(line)
63                 continue
64             elif re.search(ur"^☆call_tables", line):
65                 self._call_tables(line)
66                 continue
67             elif re.search(ur"^●", line):
68                 self._head_l(line)
69                 continue
70             elif re.search(ur"^○", line):
71                 self._head_m(line)
72                 continue
73             elif re.search(ur"^☆----", line):
74                 self._newpage(line)
75                 continue
76             elif re.search(ur"^☆\+---", line):
77                 self._code(line)
78                 continue
79             elif re.search(ur"^☆表", line):
80                 self._table(line)
81                 continue
82             elif re.search(ur"^☆図", line):
83                 self._fig(line)
84                 continue
85             elif re.search(ur"^☆写真", line):
86                 self._photo(line)
87                 continue
88             elif re.search(ur"^☆リスト", line):
89                 self._list(line)
90                 continue
91             elif re.search(ur"^☆flow", line):
92                 self._flow(line)
93                 continue
94
95             if re.search(ur"^ ", line):
96                 self._paragraph(line)
97                 continue
98
99
100             if re.search(r"^\s*$", line):
101                 line = ""
102
103             line = line.strip()
104             print line
105
106         # end-of-loop
107
108     def _clear(self, line):
109         print """<div style="clear:left;"> </div>
110 """
111
112     def _head_l(self, line):
113         line = line.rstrip()
114         if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
115             self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
116             line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
117
118         line = self._default_markup_rule(line)
119         if self._anchor != "":
120             line = re.sub(ur"^●(.*)$", ur'<div id="%s"><h3>\1</h3></div>' % self._anchor, line)
121             self._anchor = ""
122         else:
123             line = re.sub(ur"^●(.*)$", ur"<h3>\1</h3>", line)
124         print line
125
126     def _head_m(self, line):
127         line = line.rstrip()
128         if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
129             self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
130             line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
131
132         line = self._default_markup_rule(line)
133         if self._anchor != "":
134             line = re.sub(ur"^○(.*)$", ur'<div id="%s"><h4>\1</h4></div>' % self._anchor, line)
135             self._anchor = ""
136         else:
137             line = re.sub(ur"^○(.*)$", ur"<h4>\1</h4>", line)
138         print line
139
140     def _paragraph(self, line):
141         line = self._default_markup_rule(line)
142         line = "<p>" + line + "</p>"
143         print line
144
145     def _newpage(self, line):
146         line = re.sub(ur"☆----.*-{0,1}", u"<hr>", line)
147         print line
148
149     def make_hashlist(self, path_to_hashfile):
150         """
151         create hash list.
152
153         @param path_to_hashfile: hashfile's path
154         @type path_to_hashfile: string
155         """
156         try:
157             file_img_hash = open(path_to_hashfile, "r")
158         except IOError:
159             sys.stderr.write("cannot open file: %s" % path_img_hash)
160             return None;
161
162         self.hashlist = {};
163         for line in file_img_hash:
164             splited = line.strip().split("\t", 2)
165             # hashlist's format: <hash> \t <filename>
166             self.hashlist[splited[1]] = splited[0]
167
168     def _call_tagles(self):
169         pass
170
171     def _escape(self, line):
172         line = re.sub(ur"&", ur"&amp;", line)
173         line = re.sub(ur"<", ur"&lt;", line)
174         line = re.sub(ur">", ur"&gt;", line)
175         return line
176
177     def _default_markup_rule(self, line):
178         """
179         apply default markup rules.
180
181         @param line: string to apply markup
182         @type line: string
183         """
184         line = self._escape(line)
185
186         # apply filter
187         # line = tag_filter.apply(line)
188
189         line = re.sub(ur"[★*](表[0-9~、]+)", ur"<b>\1</b>", line)
190         line = re.sub(ur"[★*](図[0-9~、]+)", ur"<b>\1</b>", line)
191         line = re.sub(ur"[★*](写真[0-9~、]+)", ur"<b>\1</b>", line)
192         line = re.sub(ur"[★*](リスト[0-9~、]+)", ur"<b>\1</b>", line)
193         line = re.sub(ur"[★*]b\[(.*?)\]", ur"<b>\1</b>", line)
194         line = re.sub(ur"[★*]b\{(.*?)\}", ur"<b>\1</b>", line)
195         line = re.sub(ur"[★*]\[(\S*) (.*?)\]", r'<a href="\1">\2</a>', line)
196         line = re.sub(ur"[★*]\[(\S*)\]", r'<a href="\1">\1</a>', line)
197
198         # comment
199         if re.search(ur"^☆#", line):
200             line = ""
201
202         return line
203
204
205     def _ulist(self, line):
206         """Proccess ul"""
207         print "<ul>"
208         while re.search(ur"^・", line):
209             line = self._default_markup_rule(line)
210             print re.sub(ur"^・(.*)$", ur"<li>\1</li>", line.strip())
211             line = self.input.next()
212         print "</ul>\n"
213
214     def _olist(self, line):
215         """Proccess ul"""
216         print "<ol>"
217         while re.search(ur"^[0-9]+\.", line):
218             line = self._default_markup_rule(line)
219             print re.sub(ur"^[0-9]+\.(.*)$", ur"<li>\1</li>", line.strip())
220             line = self.input.next()
221         print "</ol>\n"
222
223
224
225     def _begin_column(self, line):
226         """Proccess column"""
227         try:
228             str_title = re.search(ur"^☆begin-column:(.*)$", line).group(1)
229         except AttributeError:
230             str_title = ""
231
232         html = """<table bgcolor="#DDDDDD" border="0" cellpadding="6" width="95%%">
233     <tr><th>%s</th></tr>
234     <tr><td><span style="font-size: 85%%;">
235     """ % (str_title)
236         print html
237
238     def _end_column(self, line):
239         print """</span></td></tr>
240     </table>
241     """
242
243     def _list_start(self):
244         return "<pre>"
245
246     def _list_end(self):
247         return "</pre>"
248
249     def _list(self, line):
250         try:
251             str_title = re.search(ur"^☆(リスト.*)$", line).group(1)
252         except AttributeError:
253             str_title = ""
254         print "<p><b>%s</b></p>" % (str_title)
255         print self._list_start()
256
257         for line in self.input:
258             line = line.strip("\n\r")
259             line = self._escape(line)
260             line = line = re.sub(ur"[★*]b\[(.*?)]", ur"<b>\1</b>", line)
261             line = line = re.sub(ur"[★*]b{(.*?)}", ur"<b>\1</b>", line)
262             if re.search(ur"""^☆\+---""", line):
263                 break
264             print line
265         print self._list_end()
266
267     def _code(self, line):
268         print self._list_start()
269
270         for line in self.input:
271             line = self._escape(line)
272             line = line = re.sub(ur"[★*]b\[(.*?)]", ur"<b>\1</b>", line)
273             line = line = re.sub(ur"[★*]b{(.*?)}", ur"<b>\1</b>", line)
274
275             if re.search(ur"^☆\+---$", line):
276                 break
277             print line,
278         print self._list_end()
279
280     def _inline(self, line):
281         for line in self.input:
282             #        line = line.strip()
283             if re.search(ur"^☆}}}", line):
284                 break
285             print line
286
287     def _comment(self, line):
288         for line in self.input:
289             line = line.strip()
290             if re.search(ur"^☆}}}", line):
291                 break
292
293     def _space(self, line):
294         print "<br><br>"
295
296     def _flow(self, line):
297         down_arrow = "http://static.sourceforge.jp/crystal/22x22/actions/1downarrow.png"
298         flow_header = """<div style="text-align:center; border: 1px solid; background-color:#EFF2F0; width:90%; margin: 0 auto 1em;">
299 """
300         flow_title = """<div style="text-align:left; padding:4px 4px 4px 1em; margin-bottom: 1em; border-bottom: 1px solid; font-weight: bold; background-color:#BCD;">
301 %s
302 </div>"""
303         flow_footer = """</div>
304 """
305         flow_item = """<div>
306 %s
307 <p>%s</p>
308 </div>
309
310 """
311         arrow = '<div style="margin:1em auto;"><img src="%s"></div>\n' % (down_arrow,)
312
313         rex_title = re.compile(ur"^☆flow\s+(.*)$")
314         if rex_title.search(line):
315             title = rex_title.search(line).group(1)
316         else:
317             title = ""
318
319         rex_file = re.compile(ur"^([^:]*):(.*)$")
320         outputs = []
321         for line in self.input:
322             if re.search(r"^\s*$", line):
323                 break
324             match = rex_file.search(line)
325             if match:
326                 file = os.path.join(self._image_dir, match.group(1))
327                 cap = self._default_markup_rule(match.group(2))
328             else:
329                 continue
330             fig = self._anchored_fig(file, cap)
331             outputs.append(flow_item % (fig, cap))
332
333         print flow_header
334         print flow_title % (title,)
335         print arrow.join(outputs)
336         print flow_footer
337         
338
339     def _fig_start(self, cap="", styles=[], width=0, height=0):
340         params = dict(style="", tablewidth="")
341         if width != 0:
342             params["style"] = "width:%d;" % (width,)
343             params["tablewidth"] = 'width="%d"' % (width,)
344
345         if "lfloat" in styles:
346             return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0" style="float:left; padding-left: 0.5em; %(style)s">
347     <tr> <td valign="top" align="center">
348     """ % params
349         elif "left" in styles:
350             return """<table %(tablewidth)s border="0" cellpadding="0" cellspacing="0" style="padding-left: 0.5em; %(style)s">
351     <tr> <td valign="top" align="center">
352     """ %params
353         else:
354             return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0">
355     <tr> <td valign="top" align="center">
356     """ % params
357
358     def _fig_end(self, cap="", styles=[]):
359         return """</td> </tr>
360     <tr> <td><span style="font-size: 80%%; font-weight: bold;">
361     %s
362     </span></td> </tr>
363     </table>
364     """ % (cap)
365
366     def _get_png_geom(self, filepath):
367         desc = deterfile.file(filepath)
368         try:
369             m = re.match(r"([0-9]+)\s*x\s*([0-9]+)", desc[1])
370         except IndexError:
371             err = ",".join(desc)
372             raise Exception("deterfile error: %s, file: %s" % (err,filepath))
373         if m:
374             w = m.group(1)
375             h = m.group(2)
376             return (int(w), int(h))
377         else:
378             return None
379
380     def _fig(self, line):
381         try:
382             str_title = re.search(ur"^☆(図.*)$", line).group(1)
383         except AttributeError:
384             str_title = ""
385         if str_title.find(u"図*") == 0:
386             str_title = str_title.replace(u"図*", "")
387         line = self.input.next()
388         styles = []
389         if line.find("@") == 0:
390             styles = line.strip().replace("@", "").split(",")
391             line = self.input.next()
392
393         if line[0] == "<":
394             imgname = ""
395             imgname_s = ""
396             hash = ""
397             hash_s = ""
398             match_o1 = re.search(ur"<([^,]*?)>", line)
399             match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
400             if not match_o1 == None:
401                 imgname = match_o1.group(1)
402                 imgname = os.path.join(self._image_dir, imgname)
403                 imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", imgname)
404             elif not match_o2 == None:
405                 imgname = match_o2.group(1)
406                 imgname = os.path.join(self._image_dir, imgname)
407                 imgname_s = match_o2.group(2)
408                 imgname_s = os.path.join(self._image_dir, imgname_s)
409
410             geom = self._get_png_geom(imgname_s)
411             if geom:
412                 w = geom[0]
413                 h = geom[1]
414                 print self._fig_start("", styles, width=w, height=h)
415             else:
416                 print self._fig_start("", styles)
417             print self._anchored_fig(imgname, str_title, imgname_s)
418             print self._fig_end(str_title, styles);
419         else:
420             hash = ""
421             hash_s = ""
422             match_o = re.search(ur"\[(.*?),\s*(.*?)\]", line)
423             try:
424                 print """<table width="500" align="center" border="0" cellpadding="0" cellspacing="0">
425     <tr> <td valign="top" align="center">
426 """
427                 print """<a href="/blob.pl?id=%s">
428   <slash-image id="%s" title="%s">
429 </a>""" % (match_o.group(1), match_o.group(2), str_title)
430                 print """</td> </tr>
431     <tr> <td><span style="font-size: 80%%; font-weight: bold;">
432     %s
433     </span></td> </tr>
434     </table>
435 """ % (str_title)
436             except IndexError:
437                 print >> sys.stderr, "error:cannot parse id. " + line
438
439     def _photo(self, line):
440         try:
441             str_title = re.search(ur"^☆(写真.*)$", line).group(1)
442         except AttributeError:
443             str_title = ""
444         if str_title.find(u"写真*") == 0:
445             str_title = str_title.replace(u"写真*", "")
446         line = self.input.next()
447         styles = []
448         if line.find("@") == 0:
449             styles = line.strip().replace("@", "").split(",")
450             line = self.input.next()
451
452         imgname = ""
453         imgname_s = ""
454         hash = ""
455         hash_s = ""
456         match_o1 = re.search(ur"<([^,]*?)>", line)
457         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
458         if not match_o1 == None:
459             imgname = match_o1.group(1)
460             imgname = os.path.join(self._image_dir, imgname)
461             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", imgname)
462         elif not match_o2 == None:
463             imgname = match_o2.group(1)
464             imgname = os.path.join(self._image_dir, imgname)
465             imgname_s = match_o2.group(2)
466             imgname_s = os.path.join(self._image_dir, imgname_s)
467
468         geom = self._get_png_geom(imgname_s)
469         if geom:
470             w = geom[0]
471             h = geom[1]
472             print self._fig_start("", styles, width=w, height=h)
473         else:
474             print self._fig_start("", styles)
475         print self._anchored_fig(imgname, str_title, imgname_s)
476         print self._fig_end(str_title, styles);
477
478
479         
480     def _anchored_fig(self, file, alt, file_s=""):
481
482         if file_s == "":
483             file_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", file)
484
485         if not os.path.isfile(file_s):
486             file_s = file
487
488         alt = re.sub(r"""<[A-Za-z0-9!/]+.*?>""", "", alt)
489
490         ret = """<a href="%s">
491   <img src="%s" alt="%s">
492 </a>
493 """ % (file, file_s, alt)
494
495
496         return ret
497         
498
499     def _fig_release(self, line):
500         try:
501             str_title = re.search(ur"^☆(図.*)$", line).group(1)
502         except AttributeError:
503             str_title = ""
504         print self._fig_start()
505
506         line = self.input.next()
507         imgname = ""
508         imgname_s = ""
509         hash = ""
510         hash_s = ""
511         match_o1 = re.search(ur"<([^,]*?)>", line)
512         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
513         if not match_o1 == None:
514             imgname = match_o1.group(1)
515             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", match_o1.group(1))
516         elif not match_o2 == None:
517             imgname = match_o1.group(1)
518             imgname_s = match_o1.group(2)
519
520
521         hash = self.hashlist.get(imgname, "")
522         hash_s = self.hashlist.get(imgname_s, "")
523         if hash_s == "":
524             hash_s = hash
525
526         print """<a href="/blob.pl?id=%s">
527      <slash type="image" id="%s" title="%s">
528      </a>
529      """ % (hash, hash_s, str_title)
530         
531
532
533         print self._fig_end(str_title);
534
535
536     def _table_start(self, cap):
537         return """<div style="width:90%%; margin-left:auto;margin-right:auto;"><table align="center" border="1" class="table" width="100%%">
538     <caption><b>%s</b></caption>
539     """ % cap
540
541     def _table_end(self, footnote=""):
542         return "</table>\n%s</div>\n" % (footnote,)
543
    def _table(self, line):
        """Print a table read from the following tab-separated input lines.

        ``line`` is the ☆表 title line; a leading "表*" is removed from the
        title.  Rows are read until a blank line or a line starting with ※
        (which becomes the footnote).  A row starting with 〓 is a header
        row; a cell wrapped in double quotes is a header cell.  An empty
        cell extends the span of a neighbouring cell, and a cell that is
        just ↓ extends the rowspan of the cell above it.

        The markup is printed and also collected in self._table_buf1; the
        copy in the buffer is closed without the footnote.
        """
        str_title = ""
        self._table_buf1 = ""

        try:
            str_title = re.search(ur"^☆(表.*)$", line).group(1)
            # fig_name is not used anywhere else in this method.
            fig_name =  re.search(ur"^☆(表[0-9A-Z]*)", line).group(1)
        except AttributeError:
            str_title = ""
            fig_name = ""
        if str_title.find(u"表*") == 0:
            str_title = str_title.replace(u"表*", "")

        print self._table_start(str_title)
        self._table_buf1 =  self._table_start(str_title)

        # First pass: parse the rows into table_contents, a list of rows,
        # each a list of dicts with the keys "tag", "item", "row", "col".
        num_row = 0
        table_contents = []
        footnote = ""
        for line in self.input:
            line = line.strip(" \n")
            line = self._default_markup_rule(line)
            if re.search(ur"^\s*$", line):
                break
            if re.search(ur"^※", line):
                footnote = re.search(ur"^(※.*)$", line).group(1)
                break
            if re.search(ur"^〓", line):
                line = re.sub(ur"^〓", "", line)
                tag_mode = "th"
            else:
                tag_mode = "td"
            table_contents.append([])
            num_col = 0
            for item in line.split("\t"):
                if item == "":
                    if num_col == 0:
                        # Empty first cell: walk up this column to the
                        # nearest non-empty cell and grow its rowspan.
                        n = 1
                        try:
                            while table_contents[num_row-n][num_col]["item"] == "":
                                n += 1
                            table_contents[num_row-n][num_col]["row"] += 1
                        except IndexError:
                            # no such cell: leave the spans alone
                            pass
                    else:
                        # Empty cell elsewhere: walk left along this row to
                        # the nearest non-empty cell and grow its colspan.
                        n = 1
                        try:
                            while table_contents[num_row][num_col-n]["item"] == "":
                                n += 1
                            table_contents[num_row][num_col-n]["col"] += 1
                        except IndexError:
                            pass
                if item == u"↓":
                    # Explicit "continued from above" marker: grow the
                    # rowspan of the nearest non-empty cell above and make
                    # this cell empty.
                    n = 1
                    try:
                        while table_contents[num_row-n][num_col]["item"] == "":
                            n += 1
                        table_contents[num_row-n][num_col]["row"] += 1
                        item = ""
                    except IndexError:
                        pass

                # Empty cells are stored too, so that column positions stay
                # aligned; they are skipped when the rows are written out.
                if re.search(r'^".*"$', item):
                    item = re.search(r'^"(.*)"$', item).group(1)
                    table_contents[num_row].append({"tag":"th","item":item,"row":1,"col":1})
                else:
                    table_contents[num_row].append({"tag":tag_mode,"item":item,"row":1,"col":1})
                num_col = num_col + 1
            num_row = num_row + 1

        # Second pass: write one <tr> per parsed row.
        for row_item in table_contents:
            line = "<tr>"
            for item in row_item:
                if item["item"] == "":
                    continue
                line = line + "<" + item["tag"]
                if not item["row"] == 1:
                    line = line + (' rowspan="%s"' % item["row"])
                if not item["col"] == 1:
                    line = line + (' colspan="%s"' % item["col"])
                line = line +  ">"
                line = line + item["item"]
                line = line + "</" + item["tag"] + ">"
            line = line + "</tr>\n"
            print line,
            self._table_buf1 = self._table_buf1 + line

            # line = "<tr><th>" + re.sub(ur"^〓", "", line) + "</th></tr>"
            # line = line.replace("\t", "</th><th>")
            # print line
            # else:
            # line = "<tr><td>" + line + "</td></tr>"
            # line = line.replace("\t", "</td><td>")
            # print line

        print self._table_end(footnote)
        # The buffered copy is closed without the footnote.
        self._table_buf1 =  self._table_buf1 + self._table_end()
641
642     def _call_tables(self, line):
643         try:
644             fig_name =  re.search(ur"^☆call_tables\((表[0-9A-Z]+)", line).group(1)
645         except AttributeError:
646             return