OSDN Git Service

modify fig: check image size and add width attribute
[otptools/otptools.git] / markupper.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3  
4 import sys
5 import os
6 import re
7 import codecs
8 import pickle
9
10 import HTMLTagFilter
11 import deterfile
12
13 #sys.stdin = codecs.getreader('utf_8')(sys.stdin)
14 #sys.stdout = codecs.getwriter('utf_8')(sys.stdout)
15
# Lists handed to HTMLTagFilter below. Entries look like "tag" or
# "tag:attribute"; presumably alist is the allow list and dlist the deny
# list -- TODO confirm against HTMLTagFilter.
alist = ["a", "a:href", "a:name", "b", "br" ]
dlist = ["*"]

# NOTE(review): the only use of tag_filter visible in this file is commented
# out (see _default_markup_rule) -- confirm whether it is still needed.
tag_filter = HTMLTagFilter.HTMLTagFilter(HTMLTagFilter.DENY_ALLOW, alist, dlist)
# Path of the index file; not referenced elsewhere in the visible code.
path_to_index = "./_markup_index"
21
22 class _InputStream(object):
23     """InputStream base class."""
24     def __init__(self):
25         pass
26
27     def __iter__(self):
28         """return Iterator"""
29         return self
30
31     def next(self):
32         """function for iterator"""
33         pass
34
35
36 class Markupper(object):
37     """
38     """
39     def __init__(self):
40         self._input_iter = None
41         self._index_past = {}
42         self._index = {}
43         self._image_dir = ""
44
45     def index_add(self, key, val):
46         """
47         Add key and value to index.
48
49         @param key:
50         @type key:
51
52         @param val:
53         @key val:
54         """
55         self._index[key] = val
56
57     def index(self, key):
58         """
59         Get index
60         """
61         return self._index[key]
62
63     def index_haskey(self, key):
64         return self._index.has_key(key)
65
66     def markup(self, input_iter, release="0"):
67         """
68         Do markup.
69
70         @param input_iter: iterator to use as input
71         @type input_iter: iterator
72         """
73         self.input_iter = input_iter
74         self._page_counter = 1
75         # alist = ["a", "a:href", "a:name", "b", "br" ]
76         # dlist = ["*"]
77         # tag_filter = HTMLTagFilter.HTMLTagFilter(HTMLTagFilter.DENY_ALLOW, alist, dlist)
78         self._release = release
79
80         self.index_add("figs", [])
81
82         self._anchor = ""
83         for line in self.input_iter:
84             # line = self._default_markup_rule(line)
85             # head-of-line rules
86             if re.search(ur"^☆{{{$", line):
87                 self._inline(line)
88                 continue
89             elif re.search(ur"^☆image_dir:", line):
90                 self._image_dir = re.search(ur"^☆image_dir:\s*(.*)$", line).group(1)
91                 continue
92             elif re.search(ur"^☆comment\s{{{$", line):
93                 self._comment(line)
94                 continue
95             elif re.search(ur"^☆\*", line):
96                 self._anchor = re.sub(ur"^☆\*", "", line).strip()
97                 continue
98             elif re.search(ur"^☆clear\s+", line):
99                 self._clear(line)
100                 continue
101             elif re.search(ur"^・", line):
102                 self._ulist(line)
103                 continue
104             elif re.search(ur"^☆begin-column:", line):
105                 self._begin_column(line)
106                 continue
107             elif re.search(ur"^☆end-column", line):
108                 self._end_column(line)
109                 continue
110             elif re.search(ur"^☆space", line):
111                 self._space(line)
112                 continue
113             elif re.search(ur"^☆call_tables", line):
114                 self._call_tables(line)
115                 continue
116             elif re.search(ur"^●", line):
117                 self._head_l(line)
118                 continue
119             elif re.search(ur"^○", line):
120                 self._head_m(line)
121                 continue
122             elif re.search(ur"^☆----", line):
123                 self._newpage(line)
124                 continue
125             elif re.search(ur"^☆\+---", line):
126                 self._code(line)
127                 continue
128             elif re.search(ur"^☆表", line):
129                 self._table(line)
130                 continue
131             elif re.search(ur"^☆図", line):
132                 self._fig(line)
133                 continue
134             elif re.search(ur"^☆リスト", line):
135                 self._list(line)
136                 continue
137             elif re.search(ur"^☆flow", line):
138                 self._flow(line)
139                 continue
140
141             if re.search(ur"^ ", line):
142                 self._paragraph(line)
143                 continue
144
145
146             if re.search(r"^\s*$", line):
147                 line = ""
148
149             line = line.strip()
150             print line
151
152         # end-of-loop
153
154     def _clear(self, line):
155         print """<div style="clear:left;"> </div>
156 """
157
158     def _head_l(self, line):
159         line = line.rstrip()
160         if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
161             self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
162             line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
163
164         line = self._default_markup_rule(line)
165         if self._anchor != "":
166             line = re.sub(ur"^●(.*)$", ur'<div id="%s"><h3>\1</h3></div>' % self._anchor, line)
167             self._anchor = ""
168         else:
169             line = re.sub(ur"^●(.*)$", ur"<h3>\1</h3>", line)
170         print line
171
172     def _head_m(self, line):
173         line = line.rstrip()
174         if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
175             self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
176             line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
177
178         line = self._default_markup_rule(line)
179         if self._anchor != "":
180             line = re.sub(ur"^○(.*)$", ur'<div id="%s"><h4>\1</h4></div>' % self._anchor, line)
181             self._anchor = ""
182         else:
183             line = re.sub(ur"^○(.*)$", ur"<h4>\1</h4>", line)
184         print line
185
186     def _paragraph(self, line):
187         line = self._default_markup_rule(line)
188         line = "<p>" + line + "</p>"
189         print line
190
191     def _newpage(self, line):
192         line = re.sub(ur"☆----.*-{0,1}", u"<hr>", line)
193         print line
194
195     def load_index(self, path_to_index):
196         """
197         load index database.
198
199         @param path_to_index: index db's path
200         @type path_to_index: string
201         """
202         # load index
203         try:
204             index_file = open(path_to_index, "r")
205             self._index_past = pickle.load(index_file)
206             index_file.close()
207         except IOError:
208             sys.stderr.write("warn: cannot read index file,\n")
209
210     def save_index(self, path_to_index):
211         """
212         save index database.
213
214         @param path_to_index: index db's path
215         @type path_to_index: string
216         """
217         # save index
218         try:
219             index_file = open(path_to_index, "w")
220             pickle.dump(self._index, index_file)
221             index_file.close()
222         except IOError:
223             sys.stderr.write("warn: cannot write index file,\n")
224
225     def make_hashlist(self, path_to_hashfile):
226         """
227         create hash list.
228
229         @param path_to_hashfile: hashfile's path
230         @type path_to_hashfile: string
231         """
232         try:
233             file_img_hash = open(path_to_hashfile, "r")
234         except IOError:
235             sys.stderr.write("cannot open file: %s" % path_img_hash)
236             return None;
237
238         self.hashlist = {};
239         for line in file_img_hash:
240             splited = line.strip().split("\t", 2)
241             # hashlist's format: <hash> \t <filename>
242             self.hashlist[splited[1]] = splited[0]
243
244     def _call_tagles(self):
245         pass
246
247     def _escape(self, line):
248         line = re.sub(ur"&", ur"&amp", line)
249         line = re.sub(ur"<", ur"&lt;", line)
250         line = re.sub(ur">", ur"&gt;", line)
251         return line
252
253     def _default_markup_rule(self, line):
254         """
255         apply default markup rules.
256
257         @param line: string to apply markup
258         @type line: string
259         """
260         line = self._escape(line)
261
262         # apply filter
263         # line = tag_filter.apply(line)
264
265         line = re.sub(ur"[★*](表[0-9~、]+)", ur"<b>\1</b>", line)
266         line = re.sub(ur"[★*](図[0-9~、]+)", ur"<b>\1</b>", line)
267         line = re.sub(ur"[★*](リスト[0-9~、]+)", ur"<b>\1</b>", line)
268         line = re.sub(ur"[★*]b\[(.*?)\]", ur"<b>\1</b>", line)
269         line = re.sub(ur"[★*]b\{(.*?)\}", ur"<b>\1</b>", line)
270         line = re.sub(ur"[★*]\[(\S*) (.*?)\]", r'<a href="\1">\2</a>', line)
271         line = re.sub(ur"[★*]\[(\S*)\]", r'<a href="\1">\1</a>', line)
272
273         # comment
274         if re.search(ur"^☆#", line):
275             line = ""
276
277         return line
278
279
280     def _ulist(self, line):
281         """Proccess ul"""
282         print "<ul>"
283         while re.search(ur"^・", line):
284             line = self._default_markup_rule(line)
285             print re.sub(ur"^・(.*)$", ur"<li>\1</li>", line.strip())
286             line = self.input_iter.next()
287         print "</ul>\n"
288
289     def _begin_column(self, line):
290         """Proccess column"""
291         try:
292             str_title = re.search(ur"^☆begin-column:(.*)$", line).group(1)
293         except AttributeError:
294             str_title = ""
295
296         html = """<table bgcolor="#DDDDDD" border="0" cellpadding="6" width="95%%">
297     <tr><th>%s</th></tr>
298     <tr><td><span style="font-size: 85%%;">
299     """ % (str_title)
300         print html
301
302     def _end_column(self, line):
303         print """</span></td></tr>
304     </table>
305     """
306
307     def _list_start(self):
308         return "<pre>"
309
310     def _list_end(self):
311         return "</pre>"
312
313     def _list(self, line):
314         try:
315             str_title = re.search(ur"^☆(リスト.*)$", line).group(1)
316         except AttributeError:
317             str_title = ""
318         print "<p><b>%s</b></p>" % (str_title)
319         print self._list_start()
320
321         for line in self.input_iter:
322             line = line.strip("\n\r")
323             line = self._escape(line)
324             if re.search(ur"""^☆\+---""", line):
325                 break
326             print line
327         print self._list_end()
328
329     def _code(self, line):
330         print self._list_start()
331
332         for line in self.input_iter:
333             line = self._escape(line)
334             line = line = re.sub(ur"[★*]b\[(.*?)]", ur"<b>\1</b>", line)
335             line = line = re.sub(ur"[★*]b{(.*?)}", ur"<b>\1</b>", line)
336
337             if re.search(ur"^☆\+---$", line):
338                 break
339             print line,
340         print self._list_end()
341
342     def _inline(self, line):
343         for line in self.input_iter:
344             #        line = line.strip()
345             if re.search(ur"^☆}}}", line):
346                 break
347             print line
348
349     def _comment(self, line):
350         for line in self.input_iter:
351             line = line.strip()
352             if re.search(ur"^☆}}}", line):
353                 break
354
355     def _space(self, line):
356         print "<br><br>"
357
358     def _flow(self, line):
359         down_arrow = "http://static.sourceforge.jp/crystal/22x22/actions/1downarrow.png"
360         flow_header = """<div style="text-align:center; border: 1px solid; background-color:#EFF2F0; width:90%; margin: 0 auto 1em;">
361 """
362         flow_title = """<div style="text-align:left; padding:4px 4px 4px 1em; margin-bottom: 1em; border-bottom: 1px solid; font-weight: bold; background-color:#BCD;">
363 %s
364 </div>"""
365         flow_footer = """</div>
366 """
367         flow_item = """<div>
368 %s
369 <p>%s</p>
370 </div>
371
372 """
373         arrow = '<div style="margin:1em auto;"><img src="%s"></div>\n' % (down_arrow,)
374
375         rex_title = re.compile(ur"^☆flow\s+(.*)$")
376         if rex_title.search(line):
377             title = rex_title.search(line).group(1)
378         else:
379             title = ""
380
381         rex_file = re.compile(ur"^([^:]*):(.*)$")
382         outputs = []
383         for line in self.input_iter:
384             if re.search(r"^\s*$", line):
385                 break
386             match = rex_file.search(line)
387             if match:
388                 file = os.path.join(self._image_dir, match.group(1))
389                 cap = match.group(2)
390             else:
391                 continue
392             fig = self._anchored_fig(file, cap)
393             outputs.append(flow_item % (fig, cap))
394
395         print flow_header
396         print flow_title % (title,)
397         print arrow.join(outputs)
398         print flow_footer
399         
400
401     def _fig_start(self, cap="", styles=[], width=0, height=0):
402         params = dict(style="", tablewidth="")
403         if width != 0:
404             params["style"] = "width:%d;" % (width,)
405             params["tablewidth"] = 'width="%d"' % (width,)
406
407         if "lfloat" in styles:
408             return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0" style="float:left; padding-left: 0.5em; %(style)s">
409     <tr> <td valign="top" align="center">
410     """ % params
411         elif "left" in styles:
412             return """<table %(tablewidth)s border="0" cellpadding="0" cellspacing="0" style="padding-left: 0.5em; %(style)s">
413     <tr> <td valign="top" align="center">
414     """ %params
415         else:
416             return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0">
417     <tr> <td valign="top" align="center">
418     """ % params
419
420     def _fig_end(self, cap="", styles=[]):
421         return """</td> </tr>
422     <tr> <td><span style="font-size: 80%%; font-weight: bold;">
423     %s
424     </span></td> </tr>
425     </table>
426     """ % (cap)
427
    def _fig(self, line):
        """
        Hand a figure line to _fig_release() when self._release == 1.

        NOTE(review): a second ``_fig`` is defined further down in this
        class and replaces this definition, so this body never runs. If it
        did, the else branch would call itself without end.
        NOTE(review): markup() defaults ``release`` to the string "0" while
        the test below compares against the integer 1 -- confirm the
        intended type before reviving this dispatch.
        """
        if self._release == 1:
            self._fig_release(line)
        else:
            self._fig(line)
433
434     def _get_png_geom(self, filepath):
435         desc = deterfile.file(filepath)
436         m = re.match(r"([0-9]+)\s*x\s*([0-9]+)", desc[1])
437         if m:
438             w = m.group(1)
439             h = m.group(2)
440             return (int(w), int(h))
441         else:
442             return None
443
444     def _fig(self, line):
445         try:
446             str_title = re.search(ur"^☆(図.*)$", line).group(1)
447         except AttributeError:
448             str_title = ""
449         if str_title.find(u"図*") == 0:
450             str_title = str_title.replace(u"図*", "")
451         line = self.input_iter.next()
452         styles = []
453         if line.find("@") == 0:
454             styles = line.strip().replace("@", "").split(",")
455             line = self.input_iter.next()
456
457         imgname = ""
458         imgname_s = ""
459         hash = ""
460         hash_s = ""
461         match_o1 = re.search(ur"<([^,]*?)>", line)
462         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
463         if not match_o1 == None:
464             imgname = match_o1.group(1)
465             imgname = os.path.join(self._image_dir, imgname)
466             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", imgname)
467         elif not match_o2 == None:
468             imgname = match_o1.group(1)
469             imgname = os.path.join(self._image_dir, imgname)
470             imgname_s = match_o1.group(2)
471
472         geom = self._get_png_geom(imgname_s)
473         if geom:
474             w = geom[0]
475             h = geom[1]
476             print self._fig_start("", styles, width=w, height=h)
477         else:
478             print self._fig_start("", styles)
479         print self._anchored_fig(imgname, str_title, imgname_s)
480         print self._fig_end(str_title, styles);
481
482         dic = self.index("figs")
483         dic.append(imgname)
484         if imgname_s != "":
485             dic.append(imgname_s)
486
487         
488     def _anchored_fig(self, file, alt, file_s=""):
489
490         if file_s == "":
491             file_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", file)
492
493         if not os.path.isfile(file_s):
494             file_s = file
495
496         return """<a href="%s">
497   <img src="%s" alt="%s">
498 </a>
499 """ % (file, file_s, alt)
500         
501
502     def _fig_release(self, line):
503         try:
504             str_title = re.search(ur"^☆(図.*)$", line).group(1)
505         except AttributeError:
506             str_title = ""
507         print self._fig_start()
508
509         line = self.input_iter.next()
510         imgname = ""
511         imgname_s = ""
512         hash = ""
513         hash_s = ""
514         match_o1 = re.search(ur"<([^,]*?)>", line)
515         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
516         if not match_o1 == None:
517             imgname = match_o1.group(1)
518             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", match_o1.group(1))
519         elif not match_o2 == None:
520             imgname = match_o1.group(1)
521             imgname_s = match_o1.group(2)
522
523
524         hash = self.hashlist.get(imgname, "")
525         hash_s = self.hashlist.get(imgname_s, "")
526         if hash_s == "":
527             hash_s = hash
528
529         print """<a href="/blob.pl?id=%s">
530      <slash type="image" id="%s" title="%s">
531      </a>
532      """ % (hash, hash_s, str_title)
533         
534
535         dic = self.index("figs")
536         dic.append(imgname)
537         if imgname_s != "":
538             dic.append(imgname_s)
539
540         print self._fig_end(str_title);
541
542
543     def _table_start(self, cap):
544         return """<div style="width:90%%; margin-left:auto;margin-right:auto;"><table align="center" border="1" class="table">
545     <caption><b>%s</b></caption>
546     """ % cap
547
548     def _table_end(self, footnote=""):
549         return "</table>\n%s</div>\n" % (footnote,)
550
551     def _table(self, line):
552         str_title = ""
553         self._table_buf1 = ""
554
555         try:
556             str_title = re.search(ur"^☆(表.*)$", line).group(1)
557             fig_name =  re.search(ur"^☆(表[0-9A-Z]*)", line).group(1)
558         except AttributeError:
559             str_title = ""
560             fig_name = ""
561         if str_title.find(u"表*") == 0:
562             str_title = str_title.replace(u"表*", "")
563
564         print self._table_start(str_title)
565         self._table_buf1 =  self._table_start(str_title)
566
567         num_row = 0
568         table_contents = []
569         footnote = ""
570         for line in self.input_iter:
571             line = line.strip(" \n")
572             line = self._default_markup_rule(line)
573             if re.search(ur"^\s*$", line):
574                 break
575             if re.search(ur"^※", line):
576                 footnote = re.search(ur"^(※.*)$", line).group(1)
577                 break
578             line = self._default_markup_rule(line)
579             if re.search(ur"^〓", line):
580                 line = re.sub(ur"^〓", "", line)
581                 tag_mode = "th"
582             else:
583                 tag_mode = "td"
584             table_contents.append([])
585             num_col = 0
586             for item in line.split("\t"):
587                 if item == "":
588                     if num_col == 0:
589                         n = 1
590                         try:
591                             while table_contents[num_row-n][num_col]["item"] == "":
592                                 n += 1
593                             table_contents[num_row-n][num_col]["row"] += 1
594                         except IndexError:
595                             pass
596                     else:
597                         n = 1
598                         try:
599                             while table_contents[num_row][num_col-n]["item"] == "":
600                                 n += 1
601                             table_contents[num_row][num_col-n]["col"] += 1
602                         except IndexError:
603                             pass
604
605                 table_contents[num_row].append({"tag":tag_mode,"item":item,"row":1,"col":1})
606                 num_col = num_col + 1
607             num_row = num_row + 1
608
609         for row_item in table_contents:
610             line = "<tr>"
611             for item in row_item:
612                 if item["item"] == "":
613                     continue
614                 line = line + "<" + item["tag"]
615                 if not item["row"] == 1:
616                     line = line + (' rowspan="%s"' % item["row"])
617                 if not item["col"] == 1:
618                     line = line + (' colspan="%s"' % item["col"])
619                 line = line +  ">"
620                 line = line + item["item"]
621                 line = line + "</" + item["tag"] + ">"
622             line = line + "</tr>\n"
623             print line,
624             self._table_buf1 = self._table_buf1 + line
625
626             # line = "<tr><th>" + re.sub(ur"^〓", "", line) + "</th></tr>"
627             # line = line.replace("\t", "</th><th>")
628             # print line
629             # else:
630             # line = "<tr><td>" + line + "</td></tr>"
631             # line = line.replace("\t", "</td><td>")
632             # print line
633
634         print self._table_end(footnote)
635         self._table_buf1 =  self._table_buf1 + self._table_end()
636         if self.index_haskey("tables"):
637             self.index("tables")[fig_name] = self._table_buf1
638         else:
639             self.index_add("tables", {fig_name:self._table_buf1})
640
641     def _call_tables(self, line):
642         try:
643             fig_name =  re.search(ur"^☆call_tables\((表[0-9A-Z]+)", line).group(1)
644         except AttributeError:
645             return
646         print self.index("tables")[fig_name]