markupper2.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 """markupper2.py - markupper v2."""
   4
   5 import sys
   6 import os
   7 import re
   8 import codecs
   9 import pickle
  10
  11 import deterfile
  12
  13 class MarkUpper2(object):
  14     """
  15     """
  16     def __init__(self):
  17         self._vars = {}
  18         self._anchor = ""
  19
  20     def markup(self, input_iter):
  21         """
  22         Do markup.
  23
  24         @param input_iter: iterator to use as input
  25         @type input_iter: iterator
  26         """
  27         self.input = input_iter
  28
  29         for line in self.input:
  30             if re.search(ur"^☆{{{$", line):
  31                 self._inline(line)
  32                 continue
  33             elif re.search(ur'^\set{([A-Za-z0-9_]+)\s*,\s*"([^"]*)"\s*}', line):
  34                 g = re.search(ur'^\set{([A-Za-z0-9_]+)\s*,\s*"([^"]*)"\s*}', line)
  35                 self._vars[g.group(1)] = g.group(2)
  36                 continue
  37             elif re.search(ur"^☆image_border:\s(on|On|ON)", line):
  38                 self._image_border = 1
  39                 continue
  40             elif re.search(ur"^☆comment\s{{{$", line):
  41                 self._comment(line)
  42                 continue
  43             elif re.search(ur"^☆\*", line):
  44                 self._anchor = re.sub(ur"^☆\*", "", line).strip()
  45                 continue
  46             elif re.search(ur"^☆clear\s+", line):
  47                 self._clear(line)
  48                 continue
  49             elif re.search(ur"^・", line):
  50                 self._ulist(line)
  51                 continue
  52             elif re.search(ur"^[0-9]\.", line):
  53                 self._olist(line)
  54                 continue
  55             elif re.search(ur"^☆begin-column:", line):
  56                 self._begin_column(line)
  57                 continue
  58             elif re.search(ur"^☆end-column", line):
  59                 self._end_column(line)
  60                 continue
  61             elif re.search(ur"^☆space", line):
  62                 self._space(line)
  63                 continue
  64             elif re.search(ur"^☆call_tables", line):
  65                 self._call_tables(line)
  66                 continue
  67             elif re.search(ur"^●", line):
  68                 self._head_l(line)
  69                 continue
  70             elif re.search(ur"^○", line):
  71                 self._head_m(line)
  72                 continue
  73             elif re.search(ur"^☆----", line):
  74                 self._newpage(line)
  75                 continue
  76             elif re.search(ur"^☆\+---", line):
  77                 self._code(line)
  78                 continue
  79             elif re.search(ur"^☆表", line):
  80                 self._table(line)
  81                 continue
  82             elif re.search(ur"^☆図", line):
  83                 self._fig(line)
  84                 continue
  85             elif re.search(ur"^☆写真", line):
  86                 self._photo(line)
  87                 continue
  88             elif re.search(ur"^☆リスト", line):
  89                 self._list(line)
  90                 continue
  91             elif re.search(ur"^☆flow", line):
  92                 self._flow(line)
  93                 continue
  94
  95             if re.search(ur"^　", line):
  96                 self._paragraph(line)
  97                 continue
  98
  99
 100             if re.search(r"^\s*$", line):
 101                 line = ""
 102
 103             line = line.strip()
 104             print line
 105
 106         # end-of-loop
 107
 108     def _clear(self, line):
 109         print """<div style="clear:left;"> </div>
 110 """
 111
 112     def _head_l(self, line):
 113         line = line.rstrip()
 114         if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
 115             self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
 116             line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
 117
 118         line = self._default_markup_rule(line)
 119         if self._anchor != "":
 120             line = re.sub(ur"^●(.*)$", ur'<div id="%s"><h3>\1</h3></div>' % self._anchor, line)
 121             self._anchor = ""
 122         else:
 123             line = re.sub(ur"^●(.*)$", ur"<h3>\1</h3>", line)
 124         print line
 125
 126     def _head_m(self, line):
 127         line = line.rstrip()
 128         if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
 129             self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
 130             line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
 131
 132         line = self._default_markup_rule(line)
 133         if self._anchor != "":
 134             line = re.sub(ur"^○(.*)$", ur'<div id="%s"><h4>\1</h4></div>' % self._anchor, line)
 135             self._anchor = ""
 136         else:
 137             line = re.sub(ur"^○(.*)$", ur"<h4>\1</h4>", line)
 138         print line
 139
 140     def _paragraph(self, line):
 141         line = self._default_markup_rule(line)
 142         line = "<p>" + line + "</p>"
 143         print line
 144
 145     def _newpage(self, line):
 146         line = re.sub(ur"☆----.*-{0,1}", u"<hr>", line)
 147         print line
 148
 149     def make_hashlist(self, path_to_hashfile):
 150         """
 151         create hash list.
 152
 153         @param path_to_hashfile: hashfile's path
 154         @type path_to_hashfile: string
 155         """
 156         try:
 157             file_img_hash = open(path_to_hashfile, "r")
 158         except IOError:
 159             sys.stderr.write("cannot open file: %s" % path_img_hash)
 160             return None;
 161
 162         self.hashlist = {};
 163         for line in file_img_hash:
 164             splited = line.strip().split("\t", 2)
 165             # hashlist's format: <hash> \t <filename>
 166             self.hashlist[splited[1]] = splited[0]
 167
 168     def _call_tagles(self):
 169         pass
 170
 171     def _escape(self, line):
 172         line = re.sub(ur"&", ur"&amp;", line)
 173         line = re.sub(ur"<", ur"&lt;", line)
 174         line = re.sub(ur">", ur"&gt;", line)
 175         return line
 176
 177     def _default_markup_rule(self, line):
 178         """
 179         apply default markup rules.
 180
 181         @param line: string to apply markup
 182         @type line: string
 183         """
 184         line = self._escape(line)
 185
 186         # apply filter
 187         # line = tag_filter.apply(line)
 188
 189         line = re.sub(ur"[★*](表[0-9～、]+)", ur"<b>\1</b>", line)
 190         line = re.sub(ur"[★*](図[0-9～、]+)", ur"<b>\1</b>", line)
 191         line = re.sub(ur"[★*](写真[0-9～、]+)", ur"<b>\1</b>", line)
 192         line = re.sub(ur"[★*](リスト[0-9～、]+)", ur"<b>\1</b>", line)
 193         line = re.sub(ur"[★*]b\[(.*?)\]", ur"<b>\1</b>", line)
 194         line = re.sub(ur"[★*]b\{(.*?)\}", ur"<b>\1</b>", line)
 195         line = re.sub(ur"[★*]\[(\S*) (.*?)\]", r'<a href="\1">\2</a>', line)
 196         line = re.sub(ur"[★*]\[(\S*)\]", r'<a href="\1">\1</a>', line)
 197
 198         # comment
 199         if re.search(ur"^☆#", line):
 200             line = ""
 201
 202         return line
 203
 204
 205     def _ulist(self, line):
 206         """Proccess ul"""
 207         print "<ul>"
 208         while re.search(ur"^・", line):
 209             line = self._default_markup_rule(line)
 210             print re.sub(ur"^・(.*)$", ur"<li>\1</li>", line.strip())
 211             line = self.input.next()
 212         print "</ul>\n"
 213
 214     def _olist(self, line):
 215         """Proccess ul"""
 216         print "<ol>"
 217         while re.search(ur"^[0-9]+\.", line):
 218             line = self._default_markup_rule(line)
 219             print re.sub(ur"^[0-9]+\.(.*)$", ur"<li>\1</li>", line.strip())
 220             line = self.input.next()
 221         print "</ol>\n"
 222
 223
 224
 225     def _begin_column(self, line):
 226         """Proccess column"""
 227         try:
 228             str_title = re.search(ur"^☆begin-column:(.*)$", line).group(1)
 229         except AttributeError:
 230             str_title = ""
 231
 232         html = """<table bgcolor="#DDDDDD" border="0" cellpadding="6" width="95%%">
 233     <tr><th>%s</th></tr>
 234     <tr><td><span style="font-size: 85%%;">
 235     """ % (str_title)
 236         print html
 237
 238     def _end_column(self, line):
 239         print """</span></td></tr>
 240     </table>
 241     """
 242
 243     def _list_start(self):
 244         return "<pre>"
 245
 246     def _list_end(self):
 247         return "</pre>"
 248
 249     def _list(self, line):
 250         try:
 251             str_title = re.search(ur"^☆(リスト.*)$", line).group(1)
 252         except AttributeError:
 253             str_title = ""
 254         print "<p><b>%s</b></p>" % (str_title)
 255         print self._list_start()
 256
 257         for line in self.input:
 258             line = line.strip("\n\r")
 259             line = self._escape(line)
 260             line = line = re.sub(ur"[★*]b\[(.*?)]", ur"<b>\1</b>", line)
 261             line = line = re.sub(ur"[★*]b{(.*?)}", ur"<b>\1</b>", line)
 262             if re.search(ur"""^☆\+---""", line):
 263                 break
 264             print line
 265         print self._list_end()
 266
 267     def _code(self, line):
 268         print self._list_start()
 269
 270         for line in self.input:
 271             line = self._escape(line)
 272             line = line = re.sub(ur"[★*]b\[(.*?)]", ur"<b>\1</b>", line)
 273             line = line = re.sub(ur"[★*]b{(.*?)}", ur"<b>\1</b>", line)
 274
 275             if re.search(ur"^☆\+---$", line):
 276                 break
 277             print line,
 278         print self._list_end()
 279
 280     def _inline(self, line):
 281         for line in self.input:
 282             #        line = line.strip()
 283             if re.search(ur"^☆}}}", line):
 284                 break
 285             print line
 286
 287     def _comment(self, line):
 288         for line in self.input:
 289             line = line.strip()
 290             if re.search(ur"^☆}}}", line):
 291                 break
 292
 293     def _space(self, line):
 294         print "<br><br>"
 295
 296     def _flow(self, line):
 297         down_arrow = "http://static.sourceforge.jp/crystal/22x22/actions/1downarrow.png"
 298         flow_header = """<div style="text-align:center; border: 1px solid; background-color:#EFF2F0; width:90%; margin: 0 auto 1em;">
 299 """
 300         flow_title = """<div style="text-align:left; padding:4px 4px 4px 1em; margin-bottom: 1em; border-bottom: 1px solid; font-weight: bold; background-color:#BCD;">
 301 %s
 302 </div>"""
 303         flow_footer = """</div>
 304 """
 305         flow_item = """<div>
 306 %s
 307 <p>%s</p>
 308 </div>
 309
 310 """
 311         arrow = '<div style="margin:1em auto;"><img src="%s"></div>\n' % (down_arrow,)
 312
 313         rex_title = re.compile(ur"^☆flow\s+(.*)$")
 314         if rex_title.search(line):
 315             title = rex_title.search(line).group(1)
 316         else:
 317             title = ""
 318
 319         rex_file = re.compile(ur"^([^:]*):(.*)$")
 320         outputs = []
 321         for line in self.input:
 322             if re.search(r"^\s*$", line):
 323                 break
 324             match = rex_file.search(line)
 325             if match:
 326                 file = os.path.join(self._image_dir, match.group(1))
 327                 cap = self._default_markup_rule(match.group(2))
 328             else:
 329                 continue
 330             fig = self._anchored_fig(file, cap)
 331             outputs.append(flow_item % (fig, cap))
 332
 333         print flow_header
 334         print flow_title % (title,)
 335         print arrow.join(outputs)
 336         print flow_footer
 337
 338
 339     def _fig_start(self, cap="", styles=[], width=0, height=0):
 340         params = dict(style="", tablewidth="")
 341         if width != 0:
 342             params["style"] = "width:%d;" % (width,)
 343             params["tablewidth"] = 'width="%d"' % (width,)
 344
 345         if "lfloat" in styles:
 346             return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0" style="float:left; padding-left: 0.5em; %(style)s">
 347     <tr> <td valign="top" align="center">
 348     """ % params
 349         elif "left" in styles:
 350             return """<table %(tablewidth)s border="0" cellpadding="0" cellspacing="0" style="padding-left: 0.5em; %(style)s">
 351     <tr> <td valign="top" align="center">
 352     """ %params
 353         else:
 354             return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0">
 355     <tr> <td valign="top" align="center">
 356     """ % params
 357
 358     def _fig_end(self, cap="", styles=[]):
 359         return """</td> </tr>
 360     <tr> <td><span style="font-size: 80%%; font-weight: bold;">
 361     %s
 362     </span></td> </tr>
 363     </table>
 364     """ % (cap)
 365
 366     def _get_png_geom(self, filepath):
 367         desc = deterfile.file(filepath)
 368         try:
 369             m = re.match(r"([0-9]+)\s*x\s*([0-9]+)", desc[1])
 370         except IndexError:
 371             err = ",".join(desc)
 372             raise Exception("deterfile error: %s, file: %s" % (err,filepath))
 373         if m:
 374             w = m.group(1)
 375             h = m.group(2)
 376             return (int(w), int(h))
 377         else:
 378             return None
 379
 380     def _fig(self, line):
 381         try:
 382             str_title = re.search(ur"^☆(図.*)$", line).group(1)
 383         except AttributeError:
 384             str_title = ""
 385         if str_title.find(u"図*") == 0:
 386             str_title = str_title.replace(u"図*", "")
 387         line = self.input.next()
 388         styles = []
 389         if line.find("@") == 0:
 390             styles = line.strip().replace("@", "").split(",")
 391             line = self.input.next()
 392
 393         if line[0] == "<":
 394             imgname = ""
 395             imgname_s = ""
 396             hash = ""
 397             hash_s = ""
 398             match_o1 = re.search(ur"<([^,]*?)>", line)
 399             match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
 400             if not match_o1 == None:
 401                 imgname = match_o1.group(1)
 402                 imgname = os.path.join(self._image_dir, imgname)
 403                 imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", imgname)
 404             elif not match_o2 == None:
 405                 imgname = match_o2.group(1)
 406                 imgname = os.path.join(self._image_dir, imgname)
 407                 imgname_s = match_o2.group(2)
 408                 imgname_s = os.path.join(self._image_dir, imgname_s)
 409
 410             geom = self._get_png_geom(imgname_s)
 411             if geom:
 412                 w = geom[0]
 413                 h = geom[1]
 414                 print self._fig_start("", styles, width=w, height=h)
 415             else:
 416                 print self._fig_start("", styles)
 417             print self._anchored_fig(imgname, str_title, imgname_s)
 418             print self._fig_end(str_title, styles);
 419         else:
 420             hash = ""
 421             hash_s = ""
 422             match_o = re.search(ur"\[(.*?),\s*(.*?)\]", line)
 423             try:
 424                 print """<table width="500" align="center" border="0" cellpadding="0" cellspacing="0">
 425     <tr> <td valign="top" align="center">
 426 """
 427                 print """<a href="/blob.pl?id=%s">
 428   <slash-image id="%s" title="%s">
 429 </a>""" % (match_o.group(1), match_o.group(2), str_title)
 430                 print """</td> </tr>
 431     <tr> <td><span style="font-size: 80%%; font-weight: bold;">
 432     %s
 433     </span></td> </tr>
 434     </table>
 435 """ % (str_title)
 436             except IndexError:
 437                 print >> sys.stderr, "error:cannot parse id. " + line
 438
 439     def _photo(self, line):
 440         try:
 441             str_title = re.search(ur"^☆(写真.*)$", line).group(1)
 442         except AttributeError:
 443             str_title = ""
 444         if str_title.find(u"写真*") == 0:
 445             str_title = str_title.replace(u"写真*", "")
 446         line = self.input.next()
 447         styles = []
 448         if line.find("@") == 0:
 449             styles = line.strip().replace("@", "").split(",")
 450             line = self.input.next()
 451
 452         imgname = ""
 453         imgname_s = ""
 454         hash = ""
 455         hash_s = ""
 456         match_o1 = re.search(ur"<([^,]*?)>", line)
 457         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
 458         if not match_o1 == None:
 459             imgname = match_o1.group(1)
 460             imgname = os.path.join(self._image_dir, imgname)
 461             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", imgname)
 462         elif not match_o2 == None:
 463             imgname = match_o2.group(1)
 464             imgname = os.path.join(self._image_dir, imgname)
 465             imgname_s = match_o2.group(2)
 466             imgname_s = os.path.join(self._image_dir, imgname_s)
 467
 468         geom = self._get_png_geom(imgname_s)
 469         if geom:
 470             w = geom[0]
 471             h = geom[1]
 472             print self._fig_start("", styles, width=w, height=h)
 473         else:
 474             print self._fig_start("", styles)
 475         print self._anchored_fig(imgname, str_title, imgname_s)
 476         print self._fig_end(str_title, styles);
 477
 478
 479
 480     def _anchored_fig(self, file, alt, file_s=""):
 481
 482         if file_s == "":
 483             file_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", file)
 484
 485         if not os.path.isfile(file_s):
 486             file_s = file
 487
 488         alt = re.sub(r"""<[A-Za-z0-9!/]+.*?>""", "", alt)
 489
 490         ret = """<a href="%s">
 491   <img src="%s" alt="%s">
 492 </a>
 493 """ % (file, file_s, alt)
 494
 495
 496         return ret
 497
 498
 499     def _fig_release(self, line):
 500         try:
 501             str_title = re.search(ur"^☆(図.*)$", line).group(1)
 502         except AttributeError:
 503             str_title = ""
 504         print self._fig_start()
 505
 506         line = self.input.next()
 507         imgname = ""
 508         imgname_s = ""
 509         hash = ""
 510         hash_s = ""
 511         match_o1 = re.search(ur"<([^,]*?)>", line)
 512         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
 513         if not match_o1 == None:
 514             imgname = match_o1.group(1)
 515             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", match_o1.group(1))
 516         elif not match_o2 == None:
 517             imgname = match_o1.group(1)
 518             imgname_s = match_o1.group(2)
 519
 520
 521         hash = self.hashlist.get(imgname, "")
 522         hash_s = self.hashlist.get(imgname_s, "")
 523         if hash_s == "":
 524             hash_s = hash
 525
 526         print """<a href="/blob.pl?id=%s">
 527      <slash type="image" id="%s" title="%s">
 528      </a>
 529      """ % (hash, hash_s, str_title)
 530
 531
 532
 533         print self._fig_end(str_title);
 534
 535
 536     def _table_start(self, cap):
 537         return """<div style="width:90%%; margin-left:auto;margin-right:auto;"><table align="center" border="1" class="table" width="100%%">
 538     <caption><b>%s</b></caption>
 539     """ % cap
 540
 541     def _table_end(self, footnote=""):
 542         return "</table>\n%s</div>\n" % (footnote,)
 543
 544     def _table(self, line):
 545         str_title = ""
 546         self._table_buf1 = ""
 547
 548         try:
 549             str_title = re.search(ur"^☆(表.*)$", line).group(1)
 550             fig_name =  re.search(ur"^☆(表[0-9A-Z]*)", line).group(1)
 551         except AttributeError:
 552             str_title = ""
 553             fig_name = ""
 554         if str_title.find(u"表*") == 0:
 555             str_title = str_title.replace(u"表*", "")
 556
 557         print self._table_start(str_title)
 558         self._table_buf1 =  self._table_start(str_title)
 559
 560         num_row = 0
 561         table_contents = []
 562         footnote = ""
 563         for line in self.input:
 564             line = line.strip(" \n")
 565             line = self._default_markup_rule(line)
 566             if re.search(ur"^\s*$", line):
 567                 break
 568             if re.search(ur"^※", line):
 569                 footnote = re.search(ur"^(※.*)$", line).group(1)
 570                 break
 571             if re.search(ur"^〓", line):
 572                 line = re.sub(ur"^〓", "", line)
 573                 tag_mode = "th"
 574             else:
 575                 tag_mode = "td"
 576             table_contents.append([])
 577             num_col = 0
 578             for item in line.split("\t"):
 579                 if item == "":
 580                     if num_col == 0:
 581                         n = 1
 582                         try:
 583                             while table_contents[num_row-n][num_col]["item"] == "":
 584                                 n += 1
 585                             table_contents[num_row-n][num_col]["row"] += 1
 586                         except IndexError:
 587                             pass
 588                     else:
 589                         n = 1
 590                         try:
 591                             while table_contents[num_row][num_col-n]["item"] == "":
 592                                 n += 1
 593                             table_contents[num_row][num_col-n]["col"] += 1
 594                         except IndexError:
 595                             pass
 596                 if item == u"↓":
 597                     n = 1
 598                     try:
 599                         while table_contents[num_row-n][num_col]["item"] == "":
 600                             n += 1
 601                         table_contents[num_row-n][num_col]["row"] += 1
 602                         item = ""
 603                     except IndexError:
 604                         pass
 605
 606                 if re.search(r'^".*"$', item):
 607                     item = re.search(r'^"(.*)"$', item).group(1)
 608                     table_contents[num_row].append({"tag":"th","item":item,"row":1,"col":1})
 609                 else:
 610                     table_contents[num_row].append({"tag":tag_mode,"item":item,"row":1,"col":1})
 611                 num_col = num_col + 1
 612             num_row = num_row + 1
 613
 614         for row_item in table_contents:
 615             line = "<tr>"
 616             for item in row_item:
 617                 if item["item"] == "":
 618                     continue
 619                 line = line + "<" + item["tag"]
 620                 if not item["row"] == 1:
 621                     line = line + (' rowspan="%s"' % item["row"])
 622                 if not item["col"] == 1:
 623                     line = line + (' colspan="%s"' % item["col"])
 624                 line = line +  ">"
 625                 line = line + item["item"]
 626                 line = line + "</" + item["tag"] + ">"
 627             line = line + "</tr>\n"
 628             print line,
 629             self._table_buf1 = self._table_buf1 + line
 630
 631             # line = "<tr><th>" + re.sub(ur"^〓", "", line) + "</th></tr>"
 632             # line = line.replace("\t", "</th><th>")
 633             # print line
 634             # else:
 635             # line = "<tr><td>" + line + "</td></tr>"
 636             # line = line.replace("\t", "</td><td>")
 637             # print line
 638
 639         print self._table_end(footnote)
 640         self._table_buf1 =  self._table_buf1 + self._table_end()
 641
 642     def _call_tables(self, line):
 643         try:
 644             fig_name =  re.search(ur"^☆call_tables\((表[0-9A-Z]+)", line).group(1)
 645         except AttributeError:
 646             return