markupper2.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 """markupper2.py - markupper v2."""
   4
   5 import sys
   6 import os
   7 import re
   8 import codecs
   9 import pickle
  10
  11 import deterfile
  12
  13 class MarkUpper2(object):
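    """
    Line-oriented markup to HTML converter.

    markup() reads lines from an iterator, recognises directive lines
    (most of them start with a marker character such as ☆, ●, ○ or ・)
    and prints the generated HTML on standard output.
    """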
  16     def __init__(self):
  17         self._vars = {}
  18         self._anchor = ""
  19
  20     def markup(self, input_iter):
  21         """
  22         Do markup.
  23
  24         @param input_iter: iterator to use as input
  25         @type input_iter: iterator
  26         """
  27         self.input = input_iter
  28
  29         for line in self.input:
  30             if re.search(ur"^☆{{{$", line):
  31                 self._inline(line)
  32                 continue
  33             elif re.search(ur'^\set{([A-Za-z0-9_]+)\s*,\s*"([^"]*)"\s*}', line):
  34                 g = re.search(ur'^\set{([A-Za-z0-9_]+)\s*,\s*"([^"]*)"\s*}', line)
  35                 self._vars[g.group(1)] = g.group(2)
  36                 continue
  37             elif re.search(ur"^☆image_border:\s(on|On|ON)", line):
  38                 self._image_border = 1
  39                 continue
  40             elif re.search(ur"^☆comment\s{{{$", line):
  41                 self._comment(line)
  42                 continue
  43             elif re.search(ur"^☆\*", line):
  44                 self._anchor = re.sub(ur"^☆\*", "", line).strip()
  45                 continue
  46             elif re.search(ur"^☆clear\s+", line):
  47                 self._clear(line)
  48                 continue
  49             elif re.search(ur"^・", line):
  50                 self._ulist(line)
  51                 continue
  52             elif re.search(ur"^[0-9]\.", line):
  53                 self._olist(line)
  54                 continue
  55             elif re.search(ur"^☆begin-column:", line):
  56                 self._begin_column(line)
  57                 continue
  58             elif re.search(ur"^☆end-column", line):
  59                 self._end_column(line)
  60                 continue
  61             elif re.search(ur"^☆space", line):
  62                 self._space(line)
  63                 continue
  64             elif re.search(ur"^☆call_tables", line):
  65                 self._call_tables(line)
  66                 continue
  67             elif re.search(ur"^●", line):
  68                 self._head_l(line)
  69                 continue
  70             elif re.search(ur"^○", line):
  71                 self._head_m(line)
  72                 continue
  73             elif re.search(ur"^☆----", line):
  74                 self._newpage(line)
  75                 continue
  76             elif re.search(ur"^☆\+---", line):
  77                 self._code(line)
  78                 continue
  79             elif re.search(ur"^☆表", line):
  80                 self._table(line)
  81                 continue
  82             elif re.search(ur"^☆図", line):
  83                 self._fig(line)
  84                 continue
  85             elif re.search(ur"^☆写真", line):
  86                 self._photo(line)
  87                 continue
  88             elif re.search(ur"^☆リスト", line):
  89                 self._list(line)
  90                 continue
  91             elif re.search(ur"^☆flow", line):
  92                 self._flow(line)
  93                 continue
  94
  95             if re.search(ur"^　", line):
  96                 self._paragraph(line)
  97                 continue
  98
  99
 100             if re.search(r"^\s*$", line):
 101                 line = ""
 102
 103             line = line.strip()
 104             print line
 105
 106         # end-of-loop
 107
 108     def _clear(self, line):
 109         print """<div style="clear:left;"> </div>
 110 """
 111
 112     def _head_l(self, line):
 113         line = line.rstrip()
 114         if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
 115             self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
 116             line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
 117
 118         line = self._default_markup_rule(line)
 119         if self._anchor != "":
 120             line = re.sub(ur"^●(.*)$", ur'<div id="%s"><h3>\1</h3></div>' % self._anchor, line)
 121             self._anchor = ""
 122         else:
 123             line = re.sub(ur"^●(.*)$", ur"<h3>\1</h3>", line)
 124         print line
 125
 126     def _head_m(self, line):
 127         line = line.rstrip()
 128         if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
 129             self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
 130             line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
 131
 132         line = self._default_markup_rule(line)
 133         if self._anchor != "":
 134             line = re.sub(ur"^○(.*)$", ur'<div id="%s"><h4>\1</h4></div>' % self._anchor, line)
 135             self._anchor = ""
 136         else:
 137             line = re.sub(ur"^○(.*)$", ur"<h4>\1</h4>", line)
 138         print line
 139
 140     def _paragraph(self, line):
 141         line = self._default_markup_rule(line)
 142         line = "<p>" + line + "</p>"
 143         print line
 144
 145     def _newpage(self, line):
 146         line = re.sub(ur"☆----.*-{0,1}", u"<hr>", line)
 147         print line
 148
 149     def make_hashlist(self, path_to_hashfile):
 150         """
 151         create hash list.
 152
 153         @param path_to_hashfile: hashfile's path
 154         @type path_to_hashfile: string
 155         """
 156         try:
 157             file_img_hash = open(path_to_hashfile, "r")
 158         except IOError:
 159             sys.stderr.write("cannot open file: %s" % path_img_hash)
 160             return None;
 161
 162         self.hashlist = {};
 163         for line in file_img_hash:
 164             splited = line.strip().split("\t", 2)
 165             # hashlist's format: <hash> \t <filename>
 166             self.hashlist[splited[1]] = splited[0]
 167
 168     def _call_tagles(self):
 169         pass
 170
 171     def _escape(self, line):
 172         line = re.sub(ur"&", ur"&amp;", line)
 173         line = re.sub(ur"<", ur"&lt;", line)
 174         line = re.sub(ur">", ur"&gt;", line)
 175         return line
 176
 177     def _default_markup_rule(self, line):
 178         """
 179         apply default markup rules.
 180
 181         @param line: string to apply markup
 182         @type line: string
 183         """
 184         line = self._escape(line)
 185
 186         # apply filter
 187         # line = tag_filter.apply(line)
 188
 189         line = re.sub(ur"[★*](表[0-9～、]+)", ur"<b>\1</b>", line)
 190         line = re.sub(ur"[★*](図[0-9～、]+)", ur"<b>\1</b>", line)
 191         line = re.sub(ur"[★*](写真[0-9～、]+)", ur"<b>\1</b>", line)
 192         line = re.sub(ur"[★*](リスト[0-9～、]+)", ur"<b>\1</b>", line)
 193         line = re.sub(ur"[★*]b\[(.*?)\]", ur"<b>\1</b>", line)
 194         line = re.sub(ur"[★*]b\{(.*?)\}", ur"<b>\1</b>", line)
 195         line = re.sub(ur"[★*]g\[(.*?)]", ur"<span style='color:#BBBBBB'>\1</b>", line)
 196         line = re.sub(ur"[★*]g{(.*?)}", ur"<span style='color:#BBBBBB'>\1</b>", line)
 197         line = re.sub(ur"[★*]\[(\S*) (.*?)\]", r'<a href="\1">\2</a>', line)
 198         line = re.sub(ur"[★*]\[(\S*)\]", r'<a href="\1">\1</a>', line)
 199
 200         # comment
 201         if re.search(ur"^☆#", line):
 202             line = ""
 203
 204         return line
 205
 206
 207     def _ulist(self, line):
 208         """Proccess ul"""
 209         print "<ul>"
 210         while re.search(ur"^・", line):
 211             line = self._default_markup_rule(line)
 212             print re.sub(ur"^・(.*)$", ur"<li>\1</li>", line.strip())
 213             line = self.input.next()
 214         print "</ul>\n"
 215
 216     def _olist(self, line):
 217         """Proccess ul"""
 218         print "<ol>"
 219         while re.search(ur"^[0-9]+\.", line):
 220             line = self._default_markup_rule(line)
 221             print re.sub(ur"^[0-9]+\.(.*)$", ur"<li>\1</li>", line.strip())
 222             line = self.input.next()
 223         print "</ol>\n"
 224
 225
 226
 227     def _begin_column(self, line):
 228         """Proccess column"""
 229         try:
 230             str_title = re.search(ur"^☆begin-column:(.*)$", line).group(1)
 231         except AttributeError:
 232             str_title = ""
 233
 234         html = """<table bgcolor="#DDDDDD" border="0" cellpadding="6" width="95%%">
 235     <tr><th>%s</th></tr>
 236     <tr><td><span style="font-size: 85%%;">
 237     """ % (str_title)
 238         print html
 239
 240     def _end_column(self, line):
 241         print """</span></td></tr>
 242     </table>
 243     """
 244
 245     def _list_start(self):
 246         return "<pre>"
 247
 248     def _list_end(self):
 249         return "</pre>"
 250
 251     def _list(self, line):
 252         try:
 253             str_title = re.search(ur"^☆(リスト.*)$", line).group(1)
 254         except AttributeError:
 255             str_title = ""
 256         print "<p><b>%s</b></p>" % (str_title)
 257         print self._list_start()
 258
 259         for line in self.input:
 260             line = line.strip("\n\r")
 261             line = self._escape(line)
 262             line = re.sub(ur"[★*]b\[(.*?)]", ur"<b>\1</b>", line)
 263             line = re.sub(ur"[★*]b{(.*?)}", ur"<b>\1</b>", line)
 264             line = re.sub(ur"[★*]g\[(.*?)]", ur"<span style='color:#BBBBBB'>\1</b>", line)
 265             line = re.sub(ur"[★*]g{(.*?)}", ur"<span style='color:#BBBBBB'>\1</b>", line)
 266             if re.search(ur"""^☆\+---""", line):
 267                 break
 268             print line
 269         print self._list_end()
 270
 271     def _code(self, line):
 272         print self._list_start()
 273
 274         for line in self.input:
 275             line = self._escape(line)
 276             line = re.sub(ur"[★*]b\[(.*?)]", ur"<b>\1</b>", line)
 277             line = re.sub(ur"[★*]b{(.*?)}", ur"<b>\1</b>", line)
 278             line = re.sub(ur"[★*]g\[(.*?)]", ur"<span style='color:#BBBBBB'>\1</b>", line)
 279             line = re.sub(ur"[★*]g{(.*?)}", ur"<span style='color:#BBBBBB'>\1</b>", line)
 280
 281             if re.search(ur"^☆\+---$", line):
 282                 break
 283             print line,
 284         print self._list_end()
 285
 286     def _inline(self, line):
 287         for line in self.input:
 288             line = line.strip()
 289             if re.search(ur"^☆}}}", line):
 290                 break
 291             print line
 292
 293     def _comment(self, line):
 294         for line in self.input:
 295             line = line.strip()
 296             if re.search(ur"^☆}}}", line):
 297                 break
 298
 299     def _space(self, line):
 300         print "<br><br>"
 301
 302     def _flow(self, line):
 303         down_arrow = "http://static.sourceforge.jp/crystal/22x22/actions/1downarrow.png"
 304         flow_header = """<div style="text-align:center; border: 1px solid; background-color:#EFF2F0; width:90%; margin: 0 auto 1em;">
 305 """
 306         flow_title = """<div style="text-align:left; padding:4px 4px 4px 1em; margin-bottom: 1em; border-bottom: 1px solid; font-weight: bold; background-color:#BCD;">
 307 %s
 308 </div>"""
 309         flow_footer = """</div>
 310 """
 311         flow_item = """<div>
 312 %s
 313 <p>%s</p>
 314 </div>
 315
 316 """
 317         arrow = '<div style="margin:1em auto;"><img src="%s"></div>\n' % (down_arrow,)
 318
 319         rex_title = re.compile(ur"^☆flow\s+(.*)$")
 320         if rex_title.search(line):
 321             title = rex_title.search(line).group(1)
 322         else:
 323             title = ""
 324
 325         rex_file = re.compile(ur"^([^:]*):(.*)$")
 326         outputs = []
 327         for line in self.input:
 328             if re.search(r"^\s*$", line):
 329                 break
 330             match = rex_file.search(line)
 331             if match:
 332                 file = os.path.join(self._image_dir, match.group(1))
 333                 cap = self._default_markup_rule(match.group(2))
 334             else:
 335                 continue
 336             fig = self._anchored_fig(file, cap)
 337             outputs.append(flow_item % (fig, cap))
 338
 339         print flow_header
 340         print flow_title % (title,)
 341         print arrow.join(outputs)
 342         print flow_footer
 343
 344
 345     def _fig_start(self, cap="", styles=[], width=0, height=0):
 346         params = dict(style="", tablewidth="")
 347         if width != 0:
 348             params["style"] = "width:%d;" % (width,)
 349             params["tablewidth"] = 'width="%d"' % (width,)
 350
 351         if "lfloat" in styles:
 352             return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0" style="float:left; padding-left: 0.5em; %(style)s">
 353     <tr> <td valign="top" align="center">
 354     """ % params
 355         elif "left" in styles:
 356             return """<table %(tablewidth)s border="0" cellpadding="0" cellspacing="0" style="padding-left: 0.5em; %(style)s">
 357     <tr> <td valign="top" align="center">
 358     """ %params
 359         else:
 360             return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0">
 361     <tr> <td valign="top" align="center">
 362     """ % params
 363
 364     def _fig_end(self, cap="", styles=[]):
 365         return """</td> </tr>
 366     <tr> <td><span style="font-size: 80%%; font-weight: bold;">
 367     %s
 368     </span></td> </tr>
 369     </table>
 370     """ % (cap)
 371
 372     def _get_png_geom(self, filepath):
 373         desc = deterfile.file(filepath)
 374         try:
 375             m = re.match(r"([0-9]+)\s*x\s*([0-9]+)", desc[1])
 376         except IndexError:
 377             err = ",".join(desc)
 378             raise Exception("deterfile error: %s, file: %s" % (err,filepath))
 379         if m:
 380             w = m.group(1)
 381             h = m.group(2)
 382             return (int(w), int(h))
 383         else:
 384             return None
 385
 386     def _fig(self, line):
 387         try:
 388             str_title = re.search(ur"^☆(図.*)$", line).group(1)
 389         except AttributeError:
 390             str_title = ""
 391         if str_title.find(u"図*") == 0:
 392             str_title = str_title.replace(u"図*", "")
 393         line = self.input.next()
 394         styles = []
 395         if line.find("@") == 0:
 396             styles = line.strip().replace("@", "").split(",")
 397             line = self.input.next()
 398
 399         if line[0] == "<":
 400             imgname = ""
 401             imgname_s = ""
 402             hash = ""
 403             hash_s = ""
 404             match_o1 = re.search(ur"<([^,]*?)>", line)
 405             match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
 406             if not match_o1 == None:
 407                 imgname = match_o1.group(1)
 408                 imgname = os.path.join(self._image_dir, imgname)
 409                 imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", imgname)
 410             elif not match_o2 == None:
 411                 imgname = match_o2.group(1)
 412                 imgname = os.path.join(self._image_dir, imgname)
 413                 imgname_s = match_o2.group(2)
 414                 imgname_s = os.path.join(self._image_dir, imgname_s)
 415
 416             geom = self._get_png_geom(imgname_s)
 417             if geom:
 418                 w = geom[0]
 419                 h = geom[1]
 420                 print self._fig_start("", styles, width=w, height=h)
 421             else:
 422                 print self._fig_start("", styles)
 423             print self._anchored_fig(imgname, str_title, imgname_s)
 424             print self._fig_end(str_title, styles);
 425         else:
 426             hash = ""
 427             hash_s = ""
 428             match_o = re.search(ur"\[(.*?),\s*(.*?)\]", line)
 429             try:
 430                 print """<table width="500" align="center" border="0" cellpadding="0" cellspacing="0">
 431     <tr> <td valign="top" align="center">
 432 """
 433                 print """<a href="/blob.pl?id=%s">
 434   <slash-image id="%s" title="%s">
 435 </a>""" % (match_o.group(1), match_o.group(2), str_title)
 436                 print """</td> </tr>
 437     <tr> <td><span style="font-size: 80%%; font-weight: bold;">
 438     %s
 439     </span></td> </tr>
 440     </table>
 441 """ % (str_title)
 442             except IndexError:
 443                 print >> sys.stderr, "error:cannot parse id. " + line
 444
 445     def _photo(self, line):
 446         try:
 447             str_title = re.search(ur"^☆(写真.*)$", line).group(1)
 448         except AttributeError:
 449             str_title = ""
 450         if str_title.find(u"写真*") == 0:
 451             str_title = str_title.replace(u"写真*", "")
 452         line = self.input.next()
 453         styles = []
 454         if line.find("@") == 0:
 455             styles = line.strip().replace("@", "").split(",")
 456             line = self.input.next()
 457
 458         imgname = ""
 459         imgname_s = ""
 460         hash = ""
 461         hash_s = ""
 462         match_o1 = re.search(ur"<([^,]*?)>", line)
 463         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
 464         if not match_o1 == None:
 465             imgname = match_o1.group(1)
 466             imgname = os.path.join(self._image_dir, imgname)
 467             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", imgname)
 468         elif not match_o2 == None:
 469             imgname = match_o2.group(1)
 470             imgname = os.path.join(self._image_dir, imgname)
 471             imgname_s = match_o2.group(2)
 472             imgname_s = os.path.join(self._image_dir, imgname_s)
 473
 474         geom = self._get_png_geom(imgname_s)
 475         if geom:
 476             w = geom[0]
 477             h = geom[1]
 478             print self._fig_start("", styles, width=w, height=h)
 479         else:
 480             print self._fig_start("", styles)
 481         print self._anchored_fig(imgname, str_title, imgname_s)
 482         print self._fig_end(str_title, styles);
 483
 484
 485
 486     def _anchored_fig(self, file, alt, file_s=""):
 487
 488         if file_s == "":
 489             file_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", file)
 490
 491         if not os.path.isfile(file_s):
 492             file_s = file
 493
 494         alt = re.sub(r"""<[A-Za-z0-9!/]+.*?>""", "", alt)
 495
 496         ret = """<a href="%s">
 497   <img src="%s" alt="%s">
 498 </a>
 499 """ % (file, file_s, alt)
 500
 501
 502         return ret
 503
 504
 505     def _fig_release(self, line):
 506         try:
 507             str_title = re.search(ur"^☆(図.*)$", line).group(1)
 508         except AttributeError:
 509             str_title = ""
 510         print self._fig_start()
 511
 512         line = self.input.next()
 513         imgname = ""
 514         imgname_s = ""
 515         hash = ""
 516         hash_s = ""
 517         match_o1 = re.search(ur"<([^,]*?)>", line)
 518         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
 519         if not match_o1 == None:
 520             imgname = match_o1.group(1)
 521             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", match_o1.group(1))
 522         elif not match_o2 == None:
 523             imgname = match_o1.group(1)
 524             imgname_s = match_o1.group(2)
 525
 526
 527         hash = self.hashlist.get(imgname, "")
 528         hash_s = self.hashlist.get(imgname_s, "")
 529         if hash_s == "":
 530             hash_s = hash
 531
 532         print """<a href="/blob.pl?id=%s">
 533      <slash type="image" id="%s" title="%s">
 534      </a>
 535      """ % (hash, hash_s, str_title)
 536
 537
 538
 539         print self._fig_end(str_title);
 540
 541
 542     def _table_start(self, cap):
 543         return """<div style="width:90%%; margin-left:auto;margin-right:auto;"><table align="center" border="1" class="table" width="100%%">
 544     <caption><b>%s</b></caption>
 545     """ % cap
 546
 547     def _table_end(self, footnote=""):
 548         return "</table>\n%s</div>\n" % (footnote,)
 549
    def _table(self, line):
        """
        Print a table introduced by a "☆表..." line.

        The text after ☆ is the caption (a leading "表*" is removed).  The
        following input lines are the rows, with cells separated by tabs:

          - a row starting with 〓 is a header row (<th> cells);
          - a cell written as "text" (in double quotes) is a <th> cell;
          - an empty cell in the first column extends the nearest
            non-empty cell above it by one row (rowspan), an empty cell in
            any other column extends the nearest non-empty cell to its
            left by one column (colspan);
          - a cell containing only ↓ extends the cell above it by one row;
          - a blank line ends the table, a line starting with ※ ends the
            table and is printed as footnote after it.

        The generated HTML is printed and also collected in
        self._table_buf1 (there without the footnote).

        @param line: the directive line
        @type line: string
        """
        str_title = ""
        self._table_buf1 = ""

        try:
            str_title = re.search(ur"^☆(表.*)$", line).group(1)
            # NOTE(review): fig_name is assigned but not used in this method
            fig_name =  re.search(ur"^☆(表[0-9A-Z]*)", line).group(1)
        except AttributeError:
            # re.search() returned None: not a table line
            str_title = ""
            fig_name = ""
        if str_title.find(u"表*") == 0:
            str_title = str_title.replace(u"表*", "")

        print self._table_start(str_title)
        self._table_buf1 =  self._table_start(str_title)

        # First pass: read the rows into table_contents, a list of rows,
        # each row a list of dicts {"tag", "item", "row", "col"} where
        # "row"/"col" are the rowspan/colspan of the cell.
        num_row = 0
        table_contents = []
        footnote = ""
        for line in self.input:
            # tabs are kept so that empty leading/trailing cells survive
            line = line.strip(" \n")
            line = self._default_markup_rule(line)
            if re.search(ur"^\s*$", line):
                break
            if re.search(ur"^※", line):
                footnote = re.search(ur"^(※.*)$", line).group(1)
                break
            if re.search(ur"^〓", line):
                line = re.sub(ur"^〓", "", line)
                tag_mode = "th"
            else:
                tag_mode = "td"
            table_contents.append([])
            num_col = 0
            for item in line.split("\t"):
                if item == "":
                    if num_col == 0:
                        # empty first column: walk up to the nearest
                        # non-empty cell and enlarge its rowspan
                        # NOTE(review): num_row-n can become negative, in
                        # which case Python indexes from the end of
                        # table_contents - confirm this is harmless
                        n = 1
                        try:
                            while table_contents[num_row-n][num_col]["item"] == "":
                                n += 1
                            table_contents[num_row-n][num_col]["row"] += 1
                        except IndexError:
                            pass
                    else:
                        # empty cell elsewhere: walk left to the nearest
                        # non-empty cell and enlarge its colspan
                        n = 1
                        try:
                            while table_contents[num_row][num_col-n]["item"] == "":
                                n += 1
                            table_contents[num_row][num_col-n]["col"] += 1
                        except IndexError:
                            pass
                if item == u"↓":
                    # explicit "merge with the cell above" marker
                    n = 1
                    try:
                        while table_contents[num_row-n][num_col]["item"] == "":
                            n += 1
                        table_contents[num_row-n][num_col]["row"] += 1
                        item = ""
                    except IndexError:
                        pass

                # the cell itself is always recorded (possibly empty) so
                # that column positions stay aligned between rows
                if re.search(r'^".*"$', item):
                    item = re.search(r'^"(.*)"$', item).group(1)
                    table_contents[num_row].append({"tag":"th","item":item,"row":1,"col":1})
                else:
                    table_contents[num_row].append({"tag":tag_mode,"item":item,"row":1,"col":1})
                num_col = num_col + 1
            num_row = num_row + 1

        # Second pass: emit one <tr> per row; empty cells are skipped
        # because a neighbouring cell spans over them.
        for row_item in table_contents:
            line = "<tr>"
            for item in row_item:
                if item["item"] == "":
                    continue
                line = line + "<" + item["tag"]
                if not item["row"] == 1:
                    line = line + (' rowspan="%s"' % item["row"])
                if not item["col"] == 1:
                    line = line + (' colspan="%s"' % item["col"])
                line = line +  ">"
                line = line + item["item"]
                line = line + "</" + item["tag"] + ">"
            line = line + "</tr>\n"
            # trailing comma: the row already ends with a newline
            print line,
            self._table_buf1 = self._table_buf1 + line

            # (disabled earlier implementation without cell spans)
            # line = "<tr><th>" + re.sub(ur"^〓", "", line) + "</th></tr>"
            # line = line.replace("\t", "</th><th>")
            # print line
            # else:
            # line = "<tr><td>" + line + "</td></tr>"
            # line = line.replace("\t", "</td><td>")
            # print line

        print self._table_end(footnote)
        # the buffered copy is closed without the footnote
        self._table_buf1 =  self._table_buf1 + self._table_end()
 647
    def _call_tables(self, line):
        """
        Handle a "☆call_tables(表N..." directive line.

        The table name (表 followed by digits or upper-case letters) is
        extracted from the line; if the line contains none the method
        returns at once.
        NOTE(review): in the code visible here the extracted name is not
        used any further - confirm whether the rest of the implementation
        is missing.

        @param line: the directive line
        @type line: string
        """
        try:
            fig_name =  re.search(ur"^☆call_tables\((表[0-9A-Z]+)", line).group(1)
        except AttributeError:
            # re.search() returned None: no table name on the line
            return