markupper.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 import sys
   5 import os
   6 import re
   7 import codecs
   8 import pickle
   9
  10 import HTMLTagFilter
  11 import deterfile
  12
  13 #sys.stdin = codecs.getreader('utf_8')(sys.stdin)
  14 #sys.stdout = codecs.getwriter('utf_8')(sys.stdout)
  15
# Tags and tag:attribute pairs handed to HTMLTagFilter as its "allow" list.
alist = ["a", "a:href", "a:name", "b", "br" ]
# Handed to HTMLTagFilter as its "deny" list ("*" presumably means every
# other tag -- TODO confirm against HTMLTagFilter).
dlist = ["*"]

# Module-level filter instance; in the visible part of this file it is only
# referenced from a commented-out line in Markupper._default_markup_rule.
tag_filter = HTMLTagFilter.HTMLTagFilter(HTMLTagFilter.DENY_ALLOW, alist, dlist)
# Location of the pickled markup index (presumably passed to
# Markupper.load_index / save_index by the caller -- TODO confirm).
path_to_index = "./_markup_index"
  21
  22 class _InputStream(object):
  23     """InputStream base class."""
  24     def __init__(self):
  25         pass
  26
  27     def __iter__(self):
  28         """return Iterator"""
  29         return self
  30
  31     def next(self):
  32         """function for iterator"""
  33         pass
  34
  35
  36 class Markupper(object):
  37     """
  38     """
  39     def __init__(self):
  40         self._input_iter = None
  41         self._index_past = {}
  42         self._index = {}
  43         self._image_dir = ""
  44
  45     def index_add(self, key, val):
  46         """
  47         Add key and value to index.
  48
  49         @param key:
  50         @type key:
  51
  52         @param val:
  53         @key val:
  54         """
  55         self._index[key] = val
  56
  57     def index(self, key):
  58         """
  59         Get index
  60         """
  61         return self._index[key]
  62
  63     def index_haskey(self, key):
  64         return self._index.has_key(key)
  65
  66     def markup(self, input_iter, release="0"):
  67         """
  68         Do markup.
  69
  70         @param input_iter: iterator to use as input
  71         @type input_iter: iterator
  72         """
  73         self.input_iter = input_iter
  74         self._page_counter = 1
  75         # alist = ["a", "a:href", "a:name", "b", "br" ]
  76         # dlist = ["*"]
  77         # tag_filter = HTMLTagFilter.HTMLTagFilter(HTMLTagFilter.DENY_ALLOW, alist, dlist)
  78         self._release = release
  79
  80         self.index_add("figs", [])
  81
  82         self._anchor = ""
  83         for line in self.input_iter:
  84             # line = self._default_markup_rule(line)
  85             # head-of-line rules
  86             if re.search(ur"^☆{{{$", line):
  87                 self._inline(line)
  88                 continue
  89             elif re.search(ur"^☆image_dir:", line):
  90                 self._image_dir = re.search(ur"^☆image_dir:\s*(.*)$", line).group(1)
  91                 continue
  92             elif re.search(ur"^☆comment\s{{{$", line):
  93                 self._comment(line)
  94                 continue
  95             elif re.search(ur"^☆\*", line):
  96                 self._anchor = re.sub(ur"^☆\*", "", line).strip()
  97                 continue
  98             elif re.search(ur"^☆clear\s+", line):
  99                 self._clear(line)
 100                 continue
 101             elif re.search(ur"^・", line):
 102                 self._ulist(line)
 103                 continue
 104             elif re.search(ur"^☆begin-column:", line):
 105                 self._begin_column(line)
 106                 continue
 107             elif re.search(ur"^☆end-column", line):
 108                 self._end_column(line)
 109                 continue
 110             elif re.search(ur"^☆space", line):
 111                 self._space(line)
 112                 continue
 113             elif re.search(ur"^☆call_tables", line):
 114                 self._call_tables(line)
 115                 continue
 116             elif re.search(ur"^●", line):
 117                 self._head_l(line)
 118                 continue
 119             elif re.search(ur"^○", line):
 120                 self._head_m(line)
 121                 continue
 122             elif re.search(ur"^☆----", line):
 123                 self._newpage(line)
 124                 continue
 125             elif re.search(ur"^☆\+---", line):
 126                 self._code(line)
 127                 continue
 128             elif re.search(ur"^☆表", line):
 129                 self._table(line)
 130                 continue
 131             elif re.search(ur"^☆図", line):
 132                 self._fig(line)
 133                 continue
 134             elif re.search(ur"^☆リスト", line):
 135                 self._list(line)
 136                 continue
 137             elif re.search(ur"^☆flow", line):
 138                 self._flow(line)
 139                 continue
 140
 141             if re.search(ur"^　", line):
 142                 self._paragraph(line)
 143                 continue
 144
 145
 146             if re.search(r"^\s*$", line):
 147                 line = ""
 148
 149             line = line.strip()
 150             print line
 151
 152         # end-of-loop
 153
 154     def _clear(self, line):
 155         print """<div style="clear:left;"> </div>
 156 """
 157
 158     def _head_l(self, line):
 159         line = line.rstrip()
 160         if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
 161             self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
 162             line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
 163
 164         line = self._default_markup_rule(line)
 165         if self._anchor != "":
 166             line = re.sub(ur"^●(.*)$", ur'<div id="%s"><h3>\1</h3></div>' % self._anchor, line)
 167             self._anchor = ""
 168         else:
 169             line = re.sub(ur"^●(.*)$", ur"<h3>\1</h3>", line)
 170         print line
 171
 172     def _head_m(self, line):
 173         line = line.rstrip()
 174         if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
 175             self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
 176             line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
 177
 178         line = self._default_markup_rule(line)
 179         if self._anchor != "":
 180             line = re.sub(ur"^○(.*)$", ur'<div id="%s"><h4>\1</h4></div>' % self._anchor, line)
 181             self._anchor = ""
 182         else:
 183             line = re.sub(ur"^○(.*)$", ur"<h4>\1</h4>", line)
 184         print line
 185
 186     def _paragraph(self, line):
 187         line = self._default_markup_rule(line)
 188         line = "<p>" + line + "</p>"
 189         print line
 190
 191     def _newpage(self, line):
 192         line = re.sub(ur"☆----.*-{0,1}", u"<hr>", line)
 193         print line
 194
 195     def load_index(self, path_to_index):
 196         """
 197         load index database.
 198
 199         @param path_to_index: index db's path
 200         @type path_to_index: string
 201         """
 202         # load index
 203         try:
 204             index_file = open(path_to_index, "r")
 205             self._index_past = pickle.load(index_file)
 206             index_file.close()
 207         except IOError:
 208             sys.stderr.write("warn: cannot read index file,\n")
 209
 210     def save_index(self, path_to_index):
 211         """
 212         save index database.
 213
 214         @param path_to_index: index db's path
 215         @type path_to_index: string
 216         """
 217         # save index
 218         try:
 219             index_file = open(path_to_index, "w")
 220             pickle.dump(self._index, index_file)
 221             index_file.close()
 222         except IOError:
 223             sys.stderr.write("warn: cannot write index file,\n")
 224
 225     def make_hashlist(self, path_to_hashfile):
 226         """
 227         create hash list.
 228
 229         @param path_to_hashfile: hashfile's path
 230         @type path_to_hashfile: string
 231         """
 232         try:
 233             file_img_hash = open(path_to_hashfile, "r")
 234         except IOError:
 235             sys.stderr.write("cannot open file: %s" % path_img_hash)
 236             return None;
 237
 238         self.hashlist = {};
 239         for line in file_img_hash:
 240             splited = line.strip().split("\t", 2)
 241             # hashlist's format: <hash> \t <filename>
 242             self.hashlist[splited[1]] = splited[0]
 243
 244     def _call_tagles(self):
 245         pass
 246
 247     def _escape(self, line):
 248         line = re.sub(ur"&", ur"&amp", line)
 249         line = re.sub(ur"<", ur"&lt;", line)
 250         line = re.sub(ur">", ur"&gt;", line)
 251         return line
 252
 253     def _default_markup_rule(self, line):
 254         """
 255         apply default markup rules.
 256
 257         @param line: string to apply markup
 258         @type line: string
 259         """
 260         line = self._escape(line)
 261
 262         # apply filter
 263         # line = tag_filter.apply(line)
 264
 265         line = re.sub(ur"[★*](表[0-9～、]+)", ur"<b>\1</b>", line)
 266         line = re.sub(ur"[★*](図[0-9～、]+)", ur"<b>\1</b>", line)
 267         line = re.sub(ur"[★*](リスト[0-9～、]+)", ur"<b>\1</b>", line)
 268         line = re.sub(ur"[★*]b\[(.*?)\]", ur"<b>\1</b>", line)
 269         line = re.sub(ur"[★*]b\{(.*?)\}", ur"<b>\1</b>", line)
 270         line = re.sub(ur"[★*]\[(\S*) (.*?)\]", r'<a href="\1">\2</a>', line)
 271         line = re.sub(ur"[★*]\[(\S*)\]", r'<a href="\1">\1</a>', line)
 272
 273         # comment
 274         if re.search(ur"^☆#", line):
 275             line = ""
 276
 277         return line
 278
 279
 280     def _ulist(self, line):
 281         """Proccess ul"""
 282         print "<ul>"
 283         while re.search(ur"^・", line):
 284             line = self._default_markup_rule(line)
 285             print re.sub(ur"^・(.*)$", ur"<li>\1</li>", line.strip())
 286             line = self.input_iter.next()
 287         print "</ul>\n"
 288
 289     def _begin_column(self, line):
 290         """Proccess column"""
 291         try:
 292             str_title = re.search(ur"^☆begin-column:(.*)$", line).group(1)
 293         except AttributeError:
 294             str_title = ""
 295
 296         html = """<table bgcolor="#DDDDDD" border="0" cellpadding="6" width="95%%">
 297     <tr><th>%s</th></tr>
 298     <tr><td><span style="font-size: 85%%;">
 299     """ % (str_title)
 300         print html
 301
 302     def _end_column(self, line):
 303         print """</span></td></tr>
 304     </table>
 305     """
 306
 307     def _list_start(self):
 308         return "<pre>"
 309
 310     def _list_end(self):
 311         return "</pre>"
 312
 313     def _list(self, line):
 314         try:
 315             str_title = re.search(ur"^☆(リスト.*)$", line).group(1)
 316         except AttributeError:
 317             str_title = ""
 318         print "<p><b>%s</b></p>" % (str_title)
 319         print self._list_start()
 320
 321         for line in self.input_iter:
 322             line = line.strip("\n\r")
 323             line = self._escape(line)
 324             if re.search(ur"""^☆\+---""", line):
 325                 break
 326             print line
 327         print self._list_end()
 328
 329     def _code(self, line):
 330         print self._list_start()
 331
 332         for line in self.input_iter:
 333             line = self._escape(line)
 334             line = line = re.sub(ur"[★*]b\[(.*?)]", ur"<b>\1</b>", line)
 335             line = line = re.sub(ur"[★*]b{(.*?)}", ur"<b>\1</b>", line)
 336
 337             if re.search(ur"^☆\+---$", line):
 338                 break
 339             print line,
 340         print self._list_end()
 341
 342     def _inline(self, line):
 343         for line in self.input_iter:
 344             #        line = line.strip()
 345             if re.search(ur"^☆}}}", line):
 346                 break
 347             print line
 348
 349     def _comment(self, line):
 350         for line in self.input_iter:
 351             line = line.strip()
 352             if re.search(ur"^☆}}}", line):
 353                 break
 354
 355     def _space(self, line):
 356         print "<br><br>"
 357
 358     def _flow(self, line):
 359         down_arrow = "http://static.sourceforge.jp/crystal/22x22/actions/1downarrow.png"
 360         flow_header = """<div style="text-align:center; border: 1px solid; background-color:#EFF2F0; width:90%; margin: 0 auto 1em;">
 361 """
 362         flow_title = """<div style="text-align:left; padding:4px 4px 4px 1em; margin-bottom: 1em; border-bottom: 1px solid; font-weight: bold; background-color:#BCD;">
 363 %s
 364 </div>"""
 365         flow_footer = """</div>
 366 """
 367         flow_item = """<div>
 368 %s
 369 <p>%s</p>
 370 </div>
 371
 372 """
 373         arrow = '<div style="margin:1em auto;"><img src="%s"></div>\n' % (down_arrow,)
 374
 375         rex_title = re.compile(ur"^☆flow\s+(.*)$")
 376         if rex_title.search(line):
 377             title = rex_title.search(line).group(1)
 378         else:
 379             title = ""
 380
 381         rex_file = re.compile(ur"^([^:]*):(.*)$")
 382         outputs = []
 383         for line in self.input_iter:
 384             if re.search(r"^\s*$", line):
 385                 break
 386             match = rex_file.search(line)
 387             if match:
 388                 file = os.path.join(self._image_dir, match.group(1))
 389                 cap = match.group(2)
 390             else:
 391                 continue
 392             fig = self._anchored_fig(file, cap)
 393             outputs.append(flow_item % (fig, cap))
 394
 395         print flow_header
 396         print flow_title % (title,)
 397         print arrow.join(outputs)
 398         print flow_footer
 399
 400
 401     def _fig_start(self, cap="", styles=[], width=0, height=0):
 402         params = dict(style="", tablewidth="")
 403         if width != 0:
 404             params["style"] = "width:%d;" % (width,)
 405             params["tablewidth"] = 'width="%d"' % (width,)
 406
 407         if "lfloat" in styles:
 408             return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0" style="float:left; padding-left: 0.5em; %(style)s">
 409     <tr> <td valign="top" align="center">
 410     """ % params
 411         elif "left" in styles:
 412             return """<table %(tablewidth)s border="0" cellpadding="0" cellspacing="0" style="padding-left: 0.5em; %(style)s">
 413     <tr> <td valign="top" align="center">
 414     """ %params
 415         else:
 416             return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0">
 417     <tr> <td valign="top" align="center">
 418     """ % params
 419
 420     def _fig_end(self, cap="", styles=[]):
 421         return """</td> </tr>
 422     <tr> <td><span style="font-size: 80%%; font-weight: bold;">
 423     %s
 424     </span></td> </tr>
 425     </table>
 426     """ % (cap)
 427
    def _fig(self, line):
        """
        Dispatch a figure line: _fig_release when self._release == 1,
        otherwise self._fig.

        NOTE(review): this class defines ``_fig`` a second time further
        down, and that later definition replaces this one, so this body
        does not appear to be reachable.  If it were, the else-branch
        would call itself without end -- confirm the intended name of
        the non-release renderer.
        """
        if self._release == 1:
            self._fig_release(line)
        else:
            self._fig(line)
 433
 434     def _get_png_geom(self, filepath):
 435         desc = deterfile.file(filepath)
 436         m = re.match(r"([0-9]+)\s*x\s*([0-9]+)", desc[1])
 437         if m:
 438             w = m.group(1)
 439             h = m.group(2)
 440             return (int(w), int(h))
 441         else:
 442             return None
 443
 444     def _fig(self, line):
 445         try:
 446             str_title = re.search(ur"^☆(図.*)$", line).group(1)
 447         except AttributeError:
 448             str_title = ""
 449         if str_title.find(u"図*") == 0:
 450             str_title = str_title.replace(u"図*", "")
 451         line = self.input_iter.next()
 452         styles = []
 453         if line.find("@") == 0:
 454             styles = line.strip().replace("@", "").split(",")
 455             line = self.input_iter.next()
 456
 457         imgname = ""
 458         imgname_s = ""
 459         hash = ""
 460         hash_s = ""
 461         match_o1 = re.search(ur"<([^,]*?)>", line)
 462         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
 463         if not match_o1 == None:
 464             imgname = match_o1.group(1)
 465             imgname = os.path.join(self._image_dir, imgname)
 466             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", imgname)
 467         elif not match_o2 == None:
 468             imgname = match_o1.group(1)
 469             imgname = os.path.join(self._image_dir, imgname)
 470             imgname_s = match_o1.group(2)
 471
 472         geom = self._get_png_geom(imgname_s)
 473         if geom:
 474             w = geom[0]
 475             h = geom[1]
 476             print self._fig_start("", styles, width=w, height=h)
 477         else:
 478             print self._fig_start("", styles)
 479         print self._anchored_fig(imgname, str_title, imgname_s)
 480         print self._fig_end(str_title, styles);
 481
 482         dic = self.index("figs")
 483         dic.append(imgname)
 484         if imgname_s != "":
 485             dic.append(imgname_s)
 486
 487
 488     def _anchored_fig(self, file, alt, file_s=""):
 489
 490         if file_s == "":
 491             file_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", file)
 492
 493         if not os.path.isfile(file_s):
 494             file_s = file
 495
 496         return """<a href="%s">
 497   <img src="%s" alt="%s">
 498 </a>
 499 """ % (file, file_s, alt)
 500
 501
 502     def _fig_release(self, line):
 503         try:
 504             str_title = re.search(ur"^☆(図.*)$", line).group(1)
 505         except AttributeError:
 506             str_title = ""
 507         print self._fig_start()
 508
 509         line = self.input_iter.next()
 510         imgname = ""
 511         imgname_s = ""
 512         hash = ""
 513         hash_s = ""
 514         match_o1 = re.search(ur"<([^,]*?)>", line)
 515         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
 516         if not match_o1 == None:
 517             imgname = match_o1.group(1)
 518             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", match_o1.group(1))
 519         elif not match_o2 == None:
 520             imgname = match_o1.group(1)
 521             imgname_s = match_o1.group(2)
 522
 523
 524         hash = self.hashlist.get(imgname, "")
 525         hash_s = self.hashlist.get(imgname_s, "")
 526         if hash_s == "":
 527             hash_s = hash
 528
 529         print """<a href="/blob.pl?id=%s">
 530      <slash type="image" id="%s" title="%s">
 531      </a>
 532      """ % (hash, hash_s, str_title)
 533
 534
 535         dic = self.index("figs")
 536         dic.append(imgname)
 537         if imgname_s != "":
 538             dic.append(imgname_s)
 539
 540         print self._fig_end(str_title);
 541
 542
 543     def _table_start(self, cap):
 544         return """<div style="width:90%%; margin-left:auto;margin-right:auto;"><table align="center" border="1" class="table">
 545     <caption><b>%s</b></caption>
 546     """ % cap
 547
 548     def _table_end(self, footnote=""):
 549         return "</table>\n%s</div>\n" % (footnote,)
 550
 551     def _table(self, line):
 552         str_title = ""
 553         self._table_buf1 = ""
 554
 555         try:
 556             str_title = re.search(ur"^☆(表.*)$", line).group(1)
 557             fig_name =  re.search(ur"^☆(表[0-9A-Z]*)", line).group(1)
 558         except AttributeError:
 559             str_title = ""
 560             fig_name = ""
 561         if str_title.find(u"表*") == 0:
 562             str_title = str_title.replace(u"表*", "")
 563
 564         print self._table_start(str_title)
 565         self._table_buf1 =  self._table_start(str_title)
 566
 567         num_row = 0
 568         table_contents = []
 569         footnote = ""
 570         for line in self.input_iter:
 571             line = line.strip(" \n")
 572             line = self._default_markup_rule(line)
 573             if re.search(ur"^\s*$", line):
 574                 break
 575             if re.search(ur"^※", line):
 576                 footnote = re.search(ur"^(※.*)$", line).group(1)
 577                 break
 578             line = self._default_markup_rule(line)
 579             if re.search(ur"^〓", line):
 580                 line = re.sub(ur"^〓", "", line)
 581                 tag_mode = "th"
 582             else:
 583                 tag_mode = "td"
 584             table_contents.append([])
 585             num_col = 0
 586             for item in line.split("\t"):
 587                 if item == "":
 588                     if num_col == 0:
 589                         n = 1
 590                         try:
 591                             while table_contents[num_row-n][num_col]["item"] == "":
 592                                 n += 1
 593                             table_contents[num_row-n][num_col]["row"] += 1
 594                         except IndexError:
 595                             pass
 596                     else:
 597                         n = 1
 598                         try:
 599                             while table_contents[num_row][num_col-n]["item"] == "":
 600                                 n += 1
 601                             table_contents[num_row][num_col-n]["col"] += 1
 602                         except IndexError:
 603                             pass
 604
 605                 table_contents[num_row].append({"tag":tag_mode,"item":item,"row":1,"col":1})
 606                 num_col = num_col + 1
 607             num_row = num_row + 1
 608
 609         for row_item in table_contents:
 610             line = "<tr>"
 611             for item in row_item:
 612                 if item["item"] == "":
 613                     continue
 614                 line = line + "<" + item["tag"]
 615                 if not item["row"] == 1:
 616                     line = line + (' rowspan="%s"' % item["row"])
 617                 if not item["col"] == 1:
 618                     line = line + (' colspan="%s"' % item["col"])
 619                 line = line +  ">"
 620                 line = line + item["item"]
 621                 line = line + "</" + item["tag"] + ">"
 622             line = line + "</tr>\n"
 623             print line,
 624             self._table_buf1 = self._table_buf1 + line
 625
 626             # line = "<tr><th>" + re.sub(ur"^〓", "", line) + "</th></tr>"
 627             # line = line.replace("\t", "</th><th>")
 628             # print line
 629             # else:
 630             # line = "<tr><td>" + line + "</td></tr>"
 631             # line = line.replace("\t", "</td><td>")
 632             # print line
 633
 634         print self._table_end(footnote)
 635         self._table_buf1 =  self._table_buf1 + self._table_end()
 636         if self.index_haskey("tables"):
 637             self.index("tables")[fig_name] = self._table_buf1
 638         else:
 639             self.index_add("tables", {fig_name:self._table_buf1})
 640
 641     def _call_tables(self, line):
 642         try:
 643             fig_name =  re.search(ur"^☆call_tables\((表[0-9A-Z]+)", line).group(1)
 644         except AttributeError:
 645             return
 646         print self.index("tables")[fig_name]