markupper.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 import sys
   5 import os
   6 import re
   7 import codecs
   8 import pickle
   9
  10 import HTMLTagFilter
  11
# Re-wrap the standard streams: text read from stdin is decoded from UTF-8
# and text written to stdout is encoded as UTF-8.
sys.stdin = codecs.getreader('utf_8')(sys.stdin)
sys.stdout = codecs.getwriter('utf_8')(sys.stdout)

# Tag/attribute lists handed to HTMLTagFilter below.
# NOTE(review): presumably an "allow" list and a "deny" list, going by the
# names and the DENY_ALLOW mode -- confirm against the HTMLTagFilter module.
alist = ["a", "a:href", "a:name", "b", "br" ]
dlist = ["*"]

# Module-wide filter instance; no active use of it is visible in this file.
tag_filter = HTMLTagFilter.HTMLTagFilter(HTMLTagFilter.DENY_ALLOW, alist, dlist)
# Path of the index file. Not read by the code visible in this file:
# load_index()/save_index() receive the path as a parameter.
path_to_index = "./_markup_index"
  20
  21 class _InputStream(object):
  22     """InputStream base class."""
  23     def __init__(self):
  24         pass
  25
  26     def __iter__(self):
  27         """return Iterator"""
  28         return self
  29
  30     def next(self):
  31         """function for iterator"""
  32         pass
  33
  34
  35 class Markupper(object):
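    """
    Convert text written in a line-oriented markup notation into HTML.

    markup() prints the generated HTML to standard output and records the
    figures and tables it meets in an index, which save_index() can write
    to disk.
    """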
  38     def __init__(self):
  39         self._input_iter = None
  40         self._index_past = {}
  41         self._index = {}
  42
  43     def index_add(self, key, val):
  44         """
  45         Add key and value to index.
  46
  47         @param key:
  48         @type key:
  49
  50         @param val:
  51         @key val:
  52         """
  53         self._index[key] = val
  54
  55     def index(self, key):
  56         """
  57         Get index
  58         """
  59         return self._index[key]
  60
  61     def index_haskey(self, key):
  62         return self._index.has_key(key)
  63
  64     def markup(self, input_iter, release="0"):
  65         """
  66         Do markup.
  67
  68         @param input_iter: iterator to use as input
  69         @type input_iter: iterator
  70         """
  71         self.input_iter = input_iter
  72         self._page_counter = 1
  73         # alist = ["a", "a:href", "a:name", "b", "br" ]
  74         # dlist = ["*"]
  75         # tag_filter = HTMLTagFilter.HTMLTagFilter(HTMLTagFilter.DENY_ALLOW, alist, dlist)
  76         self._release = release
  77
  78         self.index_add("figs", [])
  79
  80         anchor = ""
  81         for line in self.input_iter:
  82             line = self._default_markup_rule(line)
  83             # head-of-line rules
  84             if re.search(ur"^☆{{{$", line):
  85                 self._inline(line)
  86                 continue
  87             elif re.search(ur"^☆comment\s{{{$", line):
  88                 self._comment(line)
  89                 continue
  90             elif re.search(ur"^☆\*", line):
  91                 self._anchor = re.sub(ur"^☆\*", "", line).strip()
  92                 continue
  93             elif re.search(ur"^・", line):
  94                 self._ulist(line)
  95                 continue
  96             elif re.search(ur"^☆begin-column:", line):
  97                 self._begin_column(line)
  98                 continue
  99             elif re.search(ur"^☆end-column", line):
 100                 self._end_column(line)
 101                 continue
 102             elif re.search(ur"^☆space", line):
 103                 self._space(line)
 104                 continue
 105             elif re.search(ur"^☆call_tables", line):
 106                 self._call_tables(line)
 107                 continue
 108             elif re.search(ur"^●", line):
 109                 if anchor != "":
 110                     line = re.sub(ur"^●(.*)$", ur'<h4 id="%s">\1</h4>' % anchor, line)
 111                     anchor = ""
 112                 else:
 113                     line = re.sub(ur"^●(.*)$", ur"<h4>\1</h4>", line)
 114                 print line
 115                 continue
 116             elif re.search(ur"^○", line):
 117                 if anchor != "":
 118                     line = re.sub(ur"^○(.*)$", ur'<b id="%s">\1</b>' % anchor, line)
 119                     anchor = ""
 120                 else:
 121                     line = re.sub(ur"^○(.*)$", ur"<b>\1</b>", line)
 122                 print line
 123                 continue
 124             elif re.search(ur"^☆----", line):
 125                 line = re.sub(ur"☆----.*-{0,1}", u"<hr>", line)
 126                 print line
 127                 continue
 128             elif re.search(ur"^☆\+---", line):
 129                 self._code(line)
 130                 continue
 131             elif re.search(ur"^☆表", line):
 132                 self._table(line)
 133                 continue
 134             elif re.search(ur"^☆図", line):
 135                 self._fig(line)
 136                 continue
 137             elif re.search(ur"^☆リスト", line):
 138                 self._list(line)
 139                 continue
 140
 141             if re.search(ur"^　", line):
 142                 line = "<p>" + line + "</p>"
 143
 144             if re.search(r"^\s*$", line):
 145                 line = ""
 146
 147             print line
 148
 149         # end-of-loop
 150
 151
 152     def load_index(self, path_to_index):
 153         """
 154         load index database.
 155
 156         @param path_to_index: index db's path
 157         @type path_to_index: string
 158         """
 159         # load index
 160         try:
 161             index_file = open(path_to_index, "r")
 162             self._index_past = pickle.load(index_file)
 163             index_file.close()
 164         except IOError:
 165             sys.stderr.write("warn: cannot read index file,\n")
 166
 167     def save_index(self, path_to_index):
 168         """
 169         save index database.
 170
 171         @param path_to_index: index db's path
 172         @type path_to_index: string
 173         """
 174         # save index
 175         try:
 176             index_file = open(path_to_index, "w")
 177             pickle.dump(self._index, index_file)
 178             index_file.close()
 179         except IOError:
 180             sys.stderr.write("warn: cannot write index file,\n")
 181
 182     def make_hashlist(self, path_to_hashfile):
 183         """
 184         create hash list.
 185
 186         @param path_to_hashfile: hashfile's path
 187         @type path_to_hashfile: string
 188         """
 189         try:
 190             file_img_hash = open(path_to_hashfile, "r")
 191         except IOError:
 192             sys.stderr.write("cannot open file: %s" % path_img_hash)
 193             return None;
 194
 195         self.hashlist = {};
 196         for line in file_img_hash:
 197             splited = line.strip().split("\t", 2)
 198             # hashlist's format: <hash> \t <filename>
 199             self.hashlist[splited[1]] = splited[0]
 200
 201     def _call_tagles(self):
 202         pass
 203
 204     def _default_markup_rule(self, line):
 205         """
 206         apply default markup rules.
 207
 208         @param line: string to apply markup
 209         @type line: string
 210         """
 211         # line = re.sub(ur"&", ur"&amp", line)
 212         # line = re.sub(ur"<", ur"&lt;", line)
 213         # line = re.sub(ur">", ur"&gt;", line)
 214
 215         # apply filter
 216         # line = tag_filter.apply(line)
 217
 218         line = re.sub(ur"[★*](表[0-9～]+)", ur"<b>\1</b>", line)
 219         line = re.sub(ur"[★*](図[0-9～]+)", ur"<b>\1</b>", line)
 220         line = re.sub(ur"[★*](リスト[0-9～]+)", ur"<b>\1</b>", line)
 221         line = re.sub(ur"[★*]b\[(.*?)\]", ur"<b>\1</b>", line)
 222         line = re.sub(ur"[★*]\[(\S*) (.*?)\]", r'<a href="\1">\2</a>', line)
 223
 224         # comment
 225         if re.search(ur"^☆#", line):
 226             line = ""
 227
 228         return line
 229
 230
 231     def _ulist(self, line):
 232         """Proccess ul"""
 233         print "<ul>"
 234         while re.search(ur"^・", line):
 235             print re.sub(ur"^・(.*)$", ur"<li>\1</li>", line.strip())
 236             line = self.input_iter.next()
 237         print "</ul>\n"
 238
 239     def _begin_column(self, line):
 240         """Proccess column"""
 241         try:
 242             str_title = re.search(ur"^☆begin-column:(.*)$", line).group(1)
 243         except AttributeError:
 244             str_title = ""
 245
 246         html = """<table bgcolor="#DDDDDD" border="0" cellpadding="6" width="95%%">
 247     <tr><th>%s</th></tr>
 248     <tr><td><span style="font-size: 85%%;">
 249     """ % (str_title)
 250         print html
 251
 252     def _end_column(self, line):
 253         print """</span></td></tr>
 254     </table>
 255     """
 256
 257     def _list_start(self):
 258         return "<pre>"
 259
 260     def _list_end(self):
 261         return "</pre>"
 262
 263     def _list(self, line):
 264         try:
 265             str_title = re.search("^☆(リスト.*)$", line).group(1)
 266         except AttributeError:
 267             str_title = ""
 268         print "<p><b>%s</b></p>" % (str_title)
 269         print self._list_start(line)
 270
 271         for line in self.input_iter:
 272             line = line.strip()
 273             line = line.replace("&", "&amp;")
 274             line = line.replace("<", "&lt;")
 275             line = line.replace(">", "&gt;")
 276             if line == "----":
 277                 break
 278             print line
 279         print self._list_end()
 280
 281     def _code(self, line):
 282         print self._list_start()
 283
 284         for line in self.input_iter:
 285             #        line = line.strip()
 286             line = line.replace("&", "&amp;")
 287             line = line.replace("<", "&lt;")
 288             line = line.replace(">", "&gt;")
 289             line = line = re.sub(ur"[★*]b\[(.*?)]", ur"<b>\1</b>", line)
 290
 291             if re.search(ur"^☆\+---$", line):
 292                 break
 293             print line,
 294         print self._list_end()
 295
 296
 297     def _inline(self, line):
 298         for line in self.input_iter:
 299             #        line = line.strip()
 300             if re.search(ur"^☆}}}", line):
 301                 break
 302             print line
 303
 304
 305     def _comment(self, line):
 306         for line in self.input_iter:
 307             line = line.strip()
 308             if re.search(ur"^☆}}}", line):
 309                 break
 310
 311     def _space(self, line):
 312         print "<br><br>"
 313
 314
 315     def _fig_start(self, cap=""):
 316         return """<table align="center" border="0" cellpadding="0" cellspacing="0">
 317     <tr> <td valign="top" align="center">
 318     """
 319
 320     def _fig_end(self, cap=""):
 321         return """</td> </tr>
 322     <tr> <td><span style="font-size: 80%%; font-weight: bold;">
 323     %s
 324     </span></td> </tr>
 325     </table>
 326     """ % (cap)
 327
 328     def _fig(self, line):
 329         if self._release == 1:
 330             self._fig_release(line)
 331         else:
 332             self._fig(line)
 333
 334
 335     def _fig(self, line):
 336         try:
 337             str_title = re.search(ur"^☆(図.*)$", line).group(1)
 338         except AttributeError:
 339             str_title = ""
 340         print self._fig_start()
 341
 342         line = self.input_iter.next()
 343         imgname = ""
 344         imgname_s = ""
 345         hash = ""
 346         hash_s = ""
 347         match_o1 = re.search(ur"<([^,]*?)>", line)
 348         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
 349         if not match_o1 == None:
 350             imgname = match_o1.group(1)
 351             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", match_o1.group(1))
 352         elif not match_o2 == None:
 353             imgname = match_o1.group(1)
 354             imgname_s = match_o1.group(2)
 355
 356         if not os.path.isfile(imgname_s):
 357             imgname_s = imgname
 358
 359         print """<a href="%s">
 360      <img src="%s" alt="%s">
 361 </a>
 362      """ % (imgname, imgname_s, str_title)
 363
 364
 365         dic = self.index("figs")
 366         dic.append(imgname)
 367         if imgname_s != "":
 368             dic.append(imgname_s)
 369
 370         print self._fig_end(str_title);
 371
 372
 373     def _fig_release(self, line):
 374         try:
 375             str_title = re.search(ur"^☆(図.*)$", line).group(1)
 376         except AttributeError:
 377             str_title = ""
 378         print self._fig_start()
 379
 380         line = self.input_iter.next()
 381         imgname = ""
 382         imgname_s = ""
 383         hash = ""
 384         hash_s = ""
 385         match_o1 = re.search(ur"<([^,]*?)>", line)
 386         match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
 387         if not match_o1 == None:
 388             imgname = match_o1.group(1)
 389             imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", match_o1.group(1))
 390         elif not match_o2 == None:
 391             imgname = match_o1.group(1)
 392             imgname_s = match_o1.group(2)
 393
 394
 395         hash = self.hashlist.get(imgname, "")
 396         hash_s = self.hashlist.get(imgname_s, "")
 397         if hash_s == "":
 398             hash_s = hash
 399
 400         print """<a href="/blob.pl?id=%s">
 401      <slash type="image" id="%s" title="%s">
 402      </a>
 403      """ % (hash, hash_s, str_title)
 404
 405
 406         dic = self.index("figs")
 407         dic.append(imgname)
 408         if imgname_s != "":
 409             dic.append(imgname_s)
 410
 411         print self._fig_end(str_title);
 412
 413
 414     def _table_start(self, cap):
 415         return """<table align="center" border="1" width="90%%">
 416     <caption><b>%s</b></caption>
 417     """ % cap
 418
 419     def _table_end(self):
 420         return "</table>\n"
 421
    def _table(self, line):
        """
        Render a table block and remember its HTML in the "tables" index.

        The opening line supplies the caption (everything after "☆") and
        the table name ("表" followed by digits/capital letters). The
        following lines, up to the first blank line or the end of the
        input, are the rows: cells are separated by tabs, and a row that
        starts with "〓" is emitted with <th> cells instead of <td>.
        An empty cell is never emitted itself; in the first column it adds
        to the rowspan of a cell above it, in any other column it adds to
        the colspan of a cell to its left.

        The HTML is printed and also collected in self._table_buf1, which
        is finally stored in the index as "tables"[<table name>].

        @param line: the "☆表..." line that opened the block
        @type line: string
        """
        str_title = ""
        self._table_buf1 = ""

        # NOTE(review): both searches share one handler, so a caption
        # without a number/letter after "表" loses its title as well.
        try:
            str_title = re.search(ur"^☆(表.*)$", line).group(1)
            fig_name =  re.search(ur"^☆(表[0-9A-Z]+)", line).group(1)
        except AttributeError:
            str_title = ""
            fig_name = ""
        print self._table_start(str_title)
        self._table_buf1 =  self._table_start(str_title)

        # First pass: read the rows into table_contents, a list of rows,
        # each row being a list of cell dicts (tag, item, row, col).
        num_row = 0
        table_contents = []
        for line in self.input_iter:
            # only spaces and newlines are stripped; tabs are left in place
            line = line.strip(" \n")
            if re.search(ur"^\s*$", line):
                break
            line = self._default_markup_rule(line)
            if re.search(ur"^〓", line):
                line = re.sub(ur"^〓", "", line)
                tag_mode = "th"
            else:
                tag_mode = "td"
            table_contents.append([])
            num_col = 0
            for item in line.split("\t"):
                if item == "":
                    if num_col == 0:
                        # empty first cell: walk upwards to the nearest
                        # non-empty first-column cell and grow its rowspan
                        # NOTE(review): num_row-n can become negative and
                        # then indexes from the end of the list; the
                        # IndexError handler is what stops the search when
                        # no such cell exists -- confirm this is intended.
                        n = 1
                        try:
                            while table_contents[num_row-n][num_col]["item"] == "":
                                n += 1
                            table_contents[num_row-n][num_col]["row"] += 1
                        except IndexError:
                            pass
                    else:
                        # empty cell in another column: walk left to the
                        # nearest non-empty cell of this row and grow its
                        # colspan (same negative-index caveat as above)
                        n = 1
                        try:
                            while table_contents[num_row][num_col-n]["item"] == "":
                                n += 1
                            table_contents[num_row][num_col-n]["col"] += 1
                        except IndexError:
                            pass

                # empty cells are stored too, so column positions stay aligned
                table_contents[num_row].append({"tag":tag_mode,"item":item,"row":1,"col":1})
                num_col = num_col + 1
            num_row = num_row + 1

        # Second pass: emit one <tr> per stored row.
        for row_item in table_contents:
            line = "<tr>"
            for item in row_item:
                # empty cells were merged into a neighbour above
                if item["item"] == "":
                    continue
                line = line + "<" + item["tag"]
                if not item["row"] == 1:
                    line = line + (' rowspan="%s"' % item["row"])
                if not item["col"] == 1:
                    line = line + (' colspan="%s"' % item["col"])
                line = line +  ">"
                line = line + item["item"]
                line = line + "</" + item["tag"] + ">"
            line = line + "</tr>\n"
            # the row already ends with a newline, hence the trailing comma
            print line,
            self._table_buf1 = self._table_buf1 + line

            # Earlier one-row-at-a-time rendering, kept for reference:
            # line = "<tr><th>" + re.sub(ur"^〓", "", line) + "</th></tr>"
            # line = line.replace("\t", "</th><th>")
            # print line
            # else:
            # line = "<tr><td>" + line + "</td></tr>"
            # line = line.replace("\t", "</td><td>")
            # print line

        print self._table_end()
        self._table_buf1 =  self._table_buf1 + self._table_end()
        # store the finished HTML under the table name
        if self.index_haskey("tables"):
            self.index("tables")[fig_name] = self._table_buf1
        else:
            self.index_add("tables", {fig_name:self._table_buf1})
 503
 504     def _call_tables(self, line):
 505         try:
 506             fig_name =  re.search(ur"^☆call_tables\((表[0-9A-Z]+)", line).group(1)
 507         except AttributeError:
 508             return
 509         print self.index("tables")[fig_name]