OSDN Git Service

create markupper2
author hylom <hylom@users.sourceforge.jp>
Fri, 30 Oct 2009 11:55:10 +0000 (20:55 +0900)
committer hylom <hylom@users.sourceforge.jp>
Fri, 30 Oct 2009 11:55:10 +0000 (20:55 +0900)
StoryLists/retrive_storylist.py [changed mode: 0644->0755]
markup2.py [new file with mode: 0755]
markupper2.py [new file with mode: 0644]

old mode 100644 (file)
new mode 100755 (executable)
diff --git a/markup2.py b/markup2.py
new file mode 100755 (executable)
index 0000000..24e8d02
--- /dev/null
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import sys
+import os
+import codecs
+
+rootpath = r"C:\Users\hirom\bin\otptools"
+sys.path.insert(0, rootpath)
+
+import markupper2
+
+
+str_usage = "markup.pl hashfile targetfile\n"
+
+sys.stdin = codecs.getreader('utf_8')(sys.stdin)
+sys.stdout = codecs.getwriter('utf_8')(sys.stdout)
+
+try:
+#    path_img_hash = sys.argv[1]
+    path_target = sys.argv[1]
+except IndexError:
+    sys.exit(str_usage)
+
+markupper = markupper2.MarkUpper2()
+
+file_target = codecs.open(path_target, "r", "utf_8" )
+
+markupper.markup(file_target)
diff --git a/markupper2.py b/markupper2.py
new file mode 100644 (file)
index 0000000..d0e05e9
--- /dev/null
@@ -0,0 +1,646 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""markupper2.py - markupper v2."""
+
+import sys
+import os
+import re
+import codecs
+import pickle
+
+import deterfile
+
+class MarkUpper2(object):
+    """
+    """
+    def __init__(self):
+        self._vars = {}
+        self._anchor = ""
+
+    def markup(self, input_iter):
+        """
+        Do markup.
+
+        @param input_iter: iterator to use as input
+        @type input_iter: iterator
+        """
+        self.input = input_iter
+
+        for line in self.input:
+            if re.search(ur"^☆{{{$", line):
+                self._inline(line)
+                continue
+            elif re.search(ur'^\set{([A-Za-z0-9_]+)\s*,\s*"([^"]*)"\s*}', line):
+                g = re.search(ur'^\set{([A-Za-z0-9_]+)\s*,\s*"([^"]*)"\s*}', line)
+                self._vars[g.group(1)] = g.group(2)
+                continue
+            elif re.search(ur"^☆image_border:\s(on|On|ON)", line):
+                self._image_border = 1
+                continue
+            elif re.search(ur"^☆comment\s{{{$", line):
+                self._comment(line)
+                continue
+            elif re.search(ur"^☆\*", line):
+                self._anchor = re.sub(ur"^☆\*", "", line).strip()
+                continue
+            elif re.search(ur"^☆clear\s+", line):
+                self._clear(line)
+                continue
+            elif re.search(ur"^・", line):
+                self._ulist(line)
+                continue
+            elif re.search(ur"^[0-9]\.", line):
+                self._olist(line)
+                continue
+            elif re.search(ur"^☆begin-column:", line):
+                self._begin_column(line)
+                continue
+            elif re.search(ur"^☆end-column", line):
+                self._end_column(line)
+                continue
+            elif re.search(ur"^☆space", line):
+                self._space(line)
+                continue
+            elif re.search(ur"^☆call_tables", line):
+                self._call_tables(line)
+                continue
+            elif re.search(ur"^●", line):
+                self._head_l(line)
+                continue
+            elif re.search(ur"^○", line):
+                self._head_m(line)
+                continue
+            elif re.search(ur"^☆----", line):
+                self._newpage(line)
+                continue
+            elif re.search(ur"^☆\+---", line):
+                self._code(line)
+                continue
+            elif re.search(ur"^☆表", line):
+                self._table(line)
+                continue
+            elif re.search(ur"^☆図", line):
+                self._fig(line)
+                continue
+            elif re.search(ur"^☆写真", line):
+                self._photo(line)
+                continue
+            elif re.search(ur"^☆リスト", line):
+                self._list(line)
+                continue
+            elif re.search(ur"^☆flow", line):
+                self._flow(line)
+                continue
+
+            if re.search(ur"^ ", line):
+                self._paragraph(line)
+                continue
+
+
+            if re.search(r"^\s*$", line):
+                line = ""
+
+            line = line.strip()
+            print line
+
+        # end-of-loop
+
+    def _clear(self, line):
+        print """<div style="clear:left;"> </div>
+"""
+
+    def _head_l(self, line):
+        line = line.rstrip()
+        if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
+            self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
+            line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
+
+        line = self._default_markup_rule(line)
+        if self._anchor != "":
+            line = re.sub(ur"^●(.*)$", ur'<div id="%s"><h3>\1</h3></div>' % self._anchor, line)
+            self._anchor = ""
+        else:
+            line = re.sub(ur"^●(.*)$", ur"<h3>\1</h3>", line)
+        print line
+
+    def _head_m(self, line):
+        line = line.rstrip()
+        if re.search(ur"\*{[a-zA-Z0-9_]*}\s*$", line):
+            self._anchor = re.search(ur"\*\{([a-zA-Z0-9_]*)\}\s*$", line).group(1)
+            line = re.sub(ur"\s*\*\{[a-zA-Z0-9_]*\}\s*$", "", line)
+
+        line = self._default_markup_rule(line)
+        if self._anchor != "":
+            line = re.sub(ur"^○(.*)$", ur'<div id="%s"><h4>\1</h4></div>' % self._anchor, line)
+            self._anchor = ""
+        else:
+            line = re.sub(ur"^○(.*)$", ur"<h4>\1</h4>", line)
+        print line
+
+    def _paragraph(self, line):
+        line = self._default_markup_rule(line)
+        line = "<p>" + line + "</p>"
+        print line
+
+    def _newpage(self, line):
+        line = re.sub(ur"☆----.*-{0,1}", u"<hr>", line)
+        print line
+
+    def make_hashlist(self, path_to_hashfile):
+        """
+        create hash list.
+
+        @param path_to_hashfile: hashfile's path
+        @type path_to_hashfile: string
+        """
+        try:
+            file_img_hash = open(path_to_hashfile, "r")
+        except IOError:
+            sys.stderr.write("cannot open file: %s" % path_img_hash)
+            return None;
+
+        self.hashlist = {};
+        for line in file_img_hash:
+            splited = line.strip().split("\t", 2)
+            # hashlist's format: <hash> \t <filename>
+            self.hashlist[splited[1]] = splited[0]
+
+    def _call_tagles(self):
+        pass
+
+    def _escape(self, line):
+        line = re.sub(ur"&", ur"&amp;", line)
+        line = re.sub(ur"<", ur"&lt;", line)
+        line = re.sub(ur">", ur"&gt;", line)
+        return line
+
+    def _default_markup_rule(self, line):
+        """
+        apply default markup rules.
+
+        @param line: string to apply markup
+        @type line: string
+        """
+        line = self._escape(line)
+
+        # apply filter
+        # line = tag_filter.apply(line)
+
+        line = re.sub(ur"[★*](表[0-9~、]+)", ur"<b>\1</b>", line)
+        line = re.sub(ur"[★*](図[0-9~、]+)", ur"<b>\1</b>", line)
+        line = re.sub(ur"[★*](写真[0-9~、]+)", ur"<b>\1</b>", line)
+        line = re.sub(ur"[★*](リスト[0-9~、]+)", ur"<b>\1</b>", line)
+        line = re.sub(ur"[★*]b\[(.*?)\]", ur"<b>\1</b>", line)
+        line = re.sub(ur"[★*]b\{(.*?)\}", ur"<b>\1</b>", line)
+        line = re.sub(ur"[★*]\[(\S*) (.*?)\]", r'<a href="\1">\2</a>', line)
+        line = re.sub(ur"[★*]\[(\S*)\]", r'<a href="\1">\1</a>', line)
+
+        # comment
+        if re.search(ur"^☆#", line):
+            line = ""
+
+        return line
+
+
+    def _ulist(self, line):
+        """Proccess ul"""
+        print "<ul>"
+        while re.search(ur"^・", line):
+            line = self._default_markup_rule(line)
+            print re.sub(ur"^・(.*)$", ur"<li>\1</li>", line.strip())
+            line = self.input.next()
+        print "</ul>\n"
+
+    def _olist(self, line):
+        """Proccess ul"""
+        print "<ol>"
+        while re.search(ur"^[0-9]+\.", line):
+            line = self._default_markup_rule(line)
+            print re.sub(ur"^[0-9]+\.(.*)$", ur"<li>\1</li>", line.strip())
+            line = self.input.next()
+        print "</ol>\n"
+
+
+
+    def _begin_column(self, line):
+        """Proccess column"""
+        try:
+            str_title = re.search(ur"^☆begin-column:(.*)$", line).group(1)
+        except AttributeError:
+            str_title = ""
+
+        html = """<table bgcolor="#DDDDDD" border="0" cellpadding="6" width="95%%">
+    <tr><th>%s</th></tr>
+    <tr><td><span style="font-size: 85%%;">
+    """ % (str_title)
+        print html
+
+    def _end_column(self, line):
+        print """</span></td></tr>
+    </table>
+    """
+
+    def _list_start(self):
+        return "<pre>"
+
+    def _list_end(self):
+        return "</pre>"
+
+    def _list(self, line):
+        try:
+            str_title = re.search(ur"^☆(リスト.*)$", line).group(1)
+        except AttributeError:
+            str_title = ""
+        print "<p><b>%s</b></p>" % (str_title)
+        print self._list_start()
+
+        for line in self.input:
+            line = line.strip("\n\r")
+            line = self._escape(line)
+            line = line = re.sub(ur"[★*]b\[(.*?)]", ur"<b>\1</b>", line)
+            line = line = re.sub(ur"[★*]b{(.*?)}", ur"<b>\1</b>", line)
+            if re.search(ur"""^☆\+---""", line):
+                break
+            print line
+        print self._list_end()
+
+    def _code(self, line):
+        print self._list_start()
+
+        for line in self.input:
+            line = self._escape(line)
+            line = line = re.sub(ur"[★*]b\[(.*?)]", ur"<b>\1</b>", line)
+            line = line = re.sub(ur"[★*]b{(.*?)}", ur"<b>\1</b>", line)
+
+            if re.search(ur"^☆\+---$", line):
+                break
+            print line,
+        print self._list_end()
+
+    def _inline(self, line):
+        for line in self.input:
+            #        line = line.strip()
+            if re.search(ur"^☆}}}", line):
+                break
+            print line
+
+    def _comment(self, line):
+        for line in self.input:
+            line = line.strip()
+            if re.search(ur"^☆}}}", line):
+                break
+
+    def _space(self, line):
+        print "<br><br>"
+
+    def _flow(self, line):
+        down_arrow = "http://static.sourceforge.jp/crystal/22x22/actions/1downarrow.png"
+        flow_header = """<div style="text-align:center; border: 1px solid; background-color:#EFF2F0; width:90%; margin: 0 auto 1em;">
+"""
+        flow_title = """<div style="text-align:left; padding:4px 4px 4px 1em; margin-bottom: 1em; border-bottom: 1px solid; font-weight: bold; background-color:#BCD;">
+%s
+</div>"""
+        flow_footer = """</div>
+"""
+        flow_item = """<div>
+%s
+<p>%s</p>
+</div>
+
+"""
+        arrow = '<div style="margin:1em auto;"><img src="%s"></div>\n' % (down_arrow,)
+
+        rex_title = re.compile(ur"^☆flow\s+(.*)$")
+        if rex_title.search(line):
+            title = rex_title.search(line).group(1)
+        else:
+            title = ""
+
+        rex_file = re.compile(ur"^([^:]*):(.*)$")
+        outputs = []
+        for line in self.input:
+            if re.search(r"^\s*$", line):
+                break
+            match = rex_file.search(line)
+            if match:
+                file = os.path.join(self._image_dir, match.group(1))
+                cap = self._default_markup_rule(match.group(2))
+            else:
+                continue
+            fig = self._anchored_fig(file, cap)
+            outputs.append(flow_item % (fig, cap))
+
+        print flow_header
+        print flow_title % (title,)
+        print arrow.join(outputs)
+        print flow_footer
+        
+
+    def _fig_start(self, cap="", styles=[], width=0, height=0):
+        params = dict(style="", tablewidth="")
+        if width != 0:
+            params["style"] = "width:%d;" % (width,)
+            params["tablewidth"] = 'width="%d"' % (width,)
+
+        if "lfloat" in styles:
+            return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0" style="float:left; padding-left: 0.5em; %(style)s">
+    <tr> <td valign="top" align="center">
+    """ % params
+        elif "left" in styles:
+            return """<table %(tablewidth)s border="0" cellpadding="0" cellspacing="0" style="padding-left: 0.5em; %(style)s">
+    <tr> <td valign="top" align="center">
+    """ %params
+        else:
+            return """<table %(tablewidth)s align="center" border="0" cellpadding="0" cellspacing="0">
+    <tr> <td valign="top" align="center">
+    """ % params
+
+    def _fig_end(self, cap="", styles=[]):
+        return """</td> </tr>
+    <tr> <td><span style="font-size: 80%%; font-weight: bold;">
+    %s
+    </span></td> </tr>
+    </table>
+    """ % (cap)
+
+    def _get_png_geom(self, filepath):
+        desc = deterfile.file(filepath)
+        try:
+            m = re.match(r"([0-9]+)\s*x\s*([0-9]+)", desc[1])
+        except IndexError:
+            err = ",".join(desc)
+            raise Exception("deterfile error: %s, file: %s" % (err,filepath))
+        if m:
+            w = m.group(1)
+            h = m.group(2)
+            return (int(w), int(h))
+        else:
+            return None
+
+    def _fig(self, line):
+        try:
+            str_title = re.search(ur"^☆(図.*)$", line).group(1)
+        except AttributeError:
+            str_title = ""
+        if str_title.find(u"図*") == 0:
+            str_title = str_title.replace(u"図*", "")
+        line = self.input.next()
+        styles = []
+        if line.find("@") == 0:
+            styles = line.strip().replace("@", "").split(",")
+            line = self.input.next()
+
+        if line[0] == "<":
+            imgname = ""
+            imgname_s = ""
+            hash = ""
+            hash_s = ""
+            match_o1 = re.search(ur"<([^,]*?)>", line)
+            match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
+            if not match_o1 == None:
+                imgname = match_o1.group(1)
+                imgname = os.path.join(self._image_dir, imgname)
+                imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", imgname)
+            elif not match_o2 == None:
+                imgname = match_o2.group(1)
+                imgname = os.path.join(self._image_dir, imgname)
+                imgname_s = match_o2.group(2)
+                imgname_s = os.path.join(self._image_dir, imgname_s)
+
+            geom = self._get_png_geom(imgname_s)
+            if geom:
+                w = geom[0]
+                h = geom[1]
+                print self._fig_start("", styles, width=w, height=h)
+            else:
+                print self._fig_start("", styles)
+            print self._anchored_fig(imgname, str_title, imgname_s)
+            print self._fig_end(str_title, styles);
+        else:
+            hash = ""
+            hash_s = ""
+            match_o = re.search(ur"\[(.*?),\s*(.*?)\]", line)
+            try:
+                print """<table width="500" align="center" border="0" cellpadding="0" cellspacing="0">
+    <tr> <td valign="top" align="center">
+"""
+                print """<a href="/blob.pl?id=%s">
+  <slash-image id="%s" title="%s">
+</a>""" % (match_o.group(1), match_o.group(2), str_title)
+                print """</td> </tr>
+    <tr> <td><span style="font-size: 80%%; font-weight: bold;">
+    %s
+    </span></td> </tr>
+    </table>
+""" % (str_title)
+            except IndexError:
+                print >> sys.stderr, "error:cannot parse id. " + line
+
+    def _photo(self, line):
+        try:
+            str_title = re.search(ur"^☆(写真.*)$", line).group(1)
+        except AttributeError:
+            str_title = ""
+        if str_title.find(u"写真*") == 0:
+            str_title = str_title.replace(u"写真*", "")
+        line = self.input.next()
+        styles = []
+        if line.find("@") == 0:
+            styles = line.strip().replace("@", "").split(",")
+            line = self.input.next()
+
+        imgname = ""
+        imgname_s = ""
+        hash = ""
+        hash_s = ""
+        match_o1 = re.search(ur"<([^,]*?)>", line)
+        match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
+        if not match_o1 == None:
+            imgname = match_o1.group(1)
+            imgname = os.path.join(self._image_dir, imgname)
+            imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", imgname)
+        elif not match_o2 == None:
+            imgname = match_o2.group(1)
+            imgname = os.path.join(self._image_dir, imgname)
+            imgname_s = match_o2.group(2)
+            imgname_s = os.path.join(self._image_dir, imgname_s)
+
+        geom = self._get_png_geom(imgname_s)
+        if geom:
+            w = geom[0]
+            h = geom[1]
+            print self._fig_start("", styles, width=w, height=h)
+        else:
+            print self._fig_start("", styles)
+        print self._anchored_fig(imgname, str_title, imgname_s)
+        print self._fig_end(str_title, styles);
+
+
+        
+    def _anchored_fig(self, file, alt, file_s=""):
+
+        if file_s == "":
+            file_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", file)
+
+        if not os.path.isfile(file_s):
+            file_s = file
+
+        alt = re.sub(r"""<[A-Za-z0-9!/]+.*?>""", "", alt)
+
+        ret = """<a href="%s">
+  <img src="%s" alt="%s">
+</a>
+""" % (file, file_s, alt)
+
+
+        return ret
+        
+
+    def _fig_release(self, line):
+        try:
+            str_title = re.search(ur"^☆(図.*)$", line).group(1)
+        except AttributeError:
+            str_title = ""
+        print self._fig_start()
+
+        line = self.input.next()
+        imgname = ""
+        imgname_s = ""
+        hash = ""
+        hash_s = ""
+        match_o1 = re.search(ur"<([^,]*?)>", line)
+        match_o2 = re.search(ur"<(.*?),\s*(.*?)>", line)
+        if not match_o1 == None:
+            imgname = match_o1.group(1)
+            imgname_s = re.sub(r"(.[A-Za-z0-9_]+)$", r"_s\1", match_o1.group(1))
+        elif not match_o2 == None:
+            imgname = match_o1.group(1)
+            imgname_s = match_o1.group(2)
+
+
+        hash = self.hashlist.get(imgname, "")
+        hash_s = self.hashlist.get(imgname_s, "")
+        if hash_s == "":
+            hash_s = hash
+
+        print """<a href="/blob.pl?id=%s">
+     <slash type="image" id="%s" title="%s">
+     </a>
+     """ % (hash, hash_s, str_title)
+        
+
+
+        print self._fig_end(str_title);
+
+
+    def _table_start(self, cap):
+        return """<div style="width:90%%; margin-left:auto;margin-right:auto;"><table align="center" border="1" class="table" width="100%%">
+    <caption><b>%s</b></caption>
+    """ % cap
+
+    def _table_end(self, footnote=""):
+        return "</table>\n%s</div>\n" % (footnote,)
+
+    def _table(self, line):
+        """Print a table introduced by a "☆表" line.
+
+        The text after "☆" is the caption (a leading "表*" is removed).
+        Following lines are read from self.input until a blank line or a
+        line starting with "※", which becomes the footnote.  Each line is
+        one row with tab-separated cells; a row starting with "〓" is a
+        header row, and a cell enclosed in double quotes is a header cell.
+        An empty cell or a "↓" cell extends the span of a neighbouring cell
+        instead of producing a cell of its own.
+
+        The same markup, without the footnote, is also collected in
+        self._table_buf1.
+
+        @param line: the "☆表" caption line
+        @type line: string
+        """
+        str_title = ""
+        self._table_buf1 = ""
+
+        try:
+            str_title = re.search(ur"^☆(表.*)$", line).group(1)
+            # fig_name is computed but not used further in this method
+            fig_name =  re.search(ur"^☆(表[0-9A-Z]*)", line).group(1)
+        except AttributeError:
+            str_title = ""
+            fig_name = ""
+        if str_title.find(u"表*") == 0:
+            str_title = str_title.replace(u"表*", "")
+
+        print self._table_start(str_title)
+        self._table_buf1 =  self._table_start(str_title)
+
+        # First pass: read the rows into table_contents, a list of rows where
+        # every cell is a dict with "tag", "item", "row" (rowspan) and
+        # "col" (colspan).
+        num_row = 0
+        table_contents = []
+        footnote = ""
+        for line in self.input:
+            line = line.strip(" \n")
+            line = self._default_markup_rule(line)
+            if re.search(ur"^\s*$", line):
+                break
+            if re.search(ur"^※", line):
+                footnote = re.search(ur"^(※.*)$", line).group(1)
+                break
+            if re.search(ur"^〓", line):
+                line = re.sub(ur"^〓", "", line)
+                tag_mode = "th"
+            else:
+                tag_mode = "td"
+            table_contents.append([])
+            num_col = 0
+            for item in line.split("\t"):
+                if item == "":
+                    if num_col == 0:
+                        # empty first cell: extend the rowspan of the nearest
+                        # non-empty cell above in the same column
+                        # NOTE(review): num_row-n can become negative, which
+                        # indexes from the end of the list; IndexError is the
+                        # only stop condition - confirm this is intended.
+                        n = 1
+                        try:
+                            while table_contents[num_row-n][num_col]["item"] == "":
+                                n += 1
+                            table_contents[num_row-n][num_col]["row"] += 1
+                        except IndexError:
+                            pass
+                    else:
+                        # empty cell elsewhere: extend the colspan of the
+                        # nearest non-empty cell to its left in this row
+                        n = 1
+                        try:
+                            while table_contents[num_row][num_col-n]["item"] == "":
+                                n += 1
+                            table_contents[num_row][num_col-n]["col"] += 1
+                        except IndexError:
+                            pass
+                if item == u"↓":
+                    # "↓" cell: extend the rowspan of the nearest non-empty
+                    # cell above, then treat this cell as empty
+                    n = 1
+                    try:
+                        while table_contents[num_row-n][num_col]["item"] == "":
+                            n += 1
+                        table_contents[num_row-n][num_col]["row"] += 1
+                        item = ""
+                    except IndexError:
+                        pass
+
+                if re.search(r'^".*"$', item):
+                    # quoted cell: header cell, stored without the quotes
+                    item = re.search(r'^"(.*)"$', item).group(1)
+                    table_contents[num_row].append({"tag":"th","item":item,"row":1,"col":1})
+                else:
+                    table_contents[num_row].append({"tag":tag_mode,"item":item,"row":1,"col":1})
+                num_col = num_col + 1
+            num_row = num_row + 1
+
+        # Second pass: print one <tr> per row; cells with an empty item are
+        # skipped, and span attributes are written only when they are not 1.
+        for row_item in table_contents:
+            line = "<tr>"
+            for item in row_item:
+                if item["item"] == "":
+                    continue
+                line = line + "<" + item["tag"]
+                if not item["row"] == 1:
+                    line = line + (' rowspan="%s"' % item["row"])
+                if not item["col"] == 1:
+                    line = line + (' colspan="%s"' % item["col"])
+                line = line +  ">"
+                line = line + item["item"]
+                line = line + "</" + item["tag"] + ">"
+            line = line + "</tr>\n"
+            print line,
+            self._table_buf1 = self._table_buf1 + line
+
+            # Earlier row output, kept for reference:
+            # line = "<tr><th>" + re.sub(ur"^〓", "", line) + "</th></tr>"
+            # line = line.replace("\t", "</th><th>")
+            # print line
+            # else:
+            # line = "<tr><td>" + line + "</td></tr>"
+            # line = line.replace("\t", "</td><td>")
+            # print line
+
+        print self._table_end(footnote)
+        # the buffered copy is closed without the footnote
+        self._table_buf1 =  self._table_buf1 + self._table_end()
+
+    def _call_tables(self, line):
+        try:
+            fig_name =  re.search(ur"^☆call_tables\((表[0-9A-Z]+)", line).group(1)
+        except AttributeError:
+            return