import sys
from urlparse import urlparse
-
class Html2SfjpWiki(HTMLParser.HTMLParser):
"html to sfjpwiki-style text converter"
def __init__(self):
self._add_handlers()
self._block = False
+    def _post_proc(self, str):
+        """Turn whole-line bold markup into a level-6 wiki heading.
+
+        Every line of ``str`` that consists solely of ``'''text'''`` is
+        rewritten as ``====== text ======``; all other text is returned
+        unchanged.
+        """
+        # re.M makes ^ and $ anchor at each line, not only at the text ends.
+        caption_line = re.compile(r"^'''(.+)'''$", re.M)
+        heading = r"====== \1 ======"
+        return caption_line.sub(heading, str)
+
def parse(self, html_string, target_id):
"""Parse html_string with url, and return anchors"""
self._anchors = []
if self._id == "":
self._thru = False
self.feed(html_string)
- return "".join(self._buf)
+ ret = "".join(self._buf)
+ return self._post_proc(ret)
def handle_starttag(self, tag, attrs):
self._prevtag = self._currenttag
# tag specific handlers
+    def _expand_attrs(self, tag, attrs):
+        """Rebuild the literal HTML start tag for ``tag`` with ``attrs``.
+
+        ``attrs`` is a sequence of ``(name, value)`` pairs, or an empty/None
+        value for a tag without attributes.  Returns ``<tag>`` when there
+        are no attributes, otherwise ``<tag name="value" ...>``.
+        """
+        parts = [tag]
+        for (key, val) in attrs or ():
+            if val is None:
+                # A valueless attribute (e.g. <td nowrap>) is reported with
+                # value None; emit the bare name instead of name="None".
+                parts.append(key)
+            else:
+                # Escape '&' first so the '&quot;' added next is not mangled;
+                # an unescaped '"' would terminate the attribute early.
+                escaped = val.replace("&", "&amp;").replace('"', "&quot;")
+                parts.append('%s="%s"' % (key, escaped))
+        return "<" + " ".join(parts) + ">"
+
+
def _h_start_table(self, tag, attrs):
# if tag has "class" attribute, and those value is "table":
if ("class", "table") in attrs:
#self._table_h_th = (self._start_handlers["th"], self._end_handlers["th"])
# set new handlers
- self._start_handlers["tr"] = lambda s, t, a: "|"
- self._start_handlers["td"] = lambda s, t, a: "|"
- self._start_handlers["th"] = lambda s, t, a: "|'''"
- self._end_handlers["tr"] = lambda s, a: "|\n"
- self._end_handlers["td"] = lambda s, a: "|"
- self._end_handlers["th"] = lambda s, a: "'''|"
+ self._start_handlers["tr"] = lambda s, t, a: s._expand_attrs(t, a)
+ self._start_handlers["td"] = lambda s, t, a: s._expand_attrs(t, a)
+ self._start_handlers["th"] = lambda s, t, a: s._expand_attrs(t, a)
+ self._end_handlers["tr"] = lambda s, t: "</" + t + ">\n"
+ self._end_handlers["td"] = lambda s, t: "</" + t + ">"
+ self._end_handlers["th"] = lambda s, t: "</" + t + ">"
self._block = False
self._in_table = True
+ return """{{{ html
+"""
else:
self._block = True
self._in_table = False
del self._end_handlers["td"]
del self._start_handlers["th"]
del self._end_handlers["th"]
- return "\n"
+ return """
+</table>
+}}}
+"""
else:
self._in_table = False
self._block = False
def _h_start_caption(self, tag, attrs):
del self._start_handlers["b"]
del self._end_handlers["b"]
- return "====== "
+ return "<h6>"
def _h_end_caption(self, tagd):
self._start_handlers["b"] = lambda s, t, a: "'''"
self._end_handlers["b"] = lambda s, t: "'''"
- return " ======\n"
+ return """</h6>\n<table class="wikitable" border="1">\n\n"""
def _h_start_img(self, tag, attrs):
src = ""