--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+import os
+import os.path
+import dircache
+import re
+import shutil
+import urlparse
+
+import html2wiki.h2wconverter as h2wconverter
+
+usage = "%s <input> <output>" % sys.argv[0]
+
+try:
+ input = sys.argv[1]
+except IndexError:
+ sys.exit(usage)
+
+try:
+ output = sys.argv[2]
+except IndexError:
+ output = None
+
+def main(input, output):
+ if os.path.isfile(input):
+ conv(input, output)
+ elif os.path.isdir(input):
+ r_scan(input, input, output)
+
+def src_conv(url, input, basedir):
+ p = urlparse.urlparse(url)
+ # p => <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
+ if p[1] != "": # when url is link to external site:
+ return url
+ if p[2] == "": # when path is not given:
+ return url
+
+ path = p[2]
+ if path[0] == "/":
+ # absolute path
+ path = path[1:]
+ else:
+ # relative path
+ # convert: ../foo/bar.png => hoge/foo/bar.png
+ dir = os.path.relpath(input, basedir)
+ dir = os.path.dirname(dir)
+ dir = os.path.join(dir, path)
+ path = os.path.normpath(dir)
+
+ # convert: hoge/foo/bar.png => hoge/foo:bar.png
+ dir = os.path.dirname(path)
+ file = os.path.basename(path)
+ path = ":".join((dir, file))
+
+ return path
+
+def link_conv(url, input, basedir):
+ p = urlparse.urlparse(url)
+ # p => <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
+ if p[1] != "": # when url is link to external site:
+ return url
+ if p[2] == "":
+ return url
+
+ path = p[2]
+ path = re.sub(r"\.(html|htm|txt)$", "", path)
+ if path[0] == "/":
+ # absolute path
+ path = path[1:]
+ else:
+ # relative path
+ dir = os.path.relpath(input, basedir)
+ dir = os.path.dirname(dir)
+ dir = os.path.join(dir, path)
+ path = os.path.normpath(dir)
+ return path
+
+
+def conv(input, output, basedir=""):
+ print "processing %s ..." % input
+ fi = open(input, "r")
+ html_str = fi.read()
+ fi.close()
+
+ c = h2wconverter.Html2WikiConverter()
+
+ lc = lambda x: link_conv(x, input, basedir)
+ sc = lambda x: src_conv(x, input, basedir)
+ c.set_link_converter(lc)
+ c.set_src_converter(sc)
+
+ text = c.convert(html_str)
+
+ if output:
+ fo = open(output, "w")
+ fo.write(text)
+ fo.close()
+ else:
+ print text
+
+def r_scan(dir, input, output):
+ for i in dircache.opendir(dir):
+ next = os.path.join(dir, i)
+ next = os.path.normpath(next)
+ if os.path.isdir(next):
+ r_scan(next, input, output)
+ else:
+ dir = os.path.dirname(next)
+ o_dir = dir.replace(input, output)
+ file = os.path.basename(next)
+
+ o_pathname = os.path.join(o_dir, file)
+ if not os.path.exists(o_dir):
+ os.makedirs(o_dir)
+ if re.search(r"\.html$", file):
+ conv(next, o_pathname, input)
+ else:
+ shutil.copy(next, o_pathname)
+
+# main routine
+main(input, output)
+