OSDN Git Service

add uploader to html2wiki
[otptools/otptools.git] / html2wiki / convert2wiki.py
diff --git a/html2wiki/convert2wiki.py b/html2wiki/convert2wiki.py
new file mode 100755 (executable)
index 0000000..6205443
--- /dev/null
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+import os
+import os.path
+import dircache
+import re
+import shutil
+import urlparse
+
+import html2wiki.h2wconverter as h2wconverter
+
+# Command-line handling: <input> is mandatory, <output> is optional.
+usage = "%s <input> <output>" % sys.argv[0]
+
+# Without an input argument there is nothing to do; sys.exit() reports the
+# usage string and terminates the script.
+if len(sys.argv) < 2:
+    sys.exit(usage)
+input = sys.argv[1]
+
+# A missing output argument is represented by None.
+output = sys.argv[2] if len(sys.argv) > 2 else None
+
+def main(input, output):
+    """Convert a single HTML file or a whole directory tree.
+
+    input  -- path of an existing file or directory
+    output -- destination path; for a single file it is passed on to conv()
+              as given, for a directory it is the root of the output tree
+
+    Exits with an error message when input is a directory but no output
+    was given, or when input is neither a file nor a directory.
+    """
+    if os.path.isfile(input):
+        conv(input, output)
+    elif os.path.isdir(input):
+        if not output:
+            # The directory scan needs a destination root to mirror into.
+            sys.exit("an output directory is required when input is a directory")
+        r_scan(input, input, output)
+    else:
+        sys.exit("%s: no such file or directory" % input)
+
+def src_conv(url, input, basedir):
+    """Rewrite the target of an embedded resource reference.
+
+    url     -- the reference as written in the HTML
+    input   -- path of the HTML file that contains the reference
+    basedir -- directory against which input is made relative
+
+    A url with a network location, or with an empty path, is returned
+    unchanged.  A path starting with "/" only loses that leading slash.
+    Any other path is resolved against the directory of input (taken
+    relative to basedir); its directory part and file name are then joined
+    with ":" instead of "/".
+    """
+    # urlparse yields (scheme, netloc, path, params, query, fragment).
+    parts = urlparse.urlparse(url)
+    netloc, target = parts[1], parts[2]
+    if netloc != "" or target == "":
+        # link to an external site, or no path at all: leave untouched
+        return url
+
+    if target[0] == "/":
+        # absolute path: just drop the leading slash
+        return target[1:]
+
+    # relative path, e.g. ../foo/bar.png => hoge/foo/bar.png
+    page_dir = os.path.dirname(os.path.relpath(input, basedir))
+    resolved = os.path.normpath(os.path.join(page_dir, target))
+
+    # e.g. hoge/foo/bar.png => hoge/foo:bar.png
+    head, tail = os.path.split(resolved)
+    return head + ":" + tail
+
+def link_conv(url, input, basedir):
+    """Rewrite the target of a hyperlink.
+
+    url     -- the link as written in the HTML
+    input   -- path of the HTML file that contains the link
+    basedir -- directory against which input is made relative
+
+    A url with a network location, or with an empty path, is returned
+    unchanged.  Otherwise only the path component is used: a trailing
+    .html/.htm/.txt extension is removed, then a path starting with "/"
+    loses that slash, while any other path is resolved against the
+    directory of input (taken relative to basedir).  Query and fragment
+    are not carried over into the rewritten result.
+    """
+    # urlparse yields (scheme, netloc, path, params, query, fragment).
+    parts = urlparse.urlparse(url)
+    if parts[1] != "" or parts[2] == "":
+        # link to an external site, or no path at all: leave untouched
+        return url
+
+    target = re.sub(r"\.(html|htm|txt)$", "", parts[2])
+    if target[0] == "/":
+        # absolute path: just drop the leading slash
+        return target[1:]
+
+    # relative path: resolve it from the directory of the current page
+    page_dir = os.path.dirname(os.path.relpath(input, basedir))
+    return os.path.normpath(os.path.join(page_dir, target))
+    
+
+def conv(input, output, basedir=""):
+    """Convert one HTML file to wiki text.
+
+    input   -- path of the HTML file to read
+    output  -- path of the file to write; when empty or None the converted
+               text is printed instead
+    basedir -- base directory handed to link_conv()/src_conv() so that
+               relative links and resource references can be rewritten
+    """
+    print("processing %s ..." % input)
+    # "with" guarantees the handle is closed even if reading fails.
+    with open(input, "r") as fi:
+        html_str = fi.read()
+
+    c = h2wconverter.Html2WikiConverter()
+    c.set_link_converter(lambda x: link_conv(x, input, basedir))
+    c.set_src_converter(lambda x: src_conv(x, input, basedir))
+
+    text = c.convert(html_str)
+
+    if output:
+        with open(output, "w") as fo:
+            fo.write(text)
+    else:
+        print(text)
+
+def r_scan(dir, input, output):
+    """Recursively mirror the tree under dir into output.
+
+    dir    -- directory currently being scanned
+    input  -- root of the source tree; output paths are computed from each
+              file's location relative to this root
+    output -- root of the destination tree
+
+    Files whose name ends in ".html" are passed to conv(); every other
+    file is copied unchanged.  Destination directories are created on
+    demand.
+    """
+    # Sorted listing, as the deprecated dircache module used to return.
+    for name in sorted(os.listdir(dir)):
+        src_path = os.path.normpath(os.path.join(dir, name))
+        if os.path.isdir(src_path):
+            r_scan(src_path, input, output)
+            continue
+
+        # Map the file by its position below the input root.  A plain
+        # string replace of input by output would also rewrite later
+        # occurrences of the same text in the path, and would do nothing
+        # when input carries a trailing slash or is ".".
+        rel_path = os.path.relpath(src_path, input)
+        o_pathname = os.path.join(output, rel_path)
+        o_dir = os.path.dirname(o_pathname)
+
+        if o_dir and not os.path.exists(o_dir):
+            os.makedirs(o_dir)
+        if re.search(r"\.html$", name):
+            conv(src_path, o_pathname, input)
+        else:
+            shutil.copy(src_path, o_pathname)
+    
+# Script entry point: run the conversion with the input/output values taken
+# from the command line near the top of this file.
+main(input, output)
+