OSDN Git Service

move sfmag2wiki to subdirectory
[otptools/otptools.git] / sfmag2wiki / sfmag_html2wiki.py
diff --git a/sfmag2wiki/sfmag_html2wiki.py b/sfmag2wiki/sfmag_html2wiki.py
new file mode 100755 (executable)
index 0000000..b5d280c
--- /dev/null
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Convert an sfjpmagazine story (an HTML file) to sfjpwiki-style text saved as wiki.txt."""
+
+import sys
+import re
+import os
+import os.path
+import urlparse  # Python 2 module name; not referenced in the lines visible here
+
+import html2sfjpwiki  # non-stdlib module that provides the Html2SfjpWiki converter
+
+usage = """%s <html>""" % sys.argv[0]  # shown when the argument is missing or the input cannot be opened
+
+try:
+    html = sys.argv[1]  # path of the input HTML file (first command-line argument)
+except IndexError:
+    sys.exit(usage)  # no argument given: exit with the usage text
+
+
+try:
+    fh = open(html, "r")
+except IOError:
+    sys.exit(usage)  # NOTE(review): an unreadable input exits with the usage text only; the IOError detail is dropped
+
+body = fh.read()  # whole file is read into memory at once
+fh.close()
+
+rex = re.compile(r"<\s*script[^>]*?>.*?</script>", re.S)  # <script ...>...</script>; re.S lets "." span newlines; case-sensitive
+rex2 = re.compile(r"<\s*noscript[^>]*?>.*?</noscript>", re.S)  # same shape for <noscript ...>...</noscript>
+tmp = rex.sub("", body)  # remove script elements first
+tmp = rex2.sub("", tmp)  # then remove noscript elements
+
+c = html2sfjpwiki.Html2SfjpWiki()
+r = c.parse(tmp, "article-body")  # presumably "article-body" names the element to convert -- confirm in html2sfjpwiki
+
+output = os.path.join(os.path.dirname(html), "wiki.txt")  # wiki.txt goes in the input file's directory
+
+try:
+    fo = open(output, "w")  # mode "w" truncates any existing wiki.txt
+except IOError:
+    sys.exit("cannot open output file: %s." % output)
+
+fo.write("[[PageNavi(NavigationList)]]\n\n\n")  # PageNavi line ahead of the converted text
+fo.write(r)  # value returned by parse() above
+fo.write("\n\n[[PageNavi(NavigationList)]]\n\n")  # same PageNavi line after the converted text
+fo.close()
+