2 # -*- coding: utf-8 -*-
3 """convert sfjpmagazine's story to sfjpwiki-style text."""
# Command-line handling: the script takes three positional arguments, named
# in the usage string below.
# NOTE(review): this listing is an excerpt -- the imports, any argument-count
# check and the error handling around open() are on lines not shown here;
# confirm against the full file.
16 usage = """%s <html_pathname> <base_dir> <output_base_dir>""" % sys.argv[0]
# html_pathname: the HTML file to convert.
20 html_pathname = sys.argv[1]
# base_dir: the directory that html_pathname's directory is made relative to
# when the output directory name is derived further down.
21 base_dir = sys.argv[2]
# output_base_dir: the directory under which output directories are created.
22 output_base_dir = sys.argv[3]
# Open the input HTML file for reading.
27 fh = open(html_pathname, "r")
# Progress message goes to stderr (Python 2 "print >>" syntax).
31 print >> sys.stderr, "converting %s..." % html_pathname
# Remove <script> and <noscript> elements before conversion. re.S makes "."
# match newlines, so elements that span several lines are removed whole; the
# non-greedy ".*?" stops at the first matching closing tag.
36 rex = re.compile(r"<\s*script[^>]*?>.*?</script>", re.S)
37 rex2 = re.compile(r"<\s*noscript[^>]*?>.*?</noscript>", re.S)
# NOTE(review): `body` is assigned on a line not shown in this excerpt;
# presumably it holds the contents read from `fh` -- confirm.
38 tmp = rex.sub("", body)
39 tmp = rex2.sub("", tmp)
# Run the cleaned HTML through the converter.
# NOTE(review): "article-body" presumably names the element that holds the
# article text; verify against html2sfjpwiki.Html2SfjpWiki.parse.
42 c = html2sfjpwiki.Html2SfjpWiki()
43 r = c.parse(tmp, "article-body")
# Drop the site-name suffix from the converter's title and trim whitespace.
44 title = c.title.replace(" - SourceForge.JP Magazine", "").strip()
# Derive the output directory: take the HTML file's directory relative to
# base_dir and flatten it into a single name by replacing every path
# separator with "-", then place that name under output_base_dir.
47 html_dir = os.path.dirname(html_pathname)
48 rel_dir = os.path.relpath(html_dir, base_dir)
49 output_dir = os.path.join(output_base_dir, rel_dir.replace(os.path.sep, "-"))
# Page numbering: sibling directories named str(p) are probed under the
# parent of html_dir, and the HTML file's own directory name is parsed as
# the current page number.
# NOTE(review): the initialisation of `p`, the body of the while loop and the
# assignment of `last_page` are on lines not shown here -- confirm how
# last_page is derived from the probe.
52 pdir = os.path.dirname(html_dir)
54 while os.path.isdir(os.path.join(pdir, str(p))):
57 current_page = int(os.path.basename(html_dir))
# The page title gets a "(current/last)" suffix; the page file name gets a
# "_p<current>" suffix.
64 page_title = title + "(%d/%d)" % (current_page, last_page)
65 page_filename = title + "_p%d" % current_page
# Spaces and "+" become "_" before the name is URL-quoted.
67 page_filename_quoted = urllib.quote(page_filename.replace(" ", "_").replace("+", "_"))
# Write this page's wiki text to <output_dir>/wiki.txt, creating the output
# directory first when it does not exist yet.
70 if not os.path.isdir(output_dir):
71 os.makedirs(output_dir)
72 o_pathname = os.path.join(output_dir, "wiki.txt")
74 fo = open(o_pathname, "w")
# NOTE(review): this exit is presumably the handler of a try/except around
# the open() above; the try/except lines are not shown in this excerpt.
76 sys.exit("cannot open output file: %s." % o_pathname)
# File layout: line 1 is the URL-quoted page file name, line 2 the page
# title, then a blank line, a navigation macro and a heading with the title.
78 fo.write(page_filename_quoted + "\n")
79 fo.write(page_title + "\n\n")
80 fo.write("[[PageNavi(NavigationList)]]\n\n")
82 fo.write("== %s ==\n\n" % title)
# NOTE(review): the converted article text is presumably written on the
# omitted line(s) between the heading and this trailing navigation macro.
85 fo.write("\n\n[[PageNavi(NavigationList)]]\n\n")
# copy_attachments(from_dir, dest_dir): copy image files from from_dir into
# dest_dir. An entry is copied when its name ends in ".png", ".jpg" or ".gif"
# and it is not a directory.
88 def copy_attachments(from_dir, dest_dir):
# Directory listing of from_dir (dircache is a Python 2 stdlib module).
89 d = dircache.listdir(from_dir)
# The extension match is case-sensitive: no re.I flag is passed, so names
# such as "A.PNG" do not match.
90 rex = re.compile(r"\.(png|jpg|gif)$")
# NOTE(review): `item` is bound on a line not shown in this excerpt;
# presumably a loop over the entries of `d` -- confirm.
92 p = os.path.join(from_dir, item)
93 if rex.search(item) and (not os.path.isdir(p)):
94 shutil.copy2(p, dest_dir)
# Copy the image files from the HTML file's directory into the output
# directory that holds the generated wiki.txt.
96 copy_attachments(html_dir, output_dir)
# Pattern that matches a string which is empty or whitespace-only.
101 rex = re.compile(r"^\s*$")
# Append `p` followed by a blank line to `preface`.
# NOTE(review): the initialisation of `preface`, the enclosing loop and the
# use of `rex` are on lines not shown here; presumably blank entries are
# skipped while the preface text is collected -- confirm.
109 preface = preface + p + "\n\n"
# Index page: handled only for the first page of a story that has more than
# one page.
# NOTE(review): indentation is lost in this excerpt, so the exact extent of
# this `if` body cannot be read from here -- confirm against the full file.
114 if current_page == 1 and last_page != 1:
# The index directory is output_dir with its last "-<segment>" removed.
115 index_dir = re.sub(r"-[^-]*$", "", output_dir)
116 title_name = title + " PDF"
# File name: the title with spaces and "+" replaced by "_", URL-quoted.
117 file_name = urllib.quote(title.replace(" ", "_").replace("+", "_"))
# Create the index directory if needed and open <index_dir>/wiki.txt.
121 if not os.path.isdir(index_dir):
122 os.makedirs(index_dir)
123 o_pathname = os.path.join(index_dir, "wiki.txt")
125 fo = open(o_pathname, "w")
# NOTE(review): presumably the handler of a try/except around the open()
# above; the try/except lines are not shown in this excerpt.
127 sys.exit("cannot open output file: %s." % o_pathname)
# Same header layout as the per-page file: file name line, title line, then
# a heading with the story title.
130 fo.write(file_name + "\n")
131 fo.write(title_name + "\n")
133 fo.write("== %s ==\n\n" % title)
# Links written to the index page. First a list entry linking to the
# combined "<title>_all.pdf" download; the template's %s is filled with the
# title after spaces and "+" are replaced by "_". Then, for each page number
# from 1 to last_page inclusive, a list entry of the form
# " * [<title>_p<n> <title>(<n>/<last_page>)]" is built.
# NOTE(review): the triple-quoted template continues on lines not shown in
# this excerpt, as does the statement that writes each per-page entry.
136 t = """ * [[http://sourceforge.jp/projects/test11/wiki/!pdf/%s_all.pdf 全ページをPDF形式でダウンロード]]
139 fo.write(t % title.replace(" ", "_").replace("+", "_"))
143 for n in range(1, last_page+1):
144 t1 = "%s_p%d" % (title.replace(" ", "_").replace("+", "_"), n)
145 t2 = "%s(%d/%d)" % (title, n, last_page)
146 t = " * [%s %s]" % (t1, t2)
# Combined page: a wiki page that pulls in every per-page wiki page through
# include macros.
# NOTE(review): indentation is lost in this excerpt; presumably this section
# is still inside the first-page-of-a-multi-page-story branch -- confirm.
# The directory is output_dir with its last "-<segment>" replaced by "_all".
154 index_dir = re.sub(r"-[^-]*$", "_all", output_dir)
# File name: the title with spaces and "+" replaced by "_", plus "_all",
# URL-quoted.
156 file_name = urllib.quote(title.replace(" ", "_").replace("+", "_") + "_all")
# Create the directory if needed and open <index_dir>/wiki.txt.
159 if not os.path.isdir(index_dir):
160 os.makedirs(index_dir)
161 o_pathname = os.path.join(index_dir, "wiki.txt")
163 fo = open(o_pathname, "w")
# NOTE(review): presumably the handler of a try/except around the open()
# above; the try/except lines are not shown in this excerpt.
165 sys.exit("cannot open output file: %s." % o_pathname)
# NOTE(review): title_name is not reassigned on the visible lines of this
# section; an omitted line may set it -- confirm.
167 fo.write(file_name + "\n")
168 fo.write(title_name + "\n")
# One include macro per page, for pages 1 through last_page inclusive. On
# the visible lines no separator is written between consecutive macros.
170 for n in range(1, last_page+1):
171 t = "%s_p%d" % (title.replace(" ", "_").replace("+", "_"), n)
172 fo.write("[[include(%s)]]" % t)