2 # -*- coding: utf-8 -*-
3 """convert sfjpmagazine's story to sfjpwiki-style text."""
# Usage line built from the program name (sys.argv[0]); the script takes
# three positional arguments.
16 usage = """%s <html_pathname> <base_dir> <output_base_dir>""" % sys.argv[0]
# NOTE(review): this `return` presumably belongs to the quote_title() helper
# that is called further down; its `def` line is not visible here. It
# replaces every space, "+" and "/" with "_" in the value bound to `str`.
19 return str.replace(" ", "_").replace("+", "_").replace("/", "_")
# Positional arguments: the HTML file to convert, the directory that the
# HTML file's directory is later made relative to, and the root under which
# the output directories are created.
23 html_pathname = sys.argv[1]
24 base_dir = sys.argv[2]
25 output_base_dir = sys.argv[3]
# Open the source HTML for reading in text mode.
30 fh = open(html_pathname, "r")
# Progress message goes to stderr (Python 2 print-chevron syntax).
34 print >> sys.stderr, "converting %s..." % html_pathname
# Patterns for whole <script>...</script> and <noscript>...</noscript>
# elements. re.S lets "." span newlines, and the non-greedy ".*?" stops at
# the first closing tag.
39 rex = re.compile(r"<\s*script[^>]*?>.*?</script>", re.S)
40 rex2 = re.compile(r"<\s*noscript[^>]*?>.*?</noscript>", re.S)
# Remove both kinds of element from `body` (assigned on a line that is not
# visible here) before handing the text to the converter.
41 tmp = rex.sub("", body)
42 tmp = rex2.sub("", tmp)
# Convert the cleaned HTML with the Html2SfjpWiki converter.
45 c = html2sfjpwiki.Html2SfjpWiki()
# NOTE(review): hard-coded, machine-specific absolute path to the URL
# replacement list.
46 c.set_url_replace_list("/Users/hylom/otptools/sfmag2wiki/urltitle.txt")
# NOTE(review): "article-body" presumably identifies the element holding the
# article text -- confirm against Html2SfjpWiki.parse.
47 r = c.parse(tmp, "article-body")
# Drop the site-wide suffix from the converter's title and trim whitespace.
48 title = c.title.replace("- SourceForge.JP Magazine : オープンソースの話題満載", "").strip()
# Directory that contains the HTML file.
50 html_dir = os.path.dirname(html_pathname)
# Parent of that directory.
53 pdir = os.path.dirname(html_dir)
# NOTE(review): `p` is initialised on a line that is not visible here, and
# the loop body is missing too; this presumably probes sibling directories
# named by page number in order to determine last_page.
55 while os.path.isdir(os.path.join(pdir, str(p))):
# The name of the HTML file's directory is parsed as the current page number
# (int() raises ValueError if that name is not numeric).
58 current_page = int(os.path.basename(html_dir))
# html_dir expressed relative to base_dir.
61 rel_dir = os.path.relpath(html_dir, base_dir)
# Two alternative output_dir assignments; the condition that selects between
# them is not visible here. Both flatten the relative path by replacing path
# separators with "-": the first drops the last component of rel_dir, the
# second keeps all of it.
63 output_dir = os.path.join(output_base_dir, os.path.dirname(rel_dir).replace(os.path.sep, "-"))
65 output_dir = os.path.join(output_base_dir, rel_dir.replace(os.path.sep, "-"))
# Display title in the form "<title>(<current>/<last>)" and wiki page name in
# the form "<title>_p<current>".
73 page_title = title + "(%d/%d)" % (current_page, last_page)
74 page_filename = title + "_p%d" % current_page
# Page name passed through quote_title() and then percent-encoded with
# urllib.quote (Python 2 API).
76 page_filename_quoted = urllib.quote(quote_title(page_filename))
# Create the output directory (and any missing parents) if it is absent.
79 if not os.path.isdir(output_dir):
80 os.makedirs(output_dir)
# The converted page is written to <output_dir>/wiki.txt.
81 o_pathname = os.path.join(output_dir, "wiki.txt")
83 fo = open(o_pathname, "w")
# NOTE(review): presumably the handler for a failed open(); the surrounding
# try/except lines are not visible here.
85 sys.exit("cannot open output file: %s." % o_pathname)
# Header of the output file: quoted page name, display title, a blank line,
# the page navigation macro and a section heading with the article title.
87 fo.write(page_filename_quoted + "\n")
88 fo.write(page_title + "\n\n")
89 fo.write("[[PageNavi(NavigationList)]]\n\n")
91 fo.write("== %s ==\n\n" % title)
# NOTE(review): the lines between the heading and this footer are not
# visible; presumably the converted article text is written there.
94 fo.write("\n\n[[PageNavi(NavigationList)]]\n\n")
def copy_attachments(from_dir, dest_dir):
    """Copy image attachments from from_dir into dest_dir.

    Entries of from_dir whose names end in .png, .jpg or .gif are copied
    together with their metadata (shutil.copy2). Directories are skipped and
    a file that already exists in dest_dir is never overwritten. The scan is
    not recursive.

    from_dir -- directory to scan for image files
    dest_dir -- existing directory that receives the copies

    Raises IOError/OSError if from_dir cannot be listed or a copy fails
    (for example because dest_dir does not exist).
    """
    image_suffixes = (".png", ".jpg", ".gif")
    # os.listdir replaces the dircache module, which was deprecated in
    # Python 2.6 and removed in Python 3. dircache returned a sorted listing,
    # so sort here to keep the copy order unchanged.
    for item in sorted(os.listdir(from_dir)):
        if not item.endswith(image_suffixes):
            continue
        src = os.path.join(from_dir, item)
        dest = os.path.join(dest_dir, item)
        if os.path.isdir(src) or os.path.exists(dest):
            continue
        # Copy to the full destination path instead of to the directory: if
        # dest_dir were missing, copying "to dest_dir" would silently create
        # a regular file with that name.
        shutil.copy2(src, dest)
# Copy the image files that sit next to the HTML file into the page's output
# directory.
106 copy_attachments(html_dir, output_dir)
108 # generate index page
# NOTE(review): only fragments of get_preface() are visible here, so its
# full behaviour cannot be described. The parameter name `str` shadows the
# built-in type of the same name.
109 def get_preface(str):
# Matches a string that is empty or contains only whitespace.
111 rex = re.compile(r"^\s*$")
# Append `p` followed by a blank line to the accumulated preface text.
119 preface = preface + p + "\n\n"
# Taken only when processing the first page of a multi-page article.
# NOTE(review): indentation is not visible here, so how far this branch
# extends cannot be determined from this view.
124 if current_page == 1 and last_page != 1:
# Index directory: output_dir with its final "-<component>" suffix removed.
125 index_dir = re.sub(r"-[^-]*$", "", output_dir)
# Display title and percent-encoded page name of the index page.
126 title_name = title + " PDF"
127 file_name = urllib.quote(quote_title(title))
# Create the index directory (and any missing parents) if it is absent.
131 if not os.path.isdir(index_dir):
132 os.makedirs(index_dir)
# The index page is written to <index_dir>/wiki.txt.
133 o_pathname = os.path.join(index_dir, "wiki.txt")
135 fo = open(o_pathname, "w")
# NOTE(review): presumably the handler for a failed open(); the surrounding
# try/except lines are not visible here.
137 sys.exit("cannot open output file: %s." % o_pathname)
# Header of the index page: page name, display title, then a section heading.
140 fo.write(file_name + "\n")
141 fo.write(title_name + "\n")
143 fo.write("== %s ==\n\n" % title)
# Template for the "download all pages as PDF" link; its %s placeholder is
# filled with quote_title(title) when written. The triple-quoted literal
# continues on lines that are not visible here.
146 t = """ * [[http://sourceforge.jp/projects/test11/wiki/!pdf/%s_all.pdf 全ページをPDF形式でダウンロード]]
149 fo.write(t % quote_title(title))
# One wiki link per page: target "<quoted title>_p<n>", label
# "<title>(<n>/<last_page>)".
# NOTE(review): the statement that writes `t` is not visible here.
153 for n in range(1, last_page+1):
154 t1 = "%s_p%d" % (quote_title(title), n)
155 t2 = "%s(%d/%d)" % (title, n, last_page)
156 t = " * [%s %s]" % (t1, t2)
# Directory of the combined page: output_dir with its final "-<component>"
# suffix replaced by "_all".
164 index_dir = re.sub(r"-[^-]*$", "_all", output_dir)
# Percent-encoded page name "<quoted title>_all".
166 file_name = urllib.quote(quote_title(title) + "_all")
# Create that directory (and any missing parents) if it is absent.
169 if not os.path.isdir(index_dir):
170 os.makedirs(index_dir)
# The combined page is written to <index_dir>/wiki.txt.
171 o_pathname = os.path.join(index_dir, "wiki.txt")
173 fo = open(o_pathname, "w")
# NOTE(review): presumably the handler for a failed open(); the surrounding
# try/except lines are not visible here.
175 sys.exit("cannot open output file: %s." % o_pathname)
# Header of the combined page: page name, then display title.
177 fo.write(file_name + "\n")
178 fo.write(title_name + "\n")
# The combined page pulls in every per-page wiki page, in page order, via
# the include macro.
180 for n in range(1, last_page+1):
181 t = "%s_p%d" % (quote_title(title), n)
182 fo.write("[[include(%s)]]" % t)
187 # generate navigation page
# The navigation list is written to navigation.txt inside index_dir
# (whichever value index_dir was last assigned).
188 o_pathname = os.path.join(index_dir, "navigation.txt")
190 fo = open(o_pathname, "w")
# NOTE(review): presumably the handler for a failed open(); the surrounding
# try/except lines are not visible here.
192 sys.exit("cannot open output file: %s." % o_pathname)
194 quoted_title = quote_title(title)
# NOTE(review): the identical FrontPage link is written at three points among
# the visible lines; any conditions that select between them are not shown.
195 fo.write(" * [FrontPage HPC/並列プログラミングポータルトップページ]\n")
# Link to the page named by quoted_title, labelled as the full-article PDF
# download.
196 fo.write(" * [%s [記事全文PDFのダウンロード]]\n" % quoted_title)
197 fo.write(" * [FrontPage HPC/並列プログラミングポータルトップページ]\n")
# Link to the page named by quoted_title, labelled with the article title.
199 fo.write(" * [%s %s]\n" % (quoted_title, title))
# One entry per page: target "<quoted title>_p<n>", label
# "<title>(<n>/<last_page>)".
201 for n in range(1, last_page+1):
202 fo.write(" * [%s_p%d %s(%d/%d)]\n" % (quoted_title, n, title, n, last_page))
203 fo.write(" * [FrontPage HPC/並列プログラミングポータルトップページ]\n")