2 # -*- coding: utf-8 -*-
14 usage = sys.argv[0] + """ start end <output_file>
17 DATABASE = "./story.db"
25 if (not start.isdigit()) or (not start.isdigit()):
# Log in through the `otp` client using a user name and a password read from
# the terminal; the process exits with "login error!" when otp.login() fails.
#   otp   -- client object exposing login(uname, passwd)
#   uname -- optional user name (defaults to the empty string)
# NOTE(review): the condition guarding the user-name prompt and the bodies of
# the KeyboardInterrupt handlers are not visible in this excerpt; presumably
# the prompt only runs when `uname` is empty - confirm.
28 def login(otp, uname=""):
# Interactive prompt for the user name (Python 2 raw_input).
31 uname = raw_input("user: ")
32 except KeyboardInterrupt:
# getpass reads the password without echoing it.
35 passwd = getpass.getpass("login password:")
36 except KeyboardInterrupt:
# A falsy result from otp.login() is treated as a failed login.
38 if not otp.login(uname, passwd):
39 sys.exit("login error!")
# Create the SQLite schema in DATABASE: a `stories` table and a `topics` table.
# NOTE(review): what happens when the database file already exists (the body
# of the os.path.exists() branch) is not visible in this excerpt - confirm.
42 def prepare_database():
43 if os.path.exists(DATABASE):
45 con = sqlite3.connect(DATABASE)
# `stories` columns visible here: title, author, section, topic (text,
# default "") and comments (int, default 0); remaining columns are not shown.
48 cmd = """create table stories (
50 title text default "",
51 author text default "",
52 section text default "",
53 topic text default "",
55 comments int default 0,
60 cmd = """create table topics (
# Body of the "MM/DD hh:mm" -> datetime conversion helper.
# NOTE(review): the def line is not visible in this excerpt; the name str2date
# is taken from the caller, and the parameter is evidently called `str`
# (it shadows the builtin inside this function).
72 "01/01 01:01 -> datetime"
# Capture four two-digit fields: month/day and hour:minute.
73 m = re.search(r"([0-9][0-9])/([0-9][0-9]) ([0-9][0-9]):([0-9][0-9])", str)
# Groups 1..4 converted to ints: [month, day, hour, minute].
75 dateint = [int(m.group(x)) for x in xrange(1,5)]
# The input carries no year, so start from today's year and month.
76 year = datetime.datetime.today().year
77 month = datetime.datetime.today().month
# NOTE(review): a parsed month later than the current month presumably means
# the date belongs to the previous year; the adjustment line is not visible.
78 if month < dateint[0]:
81 dt = datetime.datetime(year, dateint[0], dateint[1], dateint[2], dateint[3])
87 con = sqlite3.connect(DATABASE)
89 cmd = """select sid from stories where ?"""
90 cur.execute(cmd, (sid,))
95 def insert_story_info(otp, story_info):
96 con = sqlite3.connect(DATABASE)
100 rex_sid = re.compile(r"/magazine.sourceforge.jp/article.pl\?sid=([0-9/]*)$")
102 m = rex_sid.search(story_info["url"])
108 si["title"] = story_info["title"]
109 si["author"] = story_info["author"]
111 dt = str2date(story_info["datetime"])
112 si["date"] = time.mktime(dt.timetuple())
114 cur.execute("""insert into stories ( sid, title, author, date )
115 values ( :sid, :title, :author, :date )""",
117 except sqlite3.IntegrityError:
118 cur.execute("""update stories set title = :title,
124 tags = otp.get_tags("/magazine.sourceforge.jp/article.pl?sid=" + sid)
125 title = otp.get_title("/magazine.sourceforge.jp/article.pl?sid=" + sid)
127 sys.stderr.write(".")
130 cur.execute("""delete from topics where sid=?""", (sid,))
133 tag = tag.decode("utf-8")
134 cur.execute("""insert into topics ( sid, topic )
135 values ( :sid, :topic )""",
136 dict(sid=sid, topic=tag))
139 title = title.decode("utf-8").replace("Open Tech Press |", "").strip()
140 cur.execute("""update stories set title = :title
142 dict(sid=sid, title=title))
# Entry point: retrieve story lists through the otp client for the index range
# derived from `start`/`end` and store every parsed story in the database.
#   start, end -- range bounds as strings (start is converted with int())
# NOTE(review): the assignment of `end_index`, the initial value of
# `story_infos` and the cookie/login handling bodies are not visible in this
# excerpt.
148 def main(start, end):
# Client configured with a cookie file in the current directory.
149 otp = otptools2.otptools("./cookies.txt")
153 sys.stderr.write("cannot use cookie file. create.\n")
# An empty cookie presumably triggers an interactive login - confirm.
155 if otp.get_cookie() == "":
160 start_index = int(start)
164 sys.stderr.write( "retrieving lists..." )
# Walk the listing in steps of 40, accumulating parsed entries in story_infos.
166 for index in range(start_index, end_index, 40):
167 html = otp.get_list(index)
168 story_infos = otp.parse_list(html, story_infos)
# Persist every collected story.
170 for key in story_infos:
171 insert_story_info(otp, story_infos[key])
173 sys.stderr.write( "done.\n" )
174 #### end of functions