OSDN Git Service

Change markup.py's options: md5file isn't required.
[otptools/otptools.git] / retrive_storylist.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Retrieve a list of stories through otptools and dump it as comma-separated text.

Usage:
    <script> start end <output_file>

``start`` and ``end`` are non-negative integers giving the range of list
indexes to fetch (stepped by 40).  For every story found, its tags are
fetched as well, and one line per story is written to ``<output_file>``:

    url,title,editor,page_views,comments,datetime,tags

Fields are joined with plain commas; no quoting or escaping is applied.
The session cookie is kept in ./cookies.txt; when it cannot be loaded the
user is prompted for a user name and password.

NOTE(review): this is Python 2 code (raw_input, unicode, dict ordering).
"""

import os, sys
import otptools
import getpass
import codecs

# Wrap stdout so that unicode text written to it is encoded as UTF-8.
sys.stdout = codecs.getwriter('utf_8')(sys.stdout)

usage = sys.argv[0] + " start end <output_file>"
# Encoding of the output file ("utf_8" is the drop-in alternative).
output_encoding = "cp932"

try:
    start = sys.argv[1]
    end = sys.argv[2]
    output_path = sys.argv[3]
except IndexError:
    sys.exit(usage)

# Both bounds are converted with int() below, so both must be plain digits.
if not (start.isdigit() and end.isdigit()):
    sys.exit(usage)

# Opened before any network access so that an unusable path fails early.
output_file = codecs.open(output_path, "w", output_encoding)

uname = ""

otp = otptools.otptools("./cookies.txt", uname)
try:
    otp.load_cookie()
except IOError:
    sys.stderr.write("cannot use cookie file. create.\n")

    # No stored cookie could be read: log in interactively if none is held.
    if otp.get_cookie() == "":
        try:
            if uname == "":
                uname = raw_input("user: ")
            passwd = getpass.getpass("login password:")
        except KeyboardInterrupt:
            sys.exit("\nabort.")
        if otp.login(uname, passwd) != 1:
            sys.exit("login error!")

otp.save_cookie()
story_infos = {}
start_index = int(start)
max_index = int(end)

sys.stderr.write("retrieving lists...")

# The step of 40 presumably matches the number of stories per list page
# -- TODO confirm against otptools.get_list().
for index in range(start_index, max_index, 40):
    html = otp.get_list(index)
    story_infos = otp.parse_list(html, story_infos)

sys.stderr.write("sorting...")
# Descending order of the per-story info dicts (Python 2 dict comparison);
# the sort is stable, so equal entries keep their original relative order.
sort_keys = sorted(story_infos.keys(),
                   key=lambda url: story_infos[url],
                   reverse=True)

sys.stderr.write("retrieving tags...")
for url in sort_keys:
    # Keys are used without a scheme; "http:" is prepended to form the URL.
    tags = otp.get_tags("http:" + url)

    story_infos[url]["tags"] = unicode("|".join(tags), "utf_8")
    sys.stderr.write(".")

# Every key named here must exist in each story dict; all but "tags" are
# presumably filled in by otptools.parse_list() -- verify there.
row_format = "%(url)s,%(title)s,%(editor)s,%(page_views)s,%(comments)s,%(datetime)s,%(tags)s"

try:
    for url in sort_keys:
        output_file.write(row_format % story_infos[url] + "\n")
finally:
    # Close the file even when a write fails (e.g. on an encoding error).
    output_file.close()

sys.stderr.write("done.\n")