OSDN Git Service

Change markup.py's options: md5file isn't required.
[otptools/otptools.git] / merge_csv.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Merge a Google Analytics CSV export with an OTP article CSV.

Usage: merge_csv.py <ga file> <otp file> <output_file>

GA rows are keyed by their first column (the page title).  For every OTP
row whose title is a substring of a GA page title, the OTP date and tags
columns are appended to that GA row.  OTP rows with no matching GA title
are reported on stderr.  The output file receives the merged rows first,
followed by the GA rows that were never matched.

Fields are split on bare commas; quoted CSV fields are not supported.
"""

import os
import sys
import codecs

input_codec = "cp932"
output_codec = "cp932"

# An OTP row is: url,title,editor,PVs,comments,date,tags
OTP_TITLE = 1
OTP_DATE = 5
OTP_TAGS = 6
OTP_MIN_FIELDS = 7


def _wrap_std_streams():
    """Rebind stdout (UTF-8) and stderr (cp932) to encoding writers.

    On Python 3 the writers wrap the underlying binary ``buffer``; on
    Python 2 the streams have no ``buffer`` attribute and are already byte
    streams, so they are wrapped directly.
    """
    sys.stdout = codecs.getwriter('utf_8')(
        getattr(sys.stdout, 'buffer', sys.stdout))
    sys.stderr = codecs.getwriter('cp932')(
        getattr(sys.stderr, 'buffer', sys.stderr))


def _load_ga(path):
    """Read the GA file and return ``(ga_dict, ga_titles)``.

    ``ga_dict`` maps each page title to the remaining columns of its row;
    ``ga_titles`` lists the titles in file order.  Expected columns:
    Page Title,Pageviews,Unique Pageviews,Avg. Time on Page,Bounce Rate,
    % Exit,$ Index
    """
    ga_dict = {}
    ga_titles = []
    with codecs.open(path, "r", input_codec) as ga_file:
        for row in ga_file:
            stripped = row.strip()
            if not stripped:
                # A blank line would otherwise become an empty-title entry.
                continue
            items = stripped.split(",")
            title = items[0]
            ga_dict[title] = items[1:]
            ga_titles.append(title)
    return ga_dict, ga_titles


def _find_unclaimed_title(otp_title, ga_titles, ga_dict):
    """Return the first GA title containing ``otp_title``, or None.

    Titles already removed from ``ga_dict`` by an earlier match are
    skipped, so one GA row is never claimed twice.
    """
    for ga_title in ga_titles:
        if ga_title in ga_dict and otp_title in ga_title:
            return ga_title
    return None


def _merge_otp(path, ga_dict, ga_titles):
    """Fold OTP rows into GA rows and return the merged rows as a dict.

    Every matched title is moved out of ``ga_dict`` (which is mutated) into
    the returned dict, with the OTP date and tags appended to its columns.
    Unmatched and malformed OTP rows are reported on stderr.
    """
    updated_dict = {}
    with codecs.open(path, "r", input_codec) as otp_file:
        for row in otp_file:
            stripped = row.strip()
            if not stripped:
                continue
            items = stripped.split(",")
            if len(items) < OTP_MIN_FIELDS:
                # Too few columns to carry a title, date and tags.
                sys.stderr.write("! malformed row skipped: %s\n" % stripped)
                continue

            ga_title = _find_unclaimed_title(
                items[OTP_TITLE], ga_titles, ga_dict)
            if ga_title is None:
                sys.stderr.write(
                    "! %s - %s\n" % (items[OTP_TITLE], items[OTP_DATE]))
                continue

            ga_info = ga_dict.pop(ga_title)
            ga_info.append(items[OTP_DATE])
            ga_info.append(items[OTP_TAGS])
            updated_dict[ga_title] = ga_info
    return updated_dict


def _format_line(title, fields):
    """Return one output line for ``title`` and its columns.

    NOTE(review): the " , " separator (spaces around the first comma)
    reproduces what the original ``print >> f, title, ",", ...`` statement
    emitted.  It looks unintended, but is kept so the output format does
    not change for existing consumers.
    """
    return title + " , " + ",".join(fields) + "\n"


def main(argv=None):
    """Run the merge using ``argv`` (defaults to ``sys.argv``).

    ``argv[1:4]`` are the GA file, OTP file and output file paths.  Exits
    with a usage message when fewer than three paths are given.
    """
    if argv is None:
        argv = sys.argv
    try:
        ga_data_path = argv[1]
        otp_data_path = argv[2]
        output_path = argv[3]
    except IndexError:
        sys.exit(argv[0] + " <ga file> <otp file> <output_file>")

    # Read both inputs before opening the output, so a failure while
    # reading does not leave a truncated output file behind.
    ga_dict, ga_titles = _load_ga(ga_data_path)
    updated_dict = _merge_otp(otp_data_path, ga_dict, ga_titles)

    with codecs.open(output_path, "w", output_codec) as output_file:
        for title in updated_dict:
            output_file.write(_format_line(title, updated_dict[title]))
        for title in ga_dict:
            output_file.write(_format_line(title, ga_dict[title]))


if __name__ == "__main__":
    _wrap_std_streams()
    main()