OSDN Git Service

b4afb9dcbb87b2578ed372aa07cc897b163a6746
[otptools/otptools.git] / merge_csv.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Merge a Google Analytics CSV export with an OTP story list.

Usage: merge_csv.py <ga file> <otp file>

Both inputs are read as UTF-8 and split on plain commas (quoted fields that
contain commas are not supported).  Each OTP row is matched to the first
not-yet-merged GA row whose title contains the OTP title; the OTP date and
tags columns are appended to that GA row.  Merged rows are written to stdout
first, followed by the GA rows that were never matched.  OTP rows without a
match are reported on stderr.
"""

import codecs
import os
import sys

# OTP rows are indexed up to column 6 (the tags column), so a usable row
# needs at least this many comma-separated fields.
OTP_MIN_FIELDS = 7


def utf8_writer(stream):
    """Return a writer that encodes text written to *stream* as UTF-8.

    On Python 3 the text streams expose the underlying byte stream as
    ``.buffer``; on Python 2 the stream itself already accepts bytes.
    """
    return codecs.getwriter('utf_8')(getattr(stream, 'buffer', stream))


def read_ga_rows(rows):
    """Parse GA lines into ``(ga_dict, ga_titles)``.

    Expected column layout (from the original author's notes):
    Page Title,Pageviews,Unique Pageviews,Avg. Time on Page,Bounce Rate,
    % Exit,$ Index

    ``ga_dict`` maps the first column (the title) to the list of remaining
    columns; ``ga_titles`` lists the titles in first-seen order.  Blank lines
    are skipped.  When a title occurs more than once, the last row wins and
    the title is listed only once.
    """
    ga_dict = {}
    ga_titles = []
    for row in rows:
        line = row.strip()
        if not line:
            continue
        items = line.split(",")
        title = items.pop(0)
        if title not in ga_dict:
            ga_titles.append(title)
        ga_dict[title] = items
    return ga_dict, ga_titles


def merge_otp_rows(rows, ga_dict, ga_titles, err):
    """Merge OTP lines into the GA data and return the merged rows.

    Expected column layout (from the original author's notes):
    0   1     2      3   4        5    6
    url,title,editor,PVs,comments,date,tags

    For every OTP row, the first title in *ga_titles* that is still present
    in *ga_dict* and contains the OTP title is removed from *ga_dict* (which
    is mutated in place) and stored in the returned dict with the OTP date
    and tags appended.  Rows with no match are reported on *err* as
    ``! <title> - <date>``; rows with too few columns are reported as
    malformed.  Blank lines are skipped.
    """
    updated_dict = {}
    for row in rows:
        line = row.strip()
        if not line:
            continue
        items = line.split(",")
        if len(items) < OTP_MIN_FIELDS:
            err.write("! malformed row: %s\n" % line)
            continue
        otp_title, date, tags = items[1], items[5], items[6]

        match = None
        # An empty title is a substring of every string, so it would
        # wrongly consume the first GA row; treat it as "not found".
        if otp_title:
            for ga_title in ga_titles:
                # Skip titles already merged by an earlier OTP row; popping
                # them a second time would raise KeyError.
                if ga_title in ga_dict and otp_title in ga_title:
                    match = ga_title
                    break

        if match is None:
            err.write("! %s - %s\n" % (otp_title, date))
            continue

        ga_info = ga_dict.pop(match)
        ga_info.append(date)
        ga_info.append(tags)
        updated_dict[match] = ga_info
    return updated_dict


def write_rows(out, rows):
    """Write each ``title -> fields`` entry of *rows* to *out*.

    The line format ``<title> , <f1>,<f2>,...`` (spaces around the first
    comma) is kept exactly as the script has always produced it.
    """
    for title in rows:
        out.write("%s , %s\n" % (title, ",".join(rows[title])))


def main(argv=None):
    """Run the merge for ``argv = [prog, ga_file, otp_file]``."""
    if argv is None:
        argv = sys.argv
    try:
        ga_data_path = argv[1]
        otp_data_path = argv[2]
    except IndexError:
        sys.exit(argv[0] + " <ga file> <otp file>")

    out = utf8_writer(sys.stdout)
    err = utf8_writer(sys.stderr)

    # "with" guarantees the files are closed even if parsing fails.
    with codecs.open(ga_data_path, "r", "utf_8") as ga_file:
        ga_dict, ga_titles = read_ga_rows(ga_file)

    with codecs.open(otp_data_path, "r", "utf_8") as otp_file:
        updated_dict = merge_otp_rows(otp_file, ga_dict, ga_titles, err)

    # Merged rows first, then the GA rows that no OTP row claimed.
    write_rows(out, updated_dict)
    write_rows(out, ga_dict)


if __name__ == "__main__":
    main()