OSDN Git Service

add wp_imgswap2.py for new OSDN Magazine
[otptools/otptools.git] / merge_csv.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import os
5 import sys
6 import codecs
7
input_codec = "cp932"
output_codec = "cp932"


def _split_row(row):
    """Split one raw CSV line into its fields; return [] for a blank line.

    NOTE(review): this is a plain split on ",", so quoted fields that
    contain commas are not handled -- the same limitation the script has
    always had.
    """
    stripped = row.strip()
    if not stripped:
        return []
    return stripped.split(",")


def _load_ga_rows(rows):
    """Return a list of (title, remaining_fields) pairs, one per GA row.

    The first field of each row is taken as the title.  Blank lines are
    skipped so they do not end up as empty rows in the output.
    """
    # Page Title,Pageviews,Unique Pageviews,Avg. Time on Page,Bounce Rate,% Exit,$ Index
    ga_titles = []
    for row in rows:
        items = _split_row(row)
        if items:
            ga_titles.append((items[0], items[1:]))
    return ga_titles


def _merge_otp_rows(ga_titles, rows, err):
    """Append each OTP row's date and tags to the matching GA row, in place.

    An OTP row matches the first GA row (in file order) whose title
    contains the OTP title as a substring.  Rows without a match are
    reported on ``err`` and otherwise ignored.
    """
    for row in rows:
        # 0   1     2    3
        # url,title,date,tags
        items = _split_row(row)
        if not items:
            continue
        if len(items) < 4:
            # Previously this raised IndexError and aborted the whole merge.
            err.write("! malformed row (expected url,title,date,tags): %s\n"
                      % row.strip())
            continue
        title, date, tags = items[1:4]

        matched = None
        # An empty title is a substring of every GA title, so it must never
        # be allowed to "match" -- it would tag the first GA row by accident.
        if title:
            for ga_title, ga_item in ga_titles:
                if title in ga_title:
                    matched = ga_item
                    break

        if matched is None:
            err.write("! %s - %s\n" % (title, date))
        else:
            matched.extend((date, tags))


def main(argv):
    """Merge a GA CSV export with an OTP CSV export and write the result.

    ``argv`` is laid out like ``sys.argv``: program name, GA file path,
    OTP file path, output file path.  Exits with a usage message when
    fewer than three paths are given.
    """
    if len(argv) < 4:
        sys.exit(argv[0] + " <ga file> <otp file> <output_file>")
    ga_data_path, otp_data_path, output_path = argv[1:4]

    with codecs.open(ga_data_path, "r", input_codec) as ga_file:
        ga_titles = _load_ga_rows(ga_file)

    with codecs.open(otp_data_path, "r", input_codec) as otp_file:
        _merge_otp_rows(ga_titles, otp_file, sys.stderr)

    # Open the output only after both inputs were read successfully, so a
    # bad input path no longer leaves a truncated, empty output file behind.
    with codecs.open(output_path, "w", output_codec) as output_file:
        for title, item in ga_titles:
            # NOTE: the " , " separator (spaces around the first comma) is
            # exactly what the former `print >>` statement emitted; it is
            # kept so existing consumers of the output see no change.
            output_file.write("%s , %s\n" % (title, ",".join(item)))


if __name__ == "__main__":
    sys.stdout = codecs.getwriter('utf_8')(sys.stdout)
    sys.stderr = codecs.getwriter('cp932')(sys.stderr)
    main(sys.argv)