OSDN Git Service

sys.setdefaultencoding('UTF-8') required version.
[rec10/rec10-git.git] / rec10 / branches / 0.9.0 / src / xml2db.py
1 #!/usr/bin/python
2 # coding: UTF-8
3 # Rec10 TS Recording Tools
4 # Copyright (C) 2009 Yukikaze
5 #import sys
6 #import os
7 import xml.parsers.expat
8 import auto_rec
9 import chdb
10 import rec10d
11 import zenhan
12 import datetime
13 import recdb
# Module-level state shared by the expat handlers below.

# Element name -> 1 after start_element saw it, 0 after end_element closed it.
flags = {}
# Name of the most recently opened element; char_data files text under this key.
lastflag = ""
# Parsed values: "element:attribute" -> attribute value (start_element) and
# "element" -> accumulated character data (char_data).
xmldate = {}
# Only appears in `global` declarations in the code visible here; never assigned.
db = ""
# Broadcast type of the file being parsed; set by xml2db().
bctype = ""
# [bctype, channel id, display name] entries collected until </tv>.
chlists = []
# Programme rows collected until </tv>.
tvlists = []
# chtxt -> [concatenated titles, concatenated descriptions, programme count].
bayes={}
# Timestamp of the latest start_element call; end_element turns it into the
# elapsed timedelta when </tv> is reached.
dtb=""
def xml2db(xmlpath, bctypetemp):
    """Parse the EPG XML file at ``xmlpath`` using this module's expat handlers.

    ``bctypetemp`` is the broadcast type: 'TE' (terrestrial digital), BS or
    CS; for terrestrial digital it holds "te" followed by a number.  It is
    stored in the module-level ``bctype`` so the handlers can read it.
    """
    global bctype
    bctype = bctypetemp

    parser = xml.parsers.expat.ParserCreate()
    # Let expat buffer text so char_data is called as few times as possible.
    parser.buffer_text = True
    handlers = (
        ("StartElementHandler", start_element),
        ("EndElementHandler", end_element),
        ("CharacterDataHandler", char_data),
    )
    for hook_name, callback in handlers:
        setattr(parser, hook_name, callback)

    parseDocument(parser, xmlpath)
def writemultitvDB(tvlists):
    """Hand the collected programme rows to the rec10 database layer.

    Calls ``new_epg_timeline`` with the module-level ``bctype`` and then
    ``add_multi_epg_timeline`` with ``tvlists``.
    """
    epg_db = rec10d.rec10db
    epg_db.new_epg_timeline(bctype)
    epg_db.add_multi_epg_timeline(tvlists)
35     rec10d.rec10db.add_multi_epg_timeline(tvlists)
def writemultichDB(chlists):
    """Update the stored channel name for every collected channel entry.

    Each entry of ``chlists`` is a three-item sequence
    (broadcast type, channel id, display name); the broadcast type is
    not used here.
    """
    for entry in chlists:
        _entry_bctype, ontv_id, display_name = entry
        rec10d.rec10db.update_chname_by_ontv_epg_ch(ontv_id, display_name)
def parseDocument(p, xmlfile):
    """Feed the file at ``xmlfile`` to the expat parser ``p``.

    The file is opened in binary mode so expat receives the raw bytes and
    applies the document's own encoding declaration, and it is closed even
    when parsing raises (for example ``xml.parsers.expat.ExpatError`` on
    malformed XML), so the handle is never leaked.
    """
    f = open(xmlfile, 'rb')
    try:
        p.ParseFile(f)
    finally:
        f.close()
def start_element(name, attrs):
    """expat StartElementHandler: record that element ``name`` was opened.

    Marks the element as open in ``flags``, remembers it as ``lastflag``,
    stores every attribute in ``xmldate`` under "<element>:<attribute>",
    and stamps the current time into ``dtb``.
    """
    global lastflag, dtb
    flags[name] = 1
    lastflag = name
    key_prefix = name + ":"
    xmldate.update(
        (key_prefix + attr_name, attr_value)
        for attr_name, attr_value in attrs.items()
    )
    # end_element subtracts this timestamp from "now" when </tv> is reached.
    dtb = datetime.datetime.now()
def _to_unicode(value):
    """Return ``value`` as unicode, decoding byte strings explicitly as UTF-8.

    char_data stores UTF-8 encoded byte strings; decoding them explicitly
    avoids depending on sys.setdefaultencoding('UTF-8') being in effect.
    """
    if isinstance(value, unicode):
        return value
    return unicode(value, 'utf-8')


def _finish_channel():
    """Queue a closed <channel> for a name update when it matches ``bctype``."""
    channel_id = xmldate['channel:id']
    uid = _to_unicode(channel_id)
    # A channel without a display-name yields an empty name, not a KeyError.
    udn = _to_unicode(xmldate.get('display-name', ""))
    bcsearch = None
    found = chdb.ontvsearch(channel_id)
    if found is not None:
        bcsearch = found['bctype']
    if bcsearch is not None and bcsearch == bctype:
        chlists.append([bctype, uid, udn])
    xmldate['channel:id'] = ""
    xmldate['display-name'] = ""


def _finish_programme():
    """Collect a closed <programme> into ``tvlists`` and the ``bayes`` totals."""
    ubctype = unicode(bctype)
    # Attributes are indexed directly: a missing one still raises KeyError.
    uch = _to_unicode(xmldate['programme:channel'])
    ustart = _to_unicode(xmldate['programme:start'].replace(" +0900", ""))
    ustop = _to_unicode(xmldate['programme:stop'].replace(" +0900", ""))
    # Text elements may be absent (notably in the first programme, before the
    # reset below has created the keys), so they default to "".
    utitle = _to_unicode(xmldate.get('title', "").replace(",", "-"))
    udesc = _to_unicode(xmldate.get('desc', ""))
    ulongdesc = _to_unicode(xmldate.get('longdesc', ""))
    ucategory = _to_unicode(xmldate.get('category', ""))
    tch = chdb.ontvsearch(uch)
    if tch is not None and tch.get('bctype', "") == ubctype:
        chtxt = tch['chtxt']
        titles, descs, count = bayes.get(chtxt, ["", "", 0])
        bayes[chtxt] = [
            titles + utitle + " ",
            descs + udesc + " " + ulongdesc + " ",
            count + 1,
        ]
        tvlists.append([ubctype, uch, ustart, ustop, utitle, udesc,
                        ulongdesc, ucategory])
        if auto_rec.calc_key(chtxt, utitle, udesc + " " + ulongdesc):
            recdb.add_auto_bayes(utitle, chtxt, ustart, ustop)
    # Clear the text fields so they do not leak into the next programme.
    for key in ('title', 'desc', 'longdesc', 'category'):
        xmldate[key] = ""


def _finish_tv():
    """Write everything collected for the document and report elapsed time."""
    global chlists, tvlists, bayes, dtb
    if chlists:
        writemultichDB(chlists)
    chlists = []
    if tvlists:
        writemultitvDB(tvlists)
    tvlists = []
    for chtxt, (titles, descs, count) in bayes.items():
        auto_rec.update_recall(chtxt, titles, descs, count)
        auto_rec.update_recall("ALL", titles, descs, count)
    # Reset like chlists/tvlists so a later document parsed in the same
    # process does not submit these channels to update_recall again.
    bayes = {}
    dtb = datetime.datetime.now() - dtb
    print(bctype + " epg取り出し終了")
    print(str(dtb.days * 24 * 60 * 60 + dtb.seconds) + "seconds taken for updating bayes-auto DB .")


def end_element(name):
    """expat EndElementHandler: process the element ``name`` that just closed.

    * ``channel``   - queued in ``chlists`` when chdb knows the channel and
      its broadcast type equals ``bctype``.
    * ``programme`` - appended to ``tvlists`` and added to the per-channel
      ``bayes`` totals when its channel belongs to ``bctype``; may also
      register an automatic recording through recdb.
    * ``tv``        - the collected lists are written out, ``bayes`` is passed
      to ``auto_rec.update_recall``, and the collections are reset.

    The element is marked closed in ``flags`` in every case.
    """
    if flags.get(name) == 1:
        if name == "channel":
            _finish_channel()
        elif name == "programme":
            _finish_programme()
        elif name == "tv":
            _finish_tv()
    flags[name] = 0
def char_data(data):
    """expat CharacterDataHandler: store text under the ``lastflag`` key.

    Newlines, runs of two spaces, and single and double quote characters are
    removed from ``data``.  The text is then passed through
    ``zenhan.toHankaku_ABC123`` (presumably full-width to half-width
    alphanumeric conversion - confirm against zenhan); if that call fails the
    unconverted text is used.  The result is UTF-8 encoded and appended to
    ``xmldate[lastflag]``, except when it equals what is already stored.
    """
    # Order matters: dropping newlines first can create new double spaces.
    for unwanted in ("\n", "  ", "\"", "'"):
        data = data.replace(unwanted, "")
    try:
        converted = zenhan.toHankaku_ABC123(data)
    except Exception:
        # Best effort: keep the raw text when conversion fails, but let
        # KeyboardInterrupt / SystemExit propagate.
        converted = data
    data = converted.encode('utf-8')
    if lastflag in xmldate:
        if xmldate[lastflag] != data:
            xmldate[lastflag] += data
    else:
        xmldate[lastflag] = data