OSDN Git Service

08b080889b59291a09b5b3988a02e534fb5e47b9
[rec10/rec10-git.git] / rec10 / trunk / src / xml2db.py
1 #!/usr/bin/python
2 # coding: UTF-8
3 # Rec10 TS Recording Tools
4 # Copyright (C) 2009 Yukikaze
5 #import sys
6 #import os
7 import xml.parsers.expat
8 import auto_rec
9 import chdb
10 import rec10d
11 import zenhan
12 import datetime
# Parser state shared by the expat callbacks below.
# flags: element name -> 1 while the element is open, 0 once it has closed.
flags = {}
# lastflag: name of the most recently opened element; character data is
# stored under this key.
lastflag = ""
# xmldate: values collected from the document. Attributes are stored under
# "element:attribute", character data under the element name.
xmldate = {}
# db: declared global by xml2db and end_element but never read or assigned
# in this part of the file.
db = ""
# bctype: broadcast type of the file being parsed; set by xml2db().
bctype = ""
# chlists: channel rows ([bctype, id, display-name]) waiting to be written.
chlists = []
# tvlists: programme rows waiting to be written.
tvlists = []
# bayes: chtxt -> [concatenated titles, concatenated descriptions, programme count].
bayes={}
def xml2db(xmlpath, bctypetemp):
    """Parse the EPG XML file at ``xmlpath`` through the expat callbacks.

    ``bctypetemp`` is the broadcast type: 'TE' (terrestrial digital), BS or
    CS. For terrestrial digital it carries "te" followed by a number. The
    value is published in the module-level ``bctype`` so the callbacks can
    read it.
    """
    global bctype
    bctype = bctypetemp
    parser = xml.parsers.expat.ParserCreate()
    # Ask expat to buffer adjacent text instead of delivering it piecemeal.
    parser.buffer_text = True
    parser.StartElementHandler = start_element
    parser.EndElementHandler = end_element
    parser.CharacterDataHandler = char_data
    parseDocument(parser, xmlpath)
def writemultitvDB(tvlists):
    """Write the collected programme rows for the current broadcast type.

    Calls ``new_tv`` with the module-level ``bctype`` and then hands
    ``tvlists`` to ``add_multi_tv``.
    NOTE(review): ``new_tv`` presumably clears the existing rows for that
    broadcast type first - confirm in rec10d.
    """
    store = rec10d.rec10db
    store.new_tv(bctype)
    store.add_multi_tv(tvlists)
def writemultichDB(chlists):
    """Write the collected channel rows for the current broadcast type.

    Calls ``new_ch`` with the module-level ``bctype`` and then hands
    ``chlists`` to ``add_multi_ch``.
    NOTE(review): ``new_ch`` presumably clears the existing rows for that
    broadcast type first - confirm in rec10d.
    """
    store = rec10d.rec10db
    store.new_ch(bctype)
    store.add_multi_ch(chlists)
def parseDocument(p, xmlfile):
    """Feed the file at path ``xmlfile`` to the expat parser ``p``.

    The file is always closed, even when parsing raises; any exception from
    ``p.ParseFile`` (or from the handlers it invokes) propagates to the
    caller.
    """
    # Binary mode: expat consumes raw bytes and handles the document
    # encoding itself, so no newline or text translation should be applied.
    f = open(xmlfile, 'rb')
    try:
        p.ParseFile(f)
    finally:
        f.close()
def start_element(name, attrs):
    """expat start-tag callback: mark ``name`` open and record its attributes.

    Each attribute is stored in ``xmldate`` under "<element>:<attribute>".
    ``lastflag`` is pointed at this element so that following character data
    is stored under its name.
    """
    global lastflag
    lastflag = name
    flags[name] = 1
    prefix = name + ":"
    xmldate.update((prefix + key, val) for key, val in attrs.items())
def _end_channel():
    """Queue the <channel> that just closed if it belongs to the current bctype."""
    uid = unicode(xmldate['channel:id'])
    # A channel without a <display-name> child yields an empty name instead
    # of a KeyError.
    udn = unicode(xmldate.get('display-name', ""))
    found = chdb.ontvsearch(xmldate['channel:id'])
    bcsearch = None
    if found is not None:
        bcsearch = found['bctype']
    if bcsearch is not None and bcsearch == bctype:
        chlists.append([bctype, uid, udn])
    # Clear the per-channel values so they do not leak into the next one.
    xmldate['channel:id'] = ""
    xmldate['display-name'] = ""


def _end_programme():
    """Queue the <programme> that just closed and add its text to ``bayes``."""
    ubctype = unicode(bctype)
    uch = unicode(xmldate['programme:channel'])
    ustart = unicode(xmldate['programme:start'].replace(" +0900", ""))
    ustop = unicode(xmldate['programme:stop'].replace(" +0900", ""))
    # Child elements are optional: a missing one is treated as empty text
    # rather than raising KeyError on the first programme that lacks it.
    utitle = unicode(xmldate.get('title', "").replace(",", "-"))
    udesc = unicode(xmldate.get('desc', ""))
    ulongdesc = unicode(xmldate.get('longdesc', ""))
    ucategory = unicode(xmldate.get('category', ""))
    tch = chdb.ontvsearch(uch)
    if tch is not None:
        if tch.get('bctype', "") == ubctype:
            # Accumulate [titles, descriptions, count] per channel text.
            bt = bayes.get(tch['chtxt'], ["", "", 0])
            bayes[tch['chtxt']] = [bt[0] + utitle + " ",
                                   bt[1] + udesc + " " + ulongdesc + " ",
                                   bt[2] + 1]
            tvlists.append([ubctype, uch, ustart, ustop, utitle, udesc,
                            ulongdesc, ucategory])
        # The original passed an undefined name ``chtxt`` here, which raised
        # NameError for every programme; the channel text is only known when
        # the lookup succeeded.
        # NOTE(review): the return value was already discarded; the call is
        # kept in case calc_key has side effects - confirm in auto_rec.
        auto_rec.calc_key(tch['chtxt'], utitle, udesc + " " + ulongdesc)
    # Clear the per-programme text so it does not leak into the next one.
    xmldate['title'] = ""
    xmldate['desc'] = ""
    xmldate['longdesc'] = ""
    xmldate['category'] = ""


def _end_tv():
    """Flush queued channel/programme rows and the bayes data at </tv>."""
    global chlists, tvlists, bayes
    if chlists:
        writemultichDB(chlists)
    chlists = []
    if tvlists:
        writemultitvDB(tvlists)
    tvlists = []
    started = datetime.datetime.now()
    for chtxt, stats in bayes.items():
        auto_rec.update_recall(chtxt, stats[0], stats[1], stats[2])
    # Reset like chlists/tvlists so a later parse in the same process does
    # not re-send text accumulated from this document.
    bayes = {}
    elapsed = datetime.datetime.now() - started
    print(bctype + " epg取り出し終了")
    print(str(elapsed.days * 24 * 60 * 60 + elapsed.seconds) + "second taken for updating bayes-auto DB .")


def end_element(name):
    """expat end-tag callback: process the element ``name`` that just closed.

    <channel> and <programme> are queued via the helpers above, and </tv>
    flushes everything that was queued. The element is then marked closed
    in ``flags``.
    """
    if flags[name] == 1:
        if name == "channel":
            _end_channel()
        elif name == "programme":
            _end_programme()
        elif name == "tv":
            _end_tv()
    flags[name] = 0
def char_data(data):
    """expat character-data callback: store cleaned text under ``lastflag``.

    Newlines, runs of two spaces, and single and double quotes are removed.
    The text is then passed through ``zenhan.toHankaku_ABC123`` (the raw text
    is used if that fails) and encoded as UTF-8. The result is stored in
    ``xmldate[lastflag]``, or appended when that key already holds a
    different value.
    NOTE(review): toHankaku_ABC123 presumably converts full-width
    alphanumerics to half-width - confirm in zenhan.
    """
    for unwanted in ("\n", "  ", "\"", "\'"):
        data = data.replace(unwanted, "")
    try:
        datat = zenhan.toHankaku_ABC123(data)
    except Exception:
        # Best effort: keep the unconverted text, but no longer swallow
        # KeyboardInterrupt/SystemExit as the bare ``except:`` did.
        datat = data
    data = datat.encode('utf-8')
    if lastflag in xmldate:
        # A chunk identical to what is already stored is skipped, not appended.
        if xmldate[lastflag] != data:
            xmldate[lastflag] += data
    else:
        xmldate[lastflag] = data