OSDN Git Service

Fix a large number of bugs.
[rec10/rec10-git.git] / rec10 / trunk / src / xml2db_dom.py
1 #!/usr/bin/python
2 # coding: UTF-8
3 # Rec10 TS Recording Tools
4 # Copyright (C) 2009 Yukikaze
5
6 import xml.dom.minidom
7 import datetime
8
9 import zenhan
10 import chdb
11 import auto_rec
12 import recdb
13 import rec10d
14 import n_gram
15 import recdblist
def getText(elm):
    """Return the concatenated data of the text nodes directly under ``elm``.

    Only the immediate children of ``elm`` are inspected; text that sits
    inside nested child elements is not included.
    """
    # Keep only the children that are text nodes and glue their data together.
    pieces = [child.data
              for child in elm.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
def getText_item(obj):
    """Return the direct text of every element in ``obj``, joined in order.

    ``obj`` is any iterable of DOM elements; each one is passed to
    ``getText`` and the results are concatenated without a separator.
    """
    return "".join([getText(element) for element in obj])
def writemultitvDB(bctype,tvlists):
    """Write a batch of programme rows through ``rec10d.rec10db``.

    Calls ``new_epg_timeline(bctype)`` once, then ``add_epg_timeline`` for
    every row of ``tvlists``. Each row must unpack into exactly seven
    values: channel, start, stop, title, desc, longdesc, category.
    """
    rec10d.rec10db.new_epg_timeline(bctype)
    for row in tvlists:
        channel, start, stop, title, desc, longdesc, category = row
        rec10d.rec10db.add_epg_timeline(
            bctype, channel, start, stop, title, desc, longdesc, category)
def writemultichDB(chlists):
    """Write a batch of channel names through ``rec10d.rec10db``.

    Every entry of ``chlists`` must unpack into two values (the channel id
    and its display name), which are passed on to
    ``update_chname_by_ontv_epg_ch``.
    """
    for entry in chlists:
        ontv_id, display_name = entry
        rec10d.rec10db.update_chname_by_ontv_epg_ch(ontv_id, display_name)
def xml2db_dom(xmlpath, bctype):
    """Import an XMLTV-like EPG file into the rec10 databases.

    Parameters:
        xmlpath: path of the XML file; it is expected to contain
            ``<channel>`` and ``<programme>`` elements.
        bctype: broadcast type. 'TE' (terrestrial digital), BS and CS exist;
            for terrestrial digital a value of the form "te<number>" is
            passed.

    What the function does:
        1. Collects ``[id, display-name]`` for every ``<channel>``.
        2. For every ``<programme>`` whose channel resolves (through
           ``chdb.ontvsearch``) to an entry with a matching ``bctype``:
           records the programme row, accumulates title/description text
           per channel, and scores it with ``auto_rec.calc_key``.
           High-scoring programmes are handed to ``recdb.add_auto_bayes``
           unless a sufficiently similar title is already present in the
           surrounding time window.
        3. If at least one channel was found, writes channel names and
           programme rows through ``rec10d.rec10db`` and feeds the
           accumulated text to ``auto_rec.update_recall``.
        4. Prints a completion message and the elapsed seconds.

    Returns None.
    """
    # Thresholds and margins used below. Their values come from the original
    # code; presumably tuned empirically -- TODO confirm with the author.
    auto_rec_score_min = 2000      # calc_key score needed to consider a programme
    duplicate_similarity = 400     # trigram score at/above which a title counts as already present
    window_margin = datetime.timedelta(seconds=1200)  # 20 minutes before start / after stop
    epg_time_format = "%Y%m%d%H%M%S"
    db_time_format = "%Y-%m-%d %H:%M:%S"

    started = datetime.datetime.now()
    # Hand the path to minidom so it opens AND closes the file itself. The
    # previous file(xmlpath) handle was never closed, and `file` is not a
    # builtin on Python 3.
    dom = xml.dom.minidom.parse(xmlpath)
    chlist = []
    tvlist = []
    bayes = {}   # chtxt -> [all titles, all descriptions, programme count]
    try:
        for ch in dom.getElementsByTagName('channel'):
            ontv = ch.getAttribute("id")
            chname = ch.getElementsByTagName("display-name").item(0).childNodes[0].data
            chlist.append([ontv, chname])

        for tv in dom.getElementsByTagName('programme'):
            channel = zenhan.toHankaku_ABC123(tv.getAttribute("channel"))
            # Times carry a fixed " +0900" offset suffix that is stripped.
            start = tv.getAttribute("start").replace(" +0900", "")
            stop = tv.getAttribute("stop").replace(" +0900", "")
            title = zenhan.toHankaku_ABC123(
                getText_item(tv.getElementsByTagName("title"))).replace(",", "_")
            desc = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("desc")))
            longdesc = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("longdesc")))
            category = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("category")))

            tch = chdb.ontvsearch(channel)
            # Skip programmes on unknown channels or on another broadcast type.
            if tch is None or tch.get('bctype', "") != bctype:
                continue
            chtxt = tch['chtxt']

            # Accumulate the text seen on this channel for update_recall.
            seen = bayes.get(chtxt, ["", "", 0])
            bayes[chtxt] = [seen[0] + title + " ",
                            seen[1] + desc + " " + longdesc + " ",
                            seen[2] + 1]
            tvlist.append([channel, start, stop, title, desc, longdesc, category])

            score = auto_rec.calc_key(chtxt, title, desc + " " + longdesc)
            if score > auto_rec_score_min:
                window_begin = datetime.datetime.strptime(start, epg_time_format) - window_margin
                window_end = datetime.datetime.strptime(stop, epg_time_format) + window_margin
                nearby = rec10d.rec10db.select_bytime_all_timeline(
                    window_begin.strftime(db_time_format),
                    window_end.strftime(db_time_format),
                    chtxt)
                # Highest title similarity against entries already in the window.
                best_similarity = 0
                if len(nearby) > 0:
                    for existing in nearby:
                        similarity = n_gram.trigram(existing['title'], title)
                        if similarity > best_similarity:
                            best_similarity = similarity
                if best_similarity < duplicate_similarity:
                    recdb.add_auto_bayes(chtxt, title, start, stop, score)
    finally:
        # Release the DOM even when parsing a node or a DB call fails.
        dom.unlink()

    # NOTE(review): programme rows and recall data are only written when the
    # file also contained at least one <channel>; this mirrors the original
    # nesting -- confirm that it is intentional.
    if len(chlist) > 0:
        writemultichDB(chlist)
        if len(tvlist) > 0:
            writemultitvDB(bctype, tvlist)
        for chtxt, (titles, descs, count) in bayes.items():
            auto_rec.update_recall(chtxt, titles, descs, count)
            auto_rec.update_recall("ALL", titles, descs, count)

    elapsed = datetime.datetime.now() - started
    recdblist.printutf8(bctype + u" epg取り出し終了")
    recdblist.printutf8(str(elapsed.days * 24 * 60 * 60 + elapsed.seconds)+u"seconds taken for updating bayes-auto DB .")
def getChdata(xmlpath):
    """Return ``[id, display-name]`` of the first ``<channel>`` in ``xmlpath``.

    Parameters:
        xmlpath: path of the XML file to read.

    Returns:
        A two-item list ``[id attribute, text of the first display-name]``
        for the first ``<channel>`` element, or ``["", ""]`` when the
        document contains no ``<channel>`` element.

    Raises:
        AttributeError / IndexError if the first channel has no
        ``<display-name>`` child or that child is empty (unchanged from the
        original behaviour).
    """
    # Hand the path to minidom so it opens AND closes the file itself. The
    # previous file(xmlpath) handle was never closed, and `file` is not a
    # builtin on Python 3.
    dom = xml.dom.minidom.parse(xmlpath)
    try:
        channels = dom.getElementsByTagName('channel')
        if len(channels) == 0:
            return ["", ""]
        # Only the first channel is ever returned, so later ones are not parsed.
        first = channels[0]
        ontv = first.getAttribute("id")
        chname = first.getElementsByTagName("display-name").item(0).childNodes[0].data
        return [ontv, chname]
    finally:
        # Release the DOM even if the channel lookup raises.
        dom.unlink()