OSDN Git Service

fix to read xml using dom.
[rec10/rec10-git.git] / rec10 / trunk / src / xml2db_dom.py
1 #!/usr/bin/python
2 # coding: UTF-8
3 # Rec10 TS Recording Tools
4 # Copyright (C) 2009 Yukikaze
5
import datetime
import xml.dom.minidom

import zenhan
import chdb
import auto_rec
import recdb
11
def getText(elm):
    """Return the concatenated data of elm's direct text-node children.

    Only immediate children whose nodeType equals TEXT_NODE contribute;
    child elements are not descended into.  Returns "" when there are no
    such children.
    """
    # Collect the data of every text node, then glue the pieces together.
    pieces = [child.data for child in elm.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
def getText_item(obj):
    """Return the getText() results of every element in obj, concatenated.

    obj is any iterable of DOM elements (e.g. the result of
    getElementsByTagName).  Returns "" when obj is empty.
    """
    return "".join([getText(element) for element in obj])
def writemultitvDB(bctype,tvlists):
    """Store a batch of programme rows through rec10d.rec10db.

    Calls new_epg_timeline(bctype) once, then add_epg_timeline() once per row.

    bctype  -- broadcast type, passed unchanged to both database calls.
    tvlists -- iterable of 7-item rows in the order
               (channel, start, stop, title, desc, longdesc, category).
    """
    # NOTE(review): rec10d is not among the imports visible in this file;
    # confirm it is in scope before this function is called.
    epg_db = rec10d.rec10db
    epg_db.new_epg_timeline(bctype)
    for programme in tvlists:
        channel, start, stop, title, desc, longdesc, category = programme
        epg_db.add_epg_timeline(
            bctype, channel, start, stop, title, desc, longdesc, category)
def writemultichDB(bctype,chlists):
    """Push a batch of channel display names through rec10d.rec10db.

    Calls update_chname_by_ontv_epg_ch(ontv, display_name) once per entry.

    bctype  -- accepted for symmetry with writemultitvDB; not used here.
    chlists -- iterable of 2-item entries (ontv id, display name).
    """
    # NOTE(review): rec10d is not among the imports visible in this file;
    # confirm it is in scope before this function is called.
    for entry in chlists:
        ontv_id, display_name = entry
        rec10d.rec10db.update_chname_by_ontv_epg_ch(ontv_id, display_name)
def xml2db_dom(xmlpath, bctype):
    """Read an EPG XML file with minidom and store its contents.

    Every <channel> element contributes an (id, display-name) pair.  Every
    <programme> element whose channel is known to chdb.ontvsearch() and whose
    entry has the requested broadcast type is collected, scored with
    auto_rec.calc_key(), and registered with recdb.add_auto_bayes() when the
    score exceeds 2000.  The collected data is then written through
    writemultichDB / writemultitvDB and auto_rec.update_recall().

    xmlpath -- path of the XML file to parse.
    bctype  -- broadcast type: 'TE' (terrestrial digital), BS or CS; for
               terrestrial digital it is "te" followed by a number.

    Returns None.  Exceptions from parsing or from the helper modules
    propagate to the caller.
    """
    started = datetime.datetime.now()
    # Passing the path (not an open file object) lets minidom open and close
    # the file itself, so no handle is leaked.
    dom = xml.dom.minidom.parse(xmlpath)
    chlist = []
    tvlist = []
    bayes = {}  # chtxt -> [all titles, all descriptions, programme count]
    try:
        for ch in dom.getElementsByTagName('channel'):
            ontv = ch.getAttribute("id")
            chname = ch.getElementsByTagName("display-name").item(0).childNodes[0].data
            chlist.append([ontv, chname])
        for tv in dom.getElementsByTagName('programme'):
            channel = zenhan.toHankaku_ABC123(tv.getAttribute("channel"))
            start = tv.getAttribute("start").replace(" +0900", "")
            stop = tv.getAttribute("stop").replace(" +0900", "")
            title = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("title"))).replace(",", "_")
            desc = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("desc")))
            longdesc = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("longdesc")))
            category = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("category")))
            tch = chdb.ontvsearch(channel)
            # Skip programmes on unknown channels or on another broadcast type.
            if tch is None or tch.get('bctype', "") != bctype:
                continue
            chtxt = tch['chtxt']
            fulldesc = desc + " " + longdesc
            titles, descs, count = bayes.get(chtxt, ["", "", 0])
            bayes[chtxt] = [titles + title + " ", descs + fulldesc + " ", count + 1]
            tvlist.append([channel, start, stop, title, desc, longdesc, category])
            score = auto_rec.calc_key(chtxt, title, fulldesc)
            if score > 2000:
                recdb.add_auto_bayes(chtxt, title, start, stop, score)
    finally:
        # Release the DOM tree even when extraction fails part-way.
        dom.unlink()
    # NOTE(review): programme and bayes updates only run when at least one
    # channel was found; this nesting is kept from the original code.
    if chlist:
        writemultichDB(bctype, chlist)
        if tvlist:
            writemultitvDB(bctype, tvlist)
        for chtxt, (titles, descs, count) in bayes.items():
            auto_rec.update_recall(chtxt, titles, descs, count)
            auto_rec.update_recall("ALL", titles, descs, count)
    elapsed = datetime.datetime.now() - started
    # NOTE(review): recdblist is not among the imports visible in this file;
    # confirm it is in scope.
    recdblist.printutf8(bctype + u" epg取り出し終了")
    recdblist.printutf8(str(elapsed.days * 24 * 60 * 60 + elapsed.seconds)+u"seconds taken for updating bayes-auto DB .")
74