OSDN Git Service

implement auto jbk rec.
[rec10/rec10-git.git] / rec10 / trunk / src / xml2db_dom.py
1 from timerec import epgdb
2 import status
3 #!/usr/bin/python
4 # coding: UTF-8
5 # Rec10 TS Recording Tools
6 # Copyright (C) 2009 Yukikaze
7
8 import xml.dom.minidom
9 import datetime
10
11 import zenhan
12 import chdb
13 import auto_rec
14 import recdb
15 import rec10d
16 import n_gram
17 import recdblist
def getText(elm):
    """Return the concatenated text of the direct text-node children of elm.

    Only children whose nodeType is TEXT_NODE contribute; text inside nested
    child elements is not included. Returns "" when elm has no text children.
    """
    # Join once instead of growing a string with "+" on every iteration.
    return "".join(
        node.data
        for node in elm.childNodes
        if node.nodeType == node.TEXT_NODE
    )
def getText_item(obj):
    """Return the text of every element in obj, concatenated in order.

    obj is any iterable of DOM elements (for example the result of
    getElementsByTagName). Each element's text is obtained with getText().
    Returns "" for an empty iterable.
    """
    # Join once instead of growing a string with "+" on every iteration.
    return "".join(getText(element) for element in obj)
def writemultitvDB(bctype,tvlists):
    """Store a batch of programme entries for bctype through rec10d.rec10db.

    Calls new_epg_timeline(bctype) once, then add_epg_timeline() once per
    entry. Every entry of tvlists must unpack into exactly seven values:
    channel, start, stop, title, desc, longdesc, category.
    """
    rec10d.rec10db.new_epg_timeline(bctype)
    for programme in tvlists:
        channel, start, stop, title, desc, longdesc, category = programme
        rec10d.rec10db.add_epg_timeline(
            bctype, channel, start, stop, title, desc, longdesc, category)
def writemultichDB(chlists):
    """Push channel display names to the database through rec10d.rec10db.

    Every entry of chlists must unpack into exactly two values, the channel
    id and its display name, which are passed to
    update_chname_by_ontv_epg_ch() in that order.
    """
    for pair in chlists:
        ontv, display_name = pair
        rec10d.rec10db.update_chname_by_ontv_epg_ch(ontv, display_name)
def _auto_record_candidate(tch, title, start, stop, score):
    """Reserve or queue one programme that scored above the auto-rec threshold.

    tch is the channel record returned by chdb.ontvsearch(); start and stop
    are "%Y%m%d%H%M%S" strings; score is the value from auto_rec.calc_key().
    """
    # Look for existing timeline entries in a window widened by 1200 seconds
    # (20 minutes) on each side of the programme.
    margin = datetime.timedelta(seconds=1200)
    begin = datetime.datetime.strptime(start, "%Y%m%d%H%M%S") - margin
    end = datetime.datetime.strptime(stop, "%Y%m%d%H%M%S") + margin
    bttime = begin.strftime("%Y-%m-%d %H:%M:%S")
    ettime = end.strftime("%Y-%m-%d %H:%M:%S")
    chtxt = tch['chtxt']

    booked = rec10d.rec10db.select_bytime_bychtxt_all_timeline(bttime, ettime, chtxt)
    # Highest trigram score between this title and any entry already in the
    # window; 0 when the window is empty.
    similarity = max([0] + [n_gram.trigram(row['title'], title) for row in booked])
    if similarity >= 350:
        return

    if status.getSettings_auto_bayes() != 1:
        # Automatic reservation is off: only record the candidate.
        recdb.add_auto_bayes(chtxt, title, start, stop, score)
        return
    if similarity >= 200:
        return

    # The original referenced configreader without importing it anywhere in
    # the file (NameError). Imported here, at the only point of use.
    import configreader

    # NOTE(review): the original tested len(ch['ch']) where "ch" was a stale
    # loop variable (the last timeline row, or the last <channel> DOM element
    # when the window was empty). The channel record of the programme being
    # evaluated is used instead -- confirm that tch carries a 'ch' key.
    if len(tch['ch']) > 2:
        index, limit_key = 1, "bscs_max"
    else:
        index, limit_key = 0, "te_max"
    in_use = epgdb.count_schedule_timeline(bttime, ettime)[index]
    free = int(configreader.getenv(limit_key)) - in_use
    if free > 0:
        topt = status.getSettings_auto_opt()
        recdb.recreserv(title, chtxt, bttime, ettime, topt)


def xml2db_dom(xmlpath, bctype):
    """Load an EPG XML file into the database and update the auto-rec data.

    xmlpath is the path of an XML file containing <channel> and <programme>
    elements. bctype is the broadcast type: 'TE' (terrestrial digital), BS or
    CS; for terrestrial digital it holds "te" followed by digits (per the
    original author's note).

    Channel names are written with writemultichDB(), programmes whose channel
    record matches bctype are written with writemultitvDB(), and the
    accumulated title/description text per channel is passed to
    auto_rec.update_recall(). Programmes scoring above 2000 in
    auto_rec.calc_key() are handed to _auto_record_candidate().
    """
    started = datetime.datetime.now()
    # Passing the path lets minidom open and close the file itself; the
    # original file(xmlpath) handle was never closed.
    dom = xml.dom.minidom.parse(xmlpath)
    chlist = []
    tvlist = []
    bayes = {}  # chtxt -> [title parts, description parts, programme count]
    try:
        for ch in dom.getElementsByTagName('channel'):
            ontv = ch.getAttribute("id")
            chname = ch.getElementsByTagName("display-name").item(0).childNodes[0].data
            chlist.append([ontv, chname])

        for tv in dom.getElementsByTagName('programme'):
            channel = zenhan.toHankaku_ABC123(tv.getAttribute("channel"))
            start = tv.getAttribute("start").replace(" +0900", "")
            stop = tv.getAttribute("stop").replace(" +0900", "")
            title = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("title"))).replace(",", "_")
            desc = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("desc")))
            longdesc = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("longdesc")))
            category = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("category")))

            tch = chdb.ontvsearch(channel)
            if tch is None or tch.get('bctype', "") != bctype:
                continue

            chtxt = tch['chtxt']
            # Collect the pieces and join them once at the end instead of
            # rebuilding an ever-growing string for every programme.
            entry = bayes.setdefault(chtxt, [[], [], 0])
            entry[0].append(title + " ")
            entry[1].append(desc + " " + longdesc + " ")
            entry[2] += 1

            tvlist.append([channel, start, stop, title, desc, longdesc, category])
            p = auto_rec.calc_key(chtxt, title, desc + " " + longdesc)
            if p > 2000:
                _auto_record_candidate(tch, title, start, stop, p)
    finally:
        # Release the DOM tree even when processing fails part-way.
        dom.unlink()

    # As in the original, programme data and recall statistics are written
    # only when at least one <channel> element was found.
    if len(chlist) > 0:
        writemultichDB(chlist)
        if len(tvlist) > 0:
            writemultitvDB(bctype, tvlist)
        for chtxt, (title_parts, desc_parts, count) in bayes.items():
            titles = "".join(title_parts)
            descs = "".join(desc_parts)
            auto_rec.update_recall(chtxt, titles, descs, count)
            auto_rec.update_recall("ALL", titles, descs, count)

    elapsed = datetime.datetime.now() - started
    recdblist.printutf8(bctype + u" epg取り出し終了")
    recdblist.printutf8(str(elapsed.days * 24 * 60 * 60 + elapsed.seconds)+u"seconds taken for updating bayes-auto DB .")
def getChdata(xmlpath):
    """Return [id, display-name] of the first <channel> element in xmlpath.

    xmlpath is the path of an XML file. The id comes from the element's "id"
    attribute and the name from the first child node of its first
    <display-name> element. Returns ["", ""] when the document contains no
    <channel> element.
    """
    # Passing the path lets minidom open and close the file itself; the
    # original file(xmlpath) handle was never closed.
    dom = xml.dom.minidom.parse(xmlpath)
    try:
        channels = dom.getElementsByTagName('channel')
        if not channels:
            return ["", ""]
        first = channels[0]
        chname = first.getElementsByTagName("display-name").item(0).childNodes[0].data
        return [first.getAttribute("id"), chname]
    finally:
        # Release the DOM tree even when the lookup above fails.
        dom.unlink()