OSDN Git Service

classify : fix DB id undefined.
[rec10/rec10-git.git] / rec10 / xml2db_dom.py
1 #!/usr/bin/python
2 # coding: UTF-8
3 # Rec10 TS Recording Tools
4 # Copyright (C) 2009-2011 Yukikaze
5
6 import xml.dom.minidom
7 import datetime
8 import re
9 import traceback
10
11 import zenhan
12 import chdb
13 import auto_rec
14 import recdb
15 import rec10d
16 import n_gram
17 import recdblist
18 import epgdb
19 import status
def getText(elm):
    """Return the concatenated data of the direct text-node children of elm.

    Only immediate children whose nodeType equals TEXT_NODE contribute;
    text nested inside child elements is not collected.  An element with
    no text children yields the empty string.
    """
    return "".join(
        child.data
        for child in elm.childNodes
        # Keep text nodes only; element/comment/etc. children are skipped.
        if child.nodeType == child.TEXT_NODE
    )
def getText_item(obj):
    """Return the text of every element in obj, joined in iteration order.

    obj is an iterable of DOM elements (e.g. the result of
    getElementsByTagName); each element is passed through getText().
    An empty iterable yields the empty string.
    """
    pieces = [getText(element) for element in obj]
    return "".join(pieces)
def writeMultiTVDB(bctype,tvlists):
    """Store a batch of programme entries in the EPG timeline for bctype.

    new_epg_timeline(bctype) is called once first, then every entry is
    added with add_epg_timeline.  Each entry of tvlists must unpack into
    exactly seven values:
    (channel, start, stop, title, desc, longdesc, category).
    """
    timeline_db = rec10d.rec10db
    timeline_db.new_epg_timeline(bctype)
    for programme in tvlists:
        channel, start, stop, title, desc, longdesc, category = programme
        timeline_db.add_epg_timeline(
            bctype, channel, start, stop, title, desc, longdesc, category)
def writeMultiCHDB(chlists):
    """Write channel display names to the channel DB.

    chlists is an iterable of (chtxt, display_name) pairs.  Every pair is
    passed to update_chname_by_chtxt_epg_ch; when the display name is
    None or empty, chdb.changeCHShow(chtxt, "0") is called as well
    (presumably marking the channel as not shown -- confirm in chdb).
    """
    for chtxt, display_name in chlists:
        rec10d.rec10db.update_chname_by_chtxt_epg_ch(chtxt, display_name)
        name_missing = display_name is None or len(display_name) < 1
        if name_missing:
            chdb.changeCHShow(chtxt, "0")
def writeMultiLogoDB(logolists):
    """Store logo image data for the channels found in logolists.

    Each entry is [type, sv, pngdata].  The channel is looked up with
    chdb.searchCHFromCsch(sv); entries whose resulting 'chtxt' is empty
    are skipped, the rest go to change_logodata_epg_ch.
    """
    for logo_type, sv, png in logolists:
        channel_txt = chdb.searchCHFromCsch(sv)['chtxt']
        if len(channel_txt) <= 0:
            # No channel text for this sv value: nothing to attach the logo to.
            continue
        rec10d.rec10db.change_logodata_epg_ch(channel_txt, logo_type, png)
def _reserveOrLearnCandidate(tch, title, start, stop, score):
    """Handle one programme whose auto_rec.calcKey score exceeded 2000.

    tch is the channel record returned by chdb.searchCHFromChtxt, start
    and stop are "%Y%m%d%H%M%S" strings, score is the calcKey result.

    The programme window is widened by 1200 seconds on each side and the
    titles already on the timeline for that channel and window are compared
    with title via n_gram.trigram.  Depending on the best similarity and
    on status.getSettings_auto_bayes() the programme is either reserved,
    recorded as an auto-bayes keyword, or ignored.
    """
    chtxt = tch['chtxt']
    margin = datetime.timedelta(seconds=1200)
    window_begin = datetime.datetime.strptime(start, "%Y%m%d%H%M%S") - margin
    window_end = datetime.datetime.strptime(stop, "%Y%m%d%H%M%S") + margin
    bttime = window_begin.strftime("%Y-%m-%d %H:%M:%S")
    ettime = window_end.strftime("%Y-%m-%d %H:%M:%S")

    # Best trigram similarity against entries already on the timeline.
    similarity = 0
    existing = rec10d.rec10db.select_bytime_bychtxt_all_timeline(
        bttime, ettime, chtxt)
    for entry in existing:
        candidate = n_gram.trigram(entry['title'], title)
        if candidate > similarity:
            similarity = candidate

    if similarity >= 350:
        # Too close to something already on the timeline: do nothing.
        return
    if status.getSettings_auto_bayes() != 1:
        recdb.addAutoBayesKeyword(chtxt, title, start, stop, score)
        return
    if similarity >= 200:
        return

    # The original code used configreader without importing it, which
    # raised NameError on this path.  Imported lazily so that every other
    # path behaves exactly as before.
    import configreader
    scheduled = epgdb.count_schedule_timeline(bttime, ettime)
    # NOTE(review): the original tested len(ch['ch']) where `ch` was a
    # stale loop variable (a timeline row or a <channel> DOM element),
    # not the channel record.  The channel record is used here; confirm
    # that it carries a 'ch' key.
    if len(tch['ch']) > 2:
        free_slots = int(configreader.getConfEnv("bscs_max")) - scheduled[1]
    else:
        free_slots = int(configreader.getConfEnv("te_max")) - scheduled[0]
    if free_slots > 0:
        topt = status.getSettings_auto_opt()
        recdb.reserveReckey(
            recdblist.REC_RESERVE, title, chtxt, bttime, ettime, topt)


def xml2db_dom(xmlpath, bctype):
    """Import the EPG XML file at xmlpath into the databases.

    bctype is the broadcast type: 'TE' (terrestrial digital), BS or CS;
    for terrestrial digital the value passed is "te" followed by a number.

    Steps:
      * every <channel> id/display-name pair is collected and written
        with writeMultiCHDB;
      * every <programme> whose channel is known to chdb, matches bctype
        and is not marked visible == 0 is collected and written with
        writeMultiTVDB; its title and descriptions are also accumulated
        per channel and fed to auto_rec.updateRecall;
      * programmes whose auto_rec.calcKey score is above 2000 are passed
        to _reserveOrLearnCandidate.

    As in the original, programme and recall data are written only when
    at least one <channel> element was found.
    """
    started = datetime.datetime.now()
    # Passing the path lets minidom open and close the file itself; the
    # original `file(xmlpath)` handle was never closed.
    dom = xml.dom.minidom.parse(xmlpath)
    chlist = []
    tvlist = []
    bayes = {}
    # History CH titles: when desc looks like u"『#01』description", the
    # bracketed part is appended to the title (see addTitle_Subtitle).
    rHisch = re.compile(u"『([^『]+)』(.+)\\Z")
    try:
        for ch in dom.getElementsByTagName('channel'):
            chname = ch.getElementsByTagName("display-name").item(0).childNodes[0].data
            chlist.append([ch.getAttribute("id"), chname])

        for tv in dom.getElementsByTagName('programme'):
            channel = zenhan.toHankaku_ABC123(tv.getAttribute("channel"))
            start = tv.getAttribute("start").replace(" +0900", "")
            stop = tv.getAttribute("stop").replace(" +0900", "")
            title = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("title"))).replace(",", "_")
            desc = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("desc")))
            longdesc = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("longdesc")))
            category = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("category")))
            title = addTitle_Subtitle(rHisch, 1, title, desc)

            tch = chdb.searchCHFromChtxt(channel)
            if tch is None:
                continue
            if tch.get('bctype', "") != bctype or tch['visible'] == 0:
                continue

            chtxt = tch['chtxt']
            # Per channel: [all titles, all descriptions, programme count].
            previous = bayes.get(chtxt, ["", "", 0])
            bayes[chtxt] = [
                previous[0] + title + " ",
                previous[1] + desc + " " + longdesc + " ",
                previous[2] + 1,
            ]
            tvlist.append([channel, start, stop, title, desc, longdesc, category])

            score = auto_rec.calcKey(chtxt, title, desc + " " + longdesc)
            if score > 2000:
                _reserveOrLearnCandidate(tch, title, start, stop, score)
    finally:
        # Release the DOM tree even when parsing a programme fails.
        dom.unlink()

    if len(chlist) > 0:
        writeMultiCHDB(chlist)
        if len(tvlist) > 0:
            writeMultiTVDB(bctype, tvlist)
        for ct, recall in bayes.items():
            auto_rec.updateRecall(ct, recall[0], recall[1], recall[2])
            auto_rec.updateRecall("ALL", recall[0], recall[1], recall[2])

    elapsed = datetime.datetime.now() - started
    recdblist.printutf8(bctype + u" epg取り出し終了")
    recdblist.printutf8(str(elapsed.days * 24 * 60 * 60 + elapsed.seconds)+u"seconds taken for updating bayes-auto DB .")
def xml2db_dom_logo(xmlpath, bctype):
    """Import channel logo data from the XML file at xmlpath.

    bctype is the broadcast type: 'TE' (terrestrial digital), BS or CS;
    for terrestrial digital the value passed is "te" followed by a number.
    Here it is used only in the completion log message.

    Every <logo> element contributes [int(type attribute), sv attribute,
    data of its first child node]; the collected list is handed to
    writeMultiLogoDB when it is not empty.
    """
    started = datetime.datetime.now()
    # Passing the path lets minidom open and close the file itself; the
    # original `file(xmlpath)` handle was never closed.
    dom = xml.dom.minidom.parse(xmlpath)
    logolist = []
    try:
        for logo in dom.getElementsByTagName('logo'):
            logo_type = int(logo.getAttribute("type"))
            sv = logo.getAttribute("sv")
            pngdata = logo.childNodes[0].data
            logolist.append([logo_type, sv, pngdata])
    finally:
        # Release the DOM tree even when a <logo> element is malformed.
        dom.unlink()
    if len(logolist) > 0:
        writeMultiLogoDB(logolist)
    elapsed = datetime.datetime.now() - started
    recdblist.printutf8(bctype + u" epg取り出し終了")
    recdblist.printutf8(str(elapsed.days * 24 * 60 * 60 + elapsed.seconds)+u"seconds taken for updating logo DB .")
def addTitle_Subtitle(recompiled,num,title,exp):
    """Return title with a subtitle taken from exp appended in 『』 brackets.

    recompiled -- compiled regular expression, applied to exp with match()
    num        -- group of the match to use as the subtitle
    title      -- base title
    exp        -- text to search (the programme description)

    When the pattern does not match, title is returned unchanged.  Any
    exception raised while matching is logged through
    recdblist.addCommonlogEX and title is returned unchanged as well.
    """
    try:
        matched = recompiled.match(exp)
        if matched:
            return title + u"『" + matched.group(num) + u"』"
        return title
    except Exception as inst:
        # Best effort: a broken description must not abort the EPG import.
        recdblist.addCommonlogEX("Error", "addTitle_Subtitle(xml2db_dom.py)", str(type(inst)),str(inst)+traceback.format_exc(),log_level=200)
        return title