OSDN Git Service

history-ch title modify.
[rec10/rec10-git.git] / rec10 / trunk / src / xml2db_dom.py
1 #!/usr/bin/python
2 # coding: UTF-8
3 # Rec10 TS Recording Tools
4 # Copyright (C) 2009-2010 Yukikaze
5
6 import xml.dom.minidom
7 import datetime
8 import re
9
10 import zenhan
11 import chdb
12 import auto_rec
13 import recdb
14 import rec10d
15 import n_gram
16 import recdblist
17 import epgdb
18 import status
def getText(elm):
    """Return the concatenated data of the text nodes directly under ``elm``.

    Only immediate children whose ``nodeType`` is ``TEXT_NODE`` contribute;
    text nested inside child elements is not collected.  An element without
    text children yields an empty string.
    """
    pieces = [child.data
              for child in elm.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
def getText_item(obj):
    """Return the text of every element in ``obj`` joined into one string.

    ``obj`` is an iterable of DOM elements (the callers in this module pass
    the result of ``getElementsByTagName``).  Each element's direct text is
    obtained with ``getText`` and the results are concatenated in order; an
    empty iterable gives an empty string.
    """
    return "".join([getText(element) for element in obj])
def writemultitvDB(bctype,tvlists):
    """Store a batch of programme rows in the EPG timeline for ``bctype``.

    ``rec10d.rec10db.new_epg_timeline(bctype)`` is called once first
    (presumably it resets the stored timeline for that broadcast type --
    confirm in rec10d), then every row is added with ``add_epg_timeline``.

    Each entry of ``tvlists`` must be a 7-item sequence:
    ``[channel, start, stop, title, desc, longdesc, category]``.
    """
    rec10d.rec10db.new_epg_timeline(bctype)
    for programme in tvlists:
        ch_id, begin, end, name, summary, detail, genre = programme
        rec10d.rec10db.add_epg_timeline(
            bctype, ch_id, begin, end, name, summary, detail, genre)
def writemultichDB(chlists):
    """Update the stored display name of every channel in ``chlists``.

    Each entry must be a 2-item sequence ``[ontv_id, display_name]``; it is
    forwarded to ``rec10d.rec10db.update_chname_by_ontv_epg_ch``.
    """
    for entry in chlists:
        ontv_id, display_name = entry
        rec10d.rec10db.update_chname_by_ontv_epg_ch(ontv_id, display_name)
def xml2db_dom(xmlpath, bctype):
    """Load an EPG XML file into the EPG database and feed the auto-rec data.

    The document is expected to contain ``channel`` elements (``id``
    attribute plus a ``display-name`` child) and ``programme`` elements
    (``channel``/``start``/``stop`` attributes plus ``title``, ``desc``,
    ``longdesc`` and ``category`` children).

    Parameters:
        xmlpath: path of the XML file to read.
        bctype:  broadcast type.  Per the original author's note the values
                 are 'TE' (terrestrial digital), BS and CS; for terrestrial
                 digital a "te<number>" string is passed.

    For every programme whose channel record (``chdb.ontvsearch``) has this
    ``bctype`` the function:
      * accumulates title/description text per channel for
        ``auto_rec.update_recall``;
      * queues the row for ``writemultitvDB``;
      * scores it with ``auto_rec.calc_key`` and, above 2000, hands it to
        ``_xml2db_auto_rec_candidate``.

    Returns None.  Exceptions from parsing or from the project modules
    propagate; the DOM is released in every case.
    """
    started = datetime.datetime.now()
    chlist = []
    tvlist = []
    bayes = {}
    # History CH titles: the description starts with a 『...』 part (e.g. an
    # episode label) that is appended to the title.  "\\Z" is spelled with an
    # escaped backslash so the pattern text is unchanged but the literal is
    # also valid on Python 3.
    rHisch = re.compile(u"『([^『]+)』(.+)\\Z")
    # Passing the path lets minidom open *and close* the file itself; the
    # former file(xmlpath) handle was never closed.
    dom = xml.dom.minidom.parse(xmlpath)
    try:
        for ch in dom.getElementsByTagName('channel'):
            ontv = ch.getAttribute("id")
            chname = ch.getElementsByTagName("display-name").item(0).childNodes[0].data
            chlist.append([ontv, chname])
        for tv in dom.getElementsByTagName('programme'):
            channel = zenhan.toHankaku_ABC123(tv.getAttribute("channel"))
            # Times are kept as local time strings; the JST offset is dropped.
            start = tv.getAttribute("start").replace(" +0900", "")
            stop = tv.getAttribute("stop").replace(" +0900", "")
            title = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("title"))).replace(",", "_")
            desc = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("desc")))
            longdesc = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("longdesc")))
            category = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("category")))
            title = addTitle_Subtitle(rHisch, 1, title, desc)
            tch = chdb.ontvsearch(channel)
            # Skip unknown channels (None or an empty result) and channels
            # that belong to another broadcast type.
            if not tch or tch.get('bctype', "") != bctype:
                continue
            chtxt = tch['chtxt']
            detail = desc + " " + longdesc
            # Per-channel accumulator: [all titles, all descriptions, count].
            bt = bayes.get(chtxt, ["", "", 0])
            bayes[chtxt] = [bt[0] + title + " ", bt[1] + detail + " ", bt[2] + 1]
            tvlist.append([channel, start, stop, title, desc, longdesc, category])
            p = auto_rec.calc_key(chtxt, title, detail)
            if p > 2000:
                _xml2db_auto_rec_candidate(tch, title, start, stop, p)
    finally:
        # Break the DOM's reference cycles even when an import step failed.
        dom.unlink()
    # NOTE(review): as in the original, programme rows and recall statistics
    # are only written when at least one channel element was found.
    if chlist:
        writemultichDB(chlist)
        if tvlist:
            writemultitvDB(bctype, tvlist)
        for ct, stats in bayes.items():
            auto_rec.update_recall(ct, stats[0], stats[1], stats[2])
            auto_rec.update_recall("ALL", stats[0], stats[1], stats[2])
    elapsed = datetime.datetime.now() - started
    recdblist.printutf8(bctype + u" epg取り出し終了")
    recdblist.printutf8(str(elapsed.days * 24 * 60 * 60 + elapsed.seconds) + u"seconds taken for updating bayes-auto DB .")


def _xml2db_auto_rec_candidate(tch, title, start, stop, score):
    """Reserve, or store as a suggestion, one programme with a high auto-rec score.

    ``tch`` is the channel record returned by ``chdb.ontvsearch``; ``start``
    and ``stop`` are "%Y%m%d%H%M%S" strings; ``score`` is the value returned
    by ``auto_rec.calc_key``.
    """
    # Look at the timeline from 20 minutes before to 20 minutes after.
    margin = datetime.timedelta(seconds=1200)
    bttt = datetime.datetime.strptime(start, "%Y%m%d%H%M%S") - margin
    bttime = bttt.strftime("%Y-%m-%d %H:%M:%S")
    ettt = datetime.datetime.strptime(stop, "%Y%m%d%H%M%S") + margin
    ettime = ettt.strftime("%Y-%m-%d %H:%M:%S")
    chs = rec10d.rec10db.select_bytime_bychtxt_all_timeline(bttime, ettime, tch['chtxt'])
    # Highest trigram score (presumably a title similarity -- confirm in
    # n_gram) against anything already on the timeline in that window.
    p2 = 0
    for entry in chs:
        p2t = n_gram.trigram(entry['title'], title)
        if p2t > p2:
            p2 = p2t
    if p2 >= 350:
        return
    if status.getSettings_auto_bayes() != 1:
        recdb.add_auto_bayes(tch['chtxt'], title, start, stop, score)
        return
    if p2 >= 200:
        return
    # The original tested len(ch['ch']) where ``ch`` was a stale loop
    # variable (the last timeline row, or a DOM element when the window was
    # empty).  NOTE(review): assumes the channel record carries a 'ch' key
    # and that this is what was meant -- confirm against chdb.ontvsearch.
    is_bscs = len(tch['ch']) > 2
    # configreader is referenced by this module but missing from its
    # top-level imports, so it is imported here where it is needed.
    import configreader
    scheduled = epgdb.count_schedule_timeline(bttime, ettime)
    if is_bscs:
        maxnum = int(configreader.getenv("bscs_max")) - scheduled[1]
    else:
        maxnum = int(configreader.getenv("te_max")) - scheduled[0]
    if maxnum > 0:
        topt = status.getSettings_auto_opt()
        recdb.recreserv(title, tch['chtxt'], bttime, ettime, topt)
def getChdata(xmlpath):
    """Return ``[id, display-name]`` of the first channel in an EPG XML file.

    Parameters:
        xmlpath: path of the XML file to read.

    Returns:
        A two-item list with the ``id`` attribute of the first ``channel``
        element and the text of its first ``display-name`` child, or
        ``["", ""]`` when the document has no ``channel`` element.

    Only the first channel is inspected, so malformed later channels no
    longer matter.  A first channel without a non-empty ``display-name``
    still raises, as before.
    """
    # Passing the path lets minidom open and close the file itself; the
    # former file(xmlpath) handle was never closed.
    dom = xml.dom.minidom.parse(xmlpath)
    try:
        channels = dom.getElementsByTagName('channel')
        if not channels:
            return ["", ""]
        first = channels[0]
        ontv = first.getAttribute("id")
        chname = first.getElementsByTagName("display-name").item(0).childNodes[0].data
        return [ontv, chname]
    finally:
        # Release the DOM on every path, including errors.
        dom.unlink()
def addTitle_Subtitle(recompiled,num,title,exp):
    """Append a subtitle extracted from ``exp`` to ``title``.

    Parameters:
        recompiled: compiled regular expression, applied with ``match`` (so
                    it is anchored at the start of ``exp``).
        num:        number of the capture group holding the subtitle.
        title:      programme title.
        exp:        text to search, e.g. the programme description.

    Returns:
        ``title`` followed by the captured group wrapped in 『 』 when the
        pattern matches, otherwise ``title`` unchanged.  Any error while
        matching is logged through ``recdblist.Commonlogex`` and ``title``
        is returned unchanged (best effort).
    """
    try:
        tST = recompiled.match(exp)
        if tST:
            return title + u"『" + tST.group(num) + u"』"
    except Exception as inst:
        # "as" works on Python 2.6+ and 3; the log now names the real file
        # (it used to say xml2db_dob.py).
        recdblist.Commonlogex("Error", "addTitle_Subtitle(xml2db_dom.py)", str(type(inst)), str(inst))
    return title