X-Git-Url: http://git.osdn.net/view?p=rec10%2Frec10-git.git;a=blobdiff_plain;f=rec10%2Ftrunk%2Fsrc%2Fxml2db_dom.py;h=b65b189a4aa0887f8a30568fc33dff0c34210000;hp=a97615248809e9d584aa0e4ae7a526146c6bedbf;hb=56687f4ee031ab76f503394026148403d45421c9;hpb=a5b762a6fd96b7aee1356c2f3da640194f4c88ea diff --git a/rec10/trunk/src/xml2db_dom.py b/rec10/trunk/src/xml2db_dom.py index a976152..b65b189 100644 --- a/rec10/trunk/src/xml2db_dom.py +++ b/rec10/trunk/src/xml2db_dom.py @@ -1,134 +1,148 @@ -#!/usr/bin/python -# coding: UTF-8 -# Rec10 TS Recording Tools -# Copyright (C) 2009-2010 Yukikaze - -import xml.dom.minidom -import datetime -import re - -import zenhan -import chdb -import auto_rec -import recdb -import rec10d -import n_gram -import recdblist -import epgdb -import status -def getText(elm): - nodelist = elm.childNodes - rc = "" - # 全てのノードに対して - for node in nodelist: - # テキストノードなら値を取得 - if node.nodeType == node.TEXT_NODE: - rc = rc + node.data - return rc -def getText_item(obj): - rc="" - for o in obj: - rc=rc+getText(o) - return rc -def writemultitvDB(bctype,tvlists): - rec10d.rec10db.new_epg_timeline(bctype) - for channel,start,stop,title,desc,longdesc,category in tvlists: - rec10d.rec10db.add_epg_timeline(bctype, channel, start, stop, title, desc, longdesc, category) -def writemultichDB(chlists): - for ontv,dn in chlists: - rec10d.rec10db.update_chname_by_ontv_epg_ch(ontv, dn) -def xml2db_dom(xmlpath, bctype):#bctypeは放送種別で'TE'(地デジ)BS,CSがある。地デジの場合は te数字 が入る - dtb=datetime.datetime.now() - dom=xml.dom.minidom.parse(file(xmlpath)) - chlist=[] - tvlist=[] - bayes={} - rHisch=re.compile(u"『([^『]+)』(.+)\Z")##History CHのタイトル#01 exp:「#01説明」をタイトルに含める - for ch in dom.getElementsByTagName('channel'): - ontv=ch.getAttribute("id") - chname=ch.getElementsByTagName("display-name").item(0).childNodes[0].data - chlist.append([ontv,chname]) - for tv in dom.getElementsByTagName('programme'): - channel=zenhan.toHankaku_ABC123(tv.getAttribute("channel")) - start=tv.getAttribute("start").replace(" +0900", "") - stop=tv.getAttribute("stop").replace(" +0900", "") - title=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("title"))).replace(",", "_") - desc=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("desc"))) - longdesc=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("longdesc"))) - category=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("category"))) - title=addTitle_Subtitle(rHisch,1,title,desc) - tch=chdb.ontvsearch(channel) - if tch!=None: - if tch.get('bctype',"")==bctype: - bt=bayes.get(tch['chtxt'],["","",0]) - bt2=[bt[0]+title+" ",bt[1]+desc+" "+longdesc+" ",bt[2]+1] - bayes[tch['chtxt']]=bt2 - tvlist.append([channel,start,stop,title,desc,longdesc,category]) - p=auto_rec.calc_key(tch['chtxt'], title,desc+" "+longdesc) - if p>2000: - bttt=datetime.datetime.strptime(start,"%Y%m%d%H%M%S") - bttt=bttt-datetime.timedelta(seconds=1200) - bttime=bttt.strftime("%Y-%m-%d %H:%M:%S") - ettt=datetime.datetime.strptime(stop,"%Y%m%d%H%M%S") - ettt=ettt+datetime.timedelta(seconds=1200) - ettime=ettt.strftime("%Y-%m-%d %H:%M:%S") - chs=rec10d.rec10db.select_bytime_bychtxt_all_timeline(bttime,ettime,tch['chtxt']) - p2=0 - if len(chs)>0: - for ch in chs: - p2t=n_gram.trigram(ch['title'],title) - if p2t>p2: - p2=p2t - if p2<350: - if status.getSettings_auto_bayes()==1: - if p2<200: - maxnum=0 - if len(ch['ch'])>2: - maxnum=epgdb.count_schedule_timeline(bttime, ettime)[1] - maxnum=int(configreader.getenv("bscs_max"))-maxnum - else: - maxnum=epgdb.count_schedule_timeline(bttime, ettime)[0] - maxnum=int(configreader.getenv("te_max"))-maxnum - if maxnum>0: - topt=status.getSettings_auto_opt() - recdb.recreserv(title,tch['chtxt'], bttime, ettime,topt) - else: - recdb.add_auto_bayes(tch['chtxt'],title,start,stop,p) - - dom.unlink() - if len(chlist) > 0: - writemultichDB(chlist) - chlist = [] - if len(tvlist) > 0: - writemultitvDB(bctype,tvlist) - tvlist = [] - for ct,list in bayes.items(): - auto_rec.update_recall(ct,list[0],list[1],list[2]) - auto_rec.update_recall("ALL",list[0],list[1],list[2]) - dtb=datetime.datetime.now()-dtb - recdblist.printutf8(bctype + u" epg取り出し終了") - recdblist.printutf8(str(dtb.days * 24 * 60 * 60 + dtb.seconds)+u"seconds taken for updating bayes-auto DB .") -def getChdata(xmlpath): - dom=xml.dom.minidom.parse(file(xmlpath)) - chlist=[] - for ch in dom.getElementsByTagName('channel'): - ontv=ch.getAttribute("id") - chname=ch.getElementsByTagName("display-name").item(0).childNodes[0].data - chlist.append([ontv,chname]) - dom.unlink() - if len(chlist)>0: - return chlist[0] - else: - return ["",""] -def addTitle_Subtitle(recompiled,num,title,exp): - try: - tST=recompiled.match(exp) - newtitle=u"" - if tST: - newtitle=title+u"『"+tST.group(num)+u"』" - else: - newtitle=title - except Exception, inst: - recdblist.Commonlogex("Error", "addTitle_Subtitle(xml2db_dob.py)", str(type(inst)),str(inst),log_level=200) - newtitle=title - return newtitle \ No newline at end of file +#!/usr/bin/python +# coding: UTF-8 +# Rec10 TS Recording Tools +# Copyright (C) 2009-2011 Yukikaze + +import xml.dom.minidom +import datetime +import re +import traceback + +import zenhan +import chdb +import auto_rec +import recdb +import rec10d +import n_gram +import recdblist +import epgdb +import status +def getText(elm): + nodelist = elm.childNodes + rc = "" + # 全てのノードに対して + for node in nodelist: + # テキストノードなら値を取得 + if node.nodeType == node.TEXT_NODE: + rc = rc + node.data + return rc +def getText_item(obj): + rc="" + for o in obj: + rc=rc+getText(o) + return rc +def writeMultiTVDB(bctype,tvlists): + rec10d.rec10db.new_epg_timeline(bctype) + for channel,start,stop,title,desc,longdesc,category in tvlists: + rec10d.rec10db.add_epg_timeline(bctype, channel, start, stop, title, desc, longdesc, category) +def writeMultiCHDB(chlists): + for chtxt,dn in chlists: + rec10d.rec10db.update_chname_by_chtxt_epg_ch(chtxt,dn) + if dn==None: + chdb.changeCHShow(chtxt,"0") + else: + if len(dn)<1: + chdb.changeCHShow(chtxt,"0") +def writeMultiLogoDB(logolists): + #[type,sv,pngdata] + for type,sv,pngdata in logolists: + chtxt=chdb.searchCHFromCsch(sv)['chtxt'] + if len(chtxt)>0: + rec10d.rec10db.change_logodata_epg_ch(chtxt,type,pngdata) +def xml2db_dom(xmlpath, bctype):#bctypeは放送種別で'TE'(地デジ)BS,CSがある。地デジの場合は te数字 が入る + dtb=datetime.datetime.now() + dom=xml.dom.minidom.parse(file(xmlpath)) + chlist=[] + tvlist=[] + bayes={} + rHisch=re.compile(u"『([^『]+)』(.+)\Z")##History CHのタイトル#01 exp:「#01説明」をタイトルに含める + for ch in dom.getElementsByTagName('channel'): + chtxtt=ch.getAttribute("id") + chname=ch.getElementsByTagName("display-name").item(0).childNodes[0].data + chlist.append([chtxtt,chname]) + for tv in dom.getElementsByTagName('programme'): + channel=zenhan.toHankaku_ABC123(tv.getAttribute("channel")) + start=tv.getAttribute("start").replace(" +0900", "") + stop=tv.getAttribute("stop").replace(" +0900", "") + title=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("title"))).replace(",", "_") + desc=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("desc"))) + longdesc=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("longdesc"))) + category=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("category"))) + title=addTitle_Subtitle(rHisch,1,title,desc) + tch=chdb.searchCHFromChtxt(channel) + if tch!=None: + if tch.get('bctype',"")==bctype and not tch['visible']==0: + bt=bayes.get(tch['chtxt'],["","",0]) + bt2=[bt[0]+title+" ",bt[1]+desc+" "+longdesc+" ",bt[2]+1] + bayes[tch['chtxt']]=bt2 + tvlist.append([channel,start,stop,title,desc,longdesc,category]) + p=auto_rec.calcKey(tch['chtxt'], title,desc+" "+longdesc) + if p>2000: + bttt=datetime.datetime.strptime(start,"%Y%m%d%H%M%S") + bttt=bttt-datetime.timedelta(seconds=1200) + bttime=bttt.strftime("%Y-%m-%d %H:%M:%S") + ettt=datetime.datetime.strptime(stop,"%Y%m%d%H%M%S") + ettt=ettt+datetime.timedelta(seconds=1200) + ettime=ettt.strftime("%Y-%m-%d %H:%M:%S") + chs=rec10d.rec10db.select_bytime_bychtxt_all_timeline(bttime,ettime,tch['chtxt']) + p2=0 + if len(chs)>0: + for ch in chs: + p2t=n_gram.trigram(ch['title'],title) + if p2t>p2: + p2=p2t + if p2<350: + if status.getSettings_auto_bayes()==1: + if p2<200: + maxnum=0 + if len(ch['ch'])>2: + maxnum=epgdb.count_schedule_timeline(bttime, ettime)[1] + maxnum=int(configreader.getConfEnv("bscs_max"))-maxnum + else: + maxnum=epgdb.count_schedule_timeline(bttime, ettime)[0] + maxnum=int(configreader.getConfEnv("te_max"))-maxnum + if maxnum>0: + topt=status.getSettings_auto_opt() + recdb.reserveReckey(recdblist.REC_RESERVE,title,tch['chtxt'], bttime, ettime,topt) + else: + recdb.addAutoBayesKeyword(tch['chtxt'],title,start,stop,p) + dom.unlink() + if len(chlist) > 0: + writeMultiCHDB(chlist) + chlist = [] + if len(tvlist) > 0: + writeMultiTVDB(bctype,tvlist) + tvlist = [] + for ct,list in bayes.items(): + auto_rec.updateRecall(ct,list[0],list[1],list[2]) + auto_rec.updateRecall("ALL",list[0],list[1],list[2]) + dtb=datetime.datetime.now()-dtb + recdblist.printutf8(bctype + u" epg取り出し終了") + recdblist.printutf8(str(dtb.days * 24 * 60 * 60 + dtb.seconds)+u"seconds taken for updating bayes-auto DB .") +def xml2db_dom_logo(xmlpath, bctype):#bctypeは放送種別で'TE'(地デジ)BS,CSがある。地デジの場合は te数字 が入る + dtb=datetime.datetime.now() + dom=xml.dom.minidom.parse(file(xmlpath)) + logolist=[] + for logo in dom.getElementsByTagName('logo'): + type=int(logo.getAttribute("type")) + sv=logo.getAttribute("sv") + pngdata=logo.childNodes[0].data + logolist.append([type,sv,pngdata]) + dom.unlink() + if len(logolist) > 0: + writeMultiLogoDB(logolist) + dtb=datetime.datetime.now()-dtb + recdblist.printutf8(bctype + u" epg取り出し終了") + recdblist.printutf8(str(dtb.days * 24 * 60 * 60 + dtb.seconds)+u"seconds taken for updating logo DB .") +def addTitle_Subtitle(recompiled,num,title,exp): + try: + tST=recompiled.match(exp) + newtitle=u"" + if tST: + newtitle=title+u"『"+tST.group(num)+u"』" + else: + newtitle=title + except Exception, inst: + recdblist.addCommonlogEX("Error", "addTitle_Subtitle(xml2db_dob.py)", str(type(inst)),str(inst)+traceback.format_exc(),log_level=200) + newtitle=title + return newtitle