# Copyright (C) 2009 Yukikaze
#import sys
#import os
+import xml.parsers.expat
+import auto_rec
+import chdb
import rec10d
import zenhan
-import xml.parsers.expat
-flags={}
-lastflag=""
-xmldate={}
-db=""
-bctype=""
-def xml2db(xmlpath,dbpath,bctypetemp):#bctypeは放送種別で'TE'(地デジ)BS,CSがある。地デジの場合は te数字 が入る
- global db,bctype
- bctype=bctypetemp
- #print path
- createDB()
- #db.execute('drop table tv')
- #db.execute('drop table ch')
-
- p=xml.parsers.expat.ParserCreate()
- p.buffer_text=True
- p.StartElementHandler=start_element
- p.EndElementHandler=end_element
- p.CharacterDataHandler=char_data
- parseDocument(p,xmlpath)
-
-def createDB():
+import datetime
+import recdb
# Parser state shared by the expat callbacks in this module.
flags = {}       # element name -> 1 once its start tag is seen, 0 after its end tag
lastflag = ""    # name of the most recently opened element
xmldate = {}     # "element:attribute" -> attribute value, element name -> text
db = ""          # declared global by the handlers; never assigned in this section
bctype = ""      # broadcast type of the file being parsed (set by xml2db)
chlists = []     # queued [bctype, channel id, display name] rows
tvlists = []     # queued programme rows
bayes = {}       # channel text -> [title text, description text, programme count]
dtb = ""         # datetime stamped by start_element; a timedelta after </tv>
def xml2db(xmlpath, bctypetemp):
    """Parse the EPG XML file at ``xmlpath`` with an expat parser.

    ``bctypetemp`` is the broadcast type; it is published as the module-level
    ``bctype`` that the element handlers read.  Per the original author's
    note the value is 'TE' (terrestrial digital), BS or CS, and for
    terrestrial digital it takes the form "te" followed by a number.
    """
    global bctype
    bctype = bctypetemp

    parser = xml.parsers.expat.ParserCreate()
    # Let expat buffer text so char_data is called as few times as possible.
    parser.buffer_text = True
    parser.StartElementHandler = start_element
    parser.EndElementHandler = end_element
    parser.CharacterDataHandler = char_data
    parseDocument(parser, xmlpath)
+def writemultitvDB(tvlists):
# Post-patch body = the unprefixed and '+' lines of this span; the '-' lines
# are the removed createDB/writetvDB/writechDB/parseDocument code.
# Calls rec10d.rec10db.new_epg_timeline() for the module-level bctype, then
# passes the whole tvlists batch to add_multi_epg_timeline() in a single call.
# tvlists: rows queued by end_element, each
#   [bctype, channel, start, stop, title, desc, longdesc, category].
# NOTE(review): new_epg_timeline presumably (re)creates or clears the timeline
# for this bctype before the bulk insert -- confirm in rec10d.
global bctype
- rec10d.rec10db.new_tv(bctype)
- rec10d.rec10db.new_ch(bctype)
-def writetvDB(bctype,channel,start,stop,title,desc):#tvdate has the form (broadcast type, channel, start, stop, title, desc)
- rec10d.rec10db.add_tv(bctype,channel,start,stop,title,desc)
-def writechDB(bctype,id,display):#tvdate has the form (channel, display)
- rec10d.rec10db.add_ch(bctype,id,display)
-def parseDocument(p,xmlfile):
- f=open(xmlfile,'r')
+ rec10d.rec10db.new_epg_timeline(bctype)
+ rec10d.rec10db.add_multi_epg_timeline(tvlists)
def writemultichDB(chlists):
    """Update the stored display name of every queued channel.

    ``chlists`` is the list built by ``end_element``: one
    ``[bctype, ontv_id, display_name]`` entry per channel.  The broadcast
    type is carried in each entry but not needed for the update.

    The list is iterated directly; the previous ``chlists.items()`` call
    raised AttributeError because ``chlists`` is a list, not a dict.
    """
    for _bctype, ontv, dn in chlists:
        rec10d.rec10db.update_chname_by_ontv_epg_ch(ontv, dn)
def parseDocument(p, xmlfile):
    """Feed the file at path ``xmlfile`` to the expat parser ``p``.

    The file is opened in binary mode so that expat receives the raw bytes
    and applies the encoding declared in the XML itself, and it is closed
    even when ``p.ParseFile`` raises (e.g. on malformed XML).  Exceptions
    from opening or parsing propagate to the caller unchanged.
    """
    f = open(xmlfile, 'rb')
    try:
        p.ParseFile(f)
    finally:
        f.close()
-def start_element(name,attrs):
- global flags,lastflag,xmldate
- flags[name]=1
- lastflag=name
- for (key,val) in attrs.iteritems():
- xmldate[name+":"+key]=val
+def start_element(name, attrs):
# Start-tag callback (installed as StartElementHandler by xml2db).
# Marks the element as open in flags, remembers it in lastflag so char_data
# knows where to store text, and caches every attribute in xmldate under the
# key "<element>:<attribute>" (e.g. 'programme:start').
+ global flags, lastflag, xmldate,dtb
+ flags[name] = 1
+ lastflag = name
+ for (key, val) in attrs.iteritems():
+ xmldate[name + ":" + key] = val
# NOTE(review): indentation is lost in this rendering, so it cannot be told
# whether this stamp sits inside the attribute loop or after it.  Either way
# dtb is overwritten by later start tags, so the elapsed time end_element
# prints at </tv> is measured from the last such stamp, not from the start of
# the parse -- confirm that is intended.
+ dtb=datetime.datetime.now()
def end_element(name):
# End-tag callback (installed as EndElementHandler by xml2db).
# Post-patch body = the '+' lines; the '-' lines are the removed version.
# Acts only when flags[name] == 1 and sets the flag back to 0 at the end:
#   "channel"   -> queue the channel on chlists when chdb knows it under the
#                  current bctype, then blank the cached channel fields
#   "programme" -> queue the programme on tvlists, accumulate per-channel text
#                  in bayes, possibly register a suggested recording, then
#                  blank the cached programme fields
#   "tv"        -> flush both queues to the DB, update the auto_rec data from
#                  bayes and print the elapsed time
# NOTE(review): indentation is not preserved in this rendering; the exact
# nesting of the statements under the tch checks cannot be told from here.
- global flags,lastflag,xmldate,db
- if flags[name]==1:
- if name=="channel":
- ubctype=unicode(bctype)
- uid=unicode(xmldate['channel:id'])
- udn=unicode(xmldate['display-name'])
- writechDB(bctype,uid,udn)
- xmldate['channel:id']=""
- xmldate['display-name']=""
- elif name=="programme":
- ubctype=unicode(bctype)
- uch=unicode(xmldate['programme:channel'])
- ustart=unicode(xmldate['programme:start'].replace(" +0900",""))
- ustop=unicode(xmldate['programme:stop'].replace(" +0900",""))
- utitle=unicode(xmldate['title'].replace(",","-"))
- udesc=unicode(xmldate['desc'])
- writetvDB(ubctype,uch,ustart,ustop,utitle,udesc)
- #print xmld
- xmldate['title']=""
- xmldate['desc']=""
- #xmldate[]
- if name=="tv":
- """
- r=db.execute("SELECT * FROM tv")
- for row in r:
- print row
- """
- print bctype+" epg取り出し終了"
- flags[name]=0
+ global flags, lastflag, xmldate, db, chlists, tvlists ,bayes,dtb
+ if flags[name] == 1:
+ if name == "channel":
+ ubctype = unicode(bctype)
+ uid = unicode(xmldate['channel:id'])
+ udn = unicode(xmldate['display-name'])
+ #print uid
# Keep the channel only if chdb.ontvsearch() returns a record whose 'bctype'
# equals the broadcast type currently being parsed.
+ bcsearch = None
+ bs = chdb.ontvsearch(xmldate['channel:id'])
+ if bs != None:
+ #print bcsearch
+ bcsearch = bs['bctype']
+ if ((bcsearch != None) and (bcsearch == bctype)):
+ chlists.append([bctype, uid, udn])
+ xmldate['channel:id'] = ""
+ xmldate['display-name'] = ""
+ elif name == "programme":
+ ubctype = unicode(bctype)
+ uch = unicode(xmldate['programme:channel'])
# The literal " +0900" suffix is removed from start/stop, and commas in the
# title are replaced with "-".
+ ustart = unicode(xmldate['programme:start'].replace(" +0900", ""))
+ ustop = unicode(xmldate['programme:stop'].replace(" +0900", ""))
+ utitle = unicode(xmldate['title'].replace(",", "-"))
+ udesc = unicode(xmldate['desc'])
# NOTE(review): text keys such as 'longdesc' and 'category' only exist in
# xmldate once char_data has stored text for such an element; if none was seen
# before the first programme closes, these lookups raise KeyError.
+ ulongdesc = unicode(xmldate['longdesc'])
+ ucategory = unicode(xmldate['category'])
+ tch=chdb.ontvsearch(uch)
+ if tch!=None:
+ if tch.get('bctype',"")==ubctype:
# bayes maps tch['chtxt'] to [concatenated titles, concatenated descriptions,
# number of programmes], each text piece followed by a space.
+ bt=bayes.get(tch['chtxt'],["","",0])
+ bt2=[bt[0]+utitle+" ",bt[1]+udesc+" "+ulongdesc+" ",bt[2]+1]
+ bayes[tch['chtxt']]=bt2
+ tvlists.append([ubctype, uch, ustart, ustop, utitle, udesc,ulongdesc,ucategory])
# When auto_rec.calc_key() is truthy the programme is registered through
# recdb.rec_reckey() with the REC_BAYES_SUGGEST type.
+ if auto_rec.calc_key(tch['chtxt'], utitle, udesc+" "+ulongdesc):
+ recdb.rec_reckey(recdb.REC_BAYES_SUGGEST, utitle, tch['chtxt'], ustart,ustop,"")
+ xmldate['title'] = ""
+ xmldate['desc'] = ""
+ xmldate['longdesc'] = ""
+ xmldate['category'] =""
+ if name == "tv":
# End of document: write the queued rows in bulk and empty the queues.
+ if len(chlists) > 0:
+ writemultichDB(chlists)
+ chlists = []
+ if len(tvlists) > 0:
+ writemultitvDB(tvlists)
+ tvlists = []
# NOTE(review): the loop variable `list` shadows the builtin within this
# function.  bayes is not emptied here (unlike chlists/tvlists), so a later
# parse in the same process would pass the accumulated text to update_recall
# again -- confirm whether that is intended.
+ for ct,list in bayes.items():
+ auto_rec.update_recall(ct,list[0],list[1],list[2])
+ auto_rec.update_recall("ALL",list[0],list[1],list[2])
# dtb becomes a timedelta: now minus the stamp left by start_element.
+ dtb=datetime.datetime.now()-dtb
+ print bctype + " epg取り出し終了"
+ print str(dtb.days * 24 * 60 * 60 + dtb.seconds)+"seconds taken for updating bayes-auto DB ."
+ flags[name] = 0
def char_data(data):
    """Expat character-data callback: clean ``data`` and cache it in ``xmldate``.

    Newlines, spaces and both quote characters are removed, the text is
    passed through ``zenhan.toHankaku_ABC123`` (falling back to the
    unconverted text if that call fails) and encoded as UTF-8.  The result is
    stored under the name of the most recently opened element (``lastflag``):
    appended when the element already has text, stored as-is otherwise.

    A chunk identical to the text already stored is not appended.
    NOTE(review): presumably this guards against a duplicated callback, but it
    also drops a legitimately repeated chunk -- confirm.
    """
    for unwanted in ("\n", " ", "\"", "\'"):
        data = data.replace(unwanted, "")
    try:
        datat = zenhan.toHankaku_ABC123(data)
    except Exception:
        # Best effort: keep the unconverted text, but let KeyboardInterrupt
        # and SystemExit through instead of swallowing them.
        datat = data
    data = datat.encode('utf-8')
    if lastflag in xmldate:
        if xmldate[lastflag] != data:
            xmldate[lastflag] += data
    else:
        xmldate[lastflag] = data