rec10/trunk/src/xml2db_dom.py

   1 #!/usr/bin/python
   2 # coding: UTF-8
   3 # Rec10 TS Recording Tools
   4 # Copyright (C) 2009-2010 Yukikaze
   5
   6 import xml.dom.minidom
   7 import datetime
   8 import re
   9
  10 import zenhan
  11 import chdb
  12 import auto_rec
  13 import recdb
  14 import rec10d
  15 import n_gram
  16 import recdblist
  17 import epgdb
  18 import status
  19 def getText(elm):
  20     nodelist = elm.childNodes
  21     rc = ""
  22     # 全てのノードに対して
  23     for node in nodelist:
  24         # テキストノードなら値を取得
  25         if node.nodeType == node.TEXT_NODE:
  26             rc = rc + node.data
  27     return rc
  28 def getText_item(obj):
  29     rc=""
  30     for o in obj:
  31         rc=rc+getText(o)
  32     return rc
  33 def writemultitvDB(bctype,tvlists):
  34     rec10d.rec10db.new_epg_timeline(bctype)
  35     for channel,start,stop,title,desc,longdesc,category in tvlists:
  36         rec10d.rec10db.add_epg_timeline(bctype, channel, start, stop, title, desc, longdesc, category)
  37 def writemultichDB(chlists):
  38     for ontv,dn in chlists:
  39         rec10d.rec10db.update_chname_by_ontv_epg_ch(ontv, dn)
  40 def xml2db_dom(xmlpath, bctype):#bctypeは放送種別で'TE'(地デジ)BS,CSがある。地デジの場合は te数字 が入る
  41     dtb=datetime.datetime.now()
  42     dom=xml.dom.minidom.parse(file(xmlpath))
  43     chlist=[]
  44     tvlist=[]
  45     bayes={}
  46     for ch in dom.getElementsByTagName('channel'):
  47         ontv=ch.getAttribute("id")
  48         chname=ch.getElementsByTagName("display-name").item(0).childNodes[0].data
  49         chlist.append([ontv,chname])
  50     for tv in dom.getElementsByTagName('programme'):
  51         channel=zenhan.toHankaku_ABC123(tv.getAttribute("channel"))
  52         start=tv.getAttribute("start").replace(" +0900", "")
  53         stop=tv.getAttribute("stop").replace(" +0900", "")
  54         title=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("title"))).replace(",", "_")
  55         desc=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("desc")))
  56         longdesc=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("longdesc")))
  57         category=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("category")))
  58         title=addTitle_Subtitle(title,desc)
  59         tch=chdb.ontvsearch(channel)
  60         if tch!=None:
  61             if tch.get('bctype',"")==bctype:
  62                 bt=bayes.get(tch['chtxt'],["","",0])
  63                 bt2=[bt[0]+title+" ",bt[1]+desc+" "+longdesc+" ",bt[2]+1]
  64                 bayes[tch['chtxt']]=bt2
  65                 tvlist.append([channel,start,stop,title,desc,longdesc,category])
  66                 p=auto_rec.calc_key(tch['chtxt'], title,desc+" "+longdesc)
  67                 if p>2000:
  68                     bttt=datetime.datetime.strptime(start,"%Y%m%d%H%M%S")
  69                     bttt=bttt-datetime.timedelta(seconds=1200)
  70                     bttime=bttt.strftime("%Y-%m-%d %H:%M:%S")
  71                     ettt=datetime.datetime.strptime(stop,"%Y%m%d%H%M%S")
  72                     ettt=ettt+datetime.timedelta(seconds=1200)
  73                     ettime=ettt.strftime("%Y-%m-%d %H:%M:%S")
  74                     chs=rec10d.rec10db.select_bytime_bychtxt_all_timeline(bttime,ettime,tch['chtxt'])
  75                     p2=0
  76                     if len(chs)>0:
  77                         for ch in chs:
  78                             p2t=n_gram.trigram(ch['title'],title)
  79                             if p2t>p2:
  80                                 p2=p2t
  81                     if p2<350:
  82                         if status.getSettings_auto_bayes()==1:
  83                             if p2<200:
  84                                 maxnum=0
  85                                 if len(ch['ch'])>2:
  86                                     maxnum=epgdb.count_schedule_timeline(bttime, ettime)[1]
  87                                     maxnum=int(configreader.getenv("bscs_max"))-maxnum
  88                                 else:
  89                                     maxnum=epgdb.count_schedule_timeline(bttime, ettime)[0]
  90                                     maxnum=int(configreader.getenv("te_max"))-maxnum
  91                                 if maxnum>0:
  92                                     topt=status.getSettings_auto_opt()
  93                                     recdb.recreserv(title,tch['chtxt'], bttime, ettime,topt)
  94                         else:
  95                             recdb.add_auto_bayes(tch['chtxt'],title,start,stop,p)
  96
  97     dom.unlink()
  98     if len(chlist) > 0:
  99         writemultichDB(chlist)
 100         chlist = []
 101         if len(tvlist) > 0:
 102             writemultitvDB(bctype,tvlist)
 103         tvlist = []
 104         for ct,list in bayes.items():
 105             auto_rec.update_recall(ct,list[0],list[1],list[2])
 106             auto_rec.update_recall("ALL",list[0],list[1],list[2])
 107     dtb=datetime.datetime.now()-dtb
 108     recdblist.printutf8(bctype + u" epg取り出し終了")
 109     recdblist.printutf8(str(dtb.days * 24 * 60 * 60 + dtb.seconds)+u"seconds taken for updating bayes-auto DB .")
 110 def getChdata(xmlpath):
 111     dom=xml.dom.minidom.parse(file(xmlpath))
 112     chlist=[]
 113     for ch in dom.getElementsByTagName('channel'):
 114         ontv=ch.getAttribute("id")
 115         chname=ch.getElementsByTagName("display-name").item(0).childNodes[0].data
 116         chlist.append([ontv,chname])
 117     dom.unlink()
 118     if len(chlist)>0:
 119         return chlist[0]
 120     else:
 121         return ["",""]
 122 def addTitle_Subtitle(title,exp):
 123     try:
 124         rST=re.compile(u"『(.+)』(.+)\Z")##History CHのタイトル#01  exp:「#01説明」をタイトルに含める
 125         tST=rST.match(exp)
 126         newtitle=u""
 127         if tST:
 128             newtitle=title+u"『"+tST.group(0)+u"』"
 129         else:
 130             newtitle=title
 131     except Exception, inst:
 132         recdblist.Commonlogex("Error", "addTitle_Subtitle(xml2db_dob.py)", str(type(inst)),str(inst))
 133         newtitle=title
 134     return newtitle