convert CRLF to LF

[rec10/rec10-git.git] / rec10 / trunk / src / xml2db_dom.py
diff --git a/rec10/trunk/src/xml2db_dom.py b/rec10/trunk/src/xml2db_dom.py

index 5059c06..24c1da7 100644 (file)
--- a/rec10/trunk/src/xml2db_dom.py
+++ b/rec10/trunk/src/xml2db_dom.py
@@ -1,14 +1,22 @@
  #!/usr/bin/python
  # coding: UTF-8
  # Rec10 TS Recording Tools
-# Copyright (C) 2009 Yukikaze
+# Copyright (C) 2009-2011 Yukikaze
  
  import xml.dom.minidom
+import datetime
+import re
+import traceback
+
  import zenhan
  import chdb
  import auto_rec
  import recdb
-
+import rec10d
+import n_gram
+import recdblist
+import epgdb
+import status
  def getText(elm):
      nodelist = elm.childNodes
      rc = ""
@@ -23,23 +31,35 @@ def getText_item(obj):
      for o in obj:
          rc=rc+getText(o)
      return rc
-def writemultitvDB(bctype,tvlists):
+def writeMultiTVDB(bctype,tvlists):
      rec10d.rec10db.new_epg_timeline(bctype)
      for channel,start,stop,title,desc,longdesc,category in tvlists:
          rec10d.rec10db.add_epg_timeline(bctype, channel, start, stop, title, desc, longdesc, category)
-def writemultichDB(bctype,chlists):
-    for ontv,dn in chlists:
-        rec10d.rec10db.update_chname_by_ontv_epg_ch(ontv, dn)
+def writeMultiCHDB(chlists):
+    for chtxt,dn in chlists:
+        rec10d.rec10db.update_chname_by_chtxt_epg_ch(chtxt,dn)
+        if dn==None:
+            chdb.changeCHShow(chtxt,"0")
+        else:
+            if len(dn)<1:
+                chdb.changeCHShow(chtxt,"0")
+def writeMultiLogoDB(logolists):
+    #[type,sv,pngdata]
+    for type,sv,pngdata in logolists:
+        chtxt=chdb.searchCHFromCsch(sv)['chtxt']
+        if len(chtxt)>0:
+            rec10d.rec10db.change_logodata_epg_ch(chtxt,type,pngdata)
  def xml2db_dom(xmlpath, bctype):#bctypeは放送種別で'TE'(地デジ)BS,CSがある。地デジの場合は te数字 が入る
      dtb=datetime.datetime.now()
      dom=xml.dom.minidom.parse(file(xmlpath))
      chlist=[]
      tvlist=[]
      bayes={}
+    rHisch=re.compile(u"『([^『]+)』(.+)\Z")##History CHのタイトル#01  exp:「#01説明」をタイトルに含める
      for ch in dom.getElementsByTagName('channel'):
-        ontv=ch.getAttribute("id")
+        chtxtt=ch.getAttribute("id")
          chname=ch.getElementsByTagName("display-name").item(0).childNodes[0].data
-        chlist.append([ontv,chname])
+        chlist.append([chtxtt,chname])
      for tv in dom.getElementsByTagName('programme'):
          channel=zenhan.toHankaku_ABC123(tv.getAttribute("channel"))
          start=tv.getAttribute("start").replace(" +0900", "")
@@ -48,38 +68,81 @@ def xml2db_dom(xmlpath, bctype):#bctypeは放送種別で'TE'(地デジ)BS,CSが
          desc=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("desc")))
          longdesc=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("longdesc")))
          category=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("category")))
-        tch=chdb.ontvsearch(channel)
+        title=addTitle_Subtitle(rHisch,1,title,desc)
+        tch=chdb.searchCHFromChtxt(channel)
          if tch!=None:
-            if tch.get('bctype',"")==ubctype:
+            if tch.get('bctype',"")==bctype and not tch['visible']==0:
                  bt=bayes.get(tch['chtxt'],["","",0])
                  bt2=[bt[0]+title+" ",bt[1]+desc+" "+longdesc+" ",bt[2]+1]
                  bayes[tch['chtxt']]=bt2
                  tvlist.append([channel,start,stop,title,desc,longdesc,category])
-                p=auto_rec.calc_key(tch['chtxt'], title,desc+" "+longdesc)
+                p=auto_rec.calcKey(tch['chtxt'], title,desc+" "+longdesc)
                  if p>2000:
-                    recdb.add_auto_bayes(tch['chtxt'],title,start,stop,p)
+                    bttt=datetime.datetime.strptime(start,"%Y%m%d%H%M%S")
+                    bttt=bttt-datetime.timedelta(seconds=1200)
+                    bttime=bttt.strftime("%Y-%m-%d %H:%M:%S")
+                    ettt=datetime.datetime.strptime(stop,"%Y%m%d%H%M%S")
+                    ettt=ettt+datetime.timedelta(seconds=1200)
+                    ettime=ettt.strftime("%Y-%m-%d %H:%M:%S")
+                    chs=rec10d.rec10db.select_bytime_bychtxt_all_timeline(bttime,ettime,tch['chtxt'])
+                    p2=0
+                    if len(chs)>0:
+                        for ch in chs:
+                            p2t=n_gram.trigram(ch['title'],title)
+                            if p2t>p2:
+                                p2=p2t
+                    if p2<350:
+                        if status.getSettings_auto_bayes()==1:
+                            if p2<200:
+                                maxnum=0
+                                if len(ch['ch'])>2:
+                                    maxnum=epgdb.count_schedule_timeline(bttime, ettime)[1]
+                                    maxnum=int(configreader.getConfEnv("bscs_max"))-maxnum
+                                else:
+                                    maxnum=epgdb.count_schedule_timeline(bttime, ettime)[0]
+                                    maxnum=int(configreader.getConfEnv("te_max"))-maxnum
+                                if maxnum>0:
+                                    topt=status.getSettings_auto_opt()
+                                    recdb.reserveReckey(recdblist.REC_RESERVE,title,tch['chtxt'], bttime, ettime,topt)
+                        else:
+                            recdb.addAutoBayesKeyword(tch['chtxt'],title,start,stop,p)
      dom.unlink()
      if len(chlist) > 0:
-        writemultichDB(chlist)
+        writeMultiCHDB(chlist)
          chlist = []
          if len(tvlist) > 0:
-            writemultitvDB(tvlist)
+            writeMultiTVDB(bctype,tvlist)
          tvlist = []
          for ct,list in bayes.items():
-            auto_rec.update_recall(ct,list[0],list[1],list[2])
-            auto_rec.update_recall("ALL",list[0],list[1],list[2])
+            auto_rec.updateRecall(ct,list[0],list[1],list[2])
+            auto_rec.updateRecall("ALL",list[0],list[1],list[2])
      dtb=datetime.datetime.now()-dtb
      recdblist.printutf8(bctype + u" epg取り出し終了")
      recdblist.printutf8(str(dtb.days * 24 * 60 * 60 + dtb.seconds)+u"seconds taken for updating bayes-auto DB .")
-def getChdata(xmlpath):
+def xml2db_dom_logo(xmlpath, bctype):#bctypeは放送種別で'TE'(地デジ)BS,CSがある。地デジの場合は te数字 が入る
+    dtb=datetime.datetime.now()
      dom=xml.dom.minidom.parse(file(xmlpath))
-    chlist=[]
-    for ch in dom.getElementsByTagName('channel'):
-        ontv=ch.getAttribute("id")
-        chname=ch.getElementsByTagName("display-name").item(0).childNodes[0].data
-        chlist.append([ontv,chname])
+    logolist=[]
+    for logo in dom.getElementsByTagName('logo'):
+        type=int(logo.getAttribute("type"))
+        sv=logo.getAttribute("sv")
+        pngdata=logo.childNodes[0].data
+        logolist.append([type,sv,pngdata])
      dom.unlink()
-    if len(chlist)>0:
-        return chlist[0]
-    else:
-        return ["",""]
-\ No newline at end of file
+    if len(logolist) > 0:
+        writeMultiLogoDB(logolist)
+    dtb=datetime.datetime.now()-dtb
+    recdblist.printutf8(bctype + u" epg取り出し終了")
+    recdblist.printutf8(str(dtb.days * 24 * 60 * 60 + dtb.seconds)+u"seconds taken for updating logo DB .")
+def addTitle_Subtitle(recompiled,num,title,exp):
+    try:
+        tST=recompiled.match(exp)
+        newtitle=u""
+        if tST:
+            newtitle=title+u"『"+tST.group(num)+u"』"
+        else:
+            newtitle=title
+    except Exception, inst:
+        recdblist.addCommonlogEX("Error", "addTitle_Subtitle(xml2db_dob.py)", str(type(inst)),str(inst)+traceback.format_exc(),log_level=200)
+        newtitle=title
+    return newtitle