OSDN Git Service

add dist
[rec10/rec10-git.git] / dist / trunk / rec10 / xml2db_dom.py
diff --git a/dist/trunk/rec10/xml2db_dom.py b/dist/trunk/rec10/xml2db_dom.py
new file mode 100755 (executable)
index 0000000..fd41840
--- /dev/null
@@ -0,0 +1,143 @@
+#!/usr/bin/python\r
+# coding: UTF-8\r
+# Rec10 TS Recording Tools\r
+# Copyright (C) 2009-2011 Yukikaze\r
+\r
+import xml.dom.minidom\r
+import datetime\r
+import re\r
+import traceback\r
+\r
+import zenhan\r
+import chdb\r
+import auto_rec\r
+import recdb\r
+import rec10d\r
+import n_gram\r
+import recdblist\r
+import epgdb\r
+import status\r
def getText(elm):
    """Return the concatenated data of the text nodes directly under ``elm``.

    Only immediate children whose node type is TEXT_NODE contribute; text
    nested inside child elements is not collected. An element without any
    direct text yields an empty string.
    """
    # Walk every child node and keep the data of the text nodes only.
    pieces = [child.data
              for child in elm.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
def getText_item(obj):
    """Join the direct text of every element in ``obj`` into one string.

    ``obj`` is an iterable of DOM elements (for example the result of
    ``getElementsByTagName``). Each element is reduced with ``getText`` and
    the results are concatenated in iteration order; an empty iterable gives
    an empty string.
    """
    return "".join(getText(element) for element in obj)
def writeMultiTVDB(bctype,tvlists):
    """Write a batch of programme rows for ``bctype`` to the EPG timeline.

    Calls ``rec10d.rec10db.new_epg_timeline(bctype)`` once, then
    ``add_epg_timeline`` once per entry of ``tvlists``. Every entry must
    unpack into exactly seven values, in this order:
    channel, start, stop, title, desc, longdesc, category.
    """
    rec10d.rec10db.new_epg_timeline(bctype)
    for programme in tvlists:
        channel, start, stop, title, desc, longdesc, category = programme
        rec10d.rec10db.add_epg_timeline(
            bctype, channel, start, stop, title, desc, longdesc, category)
def writeMultiCHDB(chlists):
    """Update the stored channel name for every (chtxt, display name) pair.

    Each entry of ``chlists`` must unpack into exactly two values; they are
    passed unchanged to ``rec10d.rec10db.update_chname_by_chtxt_epg_ch``.
    """
    for entry in chlists:
        chtxt, display_name = entry
        rec10d.rec10db.update_chname_by_chtxt_epg_ch(chtxt, display_name)
def writeMultiLogoDB(logolists):
    """Store logo data for every logo whose channel can be resolved.

    Each entry of ``logolists`` unpacks into (logo type, sv, png data).
    ``sv`` is looked up with ``chdb.searchCHFromCsch`` and the 'chtxt' of the
    result identifies the channel; entries whose 'chtxt' is empty are skipped.
    """
    for logo_type, sv, pngdata in logolists:
        chtxt = chdb.searchCHFromCsch(sv)['chtxt']
        if len(chtxt) == 0:
            # No channel text for this sv: nothing to attach the logo to.
            continue
        rec10d.rec10db.change_logodata_epg_ch(chtxt, logo_type, pngdata)
def _epg_margin_window(start, stop):
    """Return (begin, end) as 'YYYY-MM-DD HH:MM:SS', widened by 20 minutes per side.

    ``start`` and ``stop`` are 'YYYYMMDDHHMMSS' strings.
    """
    margin = datetime.timedelta(seconds=1200)
    begin = datetime.datetime.strptime(start, "%Y%m%d%H%M%S") - margin
    end = datetime.datetime.strptime(stop, "%Y%m%d%H%M%S") + margin
    return begin.strftime("%Y-%m-%d %H:%M:%S"), end.strftime("%Y-%m-%d %H:%M:%S")


def _reserve_if_capacity(tch, title, bttime, ettime):
    """Reserve ``title`` on channel ``tch`` if the schedule still has room."""
    # configreader is used here but is not imported at module level in this
    # file; importing it locally keeps this path from raising NameError.
    import configreader
    counts = epgdb.count_schedule_timeline(bttime, ettime)
    # The original read ch['ch'] from a leftover loop variable (a timeline row
    # or a <channel> DOM element) instead of this programme's channel record.
    # NOTE(review): assumes the chdb channel record carries a 'ch' key - confirm.
    if len(tch['ch']) > 2:
        free = int(configreader.getConfEnv("bscs_max")) - counts[1]
    else:
        free = int(configreader.getConfEnv("te_max")) - counts[0]
    if free > 0:
        topt = status.getSettings_auto_opt()
        recdb.reserveReckey(recdblist.REC_RESERVE, title, tch['chtxt'], bttime, ettime, topt)


def _consider_auto_rec(tch, title, start, stop, score):
    """Reserve, or record as a keyword, a programme that scored above the threshold."""
    bttime, ettime = _epg_margin_window(start, stop)
    rows = rec10d.rec10db.select_bytime_bychtxt_all_timeline(bttime, ettime, tch['chtxt'])
    # Highest trigram similarity between this title and the titles already
    # stored on the same channel inside the widened window.
    similarity = 0
    for row in rows:
        candidate = n_gram.trigram(row['title'], title)
        if candidate > similarity:
            similarity = candidate
    if similarity >= 350:
        return
    if status.getSettings_auto_bayes() == 1:
        if similarity < 200:
            _reserve_if_capacity(tch, title, bttime, ettime)
    else:
        recdb.addAutoBayesKeyword(tch['chtxt'], title, start, stop, score)


def xml2db_dom(xmlpath, bctype):
    """Import channels and programmes from an EPG XML file into the database.

    ``xmlpath`` is the path of the XML file. ``bctype`` is the broadcast
    type: 'TE' (terrestrial digital), BS or CS; for terrestrial digital the
    value has the form "te" followed by a number.

    <channel> elements update the stored channel names. <programme> elements
    whose channel is known to ``chdb``, matches ``bctype`` and is visible are
    written to the EPG timeline and accumulated per channel for
    ``auto_rec.updateRecall``. Programmes for which ``auto_rec.calcKey``
    returns more than 2000 are additionally considered for automatic
    reservation or keyword registration.
    """
    started = datetime.datetime.now()
    chlist = []
    tvlist = []
    bayes = {}
    # History CH titles: the leading 『#01 ...』 part of the description is
    # appended to the title.
    rHisch = re.compile(u"『([^『]+)』(.+)\\Z")
    # Passing the path lets minidom open and close the file itself.
    dom = xml.dom.minidom.parse(xmlpath)
    try:
        for channel_elm in dom.getElementsByTagName('channel'):
            chtxtt = channel_elm.getAttribute("id")
            chname = channel_elm.getElementsByTagName("display-name").item(0).childNodes[0].data
            chlist.append([chtxtt, chname])
        for tv in dom.getElementsByTagName('programme'):
            channel = zenhan.toHankaku_ABC123(tv.getAttribute("channel"))
            start = tv.getAttribute("start").replace(" +0900", "")
            stop = tv.getAttribute("stop").replace(" +0900", "")
            title = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("title"))).replace(",", "_")
            desc = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("desc")))
            longdesc = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("longdesc")))
            category = zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("category")))
            title = addTitle_Subtitle(rHisch, 1, title, desc)
            tch = chdb.searchCHFromChtxt(channel)
            if tch is None:
                continue
            if tch.get('bctype', "") != bctype or tch['visible'] == 0:
                continue
            chtxt = tch['chtxt']
            # Per channel: [all titles, all descriptions, programme count].
            previous = bayes.get(chtxt, ["", "", 0])
            bayes[chtxt] = [previous[0] + title + " ",
                            previous[1] + desc + " " + longdesc + " ",
                            previous[2] + 1]
            tvlist.append([channel, start, stop, title, desc, longdesc, category])
            score = auto_rec.calcKey(chtxt, title, desc + " " + longdesc)
            if score > 2000:
                _consider_auto_rec(tch, title, start, stop, score)
    finally:
        # Release the DOM tree even when parsing or a lookup failed.
        dom.unlink()
    # NOTE(review): programmes and bayes data are only written when at least
    # one <channel> element was found; kept as in the original - confirm intent.
    if len(chlist) > 0:
        writeMultiCHDB(chlist)
        if len(tvlist) > 0:
            writeMultiTVDB(bctype, tvlist)
        for ct, texts in bayes.items():
            auto_rec.updateRecall(ct, texts[0], texts[1], texts[2])
            auto_rec.updateRecall("ALL", texts[0], texts[1], texts[2])
    elapsed = datetime.datetime.now() - started
    recdblist.printutf8(bctype + u" epg取り出し終了")
    recdblist.printutf8(str(elapsed.days * 24 * 60 * 60 + elapsed.seconds)+u"seconds taken for updating bayes-auto DB .")
def xml2db_dom_logo(xmlpath, bctype):
    """Import channel logos from an XML file into the database.

    ``xmlpath`` is the path of the XML file. ``bctype`` is the broadcast
    type: 'TE' (terrestrial digital), BS or CS; for terrestrial digital the
    value has the form "te" followed by a number. Here it is only used in
    the progress message.

    Every <logo> element contributes [int(type attribute), sv attribute,
    data of its first child node]; the collected list is handed to
    ``writeMultiLogoDB`` when it is not empty.
    """
    started = datetime.datetime.now()
    logolist = []
    # Passing the path lets minidom open and close the file itself.
    dom = xml.dom.minidom.parse(xmlpath)
    try:
        for logo in dom.getElementsByTagName('logo'):
            logo_type = int(logo.getAttribute("type"))
            sv = logo.getAttribute("sv")
            pngdata = logo.childNodes[0].data
            logolist.append([logo_type, sv, pngdata])
    finally:
        # Release the DOM tree even when an element was malformed.
        dom.unlink()
    if len(logolist) > 0:
        writeMultiLogoDB(logolist)
    elapsed = datetime.datetime.now() - started
    recdblist.printutf8(bctype + u" epg取り出し終了")
    recdblist.printutf8(str(elapsed.days * 24 * 60 * 60 + elapsed.seconds)+u"seconds taken for updating logo DB .")
def addTitle_Subtitle(recompiled,num,title,exp):
    """Append a bracketed group captured from ``exp`` to ``title``.

    ``recompiled`` is a compiled regular expression that is matched against
    the start of ``exp``. When it matches, group ``num`` is wrapped in 『』 and
    appended to ``title``; otherwise ``title`` is returned unchanged.

    Any exception (for example an invalid group number) is logged through
    ``recdblist.addCommonlogEX`` and ``title`` is returned unchanged.
    """
    try:
        matched = recompiled.match(exp)
        if matched:
            return title + u"『" + matched.group(num) + u"』"
        return title
    except Exception as inst:
        # Best effort: a failure here must not abort the whole EPG import.
        recdblist.addCommonlogEX("Error", "addTitle_Subtitle(xml2db_dom.py)", str(type(inst)),str(inst)+traceback.format_exc(),log_level=200)
        return title