OSDN Git Service

convert CRLF to LF
[rec10/rec10-git.git] / rec10 / trunk / src / xml2db_dom.py
index b65b189..24c1da7 100644 (file)
-#!/usr/bin/python\r
-# coding: UTF-8\r
-# Rec10 TS Recording Tools\r
-# Copyright (C) 2009-2011 Yukikaze\r
-\r
-import xml.dom.minidom\r
-import datetime\r
-import re\r
-import traceback\r
-\r
-import zenhan\r
-import chdb\r
-import auto_rec\r
-import recdb\r
-import rec10d\r
-import n_gram\r
-import recdblist\r
-import epgdb\r
-import status\r
-def getText(elm):\r
-    nodelist = elm.childNodes\r
-    rc = ""\r
-    # 全てのノードに対して\r
-    for node in nodelist:\r
-        # テキストノードなら値を取得\r
-        if node.nodeType == node.TEXT_NODE:\r
-            rc = rc + node.data\r
-    return rc\r
-def getText_item(obj):\r
-    rc=""\r
-    for o in obj:\r
-        rc=rc+getText(o)\r
-    return rc\r
-def writeMultiTVDB(bctype,tvlists):\r
-    rec10d.rec10db.new_epg_timeline(bctype)\r
-    for channel,start,stop,title,desc,longdesc,category in tvlists:\r
-        rec10d.rec10db.add_epg_timeline(bctype, channel, start, stop, title, desc, longdesc, category)\r
-def writeMultiCHDB(chlists):\r
-    for chtxt,dn in chlists:\r
-        rec10d.rec10db.update_chname_by_chtxt_epg_ch(chtxt,dn)\r
-        if dn==None:\r
-            chdb.changeCHShow(chtxt,"0")\r
-        else:\r
-            if len(dn)<1:\r
-                chdb.changeCHShow(chtxt,"0")\r
-def writeMultiLogoDB(logolists):\r
-    #[type,sv,pngdata]\r
-    for type,sv,pngdata in logolists:\r
-        chtxt=chdb.searchCHFromCsch(sv)['chtxt']\r
-        if len(chtxt)>0:\r
-            rec10d.rec10db.change_logodata_epg_ch(chtxt,type,pngdata)\r
-def xml2db_dom(xmlpath, bctype):#bctypeは放送種別で'TE'(地デジ)BS,CSがある。地デジの場合は te数字 が入る\r
-    dtb=datetime.datetime.now()\r
-    dom=xml.dom.minidom.parse(file(xmlpath))\r
-    chlist=[]\r
-    tvlist=[]\r
-    bayes={}\r
-    rHisch=re.compile(u"『([^『]+)』(.+)\Z")##History CHのタイトル#01  exp:「#01説明」をタイトルに含める\r
-    for ch in dom.getElementsByTagName('channel'):\r
-        chtxtt=ch.getAttribute("id")\r
-        chname=ch.getElementsByTagName("display-name").item(0).childNodes[0].data\r
-        chlist.append([chtxtt,chname])\r
-    for tv in dom.getElementsByTagName('programme'):\r
-        channel=zenhan.toHankaku_ABC123(tv.getAttribute("channel"))\r
-        start=tv.getAttribute("start").replace(" +0900", "")\r
-        stop=tv.getAttribute("stop").replace(" +0900", "")\r
-        title=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("title"))).replace(",", "_")\r
-        desc=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("desc")))\r
-        longdesc=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("longdesc")))\r
-        category=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("category")))\r
-        title=addTitle_Subtitle(rHisch,1,title,desc)\r
-        tch=chdb.searchCHFromChtxt(channel)\r
-        if tch!=None:\r
-            if tch.get('bctype',"")==bctype and not tch['visible']==0:\r
-                bt=bayes.get(tch['chtxt'],["","",0])\r
-                bt2=[bt[0]+title+" ",bt[1]+desc+" "+longdesc+" ",bt[2]+1]\r
-                bayes[tch['chtxt']]=bt2\r
-                tvlist.append([channel,start,stop,title,desc,longdesc,category])\r
-                p=auto_rec.calcKey(tch['chtxt'], title,desc+" "+longdesc)\r
-                if p>2000:\r
-                    bttt=datetime.datetime.strptime(start,"%Y%m%d%H%M%S")\r
-                    bttt=bttt-datetime.timedelta(seconds=1200)\r
-                    bttime=bttt.strftime("%Y-%m-%d %H:%M:%S")\r
-                    ettt=datetime.datetime.strptime(stop,"%Y%m%d%H%M%S")\r
-                    ettt=ettt+datetime.timedelta(seconds=1200)\r
-                    ettime=ettt.strftime("%Y-%m-%d %H:%M:%S")\r
-                    chs=rec10d.rec10db.select_bytime_bychtxt_all_timeline(bttime,ettime,tch['chtxt'])\r
-                    p2=0\r
-                    if len(chs)>0:\r
-                        for ch in chs:\r
-                            p2t=n_gram.trigram(ch['title'],title)\r
-                            if p2t>p2:\r
-                                p2=p2t\r
-                    if p2<350:\r
-                        if status.getSettings_auto_bayes()==1:\r
-                            if p2<200:\r
-                                maxnum=0\r
-                                if len(ch['ch'])>2:\r
-                                    maxnum=epgdb.count_schedule_timeline(bttime, ettime)[1]\r
-                                    maxnum=int(configreader.getConfEnv("bscs_max"))-maxnum\r
-                                else:\r
-                                    maxnum=epgdb.count_schedule_timeline(bttime, ettime)[0]\r
-                                    maxnum=int(configreader.getConfEnv("te_max"))-maxnum\r
-                                if maxnum>0:\r
-                                    topt=status.getSettings_auto_opt()\r
-                                    recdb.reserveReckey(recdblist.REC_RESERVE,title,tch['chtxt'], bttime, ettime,topt)\r
-                        else:\r
-                            recdb.addAutoBayesKeyword(tch['chtxt'],title,start,stop,p)\r
-    dom.unlink()\r
-    if len(chlist) > 0:\r
-        writeMultiCHDB(chlist)\r
-        chlist = []\r
-        if len(tvlist) > 0:\r
-            writeMultiTVDB(bctype,tvlist)\r
-        tvlist = []\r
-        for ct,list in bayes.items():\r
-            auto_rec.updateRecall(ct,list[0],list[1],list[2])\r
-            auto_rec.updateRecall("ALL",list[0],list[1],list[2])\r
-    dtb=datetime.datetime.now()-dtb\r
-    recdblist.printutf8(bctype + u" epg取り出し終了")\r
-    recdblist.printutf8(str(dtb.days * 24 * 60 * 60 + dtb.seconds)+u"seconds taken for updating bayes-auto DB .")\r
-def xml2db_dom_logo(xmlpath, bctype):#bctypeは放送種別で'TE'(地デジ)BS,CSがある。地デジの場合は te数字 が入る\r
-    dtb=datetime.datetime.now()\r
-    dom=xml.dom.minidom.parse(file(xmlpath))\r
-    logolist=[]\r
-    for logo in dom.getElementsByTagName('logo'):\r
-        type=int(logo.getAttribute("type"))\r
-        sv=logo.getAttribute("sv")\r
-        pngdata=logo.childNodes[0].data\r
-        logolist.append([type,sv,pngdata])\r
-    dom.unlink()\r
-    if len(logolist) > 0:\r
-        writeMultiLogoDB(logolist)\r
-    dtb=datetime.datetime.now()-dtb\r
-    recdblist.printutf8(bctype + u" epg取り出し終了")\r
-    recdblist.printutf8(str(dtb.days * 24 * 60 * 60 + dtb.seconds)+u"seconds taken for updating logo DB .")\r
-def addTitle_Subtitle(recompiled,num,title,exp):\r
-    try:\r
-        tST=recompiled.match(exp)\r
-        newtitle=u""\r
-        if tST:\r
-            newtitle=title+u"『"+tST.group(num)+u"』"\r
-        else:\r
-            newtitle=title\r
-    except Exception, inst:\r
-        recdblist.addCommonlogEX("Error", "addTitle_Subtitle(xml2db_dob.py)", str(type(inst)),str(inst)+traceback.format_exc(),log_level=200)\r
-        newtitle=title\r
-    return newtitle\r
+#!/usr/bin/python
+# coding: UTF-8
+# Rec10 TS Recording Tools
+# Copyright (C) 2009-2011 Yukikaze
+
+import xml.dom.minidom
+import datetime
+import re
+import traceback
+
+import zenhan
+import chdb
+import auto_rec
+import recdb
+import rec10d
+import n_gram
+import recdblist
+import epgdb
+import status
+def getText(elm):
+    """Return the concatenated data of the text nodes directly under elm.
+
+    Only immediate children whose nodeType equals TEXT_NODE contribute;
+    child elements are not descended into.  An element without text
+    children yields an empty string.
+    """
+    pieces = [child.data for child in elm.childNodes
+              if child.nodeType == child.TEXT_NODE]
+    return "".join(pieces)
+def getText_item(obj):
+    """Return getText() of every element in obj, joined in iteration order.
+
+    obj is any iterable of DOM elements (the caller passes the result of
+    getElementsByTagName); an empty iterable yields an empty string.
+    """
+    return "".join([getText(elem) for elem in obj])
+def writeMultiTVDB(bctype,tvlists):
+    """Write the collected programme rows for bctype to the EPG timeline.
+
+    Calls rec10d.rec10db.new_epg_timeline(bctype) once, then
+    add_epg_timeline() for each row.  Every row must hold exactly seven
+    items: channel, start, stop, title, desc, longdesc, category.
+    """
+    rec10d.rec10db.new_epg_timeline(bctype)
+    for row in tvlists:
+        channel,start,stop,title,desc,longdesc,category = row
+        rec10d.rec10db.add_epg_timeline(
+            bctype, channel, start, stop, title, desc, longdesc, category)
+def writeMultiCHDB(chlists):
+    """Store the display name of each (chtxt, display-name) pair.
+
+    Every pair is passed to rec10d.rec10db.update_chname_by_chtxt_epg_ch().
+    When the display name is None or empty, chdb.changeCHShow(chtxt, "0")
+    is called for that channel as well.
+    """
+    for chtxt,dispname in chlists:
+        rec10d.rec10db.update_chname_by_chtxt_epg_ch(chtxt,dispname)
+        nameless = dispname==None or len(dispname)<1
+        if nameless:
+            chdb.changeCHShow(chtxt,"0")
+def writeMultiLogoDB(logolists):
+    """Store logo data for every [type, sv, pngdata] entry of logolists.
+
+    The channel is looked up with chdb.searchCHFromCsch(sv); entries whose
+    looked-up 'chtxt' is empty are skipped, the others are handed to
+    rec10d.rec10db.change_logodata_epg_ch().
+    """
+    for logotype,sv,pngdata in logolists:
+        found=chdb.searchCHFromCsch(sv)
+        chtxt=found['chtxt']
+        if len(chtxt)<1:
+            continue
+        rec10d.rec10db.change_logodata_epg_ch(chtxt,logotype,pngdata)
+def xml2db_dom(xmlpath, bctype):
+    """Load an EPG XML file into the channel/timeline DBs and feed auto-rec.
+
+    xmlpath -- path of the XML file to parse.
+    bctype  -- broadcast type: 'TE' (terrestrial digital), BS or CS; for
+               terrestrial digital the value is "te" followed by a number.
+
+    Every <channel> element contributes an [id, display-name] pair.  Every
+    <programme> element whose channel is known to chdb, whose 'bctype'
+    equals bctype and whose 'visible' is not 0 is queued for the timeline
+    table and scored with auto_rec.calcKey(); high-scoring programmes may
+    be reserved or registered as auto-bayes keywords.  Returns None.
+    """
+    dtb=datetime.datetime.now()
+    # Hand the path to minidom so that it opens and closes the file itself;
+    # a file object created here would never be closed.
+    dom=xml.dom.minidom.parse(xmlpath)
+    chlist=[]
+    tvlist=[]
+    bayes={}
+    # For History CH style programmes: when the description starts with a
+    # bracketed part (e.g. "#01 ..."), that part is appended to the title.
+    rHisch=re.compile(u"『([^『]+)』(.+)\Z")
+    for ch in dom.getElementsByTagName('channel'):
+        chtxtt=ch.getAttribute("id")
+        chname=ch.getElementsByTagName("display-name").item(0).childNodes[0].data
+        chlist.append([chtxtt,chname])
+    for tv in dom.getElementsByTagName('programme'):
+        channel=zenhan.toHankaku_ABC123(tv.getAttribute("channel"))
+        # Timestamps are kept as bare "%Y%m%d%H%M%S" strings; the fixed
+        # " +0900" offset suffix is dropped.
+        start=tv.getAttribute("start").replace(" +0900", "")
+        stop=tv.getAttribute("stop").replace(" +0900", "")
+        title=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("title"))).replace(",", "_")
+        desc=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("desc")))
+        longdesc=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("longdesc")))
+        category=zenhan.toHankaku_ABC123(getText_item(tv.getElementsByTagName("category")))
+        title=addTitle_Subtitle(rHisch,1,title,desc)
+        tch=chdb.searchCHFromChtxt(channel)
+        if tch!=None:
+            if tch.get('bctype',"")==bctype and not tch['visible']==0:
+                # Per channel: accumulated titles, accumulated descriptions
+                # and the number of programmes seen.
+                bt=bayes.get(tch['chtxt'],["","",0])
+                bt2=[bt[0]+title+" ",bt[1]+desc+" "+longdesc+" ",bt[2]+1]
+                bayes[tch['chtxt']]=bt2
+                tvlist.append([channel,start,stop,title,desc,longdesc,category])
+                p=auto_rec.calcKey(tch['chtxt'], title,desc+" "+longdesc)
+                if p>2000:
+                    # Widen the programme window by 1200 seconds on both sides.
+                    bttt=datetime.datetime.strptime(start,"%Y%m%d%H%M%S")
+                    bttt=bttt-datetime.timedelta(seconds=1200)
+                    bttime=bttt.strftime("%Y-%m-%d %H:%M:%S")
+                    ettt=datetime.datetime.strptime(stop,"%Y%m%d%H%M%S")
+                    ettt=ettt+datetime.timedelta(seconds=1200)
+                    ettime=ettt.strftime("%Y-%m-%d %H:%M:%S")
+                    chs=rec10d.rec10db.select_bytime_bychtxt_all_timeline(bttime,ettime,tch['chtxt'])
+                    # p2 is the highest trigram score between this title and
+                    # the titles returned for the widened window.
+                    p2=0
+                    if len(chs)>0:
+                        for ch in chs:
+                            p2t=n_gram.trigram(ch['title'],title)
+                            if p2t>p2:
+                                p2=p2t
+                    if p2<350:
+                        if status.getSettings_auto_bayes()==1:
+                            if p2<200:
+                                maxnum=0
+                                # NOTE(review): ch is whatever the last loop
+                                # left behind - the final row of chs, or, when
+                                # chs was empty, a DOM element from the
+                                # <channel> loop (which cannot be subscripted).
+                                # tch['ch'] was probably intended - confirm.
+                                if len(ch['ch'])>2:
+                                    maxnum=epgdb.count_schedule_timeline(bttime, ettime)[1]
+                                    # NOTE(review): configreader is not among
+                                    # this module's imports; this raises
+                                    # NameError unless the name is provided
+                                    # some other way - confirm and import it.
+                                    maxnum=int(configreader.getConfEnv("bscs_max"))-maxnum
+                                else:
+                                    maxnum=epgdb.count_schedule_timeline(bttime, ettime)[0]
+                                    maxnum=int(configreader.getConfEnv("te_max"))-maxnum
+                                if maxnum>0:
+                                    topt=status.getSettings_auto_opt()
+                                    recdb.reserveReckey(recdblist.REC_RESERVE,title,tch['chtxt'], bttime, ettime,topt)
+                        else:
+                            recdb.addAutoBayesKeyword(tch['chtxt'],title,start,stop,p)
+    dom.unlink()
+    # NOTE(review): the timeline write and the recall update only happen when
+    # at least one <channel> element was found - confirm this nesting is
+    # intended.
+    if len(chlist) > 0:
+        writeMultiCHDB(chlist)
+        chlist = []
+        if len(tvlist) > 0:
+            writeMultiTVDB(bctype,tvlist)
+        tvlist = []
+        for ct,stats in bayes.items():
+            auto_rec.updateRecall(ct,stats[0],stats[1],stats[2])
+            auto_rec.updateRecall("ALL",stats[0],stats[1],stats[2])
+    dtb=datetime.datetime.now()-dtb
+    recdblist.printutf8(bctype + u" epg取り出し終了")
+    recdblist.printutf8(str(dtb.days * 24 * 60 * 60 + dtb.seconds)+u"seconds taken for updating bayes-auto DB .")
+def xml2db_dom_logo(xmlpath, bctype):
+    """Load the <logo> elements of an EPG XML file into the logo DB.
+
+    xmlpath -- path of the XML file to parse.
+    bctype  -- broadcast type: 'TE' (terrestrial digital), BS or CS; for
+               terrestrial digital the value is "te" followed by a number.
+               It is only used in the progress message here.
+
+    Each <logo> element contributes [int(type attribute), sv attribute,
+    data of its first child node]; the collected list is handed to
+    writeMultiLogoDB() when it is not empty.  Returns None.
+    """
+    dtb=datetime.datetime.now()
+    # Hand the path to minidom so that it opens and closes the file itself;
+    # a file object created here would never be closed.
+    dom=xml.dom.minidom.parse(xmlpath)
+    logolist=[]
+    for logo in dom.getElementsByTagName('logo'):
+        logotype=int(logo.getAttribute("type"))
+        sv=logo.getAttribute("sv")
+        pngdata=logo.childNodes[0].data
+        logolist.append([logotype,sv,pngdata])
+    dom.unlink()
+    if len(logolist) > 0:
+        writeMultiLogoDB(logolist)
+    dtb=datetime.datetime.now()-dtb
+    recdblist.printutf8(bctype + u" epg取り出し終了")
+    recdblist.printutf8(str(dtb.days * 24 * 60 * 60 + dtb.seconds)+u"seconds taken for updating logo DB .")
+def addTitle_Subtitle(recompiled,num,title,exp):
+    """Return title, extended by a bracketed subtitle taken from exp.
+
+    recompiled -- compiled regular expression, matched at the start of exp.
+    num        -- number of the match group to append.
+    title      -- programme title.
+    exp        -- text searched for the subtitle (the caller passes the
+                  programme description).
+
+    When the pattern matches, the selected group is appended to title
+    wrapped in the same brackets; otherwise title is returned unchanged.
+    Any exception is logged through recdblist.addCommonlogEX() and title is
+    returned unchanged, so this function never raises.
+    """
+    try:
+        tST=recompiled.match(exp)
+        if not tST:
+            return title
+        return title+u"『"+tST.group(num)+u"』"
+    except Exception, inst:
+        # The log entry names this module (the previous text misspelled it).
+        recdblist.addCommonlogEX("Error", "addTitle_Subtitle(xml2db_dom.py)", str(type(inst)),str(inst)+traceback.format_exc(),log_level=200)
+        return title