OSDN Git Service

use n-gram to search key schedule.
author gn64_jp <gn64_jp@4e526526-5e11-4fc0-8910-f8fd03428081>
Sat, 18 Jul 2009 06:21:35 +0000 (06:21 +0000)
committer gn64_jp <gn64_jp@4e526526-5e11-4fc0-8910-f8fd03428081>
Sat, 18 Jul 2009 06:21:35 +0000 (06:21 +0000)
git-svn-id: svn+ssh://svn.sourceforge.jp/svnroot/rec10@71 4e526526-5e11-4fc0-8910-f8fd03428081

rec10/trunk/src/epgdb.py
rec10/trunk/src/n_gram.py [new file with mode: 0644]
rec10/trunk/src/recdb.py
rec10/trunk/src/timerec.py
rec10/trunk/src/ts2epg.py
rec10/trunk/src/zenhan.py [new file with mode: 0644]

index 40850ef..6185824 100644 (file)
@@ -3,11 +3,13 @@
 # Rec10 TS Recording Tools
 # Copyright (C) 2009 Yukikaze
 import os
+import time
 import sqlite3
 import chdb
 import datetime
 import ts2epg
 import xml2db
+import n_gram
 path=str(os.path.dirname(os.path.abspath(__file__)))+"/"
 tmppath=path
 def update(chtxt):
@@ -79,6 +81,63 @@ def searchtime(titletxt,time,deltatime):#時間以内のものだけを表示 de
         dt=abs(dt)
         if dt<deltatime*60*60:
             ret.append(chtxt+","+title.encode('utf-8')+","+btime+","+etime)
+            print title
     db.close()
     return ret
+def searchtime2(titletxt,timet,deltatime,chtxt):
+    """
+    Find the programme on channel chtxt whose title best matches titletxt.
+
+    Only programmes that start between timet and timet + deltatime hours are
+    considered. Each candidate title is scored with n_gram.bigram(); a
+    candidate with a positive title score also receives a bonus that gets
+    smaller the later the programme starts.
+
+    titletxt  -- keyword to look for (handed to n_gram.bigram unchanged)
+    timet     -- start of the search window, "YYYY-MM-DD HH:MM:SS"
+    deltatime -- length of the search window in hours (anything int() accepts)
+    chtxt     -- channel identifier compared with chdata.chtxt
+
+    Returns a list with one "chtxt,title,begin,end" string for the best
+    candidate, or an empty list when no candidate scored above zero, so
+    callers can test the result with len().
+    """
+    # Named basetime (not "time") so the imported time module is not shadowed.
+    basetime=datetime.datetime.strptime(timet,"%Y-%m-%d %H:%M:%S")
+    deltatime=int(deltatime)
+    beforetime=basetime.strftime("%Y%m%d%H%M%S")
+    aftertime=(basetime+datetime.timedelta(hours=deltatime)).strftime("%Y%m%d%H%M%S")
+    # Values are bound as parameters instead of being pasted into the SQL text.
+    dbexe="SELECT chdata.chtxt,title,start,stop FROM tv INNER JOIN chdata WHERE chdata.ontv=tv.channel AND start >= ? AND start <= ? AND chdata.chtxt=?"
+    best=None
+    bestpoint=0
+    db=sqlite3.connect(path+"ch.db")
+    try:
+        for chtxtt, title, start, stop in db.execute(dbexe,(beforetime,aftertime,chtxt)):
+            p=n_gram.bigram(titletxt,title)
+            # start/stop begin with YYYYMMDDHHMM; seconds are fixed to 00.
+            btime=start[0:4]+"-"+start[4:6]+"-"+start[6:8]+" "+start[8:10]+":"+start[10:12]+":00"
+            etime=stop[0:4]+"-"+stop[4:6]+"-"+stop[6:8]+" "+stop[8:10]+":"+stop[10:12]+":00"
+            bt=datetime.datetime.strptime(btime,"%Y-%m-%d %H:%M:%S")
+            dt=bt-basetime
+            dt=24*60*60*dt.days+dt.seconds+1
+            p=p*90
+            # Time bonus: 1000 for a programme starting right at timet,
+            # decreasing linearly to 0 one week later (integer arithmetic).
+            dt=1000-1000*dt//(7*24*60*60)
+            if p>0:
+                p=p+dt
+                print dt
+            if p>bestpoint:
+                best=chtxtt+","+title.encode('utf-8')+","+btime+","+etime
+                bestpoint=p
+            if p>0:
+                print title+":"+str(p)+"点"
+    finally:
+        # Close the connection even when a row cannot be parsed.
+        db.close()
+    ret=[]
+    if best is not None:
+        ret.append(best)
+    print ret
+    return ret
diff --git a/rec10/trunk/src/n_gram.py b/rec10/trunk/src/n_gram.py
new file mode 100644 (file)
index 0000000..75ef09c
--- /dev/null
@@ -0,0 +1,38 @@
+#!/usr/bin/python
+# coding: UTF-8
+# Rec10 TS Recording Tools
+# Copyright (C) 2009 Yukikaze
+import zenhan
+def bigram(str1,str2):
+    """
+    Score how closely str2 matches the keyword str1 using character bigrams.
+
+    Both strings are first passed through zenhan.toHankaku(). str1 is then
+    cut into its consecutive two-character pieces (a one-character str1 is
+    used as a single piece). Every piece that occurs n > 0 times in str2
+    adds 90+10*n points; pieces that do not occur add nothing.
+
+    Returns the total as an integer; 0 when str1 is empty.
+    """
+    str1=zenhan.toHankaku(str1)
+    str2=zenhan.toHankaku(str2)
+    if len(str1)==0:
+        # An empty keyword has no piece to look for.
+        return 0
+    if len(str1)<2:
+        gram=[str1]
+    else:
+        gram=[str1[i:i+2] for i in range(len(str1)-1)]
+    point=0
+    for x in gram:
+        n=find_gram(x, str2)
+        if n>0:
+            point=point+90+10*n
+    return point
+def find_gram(gram,s):
+    """
+    Count how many times gram occurs in s; overlapping occurrences count.
+
+    An empty gram is reported as 0 occurrences: s.find("") never returns -1,
+    so searching for it would never terminate.
+    """
+    if not gram:
+        return 0
+    i=0
+    pos=s.find(gram)
+    while pos!=-1:
+        i=i+1
+        # Resume one character after the previous hit so overlaps are seen.
+        pos=s.find(gram,pos+1)
+    return i
\ No newline at end of file
index 0c0f3a7..5b6c532 100644 (file)
@@ -152,6 +152,14 @@ def getnow(dhour):
     db.commit()
     db.close()
     return recdatum
+def countRecNow(dhour):
+    """
+    Return how many entries of getnow(dhour) have a 'type' equal to
+    "key", "keyevery", "rec" or "ret".
+    """
+    # NOTE(review): "ret" is kept exactly as written; confirm it is a real
+    # record type and not a typo.
+    counted=("key","keyevery","rec","ret")
+    return sum(1 for entry in getnow(dhour) if entry['type'] in counted)
 def delete_old(dhour):
     """
     delete keys except recdb.REC_MISS_ENCODE and recdb.REC_KEYWORD_EVERY_SOME_DAYS before dhour hours from now.
index 36dbe41..beffca4 100644 (file)
@@ -24,7 +24,10 @@ def task():
     """
     recdb.delete_old("6")
     tasks=recdb.getnow("1")
-    if len(tasks)==0:
+    inum=recdb.countRecNow("1")
+    print "now-rectask"
+    print inum
+    if inum==0:
         update=chdb.update()
         #print "番組表更新処理"
         print update
@@ -77,8 +80,8 @@ def task():
 
         elif task["type"]==recdb.REC_KEYWORD:#"key,"+chtxt+","+keyword+","+btime+","+deltatime+","+opt
             deltatime=task['deltatime']
-            if dt<30*60:
-                recdatum=epgdb.searchtime(title,btime,deltatime)
+            if dt<90*60:
+                recdatum=epgdb.searchtime2(title.decode('utf-8'),btime,deltatime,chtxt)
                 if len(recdatum)>0:
                     recdb.del_reckey(recdb.REC_KEYWORD, title, chtxt, btime)
                     for recdata in recdatum:
index 674c92a..8218280 100644 (file)
@@ -17,27 +17,27 @@ def write(pout,ch):
     print "ts2epg処理"
     bctype=chdb.chsearch(ch)['bctype']
     print ch+":"+bctype
-    timet="120"
+    timet="180"
     if re.search(u'cs',bctype):
         mode="/CS"
-        timet="300"
+        timet="240"
     elif re.search(u'bs',bctype):
         mode="/BS"
-        timet="300"
+        timet="180"
     else :
         mode=chdb.bctypesearch(u'te'+ch)['ontv']
     tsepg2xml=configreader.getpath("tsepg2xml")
     epgdump=configreader.getpath("epgdump")
     if os.access(pout+".ts", os.F_OK):
-        os.removedirs(pout+".ts")
-    time.sleep(5)
+        os.remove(pout+".ts")
+    time.sleep(10)
     print type(ch)
     print ch
-    tv2ts.tv2tsmix(pout+".ts",ch,"180")
+    tv2ts.tv2tsmix(pout+".ts",ch,timet)
     t=0
     while not os.access(pout+".ts", os.F_OK):
         time.sleep(10)
-        tv2ts.tv2tsmix(pout+".ts",ch,"180")
+        tv2ts.tv2tsmix(pout+".ts",ch,timet)
         t=t+1
         if t>10:
             break
@@ -45,6 +45,6 @@ def write(pout,ch):
     #exe="export LANG=ja_JP.UTF-8 && /usr/bin/nice -n 19 "+epgdump+" "+mode+" "+pout+".ts "+pout
     print exe
     os.system(exe)
-    time.sleep(5)
+    time.sleep(10)
         
 
diff --git a/rec10/trunk/src/zenhan.py b/rec10/trunk/src/zenhan.py
new file mode 100644 (file)
index 0000000..d5f5c96
--- /dev/null
@@ -0,0 +1,26 @@
+#!/usr/bin/python
+# coding: UTF-8
+# Rec10 TS Recording Tools
+# Copyright (C) 2009 Yukikaze
+
+# Conversion tables read by toHankaku(): the character at index i of a z_*
+# string (full-width form) is replaced by the character at index i of the
+# matching h_* string (half-width form), so each pair must keep the same
+# length and order.
+# NOTE(review): the yen sign in z_ascii is kept as found; confirm whether the
+# full-width yen sign (U+FFE5) was intended.
+z_ascii=u"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz !”#$%&’()*+,−./:;<=>?@[¥]^_‘{|}〜"
+h_ascii=u"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
+z_number=u"0123456789"
+h_number=u"0123456789"
+def toHankaku(str):
+    """
+    Return a unicode copy of str in which every character found in z_ascii
+    (then z_number) is replaced by the character at the same index of
+    h_ascii (then h_number); all other characters are kept unchanged.
+    """
+    converted=[]
+    for ch in str:
+        pos=z_ascii.find(ch)
+        if pos>=0:
+            ch=h_ascii[pos]
+        # The digit table is consulted with the result of the first lookup.
+        pos=z_number.find(ch)
+        if pos>=0:
+            ch=h_number[pos]
+        converted.append(ch)
+    return u"".join(converted)
+
+