OSDN Git Service

bayes suggest implemented.
author gn64_jp <gn64_jp@4e526526-5e11-4fc0-8910-f8fd03428081>
Thu, 12 Nov 2009 14:16:58 +0000 (14:16 +0000)
committer gn64_jp <gn64_jp@4e526526-5e11-4fc0-8910-f8fd03428081>
Thu, 12 Nov 2009 14:16:58 +0000 (14:16 +0000)
git-svn-id: svn+ssh://svn.sourceforge.jp/svnroot/rec10@221 4e526526-5e11-4fc0-8910-f8fd03428081

rec10/trunk/src/auto_rec.py
rec10/trunk/src/dbMySQL.py
rec10/trunk/src/recdb.py
rec10/trunk/src/timerec.py
rec10/trunk/src/ts2epg.py
rec10/trunk/src/tv2mp4.py
rec10/trunk/src/xml2db.py

index b64f789..5c50232 100644 (file)
@@ -15,10 +15,14 @@ def change_db_num(chtxt,recaddnum,alladdnum):
     rec10d.rec10db.add_num_bayeskey(chtxt, recaddnum,alladdnum)
 def change_db_ratio_rec(key,chtxt,beforenum,addnum):
     rec10d.rec10db.change_ratio_rec_bayeskey(key, chtxt, beforenum, addnum)
+def change_db_ratio_rec_many(chtxt,beforenum,list):
+    rec10d.rec10db.change_multi_ratio_rec_bayeskey(chtxt, beforenum, list)
+def change_db_reduce_ratio_rec(chtxt,beforenum,addnum):
+    rec10d.rec10db.change_ratio_rec_reduce_bayeskey(chtxt, beforenum, addnum)
 def change_db_ratio_all(key,chtxt,beforenum,addnum):
     rec10d.rec10db.change_ratio_all_bayeskey(key, chtxt, beforenum, addnum)
 def change_db_ratio_all_many(chtxt,beforenum,list):
-    rec10d.rec10db.change_many_ratio_all_bayeskey(chtxt,beforenum, list)
+    rec10d.rec10db.change_multi_ratio_all_bayeskey(chtxt,beforenum, list)
 def get_db_num(chtxt):
     ret=get_db_key("NUM", chtxt)
     if len(ret)<3:
@@ -33,20 +37,25 @@ def update_recall(chtxt,titles,descs,newnum):
     descs=" "+descs+" "
     str1=titles*2+descs+titles*2
     noun=n_gram.get_noun_quad_gram(str1)
-    bnum=get_db_num(chtxt)[2]
+    bnum=get_db_num(chtxt)[3]
     if bnum<1:
         bnum=100
     change_db_ratio_all_many(chtxt, bnum,noun)
     change_db_epg(chtxt, bnum, newnum)
     change_db_num(chtxt, 0, newnum)
 def add_key(chtxt,title,desc):
-    title=" "+title+" "
-    desc=" "+desc+" "
+    title=u" "+title+u" "
+    desc=u" "+desc+u" "
     str1=title*2+desc+title*2##タイトルは重視したいので幾度か足す。
+    print str1
     noun=n_gram.get_noun_quad_gram(str1)#nounは辞書のキーに文字を、中身に出現回数を書いたもの。
-    for i,j in noun.items():
-        change_db_ratio_rec(i,chtxt,bnum,j)
-    change_db_num(chtxt, 1, 0)
+    bnum=get_db_num(chtxt)[2]
+    print noun
+    if bnum<1:
+        bnum=1
+    change_db_ratio_rec_many(chtxt, bnum, noun)
+    change_db_num(chtxt,1,0)
+    change_db_reduce_ratio_rec(chtxt, bnum,1)
 def calc_key(chtxt,title,desc):
     """
 
@@ -59,15 +68,16 @@ def calc_key(chtxt,title,desc):
     noun=n_gram.get_noun_quad_gram(str1)#nounは辞書のキーに文字を、中身に出現回数を書いたもの。
     tnum=0
     for key,num in noun.items():
+        addnum=0
         pp=get_db_key(key,chtxt)
-        trec=pp[2]
-        tall=pp[3]
-        if tall<trec:
-            p=1000*trec/(3*tall+trec)
-            if p>kyoukai:
-                tnum=tnum+1
-            difp=trec-tall+difp*tnum
-            tnum=tnum+1
+        if len(pp)>3:
+            trec=pp[2]
+            tall=pp[3]
+            if tall<trec:
+                p=1000*trec/(3*tall+trec)
+                if p>kyoukai:
+                    addnum=addnum+1
+        tnum=tnum+num*addnum
     if kyoukai_num<tnum:
         return 1
     else:
index 6d024c5..e029c2b 100644 (file)
@@ -604,21 +604,15 @@ class DB_MySQL:
         db = self.connect_db()
         beforenumf=beforenum
         beforenum=str(beforenum)
-        #addnum=str(addnum)
-        #db[1].execute("\
-        #UPDATE bayeskey SET ratio_all=(ratio_all*%s+%s)/%s WHERE chtxt=%s AND keychar=%s",\
-        #(beforenum,addnum,beforenum,chtxt,key)\
-        #)
         db[1].execute("INSERT INTO bayeskey (keychar,chtxt,ratio_rec,ratio_all) \
         VALUES (%s,%s,%s,%s)\
         ON DUPLICATE KEY UPDATE \
         ratio_all=(ratio_all*%s+%s)/%s",(key,chtxt,"0",str(Decimal(addnum)/beforenumf),beforenum,chtxt,key))
         self.close_db(db)
-    def change_many_ratio_all_bayeskey(self,chtxt,beforenum,list):
+    def change_multi_ratio_all_bayeskey(self,chtxt,beforenum,list):
         """
         list={key:addnum}のリスト
         """
-        retlist=[]
         beforenumf=beforenum
         beforenum=str(beforenum)
         db = self.connect_db()
@@ -629,15 +623,34 @@ class DB_MySQL:
             ON DUPLICATE KEY UPDATE \
             ratio_all=(ratio_all*%s+%s)/%s",retl)
         self.close_db(db)
-    def change_ratio_rec_bayeskey(self,key,chtxt,beforenum,addnum):
-        db = self.connect_db()
+    def change_ratio_rec_reduce_bayeskey(self,chtxt,beforenum,newnum):
         beforenum=str(beforenum)
-        addnum=str(addnum)
+        newnum=str(newnum)
+        db = self.connect_db()
         db[1].execute("\
-        UPDATE bayeskey SET ratio_rec=(ratio_rec*%s+%s)/%s WHERE chtxt=%s AND keychar=%s",\
-        (beforenum,addnum,beforenum,chtxt,key)\
+        UPDATE bayeskey SET ratio_rec=ratio_rec*%s/(%s+%s) WHERE chtxt=%s AND NOT (keychar=\"NUM\")",(beforenum,newnum,beforenum,chtxt)\
         )
         self.close_db(db)
+    def change_ratio_rec_bayeskey(self,key,chtxt,beforenum,addnum):
+        db = self.connect_db()
+        beforenumf=beforenum
+        beforenum=str(beforenum)
+        db[1].execute("INSERT INTO bayeskey (keychar,chtxt,ratio_rec,ratio_all) \
+        VALUES (%s,%s,%s,%s)\
+        ON DUPLICATE KEY UPDATE \
+        ratio_rec=(ratio_rec*%s+%s)/%s",(key,chtxt,str(Decimal(addnum)/beforenumf),"0",beforenum,chtxt,key))
+        self.close_db(db)
+    def change_multi_ratio_rec_bayeskey(self,chtxt,beforenum,list):#self,key,chtxt,beforenum,addnum):
+        beforenumf=beforenum
+        beforenum=str(beforenum)
+        db = self.connect_db()
+        for i,j in list.items():
+            retl=(i,chtxt,str(Decimal(j)/beforenumf),"0",beforenum,str(j),beforenum)
+            db[1].execute("INSERT INTO bayeskey (keychar,chtxt,ratio_rec,ratio_all) \
+            VALUES (%s,%s,%s,%s)\
+            ON DUPLICATE KEY UPDATE \
+            ratio_rec=(ratio_rec*%s+%s)/%s",retl)
+        self.close_db(db)
     def select_by_key_bayeskey(self,key,chtxt):
         db = self.connect_db()
         dbexe = db[1].execute("\
index a9268e1..85f9ac7 100644 (file)
@@ -27,6 +27,7 @@ global REC_TS_RECORDING
 global REC_AVI_TO_MKV
 global REC_CHANGING_CANTAINER
 global REC_AUTO_SUGGEST
+global REC_BAYES_SUGGEST
 global REC_AUTO_KEYWORD
 REC_RESERVE = "res"
 REC_FINAL_RESERVE = "rec"
@@ -48,7 +49,7 @@ REC_CHANGING_CANTAINER = "changingcontainer"
 REC_AVI_TO_MKV = "avi2mkv"
 REC_AUTO_SUGGEST = "auto_suggest"
 REC_AUTO_KEYWORD = "auto_keyword"
-
+REC_BAYES_SUGGEST ="bayes_suggest"
 def recreserv(title, chtxt, btime, etime, opt):#optにはa(アニメ)d(副音声)v(xvid)
     rec_reckey("res", title, chtxt, btime, etime, opt)
 def auto_keyreserv(keyword, chtxt, btime, etime, deltatime, opt):
index a9efc7e..38da071 100644 (file)
@@ -8,6 +8,7 @@ import re
 import sys
 import time
 
+import auto_rec
 import chdb
 import checker
 import configreader
@@ -26,13 +27,9 @@ def task():
     """
     recdb.delete_old("24")
     tasks = recdb.getnow("2")
-    #print tasks
     inum = recdb.countRecNow_minutes("10")
-    #inum=recdb.countRecNow("1")
     print "番組表更新処理"+ datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     print "inum:" + str(inum) + "bscsrec:" + str(status.getBSCSRecording()) + "terec:" + str(status.getTERecording()) 
-
-    #if inum+status.getBSCSRecording()+status.getTERecording() < 2:
     encodenum=0
     b25num=0
     update = chdb.update()
@@ -57,7 +54,6 @@ def task():
                     epgdb.updatebc(bctype)
                     update = chdb.update()
                     i = i + 1
-                    #rec10d.rec10db.update_by_bctype_chdata(bctype)
                 if i > 0:
                     break
             sys.exit()
@@ -87,12 +83,9 @@ def task():
             opt = task['opt']
         except:
             opt = ""
-        #print title
-        #print task
         tnow = datetime.datetime.now()
         dtt = bt-tnow
         dt = dtt.days * 24 * 60 * 60 + dtt.seconds
-        #print title+str(dt)
         if task["type"] == recdb.REC_RESERVE:#"res,"+chtxt+","+title+","+btime+","+etime+","+opt
             if (dt < 58 * 60 and dt > 20 * 60):
                 pid = os.fork()
@@ -103,7 +96,6 @@ def task():
                     chdatat = rec10d.rec10db.select_by_bctype_chdata(bctypet)
                     dt1 = datetime.datetime.strptime(chdatat[0][5], "%Y-%m-%d %H:%M:%S")-datetime.datetime.now()
                     dt1 = dt1.days * 24 * 60 * 60 + dt1.seconds
-                    #print "oikake search:"+str(dt1)
                     if dt1 < 60 * 60:
                         recdata = epgdb.searchtime2(title.decode('utf-8'), btime, "5", chtxt)
                         chtxtn = recdata[0]
@@ -122,14 +114,15 @@ def task():
                                 recdb.del_reckey(recdb.REC_RESERVE, title, chtxt, btime)
                                 recdb.rec_reckey(recdb.REC_FINAL_RESERVE, titlen, chtxtn, btimen, etimen, opt)
                                 recdb.rec_reclog(titlen, chtxtn, btimen, etimen, opt, exp, longexp, category)
+                                auto_rec.add_key(chtxt, titlen,exp+" "+longexp)
                                 print "Oikake "+title+" : "+titlen+" "+btimen+" "+etimen
                             except Exception, inst:
+                                print "Error happended in Oikake DB"
                                 print type(inst)
                                 print inst
                         else:
                             print "nothing match"
                     else:
-                        #print rec10d.rec10db.select_by_bctype_chdata(bctypet)
                         if rec10d.rec10db.select_by_bctype_chdata(bctypet)[0][6] != "0":
                             rec10d.rec10db.update_status_by_bctype_chdata(bctypet, "3")
                     sys.exit()
@@ -159,12 +152,13 @@ def task():
                     try:
                         recdb.del_reckey(recdb.REC_KEYWORD, title, chtxt, btime)
                         recdb.rec_reckey(recdb.REC_RESERVE, titlet, chtxtt, btimet, etimet, opt)
-                        
+                        recdb.rec_reclog(titlet, chtxtt, btimet, etimet, opt, exp,longexp,category)
+                        auto_rec.add_key(chtxt, titlet,exp+" "+longexp)
                         print "key "+title+" : "+titlet+" "+btimet+" "+etimet
                     except Exception, inst:
+                        print "Error happened in REC_KEYWORD DB"
                         print type(inst)
                         print inst
-                    recdb.rec_reclog(titlet, chtxtt, btimet, etimet, opt, exp,longexp,category)
                 else:
                     print "nothing match"
         elif task["type"] == recdb.REC_KEYWORD_EVERY_SOME_DAYS:#"keyevery,"+chtxt+","+keyword+","+btime+","+deltatime+","+opt+","+deltaday
@@ -186,7 +180,6 @@ def task():
                 btxt = bt.strftime("%Y-%m-%d %H:%M:%S")
                 etxt = et.strftime("%Y-%m-%d %H:%M:%S")
                 recdb.everyreserv(title, chtxt, btxt, etxt, deltatime, opt, deltaday)
-            
         elif task["type"] == recdb.REC_FINAL_RESERVE:#"rec,"+chtxt+","+title+","+btime+","+etime+","+opt
             print dt
             if dt < 6 * 60 and dt > 0:
index ee06138..6e5f9aa 100644 (file)
@@ -18,7 +18,7 @@ def write(pout, ch):
     print "ts2epg処理"
     bctype = chdb.chsearch(ch)['bctype']
     print ch + ":" + bctype
-    timet = "10"
+    timet = "30"
     if re.search(u'cs', bctype):
         mode = "/CS"
         timet = "180"
index fb90250..e396543 100644 (file)
@@ -1,4 +1,3 @@
-import os.path
 #!/usr/bin/python
 # coding: UTF-8
 # Rec10 TS Recording Tools
@@ -7,12 +6,12 @@ import sys
 import commands
 import tv2avi
 import os
+import os.path
 import time
 def ts2mp4(pin, pout, opt):
     dir=os.path.split(pin)[0]
     title=os.path.split(pin)[1]
     title=os.path.splitext(title)[0]
-    #title=base64.b16encode(title)
     tpavi=os.path.join(dir, title+".avi")
     tpmp4=os.path.join(dir, title+".mp4")
     tv2avi.ts2avi(pin, tpavi, opt)
index 08b0808..492ba8c 100644 (file)
@@ -10,6 +10,7 @@ import chdb
 import rec10d
 import zenhan
 import datetime
+import recdb
 flags = {}
 lastflag = ""
 xmldate = {}
@@ -18,6 +19,7 @@ bctype = ""
 chlists = []
 tvlists = []
 bayes={}
+dtb=""
 def xml2db(xmlpath, bctypetemp):#bctypeは放送種別で'TE'(地デジ)BS,CSがある。地デジの場合は te数字 が入る
     global db, bctype
     bctype = bctypetemp
@@ -40,13 +42,14 @@ def parseDocument(p, xmlfile):
     p.ParseFile(f)
     f.close()
 def start_element(name, attrs):
-    global flags, lastflag, xmldate
+    global flags, lastflag, xmldate,dtb
     flags[name] = 1
     lastflag = name
     for (key, val) in attrs.iteritems():
         xmldate[name + ":" + key] = val
+    dtb=datetime.datetime.now()
 def end_element(name):
-    global flags, lastflag, xmldate, db, chlists, tvlists ,bayes
+    global flags, lastflag, xmldate, db, chlists, tvlists ,bayes,dtb
     if flags[name] == 1:
         if name == "channel":
             ubctype = unicode(bctype)
@@ -78,8 +81,8 @@ def end_element(name):
                     bt2=[bt[0]+utitle+" ",bt[1]+udesc+" "+ulongdesc+" ",bt[2]+1]
                     bayes[tch['chtxt']]=bt2
                     tvlists.append([ubctype, uch, ustart, ustop, utitle, udesc,ulongdesc,ucategory])
-            if auto_rec.calc_key(chtxt, utitle, udesc+" "+ulongdesc):
-                ""
+                    if auto_rec.calc_key(tch['chtxt'], utitle, udesc+" "+ulongdesc):
+                        recdb.rec_reckey(recdb.REC_BAYES_SUGGEST, utitle, tch['chtxt'], ustart,ustop,"")
             xmldate['title'] = ""
             xmldate['desc'] = ""
             xmldate['longdesc'] = ""
@@ -91,12 +94,11 @@ def end_element(name):
             if len(tvlists) > 0:
                 writemultitvDB(tvlists)
             tvlists = []
-            dtb=datetime.datetime.now()
             for ct,list in bayes.items():
                 auto_rec.update_recall(ct,list[0],list[1],list[2])
             dtb=datetime.datetime.now()-dtb
             print bctype + " epg取り出し終了"
-            print str(dtb.days * 24 * 60 * 60 + dtb.seconds)+"second taken for updating bayes-auto DB ."
+            print str(dtb.days * 24 * 60 * 60 + dtb.seconds)+"seconds taken for updating bayes-auto DB ."
     flags[name] = 0
 def char_data(data):
     global flags, lastflag, xmldate