bayes suggest implemented.

author gn64_jp <gn64_jp@4e526526-5e11-4fc0-8910-f8fd03428081>

Thu, 12 Nov 2009 14:16:58 +0000 (14:16 +0000)

committer gn64_jp <gn64_jp@4e526526-5e11-4fc0-8910-f8fd03428081>

Thu, 12 Nov 2009 14:16:58 +0000 (14:16 +0000)
author gn64_jp <gn64_jp@4e526526-5e11-4fc0-8910-f8fd03428081>
Thu, 12 Nov 2009 14:16:58 +0000 (14:16 +0000)
committer gn64_jp <gn64_jp@4e526526-5e11-4fc0-8910-f8fd03428081>
Thu, 12 Nov 2009 14:16:58 +0000 (14:16 +0000)
diff --git a/rec10/trunk/src/auto_rec.py b/rec10/trunk/src/auto_rec.py

index b64f789..5c50232 100644 (file)
--- a/rec10/trunk/src/auto_rec.py
+++ b/rec10/trunk/src/auto_rec.py
@@ -15,10 +15,14 @@ def change_db_num(chtxt,recaddnum,alladdnum):
      rec10d.rec10db.add_num_bayeskey(chtxt, recaddnum,alladdnum)
  def change_db_ratio_rec(key,chtxt,beforenum,addnum):
      rec10d.rec10db.change_ratio_rec_bayeskey(key, chtxt, beforenum, addnum)
+def change_db_ratio_rec_many(chtxt,beforenum,list):
+    rec10d.rec10db.change_multi_ratio_rec_bayeskey(chtxt, beforenum, list)
+def change_db_reduce_ratio_rec(chtxt,beforenum,addnum):
+    rec10d.rec10db.change_ratio_rec_reduce_bayeskey(chtxt, beforenum, addnum)
  def change_db_ratio_all(key,chtxt,beforenum,addnum):
      rec10d.rec10db.change_ratio_all_bayeskey(key, chtxt, beforenum, addnum)
  def change_db_ratio_all_many(chtxt,beforenum,list):
-    rec10d.rec10db.change_many_ratio_all_bayeskey(chtxt,beforenum, list)
+    rec10d.rec10db.change_multi_ratio_all_bayeskey(chtxt,beforenum, list)
  def get_db_num(chtxt):
      ret=get_db_key("NUM", chtxt)
      if len(ret)<3:
@@ -33,20 +37,25 @@ def update_recall(chtxt,titles,descs,newnum):
      descs=" "+descs+" "
      str1=titles*2+descs+titles*2
      noun=n_gram.get_noun_quad_gram(str1)
-    bnum=get_db_num(chtxt)[2]
+    bnum=get_db_num(chtxt)[3]
      if bnum<1:
          bnum=100
      change_db_ratio_all_many(chtxt, bnum,noun)
      change_db_epg(chtxt, bnum, newnum)
      change_db_num(chtxt, 0, newnum)
  def add_key(chtxt,title,desc):
-    title=" "+title+" "
-    desc=" "+desc+" "
+    title=u" "+title+u" "
+    desc=u" "+desc+u" "
      str1=title*2+desc+title*2##タイトルは重視したいので幾度か足す。
+    print str1
      noun=n_gram.get_noun_quad_gram(str1)#nounは辞書のキーに文字を、中身に出現回数を書いたもの。
-    for i,j in noun.items():
-        change_db_ratio_rec(i,chtxt,bnum,j)
-    change_db_num(chtxt, 1, 0)
+    bnum=get_db_num(chtxt)[2]
+    print noun
+    if bnum<1:
+        bnum=1
+    change_db_ratio_rec_many(chtxt, bnum, noun)
+    change_db_num(chtxt,1,0)
+    change_db_reduce_ratio_rec(chtxt, bnum,1)
  def calc_key(chtxt,title,desc):
      """
  
@@ -59,15 +68,16 @@ def calc_key(chtxt,title,desc):
      noun=n_gram.get_noun_quad_gram(str1)#nounは辞書のキーに文字を、中身に出現回数を書いたもの。
      tnum=0
      for key,num in noun.items():
+        addnum=0
          pp=get_db_key(key,chtxt)
-        trec=pp[2]
-        tall=pp[3]
-        if tall<trec:
-            p=1000*trec/(3*tall+trec)
-            if p>kyoukai:
-                tnum=tnum+1
-            difp=trec-tall+difp*tnum
-            tnum=tnum+1
+        if len(pp)>3:
+            trec=pp[2]
+            tall=pp[3]
+            if tall<trec:
+                p=1000*trec/(3*tall+trec)
+                if p>kyoukai:
+                    addnum=addnum+1
+        tnum=tnum+num*addnum
      if kyoukai_num<tnum:
          return 1
      else:
diff --git a/rec10/trunk/src/dbMySQL.py b/rec10/trunk/src/dbMySQL.py

index 6d024c5..e029c2b 100644 (file)
--- a/rec10/trunk/src/dbMySQL.py
+++ b/rec10/trunk/src/dbMySQL.py
@@ -604,21 +604,15 @@ class DB_MySQL:
          db = self.connect_db()
          beforenumf=beforenum
          beforenum=str(beforenum)
-        #addnum=str(addnum)
-        #db[1].execute("\
-        #UPDATE bayeskey SET ratio_all=(ratio_all*%s+%s)/%s WHERE chtxt=%s AND keychar=%s",\
-        #(beforenum,addnum,beforenum,chtxt,key)\
-        #)
          db[1].execute("INSERT INTO bayeskey (keychar,chtxt,ratio_rec,ratio_all) \
          VALUES (%s,%s,%s,%s)\
          ON DUPLICATE KEY UPDATE \
          ratio_all=(ratio_all*%s+%s)/%s",(key,chtxt,"0",str(Decimal(addnum)/beforenumf),beforenum,chtxt,key))
          self.close_db(db)
-    def change_many_ratio_all_bayeskey(self,chtxt,beforenum,list):
+    def change_multi_ratio_all_bayeskey(self,chtxt,beforenum,list):
          """
          list={key:addnum}のリスト
          """
-        retlist=[]
          beforenumf=beforenum
          beforenum=str(beforenum)
          db = self.connect_db()
@@ -629,15 +623,34 @@ class DB_MySQL:
              ON DUPLICATE KEY UPDATE \
              ratio_all=(ratio_all*%s+%s)/%s",retl)
          self.close_db(db)
-    def change_ratio_rec_bayeskey(self,key,chtxt,beforenum,addnum):
-        db = self.connect_db()
+    def change_ratio_rec_reduce_bayeskey(self,chtxt,beforenum,newnum):
          beforenum=str(beforenum)
-        addnum=str(addnum)
+        newnum=str(newnum)
+        db = self.connect_db()
          db[1].execute("\
-        UPDATE bayeskey SET ratio_rec=(ratio_rec*%s+%s)/%s WHERE chtxt=%s AND keychar=%s",\
-        (beforenum,addnum,beforenum,chtxt,key)\
+        UPDATE bayeskey SET ratio_rec=ratio_rec*%s/(%s+%s) WHERE chtxt=%s AND NOT (keychar=\"NUM\")",(beforenum,newnum,beforenum,chtxt)\
          )
          self.close_db(db)
+    def change_ratio_rec_bayeskey(self,key,chtxt,beforenum,addnum):
+        db = self.connect_db()
+        beforenumf=beforenum
+        beforenum=str(beforenum)
+        db[1].execute("INSERT INTO bayeskey (keychar,chtxt,ratio_rec,ratio_all) \
+        VALUES (%s,%s,%s,%s)\
+        ON DUPLICATE KEY UPDATE \
+        ratio_rec=(ratio_rec*%s+%s)/%s",(key,chtxt,str(Decimal(addnum)/beforenumf),"0",beforenum,chtxt,key))
+        self.close_db(db)
+    def change_multi_ratio_rec_bayeskey(self,chtxt,beforenum,list):#self,key,chtxt,beforenum,addnum):
+        beforenumf=beforenum
+        beforenum=str(beforenum)
+        db = self.connect_db()
+        for i,j in list.items():
+            retl=(i,chtxt,str(Decimal(j)/beforenumf),"0",beforenum,str(j),beforenum)
+            db[1].execute("INSERT INTO bayeskey (keychar,chtxt,ratio_rec,ratio_all) \
+            VALUES (%s,%s,%s,%s)\
+            ON DUPLICATE KEY UPDATE \
+            ratio_rec=(ratio_rec*%s+%s)/%s",retl)
+        self.close_db(db)
      def select_by_key_bayeskey(self,key,chtxt):
          db = self.connect_db()
          dbexe = db[1].execute("\
diff --git a/rec10/trunk/src/recdb.py b/rec10/trunk/src/recdb.py

index a9268e1..85f9ac7 100644 (file)
--- a/rec10/trunk/src/recdb.py
+++ b/rec10/trunk/src/recdb.py
@@ -27,6 +27,7 @@ global REC_TS_RECORDING
  global REC_AVI_TO_MKV
  global REC_CHANGING_CANTAINER
  global REC_AUTO_SUGGEST
+global REC_BAYES_SUGGEST
  global REC_AUTO_KEYWORD
  REC_RESERVE = "res"
  REC_FINAL_RESERVE = "rec"
@@ -48,7 +49,7 @@ REC_CHANGING_CANTAINER = "changingcontainer"
  REC_AVI_TO_MKV = "avi2mkv"
  REC_AUTO_SUGGEST = "auto_suggest"
  REC_AUTO_KEYWORD = "auto_keyword"
-
+REC_BAYES_SUGGEST ="bayes_suggest"
  def recreserv(title, chtxt, btime, etime, opt):#optにはa(アニメ)d(副音声)v(xvid)
      rec_reckey("res", title, chtxt, btime, etime, opt)
  def auto_keyreserv(keyword, chtxt, btime, etime, deltatime, opt):
diff --git a/rec10/trunk/src/timerec.py b/rec10/trunk/src/timerec.py

index a9efc7e..38da071 100644 (file)
--- a/rec10/trunk/src/timerec.py
+++ b/rec10/trunk/src/timerec.py
@@ -8,6 +8,7 @@ import re
  import sys
  import time
  
+import auto_rec
  import chdb
  import checker
  import configreader
@@ -26,13 +27,9 @@ def task():
      """
      recdb.delete_old("24")
      tasks = recdb.getnow("2")
-    #print tasks
      inum = recdb.countRecNow_minutes("10")
-    #inum=recdb.countRecNow("1")
      print "番組表更新処理"+ datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
      print "inum:" + str(inum) + "bscsrec:" + str(status.getBSCSRecording()) + "terec:" + str(status.getTERecording()) 
-
-    #if inum+status.getBSCSRecording()+status.getTERecording() < 2:
      encodenum=0
      b25num=0
      update = chdb.update()
@@ -57,7 +54,6 @@ def task():
                      epgdb.updatebc(bctype)
                      update = chdb.update()
                      i = i + 1
-                    #rec10d.rec10db.update_by_bctype_chdata(bctype)
                  if i > 0:
                      break
              sys.exit()
@@ -87,12 +83,9 @@ def task():
              opt = task['opt']
          except:
              opt = ""
-        #print title
-        #print task
          tnow = datetime.datetime.now()
          dtt = bt-tnow
          dt = dtt.days * 24 * 60 * 60 + dtt.seconds
-        #print title+str(dt)
          if task["type"] == recdb.REC_RESERVE:#"res,"+chtxt+","+title+","+btime+","+etime+","+opt
              if (dt < 58 * 60 and dt > 20 * 60):
                  pid = os.fork()
@@ -103,7 +96,6 @@ def task():
                      chdatat = rec10d.rec10db.select_by_bctype_chdata(bctypet)
                      dt1 = datetime.datetime.strptime(chdatat[0][5], "%Y-%m-%d %H:%M:%S")-datetime.datetime.now()
                      dt1 = dt1.days * 24 * 60 * 60 + dt1.seconds
-                    #print "oikake search:"+str(dt1)
                      if dt1 < 60 * 60:
                          recdata = epgdb.searchtime2(title.decode('utf-8'), btime, "5", chtxt)
                          chtxtn = recdata[0]
@@ -122,14 +114,15 @@ def task():
                                  recdb.del_reckey(recdb.REC_RESERVE, title, chtxt, btime)
                                  recdb.rec_reckey(recdb.REC_FINAL_RESERVE, titlen, chtxtn, btimen, etimen, opt)
                                  recdb.rec_reclog(titlen, chtxtn, btimen, etimen, opt, exp, longexp, category)
+                                auto_rec.add_key(chtxt, titlen,exp+" "+longexp)
                                  print "Oikake "+title+" : "+titlen+" "+btimen+" "+etimen
                              except Exception, inst:
+                                print "Error happended in Oikake DB"
                                  print type(inst)
                                  print inst
                          else:
                              print "nothing match"
                      else:
-                        #print rec10d.rec10db.select_by_bctype_chdata(bctypet)
                          if rec10d.rec10db.select_by_bctype_chdata(bctypet)[0][6] != "0":
                              rec10d.rec10db.update_status_by_bctype_chdata(bctypet, "3")
                      sys.exit()
@@ -159,12 +152,13 @@ def task():
                      try:
                          recdb.del_reckey(recdb.REC_KEYWORD, title, chtxt, btime)
                          recdb.rec_reckey(recdb.REC_RESERVE, titlet, chtxtt, btimet, etimet, opt)
-                        
+                        recdb.rec_reclog(titlet, chtxtt, btimet, etimet, opt, exp,longexp,category)
+                        auto_rec.add_key(chtxt, titlet,exp+" "+longexp)
                          print "key "+title+" : "+titlet+" "+btimet+" "+etimet
                      except Exception, inst:
+                        print "Error happened in REC_KEYWORD DB"
                          print type(inst)
                          print inst
-                    recdb.rec_reclog(titlet, chtxtt, btimet, etimet, opt, exp,longexp,category)
                  else:
                      print "nothing match"
          elif task["type"] == recdb.REC_KEYWORD_EVERY_SOME_DAYS:#"keyevery,"+chtxt+","+keyword+","+btime+","+deltatime+","+opt+","+deltaday
@@ -186,7 +180,6 @@ def task():
                  btxt = bt.strftime("%Y-%m-%d %H:%M:%S")
                  etxt = et.strftime("%Y-%m-%d %H:%M:%S")
                  recdb.everyreserv(title, chtxt, btxt, etxt, deltatime, opt, deltaday)
-            
          elif task["type"] == recdb.REC_FINAL_RESERVE:#"rec,"+chtxt+","+title+","+btime+","+etime+","+opt
              print dt
              if dt < 6 * 60 and dt > 0:
diff --git a/rec10/trunk/src/ts2epg.py b/rec10/trunk/src/ts2epg.py

index ee06138..6e5f9aa 100644 (file)
--- a/rec10/trunk/src/ts2epg.py
+++ b/rec10/trunk/src/ts2epg.py
@@ -18,7 +18,7 @@ def write(pout, ch):
      print "ts2epg処理"
      bctype = chdb.chsearch(ch)['bctype']
      print ch + ":" + bctype
-    timet = "10"
+    timet = "30"
      if re.search(u'cs', bctype):
          mode = "/CS"
          timet = "180"
diff --git a/rec10/trunk/src/tv2mp4.py b/rec10/trunk/src/tv2mp4.py

index fb90250..e396543 100644 (file)
--- a/rec10/trunk/src/tv2mp4.py
+++ b/rec10/trunk/src/tv2mp4.py
@@ -1,4 +1,3 @@
-import os.path
  #!/usr/bin/python
  # coding: UTF-8
  # Rec10 TS Recording Tools
@@ -7,12 +6,12 @@ import sys
  import commands
  import tv2avi
  import os
+import os.path
  import time
  def ts2mp4(pin, pout, opt):
      dir=os.path.split(pin)[0]
      title=os.path.split(pin)[1]
      title=os.path.splitext(title)[0]
-    #title=base64.b16encode(title)
      tpavi=os.path.join(dir, title+".avi")
      tpmp4=os.path.join(dir, title+".mp4")
      tv2avi.ts2avi(pin, tpavi, opt)
diff --git a/rec10/trunk/src/xml2db.py b/rec10/trunk/src/xml2db.py

index 08b0808..492ba8c 100644 (file)
--- a/rec10/trunk/src/xml2db.py
+++ b/rec10/trunk/src/xml2db.py
@@ -10,6 +10,7 @@ import chdb
  import rec10d
  import zenhan
  import datetime
+import recdb
  flags = {}
  lastflag = ""
  xmldate = {}
@@ -18,6 +19,7 @@ bctype = ""
  chlists = []
  tvlists = []
  bayes={}
+dtb=""
  def xml2db(xmlpath, bctypetemp):#bctypeは放送種別で'TE'(地デジ)BS,CSがある。地デジの場合は te数字 が入る
      global db, bctype
      bctype = bctypetemp
@@ -40,13 +42,14 @@ def parseDocument(p, xmlfile):
      p.ParseFile(f)
      f.close()
  def start_element(name, attrs):
-    global flags, lastflag, xmldate
+    global flags, lastflag, xmldate,dtb
      flags[name] = 1
      lastflag = name
      for (key, val) in attrs.iteritems():
          xmldate[name + ":" + key] = val
+    dtb=datetime.datetime.now()
  def end_element(name):
-    global flags, lastflag, xmldate, db, chlists, tvlists ,bayes
+    global flags, lastflag, xmldate, db, chlists, tvlists ,bayes,dtb
      if flags[name] == 1:
          if name == "channel":
              ubctype = unicode(bctype)
@@ -78,8 +81,8 @@ def end_element(name):
                      bt2=[bt[0]+utitle+" ",bt[1]+udesc+" "+ulongdesc+" ",bt[2]+1]
                      bayes[tch['chtxt']]=bt2
                      tvlists.append([ubctype, uch, ustart, ustop, utitle, udesc,ulongdesc,ucategory])
-            if auto_rec.calc_key(chtxt, utitle, udesc+" "+ulongdesc):
-                ""
+                    if auto_rec.calc_key(tch['chtxt'], utitle, udesc+" "+ulongdesc):
+                        recdb.rec_reckey(recdb.REC_BAYES_SUGGEST, utitle, tch['chtxt'], ustart,ustop,"")
              xmldate['title'] = ""
              xmldate['desc'] = ""
              xmldate['longdesc'] = ""
@@ -91,12 +94,11 @@ def end_element(name):
              if len(tvlists) > 0:
                  writemultitvDB(tvlists)
              tvlists = []
-            dtb=datetime.datetime.now()
              for ct,list in bayes.items():
                  auto_rec.update_recall(ct,list[0],list[1],list[2])
              dtb=datetime.datetime.now()-dtb
              print bctype + " epg取り出し終了"
-            print str(dtb.days * 24 * 60 * 60 + dtb.seconds)+"second taken for updating bayes-auto DB ."
+            print str(dtb.days * 24 * 60 * 60 + dtb.seconds)+"seconds taken for updating bayes-auto DB ."
      flags[name] = 0
  def char_data(data):
      global flags, lastflag, xmldate
author	gn64_jp <gn64_jp@4e526526-5e11-4fc0-8910-f8fd03428081>
	Thu, 12 Nov 2009 14:16:58 +0000 (14:16 +0000)
committer	gn64_jp <gn64_jp@4e526526-5e11-4fc0-8910-f8fd03428081>
	Thu, 12 Nov 2009 14:16:58 +0000 (14:16 +0000)
rec10/trunk/src/auto_rec.py		patch | blob | history
rec10/trunk/src/dbMySQL.py		patch | blob | history
rec10/trunk/src/recdb.py		patch | blob | history
rec10/trunk/src/timerec.py		patch | blob | history
rec10/trunk/src/ts2epg.py		patch | blob | history
rec10/trunk/src/tv2mp4.py		patch | blob | history
rec10/trunk/src/xml2db.py		patch | blob | history