From 933ac3ce0c08f8272ec188226e9156611cee7f84 Mon Sep 17 00:00:00 2001 From: gn64_jp Date: Thu, 12 Nov 2009 14:16:58 +0000 Subject: [PATCH] bayes suggest implemented. git-svn-id: svn+ssh://svn.sourceforge.jp/svnroot/rec10@221 4e526526-5e11-4fc0-8910-f8fd03428081 --- rec10/trunk/src/auto_rec.py | 40 +++++++++++++++++++++++++--------------- rec10/trunk/src/dbMySQL.py | 37 +++++++++++++++++++++++++------------ rec10/trunk/src/recdb.py | 3 ++- rec10/trunk/src/timerec.py | 19 ++++++------------- rec10/trunk/src/ts2epg.py | 2 +- rec10/trunk/src/tv2mp4.py | 3 +-- rec10/trunk/src/xml2db.py | 14 ++++++++------ 7 files changed, 68 insertions(+), 50 deletions(-) diff --git a/rec10/trunk/src/auto_rec.py b/rec10/trunk/src/auto_rec.py index b64f789..5c50232 100644 --- a/rec10/trunk/src/auto_rec.py +++ b/rec10/trunk/src/auto_rec.py @@ -15,10 +15,14 @@ def change_db_num(chtxt,recaddnum,alladdnum): rec10d.rec10db.add_num_bayeskey(chtxt, recaddnum,alladdnum) def change_db_ratio_rec(key,chtxt,beforenum,addnum): rec10d.rec10db.change_ratio_rec_bayeskey(key, chtxt, beforenum, addnum) +def change_db_ratio_rec_many(chtxt,beforenum,list): + rec10d.rec10db.change_multi_ratio_rec_bayeskey(chtxt, beforenum, list) +def change_db_reduce_ratio_rec(chtxt,beforenum,addnum): + rec10d.rec10db.change_ratio_rec_reduce_bayeskey(chtxt, beforenum, addnum) def change_db_ratio_all(key,chtxt,beforenum,addnum): rec10d.rec10db.change_ratio_all_bayeskey(key, chtxt, beforenum, addnum) def change_db_ratio_all_many(chtxt,beforenum,list): - rec10d.rec10db.change_many_ratio_all_bayeskey(chtxt,beforenum, list) + rec10d.rec10db.change_multi_ratio_all_bayeskey(chtxt,beforenum, list) def get_db_num(chtxt): ret=get_db_key("NUM", chtxt) if len(ret)<3: @@ -33,20 +37,25 @@ def update_recall(chtxt,titles,descs,newnum): descs=" "+descs+" " str1=titles*2+descs+titles*2 noun=n_gram.get_noun_quad_gram(str1) - bnum=get_db_num(chtxt)[2] + bnum=get_db_num(chtxt)[3] if bnum<1: bnum=100 change_db_ratio_all_many(chtxt, bnum,noun) change_db_epg(chtxt, bnum, newnum) change_db_num(chtxt, 0, newnum) def add_key(chtxt,title,desc): - title=" "+title+" " - desc=" "+desc+" " + title=u" "+title+u" " + desc=u" "+desc+u" " str1=title*2+desc+title*2##タイトルは重視したいので幾度か足す。 + print str1 noun=n_gram.get_noun_quad_gram(str1)#nounは辞書のキーに文字を、中身に出現回数を書いたもの。 - for i,j in noun.items(): - change_db_ratio_rec(i,chtxt,bnum,j) - change_db_num(chtxt, 1, 0) + bnum=get_db_num(chtxt)[2] + print noun + if bnum<1: + bnum=1 + change_db_ratio_rec_many(chtxt, bnum, noun) + change_db_num(chtxt,1,0) + change_db_reduce_ratio_rec(chtxt, bnum,1) def calc_key(chtxt,title,desc): """ @@ -59,15 +68,16 @@ def calc_key(chtxt,title,desc): noun=n_gram.get_noun_quad_gram(str1)#nounは辞書のキーに文字を、中身に出現回数を書いたもの。 tnum=0 for key,num in noun.items(): + addnum=0 pp=get_db_key(key,chtxt) - trec=pp[2] - tall=pp[3] - if tallkyoukai: - tnum=tnum+1 - difp=trec-tall+difp*tnum - tnum=tnum+1 + if len(pp)>3: + trec=pp[2] + tall=pp[3] + if tallkyoukai: + addnum=addnum+1 + tnum=tnum+num*addnum if kyoukai_num 0: break sys.exit() @@ -87,12 +83,9 @@ def task(): opt = task['opt'] except: opt = "" - #print title - #print task tnow = datetime.datetime.now() dtt = bt-tnow dt = dtt.days * 24 * 60 * 60 + dtt.seconds - #print title+str(dt) if task["type"] == recdb.REC_RESERVE:#"res,"+chtxt+","+title+","+btime+","+etime+","+opt if (dt < 58 * 60 and dt > 20 * 60): pid = os.fork() @@ -103,7 +96,6 @@ def task(): chdatat = rec10d.rec10db.select_by_bctype_chdata(bctypet) dt1 = datetime.datetime.strptime(chdatat[0][5], "%Y-%m-%d %H:%M:%S")-datetime.datetime.now() dt1 = dt1.days * 24 * 60 * 60 + dt1.seconds - #print "oikake search:"+str(dt1) if dt1 < 60 * 60: recdata = epgdb.searchtime2(title.decode('utf-8'), btime, "5", chtxt) chtxtn = recdata[0] @@ -122,14 +114,15 @@ def task(): recdb.del_reckey(recdb.REC_RESERVE, title, chtxt, btime) recdb.rec_reckey(recdb.REC_FINAL_RESERVE, titlen, chtxtn, btimen, etimen, opt) recdb.rec_reclog(titlen, chtxtn, btimen, etimen, opt, exp, longexp, category) + auto_rec.add_key(chtxt, titlen,exp+" "+longexp) print "Oikake "+title+" : "+titlen+" "+btimen+" "+etimen except Exception, inst: + print "Error happended in Oikake DB" print type(inst) print inst else: print "nothing match" else: - #print rec10d.rec10db.select_by_bctype_chdata(bctypet) if rec10d.rec10db.select_by_bctype_chdata(bctypet)[0][6] != "0": rec10d.rec10db.update_status_by_bctype_chdata(bctypet, "3") sys.exit() @@ -159,12 +152,13 @@ def task(): try: recdb.del_reckey(recdb.REC_KEYWORD, title, chtxt, btime) recdb.rec_reckey(recdb.REC_RESERVE, titlet, chtxtt, btimet, etimet, opt) - + recdb.rec_reclog(titlet, chtxtt, btimet, etimet, opt, exp,longexp,category) + auto_rec.add_key(chtxt, titlet,exp+" "+longexp) print "key "+title+" : "+titlet+" "+btimet+" "+etimet except Exception, inst: + print "Error happened in REC_KEYWORD DB" print type(inst) print inst - recdb.rec_reclog(titlet, chtxtt, btimet, etimet, opt, exp,longexp,category) else: print "nothing match" elif task["type"] == recdb.REC_KEYWORD_EVERY_SOME_DAYS:#"keyevery,"+chtxt+","+keyword+","+btime+","+deltatime+","+opt+","+deltaday @@ -186,7 +180,6 @@ def task(): btxt = bt.strftime("%Y-%m-%d %H:%M:%S") etxt = et.strftime("%Y-%m-%d %H:%M:%S") recdb.everyreserv(title, chtxt, btxt, etxt, deltatime, opt, deltaday) - elif task["type"] == recdb.REC_FINAL_RESERVE:#"rec,"+chtxt+","+title+","+btime+","+etime+","+opt print dt if dt < 6 * 60 and dt > 0: diff --git a/rec10/trunk/src/ts2epg.py b/rec10/trunk/src/ts2epg.py index ee06138..6e5f9aa 100644 --- a/rec10/trunk/src/ts2epg.py +++ b/rec10/trunk/src/ts2epg.py @@ -18,7 +18,7 @@ def write(pout, ch): print "ts2epg処理" bctype = chdb.chsearch(ch)['bctype'] print ch + ":" + bctype - timet = "10" + timet = "30" if re.search(u'cs', bctype): mode = "/CS" timet = "180" diff --git a/rec10/trunk/src/tv2mp4.py b/rec10/trunk/src/tv2mp4.py index fb90250..e396543 100644 --- a/rec10/trunk/src/tv2mp4.py +++ b/rec10/trunk/src/tv2mp4.py @@ -1,4 +1,3 @@ -import os.path #!/usr/bin/python # coding: UTF-8 # Rec10 TS Recording Tools @@ -7,12 +6,12 @@ import sys import commands import tv2avi import os +import os.path import time def ts2mp4(pin, pout, opt): dir=os.path.split(pin)[0] title=os.path.split(pin)[1] title=os.path.splitext(title)[0] - #title=base64.b16encode(title) tpavi=os.path.join(dir, title+".avi") tpmp4=os.path.join(dir, title+".mp4") tv2avi.ts2avi(pin, tpavi, opt) diff --git a/rec10/trunk/src/xml2db.py b/rec10/trunk/src/xml2db.py index 08b0808..492ba8c 100644 --- a/rec10/trunk/src/xml2db.py +++ b/rec10/trunk/src/xml2db.py @@ -10,6 +10,7 @@ import chdb import rec10d import zenhan import datetime +import recdb flags = {} lastflag = "" xmldate = {} @@ -18,6 +19,7 @@ bctype = "" chlists = [] tvlists = [] bayes={} +dtb="" def xml2db(xmlpath, bctypetemp):#bctypeは放送種別で'TE'(地デジ)BS,CSがある。地デジの場合は te数字 が入る global db, bctype bctype = bctypetemp @@ -40,13 +42,14 @@ def parseDocument(p, xmlfile): p.ParseFile(f) f.close() def start_element(name, attrs): - global flags, lastflag, xmldate + global flags, lastflag, xmldate,dtb flags[name] = 1 lastflag = name for (key, val) in attrs.iteritems(): xmldate[name + ":" + key] = val + dtb=datetime.datetime.now() def end_element(name): - global flags, lastflag, xmldate, db, chlists, tvlists ,bayes + global flags, lastflag, xmldate, db, chlists, tvlists ,bayes,dtb if flags[name] == 1: if name == "channel": ubctype = unicode(bctype) @@ -78,8 +81,8 @@ def end_element(name): bt2=[bt[0]+utitle+" ",bt[1]+udesc+" "+ulongdesc+" ",bt[2]+1] bayes[tch['chtxt']]=bt2 tvlists.append([ubctype, uch, ustart, ustop, utitle, udesc,ulongdesc,ucategory]) - if auto_rec.calc_key(chtxt, utitle, udesc+" "+ulongdesc): - "" + if auto_rec.calc_key(tch['chtxt'], utitle, udesc+" "+ulongdesc): + recdb.rec_reckey(recdb.REC_BAYES_SUGGEST, utitle, tch['chtxt'], ustart,ustop,"") xmldate['title'] = "" xmldate['desc'] = "" xmldate['longdesc'] = "" @@ -91,12 +94,11 @@ def end_element(name): if len(tvlists) > 0: writemultitvDB(tvlists) tvlists = [] - dtb=datetime.datetime.now() for ct,list in bayes.items(): auto_rec.update_recall(ct,list[0],list[1],list[2]) dtb=datetime.datetime.now()-dtb print bctype + " epg取り出し終了" - print str(dtb.days * 24 * 60 * 60 + dtb.seconds)+"second taken for updating bayes-auto DB ." + print str(dtb.days * 24 * 60 * 60 + dtb.seconds)+"seconds taken for updating bayes-auto DB ." flags[name] = 0 def char_data(data): global flags, lastflag, xmldate -- 2.11.0