OSDN Git Service

stop using trunk directory in rectool
[rec10/rec10-git.git] / rec10 / trunk / src / guess.py
1 #!/usr/bin/python
2 # coding: UTF-8
3 # Rec10 TS Recording Tools
4 # Copyright (C) 2009-2011 Yukikaze
5
6 import os
7 import os.path
8 import re
9 import time
10 import os
11 import datetime
12 import n_gram
13 import recdblist
14 import zenhan
15 import auto_move
16
def detSeriesNum(execpath):
    """
    Group the entries of a directory by series title and episode number.

    Each entry name returned by os.listdir(execpath) is split into
    (stem, extension), the stem is decoded as UTF-8 and passed through
    zenhan.toHankaku(), and the result is parsed with detNameType().

    execpath -- directory to list.

    Returns a dict of the form {title: {num: (stem, extension)}} built from
    the 'title' and 'num' fields reported by detNameType().  The dict is also
    printed after every entry (debug output kept from the original).
    """
    series = {}
    for entry in os.listdir(execpath):
        stem_and_ext = os.path.splitext(entry)
        normalized = zenhan.toHankaku(unicode(stem_and_ext[0], "utf-8"))
        info = detNameType(normalized, execpath)
        # One inner dict per title, keyed by episode number.
        series.setdefault(info['title'], {})[info['num']] = stem_and_ext
        print(series)
    return series
def detDeltaNum(title, movepath):
    """
    Estimate the interval between episodes from the files in movepath.

    Every regular file directly inside movepath is parsed with
    detNameType().  Files for which an episode number was recognised
    (num != -1) contribute a record [num, title, modification time], and the
    collected records are handed to detMultiDeltaDays().

    title    -- unused by this function; kept so existing callers keep working.
    movepath -- directory to scan; entry names are decoded as UTF-8.

    Returns whatever detMultiDeltaDays() returns for the collected records.
    """
    records = []
    for entry in os.listdir(movepath):
        recdblist.printutf8(unicode(entry, 'utf-8'), verbose_level=800)
        fullpath = os.path.join(movepath, entry)
        if not os.path.isfile(fullpath):
            continue
        stem = os.path.splitext(entry)[0]
        stem = zenhan.toHankaku(unicode(stem, 'utf-8'))
        parsed = detNameType(stem, movepath)
        if parsed['num'] != -1:
            modified = datetime.datetime.fromtimestamp(os.path.getmtime(fullpath))
            records.append([parsed['num'], parsed['title'], modified])
    return detMultiDeltaDays(records)
def detMultiDeltaDays(num_with_title_with_dates):
    """
    Pick the most likely number of days between consecutive episodes.

    num_with_title_with_dates -- list of records whose element 0 is the
        episode number (used as a list index) and whose element 2 is the
        datetime of that episode; element 1 is not used here.

    Every pair of records with different episode numbers is passed to
    detSingleDeltaDay(), which yields [days, weight] candidates.  The weights
    are summed per candidate and the candidate with the highest total wins;
    on a tie the smallest interval is kept.

    Returns the winning interval in days, or 0 when no candidate exists.
    """
    records = num_with_title_with_dates
    maxnum = 0
    for rec in records:
        if maxnum < rec[0] + 1:
            maxnum = rec[0] + 1
    # buckets[n] holds every record whose episode number is n.
    buckets = [[] for _ in range(maxnum)]
    for rec in records:
        buckets[rec[0]].append(rec)
    scores = {}
    for low in range(maxnum):
        for high in range(low + 1, maxnum):
            for rec_low in buckets[low]:
                for rec_high in buckets[high]:
                    for days, weight in detSingleDeltaDay(low, rec_low[2], high, rec_high[2]):
                        scores[days] = scores.get(days, 0) + weight
    best_days = 0
    best_score = 0
    # The score keys are day intervals, not episode numbers, so every key has
    # to be examined: limiting the search to range(maxnum) discarded e.g. a
    # 7-day interval whenever fewer than 8 episodes were present.
    # Negative intervals are skipped, as before.
    for days in sorted(scores):
        if days < 0:
            continue
        if best_score < scores[days]:
            best_days = days
            best_score = scores[days]
    return best_days
def detSingleDeltaDay(num1, date1, num2, date2):
    """
    Estimate the days-per-episode interval from two dated episodes.

    num1, num2   -- episode numbers.
    date1, date2 -- datetime.datetime of each episode (order does not matter).

    The absolute time gap is converted to whole days three times: as is, and
    after shifting it by +6 and -6 hours.  Each day count is rounded up by
    one episode distance when its remainder exceeds half that distance, and
    is then divided by the distance between the episode numbers.

    Returns a list of [days_per_episode, weight] candidates:
      * [[n, 60], [p, 40]] when the +6h shift changes the day count,
      * [[n, 60], [m, 40]] when only the -6h shift changes it,
      * [[n, 100]] when neither shift changes it,
      * [] when both episode numbers are equal (no interval can be derived).
    """
    gap = abs(date1 - date2)
    distance = abs(num2 - num1)
    if distance == 0:
        # Equal episode numbers used to end in a ZeroDivisionError below.
        return []

    def round_to_distance(days):
        # Add one episode distance when the remainder is more than half of
        # it, so the floor division below rounds to the nearest value.
        if days % distance * 2 > distance:
            return days + distance
        return days

    six_hours = datetime.timedelta(hours=6)
    dn = round_to_distance(gap.days)
    dp = round_to_distance((gap + six_hours).days)
    dm = round_to_distance((gap - six_hours).days)
    # "//" keeps the candidates integral regardless of the division mode in
    # effect; they are later used as dictionary keys.
    if dp != dn:
        return [[dn // distance, 60], [dp // distance, 40]]
    if dm != dn:
        return [[dn // distance, 60], [dm // distance, 40]]
    return [[dn // distance, 100]]
def detNameType(title, path):
    """
    Split a recording title into series title, episode number and subtitle.

    Recognised title shapes:
        type A  --- title#<number>
        type Aj --- title第<number>話
        type B  --- title#<number>subtitle
        type C  --- title subtitle
        type D  --- title (without number)

    title -- programme title; the callers in this file pass a unicode string.
    path  -- directory handed to searchFolder(), which scans it recursively;
             it is decoded here with unicode(path, 'utf-8').

    Returns a dict with the keys 'title', 'type', 'num', 'subtitle' and
    'folder'.  'num' is -1 for types C and D, and is forced to 1 when the
    title carried a new-series marker ("[新]" or " 新").
    """
    # The marker is the literal text "[新]".  As a regular expression it is a
    # character class that matches any single "新", which flagged every title
    # containing that character as a new series.
    new = (u"[新]" in title) or (u" 新" in title)
    if new:
        title = title.replace(u"[新]", "")
        title = title.replace(u" 新", "")
    recdblist.printutf8(title, verbose_level=800)
    title = auto_move.getTitle(title)  # remove the date/time from the title
    title = title.replace(u"無料≫", "")
    # "\d+" instead of "\d*": an empty number made int() raise ValueError
    # (e.g. for a title that merely contains "#").
    tA = re.match(r"(.+)#(\d+)\s*\Z", title)
    tB = re.match(r"(.+)#(\d+)\s*(\D*)", title)
    # Unicode pattern: a byte-string pattern containing UTF-8 encoded
    # characters cannot match a unicode title on Python 2.
    tAj = re.match(u"(.+)第(\\d+)話\\s*\\Z", title)
    upath = unicode(path, 'utf-8')
    ret = {'title': "", 'type': "", 'num': 0, 'subtitle': "", 'folder': ""}
    if tA:
        ret['type'] = "A"
        ret['title'] = tA.group(1).replace(" ", "")
        ret['num'] = int(tA.group(2))
        ret['folder'] = searchFolder(tA.group(1), upath)
    elif tAj:
        # "elif": with a plain "if" here, every type A match fell through to
        # the type B branch below and was reported as type B.
        ret['type'] = "Aj"
        ret['title'] = tAj.group(1).replace(" ", "")
        ret['num'] = int(tAj.group(2))
        ret['folder'] = searchFolder(tAj.group(1), upath)
    elif tB:
        ret['type'] = "B"
        ret['title'] = tB.group(1).replace(" ", "")
        ret['num'] = int(tB.group(2))
        ret['folder'] = searchFolder(tB.group(1), upath)
        ret['subtitle'] = tB.group(3)
    else:  # type C or type D
        # Grow the candidate title one space-separated word at a time and
        # look for a folder matching it.
        prefix = ""
        matched_prefix = ""
        folder = ""
        for word in title.split(" "):
            prefix = prefix + " " + word
            found = searchFolder(prefix, upath)
            if found != "":
                # searchFolder() returns a path string, not a (path, score)
                # pair; indexing it stored single characters of the path.
                # NOTE(review): no score is available here, so the first
                # matching prefix is kept, which is what the old character
                # comparison amounted to for paths sharing a common prefix.
                matched_prefix = prefix
                folder = found
                break
        series = matched_prefix[1:]  # drop the leading space added above
        ret['title'] = series.replace(" ", "")
        ret['num'] = -1
        ret['folder'] = folder
        # NOTE(review): when no prefix matched, series is empty and this
        # still drops the first character of the title -- confirm intent.
        ret['subtitle'] = title.replace(series, "")[1:]
        if ret['subtitle'].replace(" ", "") == "":
            ret['type'] = "D"
        else:
            ret['type'] = "C"
    if new:
        ret['num'] = 1
    return ret
def searchFolder(title, path, threshold=500):
    """
    Search below path for the folder that best matches title.

    Every entry below path (recursively) is scored with n_gram.trigram()
    against the title, after both strings went through auto_move.getTitle():
      * a directory is scored by its parent path and stands for itself,
      * any other entry is scored by its base name and stands for the
        directory that contains it.

    title     -- title to look for.
    path      -- root directory of the search.
    threshold -- the best score must be strictly greater than this value.

    Returns the folder path with the highest score, or "" when nothing scores
    above zero or the best score does not exceed threshold.
    """
    # Walk the tree with an explicit stack of pending directories.  The old
    # loop appended already-joined paths to the list it was iterating and
    # joined them with path again, which produced wrong paths whenever path
    # was relative.
    entries = []
    pending = [path]
    while pending:
        current = pending.pop()
        for name in os.listdir(current):
            fullpath = os.path.join(current, name)
            is_dir = os.path.isdir(fullpath)
            entries.append((fullpath, is_dir))
            if is_dir:
                pending.append(fullpath)
    if not entries:
        return ""
    ntitle = auto_move.getTitle(title)  # loop invariant, computed once
    scored = set()
    for fullpath, is_dir in entries:
        if is_dir:
            # NOTE(review): a directory is compared by its parent path, not
            # by its own name -- confirm that basename() was not intended.
            cmpp = os.path.dirname(fullpath)
            appp = fullpath
        else:
            cmpp = os.path.basename(fullpath)
            appp = os.path.dirname(fullpath)
        score = n_gram.trigram(ntitle, auto_move.getTitle(cmpp))
        if score > 0:
            scored.add((score, appp))
    if not scored:
        return ""
    # Highest score wins; ties are broken by the greater path, exactly as the
    # former sort-then-reverse did.
    best_score, best_folder = max(scored)
    if best_score > threshold:
        return best_folder
    return ""
237         else:
238             return ""
239     else:
240         return ""