OSDN Git Service

implement seriesnum determinant system.
[rec10/rec10-git.git] / rec10 / trunk / src / guess.py
1 #!/usr/bin/python
2 # coding: UTF-8
3 # Rec10 TS Recording Tools
4 # Copyright (C) 2009-2010 Yukikaze
5
6 import os
7 import os.path
8 import re
9 import time
10 import os
11 import datetime
12 import n_gram
13 import recdblist
14 import zenhan
15 import auto_move
16
def detName(title,path):
    """
    Classify a recording title and, for unnumbered titles, fill in 'num'.

    title -- raw title string, handed to detNameType
    path  -- directory handed to detNameType for its folder search

    Returns the dict built by detNameType ('title', 'type', 'num',
    'subtitle', 'folder'). For type "C" and "D" results that have a
    non-empty 'folder', 'num' is replaced by the value detNum() returns
    for that title and folder.
    """
    tt = detNameType(title, path)
    # Types C and D carry no number in the title (detNameType sets num=-1).
    # An empty folder means nothing matched; detNum would call
    # os.listdir("") and raise, so leave 'num' untouched in that case.
    if tt['type'] in ("C", "D") and tt['folder']:
        tt['num'] = detNum(tt['title'], tt['folder'])
    # The result used to be computed and then dropped; hand it to the caller.
    return tt
def detSeriesNum(execpath):
    """
    Build a lookup table from the entries of execpath.

    execpath -- directory whose entries are examined

    For every entry, the extension is stripped, the name is converted
    with zenhan.toHankaku and classified with detNameType. The returned
    dict contains:
      * each detected title mapped to a list (this function never adds
        anything to that list), and
      * the value returned by detNum() mapped to the (root, ext) tuple of
        the entry that was processed last for that value.
    An empty directory yields an empty dict.
    """
    ss = {}
    for entry in os.listdir(execpath):
        ftitle = os.path.splitext(entry)
        # Was unicode(name): 'name' is not defined in this function.
        fname = zenhan.toHankaku(unicode(ftitle[0]))
        st = detNameType(fname, execpath)['title']
        # Was len(ss[st]), which raises KeyError the first time a title is seen.
        ss.setdefault(st, [])
        # NOTE(review): the per-title list above is never filled and the
        # entry is stored under detNum()'s result instead; kept as written
        # because the intended layout cannot be determined from this file.
        ss[detNum(fname, execpath)] = ftitle
    return ss
def detNum(title,movepath):
    """
    Scan movepath for numbered recordings and evaluate them.

    title    -- NOTE(review): not used by this function; kept for callers
    movepath -- directory whose regular files are examined

    Every regular file in movepath is classified with detNameType (after
    stripping the extension and converting the name with
    zenhan.toHankaku). Files whose 'num' is not -1 are collected as
    [num, title, modification time] and the list is handed to
    detMultiDeltaDays, whose result is returned.

    The original comment describes this as guessing the episode number of
    the program; the value actually returned is whatever
    detMultiDeltaDays derives from the collected numbers and dates.
    """
    numbered = []
    for entry in os.listdir(movepath):
        recdblist.printutf8(entry)
        fullpath = os.path.join(movepath, entry)
        if not os.path.isfile(fullpath):
            continue
        name = os.path.splitext(os.path.basename(fullpath))[0]
        name = zenhan.toHankaku(unicode(name))
        info = detNameType(name, movepath)
        recdblist.printutf8(info['title'] + " " + str(info['num']))
        if info['num'] != -1:
            # Only the datetime form is used downstream (date arithmetic in
            # detSingleDeltaDay), so the mtime is read once, and only here.
            mtime = datetime.datetime.fromtimestamp(os.path.getmtime(fullpath))
            numbered.append([info['num'], info['title'], mtime])
    return detMultiDeltaDays(numbered)
def detMultiDeltaDays(num_with_title_with_dates):
    """
    Pick the day interval that collects the most weight over all pairs.

    num_with_title_with_dates -- list of entries indexed as
        entry[0] = number, entry[2] = date (entry[1] is not read here)

    Entries are grouped by number. For every pair of entries with
    different numbers, detSingleDeltaDay supplies [interval, weight]
    candidates, and the weights are summed per interval. The interval
    with the largest total is returned; ties go to the smallest interval.
    Returns 0 when there are no candidates (for example fewer than two
    distinct numbers).
    """
    # Group the entries by their number.
    by_num = {}
    for entry in num_with_title_with_dates:
        by_num.setdefault(entry[0], []).append(entry)
    nums = sorted(by_num)

    # Sum the weights of every interval proposed by any pair (lower, higher).
    votes = {}
    for pos, low in enumerate(nums):
        for high in nums[pos + 1:]:
            for first in by_num[low]:
                for second in by_num[high]:
                    for interval, weight in detSingleDeltaDay(low, first[2], high, second[2]):
                        votes[interval] = votes.get(interval, 0) + weight

    # The winner is searched among the intervals that actually received
    # votes. Previously only keys below the highest number + 1 were looked
    # at, which ignored any interval at least that large.
    best_interval = 0
    best_weight = 0
    for interval in sorted(votes):
        if votes[interval] > best_weight:
            best_interval = interval
            best_weight = votes[interval]
    return best_interval
def detSingleDeltaDay(num1,date1,num2,date2):
    """
    Propose day intervals per number step from two numbered, dated entries.

    num1, num2   -- the numbers of the two entries
    date1, date2 -- their dates; the difference must be a
                    datetime.timedelta (e.g. datetime.datetime values)

    The absolute date difference is taken in whole days, and again after
    shifting it by +6 and -6 hours. Each day count is divided by the
    absolute number difference, rounding up when the remainder is more
    than half of that difference.

    Returns a list of [interval, weight] pairs:
      * [[n, 60], [p, 40]] when the +6h day count differs from the plain one,
      * [[n, 60], [m, 40]] otherwise when the -6h day count differs,
      * [[n, 100]] when all three agree,
      * [] when num1 == num2 (no interval can be derived).
    """
    step = abs(num2 - num1)
    if step == 0:
        # Same number on both sides: dividing by the step is impossible.
        return []

    span = abs(date1 - date2)
    tolerance = datetime.timedelta(hours=6)

    def _rounded(days):
        # Adding one step before the floor division rounds the quotient up
        # whenever the remainder exceeds half a step.
        if days % step * 2 > step:
            return days + step
        return days

    dn = _rounded(span.days)
    dm = _rounded((span - tolerance).days)
    dp = _rounded((span + tolerance).days)
    recdblist.printutf8(str(step) + ":" + str(dn))

    # '//' keeps the intervals integral under both division semantics.
    if dp != dn:
        return [[dn // step, 60], [dp // step, 40]]
    if dm != dn:
        return [[dn // step, 60], [dm // step, 40]]
    return [[dn // step, 100]]
def detNameType(title,path):
    """
    Classify a recording title and split it into its parts.

    type A  --- title#<number>
    type B  --- title#<number>subtitle
    type C  --- title subtitle
    type D  --- title (without number)
    type Aj --- title第<number>話
    path    --- directory handed to searchFolder to look for a matching folder

    Returns a dict with the keys 'title', 'type', 'num', 'subtitle' and
    'folder'. 'num' is the parsed number for types A, Aj and B and -1 for
    types C and D. 'folder' is whatever searchFolder returned for the
    title ("" when nothing matched).
    """
    recdblist.printutf8(title)
    title = auto_move.getTitle(title)  # strip the date/time from the title
    # \d+ rather than \d*: a '#' that is not followed by digits must not
    # reach int(""), which raises ValueError.
    tA = re.match(r"(.+)#(\d+)\s*\Z", title)
    tAj = re.match(r"(.+)第(\d+)話\s*\Z", title)
    tB = re.match(r"(.+)#(\d+)\s*(\D*)", title)
    ret = {'title': "", 'type': "", 'num': 0, 'subtitle': "", 'folder': ""}
    # One chain, most specific pattern first. The type B pattern also
    # matches every type A title, so with a separate 'if' for A the result
    # was always overwritten as type B.
    if tA:
        recdblist.printutf8("typeA")
        recdblist.printutf8("title=" + tA.group(1))
        recdblist.printutf8("num=" + tA.group(2))
        ret['type'] = "A"
        ret['title'] = tA.group(1)
        ret['num'] = int(tA.group(2))
        ret['folder'] = searchFolder(tA.group(1), path)
    elif tAj:
        recdblist.printutf8("typeAj")
        recdblist.printutf8("title=" + tAj.group(1))
        recdblist.printutf8("num=" + tAj.group(2))
        ret['type'] = "Aj"
        ret['title'] = tAj.group(1)
        ret['num'] = int(tAj.group(2))
        ret['folder'] = searchFolder(tAj.group(1), path)
    elif tB:
        recdblist.printutf8("typeB")
        recdblist.printutf8("title=" + tB.group(1))
        recdblist.printutf8("num=" + tB.group(2))
        recdblist.printutf8("subtitle=" + tB.group(3))
        ret['type'] = "B"
        ret['title'] = tB.group(1)
        ret['num'] = int(tB.group(2))
        ret['folder'] = searchFolder(tB.group(1), path)
        ret['subtitle'] = tB.group(3)
    else:
        # Type C or D: grow the title word by word and look for a folder
        # that matches the prefix built so far.
        prefix = ""
        matched_prefix = ""
        folder = ""
        for word in title.split(" "):
            prefix = prefix + " " + word
            recdblist.printutf8(prefix)
            if folder != "":
                continue
            # searchFolder returns the folder path only (no score), so
            # prefixes cannot be ranked against each other; the first
            # prefix that yields a folder is kept and the whole path is
            # stored (indexing the returned string produced one character).
            found = searchFolder(prefix, path)
            if found != "":
                matched_prefix = prefix
                folder = found
        series = matched_prefix[1:]  # drop the leading separator space
        ret['title'] = series
        ret['num'] = -1
        ret['folder'] = folder
        # NOTE(review): when no prefix matched, 'series' is "" and this
        # drops the first character of the title; kept as in the original.
        ret['subtitle'] = title.replace(series, "")[1:]
        if ret['subtitle'].replace(" ", "") == "":
            ret['type'] = "D"
        else:
            ret['type'] = "C"
    return ret
def searchFolder(title,path,threshold=500):
    """
    Search below path for the folder that best matches title.

    title     -- title to look for (normalised with auto_move.getTitle)
    path      -- directory that is walked, including nested directories
    threshold -- the best n_gram.trigram score must exceed this value

    Every file and directory below path is scored by comparing its own
    name (normalised with auto_move.getTitle) against the title using
    n_gram.trigram. A directory votes for itself, a file votes for the
    directory that contains it. Returns the folder with the highest
    score when that score is greater than threshold, otherwise "".
    """
    # Walk the tree with an explicit work list of full paths. Joining the
    # full paths with 'path' a second time, as the original did while
    # extending the list it was iterating, produced wrong paths whenever
    # 'path' was relative.
    pending = [os.path.join(path, name) for name in os.listdir(path)]
    entries = []  # (name to compare, folder to report)
    while pending:
        fullpath = pending.pop()
        if os.path.isdir(fullpath):
            # A directory is compared by its own name; it used to be
            # compared by its parent path, which gave every sibling
            # directory the same score.
            entries.append((os.path.basename(fullpath), fullpath))
            for child in os.listdir(fullpath):
                pending.append(os.path.join(fullpath, child))
        else:
            entries.append((os.path.basename(fullpath), os.path.dirname(fullpath)))
    if not entries:
        return ""

    ntitle = auto_move.getTitle(title)  # same for every entry, computed once
    scored = []
    for name, folder in entries:
        score = n_gram.trigram(ntitle, auto_move.getTitle(name))
        if score > 0:
            scored.append((score, folder))
    if not scored:
        return ""

    # Highest score wins; equal scores are decided by the greater folder
    # path, exactly as the previous sort-and-reverse did.
    best_score, best_folder = max(scored)
    if best_score > threshold:
        return best_folder
    return ""