OSDN Git Service

add 2010 copyright.
[rec10/rec10-git.git] / rec10 / trunk / src / guess.py
1 #!/usr/bin/python
2 # coding: UTF-8
3 # Rec10 TS Recording Tools
4 # Copyright (C) 2009-2010 Yukikaze
5
6 import os
7 import re
8 import time
9 import os
10 import datetime
11 import n_gram
12 import recdblist
13 import zenhan
14
def detName(title,path):
    """
    Classify a title and, for unnumbered titles, ask detNum for a number.

    title -- title string, handed to detNameType unchanged
    path  -- directory handed to detNameType for its folder search

    Returns the dict produced by detNameType. When its 'type' is "C" or "D"
    the 'num' entry is replaced with whatever detNum returns for the
    detected title and folder.
    """
    info = detNameType(title, path)
    if info['type'] in ("C", "D"):
        info['num'] = detNum(info['title'], info['folder'])
    # The dict used to be built and then dropped, so callers always got None.
    return info
def detNum(title,movepath):
    """
    Gather the numbered files in movepath and pass them to detMultiDeltaDays.

    title    -- not used by this function
    movepath -- directory whose direct entries are inspected

    Every regular file in movepath has its extension stripped, is converted
    with zenhan.toHankaku and classified with detNameType. Files whose
    detected number is not -1 are collected as
    [number, title, modification datetime].

    NOTE(review): the value computed by detMultiDeltaDays is not returned,
    so this function returns None -- confirm what callers expect here.
    """
    entries = []
    for entry in os.listdir(movepath):
        recdblist.printutf8(entry)
        fullpath = os.path.join(movepath, entry)
        if not os.path.isfile(fullpath):
            continue
        stem = os.path.splitext(os.path.basename(fullpath))[0]
        stem = zenhan.toHankaku(unicode(stem))
        parsed = detNameType(stem, movepath)
        recdblist.printutf8(parsed['title']+" "+str(parsed['num']))
        if parsed['num'] != -1:
            # Only numbered files need a timestamp.
            modified = datetime.datetime.fromtimestamp(os.path.getmtime(fullpath))
            entries.append([parsed['num'], parsed['title'], modified])
    detMultiDeltaDays(entries)
def detMultiDeltaDays(num_with_title_with_dates):
    """
    Vote on the per-number day interval across all pairs of entries.

    num_with_title_with_dates -- list of [number, title, date] items; the
        number is used as a list index and the date is passed on to
        detSingleDeltaDay.

    Entries are grouped by number. Every pair of entries with different
    numbers is handed to detSingleDeltaDay, and the weights it returns are
    summed per key. Returns the non-negative key with the largest total
    weight (the smallest such key on a tie), or 0 when there are no votes.
    Intermediate values are printed to stdout.
    """
    entries = num_with_title_with_dates
    maxnum = 0
    for entry in entries:
        maxnum = max(maxnum, entry[0]+1)
    # One bucket per number, so pairs can be formed between distinct numbers.
    buckets = [[] for _ in range(maxnum)]
    for entry in entries:
        buckets[entry[0]].append(entry)
    votes = {}
    for low in range(maxnum):
        for high in range(low+1, maxnum):
            for first in buckets[low]:
                for second in buckets[high]:
                    for gdd in detSingleDeltaDay(low, first[2], high, second[2]):
                        print(gdd)
                        if gdd[0] in votes:
                            print(votes)
                            votes[gdd[0]] = votes[gdd[0]]+gdd[1]
                        else:
                            votes[gdd[0]] = gdd[1]
    print(votes)
    maxk = 0
    maxp = 0
    # Scan the keys that were actually voted for. The keys are day counts,
    # not entry numbers, so limiting the scan to range(maxnum) missed any
    # interval that was >= the highest number.
    for key in sorted(votes):
        if key >= 0 and maxp < votes[key]:
            maxk = key
            maxp = votes[key]
    print(maxk)
    print(maxp)
    return maxk
def detSingleDeltaDay(num1,date1,num2,date2):
    """
    Estimate the days-per-number interval from two numbered, dated items.

    num1, num2   -- the two numbers
    date1, date2 -- values whose difference is a datetime.timedelta

    Returns a list of [interval, weight] pairs. The interval is the day
    difference divided by the number difference, rounded to the nearest
    multiple. The computation is repeated with the time difference shifted
    by +6 and -6 hours; if a shifted result differs, the unshifted one gets
    weight 60 and the shifted one 40, otherwise the single result gets
    weight 100. Returns an empty list when both numbers are equal, since no
    interval can be derived (this used to raise ZeroDivisionError).
    """
    step = abs(num2-num1)
    if step == 0:
        return []
    gap = abs(date1-date2)
    tolerance = datetime.timedelta(hours=6)

    def _round_up(days):
        # Past the halfway point of a step: bump so the floor division
        # below lands on the next multiple.
        if days%step*2 > step:
            return days+step
        return days

    dn = _round_up(gap.days)
    dm = _round_up((gap-tolerance).days)
    dp = _round_up((gap+tolerance).days)
    recdblist.printutf8(str(step)+":"+str(dn))
    # Explicit floor division keeps the integer result the Python 2 "/"
    # produced, independent of the interpreter's division mode.
    if dp != dn:
        return [[dn//step, 60], [dp//step, 40]]
    if dm != dn:
        return [[dn//step, 60], [dm//step, 40]]
    return [[dn//step, 100]]
def detNameType(title,path):
    """
    Split a title into title / number / subtitle and classify its format.

    type A  --- title#<number>
    type B  --- title#<number>subtitle
    type C  --- title subtitle
    type D  --- title (without number)
    type Aj --- title第<number>話

    title -- the string to classify
    path  -- directory passed to searchFolder

    Returns a dict with the keys 'title', 'type', 'num', 'subtitle' and
    'folder'. For types A, Aj and B, 'folder' is the raw searchFolder
    result; for types C and D it is the folder path of the best match (""
    when nothing matched) and 'num' is -1.

    The branches are mutually exclusive. Previously the type A branch was
    not chained to the others, so a type A title was always overwritten as
    type B. The number group now needs at least one digit, so a bare "#"
    falls through to type C/D instead of raising on int("").
    """
    recdblist.printutf8(title)
    #rA=re.compile(".+(?P<title>)#\d(?P<num>)\s[0,10]\z")
    tA=re.match(r"(.+)#(\d+)\s*\Z", title)
    tB=re.match(r"(.+)#(\d+)\s*(\D*)", title)
    tAj=re.match(r"(.+)第(\d+)話\s*\Z", title)
    ret={'title':"",'type':"",'num':0,'subtitle':"",'folder':""}
    if tA:
        recdblist.printutf8("typeA")
        recdblist.printutf8("title="+tA.group(1))
        recdblist.printutf8("num="+tA.group(2))
        ret['type']="A"
        ret['title']=tA.group(1)
        ret['num']=int(tA.group(2))
        ret['folder']=searchFolder(tA.group(1),path)
    elif tAj:
        recdblist.printutf8("typeAj")
        recdblist.printutf8("title="+tAj.group(1))
        recdblist.printutf8("num="+tAj.group(2))
        ret['type']="Aj"
        ret['title']=tAj.group(1)
        ret['num']=int(tAj.group(2))
        ret['folder']=searchFolder(tAj.group(1),path)
    elif tB:
        recdblist.printutf8("typeB")
        recdblist.printutf8("title="+tB.group(1))
        recdblist.printutf8("num="+tB.group(2))
        recdblist.printutf8("subtitle="+tB.group(3))
        ret['type']="B"
        ret['title']=tB.group(1)
        ret['num']=int(tB.group(2))
        ret['folder']=searchFolder(tB.group(1),path)
        ret['subtitle']=tB.group(3)
    else:
        # Type C or D: grow the title one space-separated word at a time and
        # keep the prefix whose folder match scores highest.
        prefix=""
        best=["",0,""]
        for word in title.split(" "):
            prefix=prefix+" "+word
            found=searchFolder(prefix,path)
            recdblist.printutf8(prefix)
            print(found)
            if found!="" and found[1]>best[1]:
                best=[prefix,found[1],found[0]]
        # Every prefix carries a leading space from the concatenation above.
        matched=best[0][1:]
        ret['title']=matched
        ret['num']=-1
        ret['folder']=best[2]
        # NOTE(review): when no prefix matched, matched is "" and this yields
        # the title minus its first character -- confirm this is intended.
        ret['subtitle']=title.replace(matched,"")[1:]
        if ret['subtitle'].replace(" ","")=="":
            ret['type']="D"
        else:
            ret['type']="C"
    return ret
def searchFolder(title,path):
    """
    Find the folder under path that best matches title.

    title -- byte string; decoded as UTF-8 before comparison
    path  -- root directory; every file and directory below it is examined

    Each entry is scored with n_gram.trigram against title. A file is
    compared by its own name and nominates its containing directory. A
    directory is compared by its parent path and nominates itself.
    NOTE(review): comparing a directory by its parent path rather than its
    own name looks unusual -- confirm.

    Returns [folder, score] for the highest-scoring candidate when its score
    is above 300, otherwise "".
    """
    # Worklist of full paths. Children are joined to their own parent, so
    # relative roots work too; re-joining already-joined children with path
    # produced wrong paths for them.
    pending=[os.path.join(path, name) for name in os.listdir(path)]
    idx=0
    while idx<len(pending):
        current=pending[idx]
        idx+=1
        if os.path.isdir(current):
            for child in os.listdir(current):
                pending.append(os.path.join(current, child))
    scored=set()
    for entry in pending:
        if os.path.isdir(entry):
            compare=os.path.dirname(entry)
            folder=entry
        else:
            compare=os.path.basename(entry)
            folder=os.path.dirname(entry)
        score=n_gram.trigram(title.decode("utf-8"),compare.decode("utf-8"))
        if score>0:
            scored.add((score,folder))
    if scored:
        # Highest score wins; equal scores fall back to comparing the path.
        best=max(scored)
        #recdblist.printutf8(title + best[1] + " : "+str(best[0]))
        if best[0]>300:
            return [best[1],best[0]]
    return ""