OSDN Git Service

9940c606cd724b6d49c054f70400dedf274d3d0f
[nysol/mining.git] / take / lib / enumLcmIs.rb
1 #!/usr/bin/env ruby
2 # encoding: utf-8
3
require "rubygems"
require "fileutils"
require "mcmd"

require "lcm"
require "zdd"
require "traDB.rb"
10
11 module TAKE
12
13
14 #========================================================================
15 # 列挙関数:lcm 利用DB:TraDB
16 #========================================================================
17 class LcmIs
18         attr_reader :size  # 列挙されたパターン数
19
20         def reduceTaxo(pat,items)
21                 tf=MCMD::Mtemp.new
22
23                 if items.taxonomy==nil then
24                         return pat
25                 end
26
27                 xxrt = tf.file
28                 taxo=items.taxonomy
29                 f=""
30                 f << "mtrafld f=#{taxo.itemFN},#{taxo.taxoFN} -valOnly a=__fld i=#{taxo.file} o=#{xxrt}"
31                 system(f)
32
33                 # xxrtの内容:oyakoに親子関係にあるアイテム集合のリストが格納される
34                 # __fld
35                 # A X
36                 # B X
37                 # C Y
38                 # D Z
39                 # E Z
40                 # F Z
41                 oyako=ZDD.constant(0)
42                 MCMD::Mcsvin.new("i=#{xxrt}"){|csv|
43                         csv.each{|fldVal|
44                                 items=fldVal["__fld"]
45                                 oyako=oyako+ZDD.itemset(items)
46                         }
47                 }
48
49                 # 親子リストにあるアイテム集合を含むパターンを削除する
50                 pat=pat.restrict(oyako).iif(0,pat)
51
52                 return pat
53         end
54
55         def initialize(db)
56                 @temp=MCMD::Mtemp.new
57
58                 @db = db # 入力データベース
59
60                 @file=@temp.file
61
62                 items=@db.items
63
64                 # アイテムをシンボルから番号に変換する。
65                 f=""
66                 f << "msortf f=#{@db.itemFN}                                                   i=#{@db.file} |"
67                 f << "mjoin  k=#{@db.itemFN} K=#{items.itemFN} m=#{items.file} f=#{items.idFN} |"
68                 f << "mcut   f=#{@db.idFN},#{items.idFN}                                       |"
69                 f << "msortf f=#{@db.idFN}                                                     |"
70                 f << "mtra   k=#{@db.idFN} f=#{items.idFN}                                     |"
71                 f << "mcut   f=#{items.idFN} -nfno                                             o=#{@file}"
72                 system(f)
73         end
74
75         def enumerate(type, minSup, lenLB=1, lenUB=4, top=10000, minSupCnt=0)
76
77                 tf=MCMD::Mtemp.new
78
79                 @type      = type
80                 if minSupCnt!=0 then
81                         @minSupCnt = minSupCnt
82                         @minSup    = minSupCnt.to_f / @db.size.to_f
83                 else
84                         @minSup    = minSup.to_f
85                         @minSupCnt = (@minSup * @db.size.to_f + 0.99).to_i
86                 end
87                 @lenLB     = lenLB
88                 @lenUB     = lenUB
89                 @top       = top
90
91                 xxp = tf.file #MCMD::Mtemp.new
92                 xxt = tf.file #MCMD::Mtemp.new
93
94                 if(top==0) then
95                         MCMD::lcm("type=#{@type} i=#{@file} s=#{@minSupCnt} l=#{@lenLB} u=#{@lenUB} o=#{xxp} t=#{xxt}")
96                 else
97                         MCMD::lcm("type=#{@type} i=#{@file} s=#{@minSupCnt} l=#{@lenLB} u=#{@lenUB} o=#{xxp} t=#{xxt} K=#{@top}")
98                 end
99
100                 # パターンのサポートを計算しCSV出力する
101                 MCMD::msgLog("output patterns to CSV file ...")
102                 xxp0=tf.file
103                 @pFile = @temp.file
104                 items=@db.items
105                 f=""
106                 f << "mcut     -nfni f=0:pid,1:pattern,2:count                               i=#{xxp} |"
107                 f << "mdelnull f=pattern                                                     |"
108                 f << "mvreplace vf=pattern m=#{items.file} K=#{items.idFN} f=#{items.itemFN} |"
109                 f << "msetstr  v=#{@db.size} a=total                                         |" # トータル件数
110                 f << "mcal     c='${count}/${total}' a=support                               |" # サポートの計算
111                 f << "mcut     f=pid,pattern,count,total,support                             |"
112                 f << "mtra  -r f=pattern                                                     |"
113                 f << "msortf   f=pid,pattern                                                 |"
114                 f << "mtra     k=pid f=pattern                                               |"
115                 f << "mvsort   vf=pattern |"
116                 f << "msortf   f=pattern                                                     o=#{xxp0}"
117                 system(f)
118
119                 # taxonomy指定がない場合(2010/11/20追加)
120                 if items.taxonomy==nil then
121                         FileUtils.cp(xxp0, @pFile)
122
123                 # taxonomy指定がある場合
124                 else
125                         MCMD::msgLog("reducing redundant rules in terms of taxonomy ...")
126                         zdd=ZDD.constant(0)
127                         MCMD::Mcsvin.new("i=#{xxp0}"){|csv|
128                                 csv.each{|fldVal|
129                                         items=fldVal['pattern']
130                                         zdd=zdd+ZDD.itemset(items)
131                                 }
132                         }
133
134                         zdd=reduceTaxo(zdd,@db.items)
135                         xxp1=tf.file
136                         xxp2=tf.file
137                         zdd.csvout(xxp1)
138                         f=""
139                         f << "mcut   -nfni f=1:pattern i=#{xxp1} |"
140                         f << "mvsort vf=pattern        |"
141                         f << "msortf f=pattern         o=#{xxp2}"
142                         system(f)
143
144                         f=""
145                         f << "msortf  f=pattern           i=#{xxp0} |"
146                         f << "mcommon k=pattern m=#{xxp2} |"
147                         f << "msortf  f=support%nr        o=#{@pFile}"
148                         system(f)
149
150                 end
151
152                 @size = MCMD::mrecount("i=#{@pFile}") # 列挙されたパターンの数
153                 MCMD::msgLog("the number of patterns enumerated is #{@size}")
154
155                 # トランザクション毎に出現するシーケンスを書き出す
156                 MCMD::msgLog("output tid-patterns ...")
157                 @tFile = @temp.file
158
159                 xxp3=tf.file
160                 f=""
161                 f << "mcut    f=#{@db.idFN} i=#{@db.file} |"
162                 f << "muniq   k=#{@db.idFN} |"
163                 f << "mnumber S=0 a=__tno   |"
164                 f << "msortf  f=__tno       o=#{xxp3}"
165                 system(f)
166
167                 xxp4=tf.file
168                 f=""
169                 f << "mcut    f=pid i=#{@pFile} |"
170                 f << "msortf  f=pid o=#{xxp4}"
171                 system(f)
172
173                 f=""
174                 f << "mcut     -nfni f=0:__tno,1:pid           i=#{xxt} |"
175                 f << "msortf   f=pid                           |"
176                 f << "mcommon  k=pid m=#{xxp4}                 |"
177                 f << "msortf   f=__tno                         |"
178                 f << "mjoin    k=__tno m=#{xxp3} f=#{@db.idFN} |"
179                 f << "mcut     f=#{@db.idFN},pid               o=#{@tFile}"
180                 system(f)
181         end
182
183   def output(outpath)
184                 system "mv #{@pFile} #{outpath}/patterns.csv"
185                 system "mv #{@tFile} #{outpath}/tid_pats.csv"
186         end
187 end
188
189 end #module
190