14 #========================================================================
16 #========================================================================
18 attr_reader :size # 列挙されたパターン数
20 def reduceTaxo(pat,items)
23 if items.taxonomy==nil then
30 f << "mtrafld f=#{taxo.itemFN},#{taxo.taxoFN} -valOnly a=__fld i=#{taxo.file} o=#{xxrt}"
33 # xxrt holds the item sets that are in a parent-child (taxonomy) relationship; they are read back and accumulated into oyako
42 MCMD::Mcsvin.new("i=#{xxrt}"){|csv|
45 oyako=oyako+ZDD.itemset(items)
49 # Remove every pattern that contains an item set from the parent-child list
50 pat=pat.restrict(oyako).iif(0,pat)
66 f << "msortf f=#{@db.itemFN} i=#{@db.file} |"
67 f << "mjoin k=#{@db.itemFN} K=#{items.itemFN} m=#{items.file} f=#{items.idFN} |"
68 f << "mcut f=#{@db.idFN},#{items.idFN} |"
69 f << "msortf f=#{@db.idFN} |"
70 f << "mtra k=#{@db.idFN} f=#{items.idFN} |"
71 f << "mcut f=#{items.idFN} -nfno o=#{@file}"
75 def enumerate(type, minSup, lenLB=1, lenUB=4, top=10000, minSupCnt=0)
81 @minSupCnt = minSupCnt
82 @minSup = minSupCnt.to_f / @db.size.to_f
85 @minSupCnt = (@minSup * @db.size.to_f + 0.99).to_i
91 xxp = tf.file #MCMD::Mtemp.new
92 xxt = tf.file #MCMD::Mtemp.new
95 MCMD::lcm("type=#{@type} i=#{@file} s=#{@minSupCnt} l=#{@lenLB} u=#{@lenUB} o=#{xxp} t=#{xxt}")
97 MCMD::lcm("type=#{@type} i=#{@file} s=#{@minSupCnt} l=#{@lenLB} u=#{@lenUB} o=#{xxp} t=#{xxt} K=#{@top}")
100 # Compute the support of each pattern and write the result out as CSV
101 MCMD::msgLog("output patterns to CSV file ...")
106 f << "mcut -nfni f=0:pid,1:pattern,2:count i=#{xxp} |"
107 f << "mdelnull f=pattern |"
108 f << "mvreplace vf=pattern m=#{items.file} K=#{items.idFN} f=#{items.itemFN} |"
109 f << "msetstr v=#{@db.size} a=total |" # トータル件数
110 f << "mcal c='${count}/${total}' a=support |" # サポートの計算
111 f << "mcut f=pid,pattern,count,total,support |"
112 f << "mtra -r f=pattern |"
113 f << "msortf f=pid,pattern |"
114 f << "mtra k=pid f=pattern |"
115 f << "mvsort vf=pattern |"
116 f << "msortf f=pattern o=#{xxp0}"
119 # Case where no taxonomy is specified (added 2010/11/20)
120 if items.taxonomy==nil then
121 FileUtils.cp(xxp0, @pFile)
125 MCMD::msgLog("reducing redundant rules in terms of taxonomy ...")
127 MCMD::Mcsvin.new("i=#{xxp0}"){|csv|
129 items=fldVal['pattern']
130 zdd=zdd+ZDD.itemset(items)
134 zdd=reduceTaxo(zdd,@db.items)
139 f << "mcut -nfni f=1:pattern i=#{xxp1} |"
140 f << "mvsort vf=pattern |"
141 f << "msortf f=pattern o=#{xxp2}"
145 f << "msortf f=pattern i=#{xxp0} |"
146 f << "mcommon k=pattern m=#{xxp2} |"
147 f << "msortf f=support%nr o=#{@pFile}"
152 @size = MCMD::mrecount("i=#{@pFile}") # 列挙されたパターンの数
153 MCMD::msgLog("the number of patterns enumerated is #{@size}")
155 # Write out the patterns that occur in each transaction
156 MCMD::msgLog("output tid-patterns ...")
161 f << "mcut f=#{@db.idFN} i=#{@db.file} |"
162 f << "muniq k=#{@db.idFN} |"
163 f << "mnumber S=0 a=__tno |"
164 f << "msortf f=__tno o=#{xxp3}"
169 f << "mcut f=pid i=#{@pFile} |"
170 f << "msortf f=pid o=#{xxp4}"
174 f << "mcut -nfni f=0:__tno,1:pid i=#{xxt} |"
175 f << "msortf f=pid |"
176 f << "mcommon k=pid m=#{xxp4} |"
177 f << "msortf f=__tno |"
178 f << "mjoin k=__tno m=#{xxp3} f=#{@db.idFN} |"
179 f << "mcut f=#{@db.idFN},pid o=#{@tFile}"
184 system "mv #{@pFile} #{outpath}/patterns.csv"
185 system "mv #{@tFile} #{outpath}/tid_pats.csv"