OSDN Git Service

ver1.1
[nysol/mining.git] / take / lib / enumLcmIs.rb
diff --git a/take/lib/enumLcmIs.rb b/take/lib/enumLcmIs.rb
new file mode 100644 (file)
index 0000000..9940c60
--- /dev/null
@@ -0,0 +1,190 @@
+#!/usr/bin/env ruby
+# encoding: utf-8
+
+require "rubygems"
+require "fileutils"
+require "mcmd"
+
+require "lcm"
+require "zdd"
+require "traDB.rb"
+
+module TAKE
+
+
+#========================================================================
+# 列挙関数:lcm 利用DB:TraDB
+#========================================================================
+class LcmIs
+       attr_reader :size  # 列挙されたパターン数
+
+       def reduceTaxo(pat,items)
+               tf=MCMD::Mtemp.new
+
+               if items.taxonomy==nil then
+                       return pat
+               end
+
+               xxrt = tf.file
+               taxo=items.taxonomy
+               f=""
+               f << "mtrafld f=#{taxo.itemFN},#{taxo.taxoFN} -valOnly a=__fld i=#{taxo.file} o=#{xxrt}"
+               system(f)
+
+               # xxrtの内容:oyakoに親子関係にあるアイテム集合のリストが格納される
+               # __fld
+               # A X
+               # B X
+               # C Y
+               # D Z
+               # E Z
+               # F Z
+               oyako=ZDD.constant(0)
+               MCMD::Mcsvin.new("i=#{xxrt}"){|csv|
+                       csv.each{|fldVal|
+                               items=fldVal["__fld"]
+                               oyako=oyako+ZDD.itemset(items)
+                       }
+               }
+
+               # 親子リストにあるアイテム集合を含むパターンを削除する
+               pat=pat.restrict(oyako).iif(0,pat)
+
+               return pat
+       end
+
+       def initialize(db)
+               @temp=MCMD::Mtemp.new
+
+               @db = db # 入力データベース
+
+               @file=@temp.file
+
+               items=@db.items
+
+               # アイテムをシンボルから番号に変換する。
+               f=""
+               f << "msortf f=#{@db.itemFN}                                                   i=#{@db.file} |"
+               f << "mjoin  k=#{@db.itemFN} K=#{items.itemFN} m=#{items.file} f=#{items.idFN} |"
+               f << "mcut   f=#{@db.idFN},#{items.idFN}                                       |"
+               f << "msortf f=#{@db.idFN}                                                     |"
+               f << "mtra   k=#{@db.idFN} f=#{items.idFN}                                     |"
+               f << "mcut   f=#{items.idFN} -nfno                                             o=#{@file}"
+               system(f)
+       end
+
+       def enumerate(type, minSup, lenLB=1, lenUB=4, top=10000, minSupCnt=0)
+
+               tf=MCMD::Mtemp.new
+
+               @type      = type
+               if minSupCnt!=0 then
+                       @minSupCnt = minSupCnt
+                       @minSup    = minSupCnt.to_f / @db.size.to_f
+               else
+                       @minSup    = minSup.to_f
+                       @minSupCnt = (@minSup * @db.size.to_f + 0.99).to_i
+               end
+               @lenLB     = lenLB
+               @lenUB     = lenUB
+               @top       = top
+
+               xxp = tf.file #MCMD::Mtemp.new
+               xxt = tf.file #MCMD::Mtemp.new
+
+               if(top==0) then
+                       MCMD::lcm("type=#{@type} i=#{@file} s=#{@minSupCnt} l=#{@lenLB} u=#{@lenUB} o=#{xxp} t=#{xxt}")
+               else
+                       MCMD::lcm("type=#{@type} i=#{@file} s=#{@minSupCnt} l=#{@lenLB} u=#{@lenUB} o=#{xxp} t=#{xxt} K=#{@top}")
+               end
+
+               # パターンのサポートを計算しCSV出力する
+               MCMD::msgLog("output patterns to CSV file ...")
+               xxp0=tf.file
+               @pFile = @temp.file
+               items=@db.items
+               f=""
+               f << "mcut     -nfni f=0:pid,1:pattern,2:count                               i=#{xxp} |"
+               f << "mdelnull f=pattern                                                     |"
+               f << "mvreplace vf=pattern m=#{items.file} K=#{items.idFN} f=#{items.itemFN} |"
+               f << "msetstr  v=#{@db.size} a=total                                         |" # トータル件数
+               f << "mcal     c='${count}/${total}' a=support                               |" # サポートの計算
+               f << "mcut     f=pid,pattern,count,total,support                             |"
+               f << "mtra  -r f=pattern                                                     |"
+               f << "msortf   f=pid,pattern                                                 |"
+               f << "mtra     k=pid f=pattern                                               |"
+               f << "mvsort   vf=pattern |"
+               f << "msortf   f=pattern                                                     o=#{xxp0}"
+               system(f)
+
+               # taxonomy指定がない場合(2010/11/20追加)
+               if items.taxonomy==nil then
+                       FileUtils.cp(xxp0, @pFile)
+
+               # taxonomy指定がある場合
+               else
+                       MCMD::msgLog("reducing redundant rules in terms of taxonomy ...")
+                       zdd=ZDD.constant(0)
+                       MCMD::Mcsvin.new("i=#{xxp0}"){|csv|
+                               csv.each{|fldVal|
+                                       items=fldVal['pattern']
+                                       zdd=zdd+ZDD.itemset(items)
+                               }
+                       }
+
+                       zdd=reduceTaxo(zdd,@db.items)
+                       xxp1=tf.file
+                       xxp2=tf.file
+                       zdd.csvout(xxp1)
+                       f=""
+                       f << "mcut   -nfni f=1:pattern i=#{xxp1} |"
+                       f << "mvsort vf=pattern        |"
+                       f << "msortf f=pattern         o=#{xxp2}"
+                       system(f)
+
+                       f=""
+                       f << "msortf  f=pattern           i=#{xxp0} |"
+                       f << "mcommon k=pattern m=#{xxp2} |"
+                       f << "msortf  f=support%nr        o=#{@pFile}"
+                       system(f)
+
+               end
+
+               @size = MCMD::mrecount("i=#{@pFile}") # 列挙されたパターンの数
+               MCMD::msgLog("the number of patterns enumerated is #{@size}")
+
+               # トランザクション毎に出現するシーケンスを書き出す
+               MCMD::msgLog("output tid-patterns ...")
+               @tFile = @temp.file
+
+               xxp3=tf.file
+               f=""
+               f << "mcut    f=#{@db.idFN} i=#{@db.file} |"
+               f << "muniq   k=#{@db.idFN} |"
+               f << "mnumber S=0 a=__tno   |"
+               f << "msortf  f=__tno       o=#{xxp3}"
+               system(f)
+
+               xxp4=tf.file
+               f=""
+               f << "mcut    f=pid i=#{@pFile} |"
+               f << "msortf  f=pid o=#{xxp4}"
+               system(f)
+
+               f=""
+               f << "mcut     -nfni f=0:__tno,1:pid           i=#{xxt} |"
+               f << "msortf   f=pid                           |"
+               f << "mcommon  k=pid m=#{xxp4}                 |"
+               f << "msortf   f=__tno                         |"
+               f << "mjoin    k=__tno m=#{xxp3} f=#{@db.idFN} |"
+               f << "mcut     f=#{@db.idFN},pid               o=#{@tFile}"
+               system(f)
+       end
+
+  def output(outpath)
+               system "mv #{@pFile} #{outpath}/patterns.csv"
+               system "mv #{@tFile} #{outpath}/tid_pats.csv"
+       end
+end
+
+end #module
+