1 package jp.sourceforge.stigmata.birthmarks.comparators;
3 import java.util.HashMap;
6 import jp.sourceforge.stigmata.Birthmark;
7 import jp.sourceforge.stigmata.BirthmarkContext;
8 import jp.sourceforge.stigmata.BirthmarkElement;
9 import jp.sourceforge.stigmata.birthmarks.ValueCountable;
10 import jp.sourceforge.stigmata.spi.BirthmarkSpi;
13 * Compares birthmarks using the cosine similarity algorithm. The elements
14 * of the compared birthmarks must implement the
15 * {@link ValueCountable <code>ValueCountable</code>} interface.
17 * @author Haruaki Tamada
19 public class CosineSimilarityBirthmarkComparator extends
20 AbstractBirthmarkComparator{
22 public CosineSimilarityBirthmarkComparator(BirthmarkSpi spi){
27 public double compare(Birthmark b1, Birthmark b2, BirthmarkContext context){
28 if(!b1.getType().equals(b2.getType())){
31 if(b1.getElementCount() == 0 && b2.getElementCount() == 0){
34 else if(b1.getElementCount() == 0 || b2.getElementCount() == 0){
38 Map<String, CountPair> pairs = new HashMap<String, CountPair>();
39 addCount(pairs, b1, true);
40 addCount(pairs, b2, false);
42 double norm1 = norm(pairs, true);
43 double norm2 = norm(pairs, false);
44 double product = innerproduct(pairs);
45 double similarity = product / (norm1 * norm2);
46 // System.out.printf("%g / (%g * %g) = %g%n", product, norm1, norm2, similarity);
48 // double radian = Math.acos(product / (norm1 * norm2));
49 // double angle = 90 - (180 * radian / Math.PI);
50 // double sim = angle / 90;
51 // System.out.printf("angle: %g (%g deg, %g)%n", radian, angle, sim);
56 private double innerproduct(Map<String, CountPair> pairs){
58 for(CountPair pair: pairs.values()){
59 sum += pair.get(true) * pair.get(false);
64 private double norm(Map<String, CountPair> pairs, boolean first){
66 for(CountPair pair: pairs.values()){
67 sum += pair.get(first) * pair.get(first);
69 return Math.sqrt(sum);
72 private void addCount(Map<String, CountPair> pairs, Birthmark birthmark, boolean first){
73 for(BirthmarkElement element: birthmark){
74 ValueCountable vc = (ValueCountable)element;
75 CountPair cp = pairs.get(vc.getValueName());
78 pairs.put(vc.getValueName(), cp);
80 cp.set(first, vc.getValueCount());
84 private class CountPair{
88 public int get(boolean first){
97 public void set(boolean first, int count){
108 * Prints every entry of the given pair map to standard output. This method is used only for debugging.
110 @SuppressWarnings("unused")
111 private void printAll(Map<String, CountPair> pairs){
112 System.out.println("----------");
113 for(Map.Entry<String, CountPair> entry: pairs.entrySet()){
114 CountPair pair = entry.getValue();
115 System.out.printf("%40s: %5d, %5d%n", entry.getKey(), pair.get(true), pair.get(false));