OSDN Git Service

0127407ef6041e2b37004cf00affc6ab3cb9eaf2
[stigmata/stigmata.git] / src / main / java / jp / sourceforge / stigmata / birthmarks / comparators / EditDistanceBirthmarkComparator.java
1 package jp.sourceforge.stigmata.birthmarks.comparators;
2
3 import jp.sourceforge.stigmata.Birthmark;
4 import jp.sourceforge.stigmata.BirthmarkContext;
5 import jp.sourceforge.stigmata.BirthmarkElement;
6 import jp.sourceforge.stigmata.spi.BirthmarkSpi;
7
8 /**
9  * calculate similarities between two birthmarks by edit distance
10  * algorithm (levenshtein distance).
11  *
12  * @author Haruaki TAMADA
13  */
14 public class EditDistanceBirthmarkComparator extends AbstractBirthmarkComparator{
15     public EditDistanceBirthmarkComparator(BirthmarkSpi spi){
16         super(spi);
17     }
18
19     @Override
20     public double compare(Birthmark b1, Birthmark b2, BirthmarkContext context){
21         if(!b1.getType().equals(b2.getType())){
22             return Double.NaN;
23         }
24
25         BirthmarkElement[] element1 = b1.getElements();
26         BirthmarkElement[] element2 = b2.getElements();
27         int[][] distance = createDistanceMatrics(element1, element2);
28
29         int length = element1.length;
30         if(length < element2.length){
31             length = element2.length;
32         }
33         int d = distance[element1.length][element2.length];
34
35         if(element1.length == 0 && element2.length == 0){
36             return 1d;
37         }
38         return (double)(length - d) / length;
39     }
40
41     protected int[][] createDistanceMatrics(BirthmarkElement[] element1,
42                                             BirthmarkElement[] element2){
43         int[][] distance = new int[element1.length + 1][element2.length + 1];
44         for(int i = 0; i <= element1.length; i++) distance[i][0] = i;
45         for(int i = 0; i <= element2.length; i++) distance[0][i] = i;
46
47         for(int i = 1; i <= element1.length; i++){
48             for(int j = 1; j <= element2.length; j++){
49                 int cost = 1;
50                 if(element1[i - 1] == null){
51                     if(element2[j - 1] == null) cost = 0;
52                     else                        cost = 1;
53                 }
54                 else{
55                     if(element1[i - 1].equals(element2[j - 1])) cost = 0;
56                     else                                        cost = 1;
57                 }
58                 int insertion = distance[i - 1][j    ] + 1;
59                 int deletion  = distance[i    ][j - 1] + 1;
60                 int replace   = distance[i - 1][j - 1] + cost;
61
62                 if(insertion <= deletion && insertion <= replace) distance[i][j] = insertion;
63                 else if(deletion <= replace)                      distance[i][j] = deletion;
64                 else                                              distance[i][j] = replace;
65             }
66         }
67         return distance;
68     }
69 }