OSDN Git Service

dd887b78368f8c040fe8e899b3180a561fc9552d
[stigmata/stigmata.git] / src / main / java / jp / sourceforge / stigmata / birthmarks / comparators / EditDistanceBirthmarkComparator.java
1 package jp.sourceforge.stigmata.birthmarks.comparators;
2
3 /*
4  * $Id$
5  */
6
7 import jp.sourceforge.stigmata.Birthmark;
8 import jp.sourceforge.stigmata.BirthmarkContext;
9 import jp.sourceforge.stigmata.BirthmarkElement;
10 import jp.sourceforge.stigmata.spi.BirthmarkSpi;
11
12 /**
13  * calculate similarities between two birthmarks by edit distance
14  * algorithm (levenshtein distance).
15  *
16  * @author Haruaki TAMADA
17  * @version $Revision$ 
18  */
19 public class EditDistanceBirthmarkComparator extends AbstractBirthmarkComparator{
20     public EditDistanceBirthmarkComparator(BirthmarkSpi spi){
21         super(spi);
22     }
23
24     @Override
25     public double compare(Birthmark b1, Birthmark b2, BirthmarkContext context){
26         if(!b1.getType().equals(b2.getType())){
27             return Double.NaN;
28         }
29
30         BirthmarkElement[] element1 = b1.getElements();
31         BirthmarkElement[] element2 = b2.getElements();
32         int[][] distance = createDistanceMatrics(element1, element2);
33
34         int length = element1.length;
35         if(length < element2.length){
36             length = element2.length;
37         }
38         int d = distance[element1.length][element2.length];
39
40         if(element1.length == 0 && element2.length == 0){
41             return 1d;
42         }
43         return (double)(length - d) / length;
44     }
45
46     protected int[][] createDistanceMatrics(BirthmarkElement[] element1,
47                                             BirthmarkElement[] element2){
48         int[][] distance = new int[element1.length + 1][element2.length + 1];
49         for(int i = 0; i <= element1.length; i++) distance[i][0] = i;
50         for(int i = 0; i <= element2.length; i++) distance[0][i] = i;
51
52         for(int i = 1; i <= element1.length; i++){
53             for(int j = 1; j <= element2.length; j++){
54                 int cost = 1;
55                 if(element1[i - 1] == null){
56                     if(element2[j - 1] == null) cost = 0;
57                     else                        cost = 1;
58                 }
59                 else{
60                     if(element1[i - 1].equals(element2[j - 1])) cost = 0;
61                     else                                        cost = 1;
62                 }
63                 int insertion = distance[i - 1][j    ] + 1;
64                 int deletion  = distance[i    ][j - 1] + 1;
65                 int replace   = distance[i - 1][j - 1] + cost;
66
67                 if(insertion <= deletion && insertion <= replace) distance[i][j] = insertion;
68                 else if(deletion <= replace)                      distance[i][j] = deletion;
69                 else                                              distance[i][j] = replace;
70             }
71         }
72         return distance;
73     }
74 }