4 #include "recognize_stroke.h"
5 #include "recognize_kanji.h"
/* Debug helper: prints the number of elements in the stroke list `s`,
 * then one "x y" line per element, reading each element's data as a
 * GdkPoint. */
8 static void print_stroke(GList *s)
/* NOTE(review): g_list_length() returns guint but is printed with %d. */
10 g_printf("stroke len = %d\n", g_list_length(s));
/* Walks the list by advancing the parameter itself until it is NULL. */
11 for(;s; s = g_list_next(s))
/* The coordinates are stored in gint16 locals before being printed. */
13 gint16 x = ((GdkPoint *)s->data)->x;
14 gint16 y = ((GdkPoint *)s->data)->y;
15 g_printf("%d %d\n", x, y);
/* Per-candidate result record. Its members are not visible in this part
 * of the file; the code in this file reads and writes at least a `dist`
 * member through kanji_result pointers. */
20 typedef struct _kanji_result
26 gint LevenshteinDistance(gint l1, gchar *s1, gint l2, gchar *s2)
28 gchar *d = calloc((l1+1)*(l2+1)+1, sizeof(gchar));
31 for(i = 0; i <= l1; i++)
33 for(j = 0; j <= l2; j++)
36 for(j = 1; j <= l2; j++)
38 for(i = 1; i <= l1; i++)
40 if(s1[i-1] == s2[j-1])
41 d[i*(l2 + 1) + j] = d[(i-1)*(l2 + 1) + j - 1];
44 gint a = d[(i-1)*(l2 + 1) + j] + 1;
45 gint b = d[i*(l2 + 1) + j - 1] + 1;
46 gint c = d[(i-1)*(l2 + 1) + j - 1] + 1;
47 d[i*(l2 + 1) + j] = (a > b) ? ((b > c) ? c : b) : ((a > c) ? c : a);
51 gint ret = d[(l1+1)*(l2+1) - 1];
/* Rates one dictionary entry against the stroke strings in `sdata`:
 * the strings of `sdata` are joined into one buffer, a run of ASCII
 * letters is read from `entry`, and the edit distance between the two is
 * added to the `dist` member of a freshly calloc()-ed kanji_result.
 * NOTE(review): the return statement and any release of s1/s2 are not
 * visible in this part of the file. */
56 static kanji_result* rate_next_kanji(gchar **sdata, gunichar2 *entry)
/* calloc() zero-fills the record, so dist starts at 0. */
58 kanji_result *res = calloc(1, sizeof(kanji_result));
/* Saves the starting position of the entry.
 * NOTE(review): where bakptr is used is not visible here. */
60 gunichar2 *bakptr = entry;
61 gint i, j, l, l1 = 0, l2 = 0;
/* l1 becomes the summed length of all strings in sdata.
 * g_strv_length() is re-evaluated on every iteration of both loops. */
62 for(i = 0; i < g_strv_length(sdata); i++)
63 l1 += strlen(sdata[i]);
/* s1 has exactly l1 bytes, so there is no room for a terminating NUL.
 * NOTE(review): when l1 is 0 this is a zero-size calloc(). */
64 gchar *s1 = calloc(l1, sizeof(gchar));
/* Copies each string into s1 at offset j.
 * NOTE(review): the assignment of l and the advance of j are presumably
 * on lines not shown here - confirm. */
65 for(i = 0, j = 0; i < g_strv_length(sdata); i++)
68 g_memmove(&(s1[j]), sdata[i], l);
/* Counts consecutive ASCII letters, pre-incrementing entry each time, so
 * the unit entry initially points at is skipped and entry ends advanced. */
71 for(l2 = 0; g_ascii_isalpha((gchar)*++entry); l2++);
72 gchar *s2 = calloc(l2, sizeof(gchar));
/* Copies l2 units, each narrowed to gchar, into s2, again pre-incrementing
 * entry. NOTE(review): presumably entry is rewound (e.g. from bakptr) on a
 * line not shown between the two loops - confirm. */
74 for(i = 0; i < l2; s2[i++] = (gchar)(*++entry));
/* Adds the edit distance between the two sequences to the result. */
75 res->dist += LevenshteinDistance(l1, s1, l2, s2);
/* Comparison callback over kanji_result records, ordered by their `dist`
 * member. Each argument is the address of a slot holding a kanji_result
 * pointer, so it is dereferenced once to reach the record. */
81 static gint kanji_results_compare(gpointer *ptr1, gpointer *ptr2)
83 kanji_result *kr1 = (kanji_result*)*ptr1, *kr2 = (kanji_result*)*ptr2;
/* NOTE(review): the values returned for the "greater" and "smaller"
 * cases, and for equal distances, are on lines not shown here. */
84 if(kr1->dist > kr2->dist)
86 if(kr1->dist < kr2->dist)
/* Looks for an entry in the buffer `allkanji` (allkanjilen bytes),
 * starting from `entry`, using `key`.
 * NOTE(review): the matching rule and the return values are on lines not
 * shown here; callers in this file assign the result back to their
 * current entry pointer. */
91 static gunichar2* find_next_entry(gchar *allkanji, gunichar2 *entry, gint allkanjilen, gunichar2 key)
/* Special case: entry is the very start of the buffer and key is 'A'. */
93 if(allkanji == (gchar*)entry && key == 'A')
/* Continues while entry is fewer than allkanjilen bytes past the start
 * of the buffer. */
97 while((gchar*)entry - allkanji < allkanjilen)
/* Collects candidates for the stroke strings in `sdata` from the buffer
 * `allkanji` (allkanjilen bytes): entries found via find_next_entry() are
 * rated with rate_next_kanji(), results whose dist exceeds MAX_DISTANCE
 * are dropped, and a gunichar2 array sized for the surviving results is
 * allocated.
 * NOTE(review): the loop structure, the filling of the returned array and
 * the return statement are on lines not shown here. */
113 static gunichar2* pick_kanji(gchar **sdata, gchar *allkanji, gint allkanjilen)
/* Results with dist above this value are removed. */
115 const gint MAX_DISTANCE = 3;
116 gint datalen = g_strv_length(sdata), i;
/* The key is derived from the number of strings: one string gives 'A',
 * two give 'B', and so on. */
117 gunichar2 key = 'A' + datalen - 1;
/* The search starts at the beginning of the buffer. */
118 gunichar2 *entry = (gunichar2*)allkanji;
122 entry = find_next_entry(allkanji, entry, allkanjilen, key);
/* The array releases its elements with g_free.
 * NOTE(review): the elements come from calloc() in rate_next_kanji(), so
 * allocation and release use different allocator families - confirm this
 * is acceptable for the GLib version in use. */
125 GPtrArray *arr = g_ptr_array_new();
126 g_ptr_array_set_free_func(arr, g_free);
129 kanji_result *res = rate_next_kanji(sdata, entry);
130 g_ptr_array_add(arr, res);
/* The comparator receives the addresses of the array slots. */
131 g_ptr_array_sort(arr, (GCompareFunc)kanji_results_compare);
/* Iterates from the last index down to 0, so removing an element does not
 * shift the indices still to be visited. */
132 for(i = arr->len - 1; i >= 0; i--)
134 kanji_result *res = g_ptr_array_index(arr, i);
135 if(res->dist > MAX_DISTANCE)
136 g_ptr_array_remove_index(arr, i);
/* Advances to the following entry for the same key. */
140 entry = find_next_entry(allkanji, entry, allkanjilen, key);
/* One extra zero-initialised slot is allocated beyond the result count. */
144 gunichar2 *ret = calloc(arr->len + 1, sizeof(gunichar2));
/* NOTE(review): i is gint while arr->len is guint. */
145 for(i = 0; i < arr->len; i++)
147 kanji_result *res = (kanji_result*)g_ptr_array_index(arr, i);
/* Frees the array and, through its free function, the remaining results. */
150 g_ptr_array_free(arr, TRUE);
/* Entry point: turns the list of strokes into a gunichar2 result obtained
 * from pick_kanji(). Per-stroke strings are cached in function-static
 * storage between calls, and the dictionary is read from "strokes.txt"
 * under KP_LIBDIR through a memory-mapped file.
 * NOTE(review): several conditions, the release of dir/fname and the
 * return statement are on lines not shown here. */
154 gunichar2* recognize_kanji(GList *strokes)
/* Cache of stroke strings and its element count; both persist across
 * calls because they are static. */
156 static gchar **sdata = NULL;
157 static gint sdata_len = 0;
158 gint strokes_len = g_list_length(strokes);
/* Frees every cached string.
 * NOTE(review): the condition guarding this reset is not visible here. */
162 for(i = 0; i < sdata_len; g_free(sdata[i++]));
/* Exactly one stroke fewer than cached: drop the last cached string. */
168 if(strokes_len == sdata_len - 1)
169 g_free(sdata[sdata_len - 1]);
/* Room for strokes_len strings plus the terminating NULL slot. */
170 sdata = g_realloc(sdata, (strokes_len + 1)*sizeof(gchar*));
/* Exactly one stroke more than cached: only the newest stroke is passed
 * to recognize_stroke(). The data of the last list element is itself
 * treated as a GList, whose first node is handed over. */
171 if(strokes_len == sdata_len + 1)
172 sdata[strokes_len - 1] = recognize_stroke(g_list_first(g_list_last(strokes)->data));
/* Keeps sdata NULL-terminated, as g_strv_length() in the callees expects. */
173 sdata[strokes_len] = 0;
174 sdata_len = strokes_len;
176 gchar *dir = g_strdup(KP_LIBDIR);
180 gchar *fname = g_build_filename(dir, "strokes.txt", NULL);
/* Maps the file non-writable; no GError is requested. */
181 GMappedFile *file = g_mapped_file_new(fname, FALSE, NULL);
186 g_printf("failed to open strokes.txt\n");
/* NOTE(review): g_mapped_file_get_length() returns gsize, stored in a gint. */
189 gint allkanjilen = g_mapped_file_get_length(file);
190 gchar *allkanji = g_mapped_file_get_contents(file);
191 gunichar2 *result = pick_kanji(sdata, allkanji, allkanjilen);
/* The mapping is released once pick_kanji() has produced its result. */
192 g_mapped_file_unref(file);