4 * Copyright (c) 2011 project bchan
6 * This software is provided 'as-is', without any express or implied
7 * warranty. In no event will the authors be held liable for any damages
8 * arising from the use of this software.
10 * Permission is granted to anyone to use this software for any purpose,
11 * including commercial applications, and to alter it and redistribute it
12 * freely, subject to the following restrictions:
14 * 1. The origin of this software must not be misrepresented; you must not
15 * claim that you wrote the original software. If you use this software
16 * in a product, an acknowledgment in the product documentation would be
17 * appreciated but is not required.
19 * 2. Altered source versions must be plainly marked as such, and must not be
20 * misrepresented as being the original software.
22 * 3. This notice may not be removed or altered from any source
27 #include "tadsearch.h"
34 /* http://ja.wikipedia.org/wiki/%E3%83%A9%E3%83%93%E3%83%B3-%E3%82%AB%E3%83%BC%E3%83%97%E6%96%87%E5%AD%97%E5%88%97%E6%A4%9C%E7%B4%A2%E3%82%A2%E3%83%AB%E3%82%B4%E3%83%AA%E3%82%BA%E3%83%A0 */
36 /* Rabin-Karp String Search Algorithm */
43 typedef struct tcloopbuf_t_ tcloopbuf_t;
45 LOCAL TC tcloopbuf_getbackchar(tcloopbuf_t *loopbuf, W n)
47 return loopbuf->buf[(loopbuf->pos - 1 - n) % loopbuf->len];
50 LOCAL Bool tcloopbuf_compairtail(tcloopbuf_t *loopbuf, tcstrbuffer_t *str)
55 for (i = 0; i < str->strlen; i++) {
56 ch_buf = loopbuf->buf[(loopbuf->pos - 1 - i) % loopbuf->len];
57 ch_str = str->buffer[(str->strlen - 1 - i)];
58 if (ch_buf != ch_str) {
65 LOCAL VOID tcloopbuf_putchar(tcloopbuf_t *loopbuf, TC ch)
67 loopbuf->buf[loopbuf->pos++ % loopbuf->len] = ch;
70 LOCAL tcloopbuf_t *tcloopbuf_new(W len)
74 loopbuf = malloc(sizeof(tcloopbuf_t) + sizeof(W)*len);
75 if (loopbuf == NULL) {
79 loopbuf->len = len + 1;
85 LOCAL VOID tcloopbuf_delete(tcloopbuf_t *loopbuf)
90 struct tadlib_rk_hash_t_ {
95 typedef struct tadlib_rk_hash_t_ tadlib_rk_hash_t;
97 struct tadlib_rk_hashset_t_ {
99 tadlib_rk_hash_t array[1];
101 typedef struct tadlib_rk_hashset_t_ tadlib_rk_hashset_t;
103 LOCAL W make_expectedhash(tcstrbuffer_t *buffer)
107 for (i = 0; i < buffer->strlen; i++) {
108 sum += buffer->buffer[i];
114 LOCAL Bool rk_hash_count(tadlib_rk_hash_t *hash, TC ch, W pos, tcloopbuf_t *loopbuf)
119 if (pos < hash->orig->strlen) {
120 hash->currenthash += ch;
124 if (pos == hash->orig->strlen) {
125 hash->currenthash += ch;
127 prev_ch = tcloopbuf_getbackchar(loopbuf, hash->orig->strlen);
128 hash->currenthash += ch - prev_ch;
131 if (hash->currenthash == hash->expectedhash) {
132 ok = tcloopbuf_compairtail(loopbuf, hash->orig);
141 LOCAL W rk_hashset_count(tadlib_rk_hashset_t *hashset, TC ch, W pos, tcloopbuf_t *loopbuf)
146 for (i = 0; i < hashset->setlen; i++) {
147 hit = rk_hash_count(hashset->array + i, ch, pos, loopbuf);
156 LOCAL tadlib_rk_hashset_t* rk_hashset_new(tcstrbuffer_t *strset, W setlen)
158 tadlib_rk_hashset_t *hashset;
161 hashset = malloc(sizeof(W) + (sizeof(tadlib_rk_hash_t) * setlen));
162 if (hashset == NULL) {
165 hashset->setlen = setlen;
166 for (i = 0; i < setlen; i++) {
167 hashset->array[i].orig = strset + i;
168 hashset->array[i].expectedhash = make_expectedhash(strset + i);
169 hashset->array[i].currenthash = 0;
175 LOCAL VOID rk_hashset_delete(tadlib_rk_hashset_t *hashset)
180 LOCAL W tadlib_rk_getmaxlenfromstrset(tcstrbuffer_t *strset, W setlen)
185 for (i = 0; i < setlen; i++) {
186 if (strset[i].strlen > max) {
187 max = strset[i].strlen;
194 EXPORT W tadlib_rabinkarpsearch(TC *str, W len, tcstrbuffer_t *strset, W setlen, tadlib_rk_result_t *result)
197 TADITERATOR_RESULT_T iter_result;
202 W found, max, ret = 0, n;
203 tcloopbuf_t *loopbuf;
204 tadlib_rk_hashset_t *hashset;
206 max = tadlib_rk_getmaxlenfromstrset(strset, setlen);
208 loopbuf = tcloopbuf_new(max);
209 if (loopbuf == NULL) {
212 hashset = rk_hashset_new(strset, setlen);
213 if (hashset == NULL) {
214 tcloopbuf_delete(loopbuf);
218 taditerator_initialize(&iter, str, len);
221 iter_result = taditerator_next(&iter, &pos, &ch, &seg, &size, &data0);
222 if (iter_result == TADITERATOR_RESULT_END) {
225 if (iter_result != TADITERATOR_RESULT_CHARCTOR) {
229 tcloopbuf_putchar(loopbuf, ch);
230 found = rk_hashset_count(hashset, ch, n, loopbuf);
237 taditerator_finalize(&iter);
239 rk_hashset_delete(hashset);
240 tcloopbuf_delete(loopbuf);