From cbb6887377647d1bdc835079b452c3b1203420d3 Mon Sep 17 00:00:00 2001 From: kazuma-t Date: Sun, 25 Mar 2007 16:45:12 +0000 Subject: [PATCH] spaces are anno[0] (SPACE_POS) --- ChangeLog | 4 ++++ lib/tokenizer.c | 27 ++++++++++++++++++--------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3ba1c1b..42bed41 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2007-03-26 TAKAOKA Kazuma + + * lib/tokenizer.c (is_anno): spaces are anno[0] (SPACE_POS) + 2007-03-25 TAKAOKA Kazuma * lib/parse.c (set_anno): diff --git a/lib/tokenizer.c b/lib/tokenizer.c index 824548c..3517d72 100644 --- a/lib/tokenizer.c +++ b/lib/tokenizer.c @@ -27,7 +27,7 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Id: tokenizer.c,v 1.3 2007/03/25 16:25:46 kazuma-t Exp $ + * $Id: tokenizer.c,v 1.4 2007/03/25 16:45:12 kazuma-t Exp $ */ #include @@ -147,9 +147,9 @@ cha_tok_parse(chasen_tok_t *tok, unsigned char *str, char *type, int len, anno_info *anno = NULL; int no; - if (anno_no != NULL && (no = is_anno(tok, str, len)) < 0) { - anno = &(tok->anno[-no]); - *anno_no = -no; + if (anno_no != NULL && (no = is_anno(tok, str, len)) >= 0) { + anno = &(tok->anno[no]); + *anno_no = no; for (cursor = anno->len1; cursor < len; cursor += tok->mblen(str + cursor, len - cursor)) { @@ -163,7 +163,7 @@ cha_tok_parse(chasen_tok_t *tok, unsigned char *str, char *type, int len, for (cursor = head = 0; cursor < len; cursor += tok->mblen(str + cursor, len - cursor)) { if (anno_no != NULL && - is_anno(tok, str + cursor, len - cursor) < 0) { + is_anno(tok, str + cursor, len - cursor) >= 0) { return cursor; } else { state = tok->get_char_type(tok, str + cursor, len - cursor); @@ -410,21 +410,30 @@ en_char_type(chasen_tok_t *tok, unsigned char *str, int len) static int is_anno(chasen_tok_t *tok, unsigned char *string, int len) { - int i; + int i, j; anno_info *anno = tok->anno; if
(anno == NULL) { - return 0; + return -1; } + + /* leading whitespace is reported as anno[0] (SPACE_POS); the length of the run is stored in anno[0].len1 */ + j = 0; + while (j < len && isspace(string[j])) + j++; + if (j) { + anno[0].len1 = j; + return 0; + } for (i = 1; (anno[i].str1 != NULL); i++) { if (len < anno[i].len1) { continue; } if (!memcmp(string, anno[i].str1, anno[i].len1)) { - return -i; + return i; } } - return 0; + return -1; } static int -- 2.11.0