2 * Copyright (c) 2003 Nara Institute of Science and Technology
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name Nara Institute of Science and Technology may not be used to
15 * endorse or promote products derived from this software without
16 * specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY Nara Institute of Science and Technology
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
21 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Nara Institute
22 * of Science and Technology BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
24 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * $Id: connect.c,v 1.1.1.1 2007/03/13 07:40:10 masayu-a Exp $
/* Size of the line buffers (buf) used when reading the table and matrix files. */
35 #define CHA_LINEMAX 8192
37 static int tbl_num; /* size of the connection table */
38 static int tbl_num_goi; /* number of lexical (goi) entries in the connection table */
39 static int i_num; /* number of rows in the connection matrix */
40 static int j_num; /* number of columns in the connection matrix */
/* Connection table entries; filled per entry in cha_read_table, sorted and searched in cha_check_table. */
41 static rensetu_pair_t *rensetu_tbl;
/* Connection matrix: i_num * j_num cells in one allocation, addressed as [i * j_num + j]. */
42 static connect_rule_t *connect_mtr;
/*
 * Ordering function over two rensetu_pair_t entries; cha_check_table casts
 * it to a comparator for qsort().  The comparisons visible in this listing
 * run in this order: goi present vs. absent, hinsi, type, goi text (strcmp,
 * only when both are non-NULL), form, and finally index as the last
 * tie-breaker.
 * NOTE(review): the return statements of the intermediate steps are not
 * part of this listing - confirm them against the full file.
 */
48 cmp_pair(rensetu_pair_t * pair1, rensetu_pair_t * pair2)
53 * Headword (surface form)
55 if (pair1->goi == NULL && pair2->goi != NULL)
57 if (pair1->goi != NULL && pair2->goi == NULL)
63 if ((ret = pair1->hinsi - pair2->hinsi) != 0)
67 * Conjugation type
69 if ((ret = pair1->type - pair2->type) != 0)
73 * Headword (surface form)
75 if (pair1->goi != NULL && pair2->goi != NULL)
76 if ((ret = strcmp(pair1->goi, pair2->goi)) != 0)
80 * Conjugation form
82 if ((ret = pair1->form - pair2->form) != 0)
85 return pair1->index - pair2->index;
/*
 * Parse an unsigned decimal number starting at s and store it in *valp.
 * The visible check calls cha_exit_file(1, "illegal format") when *s is
 * not a digit.  Callers assign the result back to their scan pointer
 * (s = cha_numtok(s, &val)).
 * NOTE(review): the lines between the signature and the digit check, and
 * the return statement, are not part of this listing.
 */
89 cha_numtok(char *s, int *valp)
99 if (*s < '0' || *s > '9')
100 cha_exit_file(1, "illegal format");
/*
 * All the work happens in the for-header: *valp accumulates base-10 digits
 * and s stops on the first non-digit.  There is no overflow check on *valp.
 */
101 for (*valp = 0; *s >= '0' && *s <= '9';
102 *valp = *valp * 10 + *s++ - '0');
/*
 * Read the connection table from TABLE_FILE (opened with
 * cha_fopen_grammar, which also yields filepath) into rensetu_tbl.
 *
 * fp_out: stream that receives the "parsing <path>" progress message
 *         (any guard around that fprintf is not visible in this listing).
 * dir:    passed through to cha_fopen_grammar.
 *
 * File layout as consumed by the visible code: one line holding the entry
 * count, then two lines per entry.  Malformed input ends in
 * cha_exit_file(1, "illegal format").
 */
113 cha_read_table(FILE * fp_out, int dir)
117 char buf[CHA_LINEMAX], *s;
118 int i, val, cell_num;
120 fp = cha_fopen_grammar(TABLE_FILE, "r", 1, dir, &filepath);
123 fprintf(fp_out, "parsing %s\n", filepath);
125 Cha_lineno_error = ++Cha_lineno;
/* Entry count; the fscanf result is discarded, so a failed read leaves cell_num unset. */
126 fscanf(fp, "%d\n", &cell_num);
/* NOTE(review): the assignment target of this allocation is on a line missing from the listing (presumably rensetu_tbl). */
128 (rensetu_pair_t *) cha_malloc(sizeof(rensetu_pair_t) * cell_num);
131 for (i = 0; i < cell_num; i++) {
132 Cha_lineno_error = ++Cha_lineno;
/* First line of the entry: read only to be skipped; the next fgets overwrites buf before anything parses it. */
133 if (fgets(buf, sizeof(buf), fp) == NULL)
134 cha_exit_file(1, "illegal format");
135 Cha_lineno_error = ++Cha_lineno;
/* Second line of the entry: this is the one that gets parsed; s is reset to the start of buf. */
136 if (fgets(s = buf, sizeof(buf), fp) == NULL)
137 cha_exit_file(1, "illegal format");
/* First two numbers on the line become i_pos and j_pos. */
138 s = cha_numtok(s, &val);
139 rensetu_tbl[i].i_pos = val;
140 s = cha_numtok(s, &val);
141 rensetu_tbl[i].j_pos = val;
/* val still holds j_pos here; the statement controlled by this if is missing from the listing. */
142 if (!tbl_num && val < 0)
/* Overwrites the last character of buf (the newline fgets keeps, when the line has one). */
144 buf[strlen(buf) - 1] = '\0';
/* Only when another digit follows does the line carry hinsi, type and form. */
145 if (*s >= '0' && *s <= '9') {
146 s = cha_numtok(s, &val);
147 rensetu_tbl[i].index = i;
148 rensetu_tbl[i].hinsi = val;
149 s = cha_numtok(s, &val);
150 rensetu_tbl[i].type = val;
151 s = cha_numtok(s, &val);
152 rensetu_tbl[i].form = val;
/* goi ends up either NULL or a cha_strdup copy of the rest of the line; the condition choosing between them is not in the listing. */
154 rensetu_tbl[i].goi = NULL;
156 rensetu_tbl[i].goi = cha_strdup(s);
/*
 * Comparison callback handed to bsearch() by cha_check_table: the key is a
 * lexicon_t morpheme, the array element a rensetu_pair_t.  Visible
 * comparisons, in order: mrph->pos against pair->hinsi, mrph->inf_type
 * against pair->type, mrph->headword against pair->goi (only when goi is
 * non-NULL), and finally 1 - pair->form.
 * NOTE(review): the return statements of the intermediate steps and the
 * condition guarding the final return are not part of this listing.
 */
169 find_table(lexicon_t * mrph, rensetu_pair_t * pair)
176 if ((ret = mrph->pos - pair->hinsi) != 0)
179 * Conjugation type
181 if ((ret = mrph->inf_type - pair->type) != 0)
185 * Headword (surface form)
187 if (pair->goi && (ret = strcmp(mrph->headword, pair->goi)))
191 * For a conjugating word, match against conjugation form 1
194 return 1 - pair->form;
/*
 * Look up the connection table entry for morpheme mrph and store its index
 * in mrph->con_tbl.
 */
198 /* if an error occurs, this function returns 0, else returns 1 */
200 cha_check_table(lexicon_t * mrph)
/* hinsi == 0 in the first entry triggers a sort with cmp_pair (presumably meaning the table is not sorted yet - confirm). */
204 if (rensetu_tbl[0].hinsi == 0)
205 qsort(rensetu_tbl, tbl_num, sizeof(rensetu_pair_t),
206 (int (*)()) cmp_pair);
/* First search: only the leading tbl_num_goi entries. */
208 ret = (rensetu_pair_t *)
209 bsearch(mrph, rensetu_tbl, tbl_num_goi,
210 sizeof(rensetu_pair_t), (int (*)()) find_table);
/* NOTE(review): bsearch returns NULL on a miss; the check guarding this dereference is not in the listing. */
212 mrph->con_tbl = ret->index;
/* Second search: the remaining tbl_num - tbl_num_goi entries. */
216 ret = (rensetu_pair_t *)
217 bsearch(mrph, rensetu_tbl + tbl_num_goi, tbl_num - tbl_num_goi,
218 sizeof(rensetu_pair_t), (int (*)()) find_table);
220 mrph->con_tbl = ret->index;
221 return 1; /* if no error, return 1 */
/* Reached when the lookup fails (the enclosing control flow is not in the listing). */
227 cha_exit_file(-1, "no morpheme in connection table\n");
/*
 * Scan all tbl_num connection table entries for one whose hinsi equals the
 * argument and whose goi is NULL.
 * NOTE(review): what is returned on a hit or a miss is not part of this
 * listing.
 */
232 cha_check_table_for_undef(int hinsi)
236 for (i = 0; i < tbl_num; i++)
237 if (rensetu_tbl[i].hinsi == hinsi)
/* the entry must also have no goi string */
238 if (!rensetu_tbl[i].goi)
/*
 * Read the connection matrix from MATRIX_FILE (opened with
 * cha_fopen_grammar) into connect_mtr.
 *
 * fp_out: stream that receives the "parsing <path>" progress message
 *         (any guard around that fprintf is not visible in this listing).
 *
 * File layout as consumed by the visible code: one line with the two
 * dimensions, then one text line per matrix row.  Malformed input ends in
 * cha_exit_file(1, "illegal format").
 */
248 cha_read_matrix(FILE * fp_out)
252 int i, j, cost, next;
253 char buf[CHA_LINEMAX], *s;
255 fp = cha_fopen_grammar(MATRIX_FILE, "r", 1, 1, &filepath);
/* Unlike the message in cha_read_table, this one has no trailing newline. */
258 fprintf(fp_out, "parsing %s", filepath);
260 Cha_lineno_error = ++Cha_lineno;
/* Matrix dimensions; the fscanf result is discarded, so a failed read leaves i_num and j_num unchanged. */
261 fscanf(fp, "%d %d\n", &i_num, &j_num);
/* One flat allocation of i_num * j_num cells. */
262 connect_mtr = (connect_rule_t *)
263 cha_malloc(sizeof(connect_rule_t) * i_num * j_num);
/* One line of text per matrix row; s is reset to the start of buf for each row. */
266 for (i = 0; i < i_num; i++) {
267 Cha_lineno_error = ++Cha_lineno;
268 if (fgets(s = buf, sizeof(buf), fp) == NULL)
269 cha_exit_file(1, "illegal format");
/* No increment in the for-header: j is advanced by lines that are not in this listing. */
270 for (j = 0; j < j_num;) {
/* Skips one character, then reads nval (its declaration and the surrounding condition are not in the listing). */
273 s = cha_numtok(s + 1, &nval);
/* A cell is given as a next value followed by a cost value. */
276 s = cha_numtok(s, &next);
278 cha_exit_file(1, "illegal format");
279 s = cha_numtok(s, &cost);
281 s = cha_numtok(s + 1, &nval);
/* Store the pair at row i, column j of the flat array. */
286 connect_mtr[i * j_num + j].next = next;
287 connect_mtr[i * j_num + j].cost = cost;
/*
 * Look up the connection matrix cell for row state and column
 * rensetu_tbl[con].j_pos, and return rensetu_tbl[cr->next + con].i_pos.
 *
 * state:          row index into connect_mtr.
 * con:            index into rensetu_tbl.
 * undef_con_cost: value written to *costp in one branch (its condition is
 *                 not part of this listing).
 * costp:          output; receives the cost.
 *
 * NOTE(review): the conditions around the printf calls and the other
 * assignment to *costp are missing from this listing.
 */
296 cha_check_automaton(int state, int con, int undef_con_cost, int *costp)
300 printf("[%d:%d:%d]\n", state, con, rensetu_tbl[con].j_pos);
/* cr points at the cell in row state, column j_pos of the entry con. */
303 cr = &connect_mtr[state * j_num + rensetu_tbl[con].j_pos];
306 *costp = undef_con_cost;
311 printf("[state:%d,con:%d,newcon:%d] ", state, con, cr->next + con);
/* cr->next is used as an offset added to con, not as an absolute table index. */
314 return rensetu_tbl[cr->next + con].i_pos;