3 * 1990/12/06/Thu Yutaka MYOKI(Nagao Lab., KUEE)
5 * $Id: chadic.h,v 1.1.1.1 2007/03/13 07:40:10 masayu-a Exp $
20 #include <sys/types.h>
21 #endif /* HAVE_UNISTD_H */
/* boolean false, spelled as an int constant */
24 #define FALSE ((int)0)
/*
 * NOTE(review): PATH_DELIMITER is defined twice below -- '\\' together with
 * PATHTYPE_MSDOS, then '/'.  The preprocessor conditional that selects one
 * of the two is not visible in this excerpt; confirm against the full header.
 */
31 #define PATH_DELIMITER '\\'
32 #define PATHTYPE_MSDOS
34 #define PATH_DELIMITER '/'
/* size of the lexicon_t headword buffer; the reading and pron buffers use twice this */
37 #define MIDASI_LEN 129
/* grammar-related file names using the "*.cha" naming scheme */
44 #define VCHA_CONNECT_FILE "connect.cha"
45 #define VCHA_CONNTMP_FILE "_connect.cha"
46 #define VCHA_GRAMMAR_FILE "grammar.cha"
47 #define VCHA_CFORM_FILE "cforms.cha"
48 #define VCHA_CTYPE_FILE "ctypes.cha"
49 #define VCHA_TABLE_FILE "table.cha"
50 #define VCHA_MATRIX_FILE "matrix.cha"
/* the same set of files using the "chasen.*" naming scheme */
51 #define CHA_CONNECT_FILE "chasen.connect.c"
52 #define CHA_CONNTMP_FILE "chasen.connect"
53 #define CHA_GRAMMAR_FILE "chasen.grammar"
54 #define CHA_CFORM_FILE "chasen.cforms"
55 #define CHA_CTYPE_FILE "chasen.ctypes"
56 #define CHA_TABLE_FILE "chasen.table"
57 #define CHA_MATRIX_FILE "chasen.matrix"
/* unprefixed names: each one is an alias for the corresponding VCHA_* ("*.cha") name */
58 #define CONNECT_FILE VCHA_CONNECT_FILE
59 #define CONNTMP_FILE VCHA_CONNTMP_FILE
60 #define GRAMMAR_FILE VCHA_GRAMMAR_FILE
61 #define CFORM_FILE VCHA_CFORM_FILE
62 #define CTYPE_FILE VCHA_CTYPE_FILE
63 #define TABLE_FILE VCHA_TABLE_FILE
64 #define MATRIX_FILE VCHA_MATRIX_FILE
/*
 * Helper macros for S-expression cells.  Every accessor casts its argument
 * to chasen_cell_t * before touching a field; none of the accessors checks
 * the cell's tag or tests for NIL first.
 */
/* the null cell: a NULL pointer typed as chasen_cell_t * */
68 #define NIL ((chasen_cell_t *)(NULL))
/* tag field of a cell */
70 #define s_tag(cell) (((chasen_cell_t *)(cell))->tag)
/*
 * True when x is not NIL and its tag is CONS (consp) or ATOM (atomp).
 * x is expanded twice, so do not pass an argument with side effects.
 */
71 #define consp(x) (!nullp(x) && (s_tag(x) == CONS))
72 #define atomp(x) (!nullp(x) && (s_tag(x) == ATOM))
/* true when cell is NIL */
73 #define nullp(cell) ((cell) == NIL)
/* the cha_car / cha_cdr members of a cell's cons value */
74 #define car_val(cell) (((chasen_cell_t *)(cell))->value.cha_cons.cha_car)
75 #define cdr_val(cell) (((chasen_cell_t *)(cell))->value.cha_cons.cha_cdr)
/* the atom member of a cell's value */
76 #define s_atom_val(cell) (((chasen_cell_t *)(cell))->value.atom)
78 /* added by T.Utsuro: default weight for the rensetu (connection) matrix */
79 #define DEFAULT_C_WEIGHT 10
81 /* added by S.Kurohashi: default value for the mrph (morpheme) weight */
82 #define MRPH_DEFAULT_WEIGHT 1
89 typedef struct _connect_rule_t {
94 /* a cell represented by pointers to its <cha_car> part and <cha_cdr> part */
95 typedef struct _bin_t {
96 void *cha_car; /* address of <cha_car> */
97 void *cha_cdr; /* address of <cha_cdr> */
100 /* a complete structure representing either a <BIN> or a string */
101 typedef struct _cell {
102 int tag; /* tag of <cell> 0:cha_cons 1:atom */
109 /* this structure is used only in mkchadic */
111 typedef struct _lexicon_t {
112 char headword[MIDASI_LEN]; /* surface form */
114 char reading[MIDASI_LEN * 2]; /* Japanese reading *//* XXX ad hoc */
116 char pron[MIDASI_LEN * 2]; /* Japanese pronunciation *//* XXX ad hoc */
118 char *base; /* base form */
119 unsigned short pos; /* POS number */
120 unsigned char inf_type; /* Conjugation type number */
121 unsigned char inf_form; /* Conjugation form number */
123 char *info; /* semantic information */
125 short con_tbl; /* connection table number */
126 unsigned short weight; /* cost for morpheme */
130 /* POS information -- see also the comments (the end of this file) */
131 typedef struct _hinsi_t {
132 short *path; /* the path to top node */
133 short *daughter; /* the daughter node */
134 char *name; /* the name of POS (at the level) */
135 short composit; /* for the COMPOSIT_POS */
136 char depth; /* the depth from top node */
137 char kt; /* have conjugation or not */
141 /* conjugation type */
142 typedef struct _ktype {
143 char *name; /* CTYPE name */
144 short basic; /* base form */
147 /* conjugation form */
148 typedef struct _kform {
149 char *name; /* CFORM name */
150 char *gobi; /* suffix of surface form */
151 int gobi_len; /* the length of suffix */
152 char *ygobi; /* suffix of Japanese reading */
153 char *pgobi; /* suffix of Japanese pronunciation */
156 /* connection table (connection matrix) */
157 typedef struct _rensetu_pair {
159 short i_pos; /* the POS index in the current state (= preceding morpheme) */
160 short j_pos; /* the POS index in the input (= current morpheme) */
162 unsigned short hinsi; /* POS */
163 unsigned char type; /* CTYPE */
164 unsigned char form; /* CFORM */
165 char *goi; /* Lexicalized POS */
/* number of entries in the Cha_hinsi POS table */
172 #define HINSI_MAX 4096
173 extern hinsi_t Cha_hinsi[HINSI_MAX]; /* see also the comments (the end of this file) */
/*
 * Tables of ktype_t and kform_t entries; Cha_form is two-dimensional,
 * TYPE_NUM by FORM_NUM.  Both bounds are defined outside this excerpt.
 * NOTE(review): presumably the conjugation type and conjugation form tables
 * -- confirm against the typedefs.
 */
174 extern ktype_t Cha_type[TYPE_NUM];
175 extern kform_t Cha_form[TYPE_NUM][FORM_NUM];
/* NOTE(review): presumably line counters used while reading input files -- confirm against the defining .c file */
176 extern int Cha_lineno, Cha_lineno_error;
/* NOTE(review): presumably option-parsing state for cha_getopt(), in the manner of optind/optarg -- confirm */
179 extern int Cha_optind;
180 extern char *Cha_optarg;
/* NOTE(review): presumably the library's error status and the stream used for diagnostics -- confirm */
182 extern int Cha_errno;
183 extern FILE *Cha_stderr;
/*
 * Function prototypes.  Only the declarations are visible here; the group
 * headings below are inferred from names and signatures and should be
 * confirmed against the implementing .c files.
 */
/* NOTE(review): general utilities -- paths, file opening, memory, error exits */
190 char *cha_convert_escape(char*, int);
191 void cha_set_progpath(char*);
192 void cha_set_rcpath(char*);
193 char *cha_get_rcpath(void);
194 char *cha_get_grammar_dir(void);
195 FILE *cha_fopen(char*, char*, int);
196 FILE *cha_fopen_grammar(char*, char*, int, int, char**);
197 void *cha_malloc(size_t);
198 void *cha_realloc(void*, size_t);
/* cha_free is a macro that expands directly to free() */
199 #define cha_free(ptr) (free(ptr))
200 char *cha_strdup(char*);
/* the two functions below take a printf-style variable argument list after the char* parameter */
202 void cha_exit(int, char*, ...);
203 void cha_exit_file(int, char*, ...);
204 void cha_perror(char*);
205 void cha_exit_perror(char*);
206 FILE *cha_fopen_rcfile(void);
207 void cha_read_grammar_dir(void);
208 char *cha_read_registry(char*, char*, char*);
/* NOTE(review): S-expression (chasen_cell_t) reading, construction, access and printing */
211 void cha_set_skip_char(int);
212 int cha_s_feof(FILE*);
213 void cha_s_free(chasen_cell_t*);
214 chasen_cell_t *cha_tmp_atom(char*);
215 chasen_cell_t *cha_cons(void*, void*);
216 chasen_cell_t *cha_car(chasen_cell_t*);
217 chasen_cell_t *cha_cdr(chasen_cell_t*);
218 char *cha_s_atom(chasen_cell_t*);
219 int cha_equal(void*, void*);
220 int cha_s_length(chasen_cell_t*);
221 chasen_cell_t *cha_s_read(FILE*);
222 chasen_cell_t *cha_assoc(chasen_cell_t*, chasen_cell_t*);
223 char *cha_s_tostr(chasen_cell_t*);
224 chasen_cell_t *cha_s_print(FILE*, chasen_cell_t*);
/* NOTE(review): grammar / POS definitions read from a FILE* */
227 void cha_read_class(FILE*);
228 int cha_match_nhinsi(chasen_cell_t*, int);
229 void cha_read_grammar(FILE*, int, int);
/* NOTE(review): conjugation ("katuyou") definitions read from a FILE* */
232 void cha_read_katuyou(FILE*, int);
/* NOTE(review): connection table / matrix reading and lookup */
235 void cha_read_table(FILE*, int);
236 int cha_check_table(lexicon_t*); /* 970301 tatuo: void -> int for robustness */
237 int cha_check_table_for_undef(int);
238 void cha_read_matrix(FILE*);
239 int cha_check_automaton(int, int, int, int*);
/* NOTE(review): lookups that return an int id for a POS, conjugation type or conjugation form */
242 int cha_get_nhinsi_str_id(char**);
243 int cha_get_nhinsi_id(chasen_cell_t*);
244 int cha_get_type_id(char*);
245 int cha_get_form_id(char*, int);
/* NOTE(review): command-line option parsing; see also Cha_optind / Cha_optarg */
248 int cha_getopt(char**, char*, FILE*);
249 int cha_getopt_chasen(char**, FILE*);
251 #endif /* __CHADIC_H__ */
255 the data format of the structure hinsi_t
256 the POS information is held in the global variable Cha_hinsi[n]
258 ============= ===================
259 "grammar.cha" "real POS tag list"
260 ============= ===================
262 (B1) ; Cha_hinsi[2] A1-B1 ; Cha_hinsi[2]
264 (C1) ; Cha_hinsi[4] A1-B2-C1 ; Cha_hinsi[4]
266 (D1) ; Cha_hinsi[6] A1-B2-C2-D1 ; Cha_hinsi[6]
267 (D2) ; Cha_hinsi[7] A1-B2-C2-D2 ; Cha_hinsi[7]
268 (D3)) ; Cha_hinsi[8] A1-B2-C2-D3 ; Cha_hinsi[8]
269 (C3) ; Cha_hinsi[9] A1-B2-C3 ; Cha_hinsi[9]
271 (D4) ; Cha_hinsi[11] A1-B2-C4-D4 ; Cha_hinsi[11]
272 (D5)))) ; Cha_hinsi[12] A1-B2-C4-D5 ; Cha_hinsi[12]
274 =========================================
275 *hinsi_t Cha_hinsi[HINSI] for the example
276 =========================================
277 n (idx) = 1 2 3 4 5 6 7 8 9 10 11 12
278 Cha_hinsi[n].name = A1 B1 B2 C1 C2 D1 D2 D3 C3 C4 D4 D5
279 Cha_hinsi[n].depth = 1 2 2 3 3 4 4 4 3 3 4 4
280 *Cha_hinsi[n].daughter = 2 0 4 0 6 0 0 0 0 11 0 0
281 *Cha_hinsi[n].path = 1 1 1 1 1 1 1 1 1 1 1 1