2 * Copyright (c) 2003 Nara Institute of Science and Technology
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name Nara Institute of Science and Technology may not be used to
15 * endorse or promote products derived from this software without
16 * specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY Nara Institute of Science and Technology
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
21 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Nara Institute
22 * of Science and Technology BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
24 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * $Id: dumpdic.c,v 1.1.1.1 2007/03/13 07:40:10 masayu-a Exp $
/*
 * Sentinel for the `compound` argument/field: passed to dump_dat() for the
 * entries written by dump_compound(), and used as the initial value of
 * `compound` in dump_dic().
 */
40 #define NO_COMPOUND LONG_MAX
/*
 * dump_dat: append one data record for `lex` to `datfile`.
 *
 * The record is a fixed-size header (`dat`) followed by four strings
 * (reading, pron, base, info), each written with a terminating NUL byte.
 * `compound` is stored verbatim in the header's `compound` field.
 *
 * Every checked write failure is reported through
 * cha_exit_perror("datfile").
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * callers assign the result to a `dat_index`, so it presumably returns
 * `index`, the file position captured before the record is written --
 * confirm.
 */
43 dump_dat(lexicon_t *lex, FILE *datfile, long compound)
/* Position of datfile before this record is written. */
48 index = ftell(datfile);
/* These three lengths are copied from the entry as-is ... */
49 dat.stem_len = lex->stem_len;
50 dat.reading_len = lex->reading_len;
51 dat.pron_len = lex->pron_len;
/* ... whereas base and info lengths are measured from the strings. */
52 dat.base_len = strlen(lex->base);
53 dat.info_len = strlen(lex->info);
54 dat.compound = compound;
/* Header first, as one raw struct image. */
55 if (fwrite(&dat, sizeof(dat), 1, datfile) != 1)
56 cha_exit_perror("datfile");
/* Then the four strings, each followed by an explicit NUL terminator. */
58 if (fputs(lex->reading, datfile) < 0 || fputc('\0', datfile) < 0 ||
59 fputs(lex->pron, datfile) < 0 || fputc('\0', datfile) < 0 ||
60 fputs(lex->base, datfile) < 0 || fputc('\0', datfile) < 0 ||
61 fputs(lex->info, datfile) < 0 || fputc('\0', datfile) < 0)
62 cha_exit_perror("datfile");
/* If the file position is odd, add one NUL byte so it becomes even. */
64 if (ftell(datfile) % 2)
65 if (fputc('\0', datfile) < 0)
66 cha_exit_perror("datfile");
/* NOTE(review): the condition guarding this call is not visible here. */
69 cha_exit_perror("datfile");
/*
 * dump_lex: write one da_lex_t record to `output` as a raw struct image.
 *
 * A failed write is reported through cha_exit_perror("lexfile").
 * NOTE(review): the label is "lexfile" even when the caller passes a
 * different stream (dump_dic() passes its `tmpfile` stream).
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * callers assign the result to `lex_index`, so it presumably returns
 * `index`, the file position captured before the write -- confirm.
 */
75 dump_lex(da_lex_t *lex, FILE *output)
/* Position of output before the record is written. */
79 index = ftell(output);
80 if (fwrite(lex, sizeof(da_lex_t), 1, output) != 1)
81 cha_exit_perror("lexfile");
/*
 * assemble_lex: fill the on-disk lexicon record `lex` from the in-memory
 * entry `entry`.
 *
 * Copies pos (into posid), inf_type, inf_form, weight and con_tbl from
 * `entry`, and stores `dat_index` in lex->dat_index.
 */
87 assemble_lex(da_lex_t *lex, lexicon_t *entry, long dat_index)
/* Note the field is named `pos` in the entry but `posid` in the record. */
89 lex->posid = entry->pos;
90 lex->inf_type = entry->inf_type;
91 lex->inf_form = entry->inf_form;
92 lex->weight = entry->weight;
93 lex->con_tbl = entry->con_tbl;
94 lex->dat_index = dat_index;
/*
 * dump_compound: write entries[1], entries[2], ... to `lexfile` and
 * `datfile`, stopping at the first entry whose `pos` is zero.
 *
 * entries[0] is not handled here; dump_dic() writes it separately.
 * Each entry's data record is written with NO_COMPOUND.
 *
 * Returns the position of `lexfile` captured before anything is written.
 *
 * NOTE(review): several lines are not visible in this excerpt, including
 * the end of the loop and every assignment to `has_next`. Whether the
 * seek-back/rewrite sequence below sits inside or after the loop cannot
 * be determined from here.
 */
100 dump_compound(lexicon_t *entries, FILE *lexfile, FILE *datfile)
/* Start offset of this run of records; this is the return value. */
104 long compound_index = ftell(lexfile);
107 for (i = 1; entries[i].pos; i++) {
/* NOTE(review): strlen() result is narrowed to short without a range check. */
108 short hw_len = strlen(entries[i].headword);
113 dat_index = dump_dat(entries + i, datfile, NO_COMPOUND);
114 assemble_lex(&lex, entries + i, dat_index);
/* NOTE(review): this fwrite result is not checked, unlike the writes below. */
115 fwrite(&hw_len, sizeof(short), 1, lexfile);
/* Remember where has_next is stored so it can be overwritten later. */
116 marker = ftell(lexfile);
117 if (fwrite(&has_next, sizeof(short), 1, lexfile) != 1)
118 cha_exit_perror("lexfile");
119 dump_lex(&lex, lexfile);
/*
 * Go back to the remembered has_next slot, overwrite it, then return to
 * the end of the file.
 * NOTE(review): neither fseek() result is checked.
 */
122 fseek(lexfile, marker, SEEK_SET);
123 if (fwrite(&has_next, sizeof(short), 1, lexfile) != 1)
124 cha_exit_perror("lexfile");
125 fseek(lexfile, 0L, SEEK_END);
127 return compound_index;
/*
 * dump_dic: write the entry group `entries` to the dictionary files and
 * register its headword(s) with `builder`.
 *
 * output[0] is the data file, output[1] the lexicon file, and output[2]
 * the stream that receives the lexicon records for entries[0].
 *
 * NOTE(review): this excerpt omits several lines (the condition guarding
 * dump_compound(), the `else` of the branch below, and the end of the
 * function), so the comments here describe only the visible statements.
 */
131 dump_dic(lexicon_t *entries, FILE *output[], da_build_t *builder)
133 FILE *datfile = output[0];
134 FILE *lexfile = output[1];
/* NOTE(review): this local shadows the stdio function tmpfile(). */
135 FILE *tmpfile = output[2];
136 long dat_index, lex_index;
138 long compound = NO_COMPOUND;
/* NOTE(review): the condition guarding this call is not visible here. */
141 compound = dump_compound(entries, lexfile, datfile);
/* entries[0] gets the compound offset (or NO_COMPOUND) in its data record. */
143 dat_index = dump_dat(entries, datfile, compound);
145 assemble_lex(&lex, entries, dat_index);
/* inf_type == 0 or inf_form > 0: emit a single record under the headword as-is. */
146 if (entries[0].inf_type == 0 || entries[0].inf_form > 0) {
147 lex_index = dump_lex(&lex, tmpfile);
148 da_build_add(builder, entries[0].headword, lex_index);
/*
 * Presumably the else branch (the `else` itself is not visible): one record
 * per Cha_form[lex.inf_type][i], starting at i = 1 and stopping at the
 * first form whose `name` is null.
 */
150 int stem_len = strlen(entries[0].headword);
151 unsigned short con_tbl = lex.con_tbl;
154 for (i = 1; Cha_form[lex.inf_type][i].name; i++) {
/* Form i uses the base con_tbl offset by (i - 1). */
156 lex.con_tbl = con_tbl + i - 1;
/*
 * Overwrite the headword from stem_len onward with this form's `gobi`.
 * This modifies entries[0].headword in place.
 * NOTE(review): no bounds check is visible; the headword buffer must have
 * room for the longest gobi.
 */
157 strcpy(entries[0].headword + stem_len,
158 Cha_form[lex.inf_type][i].gobi);
/* NOTE(review): the action taken for an empty headword is not visible here. */
159 if (!entries[0].headword[0])
161 lex_index = dump_lex(&lex, tmpfile);
162 da_build_add(builder, entries[0].headword, lex_index);