2 * Copyright (c) 2003 Nara Institute of Science and Technology
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name Nara Institute of Science and Technology may not be used to
15 * endorse or promote products derived from this software without
16 * specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY Nara Institute of Science and Technology
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
21 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Nara Institute
22 * of Science and Technology BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
24 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * $Id: print.c,v 1.3 2007/03/25 11:02:53 kazuma-t Exp $
40 #include "tokenizer.h"
/* Fetch item n of the global morpheme block as a mrph_t pointer. */
42 #define nth_mrph(n) ((mrph_t*)cha_block_get_item(Cha_mrph_block,(n)))
/* Allocation unit of the string output buffer; buffer size is this times cha_output_nblock. */
44 #define CHA_OUTPUT_SIZE (1024*16)
/* Value of mrph_data_t.compound meaning "no compound data" (set in get_mrph_data, tested in print_mrph). */
45 #define NO_COMPOUND LONG_MAX
/* Scratch list of path indices, filled by print_best_path and print_all_path_sub. */
47 static int path_buffer[CHA_INPUT_SIZE];
/* Text offset up to which output has been produced; read and advanced by print_anno. */
48 static int pos_end = 0;
/* Output primitives; cha_set_output binds them to the string writers or to the stdio functions. */
50 static void (*cha_putc) (), (*cha_puts) (), (*cha_printf) (), (*cha_write) ();
/*
 * dump_mrph_data - debugging aid: write the fields of a morpheme record to stdout.
 *
 * data: record to dump; data->mrph is dereferenced, so both must be valid.
 *
 * One "label:<TAB>value" line is written per field, followed by a separator.
 * The "info:" line prints data->info (it previously repeated data->base).
 */
60 dump_mrph_data(mrph_data_t *data)
62 fprintf(stdout, "hw:\t`%s`\n", data->mrph->headword);
63 fprintf(stdout, "hw_len:\t%d\n", data->mrph->headword_len);
64 fprintf(stdout, "undef:\t%d\n", data->mrph->is_undef);
65 fprintf(stdout, "pos:\t%d\n", data->mrph->posid);
66 fprintf(stdout, "itype:\t%d\n", data->mrph->inf_type);
67 fprintf(stdout, "iform:\t%d\n", data->mrph->inf_form);
68 fprintf(stdout, "contbl:\t%d\n", data->mrph->con_tbl);
69 fprintf(stdout, "weight:\t%d\n", data->mrph->weight);
70 fprintf(stdout, "stem_len:\t%d\n", data->stem_len);
71 fprintf(stdout, "reading:\t`%s`\n", data->reading);
72 fprintf(stdout, "pron:\t`%s`\n", data->pron);
73 fprintf(stdout, "base:\t`%s`\n", data->base);
74 fprintf(stdout, "info:\t`%s`\n", data->info);
75 fprintf(stdout, "cmp:\t%ld\n", data->compound);
76 fprintf(stdout, "-----------\n");
/*
 * get_mrph_data - fill `data` with the dictionary details belonging to `mrph`.
 *
 * mrph: morpheme to look up; its darts and dat_index fields are read here.
 * data: record that receives compound, stem_len, reading_len and pron_len
 *       (the string pointers are assigned on lines not shown in this listing).
 */
81 get_mrph_data(mrph_t *mrph, mrph_data_t *data)
/* No dictionary attached: lengths become -1 and the entry is marked non-compound. */
85 if (mrph->darts == NULL) {
86 data->reading = data->pron = data->base
88 data->stem_len = data->reading_len = data->pron_len = -1;
89 data->compound = NO_COMPOUND;
/* Dictionary entry: copy the fixed-size da_dat_t record located at dat_index. */
94 p = da_get_dat_base(mrph->darts);
95 memcpy(&d, p + mrph->dat_index, sizeof(da_dat_t));
96 data->compound = d.compound;
97 data->stem_len = d.stem_len;
/* Move p past the record to the bytes that follow it. */
98 p += mrph->dat_index + sizeof(da_dat_t);
/* Skip reading_len bytes (none when negative) plus one more byte -- presumably a terminator; confirm against the dictionary layout. */
100 p += ((d.reading_len >= 0) ? d.reading_len : 0) + 1;
/* Same step for the pronunciation field. */
102 p += ((d.pron_len >= 0) ? d.pron_len : 0) + 1;
106 data->reading_len = d.reading_len;
107 data->pron_len = d.pron_len;
114 * cha_sputc, cha_sputs, cha_sprintf
115 * - output functions to string
117 * NOTE: `output' is a dummy argument for compatibility with cha_clputc, etc.
/* Output target: the string buffer in string mode, or the FILE pointer cast to char* in stream mode (see cha_set_output). */
121 static char *cha_output;
/* Write offset into cha_output used by the string writers. */
122 static int cha_output_idx;
/* Number of CHA_OUTPUT_SIZE blocks the string buffer has been sized to. */
123 static int cha_output_nblock;
/*
 * cha_sputc - append one character to the string output buffer.
 *
 * c:      character to append.
 * output: marked "dummy" below; not referenced by the visible lines, which write to cha_output.
 *
 * When the write would reach the current capacity the buffer is resized to
 * one more CHA_OUTPUT_SIZE block (the rest of the condition and the
 * assignment of the realloc result are on lines not shown).
 * The buffer is kept NUL-terminated after the appended character.
 */
126 cha_sputc(int c, char *output /* dummy */ )
128 if (cha_output_idx + 1 >= CHA_OUTPUT_SIZE * cha_output_nblock
131 realloc(cha_output, CHA_OUTPUT_SIZE * ++cha_output_nblock);
135 cha_output[cha_output_idx++] = c;
136 cha_output[cha_output_idx] = '\0';
/*
 * cha_sputs - append a NUL-terminated string to the string output buffer.
 *
 * s:      string to append.
 * output: not referenced by the visible lines, which write to cha_output.
 *
 * `len` is computed on a line not shown -- presumably strlen(s).
 * NOTE(review): the visible resize adds a single CHA_OUTPUT_SIZE block;
 * confirm that strings longer than one block cannot reach this function,
 * or that the omitted lines handle them.
 */
141 cha_sputs(char *s, char *output)
145 if (cha_output_idx + len >= CHA_OUTPUT_SIZE * cha_output_nblock
148 realloc(cha_output, CHA_OUTPUT_SIZE * ++cha_output_nblock);
/* strcpy also copies the terminator, so the buffer stays NUL-terminated. */
152 strcpy(cha_output + cha_output_idx, s);
153 cha_output_idx += len;
/*
 * cha_sprintf - printf-style formatting into the string output buffer.
 *
 * output: forwarded unchanged to cha_sputs.
 * format: printf format string, followed by its arguments.
 *
 * The text is first formatted into a local buffer of CHA_INPUT_SIZE bytes
 * and then appended with cha_sputs.
 * NOTE(review): vsprintf does not bound its output, so a formatted result
 * longer than CHA_INPUT_SIZE - 1 bytes would overrun tmpbuf.
 */
158 cha_sprintf(char *output, char *format, ...)
160 char tmpbuf[CHA_INPUT_SIZE];
163 va_start(ap, format);
164 vsprintf(tmpbuf, format, ap);
166 cha_sputs(tmpbuf, output);
/*
 * cha_swrite - fwrite-shaped writer for string mode.
 *
 * ptr, size, nmemb: size * nmemb bytes starting at ptr are copied.
 * output:           destination of the copy.
 *
 * NOTE(review): unlike cha_sputc/cha_sputs, the visible line copies to
 * `output` itself rather than to cha_output + cha_output_idx, and neither
 * advances cha_output_idx nor grows or terminates the buffer. Callers pass
 * cha_output here (see print_bos_eos) -- confirm this is intended.
 */
170 cha_swrite(void *ptr, size_t size, size_t nmemb, char *output)
172 memcpy(output, ptr, size * nmemb);
/*
 * cha_set_output - choose the destination used by all printing routines.
 *
 * output: stream to write to, or NULL to collect the output in the
 *         internal string buffer.
 */
176 cha_set_output(FILE * output)
181 * For system having no prototype declarations for the following
182 * functions such as SunOS 4.1.4.
184 extern int fputc(int, FILE *);
185 extern int fputs(const char *, FILE *);
186 extern int fprintf(FILE *, const char *, ...);
188 if (output == NULL) {
/* String mode: bind the primitives to the buffer writers defined in this file. */
192 cha_putc = (void (*)) cha_sputc;
193 cha_puts = (void (*)) cha_sputs;
194 cha_printf = (void (*)) cha_sprintf;
195 cha_write = (void (*)) cha_swrite;
197 * initialize output buffer
/* A buffer of more than one block is dropped back to "none" (the statement on the omitted line presumably releases it). */
199 if (cha_output_nblock > 1) {
201 cha_output_nblock = 0;
/* Allocate one block when none is held; the malloc result is not checked on the visible lines. */
203 if (cha_output_nblock == 0)
204 cha_output = malloc(CHA_OUTPUT_SIZE * ++cha_output_nblock);
206 cha_output[0] = '\0';
/* Stream mode: carry the FILE pointer in cha_output and bind the primitives to stdio. */
211 cha_output = (char *) output;
212 cha_putc = (void (*)) fputc;
213 cha_puts = (void (*)) fputs;
214 cha_printf = (void (*)) fprintf;
215 cha_write = (void (*)) fwrite;
220 * returns cha_output for chasen_[fs]parse_tostr()
229 * cha_printf_mrph - print morpheme using format string
231 * about the format of English please see `manual.tex'
236 * %y first candidate of the reading (conjugated form)
237 * %Y first candidate of the reading (base form)
238 * %y0 entire reading (conjugated form)
239 * %Y0 entire reading (base form)
240 * %a first candidate of the pronunciation (conjugated form)
241 * %A first candidate of the pronunciation (base form)
242 * %a0 entire pronunciation (conjugated form)
243 * %A0 entire pronunciation (base form)
244 * %rabc headword with ruby (printed as "a<headword>b<reading>c")
246 * %Ic additional information (character c if it is the empty string or "NIL")
247 * %Pc POS names of every level, separated by character c (vgram only)
248 * %Pnc POS names of levels 1 through n (n: 1-9), separated by character c (vgram only)
250 * %H POS (level 1 in the vgram case)
251 * %Hn POS of level n (n: 1-9) (the deepest level if there is none) (vgram only)
252 * %b POS subdivision number (0 in the vgram case)
253 * %BB POS subdivision (the POS if there is none)
254 * %Bc POS subdivision (character c if there is none)
256 * %Tc conjugation type (character c if there is none)
258 * %Fc conjugation form (character c if there is none)
261 * %pb "*" if on the best path, " " otherwise
263 * %ps start position of the path's morpheme
264 * %pe end position of the path's morpheme + 1
266 * %ppiC numbers of the paths connected before this one, separated by character C
267 * %ppcC costs of the paths connected before this one, separated by character C
268 * For %rABC, %Ic, %Bc, %Tc and %Fc: when A, B, C or c is a whitespace character, nothing is printed for it
271 * %?B/STR1/STR2/ STR1 if there is a POS subdivision, STR2 otherwise
272 * %?I/STR1/STR2/ STR1 if the additional information is neither NIL nor "", STR2 otherwise
273 * %?T/STR1/STR2/ STR1 if the word conjugates, STR2 otherwise
274 * %?F/STR1/STR2/ STR1 if the word conjugates, STR2 otherwise
275 * %?U/STR1/STR2/ STR1 if the word is undefined, STR2 otherwise
276 * %U/STR/ "未定義語" ("undefined word"; "未知語", "unknown word", in the vgram case) if the word is undefined,
277 * STR otherwise (same as %?U/未知語/STR/)
278 * Any character may be used in place of `/'.
279 * Bracket pairs "(){}[]<>" may also be used, in forms such as the following:
280 * %?B(STR1)(STR2) %?B{STR1}/STR2/ %?U[STR]
283 * . specifies the field width
284 * - specifies the field width
285 * 1-9 specifies the field width
293 * "%m %y %M %h %b %t %f\n" same as -c option
294 * "%m %U(%y) %M %H %h %B* %b %T* %t %F* %f\n" same as -e option
/*
 * check_con_cost - query the connection automaton for the cost of following
 * `path` with a morpheme whose connection table entry is `con_tbl`.
 *
 * path:    path node whose `state` is the automaton state to start from.
 * con_tbl: connection table index of the following morpheme.
 *
 * cha_check_automaton stores the cost in con_cost; Cha_con_cost_undef is
 * passed as its third argument. The return statement is not shown here;
 * callers use the result as an integer cost.
 */
297 check_con_cost(path_t * path, int con_tbl)
301 cha_check_automaton(path->state,
302 con_tbl, Cha_con_cost_undef, &con_cost);
/*
 * comm_prefix_len - compare the leading characters of s1 and s2.
 *
 * The visible lines take the byte length of the first character of s1 from
 * the tokenizer (4 is passed as its length argument, marked XXX) and loop
 * over those bytes. The comparison and the return value are on lines not
 * shown; set_ruby treats a result > 0 as "a common prefix exists".
 */
308 comm_prefix_len(char *s1, char *s2)
314 len = cha_tok_mblen(Cha_tokenizer, s1, 4 /* XXX */);
315 for (i = 0; i < len; i++)
/*
 * set_ruby - build the ruby-annotated form of a headword into `dest`.
 *
 * dest:             output buffer (size is not passed; the caller supplies CHA_INPUT_SIZE bytes).
 * midasi:           headword.
 * yomi:             reading of the headword.
 * par1, par2, par3: the three characters of %rABC, supplied by the caller as s[1], s[2], s[3].
 *
 * Only part of the body is visible. It walks the reading one multibyte
 * character at a time, uses comm_prefix_len to find where headword and
 * reading share text, and copies the differing stretches of each with
 * memcpy. How par1..par3 are placed is on lines not shown.
 */
325 set_ruby(char *dest, char *midasi, char *yomi,
326 int par1, int par2, int par3)
335 int plen, maxplen = 0;
/* An empty reading is handled as a special case (body not shown). */
337 if (yomi[0] == '\0') {
/* Advance through the reading by whole multibyte characters. */
343 for (; *y; y += cha_tok_mblen(Cha_tokenizer, y, 4 /* XXX */)) {
346 if ((plen = comm_prefix_len(m, y)) > 0) {
356 m += cha_tok_mblen(Cha_tokenizer, m, 4 /* XXX */);
/* Remember the longest common prefix found so far. */
360 if ((plen = comm_prefix_len(m, y)) > 0 && plen > maxplen) {
367 m += cha_tok_mblen(Cha_tokenizer, m, 4 /* XXX */);
371 if (!*m || maxplen > 0) {
/* Copy the headword stretch [m0, m) and the reading stretch [y0, y). */
375 memcpy(d, m0, m - m0);
379 memcpy(d, y0, y - y0);
/*
 * print_nhinsi - print the names of the POS levels of a part of speech.
 *
 * hinsi: index into Cha_hinsi.
 * c:     separator character written between levels.
 * n:     level limit; the loop stops once i == n (callers pass 99 for "all").
 *
 * The names are taken by following Cha_hinsi[hinsi].path, whose entries are
 * themselves indices into Cha_hinsi; a zero entry ends the path.
 * The conditions guarding the first cha_putc and the "'-'" output are on
 * lines not shown.
 */
393 print_nhinsi(int hinsi, int c, int n)
399 cha_putc(c, cha_output);
401 path = Cha_hinsi[hinsi].path;
403 cha_puts(Cha_hinsi[*path].name, cha_output);
/* Stop at the end of the path or when the level limit is reached. */
404 if (!*path || !*++path || i == n)
407 cha_puts("'-'", cha_output);
409 cha_putc(c, cha_output);
413 cha_putc(c, cha_output);
417 * int_to_str - convert an integer to ASCII
418 * by Masanao Izumo <masana-i@is.aist-nara.ac.jp>
/*
 * int_to_str - convert an integer to its decimal text.
 *
 * value: number to convert.
 *
 * The text is built in a function-static buffer, so the result is
 * overwritten by the next call and the function is not reentrant.
 */
421 int_to_str(int value)
423 static char buff[32];
/* INT_MIN cannot be negated, so it is formatted with sprintf instead. */
431 if (-value == value) { /* value == INT_MIN */
432 sprintf(buff, "%d", value);
/* Digits are produced from the least significant end, writing backwards. */
440 *--p = value % 10 + '0';
451 * by Masanao Izumo <masana-i@is.aist-nara.ac.jp>
/*
 * fputsn - output at most n bytes of str.
 *
 * str: source bytes.
 * out: output target, forwarded to the output primitive (call not shown).
 * n:   number of bytes requested.
 *
 * The bytes are staged in a local buffer (declared on a line not shown).
 * Requests above 255 bytes are clamped to 255, so longer fields are
 * truncated.
 */
454 fputsn(char *str, char *out, int n)
460 len = (n <= 255 ? n : 255);
461 memcpy(buff, str, len);
469 /* ad-hoc macros XXX */
/*
 * strtoi(s, i): accumulate the decimal digits at s into i and advance s past
 * them. i is not reset, so the caller initializes it. The character is cast
 * to unsigned char because passing a negative char to isdigit is undefined.
 */
470 #define strtoi(s, i) \
471 while (isdigit((unsigned char)*(s))) { (i) = (i) * 10 + *(s) - '0'; (s)++; }
/*
 * field_putsn(w, o, l): output all of w when l is -1, otherwise at most l
 * bytes of it. The expansion is fully parenthesized so it stays a single
 * expression wherever it is used.
 */
473 #define field_putsn(w, o, l) \
474 (((l) == -1) ? cha_puts((w), (o)) : fputsn((w), (o), (l)))
477 * by Masanao Izumo <masana-i@is.aist-nara.ac.jp>
/*
 * printf_field - output `word`, padded and/or truncated per a width spec.
 *
 * width_str: width specification made of '-', '.' and digits, or NULL for
 *            "print the word as is".
 * word:      NUL-terminated text to output.
 *
 * field_len is the total field width; word_len is the number of bytes of
 * the word to print, with -1 meaning "all of it". A leading '-' puts the
 * padding after the word, otherwise it goes before the word.
 * The parsing of the '.' part and the adjustment of field_len by wl are on
 * lines not shown.
 */
480 printf_field(char *width_str, char *word)
482 char *field = width_str;
483 int field_len, word_len, wl;
485 if (width_str == NULL) {
486 cha_puts(word, cha_output);
/* strtoi adds digits onto field_len, which must already be initialized (line not shown). */
495 strtoi(field, field_len);
499 word_len = strlen(word);
/* wl: number of bytes that will actually be printed. */
506 wl = (word_len == -1) ? strlen(word) : word_len;
507 if (*width_str == '-') {
508 field_putsn(word, cha_output, word_len);
510 while (field_len-- > 0)
511 cha_putc(' ', cha_output);
514 while (field_len-- > 0)
515 cha_putc(' ', cha_output);
516 field_putsn(word, cha_output, word_len);
/*
 * get_deli_right - return the closing delimiter that pairs with character c.
 *
 * The body is not shown in this listing. cha_printf_mrph stores the result
 * as deli_right for the opening delimiter it just read; presumably brackets
 * map to their counterpart and any other character maps to itself -- verify
 * against the body.
 */
523 get_deli_right(int c)
/*
 * print_anno - print the annotation morphemes lying before a path node.
 *
 * lat:      lattice; lat->anno is the index of the next pending annotation
 *           morpheme (negative when there is none) and is advanced here.
 * path_num: index into Cha_path of the node about to be printed.
 * format:   fallback format for annotations that have no format of their own.
 *
 * `start` and `end` are locals set on lines not shown -- presumably the
 * node's original start/end, since path->start/end are overwritten while
 * annotations are printed and path->end is restored from `end` at the bottom.
 */
540 print_anno(cha_lat_t *lat, int path_num, char *format)
542 path_t *path = &Cha_path[path_num];
/* Nothing lies between the printed text and this node (branch body not shown). */
549 if (path->start <= pos_end) {
557 if (lat->anno >= 0) {
/* Emit annotations until the gap before `start` is covered or none remain. */
558 while (start > pos_end && lat->anno >= 0) {
559 mrph_t *m = nth_mrph(lat->anno);
/* For annotation morphemes con_tbl holds the annotation number. */
561 int anno_no = m->con_tbl; /* XXX */
/* Temporarily make the path node span the annotation so position output refers to it. */
563 path->start = pos_end;
564 path->end = pos_end + m->headword_len;
565 get_mrph_data(m, &d);
/* Prefer the annotation's own format; otherwise use `format` when a POS is assigned to it. */
567 if (Cha_anno_info[anno_no].format)
568 cha_printf_mrph(lat, path_num, &d,
569 Cha_anno_info[anno_no].format);
570 else if (Cha_anno_info[anno_no].hinsi)
571 cha_printf_mrph(lat, path_num, &d, format);
/* For annotation morphemes dat_index holds the index of the next annotation. */
573 lat->anno = m->dat_index; /* XXX */
574 pos_end += m->headword_len;
577 path->end = pos_end = end;
/*
 * extract_yomi1 - copy src to dst, reducing each "{.../...}" group.
 *
 * dst: output buffer.
 * src: text that may contain groups written with '{', '/' and '}'.
 *
 * The scan proceeds by multibyte character. state 0 is outside a group,
 * state 1 is entered at '{', state 2 at the first '/', and '}' ends state 2.
 * Which states copy their characters is decided on lines not shown;
 * judging by the name and the "first candidate" wording of %y, only the
 * first alternative is presumably kept -- confirm against the full body.
 */
583 extract_yomi1(char *dst, char *src)
591 int len = cha_tok_mblen(Cha_tokenizer, (s), 4 /* XXX */);
/* The len == 1 checks ensure the markers are single-byte characters, not bytes inside a multibyte one. */
593 if (state == 0 && len == 1 && *s == '{') {
597 } else if (state == 1 && len == 1 && *s == '/') {
601 } else if (state == 2) {
602 if (len == 1 && *s == '}')
/* Copy all bytes of the current character. */
608 for (i = 0; i < len; i++)
/*
 * cha_printf_mrph - print one morpheme according to a format string.
 *
 * lat:      lattice; lat->text and lat->len are used by %S.
 * path_num: index into Cha_path of the node being printed (used by %p...).
 * mdata:    morpheme data; mdata->mrph supplies headword, POS and conjugation.
 * format:   format string; the conversions are listed in the comment that
 *           precedes this group of functions.
 *
 * Each conversion leaves its result in one of word, eword, letter or value;
 * the tail of the loop passes whichever was set to printf_field so that a
 * field width can be applied. Many lines of the body (case labels, else
 * branches, breaks) are not shown in this listing.
 *
 * NOTE(review): word_str and word_str2 are fixed CHA_INPUT_SIZE buffers
 * filled with strcpy/strcat/memcpy without visible length checks.
 */
615 cha_printf_mrph(cha_lat_t *lat, int path_num, mrph_data_t *mdata, char *format)
617 int letter, value, n, state;
618 int deli_left = 0, deli_right = 0;
619 char *s, *word, *eword;
620 char word_str[CHA_INPUT_SIZE], word_str2[CHA_INPUT_SIZE];
622 path_t *path = &Cha_path[path_num];
623 mrph_t *mrph = mdata->mrph;
625 eword = NULL; /* string in EUC */
626 word = NULL; /* string in EUC(UNIX) or SJIS(Win) */
627 letter = 0; /* character */
628 value = INT_MAX; /* integer value */
/* `state` is assigned on lines not shown; it appears to track the position inside a %?X/STR1/STR2/ conditional. */
631 for (s = format; *s; s++) {
/* End of the selected string: skip over the alternative that is not printed. */
632 if (state == 1 && *s == deli_right) {
633 if (deli_right != deli_left && !*s++)
635 deli_right = get_deli_right(*s);
636 if ((s = strchr(++s, deli_right)) == NULL)
641 if (state == 2 && *s == deli_right) {
647 cha_putc(*s, cha_output);
/* Optional field-width prefix made of '-', '.' and digits. */
653 if (*s == '-' || *s == '.' || (*s >= '0' && *s <= '9')) {
655 while (*s == '-' || *s == '.' || (*s >= '0' && *s <= '9'))
/* Tests used by the %? conditionals. */
670 if (Cha_hinsi[mrph->posid].depth > 1)
674 if (mdata->info[0] && strcmp(mdata->info, "NIL"))
685 deli_right = get_deli_right(deli_left = *s);
687 if ((s = strchr(++s, deli_right)) == NULL)
689 if (deli_left != deli_right)
692 deli_right = get_deli_right(*s);
696 if (mrph->is_undef) {
701 word = (Cha_lang == CHASEN_LANG_EN) ?
702 CHA_LITE(STR_UNKNOWN_WORD) : CHA_LIT(STR_UNKNOWN_WORD);
705 deli_right = get_deli_right(deli_left = *++s);
708 case 'm': /* Surface string (surface form) */
709 memcpy(eword = word_str, mrph->headword, mrph->headword_len);
710 word_str[mrph->headword_len] = '\0';
712 case 'M': /* Surface string (base form) */
715 else if (mdata->stem_len < 0)
/* Base form = stem of the headword followed by the ending of the type's basic form. */
718 memcpy(eword = word_str, mrph->headword, mdata->stem_len);
720 word_str[mdata->stem_len] = '\0';
722 strcpy(word_str + mdata->stem_len,
723 Cha_form[mrph->inf_type][Cha_type[mrph->inf_type].
727 case 'y': /* Japanese Reading (surface form) */
728 case 'Y': /* Japanese Reading (base form) */
731 if (mdata->reading_len >= 0) {
/* Reduce the reading with extract_yomi1 unless this is %r or a '0' follows the conversion letter. */
732 if (s[0] != 'r' && s[1] != '0')
733 extract_yomi1(word_str, mdata->reading);
735 strcpy(word_str, mdata->reading);
736 if (mrph->inf_type > 0 && mdata->stem_len >= 0) {
739 Cha_form[mrph->inf_type]
740 [mrph->inf_form].ygobi);
743 Cha_form[mrph->inf_type]
744 [Cha_type[mrph->inf_type].basic].ygobi);
749 if (s[1] == '0' || s[1] == '1')
/* %r needs three following characters; otherwise the letter is printed literally. */
753 if (!s[1] || !s[2] || !s[3])
754 cha_putc(*s, cha_output);
756 extract_yomi1(word_str2, eword);
/* Ruby is built only when the reading differs from the headword. */
758 if (memcmp(mrph->headword, eword, mrph->headword_len)) {
759 char reading[CHA_INPUT_SIZE], headword[CHA_INPUT_SIZE];
760 strcpy(reading, eword);
761 memcpy(headword, mrph->headword, mrph->headword_len);
762 headword[mrph->headword_len] = '\0';
763 set_ruby(word_str, headword, reading, s[1], s[2], s[3]);
769 case 'a': /* Japanese pronunciation (surface form) */
770 case 'A': /* Japanese pronunciation (base form) */
772 if (mdata->pron_len >= 0) {
774 extract_yomi1(word_str, mdata->pron);
776 strcpy(word_str, mdata->pron);
777 if (mrph->inf_type > 0 && mdata->stem_len >= 0) {
779 strcat(word_str, Cha_form[mrph->inf_type]
780 [mrph->inf_form].pgobi);
782 strcat(word_str, Cha_form[mrph->inf_type]
783 [Cha_type[mrph->inf_type].basic].pgobi);
787 if (s[1] == '0' || s[1] == '1')
790 case 'i': /* information */
792 extract_yomi1(word_str, mdata->info);
794 strcpy(word_str, mdata->info);
796 if (s[1] == '0' || s[1] == '1')
799 case 'I': /* information */
801 cha_putc(*--s, cha_output);
802 else if (mdata->info[0] && strcmp(mdata->info, "NIL"))
808 n = 99; /* print all levels of the POS */
809 if (s[1] >= '1' && s[1] <= '9')
812 cha_putc(*s, cha_output);
814 print_nhinsi(mrph->posid, *++s, n);
816 case 'h': /* POS number */
819 case 'H': /* POS string */
820 if (s[1] < '1' || s[1] > '9')
/* Clamp the requested level to the deepest level this POS has. */
824 if (Cha_hinsi[mrph->posid].depth - 1 < n)
825 n = Cha_hinsi[mrph->posid].depth - 1;
827 word = Cha_hinsi[Cha_hinsi[mrph->posid].path[n]].name;
829 case 'b': /* POS subdivision number */
832 case 'B': /* POS subdivision string */
834 cha_putc(*s, cha_output);
835 else if (*++s == 'M' && mrph->is_undef)
836 word = (Cha_lang == CHASEN_LANG_EN) ?
837 CHA_LITE(STR_UNKNOWN_WORD) : CHA_LIT(STR_UNKNOWN_WORD);
839 * ³¬Áز½ÉÊ»ì¤Ê¤é°ìÈÖ²¼¤Î³¬ÁؤÎÉÊ»ì̾¤òɽ¼¨
840 * when the POS has subdivision level,
841 * print the lowest level of the POS name
843 else if (*s == 'M' || *s == 'B'
844 || Cha_hinsi[mrph->posid].depth > 1)
845 word = Cha_hinsi[mrph->posid].name;
849 case 't': /* Conjugation type number */
850 value = mrph->inf_type;
852 case 'T': /* Conjugation type string */
854 cha_putc(*--s, cha_output);
855 else if (mrph->inf_type)
856 word = Cha_type[mrph->inf_type].name;
860 case 'f': /* Conjugation form number */
861 value = mrph->inf_form;
863 case 'F': /* Conjugation form string */
865 cha_putc(*--s, cha_output);
867 word = Cha_form[mrph->inf_type][mrph->inf_form].name;
871 case 'c': /* the cost of morpheme */
/* Undefined words: base cost plus a per-length step; otherwise the cost of the POS. */
872 if (mrph->is_undef) {
873 value = Cha_undef_info[mrph->is_undef - 1].cost
874 + Cha_undef_info[mrph->is_undef -
875 1].cost_step * mrph->headword_len / 2;
877 value = Cha_hinsi[mrph->posid].cost;
879 value *= mrph->weight * Cha_mrph_cost_weight;
881 case 'S': /* entire sentence */
882 memcpy(eword = word_str, lat->text, lat->len);
883 word_str[lat->len] = '\0';
885 case 'p': /* the information about path */
887 cha_putc(*s, cha_output);
/* do_print == 2 marks nodes on the best path (set by collect_all_mrph). */
904 letter = path->do_print == 2 ? '*' : ' ';
/* %pp must be followed by 'i' or 'c' and a separator character. */
907 if ((s[1] != 'i' && s[1] != 'c') || s[2] == '\0')
908 cha_putc(*s, cha_output);
909 else if (*++s == 'i') {
/* A negative Cha_cost_width selects the single best predecessor; otherwise all predecessors are listed. */
911 if (Cha_cost_width < 0)
912 cha_printf(cha_output, "%d", path->best_path);
/* path->path is a list of predecessor indices terminated by -1. */
915 for (j = 0; path->path[j] != -1; j++) {
917 cha_putc(c, cha_output);
918 cha_printf(cha_output, "%d", path->path[j]);
922 int con_tbl = mrph->con_tbl;
924 if (Cha_cost_width < 0)
925 cha_printf(cha_output, "%d", Cha_con_cost_weight *
926 check_con_cost(&Cha_path[path->best_path],
930 for (j = 0; path->path[j] != -1; j++) {
932 cha_putc(c, cha_output);
933 cha_printf(cha_output, "%d",
934 Cha_con_cost_weight *
935 check_con_cost(&Cha_path[path->path[j]],
945 default: /* includes '%' */
946 cha_putc(*s, cha_output);
/* Emit whichever result the conversion produced, honouring the field width. */
951 printf_field(width_str, word);
953 } else if (eword != NULL) {
954 printf_field(width_str, eword);
957 word_str[0] = letter;
959 printf_field(width_str, word_str);
/* INT_MAX is the "no integer result" marker set at the top of the function. */
961 } else if (value != INT_MAX) {
962 printf_field(width_str, int_to_str(value));
/*
 * print_bos_eos - print a BOS/EOS string, expanding "%S" to the input text.
 *
 * lat: lattice supplying text and len for "%S"; may be NULL, in which case
 *      "%S" is not expanded.
 * str: string to print.
 *
 * The test advances s past a '%' as a side effect, so for '%' followed by
 * any other character the '%' itself is skipped and the following character
 * is what reaches cha_putc.
 * NOTE(review): if str ends in '%', s is advanced onto the terminator and
 * the loop increment then steps past it -- confirm such strings cannot
 * occur or guard against it.
 */
969 print_bos_eos(cha_lat_t *lat, char *str)
973 for (s = str; *s; s++) {
974 if (*s == '%' && *++s == 'S' && lat)
975 cha_write(lat->text, sizeof(char), lat->len, cha_output);
977 cha_putc(*s, cha_output);
/*
 * print_bos - print the beginning-of-sentence string.
 *
 * lat:      lattice forwarded to print_bos_eos (may be NULL).
 * opt_form: output form option; nothing is printed for 'W' and 'd', nor
 *           when Cha_bos_string is empty.
 */
982 print_bos(cha_lat_t *lat, int opt_form)
984 if (opt_form != 'W' && opt_form != 'd' && *Cha_bos_string)
985 print_bos_eos(lat, Cha_bos_string);
/*
 * print_eos - print the end-of-sentence output.
 *
 * lat:      lattice forwarded to print_bos_eos (may be NULL).
 * opt_form: output form option. One form (its test is on a line not shown)
 *           gets a bare newline; otherwise Cha_eos_string is printed unless
 *           the form is 'd' or the string is empty.
 */
989 print_eos(cha_lat_t *lat, int opt_form)
992 cha_putc('\n', cha_output);
993 else if (opt_form != 'd' && *Cha_eos_string)
994 print_bos_eos(lat, Cha_eos_string);
/*
 * get_compound - load one component of a compound word from the lexicon.
 *
 * data:     receives the component's data via get_mrph_data.
 * headword: start of the component's surface text.
 * da:       dictionary holding the compound record.
 * index:    byte offset of the record from da_get_lex_base(da).
 *
 * The record starts with two shorts (headword length, "has next" flag)
 * followed by a da_lex_t. print_mrph uses the return value as has_next;
 * the return statement itself is not shown.
 * `mrph` is declared on a line not shown; data->mrph is later used by the
 * caller, so it presumably has static storage -- confirm.
 * NOTE(review): the shorts are read through a cast of a char pointer,
 * which assumes `base` is suitably aligned for short.
 */
998 get_compound(mrph_data_t *data, char *headword, darts_t *da, long index)
1001 int has_next, hw_len;
1002 char *base = da_get_lex_base(da) + index;
1004 hw_len = ((short *)base)[0];
1005 has_next = ((short *)base)[1];
1006 base += sizeof(short) * 2;
1007 memcpy(&mrph, base, sizeof(da_lex_t));
1008 mrph.headword = headword;
1009 mrph.headword_len = hw_len;
1013 get_mrph_data(&mrph, data);
/*
 * print_mrph - print a morpheme, splitting compounds into their components.
 *
 * lat:      lattice.
 * path_num: index into Cha_path of the node being printed.
 * mdata:    data of the morpheme to print.
 * format:   format string for cha_printf_mrph.
 *
 * Pending annotations are printed first. The morpheme is printed as a whole
 * when Cha_output_iscompound is set or it has no compound data; otherwise
 * each component is fetched with get_compound and printed in turn (the
 * loop construct around the component code is on lines not shown).
 */
1021 print_mrph(cha_lat_t *lat, int path_num, mrph_data_t *mdata, char *format)
1023 mrph_t *mrph = mdata->mrph;
1025 print_anno(lat, path_num, format);
1027 if (Cha_output_iscompound || mdata->compound == NO_COMPOUND) {
1028 cha_printf_mrph(lat, path_num, mdata, format);
1031 long index = mdata->compound;
1033 char *headword = mrph->headword;
1036 has_next = get_compound(&data, headword, mrph->darts, index);
/* A conjugating compound passes its form and the ending's length on to the component (the guard selecting which component is not shown). */
1038 if (mrph->inf_type > 0 && mdata->stem_len >= 0) {
1039 data.mrph->inf_form = mrph->inf_form;
1040 data.mrph->headword_len +=
1041 strlen(Cha_form[mrph->inf_type][mrph->inf_form].gobi);
1043 data.mrph->con_tbl = mrph->con_tbl;
1045 cha_printf_mrph(lat, path_num, &data, format);
/* Advance to the next component: past its surface text and past its record. */
1046 headword += data.mrph->headword_len;
1047 index += sizeof(short) * 2 + sizeof(da_lex_t);
/*
 * print_path_mrph - print the morpheme attached to a path node.
 *
 * lat:      lattice.
 * path_num: index into Cha_path; the node's mrph_p selects the morpheme.
 * format:   format string for the output.
 */
1053 print_path_mrph(cha_lat_t *lat, int path_num, char *format)
1057 get_mrph_data(nth_mrph(Cha_path[path_num].mrph_p), &data);
1058 print_mrph(lat, path_num, &data, format);
/*
 * concat_composit_mrph - append a morpheme to the composite being built.
 *
 * composit_mrph: accumulator; its reading, pron and base point to
 *                caller-owned buffers that are extended with strcat/memcpy.
 * cur_mrph:      morpheme to append.
 *
 * A posid of 0 in the accumulator means "empty": it is then initialized
 * from cur_mrph before anything is appended.
 * NOTE(review): no length checks are visible on the appends; the buffers
 * supplied by print_best_path are CHA_INPUT_SIZE bytes each.
 */
1062 concat_composit_mrph(mrph_data_t *composit_mrph, mrph_data_t *cur_mrph)
/* First component: take the composite POS, start at this headword, clear lengths and strings. */
1067 if (!composit_mrph->mrph->posid) {
1068 composit_mrph->mrph->posid
1069 = Cha_hinsi[cur_mrph->mrph->posid].composit;
1070 composit_mrph->mrph->headword = cur_mrph->mrph->headword;
1071 composit_mrph->mrph->headword_len = composit_mrph->mrph->weight = 0;
1072 composit_mrph->reading[0] = '\0';
1073 composit_mrph->pron[0] = '\0';
1074 composit_mrph->base[0] = '\0';
/* Reading: use the morpheme's reading, or fall back to the stem of its headword. */
1079 if (cur_mrph->reading[0])
1080 strcat(composit_mrph->reading, cur_mrph->reading);
1082 int len = strlen(composit_mrph->reading);
1083 memcpy(composit_mrph->reading + len,
1084 cur_mrph->mrph->headword, cur_mrph->stem_len);
1085 composit_mrph->reading[len + cur_mrph->stem_len] = '\0';
/* Conjugating morphemes also contribute the reading of their ending. */
1087 if (cur_mrph->mrph->inf_type > 0)
1088 strcat(composit_mrph->reading,
1090 [cur_mrph->mrph->inf_type][cur_mrph->mrph->inf_form].ygobi);
/* Pronunciation: fall back to the reading, then to the headword stem. */
1094 if (cur_mrph->pron[0])
1095 strcat(composit_mrph->pron, cur_mrph->pron);
1096 else if (cur_mrph->reading[0])
1097 strcat(composit_mrph->pron, cur_mrph->reading);
1099 int len = strlen(composit_mrph->pron);
1100 memcpy(composit_mrph->pron + len,
1101 cur_mrph->mrph->headword, cur_mrph->stem_len);
1102 composit_mrph->pron[len + cur_mrph->stem_len] = '\0';
1104 if (cur_mrph->mrph->inf_type > 0)
1105 strcat(composit_mrph->pron,
1107 [cur_mrph->mrph->inf_type][cur_mrph->mrph->inf_form].pgobi);
1109 strcat(composit_mrph->base, cur_mrph->base);
1110 composit_mrph->mrph->headword_len += cur_mrph->mrph->headword_len;
1111 composit_mrph->mrph->weight += cur_mrph->mrph->weight;
/*
 * concat_composit_mrph_end - append the last morpheme of a composite.
 *
 * composit_mrph: accumulator built by concat_composit_mrph.
 * cur_mrph:      final morpheme of the chunk.
 *
 * Unlike concat_composit_mrph, no conjugation ending is appended to the
 * reading or pronunciation on the visible lines; instead the composite
 * takes over the last morpheme's stem length, info, conjugation type/form
 * and undefined-word flag.
 */
1115 concat_composit_mrph_end(mrph_data_t *composit_mrph, mrph_data_t *cur_mrph)
/* Reading: the morpheme's reading, or the stem of its headword when it has none. */
1120 if (cur_mrph->reading[0])
1121 strcat(composit_mrph->reading, cur_mrph->reading);
1123 int len = strlen(composit_mrph->reading);
1124 memcpy(composit_mrph->reading + len, cur_mrph->mrph->headword,
1125 cur_mrph->stem_len);
1126 composit_mrph->reading[len + cur_mrph->stem_len] = '\0';
1129 * Japanese Pronunciation
1131 if (cur_mrph->pron[0])
1132 strcat(composit_mrph->pron, cur_mrph->pron);
1133 else if (cur_mrph->reading[0])
1134 strcat(composit_mrph->pron, cur_mrph->reading);
1136 int len = strlen(composit_mrph->pron);
1137 memcpy(composit_mrph->pron + len, cur_mrph->mrph->headword,
1138 cur_mrph->stem_len);
1139 composit_mrph->pron[len + cur_mrph->stem_len] = '\0';
1142 strcat(composit_mrph->base, cur_mrph->base);
/* The composite's stem ends where the last morpheme's stem ends; computed before headword_len is increased. */
1143 composit_mrph->stem_len =
1144 composit_mrph->mrph->headword_len + cur_mrph->stem_len;
1145 composit_mrph->mrph->headword_len += cur_mrph->mrph->headword_len;
1146 composit_mrph->mrph->weight += cur_mrph->mrph->weight;
1147 composit_mrph->info = cur_mrph->info;
1148 composit_mrph->mrph->inf_type = cur_mrph->mrph->inf_type;
1149 composit_mrph->mrph->inf_form = cur_mrph->mrph->inf_form;
1150 composit_mrph->mrph->is_undef = cur_mrph->mrph->is_undef;
/*
 * print_anno_eos - print the annotations pending before the last path node,
 * then the end-of-sentence output. Expands to a brace block and relies on
 * lat, format and opt_form being in scope at the point of use.
 */
1153 #define print_anno_eos() \
1154 { print_anno(lat, Cha_path_num - 1, format); print_eos(lat, opt_form); }
/*
 * print_best_path - print the morphemes on the best path of the lattice.
 *
 * lat:      lattice.
 * opt_form: output form option ('d' adds comma separators).
 * format:   format string for each morpheme.
 *
 * The best path is collected by following best_path links back from the
 * last node, so path_buffer holds it in reverse order and is then walked
 * from its end down to index 0. Adjacent morphemes whose POS share the same
 * non-zero `composit` value are merged and printed as one word.
 * Several lines (the empty-path case, and the declarations of `m` and
 * `data`) are not shown in this listing.
 */
1159 print_best_path(cha_lat_t *lat, int opt_form, char *format)
1161 int i, last, pbuf_last, isfirst = 1;
1162 int path_num_composit = 0;
1163 char reading[CHA_INPUT_SIZE];
1164 char pron[CHA_INPUT_SIZE];
1165 char base[CHA_INPUT_SIZE];
1166 mrph_data_t composit_mrph;
1168 mrph_t *cur_mrph, *pre_mrph;
1170 print_bos(lat, opt_form);
1172 last = Cha_path[Cha_path_num - 1].best_path;
/* Follow best_path links until index 0, recording each node. */
1178 for (pbuf_last = 0; last; last = Cha_path[last].best_path, pbuf_last++) {
1179 path_buffer[pbuf_last] = last;
1183 * print composit POSs as one word
1185 /* initialization */
/* posid == 0 marks the accumulator as empty; its strings live in the local buffers above. */
1187 composit_mrph.mrph = &m;
1188 composit_mrph.mrph->posid = 0;
1189 composit_mrph.reading = reading;
1190 composit_mrph.pron = pron;
1191 composit_mrph.base = base;
1192 composit_mrph.compound = NO_COMPOUND;
1193 cur_mrph = nth_mrph(Cha_path[path_buffer[pbuf_last - 1]].mrph_p);
1196 * chunking the composit POSs from EOS to BOS
1198 for (i = pbuf_last - 1; i >= 0; i--) {
1201 get_mrph_data(cur_mrph, &data);
/* pre_mrph is the morpheme handled in the next iteration (path_buffer[i - 1]). */
1202 pre_mrph = (i == 0) ?
1203 NULL : nth_mrph(Cha_path[path_buffer[i - 1]].mrph_p);
/* Merge when both are defined words, the two nodes are contiguous in the text, and they share a non-zero composite POS. */
1205 if (i > 0 && !cur_mrph->is_undef && !pre_mrph->is_undef
1206 && (Cha_path[path_buffer[i]].end == Cha_path[path_buffer[i - 1]].start)
1207 && Cha_hinsi[cur_mrph->posid].composit
1208 && (Cha_hinsi[cur_mrph->posid].composit == Cha_hinsi[pre_mrph->posid].composit)) {
/* Remember the node that starts the chunk; it is reused to print the merged word. */
1210 if (!composit_mrph.mrph->posid)
1211 path_num_composit = path_buffer[i];
1213 concat_composit_mrph(&composit_mrph, &data);
1216 if (opt_form == 'd') {
1220 cha_putc(',', cha_output);
/* No chunk in progress: print the morpheme as is. Otherwise close the chunk and print it. */
1222 if (!composit_mrph.mrph->posid)
1223 print_mrph(lat, path_buffer[i], &data, format);
1225 concat_composit_mrph_end(&composit_mrph, &data);
/* Stretch the first node of the chunk so its end matches the merged headword. */
1226 Cha_path[path_num_composit].end =
1227 Cha_path[path_num_composit].start
1228 + composit_mrph.mrph->headword_len;
1229 print_mrph(lat, path_num_composit, &composit_mrph, format);
1230 composit_mrph.mrph->posid = 0;
1233 cur_mrph = pre_mrph;
1240 * print_all_mrph - print every morpheme contained in the correct analysis results
1241 * used by the -m, -d and -v options
/*
 * collect_all_mrph - mark the path nodes reachable backwards from path_num.
 *
 * path_num: index into Cha_path of the node to start from.
 *
 * do_print is set to 2 for nodes on the best path and to 1 for other
 * reachable nodes. The function recurses through best_path and through
 * every entry of the node's path list; nodes already marked are not
 * revisited through the list.
 */
1244 collect_all_mrph(int path_num)
/* The best-path chain is extended only from the last node or from a node already on it. */
1248 if ((j = Cha_path[path_num].best_path) &&
1249 (path_num == Cha_path_num - 1 || Cha_path[path_num].do_print == 2)) {
1250 Cha_path[j].do_print = 2;
1251 collect_all_mrph(j);
/* The predecessor list ends at -1; an entry of 0 also stops the loop. */
1254 for (i = 0; (j = Cha_path[path_num].path[i]) && j != -1; i++) {
1255 if (!Cha_path[j].do_print) {
1256 Cha_path[j].do_print = 1;
1257 collect_all_mrph(j);
/*
 * print_all_mrph - print every morpheme marked by collect_all_mrph.
 *
 * lat:      lattice.
 * opt_form: output form option; 'v' also prints the first and last path
 *           nodes, 'd' separates entries with commas.
 * format:   format string for each morpheme.
 */
1263 print_all_mrph(cha_lat_t *lat, int opt_form, char *format)
1266 int isfirst = 1; /* flag: still at the start of the sentence (for -d option) */
/* Clear all marks, then mark everything reachable from the last node. */
1268 for (i = 0; i < Cha_path_num; i++)
1269 Cha_path[i].do_print = 0;
1270 collect_all_mrph(Cha_path_num - 1);
/* (translation of the following comment line: with -v, the sentence-start and sentence-end information is printed as well) */
1273 * -v ¤Î¤È¤¤ÏʸƬ¡¦Ê¸Ëö¤Î¾ðÊó¤âɽ¼¨
1275 if (opt_form == 'v') {
1276 Cha_path[0].do_print = 2;
1277 Cha_path[Cha_path_num - 1].do_print = 2;
1280 print_bos(lat, opt_form);
1281 for (i = 0; i < Cha_path_num; i++) {
1282 if (Cha_path[i].do_print) {
1283 if (opt_form == 'd') {
1287 cha_putc(',', cha_output);
1289 print_path_mrph(lat, i, format);
1292 print_anno(lat, Cha_path_num - 1, format);
1293 print_eos(lat, opt_form);
/*
 * print_all_path_sub - recursive worker of print_all_path.
 *
 * lat:      lattice.
 * path_num: index into Cha_path of the node whose predecessors are expanded.
 * paths:    number of nodes currently recorded in path_buffer.
 * format:   format string for each morpheme.
 *
 * The walk goes backwards through the predecessor lists, recording nodes in
 * path_buffer. When a node's first predecessor is node 0 a complete path
 * has been collected; it is printed from the last recorded node to the
 * first, followed by pending annotations and the line "EOP".
 */
1300 print_all_path_sub(cha_lat_t *lat, int path_num, int paths, char *format)
1304 for (i = 0; Cha_path[path_num].path[i] != -1; i++) {
1305 if (Cha_path[path_num].path[0] == 0) {
1307 for (j = paths - 1; j >= 0; j--)
1308 print_path_mrph(lat, path_buffer[j], format);
1309 print_anno(lat, Cha_path_num - 1, format);
1310 cha_puts("EOP\n", cha_output);
/* Record this predecessor and expand it in turn. */
1312 path_buffer[paths] = Cha_path[path_num].path[i];
1313 print_all_path_sub(lat, Cha_path[path_num].path[i], paths + 1,
/*
 * print_all_path - print every path through the lattice.
 *
 * lat:      lattice.
 * opt_form: output form option, passed to print_bos/print_eos.
 * format:   format string for each morpheme.
 *
 * Enumeration starts at the last path node with an empty path_buffer.
 */
1320 print_all_path(cha_lat_t *lat, int opt_form, char *format)
1322 print_bos(lat, opt_form);
1323 print_all_path_sub(lat, Cha_path_num - 1, 0, format);
1324 print_eos(lat, opt_form);
/*
 * cha_print_path - print the analysis result of one sentence.
 *
 * lat:      lattice holding the analysis.
 * opt_show: selects what is shown; the selector values are on lines not
 *           shown, except that the best path is marked 'b' below.
 * opt_form: output form option; 'd' wraps the output in "[" and "].\n".
 * format:   format string for each morpheme.
 */
1328 cha_print_path(cha_lat_t *lat, int opt_show, int opt_form, char *format)
1330 if (opt_form == 'd')
1331 cha_putc('[', cha_output);
1335 print_all_mrph(lat, opt_form, format);
1338 print_all_path(lat, opt_form, format);
1341 print_best_path(lat, opt_form, format); /* 'b' */
1344 if (opt_form == 'd')
1345 cha_puts("].\n", cha_output);
/*
 * cha_print_bos_eos - print the BOS and EOS output with no sentence between.
 *
 * opt_form: output form option, passed to print_bos/print_eos.
 *
 * No lattice is supplied, so "%S" in the BOS/EOS strings is not expanded.
 */
1349 cha_print_bos_eos(int opt_form)
1352 print_bos(NULL, opt_form);
1353 print_eos(NULL, opt_form);
/*
 * cha_print_hinsi_table - list every part of speech, one per line, as
 * "<index> <level names joined with '-'>".
 *
 * The table ends at the first entry whose name is NULL.
 */
1357 cha_print_hinsi_table(void)
1361 for (i = 0; Cha_hinsi[i].name; i++) {
1362 cha_printf(cha_output, "%d ", i);
1363 print_nhinsi(i, '-', 99);
1364 cha_putc('\n', cha_output);
/*
 * cha_print_ctype_table - list every conjugation type as "<index> <name>".
 *
 * Indices start at 1; the table ends at the first entry whose name is NULL.
 */
1369 cha_print_ctype_table(void)
1372 for (i = 1; Cha_type[i].name; i++)
1373 cha_printf(cha_output, "%d %s\n", i, Cha_type[i].name);
/*
 * cha_print_cform_table - list every conjugation form as
 * "<type index> <form index> <name>".
 *
 * Both indices start at 1; each list ends at the first entry whose name is
 * NULL. The lines are written through cha_printf/cha_output, as in
 * cha_print_ctype_table and cha_print_hinsi_table, so the listing goes to
 * the destination chosen with cha_set_output instead of always to stdout.
 */
1377 cha_print_cform_table(void)
1380 for (i = 1; Cha_type[i].name; i++)
1381 for (j = 1; Cha_form[i][j].name; j++)
1382 cha_printf(cha_output, "%d %d %s\n", i, j, Cha_form[i][j].name);