2 * Copyright (c) 2003 Nara Institute of Science and Technology
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name Nara Institute of Science and Technology may not be used to
15 * endorse or promote products derived from this software without
16 * specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY Nara Institute of Science and Technology
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
21 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Nara Institute
22 * of Science and Technology BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
24 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * NOTE: The idea behind these procedures is taken from youhcan's jutils.c
33 * $Id: jfgets.c,v 1.1.1.1 2007/03/13 07:40:10 masayu-a Exp $
/* Size in bytes of the static line buffers (tmp_buf in cha_fget_line, ibuf in cha_jfgets). */
40 #define INNER_BUFSIZE 8192
43 * delimiter for cha_jfgets()
/*
 * Default terminator list that cha_jfgets() hands to isterminator().
 * NOTE(review): the literal looks like four two-byte EUC-JP characters
 * (bytes A1A5 A1A3 A1AA A1A9) displayed as Latin-1 -- confirm the file
 * encoding before touching it; re-saving as UTF-8 would change the bytes
 * that isterminator() compares.
 */
45 static char jfgets_delimiter[256] = "¡¥¡£¡ª¡©";
48 cha_set_jfgets_delimiter(char *delimiter)
/*
 * Replaces the terminator list used by cha_jfgets() with the string
 * passed in as delimiter.
 * NOTE(review): strncpy() bounded by the full sizeof(jfgets_delimiter)
 * does not write a terminating NUL when delimiter is 256 bytes or longer,
 * leaving the array unterminated for isterminator().  Consider copying
 * sizeof - 1 bytes and terminating explicitly.
 */
50 strncpy(jfgets_delimiter, delimiter, sizeof(jfgets_delimiter));
54 cha_jistoeuc(unsigned char *ibuffer, unsigned char *obuffer)
/*
 * Scans the NUL-terminated bytes of ibuffer, follows the ESC $ and ESC (
 * escape sequences to switch between KANJI and ASCII mode, and produces
 * output starting at obuffer (presumably through the cursor o).
 * No output size is passed in, so the caller must supply a buffer that is
 * large enough.
 * NOTE(review): the statements that store output bytes are not visible in
 * this listing; the notes below cover only the lines shown.
 */
63 for (p = ibuffer; *p; p++) {
/* level records progress through an escape sequence; 1 presumably means a bare ESC was just seen (set on a line not shown) -- TODO confirm. */
66 } else if (level == 1) {
68 level = 2; /* ESC $ */
70 level = 12; /* ESC ( */
/* The final byte of the sequence picks the mode: flag = 1 for the two-byte KANJI sets, flag = 0 for the single-byte sets. */
77 if (level == 2 && *p == '@')
78 flag = 1; /* ESC $ @ */
79 if (level == 2 && *p == 'B')
80 flag = 1; /* ESC $ B */
81 if (level == 12 && *p == 'B')
82 flag = 0; /* ESC ( B */
83 if (level == 12 && *p == 'J')
84 flag = 0; /* ESC ( J */
87 * Give up to parse escape sequence
/* Bytes below 0x20 are excluded here, so control characters take the ASCII branch even while flag is set. */
90 } else if (flag && *p >= 0x20) {
92 * KANJI mode without control characters
98 * ASCII mode or control character in KANJI mode
101 * plural space characters -> single space
/*
 * A blank or tab yields at most one space: the guarded statement is skipped
 * when the previous output byte is already a space.  The o == obuffer test
 * keeps o[-1] from reading before the start of the output.
 */
103 else if (*p == ' ' || *p == '\t') {
104 if (o == obuffer || o[-1] != ' ')
116 * isterminator - check whether the character at target is a terminator
125 isterminator(unsigned char *target, unsigned char *termlist)
/*
 * Compares the character at target against entries of termlist.
 * An entry whose first byte has the high bit set is treated as a two-byte
 * character and both bytes must match; any other entry is one byte.
 * NOTE(review): the loop over termlist and the return statements are not
 * visible in this listing, so the exact return values are unconfirmed.
 */
/* Both pointers are checked for NULL before either is dereferenced. */
127 if (termlist == NULL || target == NULL) {
/* High bit set: two-byte entry, so the following byte of each side is compared as well. */
132 if (*termlist & 0x80) {
133 if (*termlist == *target && *(termlist + 1) == *(target + 1))
/* Single-byte comparison (presumably the else branch for entries without the high bit). */
137 if (*termlist == *target)
146 * inner buffer and inner position.
147 * if the stream is empty, 'pos' points to NULL.
151 iskanji1(unsigned char *str, int idx)
/*
 * Counts, in n, the run of consecutive bytes >= 0x80 that ends at str[idx],
 * scanning backwards.  The idx >= 0 test stops the scan at the start of the
 * string and also makes a negative idx yield n == 0 without any read.
 * The empty loop body is intentional: all work is in the loop header.
 * NOTE(review): how n becomes the return value is on a line not shown;
 * presumably its parity tells whether str[idx] is the first byte of a
 * two-byte character -- TODO confirm.
 */
155 for (n = 0; idx >= 0 && str[idx] >= 0x80; n++, idx--);
161 * cha_fget_line - get a line via fgets(). This is the function that actually reads the stream :-)
164 cha_fget_line(char *buffer, int bufsize, FILE * stream)
/*
 * Reads one chunk from stream with fgets() into a static scratch buffer and
 * then runs it through cha_jistoeuc() into the caller-supplied buffer.
 * When fgets() returns NULL the function bails out early (the return itself
 * is on a line not shown; cha_jfgets tests the result against NULL).
 */
166 static unsigned char tmp_buf[INNER_BUFSIZE];
/*
 * NOTE(review): the read is bounded by the bufsize argument, not by the size
 * of tmp_buf.  A bufsize larger than INNER_BUFSIZE would overflow tmp_buf;
 * the calls visible in cha_jfgets pass sizeof(ibuf), which equals
 * INNER_BUFSIZE.  tmp_buf is also unsigned char while fgets() and strlen()
 * take char pointers, so compilers may warn about pointer signedness.
 */
169 if (fgets(tmp_buf, bufsize, stream) == NULL)
173 * remove the last extra character
/* Index of the last byte read; it is -1 for an empty string, which iskanji1 tolerates because its loop requires idx >= 0. */
175 last = strlen(tmp_buf) - 1;
/* When the chunk ends in the middle of a two-byte character (as judged by iskanji1), the dangling byte is pushed back so the next read sees the whole character. */
176 if (iskanji1(tmp_buf, last)) {
177 ungetc(tmp_buf[last], stream);
183 * NOTE: EUC string is short than JIS string.
184 * if you want to other conversion,
185 * you must care about string length.
/* The conversion writes into buffer without a size argument; it relies on the output being no longer than the input, as the note above says. */
188 cha_jistoeuc(tmp_buf, buffer);
194 * cha_jfgets - fgets() for Japanese Text.
198 cha_jfgets(char *buffer, int bufsize, FILE * stream)
200 static unsigned char ibuf[INNER_BUFSIZE];
201 /* set to the end of line */
202 static unsigned char *pos = (unsigned char *) "";
205 int kflag; /* kanji flag(0=not found, 1=found) */
208 (pos = cha_fget_line(ibuf, sizeof(ibuf), stream)) == NULL)
212 q = (unsigned char *) buffer;
215 for (count = bufsize; count > 0; count--) {
217 * the line ended without '\n' (a long string): read more
220 if ((pos = cha_fget_line(ibuf, sizeof(ibuf), stream)) == NULL)
226 if (*pos >= 0x80 && *(pos + 1)) {
237 if (isterminator(pos - 2, jfgets_delimiter)) {
252 * eliminate space characters at the end of line
254 while (q > (unsigned char *) buffer
255 && (q[-1] == ' ' || q[-1] == '\t'))
259 cha_fget_line(ibuf, sizeof(ibuf), stream)) == NULL)
262 while (*pos == ' ' || *pos == '\t')
266 * not have kanji or no space, return with this line
272 * has kanji: connect the next line
275 * a double '\n' is a paragraph end, so it is a delimiter
281 * "ASCII\nASCII" -> "ASCII ASCII"
283 if (!kflag && !(*pos & 0x80))
286 if (*pos != ' ' && *pos != '\t')
293 if (isterminator(pos - 1, jfgets_delimiter)) {