2 /* Copyright (C) 2002, 2003, 2004 Manuel Novoa III
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the Free
16 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 /* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION!
21 * Besides uClibc, I'm using this code in my libc for elks, which is
22 * a 16-bit environment with a fairly limited compiler. It would make
23 * things much easier for me if this file isn't modified unnecessarily.
24 * In particular, please put any new or replacement functions somewhere
25 * else, and modify the makefile to use your version instead.
28 * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
31 /* May 23, 2002 Initial Notes:
33 * I'm still tweaking this stuff, but it passes the tests I've thrown
34 * at it, and Erik needs it for the gcc port. The glibc extension
35 * __wcsnrtombs() hasn't been tested, as I didn't find a test for it
36 * in the glibc source. I also need to fix the behavior of
37 * _wchar_utf8sntowcs() if the max number of wchars to convert is 0.
39 * UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt
40 * file on my platform (x86) show about 5-10% faster conversion speed than
41 * glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with
42 * individual mbrtowc()/wcrtomb() calls.
44 * If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled
45 * as a fail-safe UTF-8 decoder appropriate for a terminal, etc. which
46 * needs to deal gracefully with whatever is sent to it. In that mode,
47 * it passes Markus Kuhn's UTF-8-test.txt stress test. I plan to add
48 * an arg to force that behavior, so the interface will be changing.
50 * I need to fix the error checking for 16-bit wide chars. This isn't
51 * an issue for uClibc, but may be for ELKS. I'm currently not sure
52 * if I'll use 16-bit, 32-bit, or configurable wchars in ELKS.
56 * Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case.
57 * Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit
59 * Enabled building of a C/POSIX-locale-only version, so full locale support
60 * no longer needs to be enabled.
64 * Fixed a bug in _wchar_wcsntoutf8s(). Don't store wcs position if dst is NULL.
65 * Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in
66 * order to support %ls in printf. See comments below for details.
67 * Change behaviour of wc<->mb functions when in the C locale. Now they do
68 * a 1-1 map for the range 0x80-UCHAR_MAX. This is for backwards compatibility
69 * and consistency with the std's requirement that a printf format string be
70 * a valid multibyte string beginning and ending in its initial shift state.
74 * Forgot to change btowc and wctob when I changed the wc<->mb functions yesterday.
78 * Add wcwidth and wcswidth, based on Markus Kuhn's wcwidth of 2002-05-08.
79 * Added some size/speed optimizations and integrated it into my locale
80 * framework. Minimally tested at the moment, but the stub C-locale
81 * version (which most people would probably be using) should be fine.
85 * Revert the wc<->mb changes from earlier this month involving the C-locale.
86 * Add a couple of ugly hacks to support *wprintf.
87 * Add a mini iconv() and iconv implementation (requires locale support).
90 * Bug fix for mbrtowc.
93 * Bug fix: _wchar_utf8sntowcs and _wchar_wcsntoutf8s now set errno if EILSEQ.
96 * Bug fix: Fix size check for remaining output space in iconv().
105 #include <inttypes.h>
111 #include <bits/uClibc_uwchar.h>
113 /**********************************************************************/
114 #ifdef __UCLIBC_HAS_LOCALE__
115 #ifdef __UCLIBC_MJN3_ONLY__
117 /* generates one warning */
118 #warning TODO: Fix Cc2wc* and Cwc2c* defines!
120 #endif /* __UCLIBC_MJN3_ONLY__ */
122 #define ENCODING (__UCLIBC_CURLOCALE->encoding)
124 #define Cc2wc_IDX_SHIFT __LOCALE_DATA_Cc2wc_IDX_SHIFT
125 #define Cc2wc_ROW_LEN __LOCALE_DATA_Cc2wc_ROW_LEN
126 #define Cwc2c_DOMAIN_MAX __LOCALE_DATA_Cwc2c_DOMAIN_MAX
127 #define Cwc2c_TI_SHIFT __LOCALE_DATA_Cwc2c_TI_SHIFT
128 #define Cwc2c_TT_SHIFT __LOCALE_DATA_Cwc2c_TT_SHIFT
129 #define Cwc2c_TI_LEN __LOCALE_DATA_Cwc2c_TI_LEN
131 #ifndef __CTYPE_HAS_UTF_8_LOCALES
132 #warning __CTYPE_HAS_UTF_8_LOCALES not set!
135 #else /* __UCLIBC_HAS_LOCALE__ */
137 #ifdef __UCLIBC_MJN3_ONLY__
140 #warning fix preprocessor logic testing locale settings
144 #define ENCODING (__ctype_encoding_7_bit)
145 #ifdef __CTYPE_HAS_8_BIT_LOCALES
146 #error __CTYPE_HAS_8_BIT_LOCALES is defined!
148 #ifdef __CTYPE_HAS_UTF_8_LOCALES
149 #error __CTYPE_HAS_UTF_8_LOCALES is defined!
151 #undef L__wchar_utf8sntowcs
152 #undef L__wchar_wcsntoutf8s
154 #endif /* __UCLIBC_HAS_LOCALE__ */
155 /**********************************************************************/
157 #if WCHAR_MAX > 0xffffUL
158 #define UTF_8_MAX_LEN 6
160 #define UTF_8_MAX_LEN 3
165 /* Implementation-specific work functions. */
167 extern size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
168 const char **__restrict src, size_t n,
169 mbstate_t *ps, int allow_continuation) attribute_hidden;
171 extern size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
172 const wchar_t **__restrict src, size_t wn) attribute_hidden;
174 /**********************************************************************/
177 /* libc_hidden_proto(mbrtowc) */
179 /* libc_hidden_proto(btowc) */
/* btowc: map a single byte value c to a wide character.
 * With 8-bit locale support the byte is stored in a one-byte buffer and
 * pushed through mbrtowc() using a freshly reset conversion state.
 * Without it, values below 0x80 are returned unchanged and everything
 * else yields WEOF.
 * NOTE(review): the function header and several body lines are not
 * visible in this excerpt; the name is taken from libc_hidden_def(). */
182 #ifdef __CTYPE_HAS_8_BIT_LOCALES
185 unsigned char buf[1];
189 *buf = (unsigned char) c;
190 mbstate.__mask = 0; /* Initialize the mbstate. */
/* A result of 0 or 1 from mbrtowc() selects this branch. */
191 if (mbrtowc(&wc, (char*) buf, 1, &mbstate) <= 1) {
197 #else /* !__CTYPE_HAS_8_BIT_LOCALES */
199 #ifdef __UCLIBC_HAS_LOCALE__
200 assert((ENCODING == __ctype_encoding_7_bit)
201 || (ENCODING == __ctype_encoding_utf8));
204 /* If we don't have 8-bit locale support, then this is trivial since
205 * anything outside of 0-0x7f is illegal in C/POSIX and UTF-8 locales. */
/* The unsigned cast makes negative c compare as a large value, so it
 * also falls into the WEOF case. */
206 return (((unsigned int)c) < 0x80) ? c : WEOF;
208 #endif /* !__CTYPE_HAS_8_BIT_LOCALES */
210 libc_hidden_def(btowc)
213 /**********************************************************************/
216 /* Note: We completely ignore ps in all currently supported conversions. */
218 /* libc_hidden_proto(wcrtomb) */
/* Wide character to single byte conversion.
 * NOTE(review): the function header is not visible in this excerpt;
 * presumably this is wctob(wint_t c) -- confirm against the full file.
 * With 8-bit locale support the value is converted with wcrtomb() and the
 * first output byte is returned only if exactly one byte was produced.
 * Without it, values in 0..0x7f are returned unchanged.  EOF is returned
 * in every other case. */
222 #ifdef __CTYPE_HAS_8_BIT_LOCALES
224 unsigned char buf[MB_LEN_MAX];
226 return (wcrtomb((char*) buf, c, NULL) == 1) ? *buf : EOF;
228 #else /* __CTYPE_HAS_8_BIT_LOCALES */
230 #ifdef __UCLIBC_HAS_LOCALE__
231 assert((ENCODING == __ctype_encoding_7_bit)
232 || (ENCODING == __ctype_encoding_utf8));
233 #endif /* __UCLIBC_HAS_LOCALE__ */
235 /* If we don't have 8-bit locale support, then this is trivial since
236 * anything outside of 0-0x7f is illegal in C/POSIX and UTF-8 locales. */
238 /* TODO: need unsigned version of wint_t... */
239 /* return (((unsigned int)c) < 0x80) ? c : WEOF; */
/* Explicit two-sided range test, used instead of the unsigned-cast form
 * shown in the disabled line above. */
240 return ((c >= 0) && (c < 0x80)) ? c : EOF;
242 #endif /* __CTYPE_HAS_8_BIT_LOCALES */
246 /**********************************************************************/
249 /* libc_hidden_proto(mbsinit) */
/* mbsinit: report whether a conversion state is in its initial state.
 * Returns nonzero when ps is NULL or when ps->__mask is zero (the value
 * other code in this file stores to reset a state), and 0 otherwise. */
250 int mbsinit(const mbstate_t *ps)
252 return !ps || !ps->__mask;
254 libc_hidden_def(mbsinit)
257 /**********************************************************************/
260 /* libc_hidden_proto(mbrtowc) */
262 /* libc_hidden_proto(mbrlen) */
/* mbrlen: thin wrapper around mbrtowc() with a NULL destination.
 * s, n are passed through unchanged; the return value is exactly what
 * mbrtowc() returns.  When ps is NULL, a static mbstate_t private to this
 * function is used instead, so such calls share one state object. */
263 size_t mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)
265 static mbstate_t mbstate; /* Rely on bss 0-init. */
267 return mbrtowc(NULL, s, n, (ps != NULL) ? ps : &mbstate);
269 libc_hidden_def(mbrlen)
272 /**********************************************************************/
275 /* libc_hidden_proto(mbsnrtowcs) */
277 /* libc_hidden_proto(mbrtowc) */
/* mbrtowc: convert the multibyte sequence at s (at most n bytes) into a
 * single wide character stored through pwc, tracking state in ps.
 * In a UTF-8 locale the work is done by _wchar_utf8sntowcs() with
 * continuation allowed; otherwise by mbsnrtowcs() limited to one wide
 * character.
 * NOTE(review): several declarations, branch headers and closing braces
 * are not visible in this excerpt, so the conditions guarding some of the
 * statements below cannot be confirmed from here. */
278 size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
279 size_t n, mbstate_t *__restrict ps)
281 static mbstate_t mbstate; /* Rely on bss 0-init. */
285 char empty_string[1]; /* Avoid static to be fPIC friendly. */
292 pwc = (wchar_t *) s; /* NULL */
293 empty_string[0] = 0; /* Init the empty string when necessary. */
296 } else if (*s == '\0') {
297 /* According to the ISO C 89 standard this is the expected behaviour. */
300 /* TODO: change error code? */
/* Yields (size_t)-1 when the saved state has a nonzero __mask and
 * __wc == 0xffff, and (size_t)-2 otherwise. */
302 return (ps->__mask && (ps->__wc == 0xffffU))
303 ? ((size_t) -1) : ((size_t) -2);
311 #ifdef __CTYPE_HAS_UTF_8_LOCALES
312 /* Need to do this here since mbsrtowcs doesn't allow incompletes. */
313 if (ENCODING == __ctype_encoding_utf8) {
317 r = _wchar_utf8sntowcs(pwc, 1, &p, n, ps, 1);
/* When one wide char was produced, report the number of bytes consumed
 * (p - s); any other result is passed through unchanged. */
318 return (r == 1) ? (p-s) : r; /* Need to return 0 if nul char. */
322 #ifdef __UCLIBC_MJN3_ONLY__
323 #warning TODO: This adds a trailing nul!
324 #endif /* __UCLIBC_MJN3_ONLY__ */
326 r = mbsnrtowcs(wcbuf, &p, SIZE_MAX, 1, ps);
328 if (((ssize_t) r) >= 0) {
335 libc_hidden_def(mbrtowc)
338 /**********************************************************************/
341 /* libc_hidden_proto(wcsnrtombs) */
343 /* Note: We completely ignore ps in all currently supported conversions. */
344 /* TODO: Check for valid state anyway? */
346 /* libc_hidden_proto(wcrtomb) */
/* wcrtomb: convert one wide character wc to its multibyte form in s.
 * Delegates to wcsnrtombs() with an input limit of one wide character and
 * an output limit of MB_LEN_MAX bytes.  A result of 0 from wcsnrtombs()
 * is reported as 1; every other result is returned unchanged.
 * NOTE(review): the lines that set up pwc and use the local buf are not
 * visible in this excerpt. */
347 size_t wcrtomb(register char *__restrict s, wchar_t wc,
348 mbstate_t *__restrict ps)
350 #ifdef __UCLIBC_MJN3_ONLY__
351 #warning TODO: Should wcsnrtombs nul-terminate unconditionally? Check glibc.
352 #endif /* __UCLIBC_MJN3_ONLY__ */
356 char buf[MB_LEN_MAX];
366 r = wcsnrtombs(s, &pwc, 1, MB_LEN_MAX, ps);
367 return (r != 0) ? r : 1;
369 libc_hidden_def(wcrtomb)
372 /**********************************************************************/
375 /* libc_hidden_proto(mbsnrtowcs) */
377 /* libc_hidden_proto(mbsrtowcs) */
/* mbsrtowcs: convert the multibyte string at *src into at most len wide
 * characters at dst.  Implemented as mbsnrtowcs() with the input byte
 * limit set to SIZE_MAX.  When ps is NULL, a static mbstate_t private to
 * this function is used.  Returns whatever mbsnrtowcs() returns. */
378 size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
379 size_t len, mbstate_t *__restrict ps)
381 static mbstate_t mbstate; /* Rely on bss 0-init. */
383 return mbsnrtowcs(dst, src, SIZE_MAX, len,
384 ((ps != NULL) ? ps : &mbstate));
386 libc_hidden_def(mbsrtowcs)
389 /**********************************************************************/
392 /* Note: We completely ignore ps in all currently supported conversions.
394 * TODO: Check for valid state anyway? */
396 /* libc_hidden_proto(wcsnrtombs) */
398 /* libc_hidden_proto(wcsrtombs) */
/* wcsrtombs: convert the wide string at *src into at most len bytes at
 * dst.  Implemented as wcsnrtombs() with the input wide-character limit
 * set to SIZE_MAX; ps is forwarded as given (no substitute state is
 * supplied for NULL here).  Returns whatever wcsnrtombs() returns. */
399 size_t wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
400 size_t len, mbstate_t *__restrict ps)
402 return wcsnrtombs(dst, src, SIZE_MAX, len, ps);
404 libc_hidden_def(wcsrtombs)
407 /**********************************************************************/
408 #ifdef L__wchar_utf8sntowcs
410 /* Define DECODER to generate a UTF-8 decoder which passes Markus Kuhn's
411 * UTF-8-test.txt stress test.
413 /* #define DECODER */
/* _wchar_utf8sntowcs: hidden worker that decodes UTF-8 from *src (input
 * limit n) into wide characters at pwc (output limit wn).
 * ps holds the state of a partially decoded character in __mask / __wc.
 * allow_continuation nonzero permits stopping in the middle of a
 * character and saving that partial state in ps.
 * Returns (size_t)-1 for an illegal start byte or when ps is already in
 * an error state; other return paths are not visible in this excerpt.
 * NOTE(review): many body lines (declarations, braces, loop headers) are
 * missing from this excerpt. */
421 size_t attribute_hidden _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
422 const char **__restrict src, size_t n,
423 mbstate_t *ps, int allow_continuation)
425 register const char *s;
438 /* NOTE: The following is an AWFUL HACK! In order to support %s in
439 * wprintf, we need to be able to compute the number of wchars needed
440 * for the mbs conversion, not to exceed the precision specified.
441 * But if dst is NULL, the return value is the length assuming a
442 * sufficiently sized buffer. So, we allow passing of (wchar_t *) ps
443 * as pwc in order to flag that we really want the length, subject
444 * to the restricted buffer size and no partial conversions.
445 * See mbsnrtowcs() as well. */
446 if (!pwc || (pwc == ((wchar_t *)ps))) {
454 /* This is really here only to support the glibc extension function
455 * __mbsnrtowcs which apparently returns 0 if wn == 0 without any
456 * check on the validity of the mbstate. */
461 if ((mask = (__uwchar_t) ps->__mask) != 0) { /* A continuation... */
463 wc = (__uwchar_t) ps->__wc;
/* A saved __wc of 0xffff is the marker tested for below. */
469 if ((wc = (__uwchar_t) ps->__wc) != 0xffffU) {
470 /* TODO: change error code here and below? */
477 return (size_t) -1; /* We're in an error state. */
486 if ((wc = ((unsigned char) *s++)) >= 0x80) { /* Not ASCII... */
488 #ifdef __UCLIBC_MJN3_ONLY__
489 #warning TODO: Fix range for 16 bit wchar_t case.
/* Accept start bytes in 0xc0..0xfd, excluding 0xc0 and 0xc1. */
491 if (( ((unsigned char)(s[-1] - 0xc0)) < (0xfe - 0xc0) ) &&
492 (((unsigned char)s[-1] != 0xc0 ) && ((unsigned char)s[-1] != 0xc1 ))) {
503 return (size_t) -1; /* Illegal start byte! */
/* A continuation byte must have the bit pattern 10xxxxxx. */
509 if ((*s & 0xc0) != 0x80) {
514 wc += (*s & 0x3f); /* keep separate for bcc (smaller code) */
519 if ((wc & mask) == 0) { /* Character completed. */
520 if ((mask >>= 5) == 0x40) {
523 /* Check for invalid sequences (longer than necessary)
524 * and invalid chars. */
525 if ( (wc < mask) /* Sequence not minimal length. */
527 #if UTF_8_MAX_LEN == 3
528 #error broken since mask can overflow!!
529 /* For plane 0, these are the only defined values.*/
532 /* Note that we don't need to worry about exceeding */
533 /* 31 bits as that is the most that UTF-8 provides. */
534 || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
536 || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
544 /* Character potentially valid but incomplete. */
545 if (!allow_continuation) {
549 /* NOTE: The following can fail if you allow and then disallow
551 #if UTF_8_MAX_LEN == 3
552 #error broken since mask can overflow!!
554 /* Need to back up... */
557 } while ((mask >>= 5) >= 0x40);
/* Save the partial character so a later call can resume it. */
560 ps->__mask = (wchar_t) mask;
561 ps->__wc = (wchar_t) wc;
572 while (wc && --count);
580 /* ps->__wc is irrelevant here. */
590 /**********************************************************************/
591 #ifdef L__wchar_wcsntoutf8s
/* _wchar_wcsntoutf8s: hidden worker that encodes wide characters from
 * *src (input limit wn) as UTF-8 into s (output limit n).
 * The visible checks cover 0xfffe/0xffff, the 0xd800..0xdfff range,
 * values above 0x7fffffff, insufficient output space, and the terminating
 * zero wide character.
 * NOTE(review): many body lines are missing from this excerpt, including
 * every return statement, so the return contract cannot be stated here. */
593 size_t attribute_hidden _wchar_wcsntoutf8s(char *__restrict s, size_t n,
594 const wchar_t **__restrict src, size_t wn)
599 const __uwchar_t *swc;
601 char buf[MB_LEN_MAX];
605 /* NOTE: The following is an AWFUL HACK! In order to support %ls in
606 * printf, we need to be able to compute the number of bytes needed
607 * for the mbs conversion, not to exceed the precision specified.
608 * But if dst is NULL, the return value is the length assuming a
609 * sufficiently sized buffer. So, we allow passing of (char *) src
610 * as dst in order to flag that we really want the length, subject
611 * to the restricted buffer size and no partial conversions.
612 * See wcsnrtombs() as well. */
613 if (!s || (s == ((char *) src))) {
/* Read the input through an unsigned wide-char pointer. */
622 swc = (const __uwchar_t *) *src;
635 #if UTF_8_MAX_LEN == 3
636 /* For plane 0, these are the only defined values.*/
637 /* Note that we don't need to worry about exceeding */
638 /* 31 bits as that is the most that UTF-8 provides. */
641 /* UTF_8_MAX_LEN == 6 */
643 || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
645 || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
651 #if UTF_8_MAX_LEN != 3
652 if (wc > 0x7fffffffUL) { /* Value too large. */
665 if ((len = p - s) > t) { /* Not enough space. */
/* Trailing bytes are written back to front, six payload bits each. */
672 *--p = (wc & 0x3f) | 0x80;
676 } else if (wc == 0) { /* End of string. */
/* Hand the current input position back to the caller. */
690 *src = (const wchar_t *) swc;
698 /**********************************************************************/
701 /* WARNING: We treat len as SIZE_MAX when dst is NULL! */
703 /* libc_hidden_proto(mbsnrtowcs) */
/* mbsnrtowcs: convert at most NMC bytes of the multibyte string at *src
 * into at most len wide characters at dst, using state ps.
 * UTF-8 locales are handed to _wchar_utf8sntowcs(); the remaining
 * encodings are single-byte: 8-bit locales map bytes >= 0x80 through the
 * current locale's idx8c2wc / tbl8c2wc tables, and the 7-bit case copies
 * bytes directly.
 * NOTE(review): declarations, loop headers, error paths and the final
 * return are not visible in this excerpt. */
704 size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
705 size_t NMC, size_t len, mbstate_t *__restrict ps)
707 static mbstate_t mbstate; /* Rely on bss 0-init. */
717 #ifdef __CTYPE_HAS_UTF_8_LOCALES
718 if (ENCODING == __ctype_encoding_utf8) {
/* A (size_t)-2 result from the UTF-8 worker is reported as 0 here;
 * every other result is returned unchanged. */
720 return ((r = _wchar_utf8sntowcs(dst, len, src, NMC, ps, 1))
721 != (size_t) -2) ? r : 0;
725 /* NOTE: The following is an AWFUL HACK! In order to support %s in
726 * wprintf, we need to be able to compute the number of wchars needed
727 * for the mbs conversion, not to exceed the precision specified.
728 * But if dst is NULL, the return value is the length assuming a
729 * sufficiently sized buffer. So, we allow passing of ((wchar_t *)ps)
730 * as dst in order to flag that we really want the length, subject
731 * to the restricted buffer size and no partial conversions.
732 * See _wchar_utf8sntowcs() as well. */
733 if (!dst || (dst == ((wchar_t *)ps))) {
741 /* Since all the following encodings are single-byte encodings... */
749 #ifdef __CTYPE_HAS_8_BIT_LOCALES
750 if (ENCODING == __ctype_encoding_8_bit) {
753 if ((wc = ((unsigned char)(*s))) >= 0x80) { /* Non-ASCII... */
/* Two-step lookup: idx8c2wc selects a row, the low bits of the byte
 * select the entry within that row of tbl8c2wc. */
755 wc = __UCLIBC_CURLOCALE->tbl8c2wc[
756 (__UCLIBC_CURLOCALE->idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
757 << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
777 #ifdef __UCLIBC_HAS_LOCALE__
778 assert(ENCODING == __ctype_encoding_7_bit);
/* Store the byte as a wide char; a stored zero selects this branch. */
782 if ((*dst = (unsigned char) *s) == 0) {
787 #ifdef __CTYPE_HAS_8_BIT_LOCALES
802 libc_hidden_def(mbsnrtowcs)
805 /**********************************************************************/
808 /* WARNING: We treat len as SIZE_MAX when dst is NULL! */
810 /* Note: We completely ignore ps in all currently supported conversions.
811 * TODO: Check for valid state anyway? */
813 /* libc_hidden_proto(wcsnrtombs) */
/* wcsnrtombs: convert at most NWC wide characters from *src into at most
 * len bytes at dst.
 * UTF-8 locales are handed to _wchar_wcsntoutf8s().  For 8-bit locales,
 * values up to 0x7f are stored directly and larger values (up to
 * Cwc2c_DOMAIN_MAX) go through a three-step lookup in the current
 * locale's idx8wc2c / tbl8wc2c tables.  The 7-bit case stores the low
 * byte directly.
 * NOTE(review): declarations, loop headers, error paths and the return
 * statements of the single-byte paths are not visible in this excerpt. */
814 size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
815 size_t NWC, size_t len, mbstate_t *__restrict ps)
820 char buf[MB_LEN_MAX];
822 #ifdef __CTYPE_HAS_UTF_8_LOCALES
823 if (ENCODING == __ctype_encoding_utf8) {
824 return _wchar_wcsntoutf8s(dst, len, src, NWC);
826 #endif /* __CTYPE_HAS_UTF_8_LOCALES */
829 /* NOTE: The following is an AWFUL HACK! In order to support %ls in
830 * printf, we need to be able to compute the number of bytes needed
831 * for the mbs conversion, not to exceed the precision specified.
832 * But if dst is NULL, the return value is the length assuming a
833 * sufficiently sized buffer. So, we allow passing of (char *) src
834 * as dst in order to flag that we really want the length, subject
835 * to the restricted buffer size and no partial conversions.
836 * See _wchar_wcsntoutf8s() as well. */
837 if (!dst || (dst == ((char *) src))) {
845 /* Since all the following encodings are single-byte encodings... */
851 s = (const __uwchar_t *) *src;
853 #ifdef __CTYPE_HAS_8_BIT_LOCALES
854 if (ENCODING == __ctype_encoding_8_bit) {
858 if ((wc = *s) <= 0x7f) {
/* Store the byte; a stored zero selects this branch. */
859 if (!(*dst = (unsigned char) wc)) {
865 if (wc <= Cwc2c_DOMAIN_MAX) {
/* The next three assignments to u walk the index table and then the
 * two levels of tbl8wc2c. */
866 u = __UCLIBC_CURLOCALE->idx8wc2c[wc >> (Cwc2c_TI_SHIFT
868 u = __UCLIBC_CURLOCALE->tbl8wc2c[(u << Cwc2c_TI_SHIFT)
869 + ((wc >> Cwc2c_TT_SHIFT)
870 & ((1 << Cwc2c_TI_SHIFT)-1))];
871 u = __UCLIBC_CURLOCALE->tbl8wc2c[Cwc2c_TI_LEN
872 + (u << Cwc2c_TT_SHIFT)
873 + (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
876 #ifdef __WCHAR_REPLACEMENT_CHAR
/* With a replacement char configured, a zero lookup result is replaced
 * by __WCHAR_REPLACEMENT_CHAR. */
877 *dst = (unsigned char) ( u ? u : __WCHAR_REPLACEMENT_CHAR );
878 #else /* __WCHAR_REPLACEMENT_CHAR */
882 *dst = (unsigned char) u;
883 #endif /* __WCHAR_REPLACEMENT_CHAR */
/* Hand the current input position back to the caller. */
890 *src = (const wchar_t *) s;
894 #endif /* __CTYPE_HAS_8_BIT_LOCALES */
896 #ifdef __UCLIBC_HAS_LOCALE__
897 assert(ENCODING == __ctype_encoding_7_bit);
902 #if defined(__CTYPE_HAS_8_BIT_LOCALES) && !defined(__WCHAR_REPLACEMENT_CHAR)
908 if ((*dst = (unsigned char) *s) == 0) {
917 *src = (const wchar_t *) s;
921 libc_hidden_def(wcsnrtombs)
924 /**********************************************************************/
927 /* libc_hidden_proto(wcswidth) */
929 #ifdef __UCLIBC_MJN3_ONLY__
930 #warning REMINDER: If we start doing translit, wcwidth and wcswidth will need updating.
931 #warning TODO: Update wcwidth to match latest by Kuhn.
934 #if defined(__UCLIBC_HAS_LOCALE__) && \
935 ( defined(__CTYPE_HAS_8_BIT_LOCALES) || defined(__CTYPE_HAS_UTF_8_LOCALES) )
937 static const unsigned char new_idx[] = {
938 0, 5, 5, 6, 10, 15, 28, 39,
939 48, 48, 71, 94, 113, 128, 139, 154,
940 175, 186, 188, 188, 188, 188, 188, 188,
941 203, 208, 208, 208, 208, 208, 208, 208,
942 208, 219, 219, 219, 222, 222, 222, 222,
943 222, 222, 222, 222, 222, 222, 222, 224,
944 224, 231, 231, 231, 231, 231, 231, 231,
945 231, 231, 231, 231, 231, 231, 231, 231,
946 231, 231, 231, 231, 231, 231, 231, 231,
947 231, 231, 231, 231, 231, 231, 231, 231,
948 231, 231, 231, 231, 231, 231, 231, 231,
949 231, 231, 231, 231, 231, 231, 231, 231,
950 231, 231, 231, 231, 231, 231, 231, 231,
951 231, 231, 231, 231, 231, 231, 231, 231,
952 231, 231, 231, 231, 231, 231, 231, 231,
953 231, 231, 231, 231, 231, 231, 231, 231,
954 231, 231, 231, 231, 231, 231, 231, 231,
955 231, 231, 231, 231, 231, 231, 231, 231,
956 231, 231, 231, 231, 231, 231, 231, 231,
957 231, 231, 231, 231, 231, 231, 231, 231,
958 231, 231, 231, 231, 231, 233, 233, 233,
959 233, 233, 233, 233, 234, 234, 234, 234,
960 234, 234, 234, 234, 234, 234, 234, 234,
961 234, 234, 234, 234, 234, 234, 234, 234,
962 234, 234, 234, 234, 234, 234, 234, 234,
963 234, 234, 234, 234, 234, 234, 234, 234,
964 234, 234, 234, 234, 234, 234, 234, 234,
965 236, 236, 236, 236, 236, 236, 236, 236,
966 236, 236, 236, 236, 236, 236, 236, 236,
967 236, 236, 236, 236, 236, 236, 236, 236,
968 236, 236, 236, 236, 236, 236, 236, 236,
969 236, 237, 237, 238, 241, 241, 242, 249,
973 static const unsigned char new_tbl[] = {
974 0x00, 0x01, 0x20, 0x7f, 0xa0, 0x00, 0x00, 0x50,
975 0x60, 0x70, 0x00, 0x83, 0x87, 0x88, 0x8a, 0x00,
976 0x91, 0xa2, 0xa3, 0xba, 0xbb, 0xbe, 0xbf, 0xc0,
977 0xc1, 0xc3, 0xc4, 0xc5, 0x00, 0x4b, 0x56, 0x70,
978 0x71, 0xd6, 0xe5, 0xe7, 0xe9, 0xea, 0xee, 0x00,
979 0x0f, 0x10, 0x11, 0x12, 0x30, 0x4b, 0xa6, 0xb1,
980 0x00, 0x01, 0x03, 0x3c, 0x3d, 0x41, 0x49, 0x4d,
981 0x4e, 0x51, 0x55, 0x62, 0x64, 0x81, 0x82, 0xbc,
982 0xbd, 0xc1, 0xc5, 0xcd, 0xce, 0xe2, 0xe4, 0x00,
983 0x02, 0x03, 0x3c, 0x3d, 0x41, 0x43, 0x47, 0x49,
984 0x4b, 0x4e, 0x70, 0x72, 0x81, 0x83, 0xbc, 0xbd,
985 0xc1, 0xc6, 0xc7, 0xc9, 0xcd, 0xce, 0x00, 0x01,
986 0x02, 0x3c, 0x3d, 0x3f, 0x40, 0x41, 0x44, 0x4d,
987 0x4e, 0x56, 0x57, 0x82, 0x83, 0xc0, 0xc1, 0xcd,
988 0xce, 0x00, 0x3e, 0x41, 0x46, 0x49, 0x4a, 0x4e,
989 0x55, 0x57, 0xbf, 0xc0, 0xc6, 0xc7, 0xcc, 0xce,
990 0x00, 0x41, 0x44, 0x4d, 0x4e, 0xca, 0xcb, 0xd2,
991 0xd5, 0xd6, 0xd7, 0x00, 0x31, 0x32, 0x34, 0x3b,
992 0x47, 0x4f, 0xb1, 0xb2, 0xb4, 0xba, 0xbb, 0xbd,
993 0xc8, 0xce, 0x00, 0x18, 0x1a, 0x35, 0x36, 0x37,
994 0x38, 0x39, 0x3a, 0x71, 0x7f, 0x80, 0x85, 0x86,
995 0x88, 0x90, 0x98, 0x99, 0xbd, 0xc6, 0xc7, 0x00,
996 0x2d, 0x31, 0x32, 0x33, 0x36, 0x38, 0x39, 0x3a,
997 0x58, 0x5a, 0x00, 0x60, 0x00, 0x12, 0x15, 0x32,
998 0x35, 0x52, 0x54, 0x72, 0x74, 0xb7, 0xbe, 0xc6,
999 0xc7, 0xc9, 0xd4, 0x00, 0x0b, 0x0f, 0xa9, 0xaa,
1000 0x00, 0x0b, 0x10, 0x2a, 0x2f, 0x60, 0x64, 0x6a,
1001 0x70, 0xd0, 0xeb, 0x00, 0x29, 0x2b, 0x00, 0x80,
1002 0x00, 0x2a, 0x30, 0x3f, 0x40, 0x99, 0x9b, 0x00,
1003 0xd0, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x1e,
1004 0x1f, 0x00, 0x00, 0x10, 0x20, 0x24, 0x30, 0x70,
1005 0xff, 0x00, 0x61, 0xe0, 0xe7, 0xf9, 0xfc,
1008 static const signed char new_wtbl[] = {
1009 0, -1, 1, -1, 1, 1, 0, 1,
1010 0, 1, 1, 0, 1, 0, 1, 1,
1011 0, 1, 0, 1, 0, 1, 0, 1,
1012 0, 1, 0, 1, 1, 0, 1, 0,
1013 1, 0, 1, 0, 1, 0, 1, 1,
1014 0, 1, 0, 1, 0, 1, 0, 1,
1015 1, 0, 1, 0, 1, 0, 1, 0,
1016 1, 0, 1, 0, 1, 0, 1, 0,
1017 1, 0, 1, 0, 1, 0, 1, 1,
1018 0, 1, 0, 1, 0, 1, 0, 1,
1019 0, 1, 0, 1, 0, 1, 0, 1,
1020 0, 1, 0, 1, 0, 1, 1, 0,
1021 1, 0, 1, 0, 1, 0, 1, 0,
1022 1, 0, 1, 0, 1, 0, 1, 0,
1023 1, 1, 0, 1, 0, 1, 0, 1,
1024 0, 1, 0, 1, 0, 1, 0, 1,
1025 1, 0, 1, 0, 1, 0, 1, 0,
1026 1, 0, 1, 1, 0, 1, 0, 1,
1027 0, 1, 0, 1, 0, 1, 0, 1,
1028 0, 1, 1, 0, 1, 0, 1, 0,
1029 1, 0, 1, 0, 1, 0, 1, 0,
1030 1, 0, 1, 0, 1, 0, 1, 1,
1031 0, 1, 0, 1, 0, 1, 0, 1,
1032 0, 1, 2, 0, 1, 0, 1, 0,
1033 1, 0, 1, 0, 1, 0, 1, 0,
1034 1, 0, 1, 1, 0, 1, 0, 1,
1035 1, 0, 1, 0, 1, 0, 1, 0,
1036 1, 0, 1, 1, 2, 1, 1, 2,
1037 2, 0, 2, 1, 2, 0, 2, 2,
1038 1, 1, 2, 1, 1, 2, 1, 0,
1039 1, 1, 0, 1, 0, 1, 2, 1,
1040 0, 2, 1, 2, 1, 0, 1,
1043 /* libc_hidden_proto(wcsnrtombs) */
/* wcswidth (locale-enabled variant): accumulate the column width of at
 * most n wide characters starting at pwcs, stopping at a zero wide char.
 * A first pass checks the string against the current encoding: 7-bit
 * locales compare each value with its low seven bits, 8-bit locales run
 * a trial wcsnrtombs() with a NULL destination.  The second pass adds up
 * widths: values up to 0xffff are looked up by bisection in new_tbl and
 * weighted through new_wtbl, and larger values are handled by explicit
 * range tests.
 * NOTE(review): declarations, the statements executed on a failed check,
 * and the final return are not visible in this excerpt. */
1045 int wcswidth(const wchar_t *pwcs, size_t n)
1051 if (ENCODING == __ctype_encoding_7_bit) {
1054 for (i = 0 ; (i < n) && pwcs[i] ; i++) {
1055 if (pwcs[i] != (pwcs[i] & 0x7f)) {
1060 #ifdef __CTYPE_HAS_8_BIT_LOCALES
1061 else if (ENCODING == __ctype_encoding_8_bit) {
1064 mbstate.__mask = 0; /* Initialize the mbstate. */
1065 if (wcsnrtombs(NULL, &pwcs, n, SIZE_MAX, &mbstate) == ((size_t) - 1)) {
1069 #endif /* __CTYPE_HAS_8_BIT_LOCALES */
1070 #if defined(__CTYPE_HAS_UTF_8_LOCALES) && defined(KUHN)
1071 /* For stricter handling of allowed unicode values... see comments above. */
1072 else if (ENCODING == __ctype_encoding_utf8) {
1075 for (i = 0 ; (i < n) && pwcs[i] ; i++) {
1076 if ( (((__uwchar_t)((pwcs[i]) - 0xfffeU)) < 2)
1077 || (((__uwchar_t)((pwcs[i]) - 0xd800U)) < (0xe000U - 0xd800U))
1083 #endif /* __CTYPE_HAS_UTF_8_LOCALES */
/* Width accumulation loop: one iteration per nonzero wide char. */
1085 for (count = 0 ; n && (wc = *pwcs++) ; n--) {
1087 /* If we're here, wc != 0. */
/* Values below 32 and values in 0x7f..0x9f are singled out here. */
1088 if ((wc < 32) || ((wc >= 0x7f) && (wc < 0xa0))) {
1094 if (((unsigned int) wc) <= 0xffff) {
/* Bisection: narrow [l, h) until the midpoint equals l. */
1099 while ((m = (l+h) >> 1) != l) {
1100 if (b >= new_tbl[m]) {
1102 } else { /* wc < tbl[m] */
1106 count += new_wtbl[l]; /* none should be -1. */
1110 /* Redo this to minimize average number of compares?*/
1111 if (wc >= 0x1d167) {
1112 if (wc <= 0x1d1ad) {
1118 || (wc >= 0x1d1aa))))))
1122 } else if (((wc >= 0xe0020) && (wc <= 0xe007f)) || (wc == 0xe0001)) {
1124 } else if ((wc >= 0x20000) && (wc <= 0x2ffff)) {
1125 ++count; /* need 2.. add one here */
1127 #if (WCHAR_MAX > 0x7fffffffL)
1128 else if (wc > 0x7fffffffL) {
1131 #endif /* (WCHAR_MAX > 0x7fffffffL) */
1140 #else /* __UCLIBC_HAS_LOCALE__ */
/* wcswidth (no-locale variant): only the 7-bit check and the same
 * control-character range test as above are visible in this branch. */
1142 int wcswidth(const wchar_t *pwcs, size_t n)
1148 for (i = 0 ; (i < n) && pwcs[i] ; i++) {
1149 if (pwcs[i] != (pwcs[i] & 0x7f)) {
1154 for (count = 0 ; n && (wc = *pwcs++) ; n--) {
1156 /* If we're here, wc != 0. */
1157 if ((wc < 32) || ((wc >= 0x7f) && (wc < 0xa0))) {
1170 #endif /* __UCLIBC_HAS_LOCALE__ */
1172 libc_hidden_def(wcswidth)
1175 /**********************************************************************/
1178 /* libc_hidden_proto(wcswidth) */
/* wcwidth: column width of a single wide character, computed by calling
 * wcswidth() on a one-element string made of wc.  The result is whatever
 * wcswidth() returns for that input. */
1180 int wcwidth(wchar_t wc)
1182 return wcswidth(&wc, 1);
1186 /**********************************************************************/
1191 mbstate_t fromstate;
1199 int skip_invalid_input; /* To support iconv -c option. */
1209 #include <byteswap.h>
1211 #if (__BYTE_ORDER != __BIG_ENDIAN) && (__BYTE_ORDER != __LITTLE_ENDIAN)
1212 #error unsupported endianness for iconv
1215 #ifndef __CTYPE_HAS_8_BIT_LOCALES
1216 #error currently iconv requires 8 bit locales
1218 #ifndef __CTYPE_HAS_UTF_8_LOCALES
1219 #error currently iconv requires UTF-8 locales
1225 IC_MULTIBYTE = 0xe0,
1226 #if __BYTE_ORDER == __BIG_ENDIAN
1241 /* For the multibyte
1242 * bit 0 means swap endian
1243 * bit 1 means 2 byte
1244 * bit 2 means 4 byte
1248 /* Used externally only by iconv utility */
1249 extern const unsigned char __iconv_codesets[];
1250 libc_hidden_proto(__iconv_codesets)
/* Table of built-in codeset names, stored as one concatenated string.
 * Each entry is: one byte giving the distance to the next entry (the
 * stride find_codeset() advances by), one code byte (presumably the
 * value reported for a match -- the returning line is not visible in
 * this excerpt), then the NUL-terminated name, which find_codeset()
 * compares starting at offset 2.  The last entry omits its explicit NUL
 * and relies on the string literal's own terminator, which also serves
 * as the zero length byte that ends the scan.
 * NOTE(review): in the little-endian branch the plain UTF-32 and UTF-16
 * entries use codes 0xf4 and 0xfa, i.e. with bit 0x10 set, which
 * iconv_open() reads as the BOM flag.  The big-endian branch uses 0xe4
 * and 0xea for the same names although they carry the same
 * "with BOM" remark.  Confirm whether the big-endian codes should also
 * have bit 0x10 set. */
1252 const unsigned char __iconv_codesets[] =
1253 "\x0a\xe0""WCHAR_T\x00" /* superset of UCS-4 but platform-endian */
1254 #if __BYTE_ORDER == __BIG_ENDIAN
1255 "\x08\xec""UCS-4\x00" /* always BE */
1256 "\x0a\xec""UCS-4BE\x00"
1257 "\x0a\xed""UCS-4LE\x00"
1258 "\x09\xe4""UTF-32\x00" /* platform endian with BOM */
1259 "\x0b\xe4""UTF-32BE\x00"
1260 "\x0b\xe5""UTF-32LE\x00"
1261 "\x08\xe2""UCS-2\x00" /* always BE */
1262 "\x0a\xe2""UCS-2BE\x00"
1263 "\x0a\xe3""UCS-2LE\x00"
1264 "\x09\xea""UTF-16\x00" /* platform endian with BOM */
1265 "\x0b\xea""UTF-16BE\x00"
1266 "\x0b\xeb""UTF-16LE\x00"
1267 #elif __BYTE_ORDER == __LITTLE_ENDIAN
1268 "\x08\xed""UCS-4\x00" /* always BE */
1269 "\x0a\xed""UCS-4BE\x00"
1270 "\x0a\xec""UCS-4LE\x00"
1271 "\x09\xf4""UTF-32\x00" /* platform endian with BOM */
1272 "\x0b\xe5""UTF-32BE\x00"
1273 "\x0b\xe4""UTF-32LE\x00"
1274 "\x08\xe3""UCS-2\x00" /* always BE */
1275 "\x0a\xe3""UCS-2BE\x00"
1276 "\x0a\xe2""UCS-2LE\x00"
1277 "\x09\xfa""UTF-16\x00" /* platform endian with BOM */
1278 "\x0b\xeb""UTF-16BE\x00"
1279 "\x0b\xea""UTF-16LE\x00"
1281 "\x08\x02""UTF-8\x00"
1282 "\x0b\x01""US-ASCII\x00"
1283 "\x07\x01""ASCII"; /* Must be last! (special case to save a nul) */
1284 libc_hidden_data_def(__iconv_codesets)
1286 /* Experimentally off - libc_hidden_proto(strcasecmp) */
/* find_codeset: map a codeset name to its numeric code.
 * The built-in table __iconv_codesets is searched first, then the locale
 * data list __LOCALE_DATA_CODESET_LIST; both comparisons are
 * case-insensitive.  Returns 0 when the name matches nothing.
 * NOTE(review): the statements returning a successful match are not
 * visible in this excerpt. */
1288 static int find_codeset(const char *name)
1290 const unsigned char *s;
/* Each entry's first byte is the stride to the next entry; a zero
 * byte ends the table.  The name starts two bytes into the entry. */
1293 for (s = __iconv_codesets; *s; s += *s) {
1294 if (!strcasecmp((char*) (s + 2), name)) {
1299 /* The following is ripped from find_locale in locale.c. */
1301 /* TODO: maybe CODESET_LIST + *s ??? */
1302 /* 7bit is 1, UTF-8 is 2, 8-bit is >= 3 */
/* Here each byte at s is an offset into the list at which a name
 * string begins. */
1304 s = (const unsigned char *) __LOCALE_DATA_CODESET_LIST;
1306 ++codeset; /* Increment codeset first. */
1307 if (!strcasecmp(__LOCALE_DATA_CODESET_LIST+*s, name)) {
1312 return 0; /* No matching codeset! */
/* iconv_open: create a conversion descriptor from fromcode to tocode.
 * Both names are resolved with find_codeset(); a result of 0 means the
 * name is unknown.  When both resolve and malloc() succeeds, the new
 * descriptor records the two codeset codes, derives the BOM flags from
 * bit 0x10 of each code, keeps "0"-suffixed copies of the initial values
 * (restored when iconv() is asked to reset), clears the skip flag and
 * both conversion states, and is returned cast to iconv_t.
 * Otherwise (iconv_t)(-1) is returned.
 * NOTE(review): the braces around the __set_errno(EINVAL) call are not
 * visible in this excerpt, so it cannot be confirmed from here which
 * failure path sets EINVAL. */
1315 iconv_t weak_function iconv_open(const char *tocode, const char *fromcode)
1317 register _UC_iconv_t *px;
1318 int tocodeset, fromcodeset;
1320 if (((tocodeset = find_codeset(tocode)) != 0)
1321 && ((fromcodeset = find_codeset(fromcode)) != 0)) {
1322 if ((px = malloc(sizeof(_UC_iconv_t))) != NULL) {
1323 px->tocodeset = tocodeset;
/* Bit 0x10 of a codeset code becomes a 0/1 BOM flag. */
1324 px->tobom0 = px->tobom = (tocodeset & 0x10) >> 4;
1325 px->fromcodeset0 = px->fromcodeset = fromcodeset;
1326 px->frombom0 = px->frombom = (fromcodeset & 0x10) >> 4;
1327 px->skip_invalid_input = px->tostate.__mask
1328 = px->fromstate.__mask = 0;
1329 return (iconv_t) px;
1332 __set_errno(EINVAL);
1334 return (iconv_t)(-1);
1337 int weak_function iconv_close(iconv_t cd)
1344 size_t weak_function iconv(iconv_t cd, char **__restrict inbuf,
1345 size_t *__restrict inbytesleft,
1346 char **__restrict outbuf,
1347 size_t *__restrict outbytesleft)
1349 _UC_iconv_t *px = (_UC_iconv_t *) cd;
1354 assert(px != (_UC_iconv_t *)(-1));
1355 assert(sizeof(wchar_t) == 4);
1357 if (!inbuf || !*inbuf) { /* Need to reinitialze conversion state. */
1358 /* Note: For shift-state encodings we possibly need to output the
1359 * shift sequence to return to initial state! */
1360 if ((px->fromcodeset & 0xf0) == 0xe0) {
1362 px->tostate.__mask = px->fromstate.__mask = 0;
1363 px->fromcodeset = px->fromcodeset0;
1364 px->tobom = px->tobom0;
1365 px->frombom = px->frombom0;
1370 while (*inbytesleft) {
1371 if (!*outbytesleft) {
1378 if (px->fromcodeset >= IC_MULTIBYTE) {
1379 inci = (px->fromcodeset == IC_WCHAR_T) ? 4: (px->fromcodeset & 6);
1380 if (*inbytesleft < inci) goto INVALID;
1381 wc = (((unsigned int)((unsigned char)((*inbuf)[0]))) << 8)
1382 + ((unsigned char)((*inbuf)[1]));
1384 wc = (((unsigned int)((unsigned char)((*inbuf)[2]))) << 8)
1385 + ((unsigned char)((*inbuf)[3])) + (wc << 16);
1386 if (!(px->fromcodeset & 1)) wc = bswap_32(wc);
1388 if (!(px->fromcodeset & 1)) wc = bswap_16(wc);
1389 if (((px->fromcodeset & IC_UTF_16) == IC_UTF_16)
1390 && (((__uwchar_t)(wc - 0xd800U)) < (0xdc00U - 0xd800U))
1393 if (*inbytesleft < 4) goto INVALID;
1394 wc2 = (((unsigned int)((unsigned char)((*inbuf)[2]))) << 8)
1395 + ((unsigned char)((*inbuf)[3]));
1396 if (!(px->fromcodeset & 1)) wc = bswap_16(wc2);
1397 if (((__uwchar_t)(wc2 -= 0xdc00U)) < (0xe0000U - 0xdc00U)) {
1400 inci = 4; /* Change inci here in case skipping illegals. */
1401 wc = 0x10000UL + (wc << 10) + wc2;
1408 || (wc == ((inci == 4)
1409 ? (((wchar_t) 0xfffe0000UL))
1410 : ((wchar_t)(0xfffeUL))))
1412 if (wc != 0xfeffU) {
1413 px->fromcodeset ^= 1; /* toggle endianness */
1417 goto BOM_SKIP_OUTPUT;
1423 if (px->fromcodeset != IC_WCHAR_T) {
1424 if (((__uwchar_t) wc) > (((px->fromcodeset & IC_UCS_4) == IC_UCS_4)
1425 ? 0x7fffffffUL : 0x10ffffUL)
1427 || (((__uwchar_t)(wc - 0xfffeU)) < 2)
1428 || (((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U))
1434 } else if (px->fromcodeset == IC_UTF_8) {
1435 const char *p = *inbuf;
1436 r = _wchar_utf8sntowcs(&wc, 1, &p, *inbytesleft, &px->fromstate, 0);
1437 if (((ssize_t) r) <= 0) { /* either EILSEQ or incomplete or nul */
1438 if (((ssize_t) r) < 0) { /* either EILSEQ or incomplete or nul */
1439 assert((r == (size_t)(-1)) || (r == (size_t)(-2)));
1440 if (r == (size_t)(-2)) {
1442 __set_errno(EINVAL);
1444 px->fromstate.__mask = 0;
1447 if (px->skip_invalid_input) {
1448 px->skip_invalid_input = 2; /* flag for iconv utility */
1449 goto BOM_SKIP_OUTPUT;
1451 __set_errno(EILSEQ);
1453 return (size_t)(-1);
1455 #ifdef __UCLIBC_MJN3_ONLY__
1456 #warning TODO: optimize this.
1458 if (p != NULL) { /* incomplete char case */
1461 p = *inbuf + 1; /* nul */
1464 } else if ((wc = ((unsigned char)(**inbuf))) >= 0x80) { /* Non-ASCII... */
1465 if (px->fromcodeset == IC_ASCII) { /* US-ASCII codeset */
1467 } else { /* some other 8-bit ascii-extension codeset */
1468 const __codeset_8_bit_t *c8b
1469 = __locale_mmap->codeset_8_bit + px->fromcodeset - 3;
1471 wc = __UCLIBC_CURLOCALE->tbl8c2wc[
1472 (c8b->idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
1473 << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
1488 if (px->tocodeset >= IC_MULTIBYTE) {
1489 inco = (px->tocodeset == IC_WCHAR_T) ? 4: (px->tocodeset & 6);
1490 if (*outbytesleft < inco) goto TOO_BIG;
1491 if (px->tocodeset != IC_WCHAR_T) {
1492 if (((__uwchar_t) wc) > (((px->tocodeset & IC_UCS_4) == IC_UCS_4)
1493 ? 0x7fffffffUL : 0x10ffffUL)
1495 || (((__uwchar_t)(wc - 0xfffeU)) < 2)
1496 || (((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U))
1505 if (px->tocodeset & 1) wc = bswap_32(wc);
1507 if (((__uwchar_t)wc ) > 0xffffU) {
1508 if ((px->tocodeset & IC_UTF_16) != IC_UTF_16) {
1511 if (*outbytesleft < (inco = 4)) goto TOO_BIG;
1512 wc2 = 0xdc00U + (wc & 0x3ff);
1513 wc = 0xd800U + ((wc >> 10) & 0x3ff);
1514 if (px->tocodeset & 1) {
1516 wc2 = bswap_16(wc2);
1519 } else if (px->tocodeset & 1) wc = bswap_16(wc);
1521 (*outbuf)[0] = (char)((unsigned char)(wc));
1522 (*outbuf)[1] = (char)((unsigned char)(wc >> 8));
1524 (*outbuf)[2] = (char)((unsigned char)(wc >> 16));
1525 (*outbuf)[3] = (char)((unsigned char)(wc >> 24));
1527 } else if (px->tocodeset == IC_UTF_8) {
1528 const wchar_t *pw = &wc;
1530 r = _wchar_wcsntoutf8s(*outbuf, *outbytesleft, &pw, 1);
1531 if (r != (size_t)(-1)) {
1532 #ifdef __UCLIBC_MJN3_ONLY__
1533 #warning TODO: What happens for a nul?
1547 } else if (((__uwchar_t)(wc)) < 0x80) {
1551 if ((px->tocodeset != 0x01) && (wc <= Cwc2c_DOMAIN_MAX)) {
1552 const __codeset_8_bit_t *c8b
1553 = __locale_mmap->codeset_8_bit + px->tocodeset - 3;
1555 u = c8b->idx8wc2c[wc >> (Cwc2c_TI_SHIFT + Cwc2c_TT_SHIFT)];
1556 u = __UCLIBC_CURLOCALE->tbl8wc2c[(u << Cwc2c_TI_SHIFT)
1557 + ((wc >> Cwc2c_TT_SHIFT)
1558 & ((1 << Cwc2c_TI_SHIFT)-1))];
1559 wc = __UCLIBC_CURLOCALE->tbl8wc2c[Cwc2c_TI_LEN
1560 + (u << Cwc2c_TT_SHIFT)
1561 + (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
1571 *outbytesleft -= inco;
1574 *inbytesleft -= inci;
1580 /**********************************************************************/
/* Table of codeset names defined elsewhere; main() walks it to produce the
 * -l listing. */
1588 extern const unsigned char __iconv_codesets[];
/* Program name: printed as the "<progname>: " prefix by error_msg() and,
 * through basename(), in the usage text of main(). */
1593 static char *progname;
/* NOTE(review): no read or write of hide_errors is visible in this listing;
 * presumably it silences error_msg() output -- confirm against the full
 * file. */
1594 static int hide_errors;
/* error_msg: print a printf-style diagnostic on stderr, prefixed with
 * "<progname>: ".
 *
 * fmt  printf-style format string; the variable arguments follow it.
 *
 * The prototype marks the function noreturn, so callers rely on it never
 * coming back, and tags it format(printf, 1, 2) so the compiler checks the
 * variable arguments against fmt. */
1596 static void error_msg(const char *fmt, ...)
1597 __attribute__ ((noreturn, format (printf, 1, 2)));
/* NOTE(review): this listing omits lines of the definition (the embedded
 * numbering jumps 1599 -> 1604 -> 1606); the va_list set-up and the statement
 * that terminates the process are not visible here -- confirm against the
 * full file. */
1599 static void error_msg(const char *fmt, ...)
/* Prefix first, then the caller's formatted message. */
1604 fprintf(stderr, "%s: ", progname);
1606 vfprintf(stderr, fmt, arg);
/* main: entry point of the iconv command-line utility.
 *
 * Usage, as printed by the usage text below:
 *   prog [-cs] -f fromcode -t tocode [-o outputfile] [inputfile ...]
 *   prog -l
 *
 * Returns EXIT_FAILURE on a usage error, EXIT_SUCCESS after the -l listing,
 * and at the end EXIT_SUCCESS unless skip_invalid_input in the conversion
 * descriptor has reached 2.  All other failures are reported through
 * error_msg(), which is declared noreturn.
 *
 * NOTE(review): this listing omits lines (the embedded numbering has gaps),
 * so several loop headers, else-branches, labels and close calls are not
 * visible; comments below describe only what the visible lines show. */
1613 int main(int argc, char **argv)
1616 FILE *ofile = stdout;
/* Option letters; the index of a letter in this string is the index of its
 * slot in opts[]: 0 = -t, 1 = -f, 2 = -o, 3 = -c, 4 = -s, 5 = -l. */
1619 static const char opt_chars[] = "tfocsl";
1621 const char *opts[sizeof(opt_chars)]; /* last is infile name */
1627 size_t ni, no, r, pos;
/* Clear one slot per option letter (indices 0..5). */
1631 for (s = opt_chars ; *s ; s++) {
1632 opts[ s - opt_chars ] = NULL;
/* An argument that does not start with '-', or that is exactly "-", is not
 * an option. */
1638 if ((*p != '-') || (*++p == 0)) {
/* Unknown option letter: print the usage text and fail. */
1642 if ((s = strchr(opt_chars,*p)) == NULL) {
1644 s = basename(progname);
1646 "%s [-cs] -f fromcode -t tocode [-o outputfile] [inputfile ...]\n"
1647 " or\n%s -l\n", s, s);
1648 return EXIT_FAILURE;
/* The first three letters (-t, -f, -o) take a value from the next argv
 * element.  The test below detects a missing value or a repeated option;
 * the statement executed in that case is not visible in this listing. */
1650 if ((s - opt_chars) < 3) {
1651 if ((--argc == 0) || opts[s - opt_chars]) {
1654 opts[s - opt_chars] = *++argv;
/* Presumably the alternative branch for the flag options (-c, -s, -l): any
 * non-NULL pointer marks the flag as given. */
1656 opts[s - opt_chars] = p;
1661 if (opts[5]) { /* -l */
1662 fprintf(stderr, "Recognized codesets:\n");
/* Each __iconv_codesets entry begins with a byte holding the distance to the
 * next entry (s += *s); a zero byte ends the table.  The printable name
 * starts at offset 2 of the entry. */
1663 for (s = (char *)__iconv_codesets ; *s ; s += *s) {
1664 fprintf(stderr," %s\n", s+2);
/* Each byte read through s is used as an offset from the start of
 * __LOCALE_DATA_CODESET_LIST to a name string.  The loop construct around
 * the fprintf is not visible in this listing. */
1666 s = __LOCALE_DATA_CODESET_LIST;
1668 fprintf(stderr," %s\n", __LOCALE_DATA_CODESET_LIST+ (unsigned char)(*s));
1671 return EXIT_SUCCESS;
/* Both -t (opts[0]) and -f (opts[1]) are needed for a conversion. */
1678 if (!opts[0] || !opts[1]) {
/* iconv_open() takes (tocode, fromcode), which matches opts[0] = -t and
 * opts[1] = -f.
 * NOTE(review): the diagnostic prints opts[0] before opts[1], so it reads
 * "tocode -> fromcode"; the arrow suggests the operands were meant to be
 * the other way round -- confirm. */
1681 if ((ic = iconv_open(opts[0],opts[1])) == ((iconv_t)(-1))) {
1682 error_msg( "unsupported codeset in %s -> %s conversion\n", opts[0], opts[1]);
1684 if (opts[3]) { /* -c */
1685 ((_UC_iconv_t *) ic)->skip_invalid_input = 1;
/* -o: send the converted output to the named file instead of stdout. */
1688 if ((s = opts[2]) != NULL) {
1689 if (!(ofile = fopen(s, "w"))) {
1690 error_msg( "couldn't open %s for writing\n", s);
/* No input file argument left, or the argument "-", selects stdin. */
1696 if (!argc || ((**argv == '-') && !((*argv)[1]))) {
1697 ifile = stdin; /* we don't check for duplicates */
1698 } else if (!(ifile = fopen(*argv, "r"))) {
1699 error_msg( "couldn't open %s for reading\n", *argv);
/* Read after the ni bytes still held at the front of ibuf, filling the rest
 * of the buffer. */
1702 while ((r = fread(ibuf + ni, 1, IBUF - ni, ifile)) > 0) {
/* EINVAL and E2BIG from iconv() are not treated as fatal here; any other
 * errno aborts with the position pos - ni.  "%m" is a non-standard printf
 * conversion (prints the errno message on glibc-style libcs).  The updates
 * of pos, ni, no, pi and po before this call are not visible in this
 * listing. */
1708 if ((r = iconv(ic, &pi, &ni, &po, &no)) == ((size_t)(-1))) {
1709 if ((errno != EINVAL) && (errno != E2BIG)) {
1710 error_msg( "iconv failed at pos %lu : %m\n", (unsigned long) (pos - ni));
/* OBUF - no is the amount of the output buffer iconv() used; write it out
 * and treat a short write as an error. */
1713 if ((r = OBUF - no) > 0) {
1714 if (fwrite(obuf, 1, OBUF - no, ofile) < r) {
1715 error_msg( "write error\n");
/* Move unconsumed input to the front of ibuf so the next fread appends to
 * it. */
1718 if (ni) { /* still bytes in buffer! */
1719 memmove(ibuf, pi, ni);
1723 if (ferror(ifile)) {
1724 error_msg( "read error\n");
/* The body of this test is not visible in this listing (presumably it closes
 * the input file -- confirm). */
1729 if (ifile != stdin) {
/* Tail of the per-input-file loop: repeat while arguments remain. */
1733 } while (--argc > 0);
/* The condition guarding this diagnostic is not visible in this listing. */
1738 error_msg( "incomplete sequence\n");
/* skip_invalid_input is set to 1 by -c above and raised to 2 by iconv() when
 * it actually skips invalid input (commented there as a flag for this
 * utility), so such a run exits with EXIT_FAILURE.
 * NOTE(review): this reads through ic; confirm the descriptor has not
 * already been released at this point (no close call is visible in this
 * listing). */
1741 return (((_UC_iconv_t *) ic)->skip_invalid_input < 2)
1742 ? EXIT_SUCCESS : EXIT_FAILURE;
1746 /**********************************************************************/