2 /* Copyright (C) 2002, 2003, 2004 Manuel Novoa III
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the Free
16 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 /* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION!
21 * Besides uClibc, I'm using this code in my libc for elks, which is
22 * a 16-bit environment with a fairly limited compiler. It would make
23 * things much easier for me if this file isn't modified unnecessarily.
24 * In particular, please put any new or replacement functions somewhere
25 * else, and modify the makefile to use your version instead.
28 * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
31 /* May 23, 2002 Initial Notes:
33 * I'm still tweaking this stuff, but it passes the tests I've thrown
34 * at it, and Erik needs it for the gcc port. The glibc extension
35 * __wcsnrtombs() hasn't been tested, as I didn't find a test for it
36 * in the glibc source. I also need to fix the behavior of
37 * _wchar_utf8sntowcs() if the max number of wchars to convert is 0.
39 * UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt
40 * file on my platform (x86) show about 5-10% faster conversion speed than
41 * glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with
42 * individual mbrtowc()/wcrtomb() calls.
44 * If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled
45 * as a fail-safe UTF-8 decoder appropriate for a terminal, etc. which
46 * needs to deal gracefully with whatever is sent to it. In that mode,
47 * it passes Markus Kuhn's UTF-8-test.txt stress test. I plan to add
48 * an arg to force that behavior, so the interface will be changing.
50 * I need to fix the error checking for 16-bit wide chars. This isn't
51 * an issue for uClibc, but may be for ELKS. I'm currently not sure
52 * if I'll use 16-bit, 32-bit, or configurable wchars in ELKS.
56 * Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case.
57 * Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit
59 * Enabled building of a C/POSIX-locale-only version, so full locale support
60 * no longer needs to be enabled.
64 * Fixed a bug in _wchar_wcsntoutf8s(). Don't store wcs position if dst is NULL.
65 * Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in
66 * order to support %ls in printf. See comments below for details.
67 * Change behaviour of wc<->mb functions when in the C locale. Now they do
68 * a 1-1 map for the range 0x80-UCHAR_MAX. This is for backwards compatibility
69 * and consistency with the standard's requirement that a printf format string be
70 * a valid multibyte string beginning and ending in its initial shift state.
74 * Forgot to change btowc and wctob when I changed the wc<->mb functions yesterday.
78 * Add wcwidth and wcswidth, based on Markus Kuhn's wcwidth of 2002-05-08.
79 * Added some size/speed optimizations and integrated it into my locale
80 * framework. Minimally tested at the moment, but the stub C-locale
81 * version (which most people would probably be using) should be fine.
85 * Revert the wc<->mb changes from earlier this month involving the C-locale.
86 * Add a couple of ugly hacks to support *wprintf.
87 * Add a mini iconv() and iconv implementation (requires locale support).
90 * Bug fix for mbrtowc.
93 * Bug fix: _wchar_utf8sntowcs and _wchar_wcsntoutf8s now set errno if EILSEQ.
96 * Bug fix: Fix size check for remaining output space in iconv().
105 #include <inttypes.h>
111 #include <bits/uClibc_uwchar.h>
113 /**********************************************************************/
114 #ifdef __UCLIBC_HAS_LOCALE__
115 #ifdef __UCLIBC_MJN3_ONLY__
117 /* generates one warning */
118 #warning TODO: Fix Cc2wc* and Cwc2c* defines!
120 #endif /* __UCLIBC_MJN3_ONLY__ */
122 #define ENCODING ((__UCLIBC_CURLOCALE_DATA).encoding)
124 #define Cc2wc_IDX_SHIFT __LOCALE_DATA_Cc2wc_IDX_SHIFT
125 #define Cc2wc_ROW_LEN __LOCALE_DATA_Cc2wc_ROW_LEN
126 #define Cwc2c_DOMAIN_MAX __LOCALE_DATA_Cwc2c_DOMAIN_MAX
127 #define Cwc2c_TI_SHIFT __LOCALE_DATA_Cwc2c_TI_SHIFT
128 #define Cwc2c_TT_SHIFT __LOCALE_DATA_Cwc2c_TT_SHIFT
129 #define Cwc2c_TI_LEN __LOCALE_DATA_Cwc2c_TI_LEN
131 #ifndef __CTYPE_HAS_UTF_8_LOCALES
132 #warning __CTYPE_HAS_UTF_8_LOCALES not set!
135 #else /* __UCLIBC_HAS_LOCALE__ */
137 #ifdef __UCLIBC_MJN3_ONLY__
140 #warning fix preprocessor logic testing locale settings
144 #define ENCODING (__ctype_encoding_7_bit)
145 #ifdef __CTYPE_HAS_8_BIT_LOCALES
146 #error __CTYPE_HAS_8_BIT_LOCALES is defined!
148 #ifdef __CTYPE_HAS_UTF_8_LOCALES
149 #error __CTYPE_HAS_UTF_8_LOCALES is defined!
151 #undef L__wchar_utf8sntowcs
152 #undef L__wchar_wcsntoutf8s
154 #endif /* __UCLIBC_HAS_LOCALE__ */
155 /**********************************************************************/
157 #if WCHAR_MAX > 0xffffUL
158 #define UTF_8_MAX_LEN 6
160 #define UTF_8_MAX_LEN 3
165 /* Implementation-specific work functions. */
167 extern size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
168 const char **__restrict src, size_t n,
169 mbstate_t *ps, int allow_continuation) attribute_hidden;
171 extern size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
172 const wchar_t **__restrict src, size_t wn) attribute_hidden;
174 /**********************************************************************/
177 /* libc_hidden_proto(mbrtowc) */
179 /* libc_hidden_proto(btowc) */
182 #ifdef __CTYPE_HAS_8_BIT_LOCALES
185 unsigned char buf[1];
189 *buf = (unsigned char) c;
190 mbstate.__mask = 0; /* Initialize the mbstate. */
191 if (mbrtowc(&wc, buf, 1, &mbstate) <= 1) {
197 #else /* __CTYPE_HAS_8_BIT_LOCALES */
199 #ifdef __UCLIBC_HAS_LOCALE__
200 assert((ENCODING == __ctype_encoding_7_bit)
201 || (ENCODING == __ctype_encoding_utf8));
202 #endif /* __UCLIBC_HAS_LOCALE__ */
204 /* If we don't have 8-bit locale support, then this is trivial since
205 * anything outside of 0-0x7f is illegal in C/POSIX and UTF-8 locales. */
206 return (((unsigned int)c) < 0x80) ? c : WEOF;
208 #endif /* __CTYPE_HAS_8_BIT_LOCALES */
210 libc_hidden_def(btowc)
213 /**********************************************************************/
216 /* Note: We completely ignore ps in all currently supported conversions. */
218 /* libc_hidden_proto(wcrtomb) */
222 #ifdef __CTYPE_HAS_8_BIT_LOCALES
224 unsigned char buf[MB_LEN_MAX];
226 return (wcrtomb(buf, c, NULL) == 1) ? *buf : EOF;
228 #else /* __CTYPE_HAS_8_BIT_LOCALES */
230 #ifdef __UCLIBC_HAS_LOCALE__
231 assert((ENCODING == __ctype_encoding_7_bit)
232 || (ENCODING == __ctype_encoding_utf8));
233 #endif /* __UCLIBC_HAS_LOCALE__ */
235 /* If we don't have 8-bit locale support, then this is trivial since
236 * anything outside of 0-0x7f is illegal in C/POSIX and UTF-8 locales. */
238 /* TODO: need unsigned version of wint_t... */
239 /* return (((unsigned int)c) < 0x80) ? c : WEOF; */
240 return ((c >= 0) && (c < 0x80)) ? c : EOF;
242 #endif /* __CTYPE_HAS_8_BIT_LOCALES */
246 /**********************************************************************/
249 /* libc_hidden_proto(mbsinit) */
250 int mbsinit(const mbstate_t *ps)
252 return !ps || !ps->__mask;
254 libc_hidden_def(mbsinit)
257 /**********************************************************************/
260 /* libc_hidden_proto(mbrtowc) */
262 /* libc_hidden_proto(mbrlen) */
/* Length-only front end to mbrtowc(): the converted wide character is
 * discarded (NULL destination) and mbrtowc()'s result is returned unchanged.
 *
 * s  - multibyte input
 * n  - maximum number of bytes of s to examine
 * ps - conversion state; when NULL, a state private to mbrlen() is used */
size_t mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)
{
	static mbstate_t private_state;	/* Zero-initialized (static storage). */
	mbstate_t *state = ps;

	if (state == NULL) {
		state = &private_state;
	}

	return mbrtowc(NULL, s, n, state);
}
269 libc_hidden_def(mbrlen)
272 /**********************************************************************/
275 /* libc_hidden_proto(mbsnrtowcs) */
277 /* libc_hidden_proto(mbrtowc) */
278 size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
279 size_t n, mbstate_t *__restrict ps)
281 static mbstate_t mbstate; /* Rely on bss 0-init. */
285 char empty_string[1]; /* Avoid static to be fPIC friendly. */
292 pwc = (wchar_t *) s; /* NULL */
293 empty_string[0] = 0; /* Init the empty string when necessary. */
296 } else if (*s == '\0') {
297 /* According to the ISO C 89 standard this is the expected behaviour. */
300 /* TODO: change error code? */
302 return (ps->__mask && (ps->__wc == 0xffffU))
303 ? ((size_t) -1) : ((size_t) -2);
311 #ifdef __CTYPE_HAS_UTF_8_LOCALES
312 /* Need to do this here since mbsrtowcs doesn't allow incompletes. */
313 if (ENCODING == __ctype_encoding_utf8) {
317 r = _wchar_utf8sntowcs(pwc, 1, &p, n, ps, 1);
318 return (r == 1) ? (p-s) : r; /* Need to return 0 if nul char. */
322 #ifdef __UCLIBC_MJN3_ONLY__
323 #warning TODO: This adds a trailing nul!
324 #endif /* __UCLIBC_MJN3_ONLY__ */
326 r = mbsnrtowcs(wcbuf, &p, SIZE_MAX, 1, ps);
328 if (((ssize_t) r) >= 0) {
335 libc_hidden_def(mbrtowc)
338 /**********************************************************************/
341 /* libc_hidden_proto(wcsnrtombs) */
343 /* Note: We completely ignore ps in all currently supported conversions. */
344 /* TODO: Check for valid state anyway? */
346 /* libc_hidden_proto(wcrtomb) */
347 size_t wcrtomb(register char *__restrict s, wchar_t wc,
348 mbstate_t *__restrict ps)
350 #ifdef __UCLIBC_MJN3_ONLY__
351 #warning TODO: Should wcsnrtombs nul-terminate unconditionally? Check glibc.
352 #endif /* __UCLIBC_MJN3_ONLY__ */
356 char buf[MB_LEN_MAX];
366 r = wcsnrtombs(s, &pwc, 1, MB_LEN_MAX, ps);
367 return (r != 0) ? r : 1;
369 libc_hidden_def(wcrtomb)
372 /**********************************************************************/
375 /* libc_hidden_proto(mbsnrtowcs) */
377 /* libc_hidden_proto(mbsrtowcs) */
/* Convert a multibyte string to wide characters by delegating to
 * mbsnrtowcs() with no limit (SIZE_MAX) on the number of input bytes.
 *
 * dst - destination buffer, passed through to mbsnrtowcs()
 * src - address of the source pointer, passed through to mbsnrtowcs()
 * len - maximum number of wide characters to store
 * ps  - conversion state; when NULL, a state private to mbsrtowcs() is used
 *
 * Returns whatever mbsnrtowcs() returns. */
size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
				 size_t len, mbstate_t *__restrict ps)
{
	static mbstate_t private_state;	/* Zero-initialized (static storage). */
	mbstate_t *state = ps;

	if (state == NULL) {
		state = &private_state;
	}

	return mbsnrtowcs(dst, src, SIZE_MAX, len, state);
}
386 libc_hidden_def(mbsrtowcs)
389 /**********************************************************************/
392 /* Note: We completely ignore ps in all currently supported conversions.
394 * TODO: Check for valid state anyway? */
396 /* libc_hidden_proto(wcsnrtombs) */
398 /* libc_hidden_proto(wcsrtombs) */
/* Convert a wide string to multibyte form by delegating to wcsnrtombs()
 * with no limit (SIZE_MAX) on the number of wide characters consumed.
 *
 * dst, src, len and ps are forwarded unchanged; ps is not substituted when
 * it is NULL.
 *
 * Returns whatever wcsnrtombs() returns. */
size_t wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
				 size_t len, mbstate_t *__restrict ps)
{
	const size_t no_input_limit = SIZE_MAX;

	return wcsnrtombs(dst, src, no_input_limit, len, ps);
}
404 libc_hidden_def(wcsrtombs)
407 /**********************************************************************/
408 #ifdef L__wchar_utf8sntowcs
410 /* Define DECODER to generate a UTF-8 decoder which passes Markus Kuhn's
411 * UTF-8-test.txt stress test.
413 /* #define DECODER */
421 size_t attribute_hidden _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
422 const char **__restrict src, size_t n,
423 mbstate_t *ps, int allow_continuation)
425 register const char *s;
438 /* NOTE: The following is an AWFUL HACK! In order to support %s in
439 * wprintf, we need to be able to compute the number of wchars needed
440 * for the mbs conversion, not to exceed the precision specified.
441 * But if dst is NULL, the return value is the length assuming a
442 * sufficiently sized buffer. So, we allow passing of (wchar_t *) ps
443 * as pwc in order to flag that we really want the length, subject
444 * to the restricted buffer size and no partial conversions.
445 * See mbsnrtowcs() as well. */
446 if (!pwc || (pwc == ((wchar_t *)ps))) {
454 /* This is really here only to support the glibc extension function
455 * __mbsnrtowcs which apparently returns 0 if wn == 0 without any
456 * check on the validity of the mbstate. */
461 if ((mask = (__uwchar_t) ps->__mask) != 0) { /* A continuation... */
463 wc = (__uwchar_t) ps->__wc;
469 if ((wc = (__uwchar_t) ps->__wc) != 0xffffU) {
470 /* TODO: change error code here and below? */
477 return (size_t) -1; /* We're in an error state. */
486 if ((wc = ((unsigned char) *s++)) >= 0x80) { /* Not ASCII... */
488 #ifdef __UCLIBC_MJN3_ONLY__
489 #warning TODO: Fix range for 16 bit wchar_t case.
491 if (( ((unsigned char)(s[-1] - 0xc0)) < (0xfe - 0xc0) ) &&
492 (((unsigned char)s[-1] != 0xc0 ) && ((unsigned char)s[-1] != 0xc1 ))) {
503 return (size_t) -1; /* Illegal start byte! */
509 if ((*s & 0xc0) != 0x80) {
514 wc += (*s & 0x3f); /* keep seperate for bcc (smaller code) */
519 if ((wc & mask) == 0) { /* Character completed. */
520 if ((mask >>= 5) == 0x40) {
523 /* Check for invalid sequences (longer than necessary)
524 * and invalid chars. */
525 if ( (wc < mask) /* Sequence not minimal length. */
527 #if UTF_8_MAX_LEN == 3
528 #error broken since mask can overflow!!
529 /* For plane 0, these are the only defined values.*/
532 /* Note that we don't need to worry about exceeding */
533 /* 31 bits as that is the most that UTF-8 provides. */
534 || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
536 || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
544 /* Character potentially valid but incomplete. */
545 if (!allow_continuation) {
549 /* NOTE: The following can fail if you allow and then disallow
551 #if UTF_8_MAX_LEN == 3
552 #error broken since mask can overflow!!
554 /* Need to back up... */
557 } while ((mask >>= 5) >= 0x40);
560 ps->__mask = (wchar_t) mask;
561 ps->__wc = (wchar_t) wc;
572 while (wc && --count);
580 /* ps->__wc is irrelevant here. */
590 /**********************************************************************/
591 #ifdef L__wchar_wcsntoutf8s
593 size_t attribute_hidden _wchar_wcsntoutf8s(char *__restrict s, size_t n,
594 const wchar_t **__restrict src, size_t wn)
599 const __uwchar_t *swc;
601 char buf[MB_LEN_MAX];
605 /* NOTE: The following is an AWFUL HACK! In order to support %ls in
606 * printf, we need to be able to compute the number of bytes needed
607 * for the mbs conversion, not to exceed the precision specified.
608 * But if dst is NULL, the return value is the length assuming a
609 * sufficiently sized buffer. So, we allow passing of (char *) src
610 * as dst in order to flag that we really want the length, subject
611 * to the restricted buffer size and no partial conversions.
612 * See wcsnrtombs() as well. */
613 if (!s || (s == ((char *) src))) {
622 swc = (const __uwchar_t *) *src;
635 #if UTF_8_MAX_LEN == 3
636 /* For plane 0, these are the only defined values.*/
637 /* Note that we don't need to worry about exceeding */
638 /* 31 bits as that is the most that UTF-8 provides. */
641 /* UTF_8_MAX_LEN == 6 */
643 || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
645 || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
651 #if UTF_8_MAX_LEN != 3
652 if (wc > 0x7fffffffUL) { /* Value too large. */
665 if ((len = p - s) > t) { /* Not enough space. */
672 *--p = (wc & 0x3f) | 0x80;
676 } else if (wc == 0) { /* End of string. */
690 *src = (const wchar_t *) swc;
698 /**********************************************************************/
701 /* WARNING: We treat len as SIZE_MAX when dst is NULL! */
703 /* libc_hidden_proto(mbsnrtowcs) */
704 size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
705 size_t NMC, size_t len, mbstate_t *__restrict ps)
707 static mbstate_t mbstate; /* Rely on bss 0-init. */
717 #ifdef __CTYPE_HAS_UTF_8_LOCALES
718 if (ENCODING == __ctype_encoding_utf8) {
720 return ((r = _wchar_utf8sntowcs(dst, len, src, NMC, ps, 1))
721 != (size_t) -2) ? r : 0;
725 /* NOTE: The following is an AWFUL HACK! In order to support %s in
726 * wprintf, we need to be able to compute the number of wchars needed
727 * for the mbs conversion, not to exceed the precision specified.
728 * But if dst is NULL, the return value is the length assuming a
729 * sufficiently sized buffer. So, we allow passing of ((wchar_t *)ps)
730 * as dst in order to flag that we really want the length, subject
731 * to the restricted buffer size and no partial conversions.
732 * See _wchar_utf8sntowcs() as well. */
733 if (!dst || (dst == ((wchar_t *)ps))) {
741 /* Since all the following encodings are single-byte encodings... */
749 #ifdef __CTYPE_HAS_8_BIT_LOCALES
750 if (ENCODING == __ctype_encoding_8_bit) {
753 if ((wc = ((unsigned char)(*s))) >= 0x80) { /* Non-ASCII... */
755 wc = __UCLIBC_CURLOCALE_DATA.tbl8c2wc[
756 (__UCLIBC_CURLOCALE_DATA.idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
757 << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
777 #ifdef __UCLIBC_HAS_LOCALE__
778 assert(ENCODING == __ctype_encoding_7_bit);
782 if ((*dst = (unsigned char) *s) == 0) {
787 #ifdef __CTYPE_HAS_8_BIT_LOCALES
802 libc_hidden_def(mbsnrtowcs)
805 /**********************************************************************/
808 /* WARNING: We treat len as SIZE_MAX when dst is NULL! */
810 /* Note: We completely ignore ps in all currently supported conversions.
811 * TODO: Check for valid state anyway? */
813 /* libc_hidden_proto(wcsnrtombs) */
814 size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
815 size_t NWC, size_t len, mbstate_t *__restrict ps)
820 char buf[MB_LEN_MAX];
822 #ifdef __CTYPE_HAS_UTF_8_LOCALES
823 if (ENCODING == __ctype_encoding_utf8) {
824 return _wchar_wcsntoutf8s(dst, len, src, NWC);
826 #endif /* __CTYPE_HAS_UTF_8_LOCALES */
829 /* NOTE: The following is an AWFUL HACK! In order to support %ls in
830 * printf, we need to be able to compute the number of bytes needed
831 * for the mbs conversion, not to exceed the precision specified.
832 * But if dst is NULL, the return value is the length assuming a
833 * sufficiently sized buffer. So, we allow passing of (char *) src
834 * as dst in order to flag that we really want the length, subject
835 * to the restricted buffer size and no partial conversions.
836 * See _wchar_wcsntoutf8s() as well. */
837 if (!dst || (dst == ((char *) src))) {
845 /* Since all the following encodings are single-byte encodings... */
851 s = (const __uwchar_t *) *src;
853 #ifdef __CTYPE_HAS_8_BIT_LOCALES
854 if (ENCODING == __ctype_encoding_8_bit) {
858 if ((wc = *s) <= 0x7f) {
859 if (!(*dst = (unsigned char) wc)) {
865 if (wc <= Cwc2c_DOMAIN_MAX) {
866 u = __UCLIBC_CURLOCALE_DATA.idx8wc2c[wc >> (Cwc2c_TI_SHIFT
868 u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
869 + ((wc >> Cwc2c_TT_SHIFT)
870 & ((1 << Cwc2c_TI_SHIFT)-1))];
871 u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[Cwc2c_TI_LEN
872 + (u << Cwc2c_TT_SHIFT)
873 + (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
876 #ifdef __WCHAR_REPLACEMENT_CHAR
877 *dst = (unsigned char) ( u ? u : __WCHAR_REPLACEMENT_CHAR );
878 #else /* __WCHAR_REPLACEMENT_CHAR */
882 *dst = (unsigned char) u;
883 #endif /* __WCHAR_REPLACEMENT_CHAR */
890 *src = (const wchar_t *) s;
894 #endif /* __CTYPE_HAS_8_BIT_LOCALES */
896 #ifdef __UCLIBC_HAS_LOCALE__
897 assert(ENCODING == __ctype_encoding_7_bit);
902 #if defined(__CTYPE_HAS_8_BIT_LOCALES) && !defined(__WCHAR_REPLACEMENT_CHAR)
908 if ((*dst = (unsigned char) *s) == 0) {
917 *src = (const wchar_t *) s;
921 libc_hidden_def(wcsnrtombs)
924 /**********************************************************************/
927 /* libc_hidden_proto(wcswidth) */
929 #ifdef __UCLIBC_MJN3_ONLY__
930 #warning REMINDER: If we start doing translit, wcwidth and wcswidth will need updating.
931 #warning TODO: Update wcwidth to match latest by Kuhn.
934 #if defined(__UCLIBC_HAS_LOCALE__) && \
935 ( defined(__CTYPE_HAS_8_BIT_LOCALES) || defined(__CTYPE_HAS_UTF_8_LOCALES) )
937 static const unsigned char new_idx[] = {
938 0, 5, 5, 6, 10, 15, 28, 39,
939 48, 48, 71, 94, 113, 128, 139, 154,
940 175, 186, 188, 188, 188, 188, 188, 188,
941 203, 208, 208, 208, 208, 208, 208, 208,
942 208, 219, 219, 219, 222, 222, 222, 222,
943 222, 222, 222, 222, 222, 222, 222, 224,
944 224, 231, 231, 231, 231, 231, 231, 231,
945 231, 231, 231, 231, 231, 231, 231, 231,
946 231, 231, 231, 231, 231, 231, 231, 231,
947 231, 231, 231, 231, 231, 231, 231, 231,
948 231, 231, 231, 231, 231, 231, 231, 231,
949 231, 231, 231, 231, 231, 231, 231, 231,
950 231, 231, 231, 231, 231, 231, 231, 231,
951 231, 231, 231, 231, 231, 231, 231, 231,
952 231, 231, 231, 231, 231, 231, 231, 231,
953 231, 231, 231, 231, 231, 231, 231, 231,
954 231, 231, 231, 231, 231, 231, 231, 231,
955 231, 231, 231, 231, 231, 231, 231, 231,
956 231, 231, 231, 231, 231, 231, 231, 231,
957 231, 231, 231, 231, 231, 231, 231, 231,
958 231, 231, 231, 231, 231, 233, 233, 233,
959 233, 233, 233, 233, 234, 234, 234, 234,
960 234, 234, 234, 234, 234, 234, 234, 234,
961 234, 234, 234, 234, 234, 234, 234, 234,
962 234, 234, 234, 234, 234, 234, 234, 234,
963 234, 234, 234, 234, 234, 234, 234, 234,
964 234, 234, 234, 234, 234, 234, 234, 234,
965 236, 236, 236, 236, 236, 236, 236, 236,
966 236, 236, 236, 236, 236, 236, 236, 236,
967 236, 236, 236, 236, 236, 236, 236, 236,
968 236, 236, 236, 236, 236, 236, 236, 236,
969 236, 237, 237, 238, 241, 241, 242, 249,
973 static const unsigned char new_tbl[] = {
974 0x00, 0x01, 0x20, 0x7f, 0xa0, 0x00, 0x00, 0x50,
975 0x60, 0x70, 0x00, 0x83, 0x87, 0x88, 0x8a, 0x00,
976 0x91, 0xa2, 0xa3, 0xba, 0xbb, 0xbe, 0xbf, 0xc0,
977 0xc1, 0xc3, 0xc4, 0xc5, 0x00, 0x4b, 0x56, 0x70,
978 0x71, 0xd6, 0xe5, 0xe7, 0xe9, 0xea, 0xee, 0x00,
979 0x0f, 0x10, 0x11, 0x12, 0x30, 0x4b, 0xa6, 0xb1,
980 0x00, 0x01, 0x03, 0x3c, 0x3d, 0x41, 0x49, 0x4d,
981 0x4e, 0x51, 0x55, 0x62, 0x64, 0x81, 0x82, 0xbc,
982 0xbd, 0xc1, 0xc5, 0xcd, 0xce, 0xe2, 0xe4, 0x00,
983 0x02, 0x03, 0x3c, 0x3d, 0x41, 0x43, 0x47, 0x49,
984 0x4b, 0x4e, 0x70, 0x72, 0x81, 0x83, 0xbc, 0xbd,
985 0xc1, 0xc6, 0xc7, 0xc9, 0xcd, 0xce, 0x00, 0x01,
986 0x02, 0x3c, 0x3d, 0x3f, 0x40, 0x41, 0x44, 0x4d,
987 0x4e, 0x56, 0x57, 0x82, 0x83, 0xc0, 0xc1, 0xcd,
988 0xce, 0x00, 0x3e, 0x41, 0x46, 0x49, 0x4a, 0x4e,
989 0x55, 0x57, 0xbf, 0xc0, 0xc6, 0xc7, 0xcc, 0xce,
990 0x00, 0x41, 0x44, 0x4d, 0x4e, 0xca, 0xcb, 0xd2,
991 0xd5, 0xd6, 0xd7, 0x00, 0x31, 0x32, 0x34, 0x3b,
992 0x47, 0x4f, 0xb1, 0xb2, 0xb4, 0xba, 0xbb, 0xbd,
993 0xc8, 0xce, 0x00, 0x18, 0x1a, 0x35, 0x36, 0x37,
994 0x38, 0x39, 0x3a, 0x71, 0x7f, 0x80, 0x85, 0x86,
995 0x88, 0x90, 0x98, 0x99, 0xbd, 0xc6, 0xc7, 0x00,
996 0x2d, 0x31, 0x32, 0x33, 0x36, 0x38, 0x39, 0x3a,
997 0x58, 0x5a, 0x00, 0x60, 0x00, 0x12, 0x15, 0x32,
998 0x35, 0x52, 0x54, 0x72, 0x74, 0xb7, 0xbe, 0xc6,
999 0xc7, 0xc9, 0xd4, 0x00, 0x0b, 0x0f, 0xa9, 0xaa,
1000 0x00, 0x0b, 0x10, 0x2a, 0x2f, 0x60, 0x64, 0x6a,
1001 0x70, 0xd0, 0xeb, 0x00, 0x29, 0x2b, 0x00, 0x80,
1002 0x00, 0x2a, 0x30, 0x3f, 0x40, 0x99, 0x9b, 0x00,
1003 0xd0, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x1e,
1004 0x1f, 0x00, 0x00, 0x10, 0x20, 0x24, 0x30, 0x70,
1005 0xff, 0x00, 0x61, 0xe0, 0xe7, 0xf9, 0xfc,
1008 static const signed char new_wtbl[] = {
1009 0, -1, 1, -1, 1, 1, 0, 1,
1010 0, 1, 1, 0, 1, 0, 1, 1,
1011 0, 1, 0, 1, 0, 1, 0, 1,
1012 0, 1, 0, 1, 1, 0, 1, 0,
1013 1, 0, 1, 0, 1, 0, 1, 1,
1014 0, 1, 0, 1, 0, 1, 0, 1,
1015 1, 0, 1, 0, 1, 0, 1, 0,
1016 1, 0, 1, 0, 1, 0, 1, 0,
1017 1, 0, 1, 0, 1, 0, 1, 1,
1018 0, 1, 0, 1, 0, 1, 0, 1,
1019 0, 1, 0, 1, 0, 1, 0, 1,
1020 0, 1, 0, 1, 0, 1, 1, 0,
1021 1, 0, 1, 0, 1, 0, 1, 0,
1022 1, 0, 1, 0, 1, 0, 1, 0,
1023 1, 1, 0, 1, 0, 1, 0, 1,
1024 0, 1, 0, 1, 0, 1, 0, 1,
1025 1, 0, 1, 0, 1, 0, 1, 0,
1026 1, 0, 1, 1, 0, 1, 0, 1,
1027 0, 1, 0, 1, 0, 1, 0, 1,
1028 0, 1, 1, 0, 1, 0, 1, 0,
1029 1, 0, 1, 0, 1, 0, 1, 0,
1030 1, 0, 1, 0, 1, 0, 1, 1,
1031 0, 1, 0, 1, 0, 1, 0, 1,
1032 0, 1, 2, 0, 1, 0, 1, 0,
1033 1, 0, 1, 0, 1, 0, 1, 0,
1034 1, 0, 1, 1, 0, 1, 0, 1,
1035 1, 0, 1, 0, 1, 0, 1, 0,
1036 1, 0, 1, 1, 2, 1, 1, 2,
1037 2, 0, 2, 1, 2, 0, 2, 2,
1038 1, 1, 2, 1, 1, 2, 1, 0,
1039 1, 1, 0, 1, 0, 1, 2, 1,
1040 0, 2, 1, 2, 1, 0, 1,
1043 /* libc_hidden_proto(wcsnrtombs) */
1045 int wcswidth(const wchar_t *pwcs, size_t n)
1051 if (ENCODING == __ctype_encoding_7_bit) {
1054 for (i = 0 ; (i < n) && pwcs[i] ; i++) {
1055 if (pwcs[i] != (pwcs[i] & 0x7f)) {
1060 #ifdef __CTYPE_HAS_8_BIT_LOCALES
1061 else if (ENCODING == __ctype_encoding_8_bit) {
1064 mbstate.__mask = 0; /* Initialize the mbstate. */
1065 if (wcsnrtombs(NULL, &pwcs, n, SIZE_MAX, &mbstate) == ((size_t) - 1)) {
1069 #endif /* __CTYPE_HAS_8_BIT_LOCALES */
1070 #if defined(__CTYPE_HAS_UTF_8_LOCALES) && defined(KUHN)
1071 /* For stricter handling of allowed unicode values... see comments above. */
1072 else if (ENCODING == __ctype_encoding_utf8) {
1075 for (i = 0 ; (i < n) && pwcs[i] ; i++) {
1076 if ( (((__uwchar_t)((pwcs[i]) - 0xfffeU)) < 2)
1077 || (((__uwchar_t)((pwcs[i]) - 0xd800U)) < (0xe000U - 0xd800U))
1083 #endif /* __CTYPE_HAS_UTF_8_LOCALES */
1085 for (count = 0 ; n && (wc = *pwcs++) ; n--) {
1087 /* If we're here, wc != 0. */
1088 if ((wc < 32) || ((wc >= 0x7f) && (wc < 0xa0))) {
1094 if (((unsigned int) wc) <= 0xffff) {
1099 while ((m = (l+h) >> 1) != l) {
1100 if (b >= new_tbl[m]) {
1102 } else { /* wc < tbl[m] */
1106 count += new_wtbl[l]; /* none should be -1. */
1110 /* Redo this to minimize average number of compares?*/
1111 if (wc >= 0x1d167) {
1112 if (wc <= 0x1d1ad) {
1118 || (wc >= 0x1d1aa))))))
1122 } else if (((wc >= 0xe0020) && (wc <= 0xe007f)) || (wc == 0xe0001)) {
1124 } else if ((wc >= 0x20000) && (wc <= 0x2ffff)) {
1125 ++count; /* need 2.. add one here */
1127 #if (WCHAR_MAX > 0x7fffffffL)
1128 else if (wc > 0x7fffffffL) {
1131 #endif /* (WCHAR_MAX > 0x7fffffffL) */
1140 #else /* __UCLIBC_HAS_LOCALE__ */
1142 int wcswidth(const wchar_t *pwcs, size_t n)
1147 for (count = 0 ; n && (wc = *pwcs++) ; n--) {
1149 /* If we're here, wc != 0. */
1150 if ((wc < 32) || ((wc >= 0x7f) && (wc < 0xa0))) {
1163 #endif /* __UCLIBC_HAS_LOCALE__ */
1165 libc_hidden_def(wcswidth)
1168 /**********************************************************************/
1171 /* libc_hidden_proto(wcswidth) */
/* Width of a single wide character: wc is handed to wcswidth() as a
 * one-element sequence and that result is returned as is. */
int wcwidth(wchar_t wc)
{
	const wchar_t *single = &wc;

	return wcswidth(single, 1);
}
1179 /**********************************************************************/
1184 mbstate_t fromstate;
1192 int skip_invalid_input; /* To support iconv -c option. */
1202 #include <byteswap.h>
1204 #if (__BYTE_ORDER != __BIG_ENDIAN) && (__BYTE_ORDER != __LITTLE_ENDIAN)
1205 #error unsupported endianness for iconv
1208 #ifndef __CTYPE_HAS_8_BIT_LOCALES
1209 #error currently iconv requires 8 bit locales
1211 #ifndef __CTYPE_HAS_UTF_8_LOCALES
1212 #error currently iconv requires UTF-8 locales
1218 IC_MULTIBYTE = 0xe0,
1219 #if __BYTE_ORDER == __BIG_ENDIAN
1234 /* For the multibyte
1235 * bit 0 means swap endian
1236 * bit 1 means 2 byte
1237 * bit 2 means 4 byte
1241 //vda:TODO: make hidden
1243 extern const unsigned char __iconv_codesets[];
1244 /* libc_hidden_proto(__iconv_codesets) */
/*
 * Table of the codesets built into iconv, walked by find_codeset().
 *
 * Entries are packed back to back, each laid out as
 *
 *     <entry length> <codeset id> <name> NUL
 *
 * The first byte is the size of the whole entry; find_codeset() advances
 * with "s += *s" and compares the name found at offset 2.  A zero length
 * byte ends the table.  The final "ASCII" entry omits its own NUL and uses
 * the string literal's terminator both as its name terminator and as the
 * end-of-table marker, which is why it must stay last.
 *
 * For the multibyte ids (>= 0xe0): bit 0 means swap endian, bit 1 means
 * 2 byte, bit 2 means 4 byte.  iconv_open() extracts bit 0x10 of the id as
 * its BOM flag.
 *
 * Every length byte must match the real size of its entry, otherwise the
 * walk desynchronises for all entries that follow.
 */
const unsigned char __iconv_codesets[] =
	"\x0a\xe0""WCHAR_T\x00"		/* superset of UCS-4 but platform-endian */
#if __BYTE_ORDER == __BIG_ENDIAN
	"\x08\xec""UCS-4\x00"		/* always BE */
	"\x0a\xec""UCS-4BE\x00"
	"\x0a\xed""UCS-4LE\x00"
	/* Was "\x09\fe4": a form feed followed by the characters 'e' '4', which
	 * made this entry 11 bytes long while declaring 9.
	 * NOTE(review): \xe4 is the one-character correction and mirrors the
	 * UTF-16 id (\xea) in this branch.  The little-endian branch sets the
	 * 0x10 bit on its UTF-32/UTF-16 ids (\xf4, \xfa) -- confirm whether
	 * this branch should as well. */
	"\x09\xe4""UTF-32\x00"		/* platform endian with BOM */
	"\x0b\xe4""UTF-32BE\x00"
	"\x0b\xe5""UTF-32LE\x00"
	"\x08\xe2""UCS-2\x00"		/* always BE */
	"\x0a\xe2""UCS-2BE\x00"
	"\x0a\xe3""UCS-2LE\x00"
	"\x09\xea""UTF-16\x00"		/* platform endian with BOM */
	"\x0b\xea""UTF-16BE\x00"
	"\x0b\xeb""UTF-16LE\x00"
#elif __BYTE_ORDER == __LITTLE_ENDIAN
	"\x08\xed""UCS-4\x00"		/* always BE */
	"\x0a\xed""UCS-4BE\x00"
	"\x0a\xec""UCS-4LE\x00"
	"\x09\xf4""UTF-32\x00"		/* platform endian with BOM */
	"\x0b\xe5""UTF-32BE\x00"
	"\x0b\xe4""UTF-32LE\x00"
	"\x08\xe3""UCS-2\x00"		/* always BE */
	"\x0a\xe3""UCS-2BE\x00"
	"\x0a\xe2""UCS-2LE\x00"
	"\x09\xfa""UTF-16\x00"		/* platform endian with BOM */
	"\x0b\xeb""UTF-16BE\x00"
	"\x0b\xea""UTF-16LE\x00"
#endif
	"\x08\x02""UTF-8\x00"
	"\x0b\x01""US-ASCII\x00"
	"\x07\x01""ASCII";			/* Must be last! (special case to save a nul) */
1277 libc_hidden_data_def(__iconv_codesets)
1279 /* Experimentally off - libc_hidden_proto(strcasecmp) */
1281 static int find_codeset(const char *name)
1283 const unsigned char *s;
1286 for (s = __iconv_codesets ; *s ; s += *s) {
1287 if (!strcasecmp(s+2, name)) {
1292 /* The following is ripped from find_locale in locale.c. */
1294 /* TODO: maybe CODESET_LIST + *s ??? */
1295 /* 7bit is 1, UTF-8 is 2, 8-bit is >= 3 */
1297 s = __LOCALE_DATA_CODESET_LIST;
1299 ++codeset; /* Increment codeset first. */
1300 if (!strcasecmp(__LOCALE_DATA_CODESET_LIST+*s, name)) {
1305 return 0; /* No matching codeset! */
1308 iconv_t weak_function iconv_open(const char *tocode, const char *fromcode)
1310 register _UC_iconv_t *px;
1311 int tocodeset, fromcodeset;
1313 if (((tocodeset = find_codeset(tocode)) != 0)
1314 && ((fromcodeset = find_codeset(fromcode)) != 0)) {
1315 if ((px = malloc(sizeof(_UC_iconv_t))) != NULL) {
1316 px->tocodeset = tocodeset;
1317 px->tobom0 = px->tobom = (tocodeset & 0x10) >> 4;
1318 px->fromcodeset0 = px->fromcodeset = fromcodeset;
1319 px->frombom0 = px->frombom = (fromcodeset & 0x10) >> 4;
1320 px->skip_invalid_input = px->tostate.__mask
1321 = px->fromstate.__mask = 0;
1322 return (iconv_t) px;
1325 __set_errno(EINVAL);
1327 return (iconv_t)(-1);
1330 int weak_function iconv_close(iconv_t cd)
1337 size_t weak_function iconv(iconv_t cd, char **__restrict inbuf,
1338 size_t *__restrict inbytesleft,
1339 char **__restrict outbuf,
1340 size_t *__restrict outbytesleft)
1342 _UC_iconv_t *px = (_UC_iconv_t *) cd;
1347 assert(px != (_UC_iconv_t *)(-1));
1348 assert(sizeof(wchar_t) == 4);
1350 if (!inbuf || !*inbuf) { /* Need to reinitialze conversion state. */
1351 /* Note: For shift-state encodings we possibly need to output the
1352 * shift sequence to return to initial state! */
1353 if ((px->fromcodeset & 0xf0) == 0xe0) {
1355 px->tostate.__mask = px->fromstate.__mask = 0;
1356 px->fromcodeset = px->fromcodeset0;
1357 px->tobom = px->tobom0;
1358 px->frombom = px->frombom0;
1363 while (*inbytesleft) {
1364 if (!*outbytesleft) {
1371 if (px->fromcodeset >= IC_MULTIBYTE) {
1372 inci = (px->fromcodeset == IC_WCHAR_T) ? 4: (px->fromcodeset & 6);
1373 if (*inbytesleft < inci) goto INVALID;
1374 wc = (((unsigned int)((unsigned char)((*inbuf)[0]))) << 8)
1375 + ((unsigned char)((*inbuf)[1]));
1377 wc = (((unsigned int)((unsigned char)((*inbuf)[2]))) << 8)
1378 + ((unsigned char)((*inbuf)[3])) + (wc << 16);
1379 if (!(px->fromcodeset & 1)) wc = bswap_32(wc);
1381 if (!(px->fromcodeset & 1)) wc = bswap_16(wc);
1382 if (((px->fromcodeset & IC_UTF_16) == IC_UTF_16)
1383 && (((__uwchar_t)(wc - 0xd800U)) < (0xdc00U - 0xd800U))
1386 if (*inbytesleft < 4) goto INVALID;
1387 wc2 = (((unsigned int)((unsigned char)((*inbuf)[2]))) << 8)
1388 + ((unsigned char)((*inbuf)[3]));
1389 if (!(px->fromcodeset & 1)) wc = bswap_16(wc2);
1390 if (((__uwchar_t)(wc2 -= 0xdc00U)) < (0xe0000U - 0xdc00U)) {
1393 inci = 4; /* Change inci here in case skipping illegals. */
1394 wc = 0x10000UL + (wc << 10) + wc2;
1401 || (wc == ((inci == 4)
1402 ? (((wchar_t) 0xfffe0000UL))
1403 : ((wchar_t)(0xfffeUL))))
1405 if (wc != 0xfeffU) {
1406 px->fromcodeset ^= 1; /* toggle endianness */
1410 goto BOM_SKIP_OUTPUT;
1416 if (px->fromcodeset != IC_WCHAR_T) {
1417 if (((__uwchar_t) wc) > (((px->fromcodeset & IC_UCS_4) == IC_UCS_4)
1418 ? 0x7fffffffUL : 0x10ffffUL)
1420 || (((__uwchar_t)(wc - 0xfffeU)) < 2)
1421 || (((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U))
1427 } else if (px->fromcodeset == IC_UTF_8) {
1428 const char *p = *inbuf;
1429 r = _wchar_utf8sntowcs(&wc, 1, &p, *inbytesleft, &px->fromstate, 0);
1430 if (((ssize_t) r) <= 0) { /* either EILSEQ or incomplete or nul */
1431 if (((ssize_t) r) < 0) { /* either EILSEQ or incomplete or nul */
1432 assert((r == (size_t)(-1)) || (r == (size_t)(-2)));
1433 if (r == (size_t)(-2)) {
1435 __set_errno(EINVAL);
1437 px->fromstate.__mask = 0;
1440 if (px->skip_invalid_input) {
1441 px->skip_invalid_input = 2; /* flag for iconv utility */
1442 goto BOM_SKIP_OUTPUT;
1444 __set_errno(EILSEQ);
1446 return (size_t)(-1);
1448 #ifdef __UCLIBC_MJN3_ONLY__
1449 #warning TODO: optimize this.
1451 if (p != NULL) { /* incomplete char case */
1454 p = *inbuf + 1; /* nul */
1457 } else if ((wc = ((unsigned char)(**inbuf))) >= 0x80) { /* Non-ASCII... */
1458 if (px->fromcodeset == IC_ASCII) { /* US-ASCII codeset */
1460 } else { /* some other 8-bit ascii-extension codeset */
1461 const __codeset_8_bit_t *c8b
1462 = __locale_mmap->codeset_8_bit + px->fromcodeset - 3;
1464 wc = __UCLIBC_CURLOCALE_DATA.tbl8c2wc[
1465 (c8b->idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
1466 << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
1481 if (px->tocodeset >= IC_MULTIBYTE) {
1482 inco = (px->tocodeset == IC_WCHAR_T) ? 4: (px->tocodeset & 6);
1483 if (*outbytesleft < inco) goto TOO_BIG;
1484 if (px->tocodeset != IC_WCHAR_T) {
1485 if (((__uwchar_t) wc) > (((px->tocodeset & IC_UCS_4) == IC_UCS_4)
1486 ? 0x7fffffffUL : 0x10ffffUL)
1488 || (((__uwchar_t)(wc - 0xfffeU)) < 2)
1489 || (((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U))
1498 if (px->tocodeset & 1) wc = bswap_32(wc);
1500 if (((__uwchar_t)wc ) > 0xffffU) {
1501 if ((px->tocodeset & IC_UTF_16) != IC_UTF_16) {
1504 if (*outbytesleft < (inco = 4)) goto TOO_BIG;
1505 wc2 = 0xdc00U + (wc & 0x3ff);
1506 wc = 0xd800U + ((wc >> 10) & 0x3ff);
1507 if (px->tocodeset & 1) {
1509 wc2 = bswap_16(wc2);
1512 } else if (px->tocodeset & 1) wc = bswap_16(wc);
1514 (*outbuf)[0] = (char)((unsigned char)(wc));
1515 (*outbuf)[1] = (char)((unsigned char)(wc >> 8));
1517 (*outbuf)[2] = (char)((unsigned char)(wc >> 16));
1518 (*outbuf)[3] = (char)((unsigned char)(wc >> 24));
1520 } else if (px->tocodeset == IC_UTF_8) {
1521 const wchar_t *pw = &wc;
1523 r = _wchar_wcsntoutf8s(*outbuf, *outbytesleft, &pw, 1);
1524 if (r != (size_t)(-1)) {
1525 #ifdef __UCLIBC_MJN3_ONLY__
1526 #warning TODO: What happens for a nul?
1540 } else if (((__uwchar_t)(wc)) < 0x80) {
1544 if ((px->tocodeset != 0x01) && (wc <= Cwc2c_DOMAIN_MAX)) {
1545 const __codeset_8_bit_t *c8b
1546 = __locale_mmap->codeset_8_bit + px->tocodeset - 3;
1548 u = c8b->idx8wc2c[wc >> (Cwc2c_TI_SHIFT + Cwc2c_TT_SHIFT)];
1549 u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
1550 + ((wc >> Cwc2c_TT_SHIFT)
1551 & ((1 << Cwc2c_TI_SHIFT)-1))];
1552 wc = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[Cwc2c_TI_LEN
1553 + (u << Cwc2c_TT_SHIFT)
1554 + (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
1564 *outbytesleft -= inco;
1567 *inbytesleft -= inci;
1573 /**********************************************************************/
/* Table of codeset names known to the converter, defined elsewhere.
 * The -l listing in main() walks it as a sequence of records: the first
 * byte of a record is the distance to the next record, the printable
 * name starts at offset 2, and a zero byte ends the table.
 * NOTE(review): the meaning of the byte at offset 1 is not shown by the
 * code that uses the table here -- confirm against the definition. */
1581 extern const unsigned char __iconv_codesets[];
/* Report a fatal error for the iconv utility.
 *
 * fmt and the variadic arguments follow printf conventions; the format
 * attribute on the prototype lets the compiler type-check every call.
 * The message is written to stderr, prefixed with progname and ": ".
 * The function is declared noreturn, so callers are written on the
 * assumption that control does not come back.
 * NOTE(review): confirm how the function actually terminates the
 * process (presumably via exit()) and how arg is initialised. */
1589 static void error_msg(const char *fmt, ...)
1590 __attribute__ ((noreturn, format (printf, 1, 2)));
1592 static void error_msg(const char *fmt, ...)
/* Identify the program before the message proper. */
1597 fprintf(stderr, "%s: ", progname);
/* Expand the caller's format string with the variadic arguments. */
1599 vfprintf(stderr, fmt, arg);
/* Entry point of the iconv command-line utility.
 *
 * Usage (see the message printed below):
 *   prog [-cs] -f fromcode -t tocode [-o outputfile] [inputfile ...]
 *   prog -l
 *
 * Options are recorded in opts[], indexed by the option letter's
 * position in opt_chars ("tfocsl"): 0 = -t, 1 = -f, 2 = -o, 3 = -c,
 * 4 = -s, 5 = -l.
 *
 * Returns EXIT_SUCCESS or EXIT_FAILURE; fatal conditions are reported
 * through error_msg(), which is declared noreturn. */
1606 int main(int argc, char **argv)
/* Output goes to stdout unless -o replaces it further down. */
1609 FILE *ofile = stdout;
1612 static const char opt_chars[] = "tfocsl";
/* sizeof(opt_chars) counts the terminating nul, so opts[] has one more
 * slot than there are option letters. */
1614 const char *opts[sizeof(opt_chars)]; /* last is infile name */
1620 size_t ni, no, r, pos;
/* Mark every option as "not seen"; only the slots that correspond to an
 * option letter are cleared by this loop. */
1624 for (s = opt_chars ; *s ; s++) {
1625 opts[ s - opt_chars ] = NULL;
/* An argument that does not start with '-', or that is exactly "-", is
 * not an option (presumably this ends option parsing -- confirm). */
1631 if ((*p != '-') || (*++p == 0)) {
/* Unknown option letter: print the usage text and fail. */
1635 if ((s = strchr(opt_chars,*p)) == NULL) {
1637 s = basename(progname);
1639 "%s [-cs] -f fromcode -t tocode [-o outputfile] [inputfile ...]\n"
1640 " or\n%s -l\n", s, s);
1641 return EXIT_FAILURE;
/* The first three letters (t, f, o) take a value from the next argv
 * element; the remaining letters are plain flags. */
1643 if ((s - opt_chars) < 3) {
/* Taken when no argument is left for the value or when the same option
 * was already given. */
1644 if ((--argc == 0) || opts[s - opt_chars]) {
1647 opts[s - opt_chars] = *++argv;
/* For a flag, any non-NULL pointer serves as the "seen" marker. */
1649 opts[s - opt_chars] = p;
/* -l: list the known codeset names on stderr and exit successfully. */
1654 if (opts[5]) { /* -l */
1655 fprintf(stderr, "Recognized codesets:\n");
/* Each record's first byte is the step to the next record; the name
 * starts two bytes into the record. */
1656 for (s = __iconv_codesets ; *s ; s += *s) {
1657 fprintf(stderr," %s\n", s+2);
/* The byte read through s is used as an offset from the start of
 * __LOCALE_DATA_CODESET_LIST to a codeset name. */
1659 s = __LOCALE_DATA_CODESET_LIST;
1661 fprintf(stderr," %s\n", __LOCALE_DATA_CODESET_LIST+ (unsigned char)(*s));
1664 return EXIT_SUCCESS;
/* Both -t (opts[0]) and -f (opts[1]) are mandatory for a conversion. */
1671 if (!opts[0] || !opts[1]) {
/* iconv_open() takes the target codeset first, then the source. */
1674 if ((ic = iconv_open(opts[0],opts[1])) == ((iconv_t)(-1))) {
1675 error_msg( "unsupported codeset in %s -> %s conversion\n", opts[0], opts[1]);
/* -c sets the descriptor's skip_invalid_input field directly; this
 * relies on iconv_t pointing at a _UC_iconv_t. */
1677 if (opts[3]) { /* -c */
1678 ((_UC_iconv_t *) ic)->skip_invalid_input = 1;
/* -o: write to the named file instead of stdout. */
1681 if ((s = opts[2]) != NULL) {
1682 if (!(ofile = fopen(s, "w"))) {
1683 error_msg( "couldn't open %s for writing\n", s);
/* With no file arguments left, or with the argument "-", read stdin;
 * otherwise open the named file. */
1689 if (!argc || ((**argv == '-') && !((*argv)[1]))) {
1690 ifile = stdin; /* we don't check for duplicates */
1691 } else if (!(ifile = fopen(*argv, "r"))) {
1692 error_msg( "couldn't open %s for reading\n", *argv);
/* New data is appended after the ni bytes still held in ibuf. */
1695 while ((r = fread(ibuf + ni, 1, IBUF - ni, ifile)) > 0) {
/* EINVAL and E2BIG are tolerated; any other errno is fatal.
 * NOTE(review): per the iconv() contract these two mean "incomplete
 * input sequence" and "output buffer full" -- confirm that is the
 * intent here. */
1701 if ((r = iconv(ic, &pi, &ni, &po, &no)) == ((size_t)(-1))) {
1702 if ((errno != EINVAL) && (errno != E2BIG)) {
/* NOTE(review): %m is a non-standard printf extension (errno text). */
1703 error_msg( "iconv failed at pos %lu : %m\n", (unsigned long) (pos - ni));
/* Flush whatever the conversion produced; a short write is fatal. */
1706 if ((r = OBUF - no) > 0) {
1707 if (fwrite(obuf, 1, OBUF - no, ofile) < r) {
1708 error_msg( "write error\n");
/* Move unconverted input to the front of ibuf so the next fread()
 * appends to it. */
1711 if (ni) { /* still bytes in buffer! */
1712 memmove(ibuf, pi, ni);
/* fread() returning 0 may mean EOF or an error; tell them apart. */
1716 if (ferror(ifile)) {
1717 error_msg( "read error\n");
/* stdin is handled differently from files this function opened
 * (presumably only the latter are closed -- confirm). */
1722 if (ifile != stdin) {
/* One pass per remaining input argument. */
1726 } while (--argc > 0);
1731 error_msg( "incomplete sequence\n");
/* The converter raises skip_invalid_input to 2 when it has actually
 * skipped invalid input, so in that case the exit status is failure.
 * NOTE(review): ic is dereferenced here -- confirm the descriptor has
 * not already been closed by this point. */
1734 return (((_UC_iconv_t *) ic)->skip_invalid_input < 2)
1735 ? EXIT_SUCCESS : EXIT_FAILURE;
1739 /**********************************************************************/