1 /* iconvtc.c -- Interface to iconv transcoding routines
3 (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
4 See tidy.h for the copyright notice.
12 #ifdef TIDY_ICONV_SUPPORT
16 /* maximum number of input bytes IconvGetChar buffers while decoding a single character */
17 #define TC_INBUFSIZE 16
19 /* size in bytes of the output buffer handed to iconv() for one decoded character */
20 #define TC_OUTBUFSIZE 16
22 Bool IconvInitInputTranscoder(void)
27 void IconvUninitInputTranscoder(void)
32 int IconvGetChar(byte firstByte, StreamIn * in, uint * bytesRead)
35 TidyInputSource * source;
36 char inbuf[TC_INBUFSIZE] = { 0 };
37 char outbuf[TC_OUTBUFSIZE] = { 0 };
41 assert( &in->source != NULL );
42 assert( bytesRead != NULL );
43 assert( in->iconvptr != 0 );
45 cd = (iconv_t)in->iconvptr;
48 inbuf[inbufsize++] = (char)firstByte;
50 while(inbufsize < TC_INBUFSIZE)
52 char * outbufptr = (char*)outbuf;
53 char * inbufptr = (char*)inbuf;
54 size_t readNow = inbufsize;
55 size_t writeNow = TC_OUTBUFSIZE;
58 int nextByte = EndOfStream;
60 result = iconv(cd, (const char**)&inbufptr, &readNow, (char**)&outbufptr, &writeNow);
63 if (result != (size_t)(-1))
67 /* create codepoint from UTF-32LE octets */
68 c = (unsigned char)outbuf[0];
69 c += (unsigned char)outbuf[1] << 8;
70 c += (unsigned char)outbuf[2] << 16;
71 c += (unsigned char)outbuf[3] << 32;
73 /* set number of read bytes */
74 *bytesRead = inbufsize;
79 assert( iconv_errno != EILSEQ ); /* broken multibyte sequence */
80 assert( iconv_errno != E2BIG ); /* not enough memory */
81 assert( iconv_errno == EINVAL ); /* incomplete sequence */
83 /* we need more bytes */
84 nextByte = source->getByte(source->sourceData);
86 if (nextByte == EndOfStream)
88 /* todo: error message for broken stream? */
90 *bytesRead = inbufsize;
94 inbuf[inbufsize++] = (char)nextByte;
97 /* No full character found after reading TC_INBUFSIZE bytes, */
98 /* give up to read this stream, it's obviously unreadable. */
100 /* todo: error message for broken stream? */
104 #endif /* TIDY_ICONV_SUPPORT */