3 #if !defined (STLPORT) || !defined (_STLP_USE_NO_IOSTREAMS)
7 # include <cstdio> // for WEOF
9 # include "cppunit/cppunit_proxy.h"
11 # if !defined (STLPORT) || defined(_STLP_USE_NAMESPACES)
// Test fixture for the codecvt facet tests; derives from the CppUnit TestCase base.
18 class CodecvtTest : public CPPUNIT_NS::TestCase
// Suite declaration: each CPPUNIT_TEST line below names one member function as a test case.
20 CPPUNIT_TEST_SUITE(CodecvtTest);
// Condition: STLport built without member-template support.
// The lines guarded by this conditional are not visible in this excerpt.
21 #if defined (STLPORT) && defined (_STLP_NO_MEMBER_TEMPLATES)
24 CPPUNIT_TEST(variable_encoding);
// Condition: STLport built without wchar_t or without exception support.
// The lines guarded by this conditional are not visible in this excerpt.
26 #if defined (STLPORT) && (defined (_STLP_NO_WCHAR_T) || !defined (_STLP_USE_EXCEPTIONS))
29 CPPUNIT_TEST(in_out_test);
30 CPPUNIT_TEST(length_test);
31 CPPUNIT_TEST(imbue_while_reading);
32 CPPUNIT_TEST(special_encodings);
33 CPPUNIT_TEST_SUITE_END();
// Test method declarations; the definitions appear later in this file.
36 void variable_encoding();
39 void imbue_while_reading();
40 void special_encodings();
// Registers the CodecvtTest suite with the test framework.
43 CPPUNIT_TEST_SUITE_REGISTRATION(CodecvtTest);
// Two alternative definitions of the exception-specification macro that is
// appended to the facet overrides below. The preprocessor conditional that
// selects between them is not visible in this excerpt.
// NOTE(review): identifiers containing a double underscore are reserved to the
// implementation in C++ -- confirm this name does not clash with the toolchain.
46 # define __NO_THROW _STLP_NOTHROW
48 # define __NO_THROW throw()
52 /* Codecvt facet that eats some characters from the external buffer:
53 * it transforms '01' into 'a'.
// Test facet derived from codecvt<char, char, mbstate_t>. Only part of its
// body is visible in this excerpt.
55 struct eater_codecvt : public codecvt<char, char, mbstate_t> {
// Shorthand for the base facet type.
56 typedef codecvt<char,char,mbstate_t> base;
// Forwards the reference-count argument to the base facet constructor.
58 explicit eater_codecvt(size_t refs = 0) : base(refs) {}
60 // primitive conversion
// External range: [ebegin, eend), with ecur reporting how far input was read.
63 const char* ebegin, const char* eend, const char*& ecur,
// Internal range: [ibegin, iend), with icur reporting how far output was written.
64 char* ibegin, char* iend, char*& icur) const __NO_THROW {
// View the conversion state object as raw bytes; its first byte is used as a
// small flag (it is compared against 1 below).
65 char *state = (char*)&mb;
// Loop until the external sequence is exhausted.
69 while (ecur != eend) {
// Either the current external char is '0', or the state byte is 1
// (presumably a '0' remembered from an earlier step -- confirm against the
// lines not shown here).
72 if (*ecur == '0' || *state == 1) {
107 // claim it's not a null-conversion
108 virtual bool do_always_noconv() const __NO_THROW
111 // claim it doesn't have a fixed-length encoding
112 virtual int do_encoding() const __NO_THROW
115 // implemented for consistency with do_in overload
116 virtual int do_length(mbstate_t &state,
117 const char *efrom, const char *eend, size_t m) const {
// Scratch output buffer of m chars so that do_in can be run as a trial conversion.
// NOTE(review): the release of this buffer is not visible in this excerpt --
// confirm that it is freed on every path.
118 char *ibegin = new char[m];
119 const char *ecur = efrom;
// Run the trial conversion on a copy of the caller's state.
121 mbstate_t tmp = state;
122 do_in(tmp, efrom, eend, ecur, ibegin, ibegin + m, icur);
127 virtual int do_max_length() const __NO_THROW
// Static facet id; it is defined at namespace scope after the struct.
131 static locale::id id;
// Out-of-class definition of the static facet id of eater_codecvt.
136 locale::id eater_codecvt::id;
// Returns the facet id of eater_codecvt. The pointer parameter is unnamed and
// unused; it only selects this overload.
// NOTE(review): presumably a lookup hook required by the library -- confirm.
138 locale::id& _GetFacetId(const eater_codecvt*)
139 { return eater_codecvt::id; }
142 /* Codecvt facet generating more characters than it reads from the
143 * external buffer: it transforms '01' into 'abc'.
144 * This kind of facet does not allow systematic positioning in the external
145 * buffer (tellg -> -1): when you have just read an 'a' you are at an undefined
146 * external buffer position.
// Test facet derived from codecvt<char, char, mbstate_t>. Only part of its
// body is visible in this excerpt.
148 struct generator_codecvt : public codecvt<char, char, mbstate_t> {
// Shorthand for the base facet type.
149 typedef codecvt<char,char,mbstate_t> base;
// Forwards the reference-count argument to the base facet constructor.
151 explicit generator_codecvt(size_t refs = 0) : base(refs) {}
153 // primitive conversion
// External range: [ebegin, eend), with ecur reporting how far input was read.
156 const char* ebegin, const char* eend, const char*& ecur,
// Internal range: [ibegin, iend), with icur reporting how far output was written.
157 char* ibegin, char* iend, char*& icur) const __NO_THROW {
158 // Access the mbstate information in a portable way:
159 char *state = (char*)&mb;
// No room left in the internal buffer: report ok without converting further.
163 if (icur == iend) return ok;
// The state byte takes several small integer values (1, 2 and 3 are tested in
// this struct); the handling of each value is on lines not shown here.
174 else if (*state == 3) {
// Loop until the external sequence is exhausted.
179 while (ecur != eend) {
// Either the current external char is '0', or the state byte is 1
// (presumably a '0' remembered from an earlier step -- confirm against the
// lines not shown here).
182 if (*ecur == '0' || *state == 1) {
227 // claim it's not a null-conversion
228 virtual bool do_always_noconv() const __NO_THROW
231 // claim it doesn't have a fixed-length encoding
232 virtual int do_encoding() const __NO_THROW
235 // implemented for consistency with do_in overload
236 virtual int do_length(mbstate_t &mb,
237 const char *efrom, const char *eend, size_t m) const {
// Read-only byte view of the incoming state.
238 const char *state = (const char*)&mb;
242 else if (*state == 3)
// Scratch output buffer for a trial conversion; `offset` is computed on lines
// not shown in this excerpt.
// NOTE(review): the release of this buffer is not visible in this excerpt --
// confirm that it is freed on every path.
245 char *ibegin = new char[m + offset];
246 const char *ecur = efrom;
// Run the trial conversion on a copy of the caller's state.
248 mbstate_t tmpState = mb;
249 do_in(tmpState, efrom, eend, ecur, ibegin, ibegin + m + offset, icur);
// Byte view of the state produced by the trial conversion.
251 char *state = (char*)&tmpState;
255 else if (*state == 2 || *state == 3) {
256 // Undefined position, we return -1:
// Tests the first byte of the caller's original state (not the trial copy).
261 if (*((char*)&mb) != 0) {
262 // We take into account the character that has not been counted yet in
263 // the previous decoding step:
// Number of external chars consumed by the trial conversion, capped at m.
269 return (int)min((size_t)(ecur - efrom), m);
272 virtual int do_max_length() const __NO_THROW
// Static facet id; it is defined at namespace scope after the struct.
275 static locale::id id;
// Out-of-class definition of the static facet id of generator_codecvt.
280 locale::id generator_codecvt::id;
// Returns the facet id of generator_codecvt. The pointer parameter is unnamed
// and unused; it only selects this overload.
// NOTE(review): presumably a lookup hook required by the library -- confirm.
282 locale::id& _GetFacetId(const generator_codecvt*)
283 { return generator_codecvt::id; }
287 // tests implementation
// Writes a test file, then reads it back through streams using the custom
// codecvt facets and compares tellg() against an expected position. Parts of
// the body (stream imbue calls, read loops) are on lines not shown here.
290 void CodecvtTest::variable_encoding()
292 #if !defined (STLPORT) || !defined (_STLP_NO_MEMBER_TEMPLATES)
293 // First generate the file used by the test:
294 const char* fileName = "test_file.txt";
296 ofstream ostr(fileName);
297 // This may fail simply because we do not have write access to the repository
298 CPPUNIT_ASSERT( ostr.good() );
// 2048 repetitions of a 10-character pattern are written.
299 for (int i = 0; i < 2048; ++i) {
300 ostr << "0123456789";
302 CPPUNIT_ASSERT( ostr.good() );
// First pass: read the file back with eater_codecvt.
306 ifstream istr(fileName);
307 CPPUNIT_ASSERT( istr.good() );
308 CPPUNIT_ASSERT( !istr.eof() );
// The facet lives on the stack and is built with refs == 1
// (presumably so that the locale does not delete it -- confirm).
310 eater_codecvt codec(1);
311 locale loc(locale::classic(), &codec);
314 CPPUNIT_ASSERT( istr.good() );
315 CPPUNIT_ASSERT( (int)istr.tellg() == 0 );
// Expected external position; it is updated on lines not shown here.
317 int theoricalPos = 0;
// End-of-file check on the character just read (c is obtained on a line not shown).
320 if (char_traits<char>::eq_int_type(c, char_traits<char>::eof())) {
// The reported stream position must match the expected one.
328 CPPUNIT_ASSERT( (int)istr.tellg() == theoricalPos );
332 CPPUNIT_ASSERT( istr.eof() );
337 /* This test is broken; it is not clear whether it is really possible to get a position in
338 * a locale having a codecvt such as generator_codecvt. Maybe generator_codecvt
339 * is not a valid theoretical example of a codecvt implementation. */
// Second pass: same scenario with generator_codecvt.
341 ifstream istr(fileName);
342 CPPUNIT_ASSERT( istr.good() );
343 CPPUNIT_ASSERT( !istr.eof() );
345 generator_codecvt codec(1);
346 locale loc(locale::classic(), &codec);
349 CPPUNIT_ASSERT( istr.good() );
350 CPPUNIT_ASSERT( (int)istr.tellg() == 0 );
352 int theoricalPos = 0;
// NOTE(review): the first pass compares against eof() with eq_int_type while
// this one uses operator== directly -- confirm whether that difference is intended.
356 if (c == char_traits<char>::eof()) {
368 theoricalTellg = theoricalPos;
// The assertion is only evaluated when the positions already differ, so it
// can only record a failure here.
372 if ((int)istr.tellg() != theoricalTellg) {
373 CPPUNIT_ASSERT( (int)istr.tellg() == theoricalTellg );
378 CPPUNIT_ASSERT( istr.eof() );
// Exercises in() and out() of the codecvt<wchar_t, char, mbstate_t> facet of
// a locale `loc` (constructed on a line not shown here).
384 void CodecvtTest::in_out_test()
386 #if !defined (STLPORT) || !(defined (_STLP_NO_WCHAR_T) || !defined (_STLP_USE_EXCEPTIONS))
390 typedef codecvt<wchar_t, char, mbstate_t> cdecvt_type;
// The checks only run when the locale actually provides the facet.
391 if (has_facet<cdecvt_type>(loc)) {
392 cdecvt_type const& cdect = use_facet<cdecvt_type>(loc);
// Narrow -> wide conversion, starting from a zero-filled conversion state.
394 cdecvt_type::state_type state;
395 memset(&state, 0, sizeof(cdecvt_type::state_type));
396 string from("abcdef");
397 const char* next_from;
// `to` and `next_to` are declared on lines not shown here; the output range
// is the whole `to` array.
400 cdecvt_type::result res = cdect.in(state, from.data(), from.data() + from.size(), next_from,
401 to, to + sizeof(to) / sizeof(wchar_t), next_to);
// Exactly one character is expected to be consumed and produced.
402 CPPUNIT_ASSERT( res == cdecvt_type::ok );
403 CPPUNIT_ASSERT( next_from == from.data() + 1 );
404 CPPUNIT_ASSERT( next_to == &to[0] + 1 );
405 CPPUNIT_ASSERT( to[0] == L'a');
// Wide -> narrow conversion, again starting from a zero-filled state.
408 cdecvt_type::state_type state;
409 memset(&state, 0, sizeof(cdecvt_type::state_type));
410 wstring from(L"abcdef");
411 const wchar_t* next_from;
414 cdecvt_type::result res = cdect.out(state, from.data(), from.data() + from.size(), next_from,
415 to, to + sizeof(to) / sizeof(char), next_to);
// Exactly one character is expected to be consumed and produced.
416 CPPUNIT_ASSERT( res == cdecvt_type::ok );
417 CPPUNIT_ASSERT( next_from == from.data() + 1 );
418 CPPUNIT_ASSERT( next_to == &to[0] + 1 );
419 CPPUNIT_ASSERT( to[0] == 'a');
// Handler for runtime_error; the try block and the handler body are on lines
// not shown in this excerpt.
423 catch (runtime_error const&) {
// Exercises length() of the codecvt<wchar_t, char, mbstate_t> facet of a
// locale `loc` (constructed on a line not shown here).
431 void CodecvtTest::length_test()
433 #if !defined (STLPORT) || !(defined (_STLP_NO_WCHAR_T) || !defined (_STLP_USE_EXCEPTIONS))
437 typedef codecvt<wchar_t, char, mbstate_t> cdecvt_type;
// The check only runs when the locale actually provides the facet.
438 if (has_facet<cdecvt_type>(loc)) {
439 cdecvt_type const& cdect = use_facet<cdecvt_type>(loc);
// Start from a zero-filled conversion state.
441 cdecvt_type::state_type state;
442 memset(&state, 0, sizeof(cdecvt_type::state_type));
443 string from("abcdef");
// Up to from.size() internal characters are requested; the whole external
// string is expected to be counted.
444 int res = cdect.length(state, from.data(), from.data() + from.size(), from.size());
445 CPPUNIT_ASSERT( (size_t)res == from.size() );
// Handler for runtime_error; the try block and the handler body are on lines
// not shown in this excerpt.
449 catch (runtime_error const&) {
// Only compiled when wchar_t and exceptions are available (or when STLport is not used).
457 #if !defined (STLPORT) || !(defined (_STLP_NO_WCHAR_T) || !defined (_STLP_USE_EXCEPTIONS))
458 typedef std::codecvt<wchar_t, char, mbstate_t> my_codecvt_base;
// Custom wide-character facet; imbue_while_reading installs it on a wifstream.
460 class my_codecvt : public my_codecvt_base {
// Forwards the reference-count argument to the base facet constructor.
462 explicit my_codecvt(size_t r = 0)
463 : my_codecvt_base(r) {}
// Decodes the external range [first1, last1) into the internal range
// [first2, last2). The state parameter is left unnamed.
466 virtual result do_in(state_type& /*state*/, const extern_type* first1,
467 const extern_type* last1, const extern_type*& next1,
468 intern_type* first2, intern_type* last2,
469 intern_type*& next2) const {
// Two external bytes are consumed per iteration.
470 for ( next1 = first1, next2 = first2; next1 < last1; next1 += 2 ) {
// Fewer than two input bytes left, or no output slot left; the action taken
// is on a line not shown here.
471 if ( (last1 - next1) < 2 || (last2 - next2) < 1 )
// The first byte supplies the low 8 bits; the second byte is shifted left by 8.
473 *next2++ = (intern_type)((*(next1 + 1) << 8) | (*next1 & 255));
// Bodies of the three overrides below are on lines not shown here.
477 virtual bool do_always_noconv() const __NO_THROW
479 virtual int do_max_length() const __NO_THROW
481 virtual int do_encoding() const __NO_THROW
// Writes a wide-character file, then reads it back while changing the
// stream's locale between reads. The read calls are on lines not shown here.
486 void CodecvtTest::imbue_while_reading()
488 #if !defined (STLPORT) || !(defined (_STLP_NO_WCHAR_T) || !defined (_STLP_USE_EXCEPTIONS))
490 wofstream ofs( "test.txt" );
491 const wchar_t buf[] = L" ";
// The loop body (not shown here) runs 4098 times.
492 for ( int i = 0; i < 4098; ++i ) {
497 wifstream ifs("test.txt"); // a file containing 4098 wchars
// Install the custom facet first; the locale is given a heap-allocated facet
// built with the default reference count.
499 ifs.imbue( locale(locale(), new my_codecvt) );
// Then switch to a copy of the global locale.
502 ifs.imbue( locale() );
// The value read after the locale changes must not be end-of-file
// (ch is obtained on a line not shown here).
505 CPPUNIT_CHECK( ch != (int)WEOF );
// Checks codecvt<wchar_t, char, mbstate_t> behaviour for three cases: the "C"
// codecvt_byname facet, code page 936, and UTF-8. Several declarations and
// the try blocks are on lines not shown in this excerpt.
509 void CodecvtTest::special_encodings()
511 #if !defined (STLPORT) || (!defined (_STLP_NO_WCHAR_T) && defined (_STLP_USE_EXCEPTIONS))
// Case 1: facet created by name for the "C" locale.
513 locale loc(locale::classic(), new codecvt_byname<wchar_t, char, mbstate_t>("C"));
514 codecvt<wchar_t, char, mbstate_t> const& cvt = use_facet<codecvt<wchar_t, char, mbstate_t> >(loc);
// Zero-fill the conversion state (declared on a line not shown here).
516 memset(&state, 0, sizeof(mbstate_t));
518 const char *from_next;
// With an empty output range [&wc, &wc) the call must report ok and leave
// to_next at &wc.
521 CPPUNIT_ASSERT( cvt.in(state, &c, &c + 1, from_next, &wc, &wc, to_next) == codecvt_base::ok );
522 CPPUNIT_ASSERT( to_next == &wc );
// With room for one wide char, the conversion must yield L'0' and advance
// to_next by one.
523 CPPUNIT_ASSERT( cvt.in(state, &c, &c + 1, from_next, &wc, &wc + 1, to_next) == codecvt_base::ok );
524 CPPUNIT_ASSERT( wc == L'0' );
525 CPPUNIT_ASSERT( to_next == &wc + 1 );
// Case 2: byte string used for the code page 936 checks.
530 const string cp936_str = "\xd6\xd0\xb9\xfa\xc9\xe7\xbb\xe1\xbf\xc6\xd1\xa7\xd4\xba\xb7\xa2\xb2\xbc\x32\x30\x30\x38\xc4\xea\xa1\xb6\xbe\xad\xbc\xc3\xc0\xb6\xc6\xa4\xca\xe9\xa1\xb7\xd6\xb8\xb3\xf6\xa3\xac\x32\x30\x30\x37\xc4\xea\xd6\xd0\xb9\xfa\xbe\xad\xbc\xc3\xd4\xf6\xb3\xa4\xd3\xc9\xc6\xab\xbf\xec\xd7\xaa\xcf\xf2\xb9\xfd\xc8\xc8\xb5\xc4\xc7\xf7\xca\xc6\xc3\xf7\xcf\xd4\xd4\xa4\xbc\xc6\xc8\xab\xc4\xea\x47\x44\x50\xd4\xf6\xcb\xd9\xbd\xab\xb4\xef\x31\x31\x2e\x36\x25\xa1\xa3";
// Classic locale with the ctype category taken from the locale named ".936".
531 locale loc(locale::classic(), ".936", locale::ctype);
532 codecvt<wchar_t, char, mbstate_t> const& cvt = use_facet<codecvt<wchar_t, char, mbstate_t> >(loc);
534 memset(&state, 0, sizeof(mbstate_t));
536 codecvt_base::result res;
540 // Check that wbuf has enough room for the wide string generated from the whole char buffer:
541 int len = cvt.length(state, cp936_str.data(), cp936_str.data() + cp936_str.size(), sizeof(wbuf) / sizeof(wchar_t));
542 CPPUNIT_ASSERT( cp936_str.size() == (size_t)len );
544 const char *from_next;
// Convert the whole byte string to wide characters in one call.
546 res = cvt.in(state, cp936_str.data(), cp936_str.data() + cp936_str.size(), from_next,
547 wbuf, wbuf + sizeof(wbuf) / sizeof(wchar_t), to_next);
548 CPPUNIT_ASSERT( res == codecvt_base::ok );
549 CPPUNIT_ASSERT( from_next == cp936_str.data() + cp936_str.size() );
550 cp936_wstr.assign(wbuf, to_next);
554 const wchar_t *from_next;
// Convert back to bytes and compare with the original string.
557 res = cvt.out(state, cp936_wstr.data(), cp936_wstr.data() + cp936_wstr.size(), from_next,
558 buf, buf + sizeof(buf), to_next);
559 CPPUNIT_ASSERT( res == codecvt_base::ok );
560 CPPUNIT_CHECK( string(buf, to_next) == cp936_str );
// A runtime_error here is reported as missing platform support, not as a test failure.
563 catch (const runtime_error&)
565 CPPUNIT_MESSAGE("Not enough platform localization support to check 936 code page encoding.");
// Case 3: byte string used for the UTF-8 checks.
569 const string utf8_str = "\xe4\xb8\xad\xe5\x9b\xbd\xe7\xa4\xbe\xe4\xbc\x9a\xe7\xa7\x91\xe5\xad\xa6\xe9\x99\xa2\xe5\x8f\x91\xe5\xb8\x83\x32\x30\x30\x38\xe5\xb9\xb4\xe3\x80\x8a\xe7\xbb\x8f\xe6\xb5\x8e\xe8\x93\x9d\xe7\x9a\xae\xe4\xb9\xa6\xe3\x80\x8b\xe6\x8c\x87\xe5\x87\xba\xef\xbc\x8c\x32\x30\x30\x37\xe5\xb9\xb4\xe4\xb8\xad\xe5\x9b\xbd\xe7\xbb\x8f\xe6\xb5\x8e\xe5\xa2\x9e\xe9\x95\xbf\xe7\x94\xb1\xe5\x81\x8f\xe5\xbf\xab\xe8\xbd\xac\xe5\x90\x91\xe8\xbf\x87\xe7\x83\xad\xe7\x9a\x84\xe8\xb6\x8b\xe5\x8a\xbf\xe6\x98\x8e\xe6\x98\xbe\xe9\xa2\x84\xe8\xae\xa1\xe5\x85\xa8\xe5\xb9\xb4\x47\x44\x50\xe5\xa2\x9e\xe9\x80\x9f\xe5\xb0\x86\xe8\xbe\xbe\x31\x31\x2e\x36\x25\xe3\x80\x82";
// Facet created by name for ".utf8".
571 locale loc(locale::classic(), new codecvt_byname<wchar_t, char, mbstate_t>(".utf8"));
572 codecvt<wchar_t, char, mbstate_t> const& cvt = use_facet<codecvt<wchar_t, char, mbstate_t> >(loc);
574 memset(&state, 0, sizeof(mbstate_t));
576 codecvt_base::result res;
580 // Check that wbuf has enough room for the wide string generated from the whole char buffer:
581 int len = cvt.length(state, utf8_str.data(), utf8_str.data() + utf8_str.size(), sizeof(wbuf) / sizeof(wchar_t));
582 CPPUNIT_ASSERT( utf8_str.size() == (size_t)len );
584 const char *from_next;
// Convert the whole byte string to wide characters in one call.
586 res = cvt.in(state, utf8_str.data(), utf8_str.data() + utf8_str.size(), from_next,
587 wbuf, wbuf + sizeof(wbuf) / sizeof(wchar_t), to_next);
588 CPPUNIT_ASSERT( res == codecvt_base::ok );
589 CPPUNIT_ASSERT( from_next == utf8_str.data() + utf8_str.size() );
590 utf8_wstr.assign(wbuf, to_next);
592 // Try to read one wide char after the other:
594 const char* from = utf8_str.data();
595 const char* from_end = from + utf8_str.size();
596 from_next = utf8_str.data();
// `length` is the size of the input window handed to in(); it is declared and
// adjusted on lines not shown here.
599 while (from + length <= from_end) {
600 res = cvt.in(state, from, from + length, from_next,
601 &wc, &wc + 1, to_next);
603 case codecvt_base::ok:
// Each wide char decoded here must match the one from the bulk conversion.
607 CPPUNIT_ASSERT( wc == utf8_wstr[windex++] );
610 case codecvt_base::partial:
611 if (from_next == from)
612 // from_next has not moved, so we have to pass more chars
615 // the chars between from and from_next have been eaten, so we simply restart
616 // the conversion from from_next:
619 case codecvt_base::error:
620 case codecvt_base::noconv:
// Every wide char of the bulk conversion must have been matched.
625 CPPUNIT_ASSERT( windex == utf8_wstr.size() );
629 const wchar_t *from_next;
// Convert back to bytes and compare with the original string.
632 res = cvt.out(state, utf8_wstr.data(), utf8_wstr.data() + utf8_wstr.size(), from_next,
633 buf, buf + sizeof(buf), to_next);
634 CPPUNIT_ASSERT( res == codecvt_base::ok );
635 CPPUNIT_CHECK( string(buf, to_next) == utf8_str );
639 // Check that an obviously wrong UTF8 encoded string is correctly detected:
640 const string bad_utf8_str("\xdf\xdf\xdf\xdf\xdf");
642 const char *from_next;
644 res = cvt.in(state, bad_utf8_str.data(), bad_utf8_str.data() + bad_utf8_str.size(), from_next,
645 &wc, &wc + 1, to_next);
646 CPPUNIT_ASSERT( res == codecvt_base::error );
// A runtime_error here is reported as missing platform support, not as a test failure.
649 catch (const runtime_error&)
651 CPPUNIT_MESSAGE("Not enough platform localization support to check UTF8 encoding.");