3 * @author Perry Rapp, Creator, 2003-2004
4 * @date Created: 2003-10
5 * @date Edited: 2006-02-20 (Perry Rapp)
7 * @brief Declaration of utility unicode conversion routines
11 #include "UnicodeString.h"
17 * @brief A simple buffer struct.
21 unsigned char * ptr; /**< Pointer to a buffer. */
22 size_t capacity; /**< Buffer's size in bytes. */
23 size_t size; /**< Size of the data in the buffer, <= capacity. */
/** Constructor taking an initial size; `explicit` rules out implicit conversion from size_t. */
// NOTE(review): presumably allocates initialSize bytes behind ptr -- confirm in the implementation.
25 explicit buffer(size_t initialSize);
/** Resize request taking the new size. */
// NOTE(review): whether this changes capacity, size, or both, and whether existing data is kept, is decided by the implementation -- confirm.
27 void resize(size_t newSize);
42 /** @brief Known Unicode encodings. */
// The underlying type is fixed to char, so a UNICODESET value occupies a single byte.
43 enum UNICODESET : char
45 NONE = 0, /**< No Unicode encoding; explicitly given the value 0. */
46 UCS2LE, /**< UCS-2 / UTF-16 little endian. */
47 UCS2BE, /**< UCS-2 / UTF-16 big endian. */
49 UCS4LE, /**< UTF-32 little endian. */
50 UCS4BE, /**< UTF-32 big endian. */
/** Takes a character value and a writable byte pointer; returns an int. */
// NOTE(review): presumably encodes unich as UTF-8 into utf8 and returns the number of bytes written -- confirm.
// NOTE(review): no buffer length is passed, so the caller must supply enough room for the longest sequence -- confirm the maximum.
53 int Ucs4_to_Utf8(unsigned unich, unsigned char * utf8);
/** Takes a single byte; returns an int. */
// NOTE(review): presumably the length of the UTF-8 sequence introduced by lead byte ch -- confirm, including the result for invalid lead bytes.
54 int Utf8len_fromLeadByte(unsigned char ch);
/** Takes a character value; returns an int. Utf8len_of_string treats a result below 1 as 1. */
// NOTE(review): presumably the number of UTF-8 bytes needed to encode code point ch -- confirm.
55 int Utf8len_fromCodepoint(unsigned ch);
57 * @brief How many bytes will it take to write string as UTF-8 ?
59 * @param size Size argument, needed because file mappings are not 0-terminated
61 * @bug Fails for files larger than 2 GB
// Takes a pointer to `size` elements of type C and returns a size_t.
// NOTE(review): C is presumably a template type parameter declared just above this signature -- confirm.
64 size_t Utf8len_of_string(const C* text, size_t size)
// Visit exactly `size` elements; the explicit count is used instead of searching for a terminator.
67 for (size_t i = 0; i < size; ++i)
// Ask Utf8len_fromCodepoint for the UTF-8 length of the current element.
69 int chlen = Utf8len_fromCodepoint(text[i]);
// Any result below 1 is replaced by 1, so every element contributes at least one unit.
// NOTE(review): chlen is presumably added to a running total that becomes the return value -- confirm.
70 if (chlen < 1) chlen = 1;
/** Takes a character buffer with an explicit size; returns a size_t. */
// NOTE(review): presumably the number of characters encoded in `size` bytes of UTF-8 -- confirm.
75 size_t stringlen_of_utf8(const char* text, size_t size);
/** Takes a byte pointer; returns an unsigned value. */
// NOTE(review): presumably decodes the UTF-8 sequence starting at str into a code point; no length is passed, so the sequence must be complete -- confirm.
76 unsigned GetUtf8Char(unsigned char * str);
/** lpd is a reference to the caller's pointer, so the callee is able to move that pointer. */
// NOTE(review): presumably writes u as UTF-8 at lpd, advances lpd past it and returns the byte count -- confirm.
77 int to_utf8_advance(unsigned u, unsigned char * &lpd);
/** ch and lossy are non-const references, i.e. both can be modified by the callee. */
// NOTE(review): presumably stores the character unich in ch and reports a lossy conversion through lossy -- confirm.
78 void maketchar(String & ch, unsigned unich, bool & lossy);
/** Takes a destination pointer and an encoding; returns an int. */
// NOTE(review): presumably writes the byte order mark for unicoding at dest and returns its size in bytes -- confirm.
79 int writeBom(void* dest, UNICODESET unicoding);
/** Takes an encoding; returns an int. */
// NOTE(review): presumably the byte order mark length in bytes for unicoding -- confirm.
80 int getBomSize(UNICODESET unicoding);
/** codepage is optional and defaults to 0. */
// NOTE(review): presumably reads one character at ptr, interpreted per unicoding/codepage, and returns its code point -- confirm.
81 unsigned get_unicode_char(unsigned char * ptr, UNICODESET unicoding, int codepage=0);
/** Takes an output String, a source buffer with explicit length, a codepage and a bool pointer; returns bool. */
// NOTE(review): presumably converts len bytes at lpd from codepage into line, with the return value signalling success; check whether lossy may be null -- confirm.
82 bool maketstring(String & line, const char* lpd, size_t len, int codepage, bool * lossy);
/** Overload of maketchar that additionally takes a codepage. */
83 void maketchar(String & ch, unsigned unich, bool & lossy, unsigned codepage);
/** Takes a single byte; returns an unsigned value. */
// NOTE(review): presumably maps ch to a Unicode code point using a default codepage -- confirm which one.
84 unsigned byteToUnicode(unsigned char ch);
/** Overload of byteToUnicode with an explicit codepage. */
85 unsigned byteToUnicode(unsigned char ch, unsigned codepage);
/** Both parameters are pointers to non-const objects, so results can be written through them. */
// NOTE(review): presumably reports the encoding and codepage of the internal String type; check whether null pointers are accepted -- confirm.
86 void getInternalEncoding(UNICODESET * unicoding, int * codepage);
88 // generic function to do all conversions
/** Takes a source encoding/codepage pair, a source byte range, a target encoding/codepage pair and a destination buffer; returns bool. */
// NOTE(review): presumably converts srcbytes bytes at src from (unicoding1, codepage1) to (unicoding2, codepage2) into dest, returning true on success -- confirm.
89 bool convert(UNICODESET unicoding1, int codepage1, const unsigned char * src, size_t srcbytes, UNICODESET unicoding2, int codepage2, buffer * dest);
/** Overload without the UNICODESET arguments. */
// NOTE(review): srcbytes is an int here but a size_t in the overload above -- confirm the narrower type is intended.
90 bool convert(int codepage1, const unsigned char * src, int srcbytes, int codepage2, buffer * dest);
/** Takes a destination buffer and a TCHAR source; srcbytes is optional and defaults to -1. */
// NOTE(review): -1 presumably means "src is terminated, work out the length" -- confirm.
// NOTE(review): the returned pointer presumably points into dest -- confirm.
92 unsigned char *convertTtoUTF8(buffer * dest, const TCHAR *src, int srcbytes = -1);
/** Overload without a destination buffer; srcbytes defaults to -1. */
// NOTE(review): the result is presumably allocated by the callee and must be released with dealloc -- confirm.
93 unsigned char *convertTtoUTF8(const TCHAR *src, int srcbytes = -1);
/** Takes a destination buffer and a char source; srcbytes is optional and defaults to -1. */
// NOTE(review): presumably the reverse of convertTtoUTF8, with the result stored in dest -- confirm.
94 TCHAR *convertUTF8toT(buffer * dest, const char* src, int srcbytes = -1);
/** Overload without a destination buffer; srcbytes defaults to -1. */
// NOTE(review): the result is presumably allocated by the callee and must be released with dealloc -- confirm.
95 TCHAR *convertUTF8toT(const char* src, int srcbytes = -1);
/** Takes an untyped pointer; returns nothing. */
// NOTE(review): presumably releases memory returned by the buffer-less conversion overloads -- confirm the matching allocator.
96 void dealloc(void *ptr);
/** Builds a String from a std::wstring. */
98 String toTString(const std::wstring& str);
/** Builds a String from a std::string. */
// NOTE(review): the encoding assumed for the narrow input (UTF-8 or a codepage) is not visible here -- confirm.
99 String toTString(const std::string& str);
/** Out-parameter form: wstr is a non-const reference that receives the result for tstr. */
100 void toUTF16(const String& tstr, std::wstring& wstr);
/** Value-returning inline overload of toUTF16. */
// NOTE(review): presumably forwards to the out-parameter overload above -- confirm against the body.
101 inline std::wstring toUTF16(const String& tstr)
/** Out-parameter form: u8str is a non-const reference that receives the result for tstr. */
112 void toUTF8(const String& tstr, std::string& u8str);
/** Value-returning overload of toUTF8. */
113 std::string toUTF8(const String& tstr);
/** Narrow-string input, narrow-string result. */
// NOTE(review): presumably re-encodes into the system default codepage; the assumed input encoding is not visible here -- confirm.
114 std::string toSystemCP(const std::string& str);
/** Wide-string input, narrow-string result. */
115 std::string toSystemCP(const std::wstring& str);
/** Narrow-string input, narrow-string result. */
// NOTE(review): presumably re-encodes into the current thread's codepage; the assumed input encoding is not visible here -- confirm.
116 std::string toThreadCP(const std::string& str);
/** Wide-string input, narrow-string result. */
117 std::string toThreadCP(const std::wstring& str);
/** Takes a source range (src, srclen), a destination range (dest, destsize), an input and an output codepage, and a bool pointer; returns an int. */
// NOTE(review): presumably converts from cpin to cpout and returns the number of bytes written to dest; check the behaviour when destsize is too small and whether lossy may be null -- confirm.
119 int CrossConvert(const char* src, unsigned srclen, char* dest, unsigned destsize, int cpin, int cpout, bool * lossy);
/** Same source and codepage arguments as CrossConvert, but the result is returned as a String instead of being written to a caller buffer. */
121 String CrossConvertToStringA(const char* src, unsigned srclen, int cpin, int cpout, bool * lossy);
/** Takes a character buffer with an explicit size; returns bool. */
// NOTE(review): the name suggests true means invalid UTF-8 was found -- confirm the polarity before relying on it.
124 bool CheckForInvalidUtf8(const char *pBuffer, size_t size);
/** Takes a byte buffer with a 64-bit size and a bool pointer; returns a UNICODESET value. */
// NOTE(review): pBom presumably reports whether a byte order mark was found; check whether it may be null -- confirm.
126 UNICODESET DetermineEncoding(const unsigned char *pBuffer, uint64_t size, bool * pBom);
/** Returns the default codepage as an int. */
128 int getDefaultCodepage();
/** Sets the default codepage from an int. */
// NOTE(review): where the value is stored and whether access is thread-safe is not visible here -- confirm.
129 void setDefaultCodepage(int cp);
/** Compares two codepage identifiers; returns bool. */
// NOTE(review): presumably also treats aliases of the same codepage as equal, otherwise cp1 == cp2 would suffice -- confirm.
131 bool EqualCodepages(int cp1, int cp2);