3 * ====================================================================
\r
4 * Copyright (c) 2000-2004, 2008 CollabNet. All rights reserved.
\r
6 * This software is licensed as described in the file COPYING, which
\r
7 * you should have received as part of this distribution. The terms
\r
8 * are also available at http://subversion.tigris.org/license-1.html.
\r
9 * If newer versions of this license are posted there, you may use a
\r
10 * newer version instead, at your option.
\r
12 * This software consists of voluntary contributions made by many
\r
13 * individuals. For exact contribution history, see the revision
\r
14 * history and logs, available at http://subversion.tigris.org/.
\r
15 * ====================================================================
\r
19 * @brief UTF-8 conversion routines
\r
20 * Whenever a conversion routine cannot convert to or from UTF-8, the
\r
21 * error returned has code @c APR_EINVAL.
\r
29 #include <apr_pools.h>
\r
30 #include <apr_xlate.h> /* for APR_*_CHARSET */
\r
32 #include "svn_types.h"
\r
33 #include "svn_string.h"
\r
37 #endif /* __cplusplus */
\r
/* Subversion-prefixed alias for APR_LOCALE_CHARSET (from <apr_xlate.h>). */
39 #define SVN_APR_LOCALE_CHARSET APR_LOCALE_CHARSET
\r
/* Subversion-prefixed alias for APR_DEFAULT_CHARSET (from <apr_xlate.h>). */
40 #define SVN_APR_DEFAULT_CHARSET APR_DEFAULT_CHARSET
\r
43 * Initialize the UTF-8 encoding/decoding routines.
\r
44 * Allocate cached translation handles in a subpool of @a pool.
\r
46 * @note It is optional to call this function, but if it is used, no other
\r
47 * svn function may be in use in other threads during the call of this
\r
48 * function or when @a pool is cleared or destroyed.
\r
49 * Initializing the UTF-8 routines will improve performance.
\r
51 * @since New in 1.1.
\r
54 svn_utf_initialize(apr_pool_t *pool);
\r
56 /** Set @a *dest to a utf8-encoded stringbuf from native stringbuf @a src;
\r
57 * allocate @a *dest in @a pool.
\r
60 svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest,
\r
61 const svn_stringbuf_t *src,
\r
65 /** Set @a *dest to a utf8-encoded string from native string @a src; allocate
\r
66 * @a *dest in @a pool.
\r
69 svn_utf_string_to_utf8(const svn_string_t **dest,
\r
70 const svn_string_t *src,
\r
74 /** Set @a *dest to a utf8-encoded C string from native C string @a src;
\r
75 * allocate @a *dest in @a pool.
\r
78 svn_utf_cstring_to_utf8(const char **dest,
\r
83 /** Set @a *dest to a utf8-encoded C string from @a frompage encoded C
\r
84 * string @a src; allocate @a *dest in @a pool.
\r
86 * @since New in 1.4.
\r
89 svn_utf_cstring_to_utf8_ex2(const char **dest,
\r
91 const char *frompage,
\r
95 /** Like svn_utf_cstring_to_utf8_ex2() but with @a convset_key which is
\r
98 * @deprecated Provided for backward compatibility with the 1.3 API.
\r
102 svn_utf_cstring_to_utf8_ex(const char **dest,
\r
104 const char *frompage,
\r
105 const char *convset_key,
\r
109 /** Set @a *dest to a natively-encoded stringbuf from utf8 stringbuf @a src;
\r
110 * allocate @a *dest in @a pool.
\r
113 svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest,
\r
114 const svn_stringbuf_t *src,
\r
118 /** Set @a *dest to a natively-encoded string from utf8 string @a src;
\r
119 * allocate @a *dest in @a pool.
\r
122 svn_utf_string_from_utf8(const svn_string_t **dest,
\r
123 const svn_string_t *src,
\r
127 /** Set @a *dest to a natively-encoded C string from utf8 C string @a src;
\r
128 * allocate @a *dest in @a pool.
\r
131 svn_utf_cstring_from_utf8(const char **dest,
\r
136 /** Set @a *dest to a @a topage encoded C string from utf8-encoded C string
\r
137 * @a src; allocate @a *dest in @a pool.
\r
139 * @since New in 1.4.
\r
142 svn_utf_cstring_from_utf8_ex2(const char **dest,
\r
144 const char *topage,
\r
148 /** Like svn_utf_cstring_from_utf8_ex2() but with @a convset_key which is
\r
151 * @deprecated Provided for backward compatibility with the 1.3 API.
\r
155 svn_utf_cstring_from_utf8_ex(const char **dest,
\r
157 const char *topage,
\r
158 const char *convset_key,
\r
162 /** Return a fuzzily native-encoded C string from utf8 C string @a src,
\r
163 * allocated in @a pool. A fuzzy recoding leaves all 7-bit ASCII
\r
164 * characters the same, and substitutes "?\\XXX" for others, where XXX
\r
165 * is the unsigned decimal code for that character.
\r
167 * This function cannot error; it is guaranteed to return something.
\r
168 * First it will recode as described above and then attempt to convert
\r
169 * the (new) 7-bit UTF-8 string to native encoding. If that fails, it
\r
170 * will return the raw fuzzily recoded string, which may or may not be
\r
171 * meaningful in the client's locale, but is (presumably) better than nothing.
\r
176 * Improvement is possible, even imminent. The original problem was
\r
177 * that if you converted a UTF-8 string (say, a log message) into a
\r
178 * locale that couldn't represent all the characters, you'd just get a
\r
179 * static placeholder saying "[unconvertible log message]". Then
\r
180 * Justin Erenkrantz pointed out that on platforms that didn't support
\r
181 * conversion at all, "svn log" would still fail completely when it
\r
182 * encountered unconvertible data.
\r
184 * Now for both cases, the caller can at least fall back on this
\r
185 * function, which converts the message as best it can, substituting
\r
186 * "?\\XXX" escape codes for the non-ASCII characters.
\r
188 * Ultimately, some callers may prefer the iconv "//TRANSLIT" option,
\r
189 * so when we can detect that at configure time, things will change.
\r
190 * Also, this should (?) be moved to apr/apu eventually.
\r
192 * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for details.
\r
196 svn_utf_cstring_from_utf8_fuzzy(const char *src,
\r
200 /** Set @a *dest to a natively-encoded C string from utf8 stringbuf @a src;
\r
201 * allocate @a *dest in @a pool.
\r
204 svn_utf_cstring_from_utf8_stringbuf(const char **dest,
\r
205 const svn_stringbuf_t *src,
\r
209 /** Set @a *dest to a natively-encoded C string from utf8 string @a src;
\r
210 * allocate @a *dest in @a pool.
\r
213 svn_utf_cstring_from_utf8_string(const char **dest,
\r
214 const svn_string_t *src,
\r
219 #endif /* __cplusplus */
\r
221 #endif /* SVN_UTF_H */
\r