1 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
\r
5 #include "stringtool.h"
\r
9 #include <mbstring.h>
\r
12 /* ************************************************************************** *
\r
14 STRLCPY(3) OpenBSD Programmer's Manual STRLCPY(3)
\r
17 strlcpy, strlcat - size-bounded string copying and concatenation
\r
25 strlcpy(char *dst, const char *src, size_t size);
\r
28 strlcat(char *dst, const char *src, size_t size);
\r
31 The strlcpy() and strlcat() functions copy and concatenate strings re-
\r
32 spectively. They are designed to be safer, more consistent, and less er-
\r
33 ror prone replacements for strncpy(3) and strncat(3). Unlike those func-
\r
34 tions, strlcpy() and strlcat() take the full size of the buffer (not just
\r
35 the length) and guarantee to NUL-terminate the result (as long as size is
\r
36 larger than 0). Note that you should include a byte for the NUL in size.
\r
38 The strlcpy() function copies up to size - 1 characters from the NUL-ter-
\r
39 minated string src to dst, NUL-terminating the result.
\r
41 The strlcat() function appends the NUL-terminated string src to the end
\r
42 of dst. It will append at most size - strlen(dst) - 1 bytes, NUL-termi-
\r
46 The strlcpy() and strlcat() functions return the total length of the
\r
47 string they tried to create. For strlcpy() that means the length of src.
\r
48 For strlcat() that means the initial length of dst plus the length of
\r
49 src. While this may seem somewhat confusing it was done to make trunca-
\r
50 tion detection simple.
\r
53 The following code fragment illustrates the simple case:
\r
55 char *s, *p, buf[BUFSIZ];
\r
59 (void)strlcpy(buf, s, sizeof(buf));
\r
60 (void)strlcat(buf, p, sizeof(buf));
\r
62 To detect truncation, perhaps while building a pathname, something like
\r
63 the following might be used:
\r
65 char *dir, *file, pname[MAXPATHNAMELEN];
\r
69 if (strlcpy(pname, dir, sizeof(pname)) >= sizeof(pname))
\r
71 if (strlcat(pname, file, sizeof(pname)) >= sizeof(pname))
\r
74 Since we know how many characters we copied the first time, we can speed
\r
75 things up a bit by using a copy instead of an append:
\r
77 char *dir, *file, pname[MAXPATHNAMELEN];
\r
82 n = strlcpy(pname, dir, sizeof(pname));
\r
83 if (n >= sizeof(pname))
\r
85 if (strlcpy(pname + n, file, sizeof(pname) - n) >= sizeof(pname)-n)
\r
88 However, one may question the validity of such optimizations, as they de-
\r
89 feat the whole purpose of strlcpy() and strlcat(). As a matter of fact,
\r
90 the first version of this manual page got it wrong.
\r
93 snprintf(3), strncat(3), strncpy(3)
\r
95 OpenBSD 2.6 June 22, 1998 2
\r
98 -------------------------------------------------------------------------------
\r
100 Source: OpenBSD 2.6 man pages. Copyright: Portions are copyrighted by BERKELEY
\r
101 SOFTWARE DESIGN, INC., The Regents of the University of California,
\r
102 Massachusetts Institute of Technology, Free Software Foundation, FreeBSD Inc.,
\r
105 * ************************************************************************** */
\r
// Character-type-generic worker behind strlcpy() and wcslcpy() in this file.
// Copies the NUL-terminated string i_src into o_dest, which has room for
// i_destSize elements of type T, and returns the length of i_src (the
// terminating NUL is not counted), as described by the STRLCPY(3) text
// quoted above.
//   o_dest     - destination buffer, must not be NULL (ASSERT)
//   i_src      - NUL-terminated source, must not be NULL (ASSERT)
//   i_destSize - capacity of o_dest in elements, including room for the NUL
// NOTE(review): this listing has gaps in its embedded line numbering; the
// line that introduces T, the declaration of the cursor `d`, the `do {`
// opener, the `break` and the loop that walks the rest of i_src are not
// visible here — confirm against the complete file before editing.
110 static inline size_t xstrlcpy(T *o_dest, const T *i_src, size_t i_destSize)
\r
113 const T *s = i_src;
\r
114 size_t n = i_destSize;
\r
116 ASSERT( o_dest != NULL );
\r
117 ASSERT( i_src != NULL );
\r
119 // Copy as many bytes as will fit
\r
// n is pre-decremented so that one element stays free for the NUL
120 if (n != 0 && --n != 0) {
\r
122 if ((*d++ = *s++) == 0)
\r
124 } while (--n != 0);
\r
127 // Not enough room in o_dest, add NUL and traverse rest of i_src
\r
// a zero-sized destination is never written to
129 if (i_destSize != 0)
\r
130 *d = T(); // NUL-terminate o_dest
\r
// s is one element past the source's NUL here, hence the "- 1"
135 return (s - i_src - 1); // count does not include NUL
\r
140 size_t strlcpy(char *o_dest, const char *i_src, size_t i_destSize)
\r
142 return xstrlcpy(o_dest, i_src, i_destSize);
\r
147 size_t wcslcpy(wchar_t *o_dest, const wchar_t *i_src, size_t i_destSize)
\r
149 return xstrlcpy(o_dest, i_src, i_destSize);
\r
// Size-bounded copy for multibyte (MBCS) strings held as unsigned char.
// The copy loop consults _ismbblead() for every source byte and, for a lead
// byte, checks that another byte follows and that at least two bytes of
// room remain.
//   o_dest - destination buffer, must not be NULL (ASSERT)
//   i_src  - NUL-terminated source, must not be NULL (ASSERT)
// NOTE(review): the rest of the parameter list (i_destSize is used below),
// the condition guarding the early return, the loop body after embedded
// line 170, the termination of o_dest and the final return are missing from
// this listing — confirm against the complete file before editing.
154 size_t mbslcpy(unsigned char *o_dest, const unsigned char *i_src,
\r
157 unsigned char *d = o_dest;
\r
158 const unsigned char *s = i_src;
\r
159 size_t n = i_destSize;
\r
161 ASSERT( o_dest != NULL );
\r
162 ASSERT( i_src != NULL );
\r
// early exit: reports the byte length of the source
// NOTE(review): presumably taken when n == 0 — the guard is not visible
165 return strlen(reinterpret_cast<const char *>(i_src));
\r
167 // Copy as many bytes as will fit
\r
// the initial "-- n" reserves one byte for the terminating NUL
168 for (-- n; *s && 0 < n; -- n) {
\r
169 if (_ismbblead(*s)) {
\r
// true when the trail byte is missing or fewer than two bytes of room remain
170 if (!(s[1] && 2 <= n))
\r
// Stream inserter for tstringq: walks the string one _TCHAR at a time and
// writes an escaped representation to i_ost. The escape sequences visible
// here are \a \f \n \r \t \v and \"; a lead character is gathered together
// with the following character into a small buffer, a printable character
// into a one-character buffer, and there is a "\x" form whose digits are
// formatted with "%04x" or "%02x".
// NOTE(review): the switch statement, its case labels, the statements that
// write the small buffers, the condition choosing between the two hex
// widths and the return are missing from this listing — confirm
// against the complete file before editing.
187 tostream &operator<<(tostream &i_ost, const tstringq &i_data)
\r
190 for (const _TCHAR *s = i_data.c_str(); *s; ++ s) {
\r
193 i_ost << _T("\\a");
\r
196 i_ost << _T("\\f");
\r
199 i_ost << _T("\\n");
\r
202 i_ost << _T("\\r");
\r
205 i_ost << _T("\\t");
\r
208 i_ost << _T("\\v");
\r
211 i_ost << _T("\\\"");
\r
214 if (_istlead(*s)) {
\r
// NOTE(review): s[1] is used without a check that it is not the NUL
215 _TCHAR buf[3] = { s[0], s[1], 0 };
\r
218 } else if (_istprint(*s)) {
\r
219 _TCHAR buf[2] = { *s, 0 };
\r
222 i_ost << _T("\\x");
\r
225 _sntprintf(buf, NUMBER_OF(buf), _T("%04x"), *s);
\r
227 _sntprintf(buf, NUMBER_OF(buf), _T("%02x"), *s);
\r
// Expands backslash escape sequences found in the first i_len characters of
// i_str (stopping early at a NUL) and returns the expanded text.
//   i_str   - text to interpret
//   i_len   - number of characters of i_str to look at
//   i_quote - optional set of characters (may be NULL) that is consulted
//             for the character following a backslash
//             NOTE(review): the action taken on a match is not visible;
//             presumably the character is copied literally — confirm
//   i_doesUseRegexpBackReference - tested right before the octal parser
//             NOTE(review): presumably keeps "\<digit>" untouched for
//             regexp back references — confirm
// Escape handling visible in this listing: \c<char> (control code), a
// hexadecimal form with optional braces, and an octal form.
// NOTE(review): this listing has gaps in its embedded line numbering; the
// switch statement, most case labels and several statements are missing
// here — confirm against the complete file before editing.
239 // interpret meta characters such as \n
\r
240 tstring interpretMetaCharacters(const _TCHAR *i_str, size_t i_len,
\r
241 const _TCHAR *i_quote,
\r
242 bool i_doesUseRegexpBackReference)
\r
244 // the interpreted string is never longer than i_len characters
\r
245 Array<_TCHAR> result(i_len + 1);
\r
247 _TCHAR *d = result.get();
\r
249 const _TCHAR *end = i_str + i_len;
\r
251 while (i_str < end && *i_str) {
\r
252 if (*i_str != _T('\\')) {
\r
// NOTE(review): *(i_str + 1) is read before "i_str + 1 < end" is tested
253 if (_istlead(*i_str) && *(i_str + 1) && i_str + 1 < end)
\r
// a backslash that is followed by at least one more character
256 } else if (*(i_str + 1) != _T('\0')) {
\r
258 if (i_quote && _tcschr(i_quote, *i_str))
\r
266 //case _T('b'): *d++ = _T('\b'); i_str ++; break;
\r
291 //case _T('?'): *d++ = _T('\x7f'); i_str ++; break;
\r
292 //case _T('_'): *d++ = _T(' '); i_str ++; break;
\r
293 //case _T('\\'): *d++ = _T('\\'); i_str ++; break;
\r
306 case _T('c'): // control code, for example '\c[' is escape: '\x1b'
\r
308 if (i_str < end && *i_str) {
\r
// parallel tables: ctrlchar[k] is a character accepted after \c and
// ctrlcode[k] is the control code written for it
309 static const _TCHAR *ctrlchar =
\r
310 _T("@ABCDEFGHIJKLMNO")
\r
311 _T("PQRSTUVWXYZ[\\]^_")
\r
312 _T("@abcdefghijklmno")
\r
313 _T("pqrstuvwxyz@@@@?");
\r
314 static const _TCHAR *ctrlcode =
\r
315 _T("\00\01\02\03\04\05\06\07\10\11\12\13\14\15\16\17")
\r
316 _T("\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37")
\r
317 _T("\00\01\02\03\04\05\06\07\10\11\12\13\14\15\16\17")
\r
318 _T("\20\21\22\23\24\25\26\27\30\31\32\00\00\00\00\177");
\r
319 if (const _TCHAR *c = _tcschr(ctrlchar, *i_str))
\r
320 *d++ = ctrlcode[c - ctrlchar], i_str ++;
\r
// parallel tables: hexvalue[k] is the numeric value of the digit hexchar[k]
326 static const _TCHAR *hexchar = _T("0123456789ABCDEFabcdef");
\r
327 static int hexvalue[] = { 0, 1, 2, 3, 4, 5 ,6, 7, 8, 9,
\r
328 10, 11, 12, 13, 14, 15,
\r
329 10, 11, 12, 13, 14, 15,
\r
// set when the digits are wrapped in braces
// NOTE(review): the assignment itself is not visible in this listing
331 bool brace = false;
\r
332 if (i_str < end && *i_str == _T('{')) {
\r
// accumulate hexadecimal digits into n
337 for (; i_str < end && *i_str; i_str ++)
\r
338 if (const _TCHAR *c = _tcschr(hexchar, *i_str))
\r
339 n = n * 16 + hexvalue[c - hexchar];
\r
// a closing brace is only looked for when an opening one was seen
342 if (i_str < end && *i_str == _T('}') && brace)
\r
345 *d++ = static_cast<_TCHAR>(n);
\r
355 if (i_doesUseRegexpBackReference)
\r
// parallel tables: octalvalue[k] is the numeric value of octalchar[k]
359 static const _TCHAR *octalchar = _T("01234567");
\r
360 static int octalvalue[] = { 0, 1, 2, 3, 4, 5 ,6, 7, };
\r
// accumulate octal digits into n
362 for (; i_str < end && *i_str; i_str ++)
\r
363 if (const _TCHAR *c = _tcschr(octalchar, *i_str))
\r
364 n = n * 8 + octalvalue[c - octalchar];
\r
368 *d++ = static_cast<_TCHAR>(n);
\r
// NOTE(review): *(i_str + 1) is read before "i_str + 1 < end" is tested
374 if (_istlead(*i_str) && *(i_str + 1) && i_str + 1 < end)
\r
// the buffer pointer is converted to the tstring return value
382 return result.get();
\r
// Looks up the session id of the current process with
// ProcessIdToSessionId(GetCurrentProcessId(), ...) and, when that call
// succeeds, formats the id as an unsigned decimal number ("%u") into buf.
//   i_str - base string the session id is added to
// NOTE(review): the declarations of sessionId and buf, the code that
// combines the number with i_str and the return statement are missing from
// this listing; presumably the formatted id is appended to a copy of i_str
// — confirm against the complete file before editing.
386 // add session id to i_str
\r
387 tstring addSessionId(const _TCHAR *i_str)
\r
391 if (ProcessIdToSessionId(GetCurrentProcessId(), &sessionId)) {
\r
393 _sntprintf(buf, NUMBER_OF(buf), _T("%u"), sessionId);
\r
// Builds its result in a scratch buffer sized for up to two output bytes
// per input byte (len * 2 + 1) and returns the bytes written so far,
// [buf.get(), p), as a std::string.
// Visible tests: whether the current byte is an MBCS lead byte that is
// followed by another byte, and whether a byte is one of the regexp
// metacharacters ".*?+(){}[]^$".
//   i_str - NUL-terminated MBCS string (its length is taken with strlen)
// NOTE(review): the loop header and every statement that writes through p
// are missing from this listing; presumably a backslash is inserted in
// front of a trail byte that equals a metacharacter — confirm against the
// complete file before editing.
401 // escape regexp special characters in MBCS trail bytes
\r
402 std::string guardRegexpFromMbcs(const char *i_str)
\r
404 size_t len = strlen(i_str);
\r
405 Array<char> buf(len * 2 + 1);
\r
406 char *p = buf.get();
\r
// the cast keeps bytes >= 0x80 from being passed as negative values
408 if (_ismbblead(static_cast<u_char>(*i_str)) && i_str[1]) {
\r
410 if (strchr(".*?+(){}[]^$", *i_str))
\r
415 return std::string(buf.get(), p);
\r
// Convert a multibyte string (in the encoding of the current C locale) to a
// wide string.
//   i_str  - multibyte text; the conversion stops at the first NUL
//   return - the converted text, or an empty string when i_str contains a
//            byte sequence that is not valid in the current locale
std::wstring to_wstring(const std::string &i_str)
{
	// first pass: ask how many wide characters the conversion produces
	const size_t size = mbstowcs(NULL, i_str.c_str(), i_str.size() + 1);
	if (size == static_cast<size_t>(-1))
		return std::wstring();

	// second pass: convert straight into the result. The limit is the
	// real capacity of the destination (size), not the byte length of the
	// source; std::wstring supplies its own terminator.
	std::wstring result(size, L'\0');
	if (size != 0)
		mbstowcs(&result[0], i_str.c_str(), size);
	return result;
}
\r
// Convert a wide string to a multibyte string in the encoding of the
// current C locale.
//   i_str  - wide text; the conversion stops at the first NUL
//   return - the converted text, or an empty string when i_str contains a
//            character that cannot be represented in the current locale
std::string to_string(const std::wstring &i_str)
{
	// first pass: ask how many bytes the conversion produces
	const size_t size = wcstombs(NULL, i_str.c_str(), i_str.size() + 1);
	if (size == static_cast<size_t>(-1))
		return std::string();

	// second pass: the limit must be the number of output BYTES (size).
	// Limiting by the number of wide characters, as before, cuts off text
	// whose multibyte form is longer than its wide form and leaves the
	// buffer without a terminating NUL.
	std::string result(size, '\0');
	if (size != 0)
		wcstombs(&result[0], i_str.c_str(), size);
	return result;
}
\r
445 tostream &operator<<(tostream &i_ost, const tregex &i_data)
\r
447 return i_ost << i_data.str();
\r
// Works on a copy of i_str: every element is first tested with
// _ismbblead(), and elements are replaced by their tolower() value.
//   i_str - source string; it is copied, not modified
// NOTE(review): the statement guarded by the _ismbblead() test and the
// return statement are missing from this listing; presumably a lead byte
// and its trail byte are skipped unchanged and the modified copy is
// returned — confirm against the complete file before editing.
451 /// get lower string
\r
452 tstring toLower(const tstring &i_str)
\r
454 tstring str(i_str);
\r
455 for (size_t i = 0; i < str.size(); ++ i) {
\r
456 if (_ismbblead(str[i]))
\r
459 str[i] = tolower(str[i]);
\r
// convert wstring to UTF-8
//
// Each code point of i_str is written as:
//   0xxxxxxx                             : 0000 - 007F
//   110xxxxx 10xxxxxx                    : 0080 - 07FF
//   1110xxxx 10xxxxxx 10xxxxxx           : 0800 - FFFF
//   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx  : 10000 - 10FFFF
//
// A UTF-16 surrogate pair (high unit D800-DBFF followed by low unit
// DC00-DFFF) is combined into one code point first, so it comes out as a
// single 4-byte sequence instead of two 3-byte sequences. An unpaired
// surrogate is still written in the 3-byte form, and values outside
// 0 - 10FFFF are skipped.
//   i_str  - wide text (UTF-16 or UTF-32 units, depending on wchar_t)
//   return - the UTF-8 encoded bytes
std::string to_UTF_8(const std::wstring &i_str)
{
	const std::wstring::size_type length = i_str.size();

	std::string utf8;
	// three bytes per unit covers everything except 32-bit units above
	// FFFF; the string simply grows if that estimate is too small
	utf8.reserve(length * 3);

	for (std::wstring::size_type i = 0; i < length; ++ i) {
		unsigned long c = static_cast<unsigned long>(i_str[i]);

		// combine a surrogate pair into the code point it stands for
		if (0xd800 <= c && c <= 0xdbff && i + 1 < length) {
			const unsigned long low = static_cast<unsigned long>(i_str[i + 1]);
			if (0xdc00 <= low && low <= 0xdfff) {
				c = 0x10000 + ((c - 0xd800) << 10) + (low - 0xdc00);
				++ i;
			}
		}

		if (c <= 0x007f) {
			utf8 += static_cast<char>(c);
		} else if (c <= 0x07ff) {
			utf8 += static_cast<char>((c >> 6) | 0xc0);
			utf8 += static_cast<char>((c & 0x3f) | 0x80);
		} else if (c <= 0xffff) {
			utf8 += static_cast<char>((c >> 12) | 0xe0);
			utf8 += static_cast<char>(((c >> 6) & 0x3f) | 0x80);
			utf8 += static_cast<char>((c & 0x3f) | 0x80);
		} else if (c <= 0x10ffff) {
			utf8 += static_cast<char>((c >> 18) | 0xf0);
			utf8 += static_cast<char>(((c >> 12) & 0x3f) | 0x80);
			utf8 += static_cast<char>(((c >> 6) & 0x3f) | 0x80);
			utf8 += static_cast<char>((c & 0x3f) | 0x80);
		}
		// anything else (negative or above 10FFFF) is not a code point
	}
	return utf8;
}
\r