1 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 #include "stringtool.h"
12 /* ************************************************************************** *
14 STRLCPY(3) OpenBSD Programmer's Manual STRLCPY(3)
17 strlcpy, strlcat - size-bounded string copying and concatenation
25 strlcpy(char *dst, const char *src, size_t size);
28 strlcat(char *dst, const char *src, size_t size);
31 The strlcpy() and strlcat() functions copy and concatenate strings re-
32 spectively. They are designed to be safer, more consistent, and less er-
33 ror prone replacements for strncpy(3) and strncat(3). Unlike those func-
34 tions, strlcpy() and strlcat() take the full size of the buffer (not just
35 the length) and guarantee to NUL-terminate the result (as long as size is
36 larger than 0). Note that you should include a byte for the NUL in size.
38 The strlcpy() function copies up to size - 1 characters from the NUL-ter-
39 minated string src to dst, NUL-terminating the result.
41 The strlcat() function appends the NUL-terminated string src to the end
42 of dst. It will append at most size - strlen(dst) - 1 bytes, NUL-terminating the result.
46 The strlcpy() and strlcat() functions return the total length of the
47 string they tried to create. For strlcpy() that means the length of src.
48 For strlcat() that means the initial length of dst plus the length of
49 src. While this may seem somewhat confusing it was done to make trunca-
50 tion detection simple.
53 The following code fragment illustrates the simple case:
55 char *s, *p, buf[BUFSIZ];
59 (void)strlcpy(buf, s, sizeof(buf));
60 (void)strlcat(buf, p, sizeof(buf));
62 To detect truncation, perhaps while building a pathname, something like
63 the following might be used:
65 char *dir, *file, pname[MAXPATHNAMELEN];
69 if (strlcpy(pname, dir, sizeof(pname)) >= sizeof(pname))
71 if (strlcat(pname, file, sizeof(pname)) >= sizeof(pname))
74 Since we know how many characters we copied the first time, we can speed
75 things up a bit by using a copy instead of an append:
77 char *dir, *file, pname[MAXPATHNAMELEN];
82 n = strlcpy(pname, dir, sizeof(pname));
83 if (n >= sizeof(pname))
85 if (strlcpy(pname + n, file, sizeof(pname) - n) >= sizeof(pname)-n)
88 However, one may question the validity of such optimizations, as they de-
89 feat the whole purpose of strlcpy() and strlcat(). As a matter of fact,
90 the first version of this manual page got it wrong.
93 snprintf(3), strncat(3), strncpy(3)
95 OpenBSD 2.6 June 22, 1998 2
98 -------------------------------------------------------------------------------
100 Source: OpenBSD 2.6 man pages. Copyright: Portions are copyrighted by BERKELEY
101 SOFTWARE DESIGN, INC., The Regents of the University of California,
102 Massachusetts Institute of Technology, Free Software Foundation, FreeBSD Inc.,
105 * ************************************************************************** */
// Shared copy routine used by strlcpy() and wcslcpy() in this file.
//   o_dest     : destination buffer, must not be NULL (ASSERTed)
//   i_src      : source string, must not be NULL (ASSERTed)
//   i_destSize : capacity of o_dest; nothing is copied when it is 0 or 1
// Returns the distance from i_src to the source terminator, i.e. the length
// of i_src without its NUL.
110 static inline size_t xstrlcpy(T *o_dest, const T *i_src, size_t i_destSize)
114 size_t n = i_destSize;
116 ASSERT( o_dest != NULL );
117 ASSERT( i_src != NULL );
119 // Copy as many bytes as will fit
// "--n" keeps one slot in reserve; the NUL written further down goes there.
120 if (n != 0 && --n != 0)
// Copies one element and tests the value just stored, so the source
// terminator is copied as well when it fits.
124 if ((*d++ = *s++) == 0)
129 // Not enough room in o_dest, add NUL and traverse rest of i_src
// T() is the value-initialized (zero) element, so this works for any
// character type T.
133 *d = T(); // NUL-terminate o_dest
// s sits one past the source terminator at this point, hence the "- 1".
138 return (s - i_src - 1); // count does not include NUL
// strlcpy() for char strings: forwards its arguments unchanged to xstrlcpy()
// and returns that function's result.
143 size_t strlcpy(char *o_dest, const char *i_src, size_t i_destSize)
145 return xstrlcpy(o_dest, i_src, i_destSize);
// strlcpy() counterpart for wchar_t strings: forwards its arguments unchanged
// to xstrlcpy() and returns that function's result.
150 size_t wcslcpy(wchar_t *o_dest, const wchar_t *i_src, size_t i_destSize)
152 return xstrlcpy(o_dest, i_src, i_destSize);
// strlcpy() counterpart for unsigned char strings; judging by the two-byte
// check in the copy loop it is meant for multibyte text -- confirm.
// o_dest and i_src must not be NULL (both are ASSERTed).
157 size_t mbslcpy(unsigned char *o_dest, const unsigned char *i_src,
160 unsigned char *d = o_dest;
161 const unsigned char *s = i_src;
162 size_t n = i_destSize;
164 ASSERT( o_dest != NULL );
165 ASSERT( i_src != NULL );
// Early exit that only reports the byte length of i_src as measured by
// strlen(); the cast is needed because strlen() takes const char *.
168 return strlen(reinterpret_cast<const char *>(i_src));
170 // Copy as many bytes as will fit
// One slot of n is given up before the loop starts (presumably kept for the
// terminator -- confirm).
171 for (-- n; *s && 0 < n; -- n)
// True when there is no following source byte or fewer than two slots are
// left; presumably prevents a double-byte character from being split --
// confirm.
175 if (!(s[1] && 2 <= n))
// Stream inserter for tstringq: walks the string character by character and
// writes control characters and the double quote as backslash escapes.
192 tostream &operator<<(tostream &i_ost, const tstringq &i_data)
// Iterates up to the first NUL of the c_str() view.
195 for (const _TCHAR *s = i_data.c_str(); *s; ++ s)
// Each case writes a two-character escape: a backslash plus a letter.
199 case _T('\a'): i_ost << _T("\\a"); break;
200 case _T('\f'): i_ost << _T("\\f"); break;
201 case _T('\n'): i_ost << _T("\\n"); break;
202 case _T('\r'): i_ost << _T("\\r"); break;
203 case _T('\t'): i_ost << _T("\\t"); break;
204 case _T('\v'): i_ost << _T("\\v"); break;
// The double quote is written as backslash + quote.
205 case _T('"'): i_ost << _T("\\\""); break;
// Two consecutive characters are packed into one NUL-terminated buffer
// (presumably a double-byte character kept together -- confirm).
209 _TCHAR buf[3] = { s[0], s[1], 0 };
// Printable characters are packed into a one-character string as they are.
213 else if (_istprint(*s))
215 _TCHAR buf[2] = { *s, 0 };
// Remaining characters are formatted as their numeric code in hexadecimal,
// zero-padded to four or to two digits (presumably wide vs. narrow builds --
// confirm).
223 _sntprintf(buf, NUMBER_OF(buf), _T("%04x"), *s);
225 _sntprintf(buf, NUMBER_OF(buf), _T("%02x"), *s);
// Expands backslash escape sequences found in i_str.
//   i_str  : input text; scanning stops at i_str + i_len or at the first NUL,
//            whichever comes first
//   i_len  : number of characters of i_str to consider
//   i_quote: optional (may be NULL) set of characters that get special
//            treatment when they follow a backslash
//   i_doesUseRegexpBackReference:
//            consulted for "\1" .. "\7" (presumably leaves them alone for a
//            regexp engine to resolve -- confirm)
// Escapes handled by the visible cases: \a \e \f \n \r \t \v \' \" \\,
// \c<char> (control code), \x / \X (hexadecimal, optionally in braces) and
// octal digits.
237 // interpret meta characters such as \n
238 tstring interpretMetaCharacters(const _TCHAR *i_str, size_t i_len,
239 const _TCHAR *i_quote,
240 bool i_doesUseRegexpBackReference)
242 // interpreted string is always less than i_len
// Output buffer: i_len characters plus one extra element.
243 Array<_TCHAR> result(i_len + 1);
// d is the write cursor into the output buffer.
245 _TCHAR *d = result.get();
// One past the last input character that may be consumed.
247 const _TCHAR *end = i_str + i_len;
249 while (i_str < end && *i_str)
// Anything that is not a backslash is not an escape sequence.
251 if (*i_str != _T('\\'))
// NOTE(review): i_str[1] is read before "i_str + 1 < end" is evaluated, so
// the bounds test does not protect that read -- consider swapping the
// operands.
253 if (_istlead(*i_str) && *(i_str + 1) && i_str + 1 < end)
// NOTE(review): only NUL is tested here, not "end"; presumably callers always
// pass NUL-terminated text -- confirm.
257 else if (*(i_str + 1) != _T('\0'))
// i_quote may be NULL, in which case this test is skipped.
260 if (i_quote && _tcschr(i_quote, *i_str))
// Single-letter escapes: store the character value and step past the letter.
265 case _T('a'): *d++ = _T('\x07'); i_str ++; break;
266 //case _T('b'): *d++ = _T('\b'); i_str ++; break;
267 case _T('e'): *d++ = _T('\x1b'); i_str ++; break;
268 case _T('f'): *d++ = _T('\f'); i_str ++; break;
269 case _T('n'): *d++ = _T('\n'); i_str ++; break;
270 case _T('r'): *d++ = _T('\r'); i_str ++; break;
271 case _T('t'): *d++ = _T('\t'); i_str ++; break;
272 case _T('v'): *d++ = _T('\v'); i_str ++; break;
273 //case _T('?'): *d++ = _T('\x7f'); i_str ++; break;
274 //case _T('_'): *d++ = _T(' '); i_str ++; break;
275 //case _T('\\'): *d++ = _T('\\'); i_str ++; break;
276 case _T('\''): *d++ = _T('\''); i_str ++; break;
277 case _T('"'): *d++ = _T('"'); i_str ++; break;
278 case _T('\\'): *d++ = _T('\\'); i_str ++; break;
279 case _T('c'): // control code, for example '\c[' is escape: '\x1b'
// A character must follow "\c" for the lookup to take place.
281 if (i_str < end && *i_str)
// ctrlchar and ctrlcode are parallel tables: the position of a character in
// ctrlchar selects its control code in ctrlcode.  Upper and lower case
// letters share the same codes.  _tcschr() returns the first match, so '@'
// always resolves to position 0; the later '@' entries only pad the table so
// that '?' lines up with \177.
283 static const _TCHAR *ctrlchar =
284 _T("@ABCDEFGHIJKLMNO")
285 _T("PQRSTUVWXYZ[\\]^_")
286 _T("@abcdefghijklmno")
287 _T("pqrstuvwxyz@@@@?");
288 static const _TCHAR *ctrlcode =
289 _T("\00\01\02\03\04\05\06\07\10\11\12\13\14\15\16\17")
290 _T("\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37")
291 _T("\00\01\02\03\04\05\06\07\10\11\12\13\14\15\16\17")
292 _T("\20\21\22\23\24\25\26\27\30\31\32\00\00\00\00\177");
// Characters missing from the table store nothing and are not consumed here.
293 if (const _TCHAR *c = _tcschr(ctrlchar, *i_str))
294 *d++ = ctrlcode[c - ctrlchar], i_str ++;
297 case _T('x'): case _T('X'):
// hexchar and hexvalue are parallel tables; the trailing "abcdef" entries
// give lower-case digits the same values as upper-case ones.
300 static const _TCHAR *hexchar = _T("0123456789ABCDEFabcdef");
301 static int hexvalue[] = { 0, 1, 2, 3, 4, 5 ,6, 7, 8, 9,
302 10, 11, 12, 13, 14, 15,
303 10, 11, 12, 13, 14, 15, };
// An opening brace may introduce the digits.
305 if (i_str < end && *i_str == _T('{'))
// Accumulate hexadecimal digits into n, most significant digit first.
311 for (; i_str < end && *i_str; i_str ++)
312 if (const _TCHAR *c = _tcschr(hexchar, *i_str))
313 n = n * 16 + hexvalue[c - hexchar];
// A closing brace only counts when "brace" is set.
316 if (i_str < end && *i_str == _T('}') && brace)
// The accumulated value is narrowed to one character.
319 *d++ = static_cast<_TCHAR>(n);
322 case _T('1'): case _T('2'): case _T('3'):
323 case _T('4'): case _T('5'): case _T('6'): case _T('7'):
// Digits 1-7 after a backslash are ambiguous: octal escape or regexp back
// reference; the flag decides.
324 if (i_doesUseRegexpBackReference)
// octalchar and octalvalue are parallel tables, as for the hexadecimal case.
329 static const _TCHAR *octalchar = _T("01234567");
330 static int octalvalue[] = { 0, 1, 2, 3, 4, 5 ,6, 7, };
// Accumulate octal digits into n, most significant digit first.
332 for (; i_str < end && *i_str; i_str ++)
333 if (const _TCHAR *c = _tcschr(octalchar, *i_str))
334 n = n * 8 + octalvalue[c - octalchar];
// The accumulated value is narrowed to one character.
338 *d++ = static_cast<_TCHAR>(n);
// NOTE(review): same ordering issue as in the non-escape branch: i_str[1] is
// read before the "< end" test.
344 if (_istlead(*i_str) && *(i_str + 1) && i_str + 1 < end)
// Builds a string from i_str and the session id of the calling process.
// How the two parts are joined is presumably a plain append of the decimal
// id -- confirm.
356 // add session id to i_str
357 tstring addSessionId(const _TCHAR *i_str)
// Asks for the session the current process belongs to; the formatting below
// belongs to the success branch.
361 if (ProcessIdToSessionId(GetCurrentProcessId(), &sessionId)) {
// Renders the id as an unsigned decimal number, bounded by the size of buf.
363 _sntprintf(buf, NUMBER_OF(buf), _T("%u"), sessionId);
// Returns a copy of i_str prepared for a byte-oriented regexp engine: bytes
// that follow an MBCS lead byte are checked against the regexp special
// characters.
371 // escape regexp special characters in MBCS trail bytes
372 std::string guardRegexpFromMbcs(const char *i_str)
374 size_t len = strlen(i_str);
// Sized at two output bytes per input byte plus one more (presumably the
// worst case where every byte gains an escape prefix -- confirm).
375 Array<char> buf(len * 2 + 1);
// A lead byte only counts when another byte follows it; the cast keeps
// _ismbblead() from seeing a negative value.
379 if (_ismbblead(static_cast<u_char>(*i_str)) && i_str[1])
// Is the current byte one of the characters with a special regexp meaning?
382 if (strchr(".*?+(){}[]^$", *i_str))
// The result is the buffer content from its start up to p.
387 return std::string(buf.get(), p);
// convert a multibyte string to a wide string using the current C locale
//
// Returns an empty string when i_str is empty or holds a byte sequence that
// is invalid in the current locale.  As with the C API underneath, the
// conversion stops at the first embedded NUL.
std::wstring to_wstring(const std::string &i_str)
{
  const char *src = i_str.c_str();

  // Pass 1: number of wide characters needed, terminator excluded.  The
  // limit argument is ignored while the destination is NULL.
  const size_t length = mbstowcs(NULL, src, 0);
  if (length == static_cast<size_t>(-1) || length == 0)
    return std::wstring();

  // Pass 2: convert straight into the result.  The limit is the capacity of
  // the destination (not the byte count of the source), and the outcome is
  // checked in case the locale changed between the two passes.
  std::wstring result(length, L'\0');
  const size_t written = mbstowcs(&result[0], src, length);
  if (written == static_cast<size_t>(-1))
    return std::wstring();
  result.resize(written);
  return result;
}
// convert a wide string to a multibyte string using the current C locale
//
// Returns an empty string when i_str is empty or holds a character that has
// no representation in the current locale.  As with the C API underneath,
// the conversion stops at the first embedded NUL.
std::string to_string(const std::wstring &i_str)
{
  const wchar_t *src = i_str.c_str();

  // Pass 1: number of bytes needed, terminator excluded.  The limit argument
  // is ignored while the destination is NULL.
  const size_t length = wcstombs(NULL, src, 0);
  if (length == static_cast<size_t>(-1) || length == 0)
    return std::string();

  // Pass 2: convert straight into the result.  The limit has to be the byte
  // length from pass 1: a limit derived from the wide-character count is too
  // small as soon as one character needs more than one byte, which cuts the
  // output short and leaves it without a terminator.
  std::string result(length, '\0');
  const size_t written = wcstombs(&result[0], src, length);
  if (written == static_cast<size_t>(-1))
    return std::string();
  result.resize(written);
  return result;
}
// Stream inserter for tregex: writes the text returned by i_data.str() and
// hands back the stream so insertions can be chained.
417 tostream &operator<<(tostream &i_ost, const tregex &i_data)
419 return i_ost << i_data.str();
// Lower-cases a string element by element, giving MBCS lead bytes separate
// treatment from ordinary characters.
424 tstring toLower(const tstring &i_str)
// Index loop over the working string "str".
427 for (size_t i = 0; i < str.size(); ++ i)
// Lead bytes are detected first (presumably so that the byte after one is
// not case-folded -- confirm).
429 if (_ismbblead(str[i]))
// NOTE(review): if tstring is char based, a negative char value reaches
// tolower() here, which is undefined behaviour -- confirm and cast to
// unsigned char if so.
432 str[i] = tolower(str[i]);
// convert wstring to UTF-8
//
// Encoding used for each code point:
//   0xxxxxxx                            : 000000 - 00007F
//   110xxxxx 10xxxxxx                   : 000080 - 0007FF
//   1110xxxx 10xxxxxx 10xxxxxx          : 000800 - 00FFFF
//   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx : 010000 - 10FFFF
//
// A UTF-16 surrogate pair (16-bit wchar_t) is combined into one code point
// and written as a single four-byte sequence; a 32-bit wchar_t above FFFF is
// written the same way.  An unpaired surrogate is still written as a
// three-byte sequence.  Values that are not code points at all (above 10FFFF
// or negative) produce no output.
std::string to_UTF_8(const std::wstring &i_str)
{
  std::string result;
  result.reserve(i_str.size());		// exact for pure ASCII input

  for (std::wstring::const_iterator i = i_str.begin(); i != i_str.end(); ++ i)
  {
    // A negative wchar_t turns into a huge value here and is rejected by the
    // range checks below.
    unsigned long c = static_cast<unsigned long>(*i);

    // high surrogate followed by a low surrogate: merge and consume both
    if (0xd800 <= c && c <= 0xdbff)
    {
      const std::wstring::const_iterator next = i + 1;
      if (next != i_str.end())
      {
	const unsigned long low = static_cast<unsigned long>(*next);
	if (0xdc00 <= low && low <= 0xdfff)
	{
	  c = 0x10000 + ((c - 0xd800) << 10) + (low - 0xdc00);
	  i = next;
	}
      }
    }

    if (c <= 0x007f)
      result += static_cast<char>(c);
    else if (c <= 0x07ff)
    {
      result += static_cast<char>(( c >> 6         ) | 0xc0);
      result += static_cast<char>(( c        & 0x3f) | 0x80);
    }
    else if (c <= 0xffff)
    {
      result += static_cast<char>(( c >> 12        ) | 0xe0);
      result += static_cast<char>(((c >> 6)  & 0x3f) | 0x80);
      result += static_cast<char>(( c        & 0x3f) | 0x80);
    }
    else if (c <= 0x10ffff)
    {
      result += static_cast<char>(( c >> 18        ) | 0xf0);
      result += static_cast<char>(((c >> 12) & 0x3f) | 0x80);
      result += static_cast<char>(((c >> 6)  & 0x3f) | 0x80);
      result += static_cast<char>(( c        & 0x3f) | 0x80);
    }
  }

  return result;
}