mingwrt/man/wcsrtombs.3.man

   1 .\" vim: ft=nroff
   2 .TH %PAGEREF% MinGW "MinGW Programmer's Reference Manual"
   3 .
   4 .SH NAME
   5 .B \%wcsrtombs
   6 \- convert a wide character string to a multibyte character string
   7 .
   8 .
   9 .SH SYNOPSIS
  10 .B  #include
  11 .RB < wchar.h >
  12 .PP
  13 .B  size_t wcsrtombs( char
  14 .BI * dst ,
  15 .B  const wchar_t
  16 .BI ** src ,
  17 .B  size_t
  18 .IB len ,
  19 .B  mbstate_t
  20 .BI * ps
  21 .B  );
  22 .
  23 .
  24 .SH DESCRIPTION
  25 The
  26 .BR \%wcsrtombs ()
  27 function converts a sequence of wide characters from
  28 the array which is indirectly pointed to by
  29 .IR src ,
  30 to a corresponding multibyte character sequence in
  31 the codeset which is associated with the
  32 .B \%LC_CTYPE
  33 category of the active process locale,
  34 beginning in the conversion state which is represented by the
  35 .B \%mbstate_t
  36 object at
  37 .IR *ps ;
  38 each wide character is converted,
  39 as if by calling the
  40 .BR \%wcrtomb (3)
  41 function.
  42 .
  43 .PP
  44 Conversion continues until:
  45 .RS 2n
  46 .ll -2n
  47 .IP \(bu 2n
  48 A wide character which is invalid in its own context is encountered.
  49 .
  50 .IP \(bu 2n
  51 A wide character which does not have a valid representation within
  52 the target multibyte codeset is encountered.
  53 .
  54 .IP \(bu 2n
  55 The NUL wide character is encountered,
  56 while in the initial conversion state.
  57 .
  58 .IP \(bu 2n
  59 The
  60 .I dst
  61 argument is not a NULL pointer,
  62 and a wide character is encountered for which
  63 the converted length would cause the aggregate length
  64 of the converted multibyte character string to exceed
  65 the limit specified by the
  66 .I len
  67 argument.
  68 .ll +2n
  69 .RE
  70 .
  71 .PP
  72 If
  73 .I dst
  74 is
  75 .I not
  76 a NULL pointer,
  77 the multibyte character string resulting from successful conversion,
  78 up to a maximum of
  79 .I len
  80 bytes,
  81 is stored in the multibyte array starting at
  82 .IR dst .
  83 If the conversion is NUL terminated,
  84 the wide character string reference pointed to by
  85 .I src
  86 is replaced by a NULL pointer;
  87 otherwise it is updated to point to the address immediately
  88 following that of the last wide character converted.
  89 .
  90 .PP
  91 If
  92 .I dst
  93 is a NULL pointer,
  94 the aggregate count of bytes required
  95 to represent the conversion is accumulated,
  96 until any one of the preceding termination conditions is encountered;
  97 the
  98 .I len
  99 argument,
 100 and the termination condition which is dependent upon it,
 101 are ignored,
 102 and the conversion is not stored.
 103 .
 104 .PP
 105 If
 106 .I ps
 107 is a NULL pointer,
 108 the
 109 .BR \%wcsrtombs ()
 110 function uses a static internal
 111 .B \%mbstate_t
 112 object,
 113 which is known only to,
 114 and visible only within the scope of execution of,
 115 the
 116 .BR \%wcsrtombs ()
 117 function itself.
 118 .
 119 .PP
 120 Following a successful conversion,
 121 the
 122 .B \%mbstate_t
 123 object at
 124 .IR *ps ,
 125 or the internal
 126 .B \%mbstate_t
 127 object if appropriate,
 128 is reset to the initial conversion state.
 129 .
 130 .
 131 .SH RETURN VALUE
 132 When conversion is successful,
 133 and
 134 .I dst
 135 is
 136 .I not
 137 a NULL pointer,
 138 the
 139 .BR \%wcsrtombs ()
 140 function returns the number of bytes stored at
 141 .IR dst ,
 142 to represent the resulting multibyte character sequence,
 143 .I excluding
 144 the terminating NUL,
 145 (if any).
 146 .
 147 .PP
 148 Conversely,
 149 when conversion is successful,
 150 but
 151 .IR dst " is"
 152 a NULL pointer,
 153 the
 154 .BR \%wcsrtombs ()
 155 function returns the number of bytes which would be required
 156 to store the entire multibyte character string resulting from
 157 the successful conversion,
 158 .I excluding
 159 the terminating NUL.
 160 .
 161 .
 162 .SH ERROR CONDITIONS
 163 If conversion is unsuccessful,
 164 .I \%errno
 165 is set to
 166 .BR \%EILSEQ ,
 167 the
 168 .BR \%wcsrtombs ()
 169 function returns
 170 .IR (size_t)(\-1) ,
 171 and the conversion state is unspecified.
 172 .
 173 .
 174 .SH STANDARDS CONFORMANCE
 175 Except in respect of its extended provision for handling of
 176 .IR surrogate\ pairs ,
 177 and to the extent that it may be affected by limitations
 178 of the underlying \%MS\(hyWindows API,
 179 the
 180 .I \%libmingwex
 181 implementation of
 182 .BR \%wcsrtombs ()
 183 conforms generally to
 184 .BR \%ISO\(hyC99 ,
 185 .BR \%POSIX.1\(hy2001 ,
 186 and
 187 .BR \%POSIX.1\(hy2008 .
 188 .
 189 .
 190 .\"SH EXAMPLE
 191 .
 192 .
 193 .SH CAVEATS AND BUGS
 194 Due to a documented limitation of Microsoft\(aqs
 195 .BR \%setlocale ()
 196 function implementation,
 197 it is not possible to directly select an active locale,
 198 in which the codeset is represented by any multibyte
 199 character sequence with an effective
 200 .B \%MB_CUR_MAX
 201 of more than two bytes.
 202 Prior to \%mingwrt\(hy5.3,
 203 this limitation precludes the use of
 204 .BR \%wcsrtombs ()
 205 to convert to any codeset with
 206 .B \%MB_CUR_MAX
 207 greater than two bytes,
 208 (such as
 209 .BR \%UTF\(hy8 ).
 210 From \%mingwrt\(hy5.3 onward,
 211 the MinGW.org implementation of
 212 .BR \%wcsrtombs ()
 213 mitigates this limitation by assignment of the codeset
 214 from the
 215 .B \%LC_CTYPE
 216 environment variable,
 217 provided the system default has been previously activated
 218 for the
 219 .B \%LC_CTYPE
 220 locale category;
 221 e.g.\ execution of:
 222 .PP
 223 .RS 4n
 224 .EX
 225 #define _ISOC99_SOURCE
 226
 227 #include <stdio.h>
 228 #include <stdlib.h>
 229 #include <locale.h>
 230 #include <wchar.h>
 231
 232 void print_conv( const wchar_t * );
 233
 234 int main()
 235 {
 236   setlocale( LC_CTYPE, "" );
 237   putenv( "LC_CTYPE=en_GB.65001" );
 238   print_conv( L"\eu6c34\eU0001d10b" );
 239   return 0;
 240 }
 241
 242 void print_conv( const wchar_t *wcs )
 243 {
 244   size_t len;
 245   if( (len = 1 + wcsrtombs( NULL, &wcs, 0, NULL )) > 0 )
 246   {
 247     const wchar_t *wc = wcs;
 248     size_t n = 1 + wcslen( wcs );
 249     unsigned char mbs[len], *mb = mbs;
 250     printf( "UTF-16: %u value%s: ", n, (n == 1) ? "" : "s" );
 251     do { printf( "0x%04X%c", *wc, (*wc == L'\e0') ? '\en' : ':' );
 252        } while( *wc++ != L'\e0' );
 253     printf( "UTF-8: %u byte%s: ",
 254         1 + wcsrtombs( mbs, &wcs, len, NULL ),
 255         (len == 1) ? "" : "s"
 256       );
 257     do { printf( "0x%02X%c", *mb, (*mb == '\e0') ? '\en' : ':' );
 258        } while( *mb++ != '\e0' );
 259   }
 260   else perror( "wcsrtombs" );
 261 }
 262 .EE
 263 .RE
 264 .PP
 265 will select
 266 .B \%UTF\(hy8
 267 as the target codeset,
 268 then convert the \fC\%L"\eu6c34\eU0001d10b"\fP
 269 wide character string,
 270 resulting in the output:
 271 .PP
 272 .RS 4n
 273 .EX
 274 UTF-16: 4 values: 0x6C34:0xD834:0xDD0B:0x0000
 275 UTF-8: 8 bytes: 0xE6:0xB0:0xB4:0xF0:0x9D:0x84:0x8B:0x00
 276 .EE
 277 .RE
 278 .
 279 .
 280 .SH SEE ALSO
 281 .BR mbsinit (3),
 282 and
 283 .BR wcrtomb (3)
 284 .
 285 .
 286 .SH AUTHOR
 287 This manpage was written by \%Keith\ Marshall,
 288 \%<keith@users.osdn.me>,
 289 to document the
 290 .BR \%wcsrtombs ()
 291 function as it has been implemented for the MinGW.org Project.
 292 It may be copied, modified and redistributed,
 293 without restriction of copyright,
 294 provided this acknowledgement of contribution by
 295 the original author remains in place.
 296 .
 297 .\" EOF