From cc40bc6c69615bc8b936d966f457d31bbc9cabc8 Mon Sep 17 00:00:00 2001 From: Keith Marshall Date: Tue, 3 Mar 2020 21:58:11 +0000 Subject: [PATCH] Reimplement btowc(), and wctob() functions. --- mingwrt/ChangeLog | 22 ++++++++ mingwrt/include/wchar.h | 14 ++++++ mingwrt/mingwex/btowc.c | 131 ++++++++++++++++++++++++++++++++++++++++++------ mingwrt/mingwex/wctob.c | 120 +++++++++++++++++++++++++++++++++++++------- 4 files changed, 253 insertions(+), 34 deletions(-) diff --git a/mingwrt/ChangeLog b/mingwrt/ChangeLog index 0d32b91..b06bc3f 100644 --- a/mingwrt/ChangeLog +++ b/mingwrt/ChangeLog @@ -1,3 +1,25 @@ +2020-03-03 Keith Marshall + + Reimplement btowc(), and wctob() functions. + + * include/wchar.h [__MSVCRT_VERSION__ < __MSVCR80_DLL] + (btowc, wctob): Implement them as static inline redirects... + (__msvcrt_btowc, __msvcrt_wctob): ...to these; declare them, with + additional MinGW specific alternatives declared... + (__mingw_btowc, __mingw_wctob): ...thus. + + * mingwex/btowc.c: Rewrite as new; it now implements... + (__mingw_btowc, __msvcrt_btowc): ...these public API functions, either + of which serves as a suitable replacement entry point for... + (btowc): ...this, with ultimate fallback served by... + (__mingw_btowc_fallback): ...this private function. + + * mingwex/wctob.c: Rewrite as new; it now implements... + (__mingw_wctob, __msvcrt_wctob): ...these public API functions, either + of which serves as a suitable replacement entry point for... + (wctob): ...this, with ultimate fallback served by... + (__mingw_wctob_fallback): ...this private function. + 2020-03-02 Keith Marshall Reimplement mbrlen(), mbrtowc(), and mbsrtowcs() functions. diff --git a/mingwrt/include/wchar.h b/mingwrt/include/wchar.h index 95214e8..56fe9b8 100644 --- a/mingwrt/include/wchar.h +++ b/mingwrt/include/wchar.h @@ -560,6 +560,8 @@ __cdecl __MINGW_NOTHROW size_t wcsrtombs * it exists in the process address space; otherwise, execution * will fall back to a MinGW implementation... 
*/ +__cdecl __MINGW_NOTHROW wint_t __msvcrt_btowc (int); + __cdecl __MINGW_NOTHROW size_t __msvcrt_mbrlen (const char *__restrict__, size_t, mbstate_t *__restrict__); @@ -569,6 +571,8 @@ __cdecl __MINGW_NOTHROW size_t __msvcrt_mbrtowc __cdecl __MINGW_NOTHROW size_t __msvcrt_mbsrtowcs (wchar_t *__restrict__, const char **__restrict__, size_t, mbstate_t *__restrict__); +__cdecl __MINGW_NOTHROW int __msvcrt_wctob (wint_t); + __cdecl __MINGW_NOTHROW size_t __msvcrt_wcrtomb (char * __restrict__, wchar_t, mbstate_t *__restrict__); @@ -579,6 +583,8 @@ __cdecl __MINGW_NOTHROW size_t __msvcrt_wcsrtombs * fall back implementations, without considering any possible * reference to MSVCRT.DLL or MSVCR80.DLL implementations. */ +__cdecl __MINGW_NOTHROW wint_t __mingw_btowc (int); + __cdecl __MINGW_NOTHROW size_t __mingw_mbrlen (const char *__restrict__, size_t, mbstate_t *__restrict__); @@ -588,6 +594,8 @@ __cdecl __MINGW_NOTHROW size_t __mingw_mbrtowc __cdecl __MINGW_NOTHROW size_t __mingw_mbsrtowcs (wchar_t *__restrict__, const char **__restrict__, size_t, mbstate_t *__restrict__); +__cdecl __MINGW_NOTHROW int __mingw_wctob (wint_t); + __cdecl __MINGW_NOTHROW size_t __mingw_wcrtomb (char * __restrict__, wchar_t, mbstate_t *__restrict__); @@ -603,6 +611,9 @@ __cdecl __MINGW_NOTHROW size_t __mingw_wcsrtombs * the libmingwex.a implementations, (which will delegate the calls * to the Microsoft DLL implementations, when they are available). 
*/ +__CRT_ALIAS __cdecl __MINGW_NOTHROW wint_t btowc (int __c) +{ return __msvcrt_btowc( __c ); } + __CRT_ALIAS __cdecl __MINGW_NOTHROW size_t mbrlen (const char *__mbc, size_t __n, mbstate_t *__ps) { return __msvcrt_mbrlen( __mbc, __n, __ps ); } @@ -615,6 +626,9 @@ __CRT_ALIAS __cdecl __MINGW_NOTHROW size_t mbsrtowcs (wchar_t *__wcs, const char **__mbs, size_t __n, mbstate_t *__ps) { return __msvcrt_mbsrtowcs( __wcs, __mbs, __n, __ps ); } +__CRT_ALIAS __cdecl __MINGW_NOTHROW int wctob (wint_t __wc) +{ return __msvcrt_wctob( __wc ); } + __CRT_ALIAS __cdecl __MINGW_NOTHROW size_t wcrtomb (char * __mbc, wchar_t __wc, mbstate_t *__ps) { return __msvcrt_wcrtomb(__mbc, __wc, __ps); } diff --git a/mingwrt/mingwex/btowc.c b/mingwrt/mingwex/btowc.c index 26b8870..77db09c 100644 --- a/mingwrt/mingwex/btowc.c +++ b/mingwrt/mingwex/btowc.c @@ -1,19 +1,120 @@ -#include "mb_wc_common.h" -#include +/* + * btowc.c + * + * Implementation of an ISO-C99 conforming btowc() function; note that, + * since this considers only one byte for conversion, and a single byte + * can never convert to a surrogate pair, this is not susceptible to the + * potential wchar_t overflow error, which may occur with functions such + * as mbrtowc(), which may need to return surrogate pairs. + * + * $Id$ + * + * Written by Keith Marshall + * Copyright (C) 2020, MinGW.org Project + * + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice, this permission notice, and the following + * disclaimer shall be included in all copies or substantial portions of + * the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OF OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ +#include "wcharmap.h" + +/* For runtime delegation, we need a mechanism for detection of an + * implementation, within the default C runtime DLL; we may use the + * MinGW dlfcn emulation, to facilitate this. + */ +#include <dlfcn.h> + +/* We also need <limits.h>, for UCHAR_MAX, and <stdio.h>, for EOF. + */ +#include <limits.h> #include <stdio.h> -#define WIN32_LEAN_AND_MEAN -#include <windows.h> -wint_t btowc (int c) +/* We need to look up the effective working codeset, before choosing + * between MSVCRT.DLL and MinGW fallback implementations; to avoid a + * need to look it up again, within the MinGW fallback, we store the + * result of the initial look up in this file-global variable. + */ +static __thread unsigned int cs; + +static wint_t __mingw_btowc_fallback( int c ) +{ + /* Fallback function, providing an implementation of the btowc() + * function, when none is available within the Microsoft runtime. + * This performs an MBCS to wchar_t conversion on the given single + * character argument, (expressed as an int), returning WEOF in + * the event that conversion fails.
+ */ + wint_t wc = WEOF; + + if( c != EOF ) + { if( (cs == 0) && (UCHAR_MAX >= (unsigned int)(c)) ) return (wchar_t)(c); + MultiByteToWideChar( cs, MB_ERR_INVALID_CHARS, (char *)(&c), 1, &wc, 1 ); + } + return wc; +} + +wint_t __mingw_btowc( int c ) { - if (c == EOF) - return (WEOF); - else - { - unsigned char ch = c; - wchar_t wc = WEOF; - MultiByteToWideChar (get_codepage(), MB_ERR_INVALID_CHARS, - (char*)&ch, 1, &wc, 1); - return wc; - } + /* Wrapper for the btowc() function; this will unconditionally + * delegate the call to the MinGW fallback implementation, (as + * implemented above), after initialization of the effective + * codeset file-global variable. + */ + cs = __mb_codeset_for_locale(); + return __mingw_btowc_fallback( c ); } + +wint_t __msvcrt_btowc( int c ) +{ + /* Wrapper for the btowc() function; it will initially attempt + * to delegate the call to a Microsoft-provided implementation, + * but if no such implementation can be found, fall back to the + * MinGW substitute (defined above). + */ + static wint_t (*redirector_hook)( int ) = NULL; + + /* MSVCRT.DLL's setlocale() cannot reliably handle code pages with + * more than two bytes per code point, (e.g. UTF-7 and UTF-8); thus, + * Microsoft's btowc() is likely to be similarly unreliable, so we + * always use the MinGW fallback with such code pages. + */ + if( __mb_cur_max_for_codeset( cs = __mb_codeset_for_locale() ) > 2 ) + return __mingw_btowc_fallback( c ); + + /* On first time call, we don't know which implementation is to be + * selected; look for a Microsoft implementation, which, if available, + * may be registered for immediate use on this, and any subsequent, + * calls to this function wrapper... + */ + if( (redirector_hook == NULL) + && ((redirector_hook = dlsym( RTLD_DEFAULT, "btowc" )) == NULL) ) + + /* ...but when no Microsoft implementation can be found, register + * the MinGW fall back in its stead. 
+ */ + redirector_hook = __mingw_btowc_fallback; + + /* Finally, delegate the call to whichever implementation has been + * registered on first-time call. + */ + return redirector_hook( c ); +} + +/* $RCSfile$: end of file */ diff --git a/mingwrt/mingwex/wctob.c b/mingwrt/mingwex/wctob.c index ee5d014..e52e201 100644 --- a/mingwrt/mingwex/wctob.c +++ b/mingwrt/mingwex/wctob.c @@ -1,21 +1,103 @@ -#include "mb_wc_common.h" -#include +/* + * wctob.c + * + * Implementation of ISO-C99 wctob() function, supporting it on legacy + * Windows versions, for which MSVCRT.DLL doesn't provide it, otherwise + * delegating to the Microsoft implementation, except in specific cases + * when that implementation may not support the active MBCS codeset. + * + * + * $Id$ + * + * Written by Keith Marshall + * Copyright (C) 2020, MinGW.org Project + * + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice, this permission notice, and the following + * disclaimer shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OF OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "wcharmap.h" + +/* For runtime delegation, we need a mechanism for detection of an + * implementation, within the default C runtime DLL; we may use the + * MinGW dlfcn emulation, to facilitate this. + */ +#include #include -#include -#include -#define WIN32_LEAN_AND_MEAN -#include - -/* Return just the first byte after translating to multibyte. */ -int wctob (wint_t wc ) -{ - wchar_t w = wc; - char c; - int invalid_char = 0; - if (!WideCharToMultiByte (get_codepage(), - 0 /* Is this correct flag? */, - &w, 1, &c, 1, NULL, &invalid_char) - || invalid_char) - return EOF; - return (int) c; + +static int __mingw_wctob_fallback( wint_t wc ) +{ /* Fallback function, providing an implementation of the wctob() + * function, when none is available within the Microsoft runtime. + * This performs a wchar_t to MBCS conversion on the given single + * wide character argument, capturing the conversion into a local + * buffer, checks that the result occupies exactly one byte, for + * which the coercion of that byte value to int is returned, or + * otherwise returns EOF. + */ + union { unsigned char u; char c; } retval; + return (__mingw_wctomb_convert( &retval.c, 1, &wc, 1 ) == 1) + ? (int)(retval.u) : EOF; +} + +int __mingw_wctob( wint_t wc ) +{ /* Wrapper for the wctob() function; this variant will unconditionally + * delegate the call to the MinGW fallback implementation, after first + * storing the effective codeset index. + */ + (void)(__mingw_wctomb_codeset_init()); + return __mingw_wctob_fallback( wc ); } + +int __msvcrt_wctob( wint_t wc ) +{ /* Wrapper for the wctob() function; it will initially attempt + * to delegate the call to a Microsoft-provided implementation, + * but if no such implementation can be found, fall back to the + * MinGW substitute (defined above). + */ + static int (*redirector_hook)( wchar_t ) = NULL; + + /* MSVCRT.DLL's setlocale() cannot reliably handle code pages with + * more than two bytes per code point, (e.g. 
UTF-7 and UTF-8); thus, + * Microsoft's wctob() is likely to be similarly unreliable, so we + * always use the MinGW fallback with such code pages. + */ + if( __mingw_wctomb_cur_max_init(__mingw_wctomb_codeset_init()) > 2 ) + return __mingw_wctob_fallback( wc ); + + /* On first time call, we don't know which implementation is to be + * selected; look for a Microsoft implementation, which, if available, + * may be registered for immediate use on this, and any subsequent, + * calls to this function wrapper... + */ + if( (redirector_hook == NULL) + && ((redirector_hook = dlsym( RTLD_DEFAULT, "wctob" )) == NULL) ) + + /* ...but when no Microsoft implementation can be found, register + * the MinGW fall back in its stead. + */ + redirector_hook = __mingw_wctob_fallback; + + /* Finally, delegate the call to whichever implementation has been + * registered on first-time call. + */ + return redirector_hook( wc ); +} + +/* $RCSfile$: end of file */ -- 2.11.0