From: Elliott Hughes Date: Fri, 18 Nov 2016 02:18:08 +0000 (-0800) Subject: Use icu4c to implement <wctype.h>. X-Git-Tag: android-x86-8.1-r1~224^2^2~85^2~7^2 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=a57ca0da69ab9a3f870a584ba1ddab9af432c639;p=android-x86%2Fbionic.git Use icu4c to implement <wctype.h>. Pretty useless, because the POSIX APIs are useless for actual internationalization, but it lets us put this to bed for good. Bug: http://b/18492914 Test: bionic tests Change-Id: I4dd0aff66c44b5547039be3ffea806c865b9014a --- diff --git a/libc/Android.bp b/libc/Android.bp index f326db3a8..cc8d8f5ac 100644 --- a/libc/Android.bp +++ b/libc/Android.bp @@ -1623,6 +1623,7 @@ cc_library { static: { srcs: [ "bionic/dl_iterate_phdr_static.cpp", + "bionic/icu_static.cpp", "bionic/malloc_common.cpp", "bionic/libc_init_static.cpp", ], @@ -1633,6 +1634,7 @@ cc_library { srcs: [ "arch-common/bionic/crtbegin_so.c", "arch-common/bionic/crtbrand.S", + "bionic/icu.cpp", "bionic/malloc_common.cpp", "bionic/libc_init_dynamic.cpp", "bionic/NetdClient.cpp", diff --git a/libc/bionic/icu.cpp b/libc/bionic/icu.cpp new file mode 100644 index 000000000..abc0eec0f --- /dev/null +++ b/libc/bionic/icu.cpp @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "private/icu.h" + +#include +#include +#include +#include + +#include "private/libc_logging.h" + +// Allowed icu4c version numbers are in the range [44, 999]. +// Gingerbread's icu4c 4.4 is the minimum supported ICU version. +static constexpr auto ICUDATA_VERSION_MIN_LENGTH = 2; +static constexpr auto ICUDATA_VERSION_MAX_LENGTH = 3; +static constexpr auto ICUDATA_VERSION_MIN = 44; + +static char g_icudata_version[ICUDATA_VERSION_MAX_LENGTH + 1]; + +static void* g_libicuuc_handle = nullptr; + +static int __icu_dat_file_filter(const dirent* dirp) { + const char* name = dirp->d_name; + + // Is the name the right length to match 'icudt(\d\d\d)l.dat'? + const size_t len = strlen(name); + if (len < 10 + ICUDATA_VERSION_MIN_LENGTH || len > 10 + ICUDATA_VERSION_MAX_LENGTH) return 0; + + return !strncmp(name, "icudt", 5) && !strncmp(&name[len - 5], "l.dat", 5); +} + +static bool __find_icu() { + dirent** namelist = nullptr; + int n = scandir("/system/usr/icu", &namelist, &__icu_dat_file_filter, alphasort); + int max_version = -1; + while (n--) { + // We prefer the latest version available. 
+ int version = atoi(&namelist[n]->d_name[strlen("icudt")]); + if (version != 0 && version > max_version) max_version = version; + free(namelist[n]); + } + free(namelist); + + if (max_version == -1 || max_version < ICUDATA_VERSION_MIN) { + __libc_write_log(ANDROID_LOG_ERROR, "bionic-icu", "couldn't find an ICU .dat file"); + return false; + } + + snprintf(g_icudata_version, sizeof(g_icudata_version), "_%d", max_version); + + g_libicuuc_handle = dlopen("libicuuc.so", RTLD_LOCAL); + if (g_libicuuc_handle == nullptr) { + __libc_format_log(ANDROID_LOG_ERROR, "bionic-icu", "couldn't open libicuuc.so: %s", dlerror()); + return false; + } + + return true; +} + +void* __find_icu_symbol(const char* symbol_name) { + static bool found_icu = __find_icu(); + if (!found_icu) return nullptr; + + char versioned_symbol_name[strlen(symbol_name) + sizeof(g_icudata_version)]; + snprintf(versioned_symbol_name, sizeof(versioned_symbol_name), "%s%s", + symbol_name, g_icudata_version); + + void* symbol = dlsym(g_libicuuc_handle, versioned_symbol_name); + if (symbol == nullptr) { + __libc_format_log(ANDROID_LOG_ERROR, "bionic-icu", "couldn't find %s", versioned_symbol_name); + } + return symbol; +} diff --git a/libc/bionic/icu_static.cpp b/libc/bionic/icu_static.cpp new file mode 100644 index 000000000..cf24a381c --- /dev/null +++ b/libc/bionic/icu_static.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "private/icu.h" + +// We don't have dlopen/dlsym for static binaries yet. +void* __find_icu_symbol(const char*) { + return nullptr; +} diff --git a/libc/bionic/wctype.cpp b/libc/bionic/wctype.cpp index cd8c39b0b..64c346722 100644 --- a/libc/bionic/wctype.cpp +++ b/libc/bionic/wctype.cpp @@ -34,26 +34,53 @@ #include #include -// These functions are either defined to be the same as their ASCII cousins, -// or defined in terms of other functions. 
-int iswalnum(wint_t wc) { return iswdigit(wc) || iswalpha(wc); } -int iswblank(wint_t wc) { return isblank(wc); } -int iswdigit(wint_t wc) { return isdigit(wc); } -int iswgraph(wint_t wc) { return !iswspace(wc) && iswprint(wc); } -int iswlower(wint_t wc) { - return towlower(wc) == wc && !(iswcntrl(wc) || iswdigit(wc) || iswpunct(wc) || iswspace(wc)); +#include "private/icu.h" + +static constexpr int UCHAR_ALPHABETIC = 0; +static constexpr int UCHAR_LOWERCASE = 22; +static constexpr int UCHAR_POSIX_ALNUM = 44; +static constexpr int UCHAR_POSIX_BLANK = 45; +static constexpr int UCHAR_POSIX_GRAPH = 46; +static constexpr int UCHAR_POSIX_PRINT = 47; +static constexpr int UCHAR_POSIX_XDIGIT = 48; +static constexpr int UCHAR_UPPERCASE = 30; +static constexpr int UCHAR_WHITE_SPACE = 31; + +static constexpr int U_CONTROL_CHAR = 15; + +static bool __icu_hasBinaryProperty(wint_t wc, int property, int (*fallback)(int)) { + typedef int (*FnT)(wint_t, int); + static auto u_hasBinaryProperty = reinterpret_cast<FnT>(__find_icu_symbol("u_hasBinaryProperty")); + return u_hasBinaryProperty ? 
u_hasBinaryProperty(wc, property) : fallback(wc); } -int iswupper(wint_t wc) { - return towupper(wc) == wc && !(iswcntrl(wc) || iswdigit(wc) || iswpunct(wc) || iswspace(wc)); + +int iswalnum(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_ALNUM, isalnum); } +int iswalpha(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_ALPHABETIC, isalpha); } +int iswblank(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_BLANK, isblank); } +int iswgraph(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_GRAPH, isgraph); } +int iswlower(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_LOWERCASE, islower); } +int iswprint(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_PRINT, isprint); } +int iswspace(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_WHITE_SPACE, isspace); } +int iswupper(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_UPPERCASE, isupper); } +int iswxdigit(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_XDIGIT, isxdigit); } + +int iswcntrl(wint_t wc) { + typedef int (*FnT)(wint_t); + static auto u_charType = reinterpret_cast<FnT>(__find_icu_symbol("u_charType")); + return u_charType ? (u_charType(wc) == U_CONTROL_CHAR) : iscntrl(wc); +} + +int iswdigit(wint_t wc) { + typedef int (*FnT)(wint_t); + static auto u_isdigit = reinterpret_cast<FnT>(__find_icu_symbol("u_isdigit")); + return u_isdigit ? u_isdigit(wc) : isdigit(wc); } -int iswxdigit(wint_t wc) { return isxdigit(wc); } -// TODO: need proper implementations of these. -int iswalpha(wint_t wc) { return isalpha(wc); } -int iswcntrl(wint_t wc) { return iscntrl(wc); } -int iswprint(wint_t wc) { return isprint(wc); } -int iswpunct(wint_t wc) { return ispunct(wc); } -int iswspace(wint_t wc) { return isspace(wc); } +int iswpunct(wint_t wc) { + typedef int (*FnT)(wint_t); + static auto u_ispunct = reinterpret_cast<FnT>(__find_icu_symbol("u_ispunct")); + return u_ispunct ? 
u_ispunct(wc) : ispunct(wc); +} int iswalnum_l(wint_t c, locale_t) { return iswalnum(c); } int iswalpha_l(wint_t c, locale_t) { return iswalpha(c); } @@ -90,12 +117,20 @@ int iswctype_l(wint_t wc, wctype_t char_class, locale_t) { return iswctype(wc, char_class); } -// TODO: need proper implementations of these. -wint_t towlower(wint_t wc) { return tolower(wc); } -wint_t towupper(wint_t wc) { return toupper(wc); } +wint_t towlower(wint_t wc) { + typedef wchar_t (*FnT)(wchar_t); + static auto u_tolower = reinterpret_cast<FnT>(__find_icu_symbol("u_tolower")); + return u_tolower ? u_tolower(wc) : tolower(wc); +} + +wint_t towupper(wint_t wc) { + typedef wchar_t (*FnT)(wchar_t); + static auto u_toupper = reinterpret_cast<FnT>(__find_icu_symbol("u_toupper")); + return u_toupper ? u_toupper(wc) : toupper(wc); +} -wint_t towupper_l(int c, locale_t) { return towupper(c); } -wint_t towlower_l(int c, locale_t) { return towlower(c); } +wint_t towupper_l(wint_t c, locale_t) { return towupper(c); } +wint_t towlower_l(wint_t c, locale_t) { return towlower(c); } wctype_t wctype(const char* property) { static const char* const properties[WC_TYPE_MAX] = { diff --git a/libc/include/wctype.h b/libc/include/wctype.h index a0ed09af3..9e22a8f92 100644 --- a/libc/include/wctype.h +++ b/libc/include/wctype.h @@ -49,8 +49,8 @@ int iswspace_l(wint_t, locale_t) __INTRODUCED_IN(21); int iswupper_l(wint_t, locale_t) __INTRODUCED_IN(21); int iswxdigit_l(wint_t, locale_t) __INTRODUCED_IN(21); -wint_t towlower_l(int, locale_t) __INTRODUCED_IN(21); -wint_t towupper_l(int, locale_t) __INTRODUCED_IN(21); +wint_t towlower_l(wint_t, locale_t) __INTRODUCED_IN(21); +wint_t towupper_l(wint_t, locale_t) __INTRODUCED_IN(21); #else // Implemented as static inlines before 21. #endif diff --git a/libc/private/icu.h b/libc/private/icu.h new file mode 100644 index 000000000..c5d89b860 --- /dev/null +++ b/libc/private/icu.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _PRIVATE_ICU_H +#define _PRIVATE_ICU_H + +void* __find_icu_symbol(const char* symbol_name); + +#endif // _PRIVATE_ICU_H diff --git a/tests/Android.bp b/tests/Android.bp index 3e1e13b4c..643b295d1 100644 --- a/tests/Android.bp +++ b/tests/Android.bp @@ -385,6 +385,12 @@ cc_test { name: "bionic-unit-tests", defaults: ["bionic_unit_tests_defaults", "bionic_tests_defaults"], clang: true, + + target: { + android: { + shared_libs: ["libicuuc"], + }, + }, } cc_test { diff --git a/tests/wctype_test.cpp b/tests/wctype_test.cpp index fe2e374cb..84a0e656d 100644 --- a/tests/wctype_test.cpp +++ b/tests/wctype_test.cpp @@ -16,6 +16,8 @@ #include +#include + #include class UtfLocale { @@ -25,63 +27,75 @@ class UtfLocale { locale_t l; }; +// bionic's dlsym doesn't work in static binaries, so we can't access icu, +// so any unicode test case will fail. +static bool have_dl = (dlopen("libc.so", 0) != nullptr); + static void TestIsWideFn(int fn(wint_t), int fn_l(wint_t, locale_t), const wchar_t* trues, const wchar_t* falses) { UtfLocale l; for (const wchar_t* p = trues; *p; ++p) { + if (!have_dl && *p > 0x7f) { + GTEST_LOG_(INFO) << "skipping unicode test " << *p; + continue; + } EXPECT_TRUE(fn(*p)) << *p; EXPECT_TRUE(fn_l(*p, l.l)) << *p; } for (const wchar_t* p = falses; *p; ++p) { + if (!have_dl && *p > 0x7f) { + GTEST_LOG_(INFO) << "skipping unicode test " << *p; + continue; + } EXPECT_FALSE(fn(*p)) << *p; EXPECT_FALSE(fn_l(*p, l.l)) << *p; } } TEST(wctype, iswalnum) { - TestIsWideFn(iswalnum, iswalnum_l, L"1aA", L"! \b"); + TestIsWideFn(iswalnum, iswalnum_l, L"1aAÇçΔδ", L"! \b"); } TEST(wctype, iswalpha) { - TestIsWideFn(iswalpha, iswalpha_l, L"aA", L"1! \b"); + TestIsWideFn(iswalpha, iswalpha_l, L"aAÇçΔδ", L"1! \b"); } TEST(wctype, iswblank) { - TestIsWideFn(iswblank, iswblank_l, L" \t", L"1aA!\b"); + TestIsWideFn(iswblank, iswblank_l, L" \t", L"1aA!\bÇçΔδ"); } TEST(wctype, iswcntrl) { - TestIsWideFn(iswcntrl, iswcntrl_l, L"\b", L"1aA! 
"); + TestIsWideFn(iswcntrl, iswcntrl_l, L"\b\u009f", L"1aA! ÇçΔδ"); } TEST(wctype, iswdigit) { - TestIsWideFn(iswdigit, iswdigit_l, L"1", L"aA! \b"); + TestIsWideFn(iswdigit, iswdigit_l, L"1", L"aA! \bÇçΔδ"); } TEST(wctype, iswgraph) { - TestIsWideFn(iswgraph, iswgraph_l, L"1aA!", L" \b"); + TestIsWideFn(iswgraph, iswgraph_l, L"1aA!ÇçΔδ", L" \b"); } TEST(wctype, iswlower) { - TestIsWideFn(iswlower, iswlower_l, L"a", L"1A! \b"); + TestIsWideFn(iswlower, iswlower_l, L"açδ", L"1A! \bÇΔ"); } TEST(wctype, iswprint) { - TestIsWideFn(iswprint, iswprint_l, L"1aA! ", L"\b"); + TestIsWideFn(iswprint, iswprint_l, L"1aA! ÇçΔδ", L"\b"); } TEST(wctype, iswpunct) { - TestIsWideFn(iswpunct, iswpunct_l, L"!", L"1aA \b"); + TestIsWideFn(iswpunct, iswpunct_l, L"!", L"1aA \bÇçΔδ"); } TEST(wctype, iswspace) { - TestIsWideFn(iswspace, iswspace_l, L" \f\t", L"1aA!\b"); + TestIsWideFn(iswspace, iswspace_l, L" \f\t", L"1aA!\bÇçΔδ"); } TEST(wctype, iswupper) { - TestIsWideFn(iswupper, iswupper_l, L"A", L"1a! \b"); + TestIsWideFn(iswupper, iswupper_l, L"AÇΔ", L"1a! 
\bçδ"); } TEST(wctype, iswxdigit) { @@ -89,29 +103,65 @@ TEST(wctype, iswxdigit) { } TEST(wctype, towlower) { + EXPECT_EQ(WEOF, towlower(WEOF)); EXPECT_EQ(wint_t('!'), towlower(L'!')); EXPECT_EQ(wint_t('a'), towlower(L'a')); EXPECT_EQ(wint_t('a'), towlower(L'A')); + if (have_dl) { + EXPECT_EQ(wint_t(L'ç'), towlower(L'ç')); + EXPECT_EQ(wint_t(L'ç'), towlower(L'Ç')); + EXPECT_EQ(wint_t(L'δ'), towlower(L'δ')); + EXPECT_EQ(wint_t(L'δ'), towlower(L'Δ')); + } else { + GTEST_LOG_(INFO) << "skipping unicode towlower tests"; + } } TEST(wctype, towlower_l) { UtfLocale l; + EXPECT_EQ(WEOF, towlower(WEOF)); EXPECT_EQ(wint_t('!'), towlower_l(L'!', l.l)); EXPECT_EQ(wint_t('a'), towlower_l(L'a', l.l)); EXPECT_EQ(wint_t('a'), towlower_l(L'A', l.l)); + if (have_dl) { + EXPECT_EQ(wint_t(L'ç'), towlower_l(L'ç', l.l)); + EXPECT_EQ(wint_t(L'ç'), towlower_l(L'Ç', l.l)); + EXPECT_EQ(wint_t(L'δ'), towlower_l(L'δ', l.l)); + EXPECT_EQ(wint_t(L'δ'), towlower_l(L'Δ', l.l)); + } else { + GTEST_LOG_(INFO) << "skipping unicode towlower_l tests"; + } } TEST(wctype, towupper) { + EXPECT_EQ(WEOF, towupper(WEOF)); EXPECT_EQ(wint_t('!'), towupper(L'!')); EXPECT_EQ(wint_t('A'), towupper(L'a')); EXPECT_EQ(wint_t('A'), towupper(L'A')); + if (have_dl) { + EXPECT_EQ(wint_t(L'Ç'), towupper(L'ç')); + EXPECT_EQ(wint_t(L'Ç'), towupper(L'Ç')); + EXPECT_EQ(wint_t(L'Δ'), towupper(L'δ')); + EXPECT_EQ(wint_t(L'Δ'), towupper(L'Δ')); + } else { + GTEST_LOG_(INFO) << "skipping unicode towupper tests"; + } } TEST(wctype, towupper_l) { UtfLocale l; + EXPECT_EQ(WEOF, towupper_l(WEOF, l.l)); EXPECT_EQ(wint_t('!'), towupper_l(L'!', l.l)); EXPECT_EQ(wint_t('A'), towupper_l(L'a', l.l)); EXPECT_EQ(wint_t('A'), towupper_l(L'A', l.l)); + if (have_dl) { + EXPECT_EQ(wint_t(L'Ç'), towupper_l(L'ç', l.l)); + EXPECT_EQ(wint_t(L'Ç'), towupper_l(L'Ç', l.l)); + EXPECT_EQ(wint_t(L'Δ'), towupper_l(L'δ', l.l)); + EXPECT_EQ(wint_t(L'Δ'), towupper_l(L'Δ', l.l)); + } else { + GTEST_LOG_(INFO) << "skipping unicode towupper_l tests"; + } } 
TEST(wctype, wctype) {