From f42d4247ae1138c6deed50f92dcd1a4f34e07dec Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Tue, 10 Sep 2013 19:45:51 +0000 Subject: [PATCH] Add getenv() wrapper that works on multibyte environment variables. On Windows, the character encoding of multibyte environment variables varies depending on settings. I think the only reliable way to handle them is to use GetEnvironmentVariableW(). GetEnvironmentVariableW() works on wchar_t strings, which on Windows are UTF-16 strings. That's not ideal because we use UTF-8 as the internal encoding in LLVM. This patch defines a wrapper function for GetEnvironmentVariableW() that takes and returns UTF-8 strings. On Unix, the wrapper function does not do any conversion and just forwards the argument to getenv(). Differential Revision: http://llvm-reviews.chandlerc.com/D1612 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190423 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/Process.h | 7 +++ lib/Support/Unix/Process.inc | 9 +++ lib/Support/Windows/Path.inc | 117 ++++++++++++++++++++------------------ lib/Support/Windows/Process.inc | 30 ++++++++++ lib/Support/Windows/Windows.h | 13 +++++ unittests/Support/ProcessTest.cpp | 28 +++++++++ 6 files changed, 148 insertions(+), 56 deletions(-) diff --git a/include/llvm/Support/Process.h b/include/llvm/Support/Process.h index a23c4add429..6d6add0f3e4 100644 --- a/include/llvm/Support/Process.h +++ b/include/llvm/Support/Process.h @@ -25,11 +25,14 @@ #ifndef LLVM_SUPPORT_PROCESS_H #define LLVM_SUPPORT_PROCESS_H +#include "llvm/ADT/Optional.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/TimeValue.h" namespace llvm { +class StringRef; + namespace sys { class self_process; @@ -161,6 +164,10 @@ public: /// @brief Prevent core file generation. static void PreventCoreFiles(); + // This function returns the environment variable \arg name's value as a UTF-8 + // string. \arg Name is assumed to be in UTF-8 encoding too. 
+ static Optional GetEnv(StringRef name); + /// This function determines if the standard input is connected directly /// to a user's input (keyboard probably), rather than coming from a file /// or pipe. diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc index 47d0a3c794d..7d8f6250136 100644 --- a/lib/Support/Unix/Process.inc +++ b/lib/Support/Unix/Process.inc @@ -13,6 +13,7 @@ #include "Unix.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/MutexGuard.h" #include "llvm/Support/TimeValue.h" @@ -181,6 +182,14 @@ void Process::PreventCoreFiles() { #endif } +Optional Process::GetEnv(StringRef Name) { + std::string NameStr = Name.str(); + const char *Val = ::getenv(NameStr.c_str()); + if (!Val) + return None; + return std::string(Val); +} + bool Process::StandardInIsUserInput() { return FileDescriptorIsDisplayed(STDIN_FILENO); } diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index 645d5238dbc..94a501b39ac 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -37,6 +37,9 @@ typedef int errno_t; using namespace llvm; +using llvm::sys::windows::UTF8ToUTF16; +using llvm::sys::windows::UTF16ToUTF8; + namespace { typedef BOOLEAN (WINAPI *PtrCreateSymbolicLinkW)( /*__in*/ LPCWSTR lpSymlinkFileName, @@ -47,61 +50,6 @@ namespace { ::GetProcAddress(::GetModuleHandleA("kernel32.dll"), "CreateSymbolicLinkW")); - error_code UTF8ToUTF16(StringRef utf8, SmallVectorImpl &utf16) { - int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, - utf8.begin(), utf8.size(), - utf16.begin(), 0); - - if (len == 0) - return windows_error(::GetLastError()); - - utf16.reserve(len + 1); - utf16.set_size(len); - - len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, - utf8.begin(), utf8.size(), - utf16.begin(), utf16.size()); - - if (len == 0) - return windows_error(::GetLastError()); - - // Make utf16 null terminated. 
- utf16.push_back(0); - utf16.pop_back(); - - return error_code::success(); - } - - error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len, - SmallVectorImpl &utf8) { - // Get length. - int len = ::WideCharToMultiByte(CP_UTF8, 0, - utf16, utf16_len, - utf8.begin(), 0, - NULL, NULL); - - if (len == 0) - return windows_error(::GetLastError()); - - utf8.reserve(len); - utf8.set_size(len); - - // Now do the actual conversion. - len = ::WideCharToMultiByte(CP_UTF8, 0, - utf16, utf16_len, - utf8.data(), utf8.size(), - NULL, NULL); - - if (len == 0) - return windows_error(::GetLastError()); - - // Make utf8 null terminated. - utf8.push_back(0); - utf8.pop_back(); - - return error_code::success(); - } - error_code TempDir(SmallVectorImpl &result) { retry_temp_dir: DWORD len = ::GetTempPathW(result.capacity(), result.begin()); @@ -1092,7 +1040,64 @@ error_code openFileForWrite(const Twine &Name, int &ResultFD, ResultFD = FD; return error_code::success(); } - } // end namespace fs + +namespace windows { +llvm::error_code UTF8ToUTF16(llvm::StringRef utf8, + llvm::SmallVectorImpl &utf16) { + int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, + utf8.begin(), utf8.size(), + utf16.begin(), 0); + + if (len == 0) + return llvm::windows_error(::GetLastError()); + + utf16.reserve(len + 1); + utf16.set_size(len); + + len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, + utf8.begin(), utf8.size(), + utf16.begin(), utf16.size()); + + if (len == 0) + return llvm::windows_error(::GetLastError()); + + // Make utf16 null terminated. + utf16.push_back(0); + utf16.pop_back(); + + return llvm::error_code::success(); +} + +llvm::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len, + llvm::SmallVectorImpl &utf8) { + // Get length. 
+ int len = ::WideCharToMultiByte(CP_UTF8, 0, + utf16, utf16_len, + utf8.begin(), 0, + NULL, NULL); + + if (len == 0) + return llvm::windows_error(::GetLastError()); + + utf8.reserve(len); + utf8.set_size(len); + + // Now do the actual conversion. + len = ::WideCharToMultiByte(CP_UTF8, 0, + utf16, utf16_len, + utf8.data(), utf8.size(), + NULL, NULL); + + if (len == 0) + return llvm::windows_error(::GetLastError()); + + // Make utf8 null terminated. + utf8.push_back(0); + utf8.pop_back(); + + return llvm::error_code::success(); +} +} // end namespace windows } // end namespace sys } // end namespace llvm diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc index f840d064d8d..0191751a824 100644 --- a/lib/Support/Windows/Process.inc +++ b/lib/Support/Windows/Process.inc @@ -140,6 +140,36 @@ void Process::PreventCoreFiles() { SEM_NOOPENFILEERRORBOX); } +/// Returns the environment variable \arg Name's value as a string encoded in +/// UTF-8. \arg Name is assumed to be in UTF-8 encoding. +Optional Process::GetEnv(StringRef Name) { + // Convert the argument to UTF-16 to pass it to _wgetenv(). + SmallVector NameUTF16; + if (error_code ec = windows::UTF8ToUTF16(Name, NameUTF16)) + return None; + + // Environment variable can be encoded in non-UTF8 encoding, and there's no + // way to know what the encoding is. The only reliable way to look up + // multibyte environment variable is to use GetEnvironmentVariableW(). + std::vector Buf(16); + size_t Size = 0; + for (;;) { + Size = GetEnvironmentVariableW(&NameUTF16[0], &Buf[0], Buf.size()); + if (Size < Buf.size()) + break; + // Try again with larger buffer. + Buf.resize(Size + 1); + } + if (Size == 0) + return None; + + // Convert the result from UTF-16 to UTF-8. 
+ SmallVector Res; + if (error_code ec = windows::UTF16ToUTF8(&Buf[0], Size, Res)) + return None; + return std::string(&Res[0]); +} + bool Process::StandardInIsUserInput() { return FileDescriptorIsDisplayed(0); } diff --git a/lib/Support/Windows/Windows.h b/lib/Support/Windows/Windows.h index 4cdac788a0e..1236fe56521 100644 --- a/lib/Support/Windows/Windows.h +++ b/lib/Support/Windows/Windows.h @@ -24,13 +24,17 @@ #define _WIN32_IE 0x0600 // MinGW at it again. #define WIN32_LEAN_AND_MEAN +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" // Get build system configuration settings #include "llvm/Support/Compiler.h" +#include "llvm/Support/system_error.h" #include #include #include #include #include +#include inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) { if (!ErrMsg) @@ -148,4 +152,13 @@ c_str(SmallVectorImpl &str) { str.pop_back(); return str.data(); } + +namespace sys { +namespace windows { +error_code UTF8ToUTF16(StringRef utf8, + SmallVectorImpl &utf16); +error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len, + SmallVectorImpl &utf8); +} // end namespace windows +} // end namespace sys } // end namespace llvm. 
diff --git a/unittests/Support/ProcessTest.cpp b/unittests/Support/ProcessTest.cpp index eff9c711a1a..ac1b01e88bf 100644 --- a/unittests/Support/ProcessTest.cpp +++ b/unittests/Support/ProcessTest.cpp @@ -39,4 +39,32 @@ TEST(ProcessTest, SelfProcess) { EXPECT_GT(TimeValue::MaxTime, process::get_self()->get_wall_time()); } +#ifdef LLVM_ON_WIN32 +#define setenv(name, var, ignore) _putenv_s(name, var) +#endif + +#if HAVE_SETENV || defined(LLVM_ON_WIN32) +TEST(ProcessTest, Basic) { + setenv("__LLVM_TEST_ENVIRON_VAR__", "abc", true); + Optional val(Process::GetEnv("__LLVM_TEST_ENVIRON_VAR__")); + EXPECT_TRUE(val.hasValue()); + EXPECT_STREQ("abc", val->c_str()); +} + +TEST(ProcessTest, None) { + Optional val( + Process::GetEnv("__LLVM_TEST_ENVIRON_NO_SUCH_VAR__")); + EXPECT_FALSE(val.hasValue()); +} +#endif + +#ifdef LLVM_ON_WIN32 +TEST(ProcessTest, Wchar) { + SetEnvironmentVariableW(L"__LLVM_TEST_ENVIRON_VAR__", L"abcdefghijklmnopqrs"); + Optional val(Process::GetEnv("__LLVM_TEST_ENVIRON_VAR__")); + EXPECT_TRUE(val.hasValue()); + EXPECT_STREQ("abcdefghijklmnopqrs", val->c_str()); +} +#endif + } // end anonymous namespace -- 2.11.0