From 432c850773568d9cbd4c05ef94749a4b96b15b96 Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Sun, 10 Jun 2018 02:46:11 +0000 Subject: [PATCH] Resubmit "[Support] Expose flattenWindowsCommandLine." There were a few linux compilation failures, but other than that I think this was just a flake that caused the tests to fail. I'm going to resubmit and see if the failures go away, if not I'll revert again. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@334355 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/Program.h | 13 +++ lib/Support/Program.cpp | 9 ++ lib/Support/Unix/Program.inc | 10 +-- lib/Support/Windows/Path.inc | 2 +- lib/Support/Windows/Program.inc | 187 ++++++++++++++++------------------------ 5 files changed, 99 insertions(+), 122 deletions(-) diff --git a/include/llvm/Support/Program.h b/include/llvm/Support/Program.h index 381b0d4902d..e64bfb2948a 100644 --- a/include/llvm/Support/Program.h +++ b/include/llvm/Support/Program.h @@ -133,6 +133,11 @@ namespace sys { /// Return true if the given arguments fit within system-specific /// argument length limits. bool commandLineFitsWithinSystemLimits(StringRef Program, + ArrayRef Args); + + /// Return true if the given arguments fit within system-specific + /// argument length limits. + bool commandLineFitsWithinSystemLimits(StringRef Program, ArrayRef Args); /// File encoding options when writing contents that a non-UTF8 tool will @@ -189,6 +194,14 @@ namespace sys { ///< string is non-empty upon return an error occurred while invoking the ///< program. ); + +#if defined(_WIN32) + /// Given a list of command line arguments, quote and escape them as necessary + /// to build a single flat command line appropriate for calling CreateProcess + /// on + /// Windows. + std::string flattenWindowsCommandLine(ArrayRef Args); +#endif } } diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp index 8484b46e522..8c56fb6db2d 100644 --- a/lib/Support/Program.cpp +++ b/lib/Support/Program.cpp @@ -63,6 +63,15 @@ ProcessInfo sys::ExecuteNoWait(StringRef Program, const char **Args, return PI; } +bool sys::commandLineFitsWithinSystemLimits(StringRef Program, + ArrayRef Args) { + SmallVector StringRefArgs; + StringRefArgs.reserve(Args.size()); + for (const char *A : Args) + StringRefArgs.emplace_back(A); + return commandLineFitsWithinSystemLimits(Program, StringRefArgs); +} + // Include the platform-specific parts of this class. #ifdef LLVM_ON_UNIX #include "Unix/Program.inc" diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index 04840bf4448..be971555b82 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -434,7 +434,7 @@ llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents, } bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, - ArrayRef Args) { + ArrayRef Args) { static long ArgMax = sysconf(_SC_ARG_MAX); // POSIX requires that _POSIX_ARG_MAX is 4096, which is the lowest possible // value for ARG_MAX on a POSIX compliant system. @@ -456,18 +456,16 @@ bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, long HalfArgMax = EffectiveArgMax / 2; size_t ArgLength = Program.size() + 1; - for (const char* Arg : Args) { - size_t length = strlen(Arg); - + for (StringRef Arg : Args) { // Ensure that we do not exceed the MAX_ARG_STRLEN constant on Linux, which // does not have a constant unlike what the man pages would have you // believe. Since this limit is pretty high, perform the check // unconditionally rather than trying to be aggressive and limiting it to // Linux only. - if (length >= (32 * 4096)) + if (Arg.size() >= (32 * 4096)) return false; - ArgLength += length + 1; + ArgLength += Arg.size() + 1; if (ArgLength > size_t(HalfArgMax)) { return false; } diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index 9173206d816..7ef7468867d 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -1197,7 +1197,7 @@ Expected openNativeFileForRead(const Twine &Name, OpenFlags Flags, if (Result && RealPath) realPathFromHandle(*Result, *RealPath); - return std::move(Result); + return Result; } void closeFile(file_t &F) { diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc index 183b66ce2c0..ef1da602897 100644 --- a/lib/Support/Windows/Program.inc +++ b/lib/Support/Windows/Program.inc @@ -23,6 +23,7 @@ #include #include #include +#include //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only Win32 specific code @@ -31,7 +32,7 @@ namespace llvm { -ProcessInfo::ProcessInfo() : Process(0), Pid(0), ReturnCode(0) {} +ProcessInfo::ProcessInfo() : Pid(0), Process(0), ReturnCode(0) {} ErrorOr sys::findProgramByName(StringRef Name, ArrayRef Paths) { @@ -146,108 +147,13 @@ static HANDLE RedirectIO(Optional Path, int fd, return h; } -/// ArgNeedsQuotes - Check whether argument needs to be quoted when calling -/// CreateProcess. -static bool ArgNeedsQuotes(const char *Str) { - return Str[0] == '\0' || strpbrk(Str, "\t \"&\'()*<>\\`^|") != 0; } -/// CountPrecedingBackslashes - Returns the number of backslashes preceding Cur -/// in the C string Start. -static unsigned int CountPrecedingBackslashes(const char *Start, - const char *Cur) { - unsigned int Count = 0; - --Cur; - while (Cur >= Start && *Cur == '\\') { - ++Count; - --Cur; - } - return Count; -} - -/// EscapePrecedingEscapes - Append a backslash to Dst for every backslash -/// preceding Cur in the Start string. Assumes Dst has enough space. -static char *EscapePrecedingEscapes(char *Dst, const char *Start, - const char *Cur) { - unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Cur); - while (PrecedingEscapes > 0) { - *Dst++ = '\\'; - --PrecedingEscapes; - } - return Dst; -} - -/// ArgLenWithQuotes - Check whether argument needs to be quoted when calling -/// CreateProcess and returns length of quoted arg with escaped quotes -static unsigned int ArgLenWithQuotes(const char *Str) { - const char *Start = Str; - bool Quoted = ArgNeedsQuotes(Str); - unsigned int len = Quoted ? 2 : 0; - - while (*Str != '\0') { - if (*Str == '\"') { - // We need to add a backslash, but ensure that it isn't escaped. - unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Str); - len += PrecedingEscapes + 1; - } - // Note that we *don't* need to escape runs of backslashes that don't - // precede a double quote! See MSDN: - // http://msdn.microsoft.com/en-us/library/17w5ykft%28v=vs.85%29.aspx - - ++len; - ++Str; - } - - if (Quoted) { - // Make sure the closing quote doesn't get escaped by a trailing backslash. - unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Str); - len += PrecedingEscapes + 1; - } - - return len; -} - -} - -static std::unique_ptr flattenArgs(const char **Args) { - // First, determine the length of the command line. - unsigned len = 0; - for (unsigned i = 0; Args[i]; i++) { - len += ArgLenWithQuotes(Args[i]) + 1; - } - - // Now build the command line. - std::unique_ptr command(new char[len+1]); - char *p = command.get(); - - for (unsigned i = 0; Args[i]; i++) { - const char *arg = Args[i]; - const char *start = arg; - - bool needsQuoting = ArgNeedsQuotes(arg); - if (needsQuoting) - *p++ = '"'; - - while (*arg != '\0') { - if (*arg == '\"') { - // Escape all preceding escapes (if any), and then escape the quote. - p = EscapePrecedingEscapes(p, start, arg); - *p++ = '\\'; - } - - *p++ = *arg++; - } - - if (needsQuoting) { - // Make sure our quote doesn't get escaped by a trailing backslash. - p = EscapePrecedingEscapes(p, start, arg); - *p++ = '"'; - } - *p++ = ' '; - } - - *p = 0; - return command; +static SmallVector buildArgVector(const char **Args) { + SmallVector Result; + for (unsigned I = 0; Args[I]; ++I) + Result.push_back(StringRef(Args[I])); + return Result; } static bool Execute(ProcessInfo &PI, StringRef Program, const char **Args, @@ -270,7 +176,8 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **Args, // Windows wants a command line, not an array of args, to pass to the new // process. We have to concatenate them all, while quoting the args that // have embedded spaces (or are empty). - std::unique_ptr command = flattenArgs(Args); + auto ArgVector = buildArgVector(Args); + std::string Command = flattenWindowsCommandLine(ArgVector); // The pointer to the environment block for the new process. std::vector EnvBlock; @@ -353,7 +260,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **Args, } SmallVector CommandUtf16; - if (std::error_code ec = windows::UTF8ToUTF16(command.get(), CommandUtf16)) { + if (std::error_code ec = windows::UTF8ToUTF16(Command, CommandUtf16)) { SetLastError(ec.value()); MakeErrMsg(ErrMsg, std::string("Unable to convert command-line to UTF-16")); @@ -414,7 +321,63 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **Args, return true; } +static bool argNeedsQuotes(StringRef Arg) { + if (Arg.empty()) + return true; + return StringRef::npos != Arg.find_first_of(" \t\n\v\""); +} + +static std::string quoteSingleArg(StringRef Arg) { + std::string Result; + Result.push_back('"'); + + while (!Arg.empty()) { + size_t FirstNonBackslash = Arg.find_first_not_of('\\'); + size_t BackslashCount = FirstNonBackslash; + if (FirstNonBackslash == StringRef::npos) { + // The entire remainder of the argument is backslashes. Escape all of + // them and just early out. + BackslashCount = Arg.size(); + Result.append(BackslashCount * 2, '\\'); + break; + } + + if (Arg[FirstNonBackslash] == '\"') { + // This is an embedded quote. Escape all preceding backslashes, then + // add one additional backslash to escape the quote. + Result.append(BackslashCount * 2 + 1, '\\'); + Result.push_back('\"'); + } else { + // This is just a normal character. Don't escape any of the preceding + // backslashes, just append them as they are and then append the + // character. + Result.append(BackslashCount, '\\'); + Result.push_back(Arg[FirstNonBackslash]); + } + + // Drop all the backslashes, plus the following character. + Arg = Arg.drop_front(FirstNonBackslash + 1); + } + + Result.push_back('"'); + return Result; +} + namespace llvm { +std::string sys::flattenWindowsCommandLine(ArrayRef Args) { + std::string Command; + for (StringRef Arg : Args) { + if (argNeedsQuotes(Arg)) + Command += quoteSingleArg(Arg); + else + Command += Arg; + + Command.push_back(' '); + } + + return Command; +} + ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait, bool WaitUntilChildTerminates, std::string *ErrMsg) { assert(PI.Pid && "invalid pid to wait on, process not started?"); @@ -537,19 +500,13 @@ llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents, } bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, - ArrayRef Args) { + ArrayRef Args) { // The documented max length of the command line passed to CreateProcess. static const size_t MaxCommandStringLength = 32768; - // Account for the trailing space for the program path and the - // trailing NULL of the last argument. - size_t ArgLength = ArgLenWithQuotes(Program.str().c_str()) + 2; - for (const char* Arg : Args) { - // Account for the trailing space for every arg - ArgLength += ArgLenWithQuotes(Arg) + 1; - if (ArgLength > MaxCommandStringLength) { - return false; - } - } - return true; + SmallVector FullArgs; + FullArgs.push_back(Program); + FullArgs.append(Args.begin(), Args.end()); + std::string Result = flattenWindowsCommandLine(FullArgs); + return (Result.size() + 1) <= MaxCommandStringLength; } } -- 2.11.0