From f8a29b174a965acb942dd3ef5f8ef2c32620777b Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 17 Jul 2020 09:18:24 -0700 Subject: [PATCH] [OptTable] Support grouped short options POSIX.1-2017 12.2 Utility Syntax Guidelines, Guideline 5 says: > One or more options without option-arguments, followed by at most one option that takes an option-argument, should be accepted when grouped behind one '-' delimiter. i.e. -abc represents -a -b -c. The grouped short options are very common. Many utilities extend the syntax by allowing (an option with an argument) following a sequence of short options. This patch adds the support to OptTable, similar to cl::Group for CommandLine (D58711). llvm-symbolizer will use the feature (D83530). CommandLine is exotic in some aspects. OptTable is preferred if the user wants to get rid of the behaviors. * `cl::opt i(...)` can be disabled via -i=false or -i=0, which is different from conventional --no-i. * Handling --foo & --no-foo requires a comparison of argument positions, which is a bit clumsy in user code. OptTable::parseOneArg (non-const reference InputArgList) is added along with ParseOneArg (const ArgList &). The duplicate does not look great at first glance. However, The implementation can be simpler if ArgList is mutable. (ParseOneArg is used by clang-cl (FlagsToInclude/FlagsToExclude) and lld COFF (case-insensitive). Adding grouped short options can make the function even more complex.) The implementation allows a long option following a group of short options. We probably should refine the code to disallow this in the future. Allowing this seems benign for now. Reviewed By: grimar, jhenderson Differential Revision: https://reviews.llvm.org/D83639 --- llvm/include/llvm/Option/ArgList.h | 4 ++ llvm/include/llvm/Option/OptTable.h | 6 +++ llvm/include/llvm/Option/Option.h | 14 ++++--- llvm/lib/Option/OptTable.cpp | 64 +++++++++++++++++++++++++++-- llvm/lib/Option/Option.cpp | 15 +++---- llvm/unittests/Option/OptionParsingTest.cpp | 44 ++++++++++++++++++++ llvm/unittests/Option/Opts.td | 1 + 7 files changed, 132 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/Option/ArgList.h b/llvm/include/llvm/Option/ArgList.h index 74bfadcba72..9ce78397818 100644 --- a/llvm/include/llvm/Option/ArgList.h +++ b/llvm/include/llvm/Option/ArgList.h @@ -412,6 +412,10 @@ public: return ArgStrings[Index]; } + void replaceArgString(unsigned Index, const Twine &S) { + ArgStrings[Index] = MakeArgString(S); + } + unsigned getNumInputArgStrings() const override { return NumInputArgStrings; } diff --git a/llvm/include/llvm/Option/OptTable.h b/llvm/include/llvm/Option/OptTable.h index 5db30436069..b9984bed55a 100644 --- a/llvm/include/llvm/Option/OptTable.h +++ b/llvm/include/llvm/Option/OptTable.h @@ -59,6 +59,7 @@ private: /// The option information table. std::vector OptionInfos; bool IgnoreCase; + bool GroupedShortOptions = false; unsigned TheInputOptionID = 0; unsigned TheUnknownOptionID = 0; @@ -79,6 +80,8 @@ private: return OptionInfos[id - 1]; } + Arg *parseOneArgGrouped(InputArgList &Args, unsigned &Index) const; + protected: OptTable(ArrayRef OptionInfos, bool IgnoreCase = false); @@ -120,6 +123,9 @@ public: return getInfo(id).MetaVar; } + /// Support grouped short options. e.g. -ab represents -a -b. + void setGroupedShortOptions(bool Value) { GroupedShortOptions = Value; } + /// Find possible value for given flags. This is used for shell /// autocompletion. /// diff --git a/llvm/include/llvm/Option/Option.h b/llvm/include/llvm/Option/Option.h index 73ee8e0073b..196cf656355 100644 --- a/llvm/include/llvm/Option/Option.h +++ b/llvm/include/llvm/Option/Option.h @@ -213,14 +213,16 @@ public: /// Index to the position where argument parsing should resume /// (even if the argument is missing values). /// - /// \param ArgSize The number of bytes taken up by the matched Option prefix - /// and name. This is used to determine where joined values - /// start. - Arg *accept(const ArgList &Args, unsigned &Index, unsigned ArgSize) const; + /// \p CurArg The argument to be matched. It may be shorter than the + /// underlying storage to represent a Joined argument. + /// \p GroupedShortOption If true, we are handling the fallback case of + /// parsing a prefix of the current argument as a short option. + Arg *accept(const ArgList &Args, StringRef CurArg, bool GroupedShortOption, + unsigned &Index) const; private: - Arg *acceptInternal(const ArgList &Args, unsigned &Index, - unsigned ArgSize) const; + Arg *acceptInternal(const ArgList &Args, StringRef CurArg, + unsigned &Index) const; public: void print(raw_ostream &O) const; diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp index 926eb8e0437..16404d3d810 100644 --- a/llvm/lib/Option/OptTable.cpp +++ b/llvm/lib/Option/OptTable.cpp @@ -330,6 +330,60 @@ bool OptTable::addValues(const char *Option, const char *Values) { return false; } +// Parse a single argument, return the new argument, and update Index. If +// GroupedShortOptions is true, -a matches "-abc" and the argument in Args will +// be updated to "-bc". This overload does not support +// FlagsToInclude/FlagsToExclude or case insensitive options. +Arg *OptTable::parseOneArgGrouped(InputArgList &Args, unsigned &Index) const { + // Anything that doesn't start with PrefixesUnion is an input, as is '-' + // itself. + const char *CStr = Args.getArgString(Index); + StringRef Str(CStr); + if (isInput(PrefixesUnion, Str)) + return new Arg(getOption(TheInputOptionID), Str, Index++, CStr); + + const Info *End = OptionInfos.data() + OptionInfos.size(); + StringRef Name = Str.ltrim(PrefixChars); + const Info *Start = std::lower_bound( + OptionInfos.data() + FirstSearchableIndex, End, Name.data()); + const Info *Fallback = nullptr; + unsigned Prev = Index; + + // Search for the option which matches Str. + for (; Start != End; ++Start) { + unsigned ArgSize = matchOption(Start, Str, IgnoreCase); + if (!ArgSize) + continue; + + Option Opt(Start, this); + if (Arg *A = Opt.accept(Args, StringRef(Args.getArgString(Index), ArgSize), + false, Index)) + return A; + + // If Opt is a Flag of length 2 (e.g. "-a"), we know it is a prefix of + // the current argument (e.g. "-abc"). Match it as a fallback if no longer + // option (e.g. "-ab") exists. + if (ArgSize == 2 && Opt.getKind() == Option::FlagClass) + Fallback = Start; + + // Otherwise, see if the argument is missing. + if (Prev != Index) + return nullptr; + } + if (Fallback) { + Option Opt(Fallback, this); + if (Arg *A = Opt.accept(Args, Str.substr(0, 2), true, Index)) { + if (Str.size() == 2) + ++Index; + else + Args.replaceArgString(Index, Twine('-') + Str.substr(2)); + return A; + } + } + + return new Arg(getOption(TheUnknownOptionID), Str, Index++, CStr); +} + Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index, unsigned FlagsToInclude, unsigned FlagsToExclude) const { @@ -373,7 +427,8 @@ Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index, continue; // See if this option matches. - if (Arg *A = Opt.accept(Args, Index, ArgSize)) + if (Arg *A = Opt.accept(Args, StringRef(Args.getArgString(Index), ArgSize), + false, Index)) return A; // Otherwise, see if this argument was missing values. @@ -414,8 +469,11 @@ InputArgList OptTable::ParseArgs(ArrayRef ArgArr, } unsigned Prev = Index; - Arg *A = ParseOneArg(Args, Index, FlagsToInclude, FlagsToExclude); - assert(Index > Prev && "Parser failed to consume argument."); + Arg *A = GroupedShortOptions + ? parseOneArgGrouped(Args, Index) + : ParseOneArg(Args, Index, FlagsToInclude, FlagsToExclude); + assert((Index > Prev || GroupedShortOptions) && + "Parser failed to consume argument."); // Check for missing argument error. if (!A) { diff --git a/llvm/lib/Option/Option.cpp b/llvm/lib/Option/Option.cpp index 9abc9fdce4c..68d074b2702 100644 --- a/llvm/lib/Option/Option.cpp +++ b/llvm/lib/Option/Option.cpp @@ -106,9 +106,9 @@ bool Option::matches(OptSpecifier Opt) const { return false; } -Arg *Option::acceptInternal(const ArgList &Args, unsigned &Index, - unsigned ArgSize) const { - StringRef Spelling = StringRef(Args.getArgString(Index), ArgSize); +Arg *Option::acceptInternal(const ArgList &Args, StringRef Spelling, + unsigned &Index) const { + size_t ArgSize = Spelling.size(); switch (getKind()) { case FlagClass: { if (ArgSize != strlen(Args.getArgString(Index))) @@ -230,10 +230,11 @@ Arg *Option::acceptInternal(const ArgList &Args, unsigned &Index, } } -Arg *Option::accept(const ArgList &Args, - unsigned &Index, - unsigned ArgSize) const { - std::unique_ptr A(acceptInternal(Args, Index, ArgSize)); +Arg *Option::accept(const ArgList &Args, StringRef CurArg, + bool GroupedShortOption, unsigned &Index) const { + std::unique_ptr A(GroupedShortOption && getKind() == FlagClass + ? new Arg(*this, CurArg, Index) + : acceptInternal(Args, CurArg, Index)); if (!A) return nullptr; diff --git a/llvm/unittests/Option/OptionParsingTest.cpp b/llvm/unittests/Option/OptionParsingTest.cpp index e1d7a473ee7..feeb3b7866c 100644 --- a/llvm/unittests/Option/OptionParsingTest.cpp +++ b/llvm/unittests/Option/OptionParsingTest.cpp @@ -332,3 +332,47 @@ TEST(DISABLED_Option, FindNearestFIXME) { EXPECT_EQ(Nearest, "--ermghFoo"); } + +TEST(Option, ParseGroupedShortOptions) { + TestOptTable T; + T.setGroupedShortOptions(true); + unsigned MAI, MAC; + + // Grouped short options can be followed by a long Flag (-Joo), or a non-Flag + // option (-C=1). + const char *Args1[] = {"-AIJ", "-AIJoo", "-AC=1"}; + InputArgList AL = T.ParseArgs(Args1, MAI, MAC); + EXPECT_TRUE(AL.hasArg(OPT_A)); + EXPECT_TRUE(AL.hasArg(OPT_H)); + ASSERT_EQ((size_t)2, AL.getAllArgValues(OPT_B).size()); + EXPECT_EQ("foo", AL.getAllArgValues(OPT_B)[0]); + EXPECT_EQ("bar", AL.getAllArgValues(OPT_B)[1]); + ASSERT_TRUE(AL.hasArg(OPT_C)); + EXPECT_EQ("1", AL.getAllArgValues(OPT_C)[0]); + + // Prefer a long option to a short option. + const char *Args2[] = {"-AB"}; + InputArgList AL2 = T.ParseArgs(Args2, MAI, MAC); + EXPECT_TRUE(!AL2.hasArg(OPT_A)); + EXPECT_TRUE(AL2.hasArg(OPT_AB)); + + // Short options followed by a long option. We probably should disallow this. + const char *Args3[] = {"-AIblorp"}; + InputArgList AL3 = T.ParseArgs(Args3, MAI, MAC); + EXPECT_TRUE(AL3.hasArg(OPT_A)); + EXPECT_TRUE(AL3.hasArg(OPT_Blorp)); +} + +TEST(Option, UnknownOptions) { + TestOptTable T; + unsigned MAI, MAC; + const char *Args[] = {"-u", "--long", "0"}; + for (int I = 0; I < 2; ++I) { + T.setGroupedShortOptions(I != 0); + InputArgList AL = T.ParseArgs(Args, MAI, MAC); + const std::vector Unknown = AL.getAllArgValues(OPT_UNKNOWN); + ASSERT_EQ((size_t)2, Unknown.size()); + EXPECT_EQ("-u", Unknown[0]); + EXPECT_EQ("--long", Unknown[1]); + } +} diff --git a/llvm/unittests/Option/Opts.td b/llvm/unittests/Option/Opts.td index 70920a6a76b..e1ebffd1881 100644 --- a/llvm/unittests/Option/Opts.td +++ b/llvm/unittests/Option/Opts.td @@ -5,6 +5,7 @@ def OptFlag2 : OptionFlag; def OptFlag3 : OptionFlag; def A : Flag<["-"], "A">, HelpText<"The A option">, Flags<[OptFlag1]>; +def AB : Flag<["-"], "AB">; def B : Joined<["-"], "B">, HelpText<"The B option">, MetaVarName<"B">, Flags<[OptFlag2]>; def C : Separate<["-"], "C">, HelpText<"The C option">, MetaVarName<"C">, Flags<[OptFlag1]>; def SLASH_C : Separate<["/", "-"], "C">, HelpText<"The C option">, MetaVarName<"C">, Flags<[OptFlag3]>; -- 2.11.0