From 217dc082d5cc2af1cc7c544f51ef15b4abe5be8b Mon Sep 17 00:00:00 2001 From: Jim Stichnoth Date: Fri, 11 Jul 2014 14:06:55 -0700 Subject: [PATCH] Subzero: Deal with substitutions in the primitive remangler. https://refspecs.linuxbase.org/cxxabi-1.75.html#mangling-compression describes the mechanism for compressing mangled strings by using substitutions of the form S[0-9A-Z]*_ to represent repeated components. When the prefix is handled as wrapping inside a namespace, the base-36 substitution numbers all have to be incremented. This is implemented in a very simple way by scanning the string only for instances of the substitution pattern. Unfortunately, false matches are possible because the S[0-9A-Z]*_ pattern can be a substring of the type name, or can span other components of the mangled name. Getting this completely right would essentially require a full demangling parser - see the ~4000 lines of code in cxa_demangle.cpp and ItaniumMangle.cpp. Since this is just for testing, any false matches will likely cause a linking error and the test can be rewritten to avoid false matches. 
BUG= none R=jvoung@chromium.org Review URL: https://codereview.chromium.org/385273002 --- src/IceGlobalContext.cpp | 98 ++++++++++++++++++++++++++++++++++++-- src/IceGlobalContext.h | 4 ++ tests_lit/llvm2ice_tests/mangle.ll | 23 +++++++++ 3 files changed, 120 insertions(+), 5 deletions(-) diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp index d2940807a..50ac5bd80 100644 --- a/src/IceGlobalContext.cpp +++ b/src/IceGlobalContext.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include // isdigit() +#include // isdigit(), isupper() #include "IceDefs.h" #include "IceTypes.h" @@ -120,6 +120,92 @@ GlobalContext::GlobalContext(llvm::raw_ostream *OsDump, ConstPool(new ConstantPool()), Arch(Arch), Opt(Opt), TestPrefix(TestPrefix), HasEmittedFirstMethod(false) {} +// Scan a string for S[0-9A-Z]*_ patterns and replace them with +// S<num>_ where <num> is the next base-36 value. If a type name +// legitimately contains that pattern, then the substitution will be +// made in error and most likely the link will fail. In this case, +// the test classes can be rewritten not to use that pattern, which is +// much simpler and more reliable than implementing a full demangling +// parser. Another substitution-in-error may occur if a type +// identifier ends with the pattern S[0-9A-Z]*, because an immediately +// following substitution string like "S1_" or "PS1_" may be combined +// with the previous type. +void GlobalContext::incrementSubstitutions(ManglerVector &OldName) const { + const std::locale CLocale("C"); + // Provide extra space in case the length of <num> increases. + ManglerVector NewName(OldName.size() * 2); + size_t OldPos = 0; + size_t NewPos = 0; + size_t OldLen = OldName.size(); + for (; OldPos < OldLen; ++OldPos, ++NewPos) { + if (OldName[OldPos] == '\0') + break; + if (OldName[OldPos] == 'S') { + // Search forward until we find _ or invalid character (including \0). 
+ bool AllZs = true; + bool Found = false; + size_t Last; + for (Last = OldPos + 1; Last < OldLen; ++Last) { + char Ch = OldName[Last]; + if (Ch == '_') { + Found = true; + break; + } else if (std::isdigit(Ch) || std::isupper(Ch, CLocale)) { + if (Ch != 'Z') + AllZs = false; + } else { + // Invalid character, stop searching. + break; + } + } + if (Found) { + NewName[NewPos++] = OldName[OldPos++]; // 'S' + size_t Length = Last - OldPos; + // NewPos and OldPos point just past the 'S'. + assert(NewName[NewPos - 1] == 'S'); + assert(OldName[OldPos - 1] == 'S'); + assert(OldName[OldPos + Length] == '_'); + if (AllZs) { + // Replace N 'Z' characters with N+1 '0' characters. (This + // is also true for N=0, i.e. S_ ==> S0_ .) + for (size_t i = 0; i < Length + 1; ++i) { + NewName[NewPos++] = '0'; + } + } else { + // Iterate right-to-left and increment the base-36 number. + bool Carry = true; + for (size_t i = 0; i < Length; ++i) { + size_t Offset = Length - 1 - i; + char Ch = OldName[OldPos + Offset]; + if (Carry) { + Carry = false; + switch (Ch) { + case '9': + Ch = 'A'; + break; + case 'Z': + Ch = '0'; + Carry = true; + break; + default: + ++Ch; + break; + } + } + NewName[NewPos + Offset] = Ch; + } + NewPos += Length; + } + OldPos = Last; + // Fall through and let the '_' be copied across. + } + } + NewName[NewPos] = OldName[OldPos]; + } + assert(NewName[NewPos] == '\0'); + OldName = NewName; +} + // In this context, name mangling means to rewrite a symbol using a // given prefix. For a C++ symbol, nest the original symbol inside // the "prefix" namespace. 
For other symbols, just prepend the @@ -137,9 +223,9 @@ IceString GlobalContext::mangleName(const IceString &Name) const { return Name; unsigned PrefixLength = getTestPrefix().length(); - llvm::SmallVector NameBase(1 + Name.length()); + ManglerVector NameBase(1 + Name.length()); const size_t BufLen = 30 + Name.length() + PrefixLength; - llvm::SmallVector NewName(BufLen); + ManglerVector NewName(BufLen); uint32_t BaseLength = 0; // using uint32_t due to sscanf format string int ItemsParsed = sscanf(Name.c_str(), "_ZN%s", NameBase.data()); @@ -152,6 +238,7 @@ IceString GlobalContext::mangleName(const IceString &Name) const { // somehow miscalculated the output buffer length, the output will // be truncated, but it will be truncated consistently for all // mangleName() calls on the same input string. + incrementSubstitutions(NewName); return NewName.data(); } @@ -172,8 +259,8 @@ IceString GlobalContext::mangleName(const IceString &Name) const { // Transform _Z3barIabcExyz ==> _ZN6Prefix3barIabcEExyz // ^^^^^^^^ ^ // (splice in "N6Prefix", and insert "E" after "3barIabcE") - llvm::SmallVector OrigName(Name.length()); - llvm::SmallVector OrigSuffix(Name.length()); + ManglerVector OrigName(Name.length()); + ManglerVector OrigSuffix(Name.length()); uint32_t ActualBaseLength = BaseLength; if (NameBase[ActualBaseLength] == 'I') { ++ActualBaseLength; @@ -187,6 +274,7 @@ IceString GlobalContext::mangleName(const IceString &Name) const { snprintf(NewName.data(), BufLen, "_ZN%u%s%u%sE%s", PrefixLength, getTestPrefix().c_str(), BaseLength, OrigName.data(), OrigSuffix.data()); + incrementSubstitutions(NewName); return NewName.data(); } diff --git a/src/IceGlobalContext.h b/src/IceGlobalContext.h index c46d7d450..1a3e4cd38 100644 --- a/src/IceGlobalContext.h +++ b/src/IceGlobalContext.h @@ -105,6 +105,10 @@ private: bool HasEmittedFirstMethod; GlobalContext(const GlobalContext &) LLVM_DELETED_FUNCTION; GlobalContext &operator=(const GlobalContext &) LLVM_DELETED_FUNCTION; + + // 
Private helpers for mangleName() + typedef llvm::SmallVector ManglerVector; + void incrementSubstitutions(ManglerVector &OldName) const; }; } // end of namespace Ice diff --git a/tests_lit/llvm2ice_tests/mangle.ll b/tests_lit/llvm2ice_tests/mangle.ll index c76a23d11..4271244a2 100644 --- a/tests_lit/llvm2ice_tests/mangle.ll +++ b/tests_lit/llvm2ice_tests/mangle.ll @@ -101,5 +101,28 @@ entry: } ; MANGLE: Subzero_Z-1FuncCPlusPlusi: + +; Test for substitution incrementing. This single test captures: +; S<num>_ ==> S<num+1>_ for single-digit <num> +; S_ ==> S0_ +; String length increase, e.g. SZZZ_ ==> S0000_ +; At least one digit wrapping without length increase, e.g. SZ9ZZ_ ==> SZA00_ +; Unrelated identifiers containing S[0-9A-Z]* , e.g. MyClassS1x +; A proper substring of S<num>_ at the end of the string +; (to test parser edge cases) + +define internal void @_Z3fooP10MyClassS1xP10MyClassS2xRS_RS1_S_S1_SZZZ_SZ9ZZ_S12345() { +; MANGLE: _ZN7Subzero3fooEP10MyClassS1xP10MyClassS2xRS0_RS2_S0_S2_S0000_SZA00_S12345: +entry: + ret void +} + +; Test that unmangled (non-C++) strings don't have substitutions updated. +define internal void @foo_S_S0_SZ_S() { +; MANGLE: Subzerofoo_S_S0_SZ_S: +entry: + ret void +} + ; ERRORS-NOT: ICE translation error ; DUMP-NOT: SZ -- 2.11.0