From 10c84a8f35cae4a9fc421648d9608fccda3925f2 Mon Sep 17 00:00:00 2001 From: Eugene Leviant Date: Sat, 10 Nov 2018 08:31:21 +0000 Subject: [PATCH] [ThinLTO] Internalize readonly globals This patch allows internalising globals if all accesses to them (from live functions) are from non-volatile load instructions Differential revision: https://reviews.llvm.org/D49362 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@346584 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/ModuleSummaryIndex.h | 50 +++++++--- include/llvm/Transforms/IPO/FunctionImport.h | 8 ++ .../llvm/Transforms/Utils/FunctionImportUtils.h | 1 - lib/Analysis/ModuleSummaryAnalysis.cpp | 75 +++++++++++---- lib/AsmParser/LLParser.cpp | 3 +- lib/Bitcode/Reader/BitcodeReader.cpp | 51 ++++++++-- lib/Bitcode/Writer/BitcodeWriter.cpp | 22 ++++- lib/IR/ModuleSummaryIndex.cpp | 107 +++++++++++++++++++-- lib/LTO/LTO.cpp | 5 +- lib/LTO/ThinLTOCodeGenerator.cpp | 3 +- lib/Linker/IRMover.cpp | 5 - lib/Transforms/IPO/FunctionImport.cpp | 45 ++++++++- lib/Transforms/Utils/FunctionImportUtils.cpp | 21 +++- test/Bitcode/summary_version.ll | 2 +- test/Bitcode/thinlto-alias.ll | 4 +- test/Bitcode/thinlto-alias2.ll | 2 +- .../thinlto-function-summary-callgraph-cast.ll | 4 +- .../thinlto-function-summary-callgraph-pgo.ll | 4 +- ...o-function-summary-callgraph-profile-summary.ll | 4 +- .../thinlto-function-summary-callgraph-relbf.ll | 2 +- ...ion-summary-callgraph-sample-profile-summary.ll | 4 +- test/Bitcode/thinlto-function-summary-callgraph.ll | 4 +- test/Bitcode/thinlto-function-summary-refgraph.ll | 12 +-- test/ThinLTO/X86/Inputs/index-const-prop-alias.ll | 5 + test/ThinLTO/X86/Inputs/index-const-prop-comdat.ll | 5 + .../X86/Inputs/index-const-prop-define-g.ll | 4 + .../X86/Inputs/index-const-prop-full-lto.ll | 12 +++ test/ThinLTO/X86/Inputs/index-const-prop-gvref.ll | 5 + .../ThinLTO/X86/Inputs/index-const-prop-linkage.ll | 15 +++ test/ThinLTO/X86/Inputs/index-const-prop.ll | 64 ++++++++++++ 
test/ThinLTO/X86/dot-dumper.ll | 10 +- test/ThinLTO/X86/globals-import-const-fold.ll | 4 +- test/ThinLTO/X86/index-const-prop-O0.ll | 18 ++++ test/ThinLTO/X86/index-const-prop-alias.ll | 42 ++++++++ test/ThinLTO/X86/index-const-prop-comdat.ll | 17 ++++ test/ThinLTO/X86/index-const-prop-dead.ll | 26 +++++ test/ThinLTO/X86/index-const-prop-full-lto.ll | 24 +++++ test/ThinLTO/X86/index-const-prop-gvref.ll | 27 ++++++ test/ThinLTO/X86/index-const-prop-ldst.ll | 21 ++++ test/ThinLTO/X86/index-const-prop-linkage.ll | 27 ++++++ test/ThinLTO/X86/index-const-prop.ll | 40 ++++++++ test/ThinLTO/X86/index-const-prop2.ll | 59 ++++++++++++ 42 files changed, 774 insertions(+), 89 deletions(-) create mode 100644 test/ThinLTO/X86/Inputs/index-const-prop-alias.ll create mode 100644 test/ThinLTO/X86/Inputs/index-const-prop-comdat.ll create mode 100644 test/ThinLTO/X86/Inputs/index-const-prop-define-g.ll create mode 100644 test/ThinLTO/X86/Inputs/index-const-prop-full-lto.ll create mode 100644 test/ThinLTO/X86/Inputs/index-const-prop-gvref.ll create mode 100644 test/ThinLTO/X86/Inputs/index-const-prop-linkage.ll create mode 100644 test/ThinLTO/X86/Inputs/index-const-prop.ll create mode 100644 test/ThinLTO/X86/index-const-prop-O0.ll create mode 100644 test/ThinLTO/X86/index-const-prop-alias.ll create mode 100644 test/ThinLTO/X86/index-const-prop-comdat.ll create mode 100644 test/ThinLTO/X86/index-const-prop-dead.ll create mode 100644 test/ThinLTO/X86/index-const-prop-full-lto.ll create mode 100644 test/ThinLTO/X86/index-const-prop-gvref.ll create mode 100644 test/ThinLTO/X86/index-const-prop-ldst.ll create mode 100644 test/ThinLTO/X86/index-const-prop-linkage.ll create mode 100644 test/ThinLTO/X86/index-const-prop.ll create mode 100644 test/ThinLTO/X86/index-const-prop2.ll diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h index 9a456acf966..7e10fa1aa02 100644 --- a/include/llvm/IR/ModuleSummaryIndex.h +++ b/include/llvm/IR/ModuleSummaryIndex.h @@ 
-163,13 +163,13 @@ using GlobalValueSummaryMapTy = /// Struct that holds a reference to a particular GUID in a global value /// summary. struct ValueInfo { - PointerIntPair - RefAndFlag; + PointerIntPair + RefAndFlags; ValueInfo() = default; ValueInfo(bool HaveGVs, const GlobalValueSummaryMapTy::value_type *R) { - RefAndFlag.setPointer(R); - RefAndFlag.setInt(HaveGVs); + RefAndFlags.setPointer(R); + RefAndFlags.setInt(HaveGVs); } operator bool() const { return getRef(); } @@ -189,10 +189,12 @@ struct ValueInfo { : getRef()->second.U.Name; } - bool haveGVs() const { return RefAndFlag.getInt(); } + bool haveGVs() const { return RefAndFlags.getInt() & 0x1; } + bool isReadOnly() const { return RefAndFlags.getInt() & 0x2; } + void setReadOnly() { RefAndFlags.setInt(RefAndFlags.getInt() | 0x2); } const GlobalValueSummaryMapTy::value_type *getRef() const { - return RefAndFlag.getPointer(); + return RefAndFlags.getPointer(); } bool isDSOLocal() const; @@ -543,6 +545,8 @@ public: std::move(TypeTestAssumeConstVCalls), std::move(TypeCheckedLoadConstVCalls)}); } + // Gets the number of immutable refs in RefEdgeList + unsigned immutableRefCount() const; /// Check if this is a function summary. static bool classof(const GlobalValueSummary *GVS) { @@ -652,19 +656,30 @@ template <> struct DenseMapInfo { /// Global variable summary information to aid decisions and /// implementation of importing. /// -/// Currently this doesn't add anything to the base \p GlobalValueSummary, -/// but is a placeholder as additional info may be added to the summary -/// for variables. +/// Global variable summary has extra flag, telling if it is +/// modified during the program run or not. 
This affects ThinLTO +/// internalization class GlobalVarSummary : public GlobalValueSummary { - public: - GlobalVarSummary(GVFlags Flags, std::vector Refs) - : GlobalValueSummary(GlobalVarKind, Flags, std::move(Refs)) {} + struct GVarFlags { + GVarFlags(bool ReadOnly = false) : ReadOnly(ReadOnly) {} + + unsigned ReadOnly : 1; + } VarFlags; + + GlobalVarSummary(GVFlags Flags, GVarFlags VarFlags, + std::vector Refs) + : GlobalValueSummary(GlobalVarKind, Flags, std::move(Refs)), + VarFlags(VarFlags) {} /// Check if this is a global variable summary. static bool classof(const GlobalValueSummary *GVS) { return GVS->getSummaryKind() == GlobalVarKind; } + + GVarFlags varflags() const { return VarFlags; } + void setReadOnly(bool RO) { VarFlags.ReadOnly = RO; } + bool isReadOnly() const { return VarFlags.ReadOnly; } }; struct TypeTestResolution { @@ -1135,6 +1150,9 @@ public: /// Print out strongly connected components for debugging. void dumpSCCs(raw_ostream &OS); + + /// Analyze index and detect unmodified globals + void propagateConstants(const DenseSet &PreservedSymbols); }; /// GraphTraits definition to build SCC for the index @@ -1184,6 +1202,14 @@ struct GraphTraits : public GraphTraits { } }; +static inline bool canImportGlobalVar(GlobalValueSummary *S) { + assert(isa(S->getBaseObject())); + + // We don't import GV with references, because it can result + // in promotion of local variables in the source module. 
+ return !GlobalValue::isInterposableLinkage(S->linkage()) && + !S->notEligibleToImport() && S->refs().empty(); +} } // end namespace llvm #endif // LLVM_IR_MODULESUMMARYINDEX_H diff --git a/include/llvm/Transforms/IPO/FunctionImport.h b/include/llvm/Transforms/IPO/FunctionImport.h index 113ef2e4c7b..c2103b63726 100644 --- a/include/llvm/Transforms/IPO/FunctionImport.h +++ b/include/llvm/Transforms/IPO/FunctionImport.h @@ -176,6 +176,14 @@ void computeDeadSymbols( const DenseSet &GUIDPreservedSymbols, function_ref isPrevailing); +/// Compute dead symbols and run constant propagation in combined index +/// after that. +void computeDeadSymbolsWithConstProp( + ModuleSummaryIndex &Index, + const DenseSet &GUIDPreservedSymbols, + function_ref isPrevailing, + bool ImportEnabled); + /// Converts value \p GV to declaration, or replaces with a declaration if /// it is an alias. Returns true if converted, false if replaced. bool convertToDeclaration(GlobalValue &GV); diff --git a/include/llvm/Transforms/Utils/FunctionImportUtils.h b/include/llvm/Transforms/Utils/FunctionImportUtils.h index b9fbef04cdc..5f8dc846d52 100644 --- a/include/llvm/Transforms/Utils/FunctionImportUtils.h +++ b/include/llvm/Transforms/Utils/FunctionImportUtils.h @@ -113,7 +113,6 @@ public: bool renameModuleForThinLTO( Module &M, const ModuleSummaryIndex &Index, SetVector *GlobalsToImport = nullptr); - } // End llvm namespace #endif diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp index 29b96ac746b..af2faa0dc6f 100644 --- a/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -220,10 +220,19 @@ static void addIntrinsicToSummary( } } -static void computeFunctionSummary( - ModuleSummaryIndex &Index, const Module &M, const Function &F, - BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, DominatorTree &DT, - bool HasLocalsInUsedOrAsm, DenseSet &CantBePromoted) { +static bool isNonVolatileLoad(const Instruction *I) { + if (const 
auto *LI = dyn_cast(I)) + return !LI->isVolatile(); + + return false; +} + +static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, + const Function &F, BlockFrequencyInfo *BFI, + ProfileSummaryInfo *PSI, DominatorTree &DT, + bool HasLocalsInUsedOrAsm, + DenseSet &CantBePromoted, + bool IsThinLTO) { // Summary not currently supported for anonymous functions, they should // have been named. assert(F.hasName()); @@ -244,6 +253,7 @@ static void computeFunctionSummary( // Add personality function, prefix data and prologue data to function's ref // list. findRefEdges(Index, &F, RefEdges, Visited); + std::vector NonVolatileLoads; bool HasInlineAsmMaybeReferencingInternal = false; for (const BasicBlock &BB : F) @@ -251,6 +261,13 @@ static void computeFunctionSummary( if (isa(I)) continue; ++NumInsts; + if (isNonVolatileLoad(&I)) { + // Postpone processing of non-volatile load instructions + // See comments below + Visited.insert(&I); + NonVolatileLoads.push_back(&I); + continue; + } findRefEdges(Index, &I, RefEdges, Visited); auto CS = ImmutableCallSite(&I); if (!CS) @@ -340,6 +357,24 @@ static void computeFunctionSummary( } } + // By now we processed all instructions in a function, except + // non-volatile loads. All new refs we add in a loop below + // are obviously constant. All constant refs are grouped in the + // end of RefEdges vector, so we can use a single integer value + // to identify them. 
+ unsigned RefCnt = RefEdges.size(); + for (const Instruction *I : NonVolatileLoads) { + Visited.erase(I); + findRefEdges(Index, I, RefEdges, Visited); + } + std::vector Refs = RefEdges.takeVector(); + // Regular LTO module doesn't participate in ThinLTO import, + // so no reference from it can be readonly, since this would + // require importing variable as local copy + if (IsThinLTO) + for (; RefCnt < Refs.size(); ++RefCnt) + Refs[RefCnt].setReadOnly(); + // Explicit add hot edges to enforce importing for designated GUIDs for // sample PGO, to enable the same inlines as the profiled optimized binary. for (auto &I : F.getImportGUIDs()) @@ -363,9 +398,9 @@ static void computeFunctionSummary( // Don't try to import functions with noinline attribute. F.getAttributes().hasFnAttribute(Attribute::NoInline)}; auto FuncSummary = llvm::make_unique( - Flags, NumInsts, FunFlags, RefEdges.takeVector(), - CallGraphEdges.takeVector(), TypeTests.takeVector(), - TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(), + Flags, NumInsts, FunFlags, std::move(Refs), CallGraphEdges.takeVector(), + TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(), + TypeCheckedLoadVCalls.takeVector(), TypeTestAssumeConstVCalls.takeVector(), TypeCheckedLoadConstVCalls.takeVector()); if (NonRenamableLocal) @@ -382,8 +417,13 @@ computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V, bool NonRenamableLocal = isNonRenamableLocal(V); GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal, /* Live = */ false, V.isDSOLocal()); - auto GVarSummary = - llvm::make_unique(Flags, RefEdges.takeVector()); + + // Don't mark variables we won't be able to internalize as read-only. 
+ GlobalVarSummary::GVarFlags VarFlags( + !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() && + !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass()); + auto GVarSummary = llvm::make_unique(Flags, VarFlags, + RefEdges.takeVector()); if (NonRenamableLocal) CantBePromoted.insert(V.getGUID()); if (HasBlockAddress) @@ -487,13 +527,19 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( Index.addGlobalValueSummary(*GV, std::move(Summary)); } else { std::unique_ptr Summary = - llvm::make_unique(GVFlags, - ArrayRef{}); + llvm::make_unique( + GVFlags, GlobalVarSummary::GVarFlags(), + ArrayRef{}); Index.addGlobalValueSummary(*GV, std::move(Summary)); } }); } + bool IsThinLTO = true; + if (auto *MD = + mdconst::extract_or_null(M.getModuleFlag("ThinLTO"))) + IsThinLTO = MD->getZExtValue(); + // Compute summaries for all functions defined in module, and save in the // index. for (auto &F : M) { @@ -514,7 +560,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( computeFunctionSummary(Index, M, F, BFI, PSI, DT, !LocalsUsed.empty() || HasLocalInlineAsmSymbol, - CantBePromoted); + CantBePromoted, IsThinLTO); } // Compute summaries for all variables defined in module, and save in the @@ -545,11 +591,6 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( setLiveRoot(Index, "llvm.global_dtors"); setLiveRoot(Index, "llvm.global.annotations"); - bool IsThinLTO = true; - if (auto *MD = - mdconst::extract_or_null(M.getModuleFlag("ThinLTO"))) - IsThinLTO = MD->getZExtValue(); - for (auto &GlobalList : Index) { // Ignore entries for references that are undefined in the current module. 
if (GlobalList.second.SummaryList.empty()) diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 5fe1e125d48..57f73244acb 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -7642,7 +7642,8 @@ bool LLParser::ParseVariableSummary(std::string Name, GlobalValue::GUID GUID, if (ParseToken(lltok::rparen, "expected ')' here")) return true; - auto GS = llvm::make_unique(GVFlags, std::move(Refs)); + auto GS = llvm::make_unique( + GVFlags, GlobalVarSummary::GVarFlags(), std::move(Refs)); GS->setModulePath(ModulePath); diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 56e05f8f085..41acb9f3234 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -898,6 +898,11 @@ static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags, return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, Live, Local); } +// Decode the flags for GlobalVariable in the summary +static GlobalVarSummary::GVarFlags getDecodedGVarFlags(uint64_t RawFlags) { + return GlobalVarSummary::GVarFlags((RawFlags & 0x1) ? true : false); +} + static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) { switch (Val) { default: // Map unknown visibilities to default. @@ -5170,6 +5175,12 @@ static void parseTypeIdSummaryRecord(ArrayRef Record, parseWholeProgramDevirtResolution(Record, Strtab, Slot, TypeId); } +static void setImmutableRefs(std::vector &Refs, unsigned Count) { + // Read-only refs are in the end of the refs list. + for (unsigned RefNo = Refs.size() - Count; RefNo < Refs.size(); ++RefNo) + Refs[RefNo].setReadOnly(); +} + // Eagerly parse the entire summary block. This populates the GlobalValueSummary // objects in the index. 
Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { @@ -5187,9 +5198,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { } const uint64_t Version = Record[0]; const bool IsOldProfileFormat = Version == 1; - if (Version < 1 || Version > 4) + if (Version < 1 || Version > 5) return error("Invalid summary version " + Twine(Version) + - ", 1, 2, 3 or 4 expected"); + ", 1, 2, 3, 4 or 5 expected"); Record.clear(); // Keep around the last seen summary to be used when we see an optional @@ -5268,11 +5279,16 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { unsigned InstCount = Record[2]; uint64_t RawFunFlags = 0; unsigned NumRefs = Record[3]; + unsigned NumImmutableRefs = 0; int RefListStartIndex = 4; if (Version >= 4) { RawFunFlags = Record[3]; NumRefs = Record[4]; RefListStartIndex = 5; + if (Version >= 5) { + NumImmutableRefs = Record[5]; + RefListStartIndex = 6; + } } auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); @@ -5291,6 +5307,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { std::vector Calls = makeCallList( ArrayRef(Record).slice(CallGraphEdgeStartIndex), IsOldProfileFormat, HasProfile, HasRelBF); + setImmutableRefs(Refs, NumImmutableRefs); auto FS = llvm::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs), std::move(Calls), std::move(PendingTypeTests), @@ -5339,14 +5356,21 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { TheIndex.addGlobalValueSummary(GUID.first, std::move(AS)); break; } - // FS_PERMODULE_GLOBALVAR_INIT_REFS: [valueid, flags, n x valueid] + // FS_PERMODULE_GLOBALVAR_INIT_REFS: [valueid, flags, varflags, n x valueid] case bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS: { unsigned ValueID = Record[0]; uint64_t RawFlags = Record[1]; + unsigned RefArrayStart = 2; + GlobalVarSummary::GVarFlags GVF; auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); + if (Version >= 5) { + GVF = 
getDecodedGVarFlags(Record[2]); + RefArrayStart = 3; + } std::vector Refs = - makeRefList(ArrayRef(Record).slice(2)); - auto FS = llvm::make_unique(Flags, std::move(Refs)); + makeRefList(ArrayRef(Record).slice(RefArrayStart)); + auto FS = + llvm::make_unique(Flags, GVF, std::move(Refs)); FS->setModulePath(getThisModule()->first()); auto GUID = getValueInfoFromValueId(ValueID); FS->setOriginalName(GUID.second); @@ -5365,12 +5389,17 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { unsigned InstCount = Record[3]; uint64_t RawFunFlags = 0; unsigned NumRefs = Record[4]; + unsigned NumImmutableRefs = 0; int RefListStartIndex = 5; if (Version >= 4) { RawFunFlags = Record[4]; NumRefs = Record[5]; RefListStartIndex = 6; + if (Version >= 5) { + NumImmutableRefs = Record[6]; + RefListStartIndex = 7; + } } auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); @@ -5384,6 +5413,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { ArrayRef(Record).slice(CallGraphEdgeStartIndex), IsOldProfileFormat, HasProfile, false); ValueInfo VI = getValueInfoFromValueId(ValueID).first; + setImmutableRefs(Refs, NumImmutableRefs); auto FS = llvm::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs), std::move(Edges), std::move(PendingTypeTests), @@ -5432,10 +5462,17 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { unsigned ValueID = Record[0]; uint64_t ModuleId = Record[1]; uint64_t RawFlags = Record[2]; + unsigned RefArrayStart = 3; + GlobalVarSummary::GVarFlags GVF; auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); + if (Version >= 5) { + GVF = getDecodedGVarFlags(Record[3]); + RefArrayStart = 4; + } std::vector Refs = - makeRefList(ArrayRef(Record).slice(3)); - auto FS = llvm::make_unique(Flags, std::move(Refs)); + makeRefList(ArrayRef(Record).slice(RefArrayStart)); + auto FS = + llvm::make_unique(Flags, GVF, std::move(Refs)); LastSeenSummary = FS.get(); 
FS->setModulePath(ModuleIdMap[ModuleId]); ValueInfo VI = getValueInfoFromValueId(ValueID).first; diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index f4634c9d3f4..89a20dd1c67 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -991,6 +991,11 @@ static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) { return RawFlags; } +static uint64_t getEncodedGVarFlags(GlobalVarSummary::GVarFlags Flags) { + uint64_t RawFlags = Flags.ReadOnly; + return RawFlags; +} + static unsigned getEncodedVisibility(const GlobalValue &GV) { switch (GV.getVisibility()) { case GlobalValue::DefaultVisibility: return 0; @@ -3489,6 +3494,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord( NameVals.push_back(FS->instCount()); NameVals.push_back(getEncodedFFlags(FS->fflags())); NameVals.push_back(FS->refs().size()); + NameVals.push_back(FS->immutableRefCount()); for (auto &RI : FS->refs()) NameVals.push_back(VE.getValueID(RI.getValue())); @@ -3530,6 +3536,7 @@ void ModuleBitcodeWriterBase::writeModuleLevelReferences( NameVals.push_back(VE.getValueID(&V)); GlobalVarSummary *VS = cast(Summary); NameVals.push_back(getEncodedGVSummaryFlags(VS->flags())); + NameVals.push_back(getEncodedGVarFlags(VS->varflags())); unsigned SizeBeforeRefs = NameVals.size(); for (auto &RI : VS->refs()) @@ -3546,7 +3553,7 @@ void ModuleBitcodeWriterBase::writeModuleLevelReferences( // Current version for the summary. // This is bumped whenever we introduce changes in the way some record are // interpreted, like flags for instance. -static const uint64_t INDEX_VERSION = 4; +static const uint64_t INDEX_VERSION = 5; /// Emit the per-module summary section alongside the rest of /// the module's bitcode. 
@@ -3581,6 +3588,7 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt // numrefs x valueid, n x (valueid, hotness) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); @@ -3597,6 +3605,7 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt // numrefs x valueid, n x (valueid [, rel_block_freq]) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); @@ -3691,6 +3700,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt // numrefs x valueid, n x (valueid) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); @@ -3705,6 +3715,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt // numrefs x valueid, n x (valueid, hotness) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); 
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); @@ -3777,6 +3788,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { NameVals.push_back(*ValueId); NameVals.push_back(Index.getModuleId(VS->modulePath())); NameVals.push_back(getEncodedGVSummaryFlags(VS->flags())); + NameVals.push_back(getEncodedGVarFlags(VS->varflags())); for (auto &RI : VS->refs()) { auto RefValueId = getValueId(RI.getGUID()); if (!RefValueId) @@ -3802,17 +3814,21 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { NameVals.push_back(FS->instCount()); NameVals.push_back(getEncodedFFlags(FS->fflags())); // Fill in below - NameVals.push_back(0); + NameVals.push_back(0); // numrefs + NameVals.push_back(0); // immutablerefcnt - unsigned Count = 0; + unsigned Count = 0, ImmutableRefCnt = 0; for (auto &RI : FS->refs()) { auto RefValueId = getValueId(RI.getGUID()); if (!RefValueId) continue; NameVals.push_back(*RefValueId); + if (RI.isReadOnly()) + ImmutableRefCnt++; Count++; } NameVals[5] = Count; + NameVals[6] = ImmutableRefCnt; bool HasProfileData = false; for (auto &EI : FS->calls()) { diff --git a/lib/IR/ModuleSummaryIndex.cpp b/lib/IR/ModuleSummaryIndex.cpp index 8d85f7901b0..6c0d3973715 100644 --- a/lib/IR/ModuleSummaryIndex.cpp +++ b/lib/IR/ModuleSummaryIndex.cpp @@ -30,6 +30,17 @@ bool ValueInfo::isDSOLocal() const { }); } +// Gets the number of immutable refs in RefEdgeList +unsigned FunctionSummary::immutableRefCount() const { + // Here we take advantage of having all readonly references + // located in the end of the RefEdgeList. + auto Refs = refs(); + unsigned ImmutableRefCnt = 0; + for (int I = Refs.size() - 1; I >= 0 && Refs[I].isReadOnly(); --I) + ImmutableRefCnt++; + return ImmutableRefCnt; +} + // Collect for the given module the list of function it defines // (GUID -> Summary). 
void ModuleSummaryIndex::collectDefinedFunctionsForModule( @@ -84,6 +95,73 @@ bool ModuleSummaryIndex::isGUIDLive(GlobalValue::GUID GUID) const { return false; } +static void propagateConstantsToRefs(GlobalValueSummary *S) { + // If reference is not readonly then referenced summary is not + // readonly either. Note that: + // - All references from GlobalVarSummary are conservatively considered as + // not readonly. Tracking them properly requires more complex analysis + // than we have now. + // + // - AliasSummary objects have no refs at all so this function is a no-op + // for them. + for (auto &VI : S->refs()) { + if (VI.isReadOnly()) { + // We only mark refs as readonly when computing function summaries on + // analysis phase. + assert(isa(S)); + continue; + } + for (auto &Ref : VI.getSummaryList()) + // If a reference to an alias is not readonly then aliasee is not readonly + if (auto *GVS = dyn_cast(Ref->getBaseObject())) + GVS->setReadOnly(false); + } +} + // Do the constant propagation in combined index. +// The goal of constant propagation is internalization of readonly +// variables. To determine which variables are readonly and which +// are not we take following steps: +// - During analysis we speculatively assign readonly attribute to +// all variables which can be internalized. When computing function +// summary we also assign readonly attribute to a reference if +// function doesn't modify referenced variable. +// +// - After computing dead symbols in combined index we do the constant +// propagation. During this step we clear readonly attribute from +// all variables which: +// a. are dead, preserved or can't be imported +// b. referenced by any global variable initializer +// c. referenced by a function and reference is not readonly +// +// Internalization itself happens in the backend after import is finished +// See internalizeImmutableGVs. 
+void ModuleSummaryIndex::propagateConstants( + const DenseSet &GUIDPreservedSymbols) { + for (auto &P : *this) + for (auto &S : P.second.SummaryList) { + if (!isGlobalValueLive(S.get())) + // We don't examine references from dead objects + continue; + + // Global variable can't be marked read only if it is not eligible + // to import since we need to ensure that all external references + // get a local (imported) copy. It also can't be marked read only + // if it or any alias (since alias points to the same memory) are + // preserved or notEligibleToImport, since either of those means + // there could be writes that are not visible (because preserved + // means it could have external to DSO writes, and notEligibleToImport + // means it could have writes via inline assembly leading it to be + // in the @llvm.*used). + if (auto *GVS = dyn_cast(S->getBaseObject())) + // Here we intentionally pass S.get() not GVS, because S could be + // an alias. + if (!canImportGlobalVar(S.get()) || GUIDPreservedSymbols.count(P.first)) + GVS->setReadOnly(false); + propagateConstantsToRefs(S.get()); + } +} + // TODO: write a graphviz dumper for SCCs (see ModuleSummaryIndex::exportToDot) // then delete this function and update its tests LLVM_DUMP_METHOD @@ -108,6 +186,7 @@ namespace { struct Attributes { void add(const Twine &Name, const Twine &Value, const Twine &Comment = Twine()); + void addComment(const Twine &Comment); std::string getAsString() const; std::vector Attrs; @@ -129,6 +208,10 @@ void Attributes::add(const Twine &Name, const Twine &Value, A += Value.str(); A += "\""; Attrs.push_back(A); + addComment(Comment); +} + +void Attributes::addComment(const Twine &Comment) { if (!Comment.isTriviallyEmpty()) { if (Comments.empty()) Comments = " // "; @@ -237,6 +320,12 @@ static void defineExternalNode(raw_ostream &OS, const char *Pfx, OS << "\"]; // defined externally\n"; } +static bool hasReadOnlyFlag(const GlobalValueSummary *S) { + if (auto *GVS = dyn_cast(S)) + return 
GVS->isReadOnly(); + return false; +} + void ModuleSummaryIndex::exportToDot(raw_ostream &OS) const { std::vector CrossModuleEdges; DenseMap> NodeMap; @@ -252,13 +341,17 @@ void ModuleSummaryIndex::exportToDot(raw_ostream &OS) const { }; auto DrawEdge = [&](const char *Pfx, uint64_t SrcMod, GlobalValue::GUID SrcId, - uint64_t DstMod, GlobalValue::GUID DstId, int TypeOrHotness) { - // 0 corresponds to alias edge, 1 to ref edge, 2 to call with unknown - // hotness, ... - TypeOrHotness += 2; + uint64_t DstMod, GlobalValue::GUID DstId, + int TypeOrHotness) { + // 0 - alias + // 1 - reference + // 2 - constant reference + // Other value: (hotness - 3). + TypeOrHotness += 3; static const char *EdgeAttrs[] = { " [style=dotted]; // alias", " [style=dashed]; // ref", + " [style=dashed,color=forestgreen]; // const-ref", " // call (hotness : Unknown)", " [color=blue]; // call (hotness : Cold)", " // call (hotness : None)", @@ -301,6 +394,8 @@ void ModuleSummaryIndex::exportToDot(raw_ostream &OS) const { A.add("shape", "box"); } else { A.add("shape", "Mrecord", "variable"); + if (Flags.Live && hasReadOnlyFlag(SummaryIt.second)) + A.addComment("immutable"); } auto VI = getValueInfo(SummaryIt.first); @@ -318,7 +413,7 @@ void ModuleSummaryIndex::exportToDot(raw_ostream &OS) const { for (auto &SummaryIt : GVSMap) { auto *GVS = SummaryIt.second; for (auto &R : GVS->refs()) - Draw(SummaryIt.first, R.getGUID(), -1); + Draw(SummaryIt.first, R.getGUID(), R.isReadOnly() ? 
-1 : -2); if (auto *AS = dyn_cast_or_null(SummaryIt.second)) { GlobalValue::GUID AliaseeId; @@ -331,7 +426,7 @@ void ModuleSummaryIndex::exportToDot(raw_ostream &OS) const { AliaseeId = AliaseeOrigId; } - Draw(SummaryIt.first, AliaseeId, -2); + Draw(SummaryIt.first, AliaseeId, -3); continue; } diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp index 6b11f690ef3..68fd93a5d9c 100644 --- a/lib/LTO/LTO.cpp +++ b/lib/LTO/LTO.cpp @@ -187,6 +187,8 @@ static void computeCacheKey( AddUnsigned(VI.isDSOLocal()); AddUsedCfiGlobal(VI.getGUID()); } + if (auto *GVS = dyn_cast(GS)) + AddUnsigned(GVS->isReadOnly()); if (auto *FS = dyn_cast(GS)) { for (auto &TT : FS->type_tests()) UsedTypeIds.insert(TT); @@ -809,7 +811,8 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) { return PrevailingType::Unknown; return It->second; }; - computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols, isPrevailing); + computeDeadSymbolsWithConstProp(ThinLTO.CombinedIndex, GUIDPreservedSymbols, + isPrevailing, Conf.OptLevel > 0); // Setup output file to emit statistics. 
std::unique_ptr StatsFile = nullptr; diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp index 8017527bf22..b817c462d4e 100644 --- a/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/lib/LTO/ThinLTOCodeGenerator.cpp @@ -646,7 +646,8 @@ static void computeDeadSymbolsInIndex( auto isPrevailing = [&](GlobalValue::GUID G) { return PrevailingType::Unknown; }; - computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing); + computeDeadSymbolsWithConstProp(Index, GUIDPreservedSymbols, isPrevailing, + /* ImportEnabled = */ true); } /** diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp index b304bfc401a..72e20ae0ba1 100644 --- a/lib/Linker/IRMover.cpp +++ b/lib/Linker/IRMover.cpp @@ -1062,11 +1062,6 @@ void IRLinker::prepareCompileUnitsForImport() { ValueMap.MD()[CU->getRawEnumTypes()].reset(nullptr); ValueMap.MD()[CU->getRawMacros()].reset(nullptr); ValueMap.MD()[CU->getRawRetainedTypes()].reset(nullptr); - // We import global variables only temporarily in order for instcombine - // and globalopt to perform constant folding and static constructor - // evaluation. After that elim-avail-extern will covert imported globals - // back to declarations, so we don't need debug info for them. 
- ValueMap.MD()[CU->getRawGlobalVariables()].reset(nullptr); // Imported entities only need to be mapped in if they have local // scope, as those might correspond to an imported entity inside a diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index 1196dd0099b..60ca8e32897 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -294,10 +294,8 @@ static void computeImportForReferencedGlobals( LLVM_DEBUG(dbgs() << " ref -> " << VI << "\n"); for (auto &RefSummary : VI.getSummaryList()) - if (RefSummary->getSummaryKind() == GlobalValueSummary::GlobalVarKind && - !RefSummary->notEligibleToImport() && - !GlobalValue::isInterposableLinkage(RefSummary->linkage()) && - RefSummary->refs().empty()) { + if (isa(RefSummary.get()) && + canImportGlobalVar(RefSummary.get())) { auto ILI = ImportList[RefSummary->modulePath()].insert(VI.getGUID()); // Only update stat if we haven't already imported this variable. if (ILI.second) @@ -824,6 +822,25 @@ void llvm::computeDeadSymbols( NumLiveSymbols += LiveSymbols; } +// Compute dead symbols and propagate constants in combined index. +void llvm::computeDeadSymbolsWithConstProp( + ModuleSummaryIndex &Index, + const DenseSet &GUIDPreservedSymbols, + function_ref isPrevailing, + bool ImportEnabled) { + computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing); + if (ImportEnabled) { + Index.propagateConstants(GUIDPreservedSymbols); + } else { + // If import is disabled we should drop read-only attribute + // from all summaries to prevent internalization. + for (auto &P : Index) + for (auto &S : P.second.SummaryList) + if (auto *GVS = dyn_cast(S.get())) + GVS->setReadOnly(false); + } +} + /// Compute the set of summaries needed for a ThinLTO backend compilation of /// \p ModulePath. 
void llvm::gatherImportedSummariesForModule( @@ -1020,6 +1037,22 @@ static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) { return NewFn; } +// Internalize values that we marked with specific attribute +// in processGlobalForThinLTO. +static void internalizeImmutableGVs(Module &M) { + for (auto &GV : M.globals()) { + // Skip GVs which have been converted to declarations + // by dropDeadSymbols. + if (GV.isDeclaration()) + continue; + if (auto *GVar = dyn_cast(&GV)) + if (GVar->hasAttribute("thinlto-internalize")) { + GVar->setLinkage(GlobalValue::InternalLinkage); + GVar->setVisibility(GlobalValue::DefaultVisibility); + } + } +} + // Automatically import functions in Module \p DestModule based on the summaries // index. Expected FunctionImporter::importFunctions( @@ -1143,6 +1176,8 @@ Expected FunctionImporter::importFunctions( NumImportedModules++; } + internalizeImmutableGVs(DestModule); + NumImportedFunctions += (ImportedCount - ImportedGVCount); NumImportedGlobalVars += ImportedGVCount; @@ -1159,7 +1194,7 @@ static bool doImportingForModule(Module &M) { if (SummaryFile.empty()) report_fatal_error("error: -function-import requires -summary-file\n"); Expected> IndexPtrOrErr = - getModuleSummaryIndexForFile(SummaryFile); + getModuleSummaryIndexForFile(SummaryFile); if (!IndexPtrOrErr) { logAllUnhandledErrors(IndexPtrOrErr.takeError(), errs(), "Error loading file '" + SummaryFile + "': "); diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp index 479816a339d..89e74da4d99 100644 --- a/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -204,8 +204,9 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { // Check the summaries to see if the symbol gets resolved to a known local // definition. 
+ ValueInfo VI; if (GV.hasName()) { - ValueInfo VI = ImportIndex.getValueInfo(GV.getGUID()); + VI = ImportIndex.getValueInfo(GV.getGUID()); if (VI && VI.isDSOLocal()) { GV.setDSOLocal(true); if (GV.hasDLLImportStorageClass()) @@ -213,6 +214,22 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { } } + // Mark read-only variables which can be imported with specific attribute. + // We can't internalize them now because IRMover will fail to link variable + // definitions to their external declarations during ThinLTO import. We'll + // internalize read-only variables later, after import is finished. + // See internalizeImmutableGVs. + // + // If global value dead stripping is not enabled in summary then + // propagateConstants hasn't been run (maybe because we're using + // distributed import). We can't internalize GV in such case. + if (!GV.isDeclaration() && VI && ImportIndex.withGlobalValueDeadStripping()) { + const auto &SL = VI.getSummaryList(); + auto *GVS = SL.empty() ? nullptr : dyn_cast(SL[0].get()); + if (GVS && GVS->isReadOnly()) + cast(&GV)->addAttribute("thinlto-internalize"); + } + bool DoPromote = false; if (GV.hasLocalLinkage() && ((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) { @@ -230,7 +247,7 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { // Remove functions imported as available externally defs from comdats, // as this is a declaration for the linker, and will be dropped eventually. // It is illegal for comdats to contain declarations. 
- auto *GO = dyn_cast_or_null(&GV); + auto *GO = dyn_cast(&GV); if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) { // The IRMover should not have placed any imported declarations in // a comdat, so the only declaration that should be in a comdat diff --git a/test/Bitcode/summary_version.ll b/test/Bitcode/summary_version.ll index b285da7a6f4..4913a1ec066 100644 --- a/test/Bitcode/summary_version.ll +++ b/test/Bitcode/summary_version.ll @@ -2,7 +2,7 @@ ; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s ; CHECK: +; CHECK: diff --git a/test/Bitcode/thinlto-alias.ll b/test/Bitcode/thinlto-alias.ll index 05de932faee..6369a6c7e7f 100644 --- a/test/Bitcode/thinlto-alias.ll +++ b/test/Bitcode/thinlto-alias.ll @@ -20,7 +20,7 @@ ; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK: ; COMBINED-NEXT: -; COMBINED-NEXT: +; COMBINED-NEXT: ; COMBINED-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK-NEXT: diff --git a/test/Bitcode/thinlto-function-summary-callgraph-cast.ll b/test/Bitcode/thinlto-function-summary-callgraph-cast.ll index 45801c9a74d..79644403d38 100644 --- a/test/Bitcode/thinlto-function-summary-callgraph-cast.ll +++ b/test/Bitcode/thinlto-function-summary-callgraph-cast.ll @@ -6,9 +6,9 @@ ; CHECK: +; CHECK-NEXT: ; "another_caller" has only references but no calls. 
-; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK-NEXT: diff --git a/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll b/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll index bb3e8e97835..d7cf4d7deb2 100644 --- a/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll +++ b/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll @@ -17,7 +17,7 @@ ; CHECK: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK: +; COMBINED-NEXT: ; COMBINED-NEXT: ; ModuleID = 'thinlto-function-summary-callgraph.ll' diff --git a/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll b/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll index fc3c5c90ab9..f749489629c 100644 --- a/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll +++ b/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll @@ -48,7 +48,7 @@ ; CHECK-NEXT: ; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123 -; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK: +; COMBINED-NEXT: ; COMBINED_NEXT: diff --git a/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll b/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll index d84517865a8..04f28c95730 100644 --- a/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll +++ b/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll @@ -13,7 +13,7 @@ ; CHECK: ; CHECK: ; op4=none1 op6=hot1 op8=cold1 op10=none2 op12=hot2 op14=cold2 op16=none3 op18=hot3 op20=cold3 op22=123 -; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK: +; COMBINED-NEXT: ; COMBINED_NEXT: diff --git a/test/Bitcode/thinlto-function-summary-callgraph.ll b/test/Bitcode/thinlto-function-summary-callgraph.ll index 8025eee5929..ca6ed433f7b 100644 --- a/test/Bitcode/thinlto-function-summary-callgraph.ll +++ b/test/Bitcode/thinlto-function-summary-callgraph.ll @@ -17,7 +17,7 @@ ; CHECK-NEXT: ; CHECK: +; COMBINED-NEXT: ; COMBINED-NEXT: ; ModuleID = 'thinlto-function-summary-callgraph.ll' diff --git 
a/test/Bitcode/thinlto-function-summary-refgraph.ll b/test/Bitcode/thinlto-function-summary-refgraph.ll index 848598fa686..419becae062 100644 --- a/test/Bitcode/thinlto-function-summary-refgraph.ll +++ b/test/Bitcode/thinlto-function-summary-refgraph.ll @@ -41,27 +41,27 @@ ; CHECK: +; CHECK-DAG: ; Function W contains a call to func3 as well as a reference to globalvar: ; op0=W op4=globalvar op5=func3 -; CHECK-DAG: +; CHECK-DAG: ; Function X contains call to foo, as well as address reference to foo ; which is in the same instruction as the call: ; op0=X op4=foo op5=foo -; CHECK-DAG: +; CHECK-DAG: ; Function Y contains call to func2, and ensures we don't incorrectly add ; a reference to it when reached while earlier analyzing the phi using its ; return value: ; op0=Y op4=func2 -; CHECK-DAG: +; CHECK-DAG: ; Function Z contains call to func2, and ensures we don't incorrectly add ; a reference to it when reached while analyzing subsequent use of its return ; value: ; op0=Z op4=func2 -; CHECK-DAG: +; CHECK-DAG: ; Variable bar initialization contains address reference to func: ; op0=bar op2=func -; CHECK-DAG: +; CHECK-DAG: ; CHECK: ; CHECK: M1_{{[0-9]+}} // call -; STRUCTURE-DAG: M0_{{[0-9]+}} -> M1_{{[0-9]+}} [{{.*}}]; // ref +; STRUCTURE-DAG: M0_{{[0-9]+}} -> M1_{{[0-9]+}} [{{.*}}]; // const-ref ; STRUCTURE-NEXT: } ; CLUSTER0: // Module: {{.*}}1.bc @@ -33,13 +33,13 @@ ; CLUSTER1: // Module: {{.*}}2.bc ; CLUSTER1-NEXT: subgraph cluster_1 { -; CLUSTER1-DAG: M1_[[A:[0-9]+]] [{{.*}}A|extern{{.*}}]; // variable +; CLUSTER1-DAG: M1_[[A:[0-9]+]] [{{.*}}A|extern{{.*}}]; // variable, immutable ; CLUSTER1-DAG: M1_[[FOO:[0-9]+]] [{{.*}}foo|extern{{.*}} ffl: 00001{{.*}}]; // function -; CLUSTER1-DAG: M1_[[B:[0-9]+]] [{{.*}}B|extern{{.*}}]; // variable +; CLUSTER1-DAG: M1_[[B:[0-9]+]] [{{.*}}B|extern{{.*}}]; // variable, immutable ; CLUSTER1-DAG: M1_[[BAR:[0-9]+]] [{{.*}}bar|extern{{.*}}]; // function, dead ; CLUSTER1-NEXT: // Edges: -; CLUSTER1-DAG: M1_[[FOO]] -> M1_[[B]] [{{.*}}]; 
// ref -; CLUSTER1-DAG: M1_[[FOO]] -> M1_[[A]] [{{.*}}]; // ref +; CLUSTER1-DAG: M1_[[FOO]] -> M1_[[B]] [{{.*}}]; // const-ref +; CLUSTER1-DAG: M1_[[FOO]] -> M1_[[A]] [{{.*}}]; // const-ref ; CLUSTER1-DAG: } target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/test/ThinLTO/X86/globals-import-const-fold.ll b/test/ThinLTO/X86/globals-import-const-fold.ll index 49e31b79a47..a250ed2e92b 100644 --- a/test/ThinLTO/X86/globals-import-const-fold.ll +++ b/test/ThinLTO/X86/globals-import-const-fold.ll @@ -2,12 +2,12 @@ ; RUN: opt -module-summary %p/Inputs/globals-import-cf-baz.ll -o %t2.bc ; RUN: llvm-lto -thinlto-action=thinlink %t1.bc %t2.bc -o %t3.index.bc -; RUN: llvm-lto -thinlto-action=import %t1.bc %t2.bc -thinlto-index=%t3.index.bc +; RUN: llvm-lto -thinlto-action=import -exported-symbol=main %t1.bc -thinlto-index=%t3.index.bc ; RUN: llvm-dis %t1.bc.thinlto.imported.bc -o - | FileCheck --check-prefix=IMPORT %s ; RUN: llvm-lto -thinlto-action=optimize %t1.bc.thinlto.imported.bc -o %t1.bc.thinlto.opt.bc ; RUN: llvm-dis %t1.bc.thinlto.opt.bc -o - | FileCheck --check-prefix=OPTIMIZE %s -; IMPORT: @baz = available_externally local_unnamed_addr constant i32 10 +; IMPORT: @baz = internal local_unnamed_addr constant i32 10 ; OPTIMIZE: define i32 @main() ; OPTIMIZE-NEXT: ret i32 10 diff --git a/test/ThinLTO/X86/index-const-prop-O0.ll b/test/ThinLTO/X86/index-const-prop-O0.ll new file mode 100644 index 00000000000..6426d726843 --- /dev/null +++ b/test/ThinLTO/X86/index-const-prop-O0.ll @@ -0,0 +1,18 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop-define-g.ll -o %t2.bc +; RUN: llvm-lto2 run -O0 -save-temps %t2.bc -r=%t2.bc,g,pl %t1.bc -r=%t1.bc,main,plx -r=%t1.bc,g, -o %t3 +; RUN: llvm-dis %t3.1.3.import.bc -o - | FileCheck %s + +; With -O0 import is disabled so we must not internalize +; read-only globals +; CHECK: @g = dso_local global i32 42 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" 
+target triple = "x86_64-unknown-linux-gnu" + +@g = external global i32 + +define i32 @main() { + %v = load i32, i32* @g + ret i32 %v +} diff --git a/test/ThinLTO/X86/index-const-prop-alias.ll b/test/ThinLTO/X86/index-const-prop-alias.ll new file mode 100644 index 00000000000..592fc9e081a --- /dev/null +++ b/test/ThinLTO/X86/index-const-prop-alias.ll @@ -0,0 +1,42 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop-alias.ll -o %t2.bc +; RUN: llvm-lto2 run %t1.bc -r=%t1.bc,main,plx -r=%t1.bc,ret_ptr,pl -r=%t1.bc,g.alias,l -r=%t1.bc,g,l \ +; RUN: %t2.bc -r=%t2.bc,g,pl -r=%t2.bc,g.alias,pl -save-temps -o %t3 +; RUN: llvm-dis %t3.1.3.import.bc -o - | FileCheck %s --check-prefix=IMPORT +; RUN: llvm-dis %t3.1.5.precodegen.bc -o - | FileCheck %s --check-prefix=CODEGEN + +; When ret_ptr is preserved we return pointer to alias, so we can't internalize aliasee +; RUN: llvm-lto2 run %t1.bc -r=%t1.bc,main,plx -r=%t1.bc,ret_ptr,plx -r=%t1.bc,g.alias,l -r=%t1.bc,g,l \ +; RUN: %t2.bc -r=%t2.bc,g,pl -r=%t2.bc,g.alias,pl -save-temps -o %t4 +; RUN: llvm-dis %t4.1.3.import.bc -o - | FileCheck %s --check-prefix=PRESERVED + +; When g.alias is preserved we can't internalize aliasee either +; RUN: llvm-lto2 run %t1.bc -r=%t1.bc,main,plx -r=%t1.bc,ret_ptr,pl -r=%t1.bc,g.alias,l -r=%t1.bc,g,l \ +; RUN: %t2.bc -r=%t2.bc,g,pl -r=%t2.bc,g.alias,plx -save-temps -o %t5 +; RUN: llvm-dis %t5.1.3.import.bc -o - | FileCheck %s --check-prefix=PRESERVED + +; We currently don't support importing aliases +; IMPORT: @g.alias = external dso_local global i32 +; IMPORT-NEXT: @g = internal global i32 42, align 4 #0 +; IMPORT: attributes #0 = { "thinlto-internalize" } + +; CODEGEN: define dso_local i32 @main +; CODEGEN-NEXT: ret i32 42 + +; PRESERVED: @g.alias = external dso_local global i32 +; PRESERVED-NEXT: @g = available_externally dso_local global i32 42, align 4 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = 
"x86_64-unknown-linux-gnu" + +@g.alias = external global i32 +@g = external global i32 + +define i32 @main() { + %v = load i32, i32* @g + ret i32 %v +} + +define i32* @ret_ptr() { + ret i32* @g.alias +} diff --git a/test/ThinLTO/X86/index-const-prop-comdat.ll b/test/ThinLTO/X86/index-const-prop-comdat.ll new file mode 100644 index 00000000000..e05aaf98d7f --- /dev/null +++ b/test/ThinLTO/X86/index-const-prop-comdat.ll @@ -0,0 +1,17 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop-comdat.ll -o %t2.bc +; RUN: llvm-lto2 run -save-temps %t2.bc -r=%t2.bc,g,pl %t1.bc -r=%t1.bc,main,plx -r=%t1.bc,g, -o %t3 +; RUN: llvm-dis %t3.2.3.import.bc -o - | FileCheck %s + +; Comdats are not internalized even if they are read only. +; CHECK: @g = available_externally dso_local global i32 42 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@g = external global i32 + +define i32 @main() { + %v = load i32, i32* @g + ret i32 %v +} diff --git a/test/ThinLTO/X86/index-const-prop-dead.ll b/test/ThinLTO/X86/index-const-prop-dead.ll new file mode 100644 index 00000000000..f3ffca8cbdf --- /dev/null +++ b/test/ThinLTO/X86/index-const-prop-dead.ll @@ -0,0 +1,26 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop-define-g.ll -o %t2.bc +; RUN: llvm-lto2 run -save-temps %t2.bc -r=%t2.bc,g,pl \ +; RUN: %t1.bc -r=%t1.bc,main,plx -r=%t1.bc,foo,pl -r=%t1.bc,g, -o %t3 +; RUN: llvm-dis %t3.2.3.import.bc -o - | FileCheck %s + +; Dead globals are converted to declarations by ThinLTO in dropDeadSymbols +; If we try to internalize such we'll get a broken module. 
+; CHECK: @g = external dso_local global i32 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@g = external global i32 + +; We need at least one live symbol to enable dead stripping +; Otherwise ModuleSummaryIndex::isGlobalValueLive will always +; return true. +define i32 @main() { + ret i32 42 +} + +define i32 @foo() { + %v = load i32, i32* @g + ret i32 %v +} diff --git a/test/ThinLTO/X86/index-const-prop-full-lto.ll b/test/ThinLTO/X86/index-const-prop-full-lto.ll new file mode 100644 index 00000000000..26fe4d644b7 --- /dev/null +++ b/test/ThinLTO/X86/index-const-prop-full-lto.ll @@ -0,0 +1,24 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop-define-g.ll -o %t2.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop-full-lto.ll -o %t3.bc +; RUN: llvm-lto2 run -save-temps %t2.bc -r=%t2.bc,g,pl \ +; RUN: %t1.bc -r=%t1.bc,foo,l -r=%t1.bc,main,plx -r=%t1.bc,g, \ +; RUN: %t3.bc -r=%t3.bc,foo,pl -r=%t3.bc,g, -o %t4 +; RUN: llvm-dis %t4.2.3.import.bc -o - | FileCheck %s + +; All references from functions in full LTO module are not constant. 
+; We cannot internalize @g +; CHECK: @g = available_externally dso_local global i32 42 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare i32 @foo() +@g = external global i32 + +define i32 @main() { + %v = call i32 @foo() + %v2 = load i32, i32* @g + %v3 = add i32 %v, %v2 + ret i32 %v3 +} diff --git a/test/ThinLTO/X86/index-const-prop-gvref.ll b/test/ThinLTO/X86/index-const-prop-gvref.ll new file mode 100644 index 00000000000..87103fee141 --- /dev/null +++ b/test/ThinLTO/X86/index-const-prop-gvref.ll @@ -0,0 +1,27 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop-gvref.ll -o %t2.bc +; RUN: llvm-lto2 run -save-temps %t2.bc -r=%t2.bc,b,pl -r=%t2.bc,a,pl \ +; RUN: %t1.bc -r=%t1.bc,main,plx -r=%t1.bc,a, -r=%t1.bc,b, -o %t3 +; RUN: llvm-dis %t3.1.3.import.bc -o - | FileCheck %s --check-prefix=SRC +; RUN: llvm-dis %t3.2.3.import.bc -o - | FileCheck %s --check-prefix=DEST + +; No variable in the source module should have been internalized +; SRC: @b = dso_local global i32* @a +; SRC-NEXT: @a = dso_local global i32 42 + +; We can't internalize globals referenced by other live globals +; DEST: @b = external dso_local global i32* +; DEST-NEXT: @a = available_externally dso_local global i32 42, align 4 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@a = external global i32 +@b = external global i32* + +define i32 @main() { + %p = load i32*, i32** @b, align 8 + store i32 33, i32* %p, align 4 + %v = load i32, i32* @a, align 4 + ret i32 %v +} diff --git a/test/ThinLTO/X86/index-const-prop-ldst.ll b/test/ThinLTO/X86/index-const-prop-ldst.ll new file mode 100644 index 00000000000..4646557b6cf --- /dev/null +++ b/test/ThinLTO/X86/index-const-prop-ldst.ll @@ -0,0 +1,21 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop-define-g.ll -o %t2.bc +; RUN: llvm-lto2 
run -save-temps %t2.bc -r=%t2.bc,g,pl %t1.bc -r=%t1.bc,main,plx -r=%t1.bc,g, -o %t3 +; RUN: llvm-dis %t3.2.3.import.bc -o - | FileCheck %s + +; The 'store' instruction in @main should prevent internalization +; even when there is 'load' instruction before it. +; CHECK: @g = available_externally dso_local global i32 42 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@g = external global i32 + +define i32 @main() { + %v = load i32, i32* @g + %q = add i32 %v, 1 + store i32 %q, i32* @g + + ret i32 %v +} diff --git a/test/ThinLTO/X86/index-const-prop-linkage.ll b/test/ThinLTO/X86/index-const-prop-linkage.ll new file mode 100644 index 00000000000..aac91772111 --- /dev/null +++ b/test/ThinLTO/X86/index-const-prop-linkage.ll @@ -0,0 +1,27 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop-linkage.ll -o %t2.bc +; RUN: llvm-lto2 run -save-temps %t2.bc -r=%t2.bc,foo,pl -r=%t2.bc,g1,pl -r=%t2.bc,g2,pl -r=%t2.bc,g3, \ +; RUN: %t1.bc -r=%t1.bc,foo, -r=%t1.bc,main,plx -r=%t1.bc,g2, -o %t3 +; RUN: llvm-dis %t3.2.3.import.bc -o - | FileCheck %s + +; Check that we never internalize anything with: +; - appending linkage +; - common linkage +; - available_externally linkage +; - reference from @llvm.used +; CHECK: @llvm.used = appending global [1 x i32*] [i32* @g2] +; CHECK-NEXT: @g1 = external dso_local global i32, align 4 +; CHECK-NEXT: @g2 = available_externally dso_local global i32 42, align 4 +; CHECK-NEXT: @g3 = available_externally global i32 42, align 4 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare i32 @foo() +@g2 = external global i32 +@llvm.used = appending global [1 x i32*] [i32* @g2] + +define i32 @main() { + %v = call i32 @foo() + ret i32 %v +} diff --git a/test/ThinLTO/X86/index-const-prop.ll b/test/ThinLTO/X86/index-const-prop.ll new file mode 100644 index 00000000000..df99d83fbb5 --- 
/dev/null +++ b/test/ThinLTO/X86/index-const-prop.ll @@ -0,0 +1,40 @@ +; Check constant propagation in thinlto combined summary. This allows us to do 2 things: +; 1. Internalize global definition which is not used externally if all accesses to it are read-only +; 2. Make a local copy of internal definition if all accesses to it are readonly. This allows constant +; folding it during optimization phase. + +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop.ll -o %t2.bc +; RUN: llvm-lto -thinlto-action=thinlink -o %t3.index.bc %t1.bc %t2.bc +; RUN: llvm-lto -thinlto-action=import -exported-symbol=main %t1.bc -thinlto-index=%t3.index.bc -o %t1.imported.bc +; RUN: llvm-dis %t1.imported.bc -o - | FileCheck %s --check-prefix=IMPORT +; RUN: llvm-lto -thinlto-action=optimize %t1.imported.bc -o - | llvm-dis - -o - | FileCheck %s --check-prefix=OPTIMIZE + +; Check that we don't internalize gBar when it is exported +; RUN: llvm-lto -thinlto-action=import -exported-symbol main -exported-symbol gBar %t1.bc -thinlto-index=%t3.index.bc -o %t1.imported2.bc +; RUN: llvm-dis %t1.imported2.bc -o - | FileCheck %s --check-prefix=IMPORT2 + +; IMPORT: @gFoo.llvm.0 = internal unnamed_addr global i32 1, align 4, !dbg !0 +; IMPORT-NEXT: @gBar = internal local_unnamed_addr global i32 2, align 4, !dbg !5 +; IMPORT: !DICompileUnit({{.*}}, globals: !{{[0-9]+}}) + +; OPTIMIZE: define i32 @main +; OPTIMIZE-NEXT: ret i32 3 + +; IMPORT2: @gBar = available_externally local_unnamed_addr global i32 2, align 4, !dbg !5 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +@gBar = external global i32 + +define i32 @main() local_unnamed_addr { + %call = tail call i32 bitcast (i32 (...)* @foo to i32 ()*)() + %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() + %add = add nsw i32 %call1, %call + ret i32 %add +} + +declare i32 @foo(...) local_unnamed_addr + +declare i32 @bar(...) 
local_unnamed_addr diff --git a/test/ThinLTO/X86/index-const-prop2.ll b/test/ThinLTO/X86/index-const-prop2.ll new file mode 100644 index 00000000000..d6c8ec80661 --- /dev/null +++ b/test/ThinLTO/X86/index-const-prop2.ll @@ -0,0 +1,59 @@ +; Check constant propagation in thinlto combined summary. This allows us to do 2 things: +; 1. Internalize global definition which is not used externally if all accesses to it are read-only +; 2. Make a local copy of internal definition if all accesses to it are readonly. This allows constant +; folding it during optimization phase. +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop.ll -o %t2.bc +; RUN: llvm-lto2 run %t1.bc %t2.bc -save-temps \ +; RUN: -r=%t2.bc,foo,pl \ +; RUN: -r=%t2.bc,bar,pl \ +; RUN: -r=%t2.bc,baz,pl \ +; RUN: -r=%t2.bc,rand, \ +; RUN: -r=%t2.bc,gBar,pl \ +; RUN: -r=%t1.bc,main,plx \ +; RUN: -r=%t1.bc,foo, \ +; RUN: -r=%t1.bc,bar, \ +; RUN: -r=%t1.bc,gBar, \ +; RUN: -o %t3 +; RUN: llvm-dis %t3.1.3.import.bc -o - | FileCheck %s --check-prefix=IMPORT +; RUN: llvm-dis %t3.1.5.precodegen.bc -o - | FileCheck %s --check-prefix=CODEGEN + +; Now check that we won't internalize global (gBar) if it's externally referenced +; RUN: llvm-lto2 run %t1.bc %t2.bc -save-temps \ +; RUN: -r=%t2.bc,foo,pl \ +; RUN: -r=%t2.bc,bar,pl \ +; RUN: -r=%t2.bc,baz,pl \ +; RUN: -r=%t2.bc,rand, \ +; RUN: -r=%t2.bc,gBar,plx \ +; RUN: -r=%t1.bc,main,plx \ +; RUN: -r=%t1.bc,foo, \ +; RUN: -r=%t1.bc,bar, \ +; RUN: -r=%t1.bc,gBar, \ +; RUN: -o %t3 +; RUN: llvm-dis %t3.1.3.import.bc -o - | FileCheck %s --check-prefix=IMPORT2 + +; IMPORT: @gFoo.llvm.0 = internal unnamed_addr global i32 1, align 4 +; IMPORT-NEXT: @gBar = internal local_unnamed_addr global i32 2, align 4 +; IMPORT: !DICompileUnit({{.*}}, globals: !{{[0-9]+}}) + +; CODEGEN: i32 @main() +; CODEGEN-NEXT: ret i32 3 + +; IMPORT2: @gBar = available_externally dso_local local_unnamed_addr global i32 2, align 4 + +target datalayout = 
"e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +; We should be able to link external definition of gBar to its declaration +@gBar = external global i32 + +define i32 @main() local_unnamed_addr { + %call = tail call i32 bitcast (i32 (...)* @foo to i32 ()*)() + %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() + %add = add nsw i32 %call1, %call + ret i32 %add +} + +declare i32 @foo(...) local_unnamed_addr + +declare i32 @bar(...) local_unnamed_addr -- 2.11.0