From: Teresa Johnson Date: Thu, 26 Dec 2019 19:40:18 +0000 (-0800) Subject: [LTO/WPD] Enable aggressive WPD under LTO option X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=59733525d37cf9ad88b5021b33ecdbaf2e18911c;p=android-x86%2Fexternal-llvm-project.git [LTO/WPD] Enable aggressive WPD under LTO option Summary: Third part in series to support Safe Whole Program Devirtualization Enablement, see RFC here: http://lists.llvm.org/pipermail/llvm-dev/2019-December/137543.html This patch adds type test metadata under -fwhole-program-vtables, even for classes without hidden visibility. It then changes WPD to skip devirtualization for a virtual function call when any of the compatible vtables has public vcall visibility. Additionally, internal LLVM options as well as lld and gold-plugin options are added which enable upgrading all public vcall visibility to linkage unit (hidden) visibility during LTO. This enables the more aggressive WPD to kick in based on LTO time knowledge of the visibility guarantees. Support was added to all flavors of LTO WPD (regular, hybrid and index-only), and to both the new and old LTO APIs. Unfortunately it was not simple to split the first and second parts of this part of the change (the unconditional emission of type tests and the upgrading of the vcall visibility) as I needed a way to upgrade the public visibility on legacy WPD llvm assembly tests that don't include linkage unit vcall visibility specifiers, to avoid a lot of test churn. I also added a mechanism to LowerTypeTests that allows dropping type test assume sequences we now aggressively insert when we invoke distributed ThinLTO backends with null indexes, which is used in testing mode, and which doesn't invoke the normal ThinLTO backend pipeline. Depends on D71907 and D71911. 
Reviewers: pcc, evgeny777, steven_wu, espindola Subscribers: emaste, Prazek, inglorion, arichardson, hiraditya, MaskRay, dexonsmith, dang, davidxl, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D71913 --- diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index f379c103b69..6eff6bd7b7c 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -51,6 +51,7 @@ #include "llvm/Transforms/Coroutines.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/LowerTypeTests.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" #include "llvm/Transforms/InstCombine/InstCombine.h" @@ -553,6 +554,16 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, std::unique_ptr TLII( createTLII(TargetTriple, CodeGenOpts)); + // If we reached here with a non-empty index file name, then the index file + // was empty and we are not performing ThinLTO backend compilation (used in + // testing in a distributed build environment). Drop any the type test + // assume sequences inserted for whole program vtables so that codegen doesn't + // complain. + if (!CodeGenOpts.ThinLTOIndexFile.empty()) + MPM.add(createLowerTypeTestsPass(/*ExportSummary=*/nullptr, + /*ImportSummary=*/nullptr, + /*DropTypeTests=*/true)); + PassManagerBuilderWrapper PMBuilder(TargetTriple, CodeGenOpts, LangOpts); // At O0 and O1 we only run the always inliner which is more efficient. At @@ -1114,6 +1125,15 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( bool IsLTO = CodeGenOpts.PrepareForLTO; if (CodeGenOpts.OptimizationLevel == 0) { + // If we reached here with a non-empty index file name, then the index + // file was empty and we are not performing ThinLTO backend compilation + // (used in testing in a distributed build environment). 
Drop any the type + // test assume sequences inserted for whole program vtables so that + // codegen doesn't complain. + if (!CodeGenOpts.ThinLTOIndexFile.empty()) + MPM.addPass(LowerTypeTestsPass(/*ExportSummary=*/nullptr, + /*ImportSummary=*/nullptr, + /*DropTypeTests=*/true)); if (Optional Options = getGCOVOptions(CodeGenOpts)) MPM.addPass(GCOVProfilerPass(*Options)); if (Optional Options = @@ -1150,6 +1170,18 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( // configure the pipeline. PassBuilder::OptimizationLevel Level = mapToLevel(CodeGenOpts); + // If we reached here with a non-empty index file name, then the index + // file was empty and we are not performing ThinLTO backend compilation + // (used in testing in a distributed build environment). Drop any the type + // test assume sequences inserted for whole program vtables so that + // codegen doesn't complain. + if (!CodeGenOpts.ThinLTOIndexFile.empty()) + PB.registerPipelineStartEPCallback([](ModulePassManager &MPM) { + MPM.addPass(LowerTypeTestsPass(/*ExportSummary=*/nullptr, + /*ImportSummary=*/nullptr, + /*DropTypeTests=*/true)); + }); + PB.registerPipelineStartEPCallback([](ModulePassManager &MPM) { MPM.addPass(createModuleToFunctionPassAdaptor( EntryExitInstrumenterPass(/*PostInlining=*/false))); diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index 3f3825b7627..7389207bc8a 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -2641,7 +2641,9 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, if (SanOpts.has(SanitizerKind::CFIVCall)) EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc); else if (CGM.getCodeGenOpts().WholeProgramVTables && - CGM.HasHiddenLTOVisibility(RD)) { + // Don't insert type test assumes if we are forcing public std + // visibility. 
+ !CGM.HasLTOVisibilityPublicStd(RD)) { llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); llvm::Value *TypeId = diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp index cbc969a1ac3..403b9e25f7a 100644 --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -1011,6 +1011,26 @@ void CodeGenModule::EmitDeferredVTables() { DeferredVTables.clear(); } +bool CodeGenModule::HasLTOVisibilityPublicStd(const CXXRecordDecl *RD) { + if (!getCodeGenOpts().LTOVisibilityPublicStd) + return false; + + const DeclContext *DC = RD; + while (1) { + auto *D = cast(DC); + DC = DC->getParent(); + if (isa(DC->getRedeclContext())) { + if (auto *ND = dyn_cast(D)) + if (const IdentifierInfo *II = ND->getIdentifier()) + if (II->isStr("std") || II->isStr("stdext")) + return true; + break; + } + } + + return false; +} + bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) { LinkageInfo LV = RD->getLinkageAndVisibility(); if (!isExternallyVisible(LV.getLinkage())) @@ -1027,22 +1047,7 @@ bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) { return false; } - if (getCodeGenOpts().LTOVisibilityPublicStd) { - const DeclContext *DC = RD; - while (1) { - auto *D = cast(DC); - DC = DC->getParent(); - if (isa(DC->getRedeclContext())) { - if (auto *ND = dyn_cast(D)) - if (const IdentifierInfo *II = ND->getIdentifier()) - if (II->isStr("std") || II->isStr("stdext")) - return false; - break; - } - } - } - - return true; + return !HasLTOVisibilityPublicStd(RD); } llvm::GlobalObject::VCallVisibility diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 115e754bb39..a711d5ccba0 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1292,6 +1292,11 @@ public: /// optimization. 
bool HasHiddenLTOVisibility(const CXXRecordDecl *RD); + /// Returns whether the given record has public std LTO visibility + /// and therefore may not participate in (single-module) CFI and whole-program + /// vtable optimization. + bool HasLTOVisibilityPublicStd(const CXXRecordDecl *RD); + /// Returns the vcall visibility of the given type. This is the scope in which /// a virtual function call could be made which ends up being dispatched to a /// member function of this class. This scope can be wider than the visibility diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index b5b8702c551..057c726e355 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -670,6 +670,10 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( CGM.HasHiddenLTOVisibility(RD); bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination && CGM.HasHiddenLTOVisibility(RD); + bool ShouldEmitWPDInfo = + CGM.getCodeGenOpts().WholeProgramVTables && + // Don't insert type tests if we are forcing public std visibility. + !CGM.HasLTOVisibilityPublicStd(RD); llvm::Value *VirtualFn = nullptr; { @@ -677,8 +681,9 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( llvm::Value *TypeId = nullptr; llvm::Value *CheckResult = nullptr; - if (ShouldEmitCFICheck || ShouldEmitVFEInfo) { - // If doing CFI or VFE, we will need the metadata node to check against. + if (ShouldEmitCFICheck || ShouldEmitVFEInfo || ShouldEmitWPDInfo) { + // If doing CFI, VFE or WPD, we will need the metadata node to check + // against. llvm::Metadata *MD = CGM.CreateMetadataIdentifierForVirtualMemPtrType(QualType(MPT, 0)); TypeId = llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD); @@ -702,7 +707,7 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( } else { // When not doing VFE, emit a normal load, as it allows more // optimisations than type.checked.load. 
- if (ShouldEmitCFICheck) { + if (ShouldEmitCFICheck || ShouldEmitWPDInfo) { CheckResult = Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::type_test), {Builder.CreateBitCast(VFPAddr, CGF.Int8PtrTy), TypeId}); @@ -713,7 +718,8 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( "memptr.virtualfn"); } assert(VirtualFn && "Virtual fuction pointer not created!"); - assert((!ShouldEmitCFICheck || !ShouldEmitVFEInfo || CheckResult) && + assert((!ShouldEmitCFICheck || !ShouldEmitVFEInfo || !ShouldEmitWPDInfo || + CheckResult) && "Check result required but not created!"); if (ShouldEmitCFICheck) { diff --git a/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll b/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll index 2920cf19c81..b46845afba2 100644 --- a/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll +++ b/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll @@ -8,6 +8,7 @@ ; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436. ; RUN: llvm-lto2 run -thinlto-distributed-indexes %t.o \ +; RUN: -whole-program-visibility \ ; RUN: -verify-machineinstrs=0 \ ; RUN: -o %t2.index \ ; RUN: -r=%t.o,test,px \ diff --git a/clang/test/CodeGenCXX/cfi-mfcall.cpp b/clang/test/CodeGenCXX/cfi-mfcall.cpp index c16b20b8dce..47f3a2616f6 100644 --- a/clang/test/CodeGenCXX/cfi-mfcall.cpp +++ b/clang/test/CodeGenCXX/cfi-mfcall.cpp @@ -1,5 +1,8 @@ // RUN: %clang_cc1 -triple x86_64-unknown-linux -fsanitize=cfi-mfcall -fsanitize-trap=cfi-mfcall -fvisibility hidden -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple x86_64-unknown-linux -fsanitize=cfi-mfcall -fsanitize-trap=cfi-mfcall -fvisibility default -emit-llvm -o - %s | FileCheck --check-prefix=DEFAULT %s +// With -fwhole-program-vtables we should get the member function pointer type +// test, even without hidden visibility. 
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck %s --check-prefix=WPV struct B1 {}; struct B2 {}; @@ -9,6 +12,9 @@ struct S : B1, B3 {}; // DEFAULT-NOT: llvm.type.test void f(S *s, void (S::*p)()) { + // WPV: [[OFFSET:%.*]] = sub i64 {{.*}}, 1 + // WPV: [[VFPTR:%.*]] = getelementptr i8, i8* %{{.*}}, i64 [[OFFSET]] + // WPV: [[TT:%.*]] = call i1 @llvm.type.test(i8* [[VFPTR]], metadata !"_ZTSM1SFvvE.virtual") // CHECK: [[OFFSET:%.*]] = sub i64 {{.*}}, 1 // CHECK: [[VFPTR:%.*]] = getelementptr i8, i8* %{{.*}}, i64 [[OFFSET]] // CHECK: [[TT:%.*]] = call i1 @llvm.type.test(i8* [[VFPTR]], metadata !"_ZTSM1SFvvE.virtual") diff --git a/clang/test/CodeGenCXX/lto-visibility-inference.cpp b/clang/test/CodeGenCXX/lto-visibility-inference.cpp index 8e57ef5e0b8..632b6e643f3 100644 --- a/clang/test/CodeGenCXX/lto-visibility-inference.cpp +++ b/clang/test/CodeGenCXX/lto-visibility-inference.cpp @@ -70,20 +70,20 @@ void f(C1 *c1, C2 *c2, C3 *c3, C4 *c4, C5 *c5, C6 *c6, std::C7 *c7, // ITANIUM: type.test{{.*}}!"_ZTS2C1" // MS: type.test{{.*}}!"?AUC1@@" c1->f(); - // ITANIUM-NOT: type.test{{.*}}!"_ZTS2C2" + // ITANIUM: type.test{{.*}}!"_ZTS2C2" // MS: type.test{{.*}}!"?AUC2@@" c2->f(); // ITANIUM: type.test{{.*}}!"_ZTS2C3" - // MS-NOT: type.test{{.*}}!"?AUC3@@" + // MS: type.test{{.*}}!"?AUC3@@" c3->f(); // ITANIUM: type.test{{.*}}!"_ZTS2C4" - // MS-NOT: type.test{{.*}}!"?AUC4@@" + // MS: type.test{{.*}}!"?AUC4@@" c4->f(); - // ITANIUM-NOT: type.test{{.*}}!"_ZTS2C5" - // MS-NOT: type.test{{.*}}!"?AUC5@@" + // ITANIUM: type.test{{.*}}!"_ZTS2C5" + // MS: type.test{{.*}}!"?AUC5@@" c5->f(); - // ITANIUM-NOT: type.test{{.*}}!"_ZTS2C6" - // MS-NOT: type.test{{.*}}!"?AUC6@@" + // ITANIUM: type.test{{.*}}!"_ZTS2C6" + // MS: type.test{{.*}}!"?AUC6@@" c6->f(); // ITANIUM: type.test{{.*}}!"_ZTSSt2C7" // MS-STD: type.test{{.*}}!"?AUC7@std@@" diff --git a/clang/test/CodeGenCXX/thinlto-distributed-type-metadata.cpp 
b/clang/test/CodeGenCXX/thinlto-distributed-type-metadata.cpp new file mode 100644 index 00000000000..7e1cedffde6 --- /dev/null +++ b/clang/test/CodeGenCXX/thinlto-distributed-type-metadata.cpp @@ -0,0 +1,69 @@ +// Test distributed ThinLTO backend handling of type tests + +// REQUIRES: x86-registered-target + +// Ensure that a distributed backend invocation of ThinLTO lowers the type test +// as expected. +// RUN: %clang_cc1 -flto=thin -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm-bc -o %t.o %s +// RUN: llvm-dis %t.o -o - | FileCheck --check-prefix=TT %s +// RUN: llvm-lto -thinlto -o %t2 %t.o +// RUN: %clang -target x86_64-unknown-linux -O2 -o %t3.o -x ir %t.o -c -fthinlto-index=%t2.thinlto.bc -save-temps=obj +// RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s +// llvm-nm %t3.o | FileCheck --check-prefix=NM %s + +// The pre-link bitcode produced by clang should contain a type test assume +// sequence. +// TT: [[TTREG:%[0-9]+]] = call i1 @llvm.type.test({{.*}}, metadata !"_ZTS1A") +// TT: void @llvm.assume(i1 [[TTREG]]) + +// The ThinLTO backend optimized bitcode should not have any type test assume +// sequences. +// OPT-NOT: @llvm.type.test +// OPT-NOT: call void @llvm.assume +// We should have only one @llvm.assume call, the one that was expanded +// from the builtin in the IR below, not the one fed by the type test. +// OPT: %cmp = icmp ne %struct.A* %0, null +// OPT: void @llvm.assume(i1 %cmp) +// Check after the builtin assume again that we don't have a type test assume +// sequence. +// OPT-NOT: @llvm.type.test +// OPT-NOT: call void @llvm.assume + +// NM: T _Z2afP1A + +// Also check type test are lowered when the distributed ThinLTO backend clang +// invocation is passed an empty index file, in which case a non-ThinLTO +// compilation pipeline is invoked. If not lowered then LLVM CodeGen may assert. 
+// RUN: touch %t4.thinlto.bc +// O2 old PM +// RUN: %clang -target x86_64-unknown-linux -O2 -o %t4.o -x ir %t.o -c -fthinlto-index=%t4.thinlto.bc -save-temps=obj +// RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s +// llvm-nm %t4.o | FileCheck --check-prefix=NM %s +// O2 new PM +// RUN: %clang -target x86_64-unknown-linux -O2 -o %t4.o -x ir %t.o -c -fthinlto-index=%t4.thinlto.bc -fexperimental-new-pass-manager -save-temps=obj +// RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s +// llvm-nm %t4.o | FileCheck --check-prefix=NM %s +// O0 new PM +// RUN: %clang -target x86_64-unknown-linux -O0 -o %t4.o -x ir %t.o -c -fthinlto-index=%t4.thinlto.bc -fexperimental-new-pass-manager -save-temps=obj +// RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s +// llvm-nm %t4.o | FileCheck --check-prefix=NM %s + +struct A { + A(); + virtual void f(); +}; + +struct B : virtual A { + B(); +}; + +A::A() {} +B::B() {} + +void A::f() { +} + +void af(A *a) { + __builtin_assume(a != 0); + a->f(); +} diff --git a/clang/test/CodeGenCXX/type-metadata.cpp b/clang/test/CodeGenCXX/type-metadata.cpp index a7a34673cdf..05731f15b9d 100644 --- a/clang/test/CodeGenCXX/type-metadata.cpp +++ b/clang/test/CodeGenCXX/type-metadata.cpp @@ -6,6 +6,7 @@ // Tests for the whole-program-vtables feature: // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility hidden -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=ITANIUM --check-prefix=TT-ITANIUM %s +// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=ITANIUM-DEFAULTVIS --check-prefix=TT-ITANIUM %s // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=MS --check-prefix=TT-MS %s // Tests for cfi + whole-program-vtables: @@ 
-129,6 +130,7 @@ void D::h() { } // ITANIUM: define hidden void @_Z2afP1A +// ITANIUM-DEFAULTVIS: define void @_Z2afP1A // MS: define dso_local void @"?af@@YAXPEAUA@@@Z" void af(A *a) { // TT-ITANIUM: [[P:%[^ ]*]] = call i1 @llvm.type.test(i8* [[VT:%[^ ]*]], metadata !"_ZTS1A") @@ -239,6 +241,7 @@ struct D : C { }; // ITANIUM: define hidden void @_ZN5test21fEPNS_1DE +// ITANIUM-DEFAULTVIS: define void @_ZN5test21fEPNS_1DE // MS: define dso_local void @"?f@test2@@YAXPEAUD@1@@Z" void f(D *d) { // TT-ITANIUM: {{%[^ ]*}} = call i1 @llvm.type.test(i8* {{%[^ ]*}}, metadata !"_ZTSN5test21DE") diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index ef1edbcd199..6655ab7a774 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -165,6 +165,7 @@ struct Configuration { bool ltoCSProfileGenerate; bool ltoDebugPassManager; bool ltoNewPassManager; + bool ltoWholeProgramVisibility; bool mergeArmExidx; bool mipsN32Abi = false; bool mmapOutputFile; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index e85183e14e2..eb2fc4f1945 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -899,6 +899,8 @@ static void readConfigs(opt::InputArgList &args) { config->ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager); config->ltoNewPassManager = args.hasArg(OPT_lto_new_pass_manager); config->ltoNewPmPasses = args.getLastArgValue(OPT_lto_newpm_passes); + config->ltoWholeProgramVisibility = + args.hasArg(OPT_lto_whole_program_visibility); config->ltoo = args::getInteger(args, OPT_lto_O, 2); config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq); config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 2148ac50029..d8e343c4887 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -111,6 +111,8 @@ static lto::Config createConfig() { c.DebugPassManager = config->ltoDebugPassManager; c.DwoDir = config->dwoDir; + c.HasWholeProgramVisibility = config->ltoWholeProgramVisibility; + c.CSIRProfile = 
config->ltoCSProfileFile; c.RunCSIRInstr = config->ltoCSProfileGenerate; diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index ea78a352621..f45ad5a0af2 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -479,6 +479,8 @@ def lto_cs_profile_file: J<"lto-cs-profile-file=">, def lto_obj_path_eq: J<"lto-obj-path=">; def lto_sample_profile: J<"lto-sample-profile=">, HelpText<"Sample profile file path">; +def lto_whole_program_visibility: F<"lto-whole-program-visibility">, + HelpText<"Asserts that the LTO link has whole program visibility">; def disable_verify: F<"disable-verify">; defm mllvm: Eq<"mllvm", "Additional arguments to forward to LLVM's option processing">; def opt_remarks_filename: Separate<["--"], "opt-remarks-filename">, diff --git a/lld/test/ELF/lto/devirt_vcall_vis_public.ll b/lld/test/ELF/lto/devirt_vcall_vis_public.ll new file mode 100644 index 00000000000..a1a9d1237c7 --- /dev/null +++ b/lld/test/ELF/lto/devirt_vcall_vis_public.ll @@ -0,0 +1,127 @@ +; Test that -lto-whole-program-visibility enables devirtualization. + +; Index based WPD +; Generate unsplit module with summary for ThinLTO index-based WPD. +; RUN: opt -thinlto-bc -o %t2.o %s +; RUN: ld.lld %t2.o -o %t3 -save-temps -lto-whole-program-visibility \ +; RUN: -mllvm -pass-remarks=. --export-dynamic 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t2.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; Hybrid WPD +; Generate split module with summary for hybrid Thin/Regular LTO WPD. +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s +; RUN: ld.lld %t.o -o %t3 -save-temps -lto-whole-program-visibility \ +; RUN: -mllvm -pass-remarks=. --export-dynamic 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; Regular LTO WPD +; RUN: opt -o %t4.o %s +; RUN: ld.lld %t4.o -o %t3 -save-temps -lto-whole-program-visibility \ +; RUN: -mllvm -pass-remarks=. 
--export-dynamic 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t3.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi +; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi + +; Try everything again but without -whole-program-visibility to confirm +; WPD fails + +; Index based WPD +; RUN: ld.lld %t2.o -o %t3 -save-temps \ +; RUN: -mllvm -pass-remarks=. --export-dynamic 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t2.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +; Hybrid WPD +; RUN: ld.lld %t.o -o %t3 -save-temps \ +; RUN: -mllvm -pass-remarks=. --export-dynamic 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +; Regular LTO WPD +; RUN: ld.lld %t4.o -o %t3 -save-temps \ +; RUN: -mllvm -pass-remarks=. --export-dynamic 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t3.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } +%struct.C = type { %struct.A } +%struct.D = type { i32 (...)** } + +@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1, !vcall_visibility !5 +@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2, !vcall_visibility !5 +@_ZTV1D = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3, 
!vcall_visibility !5 + + +; CHECK-IR-LABEL: define dso_local i32 @_start +define i32 @_start(%struct.A* %obj, %struct.D* %obj2, i32 %a) { +entry: + %0 = bitcast %struct.A* %obj to i8*** + %vtable = load i8**, i8*** %0 + %1 = bitcast i8** %vtable to i8* + %p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A") + call void @llvm.assume(i1 %p) + %fptrptr = getelementptr i8*, i8** %vtable, i32 1 + %2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)** + %fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8 + + ; Check that the call was devirtualized. + ; CHECK-IR: %call = tail call i32 @_ZN1A1nEi + ; CHECK-NODEVIRT-IR: %call = tail call i32 %fptr1 + %call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a) + + %3 = bitcast i8** %vtable to i32 (%struct.A*, i32)** + %fptr22 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %3, align 8 + + ; We still have to call it as virtual. + ; CHECK-IR: %call3 = tail call i32 %fptr22 + ; CHECK-NODEVIRT-IR: %call3 = tail call i32 %fptr22 + %call3 = tail call i32 %fptr22(%struct.A* nonnull %obj, i32 %call) + + %4 = bitcast %struct.D* %obj2 to i8*** + %vtable2 = load i8**, i8*** %4 + %5 = bitcast i8** %vtable2 to i8* + %p2 = call i1 @llvm.type.test(i8* %5, metadata !4) + call void @llvm.assume(i1 %p2) + + %6 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)** + %fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %6, align 8 + + ; Check that the call was devirtualized. 
+ ; CHECK-IR: %call4 = tail call i32 @_ZN1D1mEi + ; CHECK-NODEVIRT-IR: %call4 = tail call i32 %fptr33 + %call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 %call3) + ret i32 %call4 +} +; CHECK-IR-LABEL: ret i32 +; CHECK-IR-LABEL: } + +declare i1 @llvm.type.test(i8*, metadata) +declare void @llvm.assume(i1) + +define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 { + ret i32 0; +} + +; Make sure we don't inline or otherwise optimize out the direct calls. +attributes #0 = { noinline optnone } + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTS1B"} +!2 = !{i64 16, !"_ZTS1C"} +!3 = !{i64 16, !4} +!4 = distinct !{} +!5 = !{i64 0} diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h index 50147300f7f..2ca7ad4fda0 100644 --- a/llvm/include/llvm/LTO/Config.h +++ b/llvm/include/llvm/LTO/Config.h @@ -61,6 +61,10 @@ struct Config { /// Run PGO context sensitive IR instrumentation. bool RunCSIRInstr = false; + /// Asserts whether we can assume whole program visibility during the LTO + /// link. + bool HasWholeProgramVisibility = false; + /// If this field is set, the set of passes run in the middle-end optimizer /// will be the one specified by the string. Only works with the new pass /// manager as the old one doesn't have this ability. diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h index de0c80f5b19..daf44822aeb 100644 --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -236,12 +236,15 @@ enum class PassSummaryAction { /// The behavior depends on the summary arguments: /// - If ExportSummary is non-null, this pass will export type identifiers to /// the given summary. 
-/// - Otherwise, if ImportSummary is non-null, this pass will import type -/// identifiers from the given summary. -/// - Otherwise it does neither. -/// It is invalid for both ExportSummary and ImportSummary to be non-null. +/// - If ImportSummary is non-null, this pass will import type identifiers from +/// the given summary. +/// - Otherwise, if both are null and DropTypeTests is true, all type test +/// assume sequences will be removed from the IR. +/// It is invalid for both ExportSummary and ImportSummary to be non-null +/// unless DropTypeTests is true. ModulePass *createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary); + const ModuleSummaryIndex *ImportSummary, + bool DropTypeTests = false); /// This pass export CFI checks for use by external modules. ModulePass *createCrossDSOCFIPass(); diff --git a/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h b/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h index 3c2bb65b955..5e91ae59936 100644 --- a/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h +++ b/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h @@ -201,9 +201,12 @@ class LowerTypeTestsPass : public PassInfoMixin { public: ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; + bool DropTypeTests; LowerTypeTestsPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) - : ExportSummary(ExportSummary), ImportSummary(ImportSummary) {} + const ModuleSummaryIndex *ImportSummary, + bool DropTypeTests = false) + : ExportSummary(ExportSummary), ImportSummary(ImportSummary), + DropTypeTests(DropTypeTests) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; diff --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h index 8af2af7f352..86e28cfead8 100644 --- a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h +++ b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h @@ -236,6 +236,11 @@ 
struct VTableSlotSummary { uint64_t ByteOffset; }; +void updateVCallVisibilityInModule(Module &M, + bool WholeProgramVisibilityEnabledInLTO); +void updateVCallVisibilityInIndex(ModuleSummaryIndex &Index, + bool WholeProgramVisibilityEnabledInLTO); + /// Perform index-based whole program devirtualization on the \p Summary /// index. Any devirtualized targets used by a type test in another module /// are added to the \p ExportedGUIDs set. For any local devirtualized targets diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 297b11de17a..e8f0fd6866d 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -982,6 +982,11 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { } } + // If allowed, upgrade public vcall visibility metadata to linkage unit + // visibility before whole program devirtualization in the optimizer. + updateVCallVisibilityInModule(*RegularLTO.CombinedModule, + Conf.HasWholeProgramVisibility); + if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(0, *RegularLTO.CombinedModule)) return Error::success(); @@ -1299,6 +1304,11 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, std::set ExportedGUIDs; + // If allowed, upgrade public vcall visibility to linkage unit visibility in + // the summaries before whole program devirtualization below. + updateVCallVisibilityInIndex(ThinLTO.CombinedIndex, + Conf.HasWholeProgramVisibility); + // Perform index-based WPD. This will return immediately if there are // no index entries in the typeIdMetadata map (e.g. if we are instead // performing IR-based WPD in hybrid regular/thin LTO mode). 
diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp index 5fef14230a9..b3bc727e50a 100644 --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -57,6 +57,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include @@ -542,6 +543,13 @@ bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline, } StatsFile = std::move(StatsFileOrErr.get()); + // Currently there is no support for enabling whole program visibility via a + // linker option in the old LTO API, but this call allows it to be specified + // via the internal option. Must be done before WPD invoked via the optimizer + // pipeline run below. + updateVCallVisibilityInModule(*MergedModule, + /* WholeProgramVisibilityEnabledInLTO */ false); + // We always run the verifier once on the merged module, the `DisableVerify` // parameter only applies to subsequent verify. verifyMergedModuleOnce(); diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index 0bb518bf6cc..f4099e68315 100644 --- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -969,6 +969,12 @@ void ThinLTOCodeGenerator::run() { // Synthesize entry counts for functions in the combined index. computeSyntheticCounts(*Index); + // Currently there is no support for enabling whole program visibility via a + // linker option in the old LTO API, but this call allows it to be specified + // via the internal option. Must be done before WPD below. + updateVCallVisibilityInIndex(*Index, + /* WholeProgramVisibilityEnabledInLTO */ false); + // Perform index-based WPD. This will return immediately if there are // no index entries in the typeIdMetadata map (e.g. 
if we are instead // performing IR-based WPD in hybrid regular/thin LTO mode). diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index e6747a68e67..6eba35aaa4e 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -382,6 +382,9 @@ class LowerTypeTestsModule { ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; + // Set when the client has invoked this to simply drop all type test assume + // sequences. + bool DropTypeTests; Triple::ArchType Arch; Triple::OSType OS; @@ -500,7 +503,8 @@ class LowerTypeTestsModule { public: LowerTypeTestsModule(Module &M, ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary); + const ModuleSummaryIndex *ImportSummary, + bool DropTypeTests); bool lower(); @@ -516,22 +520,24 @@ struct LowerTypeTests : public ModulePass { ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; + bool DropTypeTests; LowerTypeTests() : ModulePass(ID), UseCommandLine(true) { initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry()); } LowerTypeTests(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) + const ModuleSummaryIndex *ImportSummary, bool DropTypeTests) : ModulePass(ID), ExportSummary(ExportSummary), - ImportSummary(ImportSummary) { + ImportSummary(ImportSummary), DropTypeTests(DropTypeTests) { initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry()); } bool runOnModule(Module &M) override { if (UseCommandLine) return LowerTypeTestsModule::runForTesting(M); - return LowerTypeTestsModule(M, ExportSummary, ImportSummary).lower(); + return LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests) + .lower(); } }; @@ -544,8 +550,9 @@ INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false, ModulePass * llvm::createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) { - 
return new LowerTypeTests(ExportSummary, ImportSummary); + const ModuleSummaryIndex *ImportSummary, + bool DropTypeTests) { + return new LowerTypeTests(ExportSummary, ImportSummary, DropTypeTests); } /// Build a bit set for TypeId using the object layouts in @@ -1655,8 +1662,9 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet( /// Lower all type tests in this module. LowerTypeTestsModule::LowerTypeTestsModule( Module &M, ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) - : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary) { + const ModuleSummaryIndex *ImportSummary, bool DropTypeTests) + : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary), + DropTypeTests(DropTypeTests) { assert(!(ExportSummary && ImportSummary)); Triple TargetTriple(M.getTargetTriple()); Arch = TargetTriple.getArch(); @@ -1683,7 +1691,8 @@ bool LowerTypeTestsModule::runForTesting(Module &M) { bool Changed = LowerTypeTestsModule( M, ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr, - ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr) + ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr, + /*DropTypeTests*/ false) .lower(); if (!ClWriteSummary.empty()) { @@ -1750,6 +1759,33 @@ void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) { } bool LowerTypeTestsModule::lower() { + Function *TypeTestFunc = + M.getFunction(Intrinsic::getName(Intrinsic::type_test)); + + if (DropTypeTests && TypeTestFunc) { + for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end(); + UI != UE;) { + auto *CI = cast((*UI++).getUser()); + // Find and erase llvm.assume intrinsics for this llvm.type.test call. 
+ for (auto CIU = CI->use_begin(), CIUE = CI->use_end(); CIU != CIUE;) { + if (auto *AssumeCI = dyn_cast((*CIU++).getUser())) { + Function *F = AssumeCI->getCalledFunction(); + if (F && F->getIntrinsicID() == Intrinsic::assume) + AssumeCI->eraseFromParent(); + } + } + CI->eraseFromParent(); + } + + // We have deleted the type intrinsics, so we no longer have enough + // information to reason about the liveness of virtual function pointers + // in GlobalDCE. + for (GlobalVariable &GV : M.globals()) + GV.eraseMetadata(LLVMContext::MD_vcall_visibility); + + return true; + } + // If only some of the modules were split, we cannot correctly perform // this transformation. We already checked for the presense of type tests // with partially split modules during the thin link, and would have emitted @@ -1758,8 +1794,6 @@ bool LowerTypeTestsModule::lower() { (ImportSummary && ImportSummary->partiallySplitLTOUnits())) return false; - Function *TypeTestFunc = - M.getFunction(Intrinsic::getName(Intrinsic::type_test)); Function *ICallBranchFunnelFunc = M.getFunction(Intrinsic::getName(Intrinsic::icall_branch_funnel)); if ((!TypeTestFunc || TypeTestFunc->use_empty()) && @@ -2196,7 +2230,9 @@ bool LowerTypeTestsModule::lower() { PreservedAnalyses LowerTypeTestsPass::run(Module &M, ModuleAnalysisManager &AM) { - bool Changed = LowerTypeTestsModule(M, ExportSummary, ImportSummary).lower(); + bool Changed = + LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests) + .lower(); if (!Changed) return PreservedAnalyses::all(); return PreservedAnalyses::none(); diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 5ccfb29b01a..9e8eaf3ae1a 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -134,6 +134,22 @@ static cl::opt cl::init(false), cl::ZeroOrMore, cl::desc("Print index-based devirtualization messages")); +/// Provide a way to force enable whole 
program visibility in tests. +/// This is needed to support legacy tests that don't contain +/// !vcall_visibility metadata (the mere presence of type tests +/// previously implied hidden visibility). +cl::opt + WholeProgramVisibility("whole-program-visibility", cl::init(false), + cl::Hidden, cl::ZeroOrMore, + cl::desc("Enable whole program visibility")); + +/// Provide a way to force disable whole program visibility for debugging or +/// workarounds, when enabled via the linker. +cl::opt DisableWholeProgramVisibility( + "disable-whole-program-visibility", cl::init(false), cl::Hidden, + cl::ZeroOrMore, + cl::desc("Disable whole program visibility (overrides enabling options)")); + // Find the minimum offset that we may store a value of size Size bits at. If // IsAfter is set, look for an offset before the object, otherwise look for an // offset after the object. @@ -702,7 +718,49 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M, return PreservedAnalyses::none(); } +// Enable whole program visibility if enabled by client (e.g. linker) or +// internal option, and not force disabled. +static bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) { + return (WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility) && + !DisableWholeProgramVisibility; +} + namespace llvm { + +/// If whole program visibility asserted, then upgrade all public vcall +/// visibility metadata on vtable definitions to linkage unit visibility in +/// Module IR (for regular or hybrid LTO). +void updateVCallVisibilityInModule(Module &M, + bool WholeProgramVisibilityEnabledInLTO) { + if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) + return; + for (GlobalVariable &GV : M.globals()) + // Add linkage unit visibility to any variable with type metadata, which are + // the vtable definitions. We won't have an existing vcall_visibility + // metadata on vtable definitions with public visibility.
+ if (GV.hasMetadata(LLVMContext::MD_type) && + GV.getVCallVisibility() == GlobalObject::VCallVisibilityPublic) + GV.setVCallVisibilityMetadata(GlobalObject::VCallVisibilityLinkageUnit); +} + +/// If whole program visibility asserted, then upgrade all public vcall +/// visibility metadata on vtable definition summaries to linkage unit +/// visibility in Module summary index (for ThinLTO). +void updateVCallVisibilityInIndex(ModuleSummaryIndex &Index, + bool WholeProgramVisibilityEnabledInLTO) { + if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) + return; + for (auto &P : Index) { + for (auto &S : P.second.SummaryList) { + auto *GVar = dyn_cast(S.get()); + if (!GVar || GVar->vTableFuncs().empty() || + GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic) + continue; + GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit); + } + } +} + void runWholeProgramDevirtOnIndex( ModuleSummaryIndex &Summary, std::set &ExportedGUIDs, std::map> &LocalWPDTargetsMap) { @@ -818,6 +876,12 @@ bool DevirtModule::tryFindVirtualCallTargets( if (!TM.Bits->GV->isConstant()) return false; + // We cannot perform whole program devirtualization analysis on a vtable + // with public LTO visibility. + if (TM.Bits->GV->getVCallVisibility() == + GlobalObject::VCallVisibilityPublic) + return false; + Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(), TM.Offset + ByteOffset, M); if (!Ptr) @@ -863,8 +927,13 @@ bool DevirtIndex::tryFindVirtualCallTargets( return false; LocalFound = true; } - if (!GlobalValue::isAvailableExternallyLinkage(S->linkage())) + if (!GlobalValue::isAvailableExternallyLinkage(S->linkage())) { VS = cast(S->getBaseObject()); + // We cannot perform whole program devirtualization analysis on a vtable + // with public LTO visibility. 
+ if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic) + return false; + } } if (!VS->isLive()) continue; @@ -1808,6 +1877,12 @@ bool DevirtModule::run() { removeRedundantTypeTests(); + // We have lowered or deleted the type intrinsics, so we will no + // longer have enough information to reason about the liveness of virtual + // function pointers in GlobalDCE. + for (GlobalVariable &GV : M.globals()) + GV.eraseMetadata(LLVMContext::MD_vcall_visibility); + // The rest of the code is only necessary when exporting or during regular // LTO, so we are done. return true; @@ -1931,7 +2006,7 @@ bool DevirtModule::run() { for (VTableBits &B : Bits) rebuildGlobal(B); - // We have lowered or deleted the type checked load intrinsics, so we no + // We have lowered or deleted the type intrinsics, so we will no // longer have enough information to reason about the liveness of virtual // function pointers in GlobalDCE. for (GlobalVariable &GV : M.globals()) diff --git a/llvm/test/ThinLTO/X86/cache-typeid-resolutions.ll b/llvm/test/ThinLTO/X86/cache-typeid-resolutions.ll index 6618a6f280f..b52cceeb52c 100644 --- a/llvm/test/ThinLTO/X86/cache-typeid-resolutions.ll +++ b/llvm/test/ThinLTO/X86/cache-typeid-resolutions.ll @@ -9,17 +9,17 @@ ; where both t and t-import are sensitive to typeid1's resolution ; so 4 distinct objects in total.
; RUN: rm -rf %t.cache -; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f1_actual,plx -r=%t.bc,f2,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc %t1.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f1_actual,plx -r=%t.bc,f2,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -r=%t1.bc,vt1,plx +; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f1_actual,plx -r=%t.bc,f2,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -whole-program-visibility +; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc %t1.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f1_actual,plx -r=%t.bc,f2,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -r=%t1.bc,vt1,plx -whole-program-visibility ; RUN: ls %t.cache | count 4 ; Three resolutions for typeid2: Indir, SingleImpl, UniqueRetVal ; where both t and t-import are sensitive to typeid2's resolution ; so 6 distinct objects in total. 
; RUN: rm -rf %t.cache -; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f2,plx -r=%t.bc,f1_actual,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc %t2.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f2,plx -r=%t.bc,f1_actual,plx -r=%t2.bc,vt2,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc %t3.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f2,plx -r=%t.bc,f1_actual,plx -r=%t3.bc,vt2a,plx -r=%t3.bc,vt2b,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx +; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f2,plx -r=%t.bc,f1_actual,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -whole-program-visibility +; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc %t2.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f2,plx -r=%t.bc,f1_actual,plx -r=%t2.bc,vt2,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -whole-program-visibility +; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc %t3.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f2,plx -r=%t.bc,f1_actual,plx -r=%t3.bc,vt2a,plx -r=%t3.bc,vt2b,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -whole-program-visibility ; RUN: ls %t.cache | count 6 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/ThinLTO/X86/cfi-devirt.ll b/llvm/test/ThinLTO/X86/cfi-devirt.ll index dd83024e263..311bed084de 100644 --- a/llvm/test/ThinLTO/X86/cfi-devirt.ll +++ b/llvm/test/ThinLTO/X86/cfi-devirt.ll @@ -6,6 +6,7 @@ ; Legacy PM ; RUN: llvm-lto2 run %t.o -save-temps 
-pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -o %t3 \ ; RUN: -r=%t.o,test,px \ ; RUN: -r=%t.o,_ZN1A1nEi,p \ @@ -23,6 +24,7 @@ ; New PM ; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -o %t3 \ ; RUN: -r=%t.o,test,px \ ; RUN: -r=%t.o,_ZN1A1nEi,p \ @@ -46,6 +48,7 @@ ; to ensure it is being caught in the thin link. ; RUN: opt -thinlto-bc -o %t2.o %S/Inputs/empty.ll ; RUN: not llvm-lto2 run %t.o %t2.o -thinlto-distributed-indexes \ +; RUN: -whole-program-visibility \ ; RUN: -o %t3 \ ; RUN: -r=%t.o,test,px \ ; RUN: -r=%t.o,_ZN1A1nEi,p \ diff --git a/llvm/test/ThinLTO/X86/devirt-after-icp.ll b/llvm/test/ThinLTO/X86/devirt-after-icp.ll index af6eba77ba4..e4ac5549867 100644 --- a/llvm/test/ThinLTO/X86/devirt-after-icp.ll +++ b/llvm/test/ThinLTO/X86/devirt-after-icp.ll @@ -46,6 +46,7 @@ ; Legacy PM ; RUN: llvm-lto2 run %t.o -save-temps -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -o %t3 \ ; RUN: -r=%t.o,_Z3bazP1A,px \ ; RUN: -r=%t.o,_ZN1A3fooEv, \ @@ -64,6 +65,7 @@ ; New PM ; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -o %t3 \ ; RUN: -r=%t.o,_Z3bazP1A,px \ ; RUN: -r=%t.o,_ZN1A3fooEv, \ diff --git a/llvm/test/ThinLTO/X86/devirt.ll b/llvm/test/ThinLTO/X86/devirt.ll index eae8c69eb06..c24685c5d6b 100644 --- a/llvm/test/ThinLTO/X86/devirt.ll +++ b/llvm/test/ThinLTO/X86/devirt.ll @@ -35,6 +35,7 @@ ; Legacy PM, Index based WPD ; RUN: llvm-lto2 run %t2.o -save-temps -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -o %t3 \ ; RUN: -r=%t2.o,test,px \ ; RUN: -r=%t2.o,_ZN1A1nEi,p \ @@ -48,6 +49,7 @@ ; New PM, Index based WPD ; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -o %t3 \ ; RUN: -r=%t2.o,test,px \ ; RUN: -r=%t2.o,_ZN1A1nEi,p \ @@ -62,6 +64,7 @@ ; Legacy PM ; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436. 
; RUN: llvm-lto2 run %t.o -save-temps -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -verify-machineinstrs=0 \ ; RUN: -o %t3 \ ; RUN: -r=%t.o,test,px \ @@ -84,6 +87,7 @@ ; New PM ; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436. ; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -verify-machineinstrs=0 \ ; RUN: -o %t3 \ ; RUN: -r=%t.o,test,px \ diff --git a/llvm/test/ThinLTO/X86/devirt2.ll b/llvm/test/ThinLTO/X86/devirt2.ll index 01eed382f24..a2cffa7be8c 100644 --- a/llvm/test/ThinLTO/X86/devirt2.ll +++ b/llvm/test/ThinLTO/X86/devirt2.ll @@ -36,6 +36,7 @@ ; Legacy PM, Index based WPD ; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -wholeprogramdevirt-print-index-based \ ; RUN: -o %t5 \ ; RUN: -r=%t3.o,test,px \ @@ -59,6 +60,7 @@ ; New PM, Index based WPD ; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -wholeprogramdevirt-print-index-based \ ; RUN: -o %t5 \ ; RUN: -r=%t3.o,test,px \ @@ -92,6 +94,7 @@ ; Index based WPD, distributed backends ; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm \ +; RUN: -whole-program-visibility \ ; RUN: -thinlto-distributed-indexes -wholeprogramdevirt-print-index-based \ ; RUN: -o %t5 \ ; RUN: -r=%t3.o,test,px \ @@ -115,6 +118,7 @@ ; Legacy PM ; RUN: llvm-lto2 run %t1.o %t2.o -save-temps -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -o %t5 \ ; RUN: -r=%t1.o,test,px \ ; RUN: -r=%t1.o,_ZTV1B, \ @@ -150,6 +154,7 @@ ; New PM ; RUN: llvm-lto2 run %t1.o %t2.o -save-temps -use-new-pm -pass-remarks=. 
\ +; RUN: -whole-program-visibility \ ; RUN: -o %t5 \ ; RUN: -r=%t1.o,test,px \ ; RUN: -r=%t1.o,_ZTV1B, \ diff --git a/llvm/test/ThinLTO/X86/devirt_alias.ll b/llvm/test/ThinLTO/X86/devirt_alias.ll index 92aa2bcba91..78ebd471b01 100644 --- a/llvm/test/ThinLTO/X86/devirt_alias.ll +++ b/llvm/test/ThinLTO/X86/devirt_alias.ll @@ -7,6 +7,7 @@ ; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_alias.ll ; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -wholeprogramdevirt-print-index-based \ ; RUN: -o %t5 \ ; RUN: -r=%t3.o,test,px \ diff --git a/llvm/test/ThinLTO/X86/devirt_available_externally.ll b/llvm/test/ThinLTO/X86/devirt_available_externally.ll index 128055f7022..94ab7b75758 100644 --- a/llvm/test/ThinLTO/X86/devirt_available_externally.ll +++ b/llvm/test/ThinLTO/X86/devirt_available_externally.ll @@ -18,6 +18,7 @@ ; EXTERNAL: gv: (name: "_ZTV1D", {{.*}} vTableFuncs: ((virtFunc: ; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -wholeprogramdevirt-print-index-based \ ; RUN: -o %t5 \ ; RUN: -r=%t3.o,test,px \ diff --git a/llvm/test/ThinLTO/X86/devirt_external_comdat_same_guid.ll b/llvm/test/ThinLTO/X86/devirt_external_comdat_same_guid.ll index 18482a051e2..ce2f9b7175d 100644 --- a/llvm/test/ThinLTO/X86/devirt_external_comdat_same_guid.ll +++ b/llvm/test/ThinLTO/X86/devirt_external_comdat_same_guid.ll @@ -8,6 +8,7 @@ ; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_external_comdat_same_guid.ll ; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm -pass-remarks=. 
\ +; RUN: -whole-program-visibility \ ; RUN: -wholeprogramdevirt-print-index-based \ ; RUN: -o %t5 \ ; RUN: -r=%t3.o,use_B,px \ diff --git a/llvm/test/ThinLTO/X86/devirt_promote.ll b/llvm/test/ThinLTO/X86/devirt_promote.ll index 563ed994157..e93b4192cf1 100644 --- a/llvm/test/ThinLTO/X86/devirt_promote.ll +++ b/llvm/test/ThinLTO/X86/devirt_promote.ll @@ -10,6 +10,7 @@ ; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_promote.ll ; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ ; RUN: -wholeprogramdevirt-print-index-based \ ; RUN: -o %t5 \ ; RUN: -r=%t3.o,test,px \ diff --git a/llvm/test/ThinLTO/X86/devirt_promote_legacy.ll b/llvm/test/ThinLTO/X86/devirt_promote_legacy.ll index 79fde540c59..7216911a6bc 100644 --- a/llvm/test/ThinLTO/X86/devirt_promote_legacy.ll +++ b/llvm/test/ThinLTO/X86/devirt_promote_legacy.ll @@ -10,6 +10,7 @@ ; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_promote.ll ; RUN: llvm-lto -thinlto-action=run %t3.o %t4.o --thinlto-save-temps=%t5. \ +; RUN: -whole-program-visibility \ ; RUN: --pass-remarks=. \ ; RUN: --exported-symbol=test \ ; RUN: --exported-symbol=test2 \ diff --git a/llvm/test/ThinLTO/X86/devirt_single_hybrid.ll b/llvm/test/ThinLTO/X86/devirt_single_hybrid.ll index 5b2df6cf6f9..5966794152c 100644 --- a/llvm/test/ThinLTO/X86/devirt_single_hybrid.ll +++ b/llvm/test/ThinLTO/X86/devirt_single_hybrid.ll @@ -5,6 +5,7 @@ ; RUN: opt -thinlto-bc -thinlto-split-lto-unit %p/Inputs/devirt_single_hybrid_foo.ll -o %t-foo.bc ; RUN: opt -thinlto-bc -thinlto-split-lto-unit %p/Inputs/devirt_single_hybrid_bar.ll -o %t-bar.bc ; RUN: llvm-lto2 run -save-temps %t-main.bc %t-foo.bc %t-bar.bc -pass-remarks=. 
-o %t \ +; RUN: -whole-program-visibility \ ; RUN: -r=%t-foo.bc,_Z3fooP1A,pl \ ; RUN: -r=%t-main.bc,main,plx \ ; RUN: -r=%t-main.bc,_Z3barv,l \ diff --git a/llvm/test/ThinLTO/X86/devirt_vcall_vis_hidden.ll b/llvm/test/ThinLTO/X86/devirt_vcall_vis_hidden.ll new file mode 100644 index 00000000000..7a301491e05 --- /dev/null +++ b/llvm/test/ThinLTO/X86/devirt_vcall_vis_hidden.ll @@ -0,0 +1,143 @@ +; REQUIRES: x86-registered-target + +; Test devirtualization through the thin link and backend, when vtables +; have vcall_visibility metadata with hidden (linkage unit) visibility. + +; Index based WPD +; Generate unsplit module with summary for ThinLTO index-based WPD. +; RUN: opt -thinlto-bc -o %t2.o %s +; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -o %t3 \ +; RUN: -r=%t2.o,test,px \ +; RUN: -r=%t2.o,_ZN1A1nEi,p \ +; RUN: -r=%t2.o,_ZN1B1fEi,p \ +; RUN: -r=%t2.o,_ZN1C1fEi,p \ +; RUN: -r=%t2.o,_ZN1D1mEi,p \ +; RUN: -r=%t2.o,_ZTV1B,px \ +; RUN: -r=%t2.o,_ZTV1C,px \ +; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; Hybrid WPD +; Generate split module with summary for hybrid Thin/Regular LTO WPD. +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s +; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436. +; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=.
\ +; RUN: -verify-machineinstrs=0 \ +; RUN: -o %t3 \ +; RUN: -r=%t.o,test,px \ +; RUN: -r=%t.o,_ZN1A1nEi,p \ +; RUN: -r=%t.o,_ZN1B1fEi,p \ +; RUN: -r=%t.o,_ZN1C1fEi,p \ +; RUN: -r=%t.o,_ZN1D1mEi,p \ +; RUN: -r=%t.o,_ZTV1B, \ +; RUN: -r=%t.o,_ZTV1C, \ +; RUN: -r=%t.o,_ZTV1D, \ +; RUN: -r=%t.o,_ZN1A1nEi, \ +; RUN: -r=%t.o,_ZN1B1fEi, \ +; RUN: -r=%t.o,_ZN1C1fEi, \ +; RUN: -r=%t.o,_ZN1D1mEi, \ +; RUN: -r=%t.o,_ZTV1B,px \ +; RUN: -r=%t.o,_ZTV1C,px \ +; RUN: -r=%t.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK --dump-input=fail +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; Regular LTO WPD +; RUN: opt -o %t4.o %s +; RUN: llvm-lto2 run %t4.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ +; RUN: -o %t5 \ +; RUN: -r=%t4.o,test,px \ +; RUN: -r=%t4.o,_ZN1A1nEi,p \ +; RUN: -r=%t4.o,_ZN1B1fEi,p \ +; RUN: -r=%t4.o,_ZN1C1fEi,p \ +; RUN: -r=%t4.o,_ZN1D1mEi,p \ +; RUN: -r=%t4.o,_ZTV1B,px \ +; RUN: -r=%t4.o,_ZTV1C,px \ +; RUN: -r=%t4.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t5.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi +; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } +%struct.C = type { %struct.A } +%struct.D = type { i32 (...)** } + +@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1, !vcall_visibility !5 +@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2, !vcall_visibility !5 +@_ZTV1D = 
constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3, !vcall_visibility !5 + + +; CHECK-IR-LABEL: define i32 @test +define i32 @test(%struct.A* %obj, %struct.D* %obj2, i32 %a) { +entry: + %0 = bitcast %struct.A* %obj to i8*** + %vtable = load i8**, i8*** %0 + %1 = bitcast i8** %vtable to i8* + %p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A") + call void @llvm.assume(i1 %p) + %fptrptr = getelementptr i8*, i8** %vtable, i32 1 + %2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)** + %fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8 + + ; Check that the call was devirtualized. + ; CHECK-IR: %call = tail call i32 @_ZN1A1nEi + %call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a) + + %3 = bitcast i8** %vtable to i32 (%struct.A*, i32)** + %fptr22 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %3, align 8 + + ; We still have to call it as virtual. + ; CHECK-IR: %call3 = tail call i32 %fptr22 + %call3 = tail call i32 %fptr22(%struct.A* nonnull %obj, i32 %call) + + %4 = bitcast %struct.D* %obj2 to i8*** + %vtable2 = load i8**, i8*** %4 + %5 = bitcast i8** %vtable2 to i8* + %p2 = call i1 @llvm.type.test(i8* %5, metadata !4) + call void @llvm.assume(i1 %p2) + + %6 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)** + %fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %6, align 8 + + ; Check that the call was devirtualized. 
+ ; CHECK-IR: %call4 = tail call i32 @_ZN1D1mEi + %call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 %call3) + ret i32 %call4 +} +; CHECK-IR-LABEL: ret i32 +; CHECK-IR-LABEL: } + +declare i1 @llvm.type.test(i8*, metadata) +declare void @llvm.assume(i1) + +define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 { + ret i32 0; +} + +; Make sure we don't inline or otherwise optimize out the direct calls. +attributes #0 = { noinline optnone } + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTS1B"} +!2 = !{i64 16, !"_ZTS1C"} +!3 = !{i64 16, !4} +!4 = distinct !{} +!5 = !{i64 1} diff --git a/llvm/test/ThinLTO/X86/devirt_vcall_vis_public.ll b/llvm/test/ThinLTO/X86/devirt_vcall_vis_public.ll new file mode 100644 index 00000000000..468237bea4c --- /dev/null +++ b/llvm/test/ThinLTO/X86/devirt_vcall_vis_public.ll @@ -0,0 +1,215 @@ +; REQUIRES: x86-registered-target + +; Test devirtualization through the thin link and backend, when vtables +; have vcall_visibility metadata with public visibility. + +; Index based WPD +; Generate unsplit module with summary for ThinLTO index-based WPD. +; RUN: opt -thinlto-bc -o %t2.o %s +; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ +; RUN: -o %t3 \ +; RUN: -r=%t2.o,test,px \ +; RUN: -r=%t2.o,_ZN1A1nEi,p \ +; RUN: -r=%t2.o,_ZN1B1fEi,p \ +; RUN: -r=%t2.o,_ZN1C1fEi,p \ +; RUN: -r=%t2.o,_ZN1D1mEi,p \ +; RUN: -r=%t2.o,_ZTV1B,px \ +; RUN: -r=%t2.o,_ZTV1C,px \ +; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; Hybrid WPD +; Generate split module with summary for hybrid Thin/Regular LTO WPD. 
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s +; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436. +; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ +; RUN: -verify-machineinstrs=0 \ +; RUN: -o %t3 \ +; RUN: -r=%t.o,test,px \ +; RUN: -r=%t.o,_ZN1A1nEi,p \ +; RUN: -r=%t.o,_ZN1B1fEi,p \ +; RUN: -r=%t.o,_ZN1C1fEi,p \ +; RUN: -r=%t.o,_ZN1D1mEi,p \ +; RUN: -r=%t.o,_ZTV1B, \ +; RUN: -r=%t.o,_ZTV1C, \ +; RUN: -r=%t.o,_ZTV1D, \ +; RUN: -r=%t.o,_ZN1A1nEi, \ +; RUN: -r=%t.o,_ZN1B1fEi, \ +; RUN: -r=%t.o,_ZN1C1fEi, \ +; RUN: -r=%t.o,_ZN1D1mEi, \ +; RUN: -r=%t.o,_ZTV1B,px \ +; RUN: -r=%t.o,_ZTV1C,px \ +; RUN: -r=%t.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK --dump-input=fail +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; Regular LTO WPD +; RUN: opt -o %t4.o %s +; RUN: llvm-lto2 run %t4.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -whole-program-visibility \ +; RUN: -o %t5 \ +; RUN: -r=%t4.o,test,px \ +; RUN: -r=%t4.o,_ZN1A1nEi,p \ +; RUN: -r=%t4.o,_ZN1B1fEi,p \ +; RUN: -r=%t4.o,_ZN1C1fEi,p \ +; RUN: -r=%t4.o,_ZN1D1mEi,p \ +; RUN: -r=%t4.o,_ZTV1B,px \ +; RUN: -r=%t4.o,_ZTV1C,px \ +; RUN: -r=%t4.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t5.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi +; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi + +; Try everything again but without -whole-program-visibility to confirm +; WPD fails + +; Index based WPD +; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. 
\ +; RUN: -o %t3 \ +; RUN: -r=%t2.o,test,px \ +; RUN: -r=%t2.o,_ZN1A1nEi,p \ +; RUN: -r=%t2.o,_ZN1B1fEi,p \ +; RUN: -r=%t2.o,_ZN1C1fEi,p \ +; RUN: -r=%t2.o,_ZN1D1mEi,p \ +; RUN: -r=%t2.o,_ZTV1B,px \ +; RUN: -r=%t2.o,_ZTV1C,px \ +; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +; Hybrid WPD +; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -verify-machineinstrs=0 \ +; RUN: -o %t3 \ +; RUN: -r=%t.o,test,px \ +; RUN: -r=%t.o,_ZN1A1nEi,p \ +; RUN: -r=%t.o,_ZN1B1fEi,p \ +; RUN: -r=%t.o,_ZN1C1fEi,p \ +; RUN: -r=%t.o,_ZN1D1mEi,p \ +; RUN: -r=%t.o,_ZTV1B, \ +; RUN: -r=%t.o,_ZTV1C, \ +; RUN: -r=%t.o,_ZTV1D, \ +; RUN: -r=%t.o,_ZN1A1nEi, \ +; RUN: -r=%t.o,_ZN1B1fEi, \ +; RUN: -r=%t.o,_ZN1C1fEi, \ +; RUN: -r=%t.o,_ZN1D1mEi, \ +; RUN: -r=%t.o,_ZTV1B,px \ +; RUN: -r=%t.o,_ZTV1C,px \ +; RUN: -r=%t.o,_ZTV1D,px 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +; Regular LTO WPD +; RUN: llvm-lto2 run %t4.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -o %t5 \ +; RUN: -r=%t4.o,test,px \ +; RUN: -r=%t4.o,_ZN1A1nEi,p \ +; RUN: -r=%t4.o,_ZN1B1fEi,p \ +; RUN: -r=%t4.o,_ZN1C1fEi,p \ +; RUN: -r=%t4.o,_ZN1D1mEi,p \ +; RUN: -r=%t4.o,_ZTV1B,px \ +; RUN: -r=%t4.o,_ZTV1C,px \ +; RUN: -r=%t4.o,_ZTV1D,px 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t5.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +; Try index-based WPD again with both -whole-program-visibility and +; -disable-whole-program-visibility to confirm the latter overrides +; the former and that WPD fails. +; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. 
\ +; RUN: -whole-program-visibility \ +; RUN: -disable-whole-program-visibility \ +; RUN: -o %t3 \ +; RUN: -r=%t2.o,test,px \ +; RUN: -r=%t2.o,_ZN1A1nEi,p \ +; RUN: -r=%t2.o,_ZN1B1fEi,p \ +; RUN: -r=%t2.o,_ZN1C1fEi,p \ +; RUN: -r=%t2.o,_ZN1D1mEi,p \ +; RUN: -r=%t2.o,_ZTV1B,px \ +; RUN: -r=%t2.o,_ZTV1C,px \ +; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } +%struct.C = type { %struct.A } +%struct.D = type { i32 (...)** } + +@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1, !vcall_visibility !5 +@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2, !vcall_visibility !5 +@_ZTV1D = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3, !vcall_visibility !5 + + +; CHECK-IR-LABEL: define i32 @test +define i32 @test(%struct.A* %obj, %struct.D* %obj2, i32 %a) { +entry: + %0 = bitcast %struct.A* %obj to i8*** + %vtable = load i8**, i8*** %0 + %1 = bitcast i8** %vtable to i8* + %p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A") + call void @llvm.assume(i1 %p) + %fptrptr = getelementptr i8*, i8** %vtable, i32 1 + %2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)** + %fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8 + + ; Check that the call was devirtualized. 
+ ; CHECK-IR: %call = tail call i32 @_ZN1A1nEi + ; CHECK-NODEVIRT-IR: %call = tail call i32 %fptr1 + %call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a) + + %3 = bitcast i8** %vtable to i32 (%struct.A*, i32)** + %fptr22 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %3, align 8 + + ; We still have to call it as virtual. + ; CHECK-IR: %call3 = tail call i32 %fptr22 + ; CHECK-NODEVIRT-IR: %call3 = tail call i32 %fptr22 + %call3 = tail call i32 %fptr22(%struct.A* nonnull %obj, i32 %call) + + %4 = bitcast %struct.D* %obj2 to i8*** + %vtable2 = load i8**, i8*** %4 + %5 = bitcast i8** %vtable2 to i8* + %p2 = call i1 @llvm.type.test(i8* %5, metadata !4) + call void @llvm.assume(i1 %p2) + + %6 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)** + %fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %6, align 8 + + ; Check that the call was devirtualized. + ; CHECK-IR: %call4 = tail call i32 @_ZN1D1mEi + ; CHECK-NODEVIRT-IR: %call4 = tail call i32 %fptr33 + %call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 %call3) + ret i32 %call4 +} +; CHECK-IR-LABEL: ret i32 +; CHECK-IR-LABEL: } + +declare i1 @llvm.type.test(i8*, metadata) +declare void @llvm.assume(i1) + +define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 { + ret i32 0; +} + +; Make sure we don't inline or otherwise optimize out the direct calls. 
+attributes #0 = { noinline optnone } + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTS1B"} +!2 = !{i64 16, !"_ZTS1C"} +!3 = !{i64 16, !4} +!4 = distinct !{} +!5 = !{i64 0} diff --git a/llvm/test/Transforms/WholeProgramDevirt/bad-read-from-vtable.ll b/llvm/test/Transforms/WholeProgramDevirt/bad-read-from-vtable.ll index e5d0e74b22e..14de02a23c0 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/bad-read-from-vtable.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/bad-read-from-vtable.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel-threshold.ll b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel-threshold.ll index 91cd4e419b6..1e7d38d9bdb 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel-threshold.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel-threshold.ll @@ -1,8 +1,8 @@ -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=1 -S -o - %s | not grep @llvm.icall.branch.funnel | count 0 +; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=1 -S -o - %s | not grep @llvm.icall.branch.funnel | count 0 -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=10 -S -o - %s | grep @llvm.icall.branch.funnel | count 4 +; RUN: opt -wholeprogramdevirt -whole-program-visibility 
-wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=10 -S -o - %s | grep @llvm.icall.branch.funnel | count 4 -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=100 -S -o - %s | grep @llvm.icall.branch.funnel | count 5 +; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=100 -S -o - %s | grep @llvm.icall.branch.funnel | count 5 target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll index db76080725b..32d964819fe 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll @@ -1,9 +1,9 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck --check-prefixes=CHECK,RETP %s -; RUN: sed -e 's,+retpoline,-retpoline,g' %s | opt -S -wholeprogramdevirt | FileCheck --check-prefixes=CHECK,NORETP %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck --check-prefixes=CHECK,RETP %s +; RUN: sed -e 's,+retpoline,-retpoline,g' %s | opt -S -wholeprogramdevirt -whole-program-visibility | FileCheck --check-prefixes=CHECK,NORETP %s -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,RETP %s +; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export 
-wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,RETP %s -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -O3 -S -o - %s | FileCheck --check-prefixes=CHECK %s +; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -O3 -S -o - %s | FileCheck --check-prefixes=CHECK %s ; RUN: FileCheck --check-prefix=SUMMARY %s < %t diff --git a/llvm/test/Transforms/WholeProgramDevirt/constant-arg.ll b/llvm/test/Transforms/WholeProgramDevirt/constant-arg.ll index f65e4132738..2f26a2502e9 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/constant-arg.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/constant-arg.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s -; RUN: opt -S -passes=wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s +; RUN: opt -S -passes=wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll index f4ef8824e2e..7302238aa7c 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git 
a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll index 9f631e94cf5..5b0b5aabdae 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/expand-check.ll b/llvm/test/Transforms/WholeProgramDevirt/expand-check.ll index 4effaba08b2..3ea078a183e 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/expand-check.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/expand-check.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s ; Test that we correctly expand the llvm.type.checked.load intrinsic in cases ; where we cannot devirtualize. 
diff --git a/llvm/test/Transforms/WholeProgramDevirt/export-nothing.ll b/llvm/test/Transforms/WholeProgramDevirt/export-nothing.ll index 4707eaa17ea..64a714dd08d 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/export-nothing.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/export-nothing.ll @@ -1,4 +1,4 @@ -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-write-summary=%t -o /dev/null %s +; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-write-summary=%t -o /dev/null %s ; RUN: FileCheck %s < %t ; CHECK: --- diff --git a/llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll b/llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll index cbba1a4d164..33ff9e1afe5 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/export-single-impl.ll @@ -1,4 +1,4 @@ -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s +; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s ; RUN: FileCheck --check-prefix=SUMMARY %s < %t ; SUMMARY: TypeIdMap: diff --git a/llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll b/llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll index 43adb90d69f..cb2fddd75d1 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll @@ -1,4 +1,4 @@ -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s +; RUN: opt 
-wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s ; RUN: FileCheck --check-prefix=SUMMARY %s < %t ; SUMMARY-NOT: TypeTests: diff --git a/llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll b/llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll index 4260a2e570d..0f780a38736 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll @@ -1,4 +1,4 @@ -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s +; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s ; RUN: FileCheck --check-prefix=SUMMARY %s < %t ; SUMMARY-NOT: TypeTests: diff --git a/llvm/test/Transforms/WholeProgramDevirt/export-unsuccessful-checked.ll b/llvm/test/Transforms/WholeProgramDevirt/export-unsuccessful-checked.ll index 3132444a9f3..dd16fa052af 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/export-unsuccessful-checked.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/export-unsuccessful-checked.ll @@ -1,4 +1,4 @@ -; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -o /dev/null %s +; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -o /dev/null %s ; RUN: FileCheck %s < %t ; CHECK: TypeTests: [ 15427464259790519041, 17525413373118030901 ] diff --git 
a/llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll b/llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll index 5982ad4ec58..eb7b36e87dd 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/export-vcp.ll @@ -1,7 +1,7 @@ -; RUN: opt -mtriple=x86_64-unknown-linux-gnu -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,X86 %s +; RUN: opt -mtriple=x86_64-unknown-linux-gnu -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,X86 %s ; RUN: FileCheck --check-prefixes=SUMMARY,SUMMARY-X86 %s < %t -; RUN: opt -mtriple=armv7-unknown-linux-gnu -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,ARM %s +; RUN: opt -mtriple=armv7-unknown-linux-gnu -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,ARM %s ; RUN: FileCheck --check-prefixes=SUMMARY,SUMMARY-ARM %s < %t target datalayout = "e-p:64:64" diff --git a/llvm/test/Transforms/WholeProgramDevirt/non-constant-vtable.ll b/llvm/test/Transforms/WholeProgramDevirt/non-constant-vtable.ll index ecc8ad0e7c7..1a38efa01ec 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/non-constant-vtable.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/non-constant-vtable.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility 
-pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s ; CHECK-NOT: devirtualized call diff --git a/llvm/test/Transforms/WholeProgramDevirt/pointer-vtable.ll b/llvm/test/Transforms/WholeProgramDevirt/pointer-vtable.ll index 5e76a5a7dde..443a850373d 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/pointer-vtable.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/pointer-vtable.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/soa-vtable.ll b/llvm/test/Transforms/WholeProgramDevirt/soa-vtable.ll index 3b6afc52e5d..86c9a993851 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/soa-vtable.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/soa-vtable.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/struct-vtable.ll b/llvm/test/Transforms/WholeProgramDevirt/struct-vtable.ll index 81e41d46694..8afa1d99c50 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/struct-vtable.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/struct-vtable.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/uniform-retval-invoke.ll b/llvm/test/Transforms/WholeProgramDevirt/uniform-retval-invoke.ll index 8fea9bc7b24..b9d20593936 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/uniform-retval-invoke.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/uniform-retval-invoke.ll @@ -1,4 +1,4 @@ -; RUN: opt -S 
-wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll b/llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll index ef3a7e49b52..7626aba24c1 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/uniform-retval.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/unique-retval.ll b/llvm/test/Transforms/WholeProgramDevirt/unique-retval.ll index 4452bb75d03..f03c07d24de 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/unique-retval.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/unique-retval.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-accesses-memory.ll b/llvm/test/Transforms/WholeProgramDevirt/vcp-accesses-memory.ll index ca76383c494..07a9ac7e230 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/vcp-accesses-memory.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-accesses-memory.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s -; RUN: opt -S -passes=wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s +; RUN: opt -S -passes=wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-decl.ll 
b/llvm/test/Transforms/WholeProgramDevirt/vcp-decl.ll index 1c4e2fbe97a..35be7e4cabd 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/vcp-decl.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-decl.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-no-this.ll b/llvm/test/Transforms/WholeProgramDevirt/vcp-no-this.ll index ce76c8e6797..0c3c4e9b2ff 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/vcp-no-this.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-no-this.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-non-constant-arg.ll b/llvm/test/Transforms/WholeProgramDevirt/vcp-non-constant-arg.ll index cc2ff33296a..3580663b7f0 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/vcp-non-constant-arg.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-non-constant-arg.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-too-wide-ints.ll b/llvm/test/Transforms/WholeProgramDevirt/vcp-too-wide-ints.ll index c24c3b4be68..a0de6d27833 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/vcp-too-wide-ints.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-too-wide-ints.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = 
"x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-type-mismatch.ll b/llvm/test/Transforms/WholeProgramDevirt/vcp-type-mismatch.ll index 7016263f8f7..e96323e3d78 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/vcp-type-mismatch.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-type-mismatch.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s ; Test that we correctly handle function type mismatches in argument counts ; and bitwidths. We handle an argument count mismatch by refusing diff --git a/llvm/test/Transforms/WholeProgramDevirt/vcp-uses-this.ll b/llvm/test/Transforms/WholeProgramDevirt/vcp-uses-this.ll index 542402e1657..1c5b0ae05b1 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/vcp-uses-this.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/vcp-uses-this.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll index 6e55235ebfc..22426dfc449 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll index 3299f7bce65..b7537f26e4d 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll +++ 
b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll index 5ab9571a0ca..b3cb4de7240 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/WholeProgramDevirt/vtable-decl.ll b/llvm/test/Transforms/WholeProgramDevirt/vtable-decl.ll index e56170a4997..cdb3856c70f 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/vtable-decl.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/vtable-decl.ll @@ -1,5 +1,5 @@ ; Check that we don't crash when processing declaration with type metadata -; RUN: opt -S -wholeprogramdevirt %s +; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-none-linux-gnu" diff --git a/llvm/test/tools/gold/X86/devirt_vcall_vis_public.ll b/llvm/test/tools/gold/X86/devirt_vcall_vis_public.ll new file mode 100644 index 00000000000..53cd3fd4e36 --- /dev/null +++ b/llvm/test/tools/gold/X86/devirt_vcall_vis_public.ll @@ -0,0 +1,148 @@ +; Test that plugin option whole-program-visibility enables devirtualization. + +; Index based WPD +; Generate unsplit module with summary for ThinLTO index-based WPD. 
+; RUN: opt -thinlto-bc -o %t2.o %s +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=whole-program-visibility \ +; RUN: --plugin-opt=save-temps \ +; RUN: --plugin-opt=-pass-remarks=. \ +; RUN: %t2.o -o %t3 \ +; RUN: --export-dynamic 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t2.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; Hybrid WPD +; Generate split module with summary for hybrid Thin/Regular LTO WPD. +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=whole-program-visibility \ +; RUN: --plugin-opt=save-temps \ +; RUN: --plugin-opt=-pass-remarks=. \ +; RUN: %t.o -o %t3 \ +; RUN: --export-dynamic 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; Regular LTO WPD +; RUN: opt -o %t4.o %s +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=whole-program-visibility \ +; RUN: --plugin-opt=save-temps \ +; RUN: --plugin-opt=-pass-remarks=. \ +; RUN: %t4.o -o %t3 \ +; RUN: --export-dynamic 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t3.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi +; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi + +; Try everything again but without -whole-program-visibility to confirm +; WPD fails + +; Index based WPD +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=save-temps \ +; RUN: --plugin-opt=-pass-remarks=. 
\ +; RUN: %t2.o -o %t3 \ +; RUN: --export-dynamic 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t2.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +; Hybrid WPD +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=save-temps \ +; RUN: --plugin-opt=-pass-remarks=. \ +; RUN: %t.o -o %t3 \ +; RUN: --export-dynamic 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +; Regular LTO WPD +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=save-temps \ +; RUN: --plugin-opt=-pass-remarks=. \ +; RUN: %t4.o -o %t3 \ +; RUN: --export-dynamic 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty +; RUN: llvm-dis %t3.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } +%struct.C = type { %struct.A } +%struct.D = type { i32 (...)** } + +@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1, !vcall_visibility !5 +@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2, !vcall_visibility !5 +@_ZTV1D = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3, !vcall_visibility !5 + + +; CHECK-IR-LABEL: define dso_local i32 @_start +define i32 @_start(%struct.A* %obj, %struct.D* %obj2, i32 %a) { +entry: + %0 = bitcast %struct.A* %obj to i8*** + %vtable = load i8**, i8*** 
%0 + %1 = bitcast i8** %vtable to i8* + %p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A") + call void @llvm.assume(i1 %p) + %fptrptr = getelementptr i8*, i8** %vtable, i32 1 + %2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)** + %fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8 + + ; Check that the call was devirtualized. + ; CHECK-IR: %call = tail call i32 @_ZN1A1nEi + ; CHECK-NODEVIRT-IR: %call = tail call i32 %fptr1 + %call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a) + + %3 = bitcast i8** %vtable to i32 (%struct.A*, i32)** + %fptr22 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %3, align 8 + + ; We still have to call it as virtual. + ; CHECK-IR: %call3 = tail call i32 %fptr22 + ; CHECK-NODEVIRT-IR: %call3 = tail call i32 %fptr22 + %call3 = tail call i32 %fptr22(%struct.A* nonnull %obj, i32 %call) + + %4 = bitcast %struct.D* %obj2 to i8*** + %vtable2 = load i8**, i8*** %4 + %5 = bitcast i8** %vtable2 to i8* + %p2 = call i1 @llvm.type.test(i8* %5, metadata !4) + call void @llvm.assume(i1 %p2) + + %6 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)** + %fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %6, align 8 + + ; Check that the call was devirtualized. + ; CHECK-IR: %call4 = tail call i32 @_ZN1D1mEi + ; CHECK-NODEVIRT-IR: %call4 = tail call i32 %fptr33 + %call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 %call3) + ret i32 %call4 +} +; CHECK-IR-LABEL: ret i32 +; CHECK-IR-LABEL: } + +declare i1 @llvm.type.test(i8*, metadata) +declare void @llvm.assume(i1) + +define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 { + ret i32 0; +} + +; Make sure we don't inline or otherwise optimize out the direct calls. 
+attributes #0 = { noinline optnone } + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTS1B"} +!2 = !{i64 16, !"_ZTS1C"} +!3 = !{i64 16, !4} +!4 = distinct !{} +!5 = !{i64 0} diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp index 406079dad30..93e9b60873e 100644 --- a/llvm/tools/gold/gold-plugin.cpp +++ b/llvm/tools/gold/gold-plugin.cpp @@ -204,6 +204,8 @@ namespace options { static std::string dwo_dir; /// Statistics output filename. static std::string stats_file; + // Asserts that LTO link has whole program visibility + static bool whole_program_visibility = false; // Optimization remarks filename, accepted passes and hotness options static std::string RemarksFilename; @@ -283,6 +285,8 @@ namespace options { new_pass_manager = true; } else if (opt == "debug-pass-manager") { debug_pass_manager = true; + } else if (opt == "whole-program-visibility") { + whole_program_visibility = true; } else if (opt.startswith("dwo_dir=")) { dwo_dir = opt.substr(strlen("dwo_dir=")); } else if (opt.startswith("opt-remarks-filename=")) { @@ -926,6 +930,8 @@ static std::unique_ptr<LTO> createLTO(IndexWriteCallback OnIndexWrite, // Debug new pass manager if requested Conf.DebugPassManager = options::debug_pass_manager; + Conf.HasWholeProgramVisibility = options::whole_program_visibility; + Conf.StatsFile = options::stats_file; return std::make_unique<LTO>(std::move(Conf), Backend, options::ParallelCodeGenParallelismLevel); diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index 75a6cdc3892..82cf47d9ebd 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -54,6 +54,7 @@ #include "llvm/Transforms/Coroutines.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Debugify.h" #include <algorithm> @@ -625,6 +626,13 @@ int main(int argc, char **argv) { return 1; } + // Enable testing of
whole program devirtualization on this module by invoking + // the facility for updating public visibility to linkage unit visibility when + // specified by an internal option. This is normally done during LTO which is + // not performed via opt. + updateVCallVisibilityInModule(*M, + /* WholeProgramVisibilityEnabledInLTO */ false); + // Figure out what stream we are supposed to write to... std::unique_ptr<ToolOutputFile> Out; std::unique_ptr<ToolOutputFile> ThinLinkOut;