From f5c82586b2b214349bd7e593b3111aa912248db0 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Wed, 20 Sep 2017 17:09:47 +0000 Subject: [PATCH] [ThinLTO] Fix dead stripping analysis for SamplePGO Summary: The fix for dead stripping analysis in the case of SamplePGO indirect calls to local functions (r313151) introduced the possibility of an infinite loop. Make sure we check for the value being already live after we update it for SamplePGO indirect call handling. Reviewers: danielcdh Subscribers: mehdi_amini, inglorion, llvm-commits, eraman Differential Revision: https://reviews.llvm.org/D38086 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313766 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/FunctionImport.cpp | 6 +- .../PGOProfile/Inputs/thinlto_samplepgo_icp3.ll | 31 +++++++++++ .../PGOProfile/thinlto_samplepgo_icp3.ll | 64 ++++++++++++++++++++++ 3 files changed, 98 insertions(+), 3 deletions(-) create mode 100644 test/Transforms/PGOProfile/Inputs/thinlto_samplepgo_icp3.ll create mode 100644 test/Transforms/PGOProfile/thinlto_samplepgo_icp3.ll diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index 9f20cbc6fed..670a84862e0 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -463,9 +463,6 @@ void llvm::computeDeadSymbols( // Make value live and add it to the worklist if it was not live before. // FIXME: we should only make the prevailing copy live here auto visit = [&](ValueInfo VI) { - for (auto &S : VI.getSummaryList()) - if (S->isLive()) - return; // FIXME: If we knew which edges were created for indirect call profiles, // we could skip them here. Any that are live should be reached via // other edges, e.g. reference edges. Otherwise, using a profile collected @@ -478,6 +475,9 @@ void llvm::computeDeadSymbols( if (!VI) return; for (auto &S : VI.getSummaryList()) + if (S->isLive()) + return; + for (auto &S : VI.getSummaryList()) S->setLive(true); ++LiveSymbols; Worklist.push_back(VI); diff --git a/test/Transforms/PGOProfile/Inputs/thinlto_samplepgo_icp3.ll b/test/Transforms/PGOProfile/Inputs/thinlto_samplepgo_icp3.ll new file mode 100644 index 00000000000..10d633be59f --- /dev/null +++ b/test/Transforms/PGOProfile/Inputs/thinlto_samplepgo_icp3.ll @@ -0,0 +1,31 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@fptr = external local_unnamed_addr global void ()*, align 8 + +; Function Attrs: norecurse nounwind uwtable +define void @_Z6updatei(i32 %i) local_unnamed_addr #0 { +entry: + store void ()* @_ZL3foov, void ()** @fptr, align 8 + ret void +} + +; Function Attrs: nounwind readnone uwtable +define internal void @_ZL3foov() !prof !34 { +entry: + %0 = load void ()*, void ()** @fptr, align 8 + tail call void %0(), !prof !40 + ret void +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3} +!llvm.ident = !{!31} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (trunk 297016)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "b.cc", directory: "/ssd/llvm/abc/small") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!31 = !{!"clang version 5.0.0 (trunk 297016)"} +!34 = !{!"function_entry_count", i64 1} +!40 = !{!"VP", i32 0, i64 3000, i64 -8789629626369651636, i64 3000} diff --git a/test/Transforms/PGOProfile/thinlto_samplepgo_icp3.ll b/test/Transforms/PGOProfile/thinlto_samplepgo_icp3.ll new file mode 100644 index 00000000000..d6a94ca6d94 --- /dev/null +++ b/test/Transforms/PGOProfile/thinlto_samplepgo_icp3.ll @@ -0,0 +1,64 @@ +; REQUIRES: x86-registered-target + +; Do setup work for all below tests: generate bitcode and combined index +; RUN: opt -module-summary %s -o %t.bc +; RUN: opt -module-summary %p/Inputs/thinlto_samplepgo_icp3.ll -o %t2.bc + +; Test to make sure importing and dead stripping works in the +; case where the target is a local function that also indirectly calls itself. +; RUN: llvm-lto2 run -save-temps -o %t3 %t.bc %t2.bc -r %t.bc,fptr,plx -r %t.bc,main,plx -r %t2.bc,_Z6updatei,pl -r %t2.bc,fptr,l -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS +; Make sure we import the promted indirectly called target +; IMPORTS: Import _ZL3foov.llvm.0 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@fptr = local_unnamed_addr global void ()* null, align 8 + +; Function Attrs: norecurse uwtable +define i32 @main() local_unnamed_addr #0 !prof !34 { +entry: + %0 = load void ()*, void ()** @fptr, align 8 +; ICALL-PROM: br i1 %{{[0-9]+}}, label %if.true.direct_targ, label %if.false.orig_indirect + tail call void %0(), !prof !40 + ret i32 0 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3,!4} +!llvm.ident = !{!31} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (trunk 297016)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "main.cc", directory: ".") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"ProfileSummary", !5} +!5 = !{!6, !7, !8, !9, !10, !11, !12, !13} +!6 = !{!"ProfileFormat", !"SampleProfile"} +!7 = !{!"TotalCount", i64 3003} +!8 = !{!"MaxCount", i64 3000} +!9 = !{!"MaxInternalCount", i64 0} +!10 = !{!"MaxFunctionCount", i64 0} +!11 = !{!"NumCounts", i64 3} +!12 = !{!"NumFunctions", i64 1} +!13 = !{!"DetailedSummary", !14} +!14 = !{!15, !16, !17, !18, !19, !20, !20, !21, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30} +!15 = !{i32 10000, i64 3000, i32 1} +!16 = !{i32 100000, i64 3000, i32 1} +!17 = !{i32 200000, i64 3000, i32 1} +!18 = !{i32 300000, i64 3000, i32 1} +!19 = !{i32 400000, i64 3000, i32 1} +!20 = !{i32 500000, i64 3000, i32 1} +!21 = !{i32 600000, i64 3000, i32 1} +!22 = !{i32 700000, i64 3000, i32 1} +!23 = !{i32 800000, i64 3000, i32 1} +!24 = !{i32 900000, i64 3000, i32 1} +!25 = !{i32 950000, i64 3000, i32 1} +!26 = !{i32 990000, i64 3000, i32 1} +!27 = !{i32 999000, i64 3000, i32 1} +!28 = !{i32 999900, i64 2, i32 2} +!29 = !{i32 999990, i64 2, i32 2} +!30 = !{i32 999999, i64 2, i32 2} +!31 = !{!"clang version 5.0.0 (trunk 297016)"} +!34 = !{!"function_entry_count", i64 1} +!40 = !{!"VP", i32 0, i64 3000, i64 -8789629626369651636, i64 3000} -- 2.11.0