[LoopRotate] Add PrepareForLTO stage, avoid rotating with inline cands.

author Florian Hahn <flo@fhahn.com>

Tue, 19 Jan 2021 09:22:40 +0000 (09:22 +0000)

committer Florian Hahn <flo@fhahn.com>

Tue, 19 Jan 2021 10:15:29 +0000 (10:15 +0000)
author Florian Hahn <flo@fhahn.com>
Tue, 19 Jan 2021 09:22:40 +0000 (09:22 +0000)
committer Florian Hahn <flo@fhahn.com>
Tue, 19 Jan 2021 10:15:29 +0000 (10:15 +0000)
diff --git a/llvm/include/llvm/Analysis/CodeMetrics.h b/llvm/include/llvm/Analysis/CodeMetrics.h

index eab24c8..615591a 100644 (file)
--- a/llvm/include/llvm/Analysis/CodeMetrics.h
+++ b/llvm/include/llvm/Analysis/CodeMetrics.h
@@ -75,7 +75,8 @@ struct CodeMetrics {
  
    /// Add information about a block to the current state.
    void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI,
-                         const SmallPtrSetImpl<const Value*> &EphValues);
+                         const SmallPtrSetImpl<const Value *> &EphValues,
+                         bool PrepareForLTO = false);
  
    /// Collect a loop's ephemeral values (those used only by an assume
    /// or similar intrinsics in the loop).
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h

index 3217257..48eea52 100644 (file)
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -210,7 +210,7 @@ Pass *createLoopRerollPass();
  //
  // LoopRotate - This pass is a simple loop rotating pass.
  //
-Pass *createLoopRotatePass(int MaxHeaderSize = -1);
+Pass *createLoopRotatePass(int MaxHeaderSize = -1, bool PrepareForLTO = false);
  
  //===----------------------------------------------------------------------===//
  //
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopRotation.h b/llvm/include/llvm/Transforms/Scalar/LoopRotation.h

index 254e607..f68ac70 100644 (file)
--- a/llvm/include/llvm/Transforms/Scalar/LoopRotation.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopRotation.h
@@ -22,12 +22,14 @@ namespace llvm {
  /// A simple loop rotation transformation.
  class LoopRotatePass : public PassInfoMixin<LoopRotatePass> {
  public:
-  LoopRotatePass(bool EnableHeaderDuplication = true);
+  LoopRotatePass(bool EnableHeaderDuplication = true,
+                 bool PrepareForLTO = false);
    PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
                          LoopStandardAnalysisResults &AR, LPMUpdater &U);
  
  private:
    const bool EnableHeaderDuplication;
+  const bool PrepareForLTO;
  };
  }
  
diff --git a/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h b/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h

index 1e80722..61bf93b 100644 (file)
--- a/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
@@ -33,7 +33,8 @@ class TargetTransformInfo;
  bool LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
                    AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE,
                    MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ,
-                  bool RotationOnly, unsigned Threshold, bool IsUtilMode);
+                  bool RotationOnly, unsigned Threshold, bool IsUtilMode,
+                  bool PrepareForLTO = false);
  
  } // namespace llvm
  
diff --git a/llvm/lib/Analysis/CodeMetrics.cpp b/llvm/lib/Analysis/CodeMetrics.cpp

index 0b2b6f9..fca62c1 100644 (file)
--- a/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/llvm/lib/Analysis/CodeMetrics.cpp
@@ -112,9 +112,9 @@ void CodeMetrics::collectEphemeralValues(
  
  /// Fill in the current structure with information gleaned from the specified
  /// block.
-void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
-                                    const TargetTransformInfo &TTI,
-                                    const SmallPtrSetImpl<const Value*> &EphValues) {
+void CodeMetrics::analyzeBasicBlock(
+    const BasicBlock *BB, const TargetTransformInfo &TTI,
+    const SmallPtrSetImpl<const Value *> &EphValues, bool PrepareForLTO) {
    ++NumBlocks;
    unsigned NumInstsBeforeThisBB = NumInsts;
    for (const Instruction &I : *BB) {
@@ -128,8 +128,12 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
          // If a function is both internal and has a single use, then it is
          // extremely likely to get inlined in the future (it was probably
          // exposed by an interleaved devirtualization pass).
-        if (!Call->isNoInline() && F->hasInternalLinkage() && F->hasOneUse())
+        // When preparing for LTO, liberally consider calls as inline
+        // candidates.
+        if (!Call->isNoInline() &&
+            ((F->hasInternalLinkage() && F->hasOneUse()) || PrepareForLTO)) {
            ++NumInlineCandidates;
+        }
  
          // If this call is to function itself, then the function is recursive.
          // Inlining it into other functions is a bad idea, because this is
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp

index db9b737..3530b1a 100644 (file)
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1230,7 +1230,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
    // First rotate loops that may have been un-rotated by prior passes.
    // Disable header duplication at -Oz.
    OptimizePM.addPass(createFunctionToLoopPassAdaptor(
-      LoopRotatePass(Level != OptimizationLevel::Oz), EnableMSSALoopDependency,
+      LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink),
+      EnableMSSALoopDependency,
        /*UseBlockFrequencyInfo=*/false, DebugLogging));
  
    // Distribute loops to allow partial vectorization.  I.e. isolate dependences
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp

index 33fa158..c8a4a4c 100644 (file)
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -433,7 +433,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
      MPM.add(createLoopSimplifyCFGPass());
    }
    // Rotate Loop - disable header duplication at -Oz
-  MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
+  MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
    // TODO: Investigate promotion cap for O1.
    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
    if (EnableSimpleLoopUnswitch)
@@ -777,7 +777,7 @@ void PassManagerBuilder::populateModulePassManager(
    // Re-rotate loops in all our loop nests. These may have fallout out of
    // rotated form due to GVN or other transformations, and the vectorizer relies
    // on the rotated form. Disable header duplication at -Oz.
-  MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
+  MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
  
    // Distribute loops to allow partial vectorization.  I.e. isolate dependences
    // into separate loop that would otherwise inhibit vectorization.  This is
diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp

index 3e3d303..ad1cfc6 100644 (file)
--- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -34,8 +34,14 @@ static cl::opt<unsigned> DefaultRotationThreshold(
      "rotation-max-header-size", cl::init(16), cl::Hidden,
      cl::desc("The default maximum header size for automatic loop rotation"));
  
-LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication)
-    : EnableHeaderDuplication(EnableHeaderDuplication) {}
+static cl::opt<bool> PrepareForLTOOption(
+    "rotation-prepare-for-lto", cl::init(false), cl::Hidden,
+    cl::desc("Run loop-rotation in the prepare-for-lto stage. This option "
+             "should be used for testing only."));
+
+LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication, bool PrepareForLTO)
+    : EnableHeaderDuplication(EnableHeaderDuplication),
+      PrepareForLTO(PrepareForLTO) {}
  
  PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
                                        LoopStandardAnalysisResults &AR,
@@ -53,9 +59,10 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
    Optional<MemorySSAUpdater> MSSAU;
    if (AR.MSSA)
      MSSAU = MemorySSAUpdater(AR.MSSA);
-  bool Changed = LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE,
-                              MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
-                              SQ, false, Threshold, false);
+  bool Changed =
+      LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE,
+                   MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ, false,
+                   Threshold, false, PrepareForLTO || PrepareForLTOOption);
  
    if (!Changed)
      return PreservedAnalyses::all();
@@ -73,10 +80,13 @@ namespace {
  
  class LoopRotateLegacyPass : public LoopPass {
    unsigned MaxHeaderSize;
+  bool PrepareForLTO;
  
  public:
    static char ID; // Pass ID, replacement for typeid
-  LoopRotateLegacyPass(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
+  LoopRotateLegacyPass(int SpecifiedMaxHeaderSize = -1,
+                       bool PrepareForLTO = false)
+      : LoopPass(ID), PrepareForLTO(PrepareForLTO) {
      initializeLoopRotateLegacyPassPass(*PassRegistry::getPassRegistry());
      if (SpecifiedMaxHeaderSize == -1)
        MaxHeaderSize = DefaultRotationThreshold;
@@ -121,7 +131,8 @@ public:
  
      return LoopRotation(L, LI, TTI, AC, &DT, &SE,
                          MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ,
-                        false, Threshold, false);
+                        false, Threshold, false,
+                        PrepareForLTO || PrepareForLTOOption);
    }
  };
  } // end namespace
@@ -136,6 +147,6 @@ INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
  INITIALIZE_PASS_END(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops", false,
                      false)
  
-Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {
-  return new LoopRotateLegacyPass(MaxHeaderSize);
+Pass *llvm::createLoopRotatePass(int MaxHeaderSize, bool PrepareForLTO) {
+  return new LoopRotateLegacyPass(MaxHeaderSize, PrepareForLTO);
  }
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp

index eb7018c..8501709 100644 (file)
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -65,15 +65,17 @@ class LoopRotate {
    const SimplifyQuery &SQ;
    bool RotationOnly;
    bool IsUtilMode;
+  bool PrepareForLTO;
  
  public:
    LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
               const TargetTransformInfo *TTI, AssumptionCache *AC,
               DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
-             const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode)
+             const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode,
+             bool PrepareForLTO)
        : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
          MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
-        IsUtilMode(IsUtilMode) {}
+        IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {}
    bool processLoop(Loop *L);
  
  private:
@@ -301,7 +303,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
        CodeMetrics::collectEphemeralValues(L, AC, EphValues);
  
        CodeMetrics Metrics;
-      Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
+      Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues, PrepareForLTO);
        if (Metrics.notDuplicatable) {
          LLVM_DEBUG(
                     dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
@@ -324,6 +326,11 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
          ++NumNotRotatedDueToHeaderSize;
          return Rotated;
        }
+
+      // When preparing for LTO, avoid rotating loops with calls that could be
+      // inlined during the LTO stage.
+      if (PrepareForLTO && Metrics.NumInlineCandidates > 0)
+        return Rotated;
      }
  
      // Now, this loop is suitable for rotation.
@@ -745,8 +752,8 @@ bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
                          ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
                          const SimplifyQuery &SQ, bool RotationOnly = true,
                          unsigned Threshold = unsigned(-1),
-                        bool IsUtilMode = true) {
+                        bool IsUtilMode = true, bool PrepareForLTO) {
    LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
-                IsUtilMode);
+                IsUtilMode, PrepareForLTO);
    return LR.processLoop(L);
  }
diff --git a/llvm/test/Transforms/LoopRotate/call-prepare-for-lto.ll b/llvm/test/Transforms/LoopRotate/call-prepare-for-lto.ll

index dff7339..0f98e41 100644 (file)
--- a/llvm/test/Transforms/LoopRotate/call-prepare-for-lto.ll
+++ b/llvm/test/Transforms/LoopRotate/call-prepare-for-lto.ll
@@ -1,5 +1,7 @@
  ; RUN: opt -S -loop-rotate < %s | FileCheck --check-prefix=FULL %s
+; RUN: opt -S -loop-rotate -rotation-prepare-for-lto < %s | FileCheck --check-prefix=PREPARE %s
  ; RUN: opt -S -passes='require<targetir>,require<assumptions>,loop(loop-rotate)' < %s | FileCheck --check-prefix=FULL %s
+; RUN: opt -S -passes='require<targetir>,require<assumptions>,loop(loop-rotate)' -rotation-prepare-for-lto < %s | FileCheck --check-prefix=PREPARE %s
  
  ; Test case to make sure loop-rotate avoids rotating during the prepare-for-lto
  ; stage, when the header contains a call which may be inlined during the LTO stage.
@@ -11,6 +13,11 @@ define void @test_prepare_for_lto() {
  ; FULL-NEXT:    call void @may_be_inlined()
  ; FULL-NEXT:    br label %for.body
  ;
+; PREPARE-LABEL: @test_prepare_for_lto(
+; PREPARE-NEXT:  entry:
+; PREPARE-NEXT:    %array = alloca [20 x i32], align 16
+; PREPARE-NEXT:    br label %for.cond
+;
  entry:
    %array = alloca [20 x i32], align 16
    br label %for.cond
author	Florian Hahn <flo@fhahn.com>
	Tue, 19 Jan 2021 09:22:40 +0000 (09:22 +0000)
committer	Florian Hahn <flo@fhahn.com>
	Tue, 19 Jan 2021 10:15:29 +0000 (10:15 +0000)
llvm/include/llvm/Analysis/CodeMetrics.h		patch \| blob \| history
llvm/include/llvm/Transforms/Scalar.h		patch \| blob \| history
llvm/include/llvm/Transforms/Scalar/LoopRotation.h		patch \| blob \| history
llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h		patch \| blob \| history
llvm/lib/Analysis/CodeMetrics.cpp		patch \| blob \| history
llvm/lib/Passes/PassBuilder.cpp		patch \| blob \| history
llvm/lib/Transforms/IPO/PassManagerBuilder.cpp		patch \| blob \| history
llvm/lib/Transforms/Scalar/LoopRotation.cpp		patch \| blob \| history
llvm/lib/Transforms/Utils/LoopRotationUtils.cpp		patch \| blob \| history
llvm/test/Transforms/LoopRotate/call-prepare-for-lto.ll		patch \| blob \| history