[SimplifyCFG] don't sink common insts too soon (PR34603)

author Sanjay Patel <spatel@rotateright.com>

Thu, 14 Dec 2017 22:05:20 +0000 (22:05 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Thu, 14 Dec 2017 22:05:20 +0000 (22:05 +0000)
author Sanjay Patel <spatel@rotateright.com>
Thu, 14 Dec 2017 22:05:20 +0000 (22:05 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Thu, 14 Dec 2017 22:05:20 +0000 (22:05 +0000)
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h

index 07d3d7f..6fe35b4 100644 (file)
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -267,7 +267,7 @@ FunctionPass *createJumpThreadingPass(int Threshold = -1);
  //
  FunctionPass *createCFGSimplificationPass(
      unsigned Threshold = 1, bool ForwardSwitchCond = false,
-    bool ConvertSwitch = false, bool KeepLoops = true,
+    bool ConvertSwitch = false, bool KeepLoops = true, bool SinkCommon = false,
      std::function<bool(const Function &)> Ftor = nullptr);
  
  //===----------------------------------------------------------------------===//
diff --git a/include/llvm/Transforms/Scalar/SimplifyCFG.h b/include/llvm/Transforms/Scalar/SimplifyCFG.h

index ed6b1b1..1afb9c7 100644 (file)
--- a/include/llvm/Transforms/Scalar/SimplifyCFG.h
+++ b/include/llvm/Transforms/Scalar/SimplifyCFG.h
@@ -39,7 +39,8 @@ public:
        : SimplifyCFGPass(SimplifyCFGOptions()
                              .forwardSwitchCondToPhi(false)
                              .convertSwitchToLookupTable(false)
-                            .needCanonicalLoops(true)) {}
+                            .needCanonicalLoops(true)
+                            .sinkCommonInsts(false)) {}
  
  
    /// Construct a pass with optional optimizations.
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h

index 6d8d859..7dd21df 100644 (file)
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -63,16 +63,20 @@ struct SimplifyCFGOptions {
    bool ForwardSwitchCondToPhi;
    bool ConvertSwitchToLookupTable;
    bool NeedCanonicalLoop;
+  bool SinkCommonInsts;
    AssumptionCache *AC;
  
    SimplifyCFGOptions(unsigned BonusThreshold = 1,
                       bool ForwardSwitchCond = false,
                       bool SwitchToLookup = false, bool CanonicalLoops = true,
+                     bool SinkCommon = false,
                       AssumptionCache *AssumpCache = nullptr)
        : BonusInstThreshold(BonusThreshold),
          ForwardSwitchCondToPhi(ForwardSwitchCond),
          ConvertSwitchToLookupTable(SwitchToLookup),
-        NeedCanonicalLoop(CanonicalLoops), AC(AssumpCache) {}
+        NeedCanonicalLoop(CanonicalLoops),
+        SinkCommonInsts(SinkCommon),
+        AC(AssumpCache) {}
  
    // Support 'builder' pattern to set members by name at construction time.
    SimplifyCFGOptions &bonusInstThreshold(int I) {
@@ -91,6 +95,10 @@ struct SimplifyCFGOptions {
      NeedCanonicalLoop = B;
      return *this;
    }
+  SimplifyCFGOptions &sinkCommonInsts(bool B) {
+    SinkCommonInsts = B;
+    return *this;
+  }
    SimplifyCFGOptions &setAssumptionCache(AssumptionCache *Cache) {
      AC = Cache;
      return *this;
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp

index 56eba69..d33c4df 100644 (file)
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -747,21 +747,24 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
    // Cleanup after the loop optimization passes.
    OptimizePM.addPass(InstCombinePass());
  
-
    // Now that we've formed fast to execute loop structures, we do further
    // optimizations. These are run afterward as they might block doing complex
    // analyses and transforms such as what are needed for loop vectorization.
  
-  // Optimize parallel scalar instruction chains into SIMD instructions.
-  OptimizePM.addPass(SLPVectorizerPass());
-
-  // Cleanup after all of the vectorizers. Simplification passes like CVP and
+  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
    // GVN, loop transforms, and others have already run, so it's now better to
    // convert to more optimized IR using more aggressive simplify CFG options.
+  // The extra sinking transform can create larger basic blocks, so do this
+  // before SLP vectorization.
    OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions().
-                                         forwardSwitchCondToPhi(true).
-                                         convertSwitchToLookupTable(true).
-                                         needCanonicalLoops(false)));
+                                     forwardSwitchCondToPhi(true).
+                                     convertSwitchToLookupTable(true).
+                                     needCanonicalLoops(false).
+                                     sinkCommonInsts(true)));
+
+  // Optimize parallel scalar instruction chains into SIMD instructions.
+  OptimizePM.addPass(SLPVectorizerPass());
+
    OptimizePM.addPass(InstCombinePass());
  
    // Unroll small loops to hide loop backedge latency and saturate any parallel
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp

index 21b8bff..64583ea 100644 (file)
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -365,7 +365,7 @@ void AArch64PassConfig::addIRPasses() {
    // determine whether it succeeded. We can exploit existing control-flow in
    // ldrex/strex loops to simplify this, but it needs tidying up.
    if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
-    addPass(createCFGSimplificationPass(1, true, true, false));
+    addPass(createCFGSimplificationPass(1, true, true, false, true));
  
    // Run LoopDataPrefetch
    //
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp

index 007dc2b..51982b2 100644 (file)
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -385,7 +385,7 @@ void ARMPassConfig::addIRPasses() {
    // ldrex/strex loops to simplify this, but it needs tidying up.
    if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
      addPass(createCFGSimplificationPass(
-        1, false, false, true, [this](const Function &F) {
+        1, false, false, true, true, [this](const Function &F) {
            const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F);
            return ST.hasAnyDataBarrier() && !ST.isThumb1Only();
          }));
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp

index b5d32a1..3855e62 100644 (file)
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -630,6 +630,13 @@ void PassManagerBuilder::populateModulePassManager(
      addInstructionCombiningPass(MPM);
    }
  
+  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
+  // GVN, loop transforms, and others have already run, so it's now better to
+  // convert to more optimized IR using more aggressive simplify CFG options.
+  // The extra sinking transform can create larger basic blocks, so do this
+  // before SLP vectorization.
+  MPM.add(createCFGSimplificationPass(1, true, true, false, true));
+
    if (RunSLPAfterLoopVectorization && SLPVectorize) {
      MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
      if (OptLevel > 1 && ExtraVectorizerPasses) {
@@ -638,9 +645,6 @@ void PassManagerBuilder::populateModulePassManager(
    }
  
    addExtensionsToPM(EP_Peephole, MPM);
-  // Switches to lookup tables and other transforms that may not be considered
-  // canonical by other IR passes.
-  MPM.add(createCFGSimplificationPass(1, true, true, false));
    addInstructionCombiningPass(MPM);
  
    if (!DisableUnrollLoops) {
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp

index 789e0a4..1522170 100644 (file)
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -61,6 +61,11 @@ static cl::opt<bool> UserForwardSwitchCond(
      "forward-switch-cond", cl::Hidden, cl::init(false),
      cl::desc("Forward switch condition to phi ops (default = false)"));
  
+static cl::opt<bool> UserSinkCommonInsts(
+    "sink-common-insts", cl::Hidden, cl::init(false),
+    cl::desc("Sink common instructions (default = false)"));
+
+
  STATISTIC(NumSimpl, "Number of blocks simplified");
  
  /// If we have more than one empty (other than phi node) return blocks,
@@ -205,6 +210,9 @@ SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts) {
    Options.NeedCanonicalLoop = UserKeepLoops.getNumOccurrences()
                                    ? UserKeepLoops
                                    : Opts.NeedCanonicalLoop;
+  Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences()
+                                ? UserSinkCommonInsts
+                                : Opts.SinkCommonInsts;
  }
  
  PreservedAnalyses SimplifyCFGPass::run(Function &F,
@@ -226,6 +234,7 @@ struct CFGSimplifyPass : public FunctionPass {
  
    CFGSimplifyPass(unsigned Threshold = 1, bool ForwardSwitchCond = false,
                    bool ConvertSwitch = false, bool KeepLoops = true,
+                  bool SinkCommon = false,
                    std::function<bool(const Function &)> Ftor = nullptr)
        : FunctionPass(ID), PredicateFtor(std::move(Ftor)) {
  
@@ -246,6 +255,10 @@ struct CFGSimplifyPass : public FunctionPass {
  
      Options.NeedCanonicalLoop =
          UserKeepLoops.getNumOccurrences() ? UserKeepLoops : KeepLoops;
+
+    Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences()
+                                  ? UserSinkCommonInsts
+                                  : SinkCommon;
    }
  
    bool runOnFunction(Function &F) override {
@@ -276,7 +289,8 @@ INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
  FunctionPass *
  llvm::createCFGSimplificationPass(unsigned Threshold, bool ForwardSwitchCond,
                                    bool ConvertSwitch, bool KeepLoops,
+                                  bool SinkCommon,
                                    std::function<bool(const Function &)> Ftor) {
    return new CFGSimplifyPass(Threshold, ForwardSwitchCond, ConvertSwitch,
-                             KeepLoops, std::move(Ftor));
+                             KeepLoops, SinkCommon, std::move(Ftor));
  }
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp

index 394c951..f02f80c 100644 (file)
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5728,7 +5728,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
    BasicBlock *BB = BI->getParent();
    BasicBlock *Succ = BI->getSuccessor(0);
  
-  if (SinkCommon && SinkThenElseCodeToEnd(BI))
+  if (SinkCommon && Options.SinkCommonInsts && SinkThenElseCodeToEnd(BI))
      return true;
  
    // If the Terminator is the only non-phi instruction, simplify the block.
diff --git a/test/DebugInfo/Generic/simplifycfg_sink_last_inst.ll b/test/DebugInfo/Generic/simplifycfg_sink_last_inst.ll

index 2185fbb..9012b81 100644 (file)
--- a/test/DebugInfo/Generic/simplifycfg_sink_last_inst.ll
+++ b/test/DebugInfo/Generic/simplifycfg_sink_last_inst.ll
@@ -1,4 +1,4 @@
-; RUN: opt -simplifycfg -S < %s | FileCheck %s
+; RUN: opt -simplifycfg -sink-common-insts -S < %s | FileCheck %s
  
  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Other/new-pm-defaults.ll b/test/Other/new-pm-defaults.ll

index 9212790..1964a8c 100644 (file)
--- a/test/Other/new-pm-defaults.ll
+++ b/test/Other/new-pm-defaults.ll
@@ -197,8 +197,8 @@
  ; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass
  ; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis
  ; CHECK-O-NEXT: Running pass: InstCombinePass
-; CHECK-O-NEXT: Running pass: SLPVectorizerPass
  ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
+; CHECK-O-NEXT: Running pass: SLPVectorizerPass
  ; CHECK-O-NEXT: Running pass: InstCombinePass
  ; CHECK-O-NEXT: Running pass: LoopUnrollPass
  ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
diff --git a/test/Other/new-pm-thinlto-defaults.ll b/test/Other/new-pm-thinlto-defaults.ll

index 9593b4e..12fd0d7 100644 (file)
--- a/test/Other/new-pm-thinlto-defaults.ll
+++ b/test/Other/new-pm-thinlto-defaults.ll
@@ -185,8 +185,8 @@
  ; CHECK-POSTLINK-O-NEXT: Running pass: LoopLoadEliminationPass
  ; CHECK-POSTLINK-O-NEXT: Running analysis: LoopAccessAnalysis
  ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
-; CHECK-POSTLINK-O-NEXT: Running pass: SLPVectorizerPass
  ; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass
+; CHECK-POSTLINK-O-NEXT: Running pass: SLPVectorizerPass
  ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
  ; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass
  ; CHECK-POSTLINK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
diff --git a/test/Transforms/PhaseOrdering/simplifycfg-options.ll b/test/Transforms/PhaseOrdering/simplifycfg-options.ll

index fcb9a55..6934623 100644 (file)
--- a/test/Transforms/PhaseOrdering/simplifycfg-options.ll
+++ b/test/Transforms/PhaseOrdering/simplifycfg-options.ll
@@ -76,10 +76,8 @@ define double @max_of_loads(double* %x, double* %y, i64 %i) {
  ; ALL-NEXT:    [[XI:%.*]] = load double, double* [[XI_PTR]], align 8
  ; ALL-NEXT:    [[YI:%.*]] = load double, double* [[YI_PTR]], align 8
  ; ALL-NEXT:    [[CMP:%.*]] = fcmp ogt double [[XI]], [[YI]]
-; ALL-NEXT:    [[Y_SINK:%.*]] = select i1 [[CMP]], double* [[X]], double* [[Y]]
-; ALL-NEXT:    [[YI_PTR_AGAIN:%.*]] = getelementptr double, double* [[Y_SINK]], i64 [[I]]
-; ALL-NEXT:    [[YI_AGAIN:%.*]] = load double, double* [[YI_PTR_AGAIN]], align 8
-; ALL-NEXT:    ret double [[YI_AGAIN]]
+; ALL-NEXT:    [[XI_YI:%.*]] = select i1 [[CMP]], double [[XI]], double [[YI]]
+; ALL-NEXT:    ret double [[XI_YI]]
  ;
  entry:
    %xi_ptr = getelementptr double, double* %x, i64 %i
diff --git a/test/Transforms/SimplifyCFG/no-md-sink.ll b/test/Transforms/SimplifyCFG/no-md-sink.ll

index b603270..25747bf 100644 (file)
--- a/test/Transforms/SimplifyCFG/no-md-sink.ll
+++ b/test/Transforms/SimplifyCFG/no-md-sink.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -simplifycfg -S | FileCheck %s
+; RUN: opt < %s -simplifycfg -sink-common-insts -S | FileCheck %s
  
  define i1 @test1(i1 zeroext %flag, i8* %y) #0 {
  entry:
diff --git a/test/Transforms/SimplifyCFG/sink-common-code.ll b/test/Transforms/SimplifyCFG/sink-common-code.ll

index a261453..0ac80e5 100644 (file)
--- a/test/Transforms/SimplifyCFG/sink-common-code.ll
+++ b/test/Transforms/SimplifyCFG/sink-common-code.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -simplifycfg -S | FileCheck -enable-var-scope %s
+; RUN: opt < %s -simplifycfg -sink-common-insts -S | FileCheck -enable-var-scope %s
  
  define zeroext i1 @test1(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
  entry:
author	Sanjay Patel <spatel@rotateright.com>
	Thu, 14 Dec 2017 22:05:20 +0000 (22:05 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Thu, 14 Dec 2017 22:05:20 +0000 (22:05 +0000)
include/llvm/Transforms/Scalar.h		patch \| blob \| history
include/llvm/Transforms/Scalar/SimplifyCFG.h		patch \| blob \| history
include/llvm/Transforms/Utils/Local.h		patch \| blob \| history
lib/Passes/PassBuilder.cpp		patch \| blob \| history
lib/Target/AArch64/AArch64TargetMachine.cpp		patch \| blob \| history
lib/Target/ARM/ARMTargetMachine.cpp		patch \| blob \| history
lib/Transforms/IPO/PassManagerBuilder.cpp		patch \| blob \| history
lib/Transforms/Scalar/SimplifyCFGPass.cpp		patch \| blob \| history
lib/Transforms/Utils/SimplifyCFG.cpp		patch \| blob \| history
test/DebugInfo/Generic/simplifycfg_sink_last_inst.ll		patch \| blob \| history
test/Other/new-pm-defaults.ll		patch \| blob \| history
test/Other/new-pm-thinlto-defaults.ll		patch \| blob \| history
test/Transforms/PhaseOrdering/simplifycfg-options.ll		patch \| blob \| history
test/Transforms/SimplifyCFG/no-md-sink.ll		patch \| blob \| history
test/Transforms/SimplifyCFG/sink-common-code.ll		patch \| blob \| history