From a2fd503e0ee45f26c0cd670c527e278b3d84bcbb Mon Sep 17 00:00:00 2001
From: Aditya Nandakumar <aditya_nandakumar@apple.com>
Date: Tue, 26 Jan 2016 18:42:36 +0000
Subject: [PATCH] Reassociate: Reprocess RedoInsts after each inst

Previously, RedoInsts was processed only at the end of each basic block.
However, that could leave behind some instructions that were not
canonicalized.
Reprocessing RedoInsts after each instruction should guarantee that any
previous instruction in the basic block is canonicalized before we
process a new instruction.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258830 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/Reassociate.cpp              | 67 +++++++++++++---------
 .../Reassociate/prev_insts_canonicalized.ll        | 57 ++++++++++++++++++
 .../Reassociate/reassoc-intermediate-fnegs.ll      |  6 +-
 test/Transforms/Reassociate/xor_reassoc.ll         |  4 +-
 4 files changed, 101 insertions(+), 33 deletions(-)
 create mode 100644 test/Transforms/Reassociate/prev_insts_canonicalized.ll
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index bcadd4e2bee..a6fe51cc872 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -163,7 +163,8 @@ namespace {
       AU.addPreserved<GlobalsAAWrapperPass>();
     }
   private:
-    void BuildRankMap(Function &F);
+    void BuildRankMap(Function &F, ReversePostOrderTraversal<Function *> &RPOT);
+
     unsigned getRank(Value *V);
     void canonicalizeOperands(Instruction *I);
     void ReassociateExpression(BinaryOperator *I);
@@ -246,7 +247,8 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
   return nullptr;
 }
 
-void Reassociate::BuildRankMap(Function &F) {
+void Reassociate::BuildRankMap(Function &F,
+                               ReversePostOrderTraversal<Function *> &RPOT) {
   unsigned i = 2;
 
   // Assign distinct ranks to function arguments.
@@ -255,7 +257,6 @@ void Reassociate::BuildRankMap(Function &F) {
     DEBUG(dbgs() << "Calculated Rank[" << I->getName() << "] = " << i << "\n");
   }
 
-  ReversePostOrderTraversal<Function*> RPOT(&F);
   for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
          E = RPOT.end(); I != E; ++I) {
     BasicBlock *BB = *I;
@@ -2259,13 +2260,28 @@ bool Reassociate::runOnFunction(Function &F) {
   if (skipOptnoneFunction(F))
     return false;
 
-  // Calculate the rank map for F
-  BuildRankMap(F);
+  // Reassociate needs each instruction's operands to have been processed
+  // already, so we first compute an RPOT of the basic blocks; that way,
+  // when we process a basic block, all of its dominators have already
+  // been processed.
+  ReversePostOrderTraversal<Function *> RPOT(&F);
+  BuildRankMap(F, RPOT);
 
   MadeChange = false;
-  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+  for (BasicBlock *BI : RPOT) {
+    // Use a worklist to keep track of which instructions have been processed
+    // (and which insts won't be optimized again) so that, when redoing insts,
+    // we can right away optimize the insts which won't be processed later.
+    SmallSet<Instruction *, 8> Worklist;
+
+    // Insert all instructions in the BB
+    for (Instruction &I : *BI)
+      Worklist.insert(&I);
+
     // Optimize every instruction in the basic block.
-    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; )
+    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;) {
+      // This instruction has been processed.
+      Worklist.erase(&*II);
       if (isInstructionTriviallyDead(&*II)) {
         EraseInst(&*II++);
       } else {
@@ -2274,27 +2290,22 @@ bool Reassociate::runOnFunction(Function &F) {
         ++II;
       }
 
-    // Make a copy of all the instructions to be redone so we can remove dead
-    // instructions.
-    SetVector<AssertingVH<Instruction>> ToRedo(RedoInsts);
-    // Iterate over all instructions to be reevaluated and remove trivially dead
-    // instructions. If any operand of the trivially dead instruction becomes
-    // dead mark it for deletion as well. Continue this process until all
-    // trivially dead instructions have been removed.
-    while (!ToRedo.empty()) {
-      Instruction *I = ToRedo.pop_back_val();
-      if (isInstructionTriviallyDead(I))
-        RecursivelyEraseDeadInsts(I, ToRedo);
-    }
-
-    // Now that we have removed dead instructions, we can reoptimize the
-    // remaining instructions.
-    while (!RedoInsts.empty()) {
-      Instruction *I = RedoInsts.pop_back_val();
-      if (isInstructionTriviallyDead(I))
-        EraseInst(I);
-      else
-        OptimizeInst(I);
+      // If the above optimizations produced new instructions to optimize or
+      // made modifications which need to be redone, do them now if they won't
+      // be handled later.
+      while (!RedoInsts.empty()) {
+        Instruction *I = RedoInsts.pop_back_val();
+        // Process now only the instructions that won't be visited later:
+        // skip insts still pending in this block's Worklist and insts in
+        // basic blocks of higher rank, since those will be processed later.
+        if ((I->getParent() != BI || !Worklist.count(I)) &&
+            RankMap[I->getParent()] <= RankMap[BI]) {
+          if (isInstructionTriviallyDead(I))
+            EraseInst(I);
+          else
+            OptimizeInst(I);
+        }
+      }
     }
   }
 
diff --git a/test/Transforms/Reassociate/prev_insts_canonicalized.ll b/test/Transforms/Reassociate/prev_insts_canonicalized.ll
new file mode 100644
index 00000000000..649761e57c9
--- /dev/null
+++ b/test/Transforms/Reassociate/prev_insts_canonicalized.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+
+; These tests make sure that before processing insts
+; any previous instructions are already canonicalized.
+define i32 @foo(i32 %in) {
+; CHECK-LABEL: @foo
+; CHECK-NEXT: %factor = mul i32 %in, -4
+; CHECK-NEXT: %factor1 = mul i32 %in, 2
+; CHECK-NEXT: %_3 = add i32 %factor, 1
+; CHECK-NEXT: %_5 = add i32 %_3, %factor1
+; CHECK-NEXT: ret i32 %_5
+  %_0 = add i32 %in, 1
+  %_1 = mul i32 %in, -2
+  %_2 = add i32 %_0, %_1
+  %_3 = add i32 %_1, %_2
+  %_4 = add i32 %_3, 1
+  %_5 = add i32 %in, %_3
+  ret i32 %_5
+}
+
+; CHECK-LABEL: @foo1
+define void @foo1(float %in, i1 %cmp) {
+wrapper_entry:
+  br label %foo1
+
+for.body:
+  %0 = fadd float %in1, %in1
+  br label %foo1
+
+foo1:
+  %_0 = fmul fast float %in, -3.000000e+00
+  %_1 = fmul fast float %_0, 3.000000e+00
+  %in1 = fadd fast float -3.000000e+00, %_1
+  %in1use = fadd fast float %in1, %in1
+  br label %for.body
+
+
+}
+
+; CHECK-LABEL: @foo2
+define void @foo2(float %in, i1 %cmp) {
+wrapper_entry:
+  br label %for.body
+
+for.body:
+; If the operands of the phi are scheduled for processing before
+; foo1 is processed, the invariants of reassociate are not preserved
+  %unused = phi float [%in1, %foo1], [undef, %wrapper_entry]
+  br label %foo1
+
+foo1:
+  %_0 = fmul fast float %in, -3.000000e+00
+  %_1 = fmul fast float %_0, 3.000000e+00
+  %in1 = fadd fast float -3.000000e+00, %_1
+  %in1use = fadd fast float %in1, %in1
+  br label %for.body
+}
diff --git a/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll b/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
index c2cdffce61e..7d82ef7e7a2 100644
--- a/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
+++ b/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
@@ -1,8 +1,8 @@
 ; RUN: opt < %s -reassociate -S | FileCheck %s
 ; CHECK-LABEL: faddsubAssoc1
-; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
-; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH4500
-; CHECK: fsub fast half [[TMP2]], [[TMP1]]
+; CHECK: [[TMP1:%.*]] = fsub fast half 0xH8000, %a
+; CHECK: [[TMP2:%.*]] = fadd fast half %b, [[TMP1]]
+; CHECK: fmul fast half [[TMP2]], 0xH4500
 ; CHECK: ret
 ; Input is A op (B op C)
 define half @faddsubAssoc1(half %a, half %b) {
diff --git a/test/Transforms/Reassociate/xor_reassoc.ll b/test/Transforms/Reassociate/xor_reassoc.ll
index 0bed6f35880..a22689805fb 100644
--- a/test/Transforms/Reassociate/xor_reassoc.ll
+++ b/test/Transforms/Reassociate/xor_reassoc.ll
@@ -88,8 +88,8 @@ define i32 @xor_special2(i32 %x, i32 %y) {
   %xor1 = xor i32 %xor, %and
   ret i32 %xor1
 ; CHECK-LABEL: @xor_special2(
-; CHECK: %xor = xor i32 %x, 123
-; CHECK: %xor1 = xor i32 %xor, %y
+; CHECK: %xor = xor i32 %y, 123
+; CHECK: %xor1 = xor i32 %xor, %x
 ; CHECK: ret i32 %xor1
 }
 
-- 
2.11.0