From d3cf783ed12c8e658598a1a9173a27ea8481073b Mon Sep 17 00:00:00 2001
From: Juergen Ributzka <juergen@apple.com>
Date: Fri, 21 Mar 2014 06:04:45 +0000
Subject: [PATCH] [Constant Hoisting] Make the constant materialization cost
 operand dependent

Extend the target hook to also take the operand index into account when
calculating the cost of the constant materialization.

Related to <rdar://problem/16381500>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204435 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Analysis/TargetTransformInfo.h |  6 ++--
 lib/Analysis/TargetTransformInfo.cpp        | 16 +++++-----
 lib/Target/X86/X86TargetTransformInfo.cpp   | 47 ++++++++++++++++++++---------
 lib/Transforms/Scalar/ConstantHoisting.cpp  | 14 ++++-----
 test/CodeGen/X86/lsr-interesting-step.ll    | 14 ++++-----
 test/CodeGen/X86/negate-add-zero.ll         | 17 +++++------
 test/Transforms/ConstantHoisting/X86/phi.ll |  6 ++--
 7 files changed, 66 insertions(+), 54 deletions(-)

diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 178d55305e2..b11674898fb 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -297,10 +297,10 @@ public:
   /// \brief Return the expected cost of materialization for the given integer
   /// immediate of the specified type for a given instruction. The cost can be
   /// zero if the immediate can be folded into the specified instruction.
-  virtual unsigned getIntImmCost(unsigned Opcode, const APInt &Imm,
-                                 Type *Ty) const;
-  virtual unsigned getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
+  virtual unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                                  Type *Ty) const;
+  virtual unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+                                 const APInt &Imm, Type *Ty) const;
   /// @}
 
   /// \name Vector Target Information
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 0dcdd12a409..75d053c6891 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -148,14 +148,14 @@ unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
   return PrevTTI->getIntImmCost(Imm, Ty);
 }
 
-unsigned TargetTransformInfo::getIntImmCost(unsigned Opcode, const APInt &Imm,
-                                            Type *Ty) const {
-  return PrevTTI->getIntImmCost(Opcode, Imm, Ty);
+unsigned TargetTransformInfo::getIntImmCost(unsigned Opc, unsigned Idx,
+                                            const APInt &Imm, Type *Ty) const {
+  return PrevTTI->getIntImmCost(Opc, Idx, Imm, Ty);
 }
 
-unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
-                                            Type *Ty) const {
-  return PrevTTI->getIntImmCost(IID, Imm, Ty);
+unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+                                            const APInt &Imm, Type *Ty) const {
+  return PrevTTI->getIntImmCost(IID, Idx, Imm, Ty);
 }
 
 unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
@@ -539,12 +539,12 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
     return TCC_Basic;
   }
 
-  unsigned getIntImmCost(unsigned Opcode, const APInt &Imm,
+  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                          Type *Ty) const override {
     return TCC_Free;
   }
 
-  unsigned getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
+  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                          Type *Ty) const override {
     return TCC_Free;
   }
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 1a0208c1a52..87a5dd6536b 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -103,9 +103,9 @@ public:
 
   unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
 
-  unsigned getIntImmCost(unsigned Opcode, const APInt &Imm,
+  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                          Type *Ty) const override;
-  unsigned getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
+  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                          Type *Ty) const override;
 
   /// @}
@@ -776,6 +776,9 @@ unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
   if (BitSize == 0)
     return ~0U;
 
+  if (Imm == 0)
+    return TCC_Free;
+
   if (Imm.getBitWidth() <= 64 &&
       (isInt<32>(Imm.getSExtValue()) || isUInt<32>(Imm.getZExtValue())))
     return TCC_Basic;
@@ -783,7 +786,7 @@ unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
     return 2 * TCC_Basic;
 }
 
-unsigned X86TTI::getIntImmCost(unsigned Opcode, const APInt &Imm,
+unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                                Type *Ty) const {
   assert(Ty->isIntegerTy());
 
@@ -791,7 +794,15 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, const APInt &Imm,
   if (BitSize == 0)
     return ~0U;
 
+  unsigned ImmIdx = ~0U;
   switch (Opcode) {
+  default: return TCC_Free;
+  case Instruction::GetElementPtr:
+    if (Idx != 0)
+      return TCC_Free;
+  case Instruction::Store:
+    ImmIdx = 0;
+    break;
   case Instruction::Add:
   case Instruction::Sub:
   case Instruction::Mul:
@@ -806,28 +817,31 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, const APInt &Imm,
   case Instruction::Or:
   case Instruction::Xor:
   case Instruction::ICmp:
-    if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
-      return TCC_Free;
-    else
-      return X86TTI::getIntImmCost(Imm, Ty);
+    ImmIdx = 1;
+    break;
   case Instruction::Trunc:
   case Instruction::ZExt:
   case Instruction::SExt:
   case Instruction::IntToPtr:
   case Instruction::PtrToInt:
   case Instruction::BitCast:
+  case Instruction::PHI:
   case Instruction::Call:
   case Instruction::Select:
   case Instruction::Ret:
   case Instruction::Load:
-  case Instruction::Store:
-    return X86TTI::getIntImmCost(Imm, Ty);
+    break;
   }
-  return TargetTransformInfo::getIntImmCost(Opcode, Imm, Ty);
+
+  if ((Idx == ImmIdx) &&
+      Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
+    return TCC_Free;
+
+  return X86TTI::getIntImmCost(Imm, Ty);
 }
 
-unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
-                               Type *Ty) const {
+unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+                               const APInt &Imm, Type *Ty) const {
   assert(Ty->isIntegerTy());
 
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -835,21 +849,24 @@ unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
     return ~0U;
 
   switch (IID) {
-  default: return TargetTransformInfo::getIntImmCost(IID, Imm, Ty);
+  default: return TCC_Free;
   case Intrinsic::sadd_with_overflow:
   case Intrinsic::uadd_with_overflow:
   case Intrinsic::ssub_with_overflow:
   case Intrinsic::usub_with_overflow:
   case Intrinsic::smul_with_overflow:
   case Intrinsic::umul_with_overflow:
-    if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
+    if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
       return TCC_Free;
     else
       return X86TTI::getIntImmCost(Imm, Ty);
   case Intrinsic::experimental_stackmap:
+    if (Idx < 2)
+      return TCC_Free;
   case Intrinsic::experimental_patchpoint_void:
   case Intrinsic::experimental_patchpoint_i64:
-    if (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))
+    if ((Idx < 4 ) ||
+        (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
       return TCC_Free;
     else
       return X86TTI::getIntImmCost(Imm, Ty);
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index 89df2b496f3..fc5917b8f37 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -29,7 +29,7 @@
 // certain transformations on them, which would create a new expensive constant.
 //
 // This optimization is only applied to integer constants in instructions and
-// simple (this means not nested) constant cast experessions. For example:
+// simple (this means not nested) constant cast expressions. For example:
 // %0 = load i64* inttoptr (i64 big_constant to i64*)
 //===----------------------------------------------------------------------===//
 
@@ -66,7 +66,7 @@ struct ConstantUser {
   ConstantUser(Instruction *Inst, unsigned Idx) : Inst(Inst), OpndIdx(Idx) { }
 };
 
-/// \brief Keeps track of a constant candidate and its usees.
+/// \brief Keeps track of a constant candidate and its uses.
 struct ConstantCandidate {
   ConstantUseListType Uses;
   ConstantInt *ConstInt;
@@ -292,7 +292,7 @@ findConstantInsertionPoint(const ConstantInfo &ConstInfo) const {
 /// \brief Record constant integer ConstInt for instruction Inst at operand
 /// index Idx.
 ///
-/// The operand at index Idx is not necessarily the constant inetger itself. It
+/// The operand at index Idx is not necessarily the constant integer itself. It
 /// could also be a cast instruction or a constant expression that uses the
 // constant integer.
 void ConstantHoisting::collectConstantCandidates(Instruction *Inst,
@@ -300,12 +300,12 @@ void ConstantHoisting::collectConstantCandidates(Instruction *Inst,
                                                  ConstantInt *ConstInt) {
   unsigned Cost;
   // Ask the target about the cost of materializing the constant for the given
-  // instruction.
+  // instruction and operand index.
   if (auto IntrInst = dyn_cast<IntrinsicInst>(Inst))
-    Cost = TTI->getIntImmCost(IntrInst->getIntrinsicID(),
+    Cost = TTI->getIntImmCost(IntrInst->getIntrinsicID(), Idx,
                               ConstInt->getValue(), ConstInt->getType());
   else
-    Cost = TTI->getIntImmCost(Inst->getOpcode(), ConstInt->getValue(),
+    Cost = TTI->getIntImmCost(Inst->getOpcode(), Idx, ConstInt->getValue(),
                               ConstInt->getType());
 
   // Ignore cheap integer constants.
@@ -582,7 +582,7 @@ bool ConstantHoisting::optimizeConstants(Function &Fn) {
   if (ConstantVec.empty())
     return false;
 
-  // Finally hoist the base constant and emit materializating code for dependent
+  // Finally hoist the base constant and emit materialization code for dependent
   // constants.
   bool MadeChange = emitBaseConstants();
 
diff --git a/test/CodeGen/X86/lsr-interesting-step.ll b/test/CodeGen/X86/lsr-interesting-step.ll
index d4a7ac7da12..8ea3c53de41 100644
--- a/test/CodeGen/X86/lsr-interesting-step.ll
+++ b/test/CodeGen/X86/lsr-interesting-step.ll
@@ -3,26 +3,24 @@
 ; The inner loop should require only one add (and no leas either).
 ; rdar://8100380
 
-; CHECK:      BB0_3:
-; CHECK-NEXT:   movb    $0, flags(%rdx)
-; CHECK-NEXT:   addq    %rax, %rdx
-; CHECK-NEXT:   cmpq    $8192, %rdx
+; CHECK:      BB0_2:
+; CHECK-NEXT:   movb    $0, flags(%rcx)
+; CHECK-NEXT:   addq    %rax, %rcx
+; CHECK-NEXT:   cmpq    $8192, %rcx
 ; CHECK-NEXT:   jl
 
 @flags = external global [8192 x i8], align 16 ; <[8192 x i8]*> [#uses=1]
 
 define void @foo() nounwind {
 entry:
-  %tmp = icmp slt i64 2, 8192                     ; <i1> [#uses=1]
-  br i1 %tmp, label %bb, label %bb21
+  br label %bb
 
 bb:                                               ; preds = %entry
   br label %bb7
 
 bb7:                                              ; preds = %bb, %bb17
   %tmp8 = phi i64 [ %tmp18, %bb17 ], [ 2, %bb ]   ; <i64> [#uses=2]
-  %tmp9 = icmp slt i64 2, 8192                    ; <i1> [#uses=1]
-  br i1 %tmp9, label %bb10, label %bb17
+  br label %bb10
 
 bb10:                                             ; preds = %bb7
   br label %bb11
diff --git a/test/CodeGen/X86/negate-add-zero.ll b/test/CodeGen/X86/negate-add-zero.ll
index 92850f22eaa..c961bd091b9 100644
--- a/test/CodeGen/X86/negate-add-zero.ll
+++ b/test/CodeGen/X86/negate-add-zero.ll
@@ -827,9 +827,7 @@ declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi6ELi6ELi0ELi0EEEENT_13
 declare void @_ZN21HNodeTranslateRotate311toCartesianEv(%struct.HNodeTranslateRotate3*)
 
 define linkonce void @_ZN21HNodeTranslateRotate36setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate3* %this, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"* %velv) {
-entry:
-	%0 = add i32 0, -1		; <i32> [#uses=1]
-	%1 = getelementptr double* null, i32 %0		; <double*> [#uses=1]
+	%1 = getelementptr double* null, i32 -1		; <double*> [#uses=1]
 	%2 = load double* %1, align 8		; <double> [#uses=1]
 	%3 = load double* null, align 8		; <double> [#uses=2]
 	%4 = load double* null, align 8		; <double> [#uses=2]
@@ -890,13 +888,12 @@ entry:
 	store double %52, double* %55, align 8
 	%56 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 2		; <double*> [#uses=1]
 	store double %53, double* %56, align 8
-	%57 = add i32 0, 4		; <i32> [#uses=1]
-	%58 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 0		; <%"struct.CDSVector<double,0,CDS::DefaultAlloc>"**> [#uses=1]
-	store %"struct.CDSVector<double,0,CDS::DefaultAlloc>"* %velv, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"** %58, align 8
-	%59 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 1		; <i32*> [#uses=1]
-	store i32 %57, i32* %59, align 4
-	%60 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 2		; <i32*> [#uses=1]
-	store i32 3, i32* %60, align 8
+	%57 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 0		; <%"struct.CDSVector<double,0,CDS::DefaultAlloc>"**> [#uses=1]
+	store %"struct.CDSVector<double,0,CDS::DefaultAlloc>"* %velv, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"** %57, align 8
+	%58 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 1		; <i32*> [#uses=1]
+	store i32 4, i32* %58, align 4
+	%59 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 2		; <i32*> [#uses=1]
+	store i32 3, i32* %59, align 8
 	unreachable
 }
 
diff --git a/test/Transforms/ConstantHoisting/X86/phi.ll b/test/Transforms/ConstantHoisting/X86/phi.ll
index e63c06e91a2..7134723f61a 100644
--- a/test/Transforms/ConstantHoisting/X86/phi.ll
+++ b/test/Transforms/ConstantHoisting/X86/phi.ll
@@ -20,10 +20,10 @@ return:
 
 ; CHECK-LABEL: @test1
 ; CHECK: if.end:
-; CHECK: %const_mat = add i64 %const, 1
-; CHECK-NEXT: %1 = inttoptr i64 %const_mat to i8*
+; CHECK: %2 = inttoptr i64 %const to i8*
+; CHECK-NEXT: br
 ; CHECK: return:
-; CHECK-NEXT: %retval.0 = phi i8* [ null, %entry ], [ inttoptr (i64 68719476736 to i8*), %if.end ]
+; CHECK-NEXT: %retval.0 = phi i8* [ null, %entry ], [ %2, %if.end ]
 }
 
 define void @test2(i1 %cmp, i64** %tmp) {
-- 
2.11.0