[LoopPredication] Enable predication when latchCheckIV is wider than rangeCheck

author Anna Thomas <anna@azul.com>

Thu, 2 Nov 2017 21:21:02 +0000 (21:21 +0000)

committer Anna Thomas <anna@azul.com>

Thu, 2 Nov 2017 21:21:02 +0000 (21:21 +0000)
author Anna Thomas <anna@azul.com>
Thu, 2 Nov 2017 21:21:02 +0000 (21:21 +0000)
committer Anna Thomas <anna@azul.com>
Thu, 2 Nov 2017 21:21:02 +0000 (21:21 +0000)
diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp

index 9a623be..e680fbe 100644 (file)
--- a/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/lib/Transforms/Scalar/LoopPredication.cpp
@@ -174,6 +174,9 @@
  
  using namespace llvm;
  
+static cl::opt<bool> EnableIVTruncation("loop-predication-enable-iv-truncation",
+                                        cl::Hidden, cl::init(true));
+
  namespace {
  class LoopPredication {
    /// Represents an induction variable check:
@@ -212,6 +215,22 @@ class LoopPredication {
                                          IRBuilder<> &Builder);
    bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
  
+  // When the IV type is wider than the range operand type, we can still do loop
+  // predication, by generating SCEVs for the range and latch that are of the
+  // same type. We achieve this by generating a SCEV truncate expression for the
+  // latch IV. This is done iff truncation of the IV is a safe operation,
+  // without loss of information.
+  // Another way to achieve this is by generating a wider type SCEV for the
+  // range check operand, however, this needs a more involved check that
+  // operands do not overflow. This can lead to loss of information when the
+  // range operand is of the form: add i32 %offset, %iv. We need to prove that
+  // sext(x + y) is same as sext(x) + sext(y).
+  // This function returns true if we can safely represent the IV type in
+  // the RangeCheckType without loss of information.
+  bool isSafeToTruncateWideIVType(Type *RangeCheckType);
+  // Return the loopLatchCheck corresponding to the RangeCheckType if safe to do
+  // so.
+  Optional<LoopICmp> generateLoopLatchCheck(Type *RangeCheckType);
  public:
    LoopPredication(ScalarEvolution *SE) : SE(SE){};
    bool runOnLoop(Loop *L);
@@ -301,6 +320,34 @@ Value *LoopPredication::expandCheck(SCEVExpander &Expander,
    return Builder.CreateICmp(Pred, LHSV, RHSV);
  }
  
+Optional<LoopPredication::LoopICmp>
+LoopPredication::generateLoopLatchCheck(Type *RangeCheckType) {
+
+  auto *LatchType = LatchCheck.IV->getType();
+  if (RangeCheckType == LatchType)
+    return LatchCheck;
+  // For now, bail out if latch type is narrower than range type.
+  if (DL->getTypeSizeInBits(LatchType) < DL->getTypeSizeInBits(RangeCheckType))
+    return None;
+  if (!isSafeToTruncateWideIVType(RangeCheckType))
+    return None;
+  // We can now safely identify the truncated version of the IV and limit for
+  // RangeCheckType.
+  LoopICmp NewLatchCheck;
+  NewLatchCheck.Pred = LatchCheck.Pred;
+  NewLatchCheck.IV = dyn_cast<SCEVAddRecExpr>(
+      SE->getTruncateExpr(LatchCheck.IV, RangeCheckType));
+  if (!NewLatchCheck.IV)
+    return None;
+  NewLatchCheck.Limit = SE->getTruncateExpr(LatchCheck.Limit, RangeCheckType);
+  DEBUG(dbgs() << "IV of type: " << *LatchType
+               << "can be represented as range check type:" << *RangeCheckType
+               << "\n");
+  DEBUG(dbgs() << "LatchCheck.IV: " << *NewLatchCheck.IV << "\n");
+  DEBUG(dbgs() << "LatchCheck.Limit: " << *NewLatchCheck.Limit << "\n");
+  return NewLatchCheck;
+}
+
  /// If ICI can be widened to a loop invariant condition emits the loop
  /// invariant condition in the loop preheader and return it, otherwise
  /// returns None.
@@ -325,22 +372,31 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
      return None;
    }
    auto *RangeCheckIV = RangeCheck->IV;
-  auto *Ty = RangeCheckIV->getType();
-  if (Ty != LatchCheck.IV->getType()) {
-    DEBUG(dbgs() << "Type mismatch between range check and latch IVs!\n");
-    return None;
-  }
    if (!RangeCheckIV->isAffine()) {
      DEBUG(dbgs() << "Range check IV is not affine!\n");
      return None;
    }
    auto *Step = RangeCheckIV->getStepRecurrence(*SE);
-  if (Step != LatchCheck.IV->getStepRecurrence(*SE)) {
+  // We cannot just compare with latch IV step because the latch and range IVs
+  // may have different types.
+  if (!Step->isOne()) {
      DEBUG(dbgs() << "Range check and latch have IVs different steps!\n");
      return None;
    }
-  assert(Step->isOne() && "must be one");
+  auto *Ty = RangeCheckIV->getType();
+  auto CurrLatchCheckOpt = generateLoopLatchCheck(Ty);
+  if (!CurrLatchCheckOpt) {
+    DEBUG(dbgs() << "Failed to generate a loop latch check "
+                    "corresponding to range type: "
+                 << *Ty << "\n");
+    return None;
+  }
  
+  LoopICmp CurrLatchCheck = *CurrLatchCheckOpt;
+  // At this point the range check step and latch step should have the same
+  // value and type.
+  assert(Step == CurrLatchCheck.IV->getStepRecurrence(*SE) &&
+         "Range and latch should have same step recurrence!");
    // Generate the widened condition:
    //   guardStart u< guardLimit &&
    //   latchLimit <pred> guardLimit - 1 - guardStart + latchStart
@@ -348,8 +404,8 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
    // header comment for the reasoning.
    const SCEV *GuardStart = RangeCheckIV->getStart();
    const SCEV *GuardLimit = RangeCheck->Limit;
-  const SCEV *LatchStart = LatchCheck.IV->getStart();
-  const SCEV *LatchLimit = LatchCheck.Limit;
+  const SCEV *LatchStart = CurrLatchCheck.IV->getStart();
+  const SCEV *LatchLimit = CurrLatchCheck.Limit;
  
    // guardLimit - guardStart + latchStart - 1
    const SCEV *RHS =
@@ -357,7 +413,7 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
                       SE->getMinusSCEV(LatchStart, SE->getOne(Ty)));
  
    ICmpInst::Predicate LimitCheckPred;
-  switch (LatchCheck.Pred) {
+  switch (CurrLatchCheck.Pred) {
    case ICmpInst::ICMP_ULT:
      LimitCheckPred = ICmpInst::ICMP_ULE;
      break;
@@ -510,6 +566,36 @@ Optional<LoopPredication::LoopICmp> LoopPredication::parseLoopLatchICmp() {
    return Result;
  }
  
+// Returns true if its safe to truncate the IV to RangeCheckType.
+bool LoopPredication::isSafeToTruncateWideIVType(Type *RangeCheckType) {
+  if (!EnableIVTruncation)
+    return false;
+  assert(DL->getTypeSizeInBits(LatchCheck.IV->getType()) >
+             DL->getTypeSizeInBits(RangeCheckType) &&
+         "Expected latch check IV type to be larger than range check operand "
+         "type!");
+  // The start and end values of the IV should be known. This is to guarantee
+  // that truncating the wide type will not lose information.
+  auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit);
+  auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart());
+  if (!Limit || !Start)
+    return false;
+  // This check makes sure that the IV does not change sign during loop
+  // iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE,
+  // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the
+  // IV wraps around, and the truncation of the IV would lose the range of
+  // iterations between 2^32 and 2^64.
+  bool Increasing;
+  if (!SE->isMonotonicPredicate(LatchCheck.IV, LatchCheck.Pred, Increasing))
+    return false;
+  // The active bits should be less than the bits in the RangeCheckType. This
+  // guarantees that truncating the latch check to RangeCheckType is a safe
+  // operation.
+  auto RangeCheckTypeBitSize = DL->getTypeSizeInBits(RangeCheckType);
+  return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize &&
+         Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize;
+}
+
  bool LoopPredication::runOnLoop(Loop *Loop) {
    L = Loop;
  
diff --git a/test/Transforms/LoopPredication/widened.ll b/test/Transforms/LoopPredication/widened.ll

new file mode 100644 (file)

index 0000000..33c4e27
--- /dev/null
+++ b/test/Transforms/LoopPredication/widened.ll
@@ -0,0 +1,138 @@
+; RUN: opt -S -loop-predication -loop-predication-enable-iv-truncation=true < %s 2>&1 | FileCheck %s
+declare void @llvm.experimental.guard(i1, ...)
+
+declare i32 @length(i8*)
+
+declare i16 @short_length(i8*)
+; Consider range check of type i16 and i32, while IV is of type i64
+; We can loop predicate this because the IV range is within i16 and within i32.
+define i64 @iv_wider_type_rc_two_narrow_types(i32 %offA, i16 %offB, i8* %arrA, i8* %arrB) {
+; CHECK-LABEL: iv_wider_type_rc_two_narrow_types
+entry:
+; CHECK-LABEL: entry:
+; CHECK: [[idxB:[^ ]+]] = sub i16 %lengthB, %offB
+; CHECK-NEXT: [[limit_checkB:[^ ]+]] = icmp ule i16 16, [[idxB]]
+; CHECK-NEXT: [[first_iteration_checkB:[^ ]+]] = icmp ult i16 %offB, %lengthB
+; CHECK-NEXT: [[WideChkB:[^ ]+]] = and i1 [[first_iteration_checkB]], [[limit_checkB]]
+; CHECK-NEXT: [[idxA:[^ ]+]] = sub i32 %lengthA, %offA
+; CHECK-NEXT: [[limit_checkA:[^ ]+]] = icmp ule i32 16, [[idxA]]
+; CHECK-NEXT: [[first_iteration_checkA:[^ ]+]] = icmp ult i32 %offA, %lengthA
+; CHECK-NEXT: [[WideChkA:[^ ]+]] = and i1 [[first_iteration_checkA]], [[limit_checkA]]
+  %lengthA = call i32 @length(i8* %arrA)
+  %lengthB = call i16 @short_length(i8* %arrB)
+   br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK: [[invariant_check:[^ ]+]] = and i1 [[WideChkB]], [[WideChkA]]
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[invariant_check]], i32 9)
+  %iv = phi i64 [0, %entry ], [ %iv.next, %loop ]
+  %iv.trunc.32 = trunc i64 %iv to i32
+  %iv.trunc.16 = trunc i64 %iv to i16
+  %indexA = add i32 %iv.trunc.32, %offA
+  %indexB = add i16 %iv.trunc.16, %offB
+  %rcA = icmp ult i32 %indexA, %lengthA
+  %rcB = icmp ult i16 %indexB, %lengthB
+  %wide.chk = and i1 %rcA, %rcB
+  call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk, i32 9) [ "deopt"() ]
+  %indexA.ext = zext i32 %indexA to i64
+  %addrA = getelementptr inbounds i8, i8* %arrA, i64 %indexA.ext
+  %eltA = load i8, i8* %addrA
+  %indexB.ext = zext i16 %indexB to i64
+  %addrB = getelementptr inbounds i8, i8* %arrB, i64 %indexB.ext
+  store i8 %eltA, i8* %addrB
+  %iv.next = add nuw nsw i64 %iv, 1
+  %latch.check = icmp ult i64 %iv.next, 16
+  br i1 %latch.check, label %loop, label %exit
+
+exit:
+ ret i64 %iv
+}
+
+
+; Consider an IV of type long and an array access into int array.
+; IV is of type i64 while the range check operands are of type i32 and i64.
+define i64 @iv_rc_different_types(i32 %offA, i32 %offB, i8* %arrA, i8* %arrB, i64 %max)
+{
+; CHECK-LABEL: iv_rc_different_types
+entry:
+; CHECK-LABEL: entry:
+; CHECK: [[lenB:[^ ]+]] = add i32 %lengthB, -1
+; CHECK-NEXT: [[idxB:[^ ]+]] = sub i32 [[lenB]], %offB
+; CHECK-NEXT: [[limit_checkB:[^ ]+]] = icmp ule i32 15, [[idxB]]
+; CHECK-NEXT: [[first_iteration_checkB:[^ ]+]] = icmp ult i32 %offB, %lengthB
+; CHECK-NEXT: [[WideChkB:[^ ]+]] = and i1 [[first_iteration_checkB]], [[limit_checkB]]
+; CHECK-NEXT: [[maxMinusOne:[^ ]+]] = add i64 %max, -1
+; CHECK-NEXT: [[limit_checkMax:[^ ]+]] = icmp ule i64 15, [[maxMinusOne]]
+; CHECK-NEXT: [[first_iteration_checkMax:[^ ]+]] = icmp ult  i64 0, %max
+; CHECK-NEXT: [[WideChkMax:[^ ]+]] = and i1 [[first_iteration_checkMax]], [[limit_checkMax]]
+; CHECK-NEXT: [[lenA:[^ ]+]] = add i32 %lengthA, -1
+; CHECK-NEXT: [[idxA:[^ ]+]] = sub i32 [[lenA]], %offA
+; CHECK-NEXT: [[limit_checkA:[^ ]+]] = icmp ule i32 15, [[idxA]]
+; CHECK-NEXT: [[first_iteration_checkA:[^ ]+]] = icmp ult i32 %offA, %lengthA
+; CHECK-NEXT: [[WideChkA:[^ ]+]] = and i1 [[first_iteration_checkA]], [[limit_checkA]]
+  %lengthA = call i32 @length(i8* %arrA)
+  %lengthB = call i32 @length(i8* %arrB)
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK: [[BandMax:[^ ]+]] = and i1 [[WideChkB]], [[WideChkMax]]
+; CHECK: [[ABandMax:[^ ]+]] = and i1 [[BandMax]], [[WideChkA]]
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[ABandMax]], i32 9)
+  %iv = phi i64 [0, %entry ], [ %iv.next, %loop ]
+  %iv.trunc = trunc i64 %iv to i32
+  %indexA = add i32 %iv.trunc, %offA
+  %indexB = add i32 %iv.trunc, %offB
+  %rcA = icmp ult i32 %indexA, %lengthA
+  %rcIV = icmp ult i64 %iv, %max
+  %wide.chk = and i1 %rcA, %rcIV
+  %rcB = icmp ult i32 %indexB, %lengthB
+  %wide.chk.final = and i1 %wide.chk, %rcB
+  call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk.final, i32 9) [ "deopt"() ]
+  %indexA.ext = zext i32 %indexA to i64
+  %addrA = getelementptr inbounds i8, i8* %arrA, i64 %indexA.ext
+  %eltA = load i8, i8* %addrA
+  %indexB.ext = zext i32 %indexB to i64
+  %addrB = getelementptr inbounds i8, i8* %arrB, i64 %indexB.ext
+  %eltB = load i8, i8* %addrB
+  %result = xor i8 %eltA, %eltB
+  store i8 %result, i8* %addrA
+  %iv.next = add nuw nsw i64 %iv, 1
+  %latch.check = icmp ult i64 %iv, 15
+  br i1 %latch.check, label %loop, label %exit
+
+exit:
+  ret i64 %iv
+}
+
+; cannot narrow the IV to the range type, because we lose information.
+; for (i64 i= 5; i>= 2; i++)
+; this loop wraps around after reaching 2^64.
+define i64 @iv_rc_different_type(i32 %offA, i8* %arrA) {
+; CHECK-LABEL: iv_rc_different_type
+entry:
+  %lengthA = call i32 @length(i8* %arrA)
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK: %rcA = icmp ult i32 %indexA, %lengthA
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %rcA, i32 9)
+  %iv = phi i64 [ 5, %entry ], [ %iv.next, %loop ]
+  %iv.trunc.32 = trunc i64 %iv to i32
+  %indexA = add i32 %iv.trunc.32, %offA
+  %rcA = icmp ult i32 %indexA, %lengthA
+  call void (i1, ...) @llvm.experimental.guard(i1 %rcA, i32 9) [ "deopt"() ]
+  %indexA.ext = zext i32 %indexA to i64
+  %addrA = getelementptr inbounds i8, i8* %arrA, i64 %indexA.ext
+  %eltA = load i8, i8* %addrA
+  %res = add i8 %eltA, 2
+  store i8 %eltA, i8* %addrA
+  %iv.next = add i64 %iv, 1
+  %latch.check = icmp sge i64 %iv.next, 2
+  br i1 %latch.check, label %loop, label %exit
+
+exit:
+ ret i64 %iv
+}
author	Anna Thomas <anna@azul.com>
	Thu, 2 Nov 2017 21:21:02 +0000 (21:21 +0000)
committer	Anna Thomas <anna@azul.com>
	Thu, 2 Nov 2017 21:21:02 +0000 (21:21 +0000)
lib/Transforms/Scalar/LoopPredication.cpp		patch \| blob \| history
test/Transforms/LoopPredication/widened.ll	[new file with mode: 0644]	patch \| blob