From ff2cf8fafa5ad9a76e59fa086d969d4e2ecc3a39 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Sat, 13 Mar 2021 08:26:27 -0500
Subject: [PATCH] [InstCombine] avoid creating an extra instruction in zext
 fold and possible inf-loop

The structure of this fold is suspect vs. most of instcombine
because it creates instructions and tries to delete them
immediately after.

If we don't have the same operand types for the icmps, then we are
not behaving as assumed. And as shown in PR49475, we can inf-loop.

(cherry picked from commit 4224a36957420744756d6a6450eb6502a1bfadc3)
---
 .../Transforms/InstCombine/InstCombineCasts.cpp    |  1 +
 llvm/test/Transforms/InstCombine/zext-or-icmp.ll   | 58 +++++++++++++++++++---
 2 files changed, 53 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 0b53007bb6d..07e68c44416 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1270,6 +1270,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
     ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
     ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
     if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
+        LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType() &&
         (transformZExtICmp(LHS, CI, false) ||
          transformZExtICmp(RHS, CI, false))) {
       // zext (or icmp, icmp) -> or (zext icmp), (zext icmp)
diff --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
index 54ae0858aa6..5ae3d8ea0db 100644
--- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
+++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
@@ -107,17 +107,16 @@ block2:
   ret i32 %conv2
 }
 
-; FIXME: This should not end with more instructions than it started from.
+; This should not end with more instructions than it started from.
 
 define i32 @PR49475(i32 %x, i16 %y) {
 ; CHECK-LABEL: @PR49475(
 ; CHECK-NEXT:    [[M:%.*]] = and i16 [[Y:%.*]], 1
 ; CHECK-NEXT:    [[B1:%.*]] = icmp eq i32 [[X:%.*]], 0
-; CHECK-NEXT:    [[B11:%.*]] = zext i1 [[B1]] to i32
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i16 [[M]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
-; CHECK-NEXT:    [[Z3:%.*]] = or i32 [[B11]], [[TMP2]]
-; CHECK-NEXT:    ret i32 [[Z3]]
+; CHECK-NEXT:    [[B2:%.*]] = icmp eq i16 [[M]], 0
+; CHECK-NEXT:    [[T1:%.*]] = or i1 [[B1]], [[B2]]
+; CHECK-NEXT:    [[Z:%.*]] = zext i1 [[T1]] to i32
+; CHECK-NEXT:    ret i32 [[Z]]
 ;
   %m = and i16 %y, 1
   %b1 = icmp eq i32 %x, 0
@@ -126,3 +125,50 @@ define i32 @PR49475(i32 %x, i16 %y) {
   %z = zext i1 %t1 to i32
   ret i32 %z
 }
+
+; This would infinite-loop before the fold required matching icmp operand types (here i32 vs. i16).
+
+define i8 @PR49475_infloop(i32 %t0, i16 %insert, i64 %e, i8 %i162) {
+; CHECK-LABEL: @PR49475_infloop(
+; CHECK-NEXT:    [[B:%.*]] = icmp eq i32 [[T0:%.*]], 0
+; CHECK-NEXT:    [[B2:%.*]] = icmp eq i16 [[INSERT:%.*]], 0
+; CHECK-NEXT:    [[T1:%.*]] = or i1 [[B]], [[B2]]
+; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[T1]] to i32
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[EXT]], [[T0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[AND]], 140
+; CHECK-NEXT:    [[XOR1:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[CONV16:%.*]] = sext i8 [[I162:%.*]] to i64
+; CHECK-NEXT:    [[SUB17:%.*]] = sub i64 [[CONV16]], [[E:%.*]]
+; CHECK-NEXT:    [[SEXT:%.*]] = shl i64 [[SUB17]], 32
+; CHECK-NEXT:    [[CONV18:%.*]] = ashr exact i64 [[SEXT]], 32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i64 [[CONV18]], [[XOR1]]
+; CHECK-NEXT:    [[CONV19:%.*]] = zext i1 [[CMP]] to i16
+; CHECK-NEXT:    [[OR21:%.*]] = or i16 [[CONV19]], [[INSERT]]
+; CHECK-NEXT:    [[TRUNC44:%.*]] = trunc i16 [[OR21]] to i8
+; CHECK-NEXT:    [[INC:%.*]] = or i8 [[TRUNC44]], [[I162]]
+; CHECK-NEXT:    [[TOBOOL23_NOT:%.*]] = icmp eq i16 [[OR21]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TOBOOL23_NOT]])
+; CHECK-NEXT:    ret i8 [[INC]]
+;
+  %b = icmp eq i32 %t0, 0
+  %b2 = icmp eq i16 %insert, 0
+  %t1 = or i1 %b, %b2
+  %ext = zext i1 %t1 to i32
+  %and = and i32 %t0, %ext
+  %conv13 = zext i32 %and to i64
+  %xor = xor i64 %conv13, 140
+  %conv16 = sext i8 %i162 to i64
+  %sub17 = sub i64 %conv16, %e
+  %sext = shl i64 %sub17, 32
+  %conv18 = ashr exact i64 %sext, 32
+  %cmp = icmp sge i64 %xor, %conv18
+  %conv19 = zext i1 %cmp to i16
+  %or21 = or i16 %insert, %conv19
+  %trunc44 = trunc i16 %or21 to i8
+  %inc = add i8 %i162, %trunc44
+  %tobool23.not = icmp eq i16 %or21, 0
+  call void @llvm.assume(i1 %tobool23.not)
+  ret i8 %inc
+}
+
+declare void @llvm.assume(i1 noundef)
-- 
2.11.0