From 2806865ca5b3390a262386972c93f7ef70ec0757 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Thu, 15 Nov 2018 18:19:56 +0000
Subject: [PATCH] [InstCombine] add tests for funnel shift (rotate) canonicalization; NFC

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@346975 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/Transforms/InstCombine/rotate.ll | 343 +++++++++++++++++++++++++++++++++-
 1 file changed, 342 insertions(+), 1 deletion(-)

diff --git a/test/Transforms/InstCombine/rotate.ll b/test/Transforms/InstCombine/rotate.ll
index c5b03fc2800..7f488a94c6d 100644
--- a/test/Transforms/InstCombine/rotate.ll
+++ b/test/Transforms/InstCombine/rotate.ll
@@ -3,7 +3,348 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-; These may be UB-free rotate left/right patterns that are narrowed to a smaller bitwidth.
+; TODO: Canonicalize rotate by constant to funnel shift intrinsics.
+; This should help cost modeling for vectorization, inlining, etc.
+; If a target does not have a rotate instruction, the expansion will
+; be exactly these same 3 basic ops (shl/lshr/or).
+
+define i32 @rotl_i32_constant(i32 %x) {
+; CHECK-LABEL: @rotl_i32_constant(
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 11
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], 21
+; CHECK-NEXT: [[R:%.*]] = or i32 [[SHR]], [[SHL]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+  %shl = shl i32 %x, 11
+  %shr = lshr i32 %x, 21
+  %r = or i32 %shr, %shl
+  ret i32 %r
+}
+
+define i42 @rotr_i42_constant(i42 %x) {
+; CHECK-LABEL: @rotr_i42_constant(
+; CHECK-NEXT: [[SHL:%.*]] = shl i42 [[X:%.*]], 31
+; CHECK-NEXT: [[SHR:%.*]] = lshr i42 [[X]], 11
+; CHECK-NEXT: [[R:%.*]] = or i42 [[SHR]], [[SHL]]
+; CHECK-NEXT: ret i42 [[R]]
+;
+  %shl = shl i42 %x, 31
+  %shr = lshr i42 %x, 11
+  %r = or i42 %shr, %shl
+  ret i42 %r
+}
+
+define i8 @rotr_i8_constant_commute(i8 %x) {
+; CHECK-LABEL: @rotr_i8_constant_commute(
+; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT: [[SHR:%.*]] = lshr i8 [[X]], 3
+; CHECK-NEXT: [[R:%.*]] = or i8 [[SHL]], [[SHR]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+  %shl = shl i8 %x, 5
+  %shr = lshr i8 %x, 3
+  %r = or i8 %shl, %shr
+  ret i8 %r
+}
+
+define i88 @rotl_i88_constant_commute(i88 %x) {
+; CHECK-LABEL: @rotl_i88_constant_commute(
+; CHECK-NEXT: [[SHL:%.*]] = shl i88 [[X:%.*]], 44
+; CHECK-NEXT: [[SHR:%.*]] = lshr i88 [[X]], 44
+; CHECK-NEXT: [[R:%.*]] = or i88 [[SHL]], [[SHR]]
+; CHECK-NEXT: ret i88 [[R]]
+;
+  %shl = shl i88 %x, 44
+  %shr = lshr i88 %x, 44
+  %r = or i88 %shl, %shr
+  ret i88 %r
+}
+
+; Vector types are allowed.
+
+define <2 x i16> @rotl_v2i16_constant_splat(<2 x i16> %x) {
+; CHECK-LABEL: @rotl_v2i16_constant_splat(
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]],
+; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[X]],
+; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]]
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+  %shl = shl <2 x i16> %x,
+  %shr = lshr <2 x i16> %x,
+  %r = or <2 x i16> %shl, %shr
+  ret <2 x i16> %r
+}
+
+; Non-power-of-2 vector types are allowed.
+
+define <2 x i17> @rotr_v2i17_constant_splat(<2 x i17> %x) {
+; CHECK-LABEL: @rotr_v2i17_constant_splat(
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[X:%.*]],
+; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X]],
+; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]]
+; CHECK-NEXT: ret <2 x i17> [[R]]
+;
+  %shl = shl <2 x i17> %x,
+  %shr = lshr <2 x i17> %x,
+  %r = or <2 x i17> %shr, %shl
+  ret <2 x i17> %r
+}
+
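For reference, the canonical form that the TODO above is aiming for is the generic funnel-shift intrinsic: a rotate is simply a funnel shift whose two value operands are the same. As a sketch (this is not output that the checks above expect, and the @rotl_i32_constant_fsh name is only illustrative), the rotl_i32_constant pattern would collapse to a single call:

  declare i32 @llvm.fshl.i32(i32, i32, i32)

  define i32 @rotl_i32_constant_fsh(i32 %x) {
    ; rotate-left by 11 is a funnel-shift-left of (x, x) by 11
    %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 11)
    ret i32 %r
  }

The rotate-right tests would map to llvm.fshr in the same way.
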
+; Allow arbitrary shift constants.
+
+define <2 x i32> @rotr_v2i32_constant_nonsplat(<2 x i32> %x) {
+; CHECK-LABEL: @rotr_v2i32_constant_nonsplat(
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[X:%.*]],
+; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[X]],
+; CHECK-NEXT: [[R:%.*]] = or <2 x i32> [[SHL]], [[SHR]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+  %shl = shl <2 x i32> %x,
+  %shr = lshr <2 x i32> %x,
+  %r = or <2 x i32> %shl, %shr
+  ret <2 x i32> %r
+}
+
+define <2 x i36> @rotl_v2i16_constant_nonsplat(<2 x i36> %x) {
+; CHECK-LABEL: @rotl_v2i16_constant_nonsplat(
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i36> [[X:%.*]],
+; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i36> [[X]],
+; CHECK-NEXT: [[R:%.*]] = or <2 x i36> [[SHL]], [[SHR]]
+; CHECK-NEXT: ret <2 x i36> [[R]]
+;
+  %shl = shl <2 x i36> %x,
+  %shr = lshr <2 x i36> %x,
+  %r = or <2 x i36> %shl, %shr
+  ret <2 x i36> %r
+}
+
+; The most basic rotate by variable - no guards for UB due to oversized shifts.
+; This cannot be canonicalized to funnel shift target-independently. The safe
+; expansion includes masking for the shift amount that is not included here,
+; so it could be more expensive.
+
+define i32 @rotl_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: @rotl_i32(
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
+; CHECK-NEXT: [[R:%.*]] = or i32 [[SHR]], [[SHL]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+  %sub = sub i32 32, %y
+  %shl = shl i32 %x, %y
+  %shr = lshr i32 %x, %sub
+  %r = or i32 %shr, %shl
+  ret i32 %r
+}
+
+; Non-power-of-2 types should follow the same reasoning. Left/right is determined by subtract.
+
+define i37 @rotr_i37(i37 %x, i37 %y) {
+; CHECK-LABEL: @rotr_i37(
+; CHECK-NEXT: [[SUB:%.*]] = sub i37 37, [[Y:%.*]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i37 [[X:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i37 [[X]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = or i37 [[SHR]], [[SHL]]
+; CHECK-NEXT: ret i37 [[R]]
+;
+  %sub = sub i37 37, %y
+  %shl = shl i37 %x, %sub
+  %shr = lshr i37 %x, %y
+  %r = or i37 %shr, %shl
+  ret i37 %r
+}
+
+; Commute 'or' operands.
+
+define i8 @rotr_i8_commute(i8 %x, i8 %y) {
+; CHECK-LABEL: @rotr_i8_commute(
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 8, [[Y:%.*]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i8 [[X]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = or i8 [[SHL]], [[SHR]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+  %sub = sub i8 8, %y
+  %shl = shl i8 %x, %sub
+  %shr = lshr i8 %x, %y
+  %r = or i8 %shl, %shr
+  ret i8 %r
+}
+
+; Vector types should follow the same rules.
+
+define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @rotl_v4i32(
+; CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, [[Y:%.*]]
+; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr <4 x i32> [[X]], [[SUB]]
+; CHECK-NEXT: [[R:%.*]] = or <4 x i32> [[SHL]], [[SHR]]
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+  %sub = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
+  %shl = shl <4 x i32> %x, %y
+  %shr = lshr <4 x i32> %x, %sub
+  %r = or <4 x i32> %shl, %shr
+  ret <4 x i32> %r
+}
+
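To make the missing UB guard in rotl_i32 above concrete: when %y is 0, the subtraction produces the full bit width, and an i32 shift by 32 yields poison per the LangRef, whereas the funnel-shift intrinsics are defined for every shift amount because the amount is taken modulo the bit width. A sketch of that corner case:

  %sub = sub i32 32, 0     ; %sub = 32
  %shr = lshr i32 %x, 32   ; shift amount equals the bit width, so this is poison

The "safe" variants later in this file mask the shift amount precisely to close that hole.
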
+; Non-power-of-2 vector types should follow the same rules.
+
+define <3 x i42> @rotr_v3i42(<3 x i42> %x, <3 x i42> %y) {
+; CHECK-LABEL: @rotr_v3i42(
+; CHECK-NEXT: [[SUB:%.*]] = sub <3 x i42> <i42 42, i42 42, i42 42>, [[Y:%.*]]
+; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i42> [[X:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr <3 x i42> [[X]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = or <3 x i42> [[SHR]], [[SHL]]
+; CHECK-NEXT: ret <3 x i42> [[R]]
+;
+  %sub = sub <3 x i42> <i42 42, i42 42, i42 42>, %y
+  %shl = shl <3 x i42> %x, %sub
+  %shr = lshr <3 x i42> %x, %y
+  %r = or <3 x i42> %shr, %shl
+  ret <3 x i42> %r
+}
+
+; TODO:
+; This is the canonical pattern for a UB-safe rotate-by-variable with power-of-2-size scalar type.
+; The backend expansion of funnel shift for targets that don't have a rotate instruction should
+; match the original IR, so it is always good to canonicalize to the intrinsics for this pattern.
+
+define i32 @rotl_safe_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: @rotl_safe_i32(
+; CHECK-NEXT: [[NEGY:%.*]] = sub i32 0, [[Y:%.*]]
+; CHECK-NEXT: [[YMASK:%.*]] = and i32 [[Y]], 31
+; CHECK-NEXT: [[NEGYMASK:%.*]] = and i32 [[NEGY]], 31
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], [[YMASK]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], [[NEGYMASK]]
+; CHECK-NEXT: [[R:%.*]] = or i32 [[SHR]], [[SHL]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+  %negy = sub i32 0, %y
+  %ymask = and i32 %y, 31
+  %negymask = and i32 %negy, 31
+  %shl = shl i32 %x, %ymask
+  %shr = lshr i32 %x, %negymask
+  %r = or i32 %shr, %shl
+  ret i32 %r
+}
+
+; TODO:
+; Extra uses don't change anything.
+
+define i16 @rotl_safe_i16_commute_extra_use(i16 %x, i16 %y, i16* %p) {
+; CHECK-LABEL: @rotl_safe_i16_commute_extra_use(
+; CHECK-NEXT: [[NEGY:%.*]] = sub i16 0, [[Y:%.*]]
+; CHECK-NEXT: [[YMASK:%.*]] = and i16 [[Y]], 15
+; CHECK-NEXT: [[NEGYMASK:%.*]] = and i16 [[NEGY]], 15
+; CHECK-NEXT: store i16 [[NEGYMASK]], i16* [[P:%.*]], align 2
+; CHECK-NEXT: [[SHL:%.*]] = shl i16 [[X:%.*]], [[YMASK]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i16 [[X]], [[NEGYMASK]]
+; CHECK-NEXT: [[R:%.*]] = or i16 [[SHL]], [[SHR]]
+; CHECK-NEXT: ret i16 [[R]]
+;
+  %negy = sub i16 0, %y
+  %ymask = and i16 %y, 15
+  %negymask = and i16 %negy, 15
+  store i16 %negymask, i16* %p
+  %shl = shl i16 %x, %ymask
+  %shr = lshr i16 %x, %negymask
+  %r = or i16 %shl, %shr
+  ret i16 %r
+}
+
+; TODO:
+; Left/right is determined by the negation.
+
+define i64 @rotr_safe_i64(i64 %x, i64 %y) {
+; CHECK-LABEL: @rotr_safe_i64(
+; CHECK-NEXT: [[NEGY:%.*]] = sub i64 0, [[Y:%.*]]
+; CHECK-NEXT: [[YMASK:%.*]] = and i64 [[Y]], 63
+; CHECK-NEXT: [[NEGYMASK:%.*]] = and i64 [[NEGY]], 63
+; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[X:%.*]], [[YMASK]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i64 [[X]], [[NEGYMASK]]
+; CHECK-NEXT: [[R:%.*]] = or i64 [[SHR]], [[SHL]]
+; CHECK-NEXT: ret i64 [[R]]
+;
+  %negy = sub i64 0, %y
+  %ymask = and i64 %y, 63
+  %negymask = and i64 %negy, 63
+  %shl = shl i64 %x, %ymask
+  %shr = lshr i64 %x, %negymask
+  %r = or i64 %shr, %shl
+  ret i64 %r
+}
+
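Because the shift amounts in these safe tests are masked with bitwidth - 1, they line up exactly with the modulo semantics of the funnel-shift intrinsics, so the whole masked sequence in rotl_safe_i32 above should be expressible as one call. A sketch of that expected canonical form (the @rotl_safe_i32_fsh name is only illustrative):

  declare i32 @llvm.fshl.i32(i32, i32, i32)

  define i32 @rotl_safe_i32_fsh(i32 %x, i32 %y) {
    ; llvm.fshl takes the shift amount modulo 32, matching the masked pattern
    %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y)
    ret i32 %r
  }

This is the same equivalence that the common C rotate idiom (x << (y & 31)) | (x >> (-y & 31)) relies on.
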
+; TODO:
+; Extra uses don't change anything.
+
+define i8 @rotr_safe_i8_commute_extra_use(i8 %x, i8 %y, i8* %p) {
+; CHECK-LABEL: @rotr_safe_i8_commute_extra_use(
+; CHECK-NEXT: [[NEGY:%.*]] = sub i8 0, [[Y:%.*]]
+; CHECK-NEXT: [[YMASK:%.*]] = and i8 [[Y]], 7
+; CHECK-NEXT: [[NEGYMASK:%.*]] = and i8 [[NEGY]], 7
+; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X:%.*]], [[YMASK]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i8 [[X]], [[NEGYMASK]]
+; CHECK-NEXT: store i8 [[SHR]], i8* [[P:%.*]], align 1
+; CHECK-NEXT: [[R:%.*]] = or i8 [[SHL]], [[SHR]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+  %negy = sub i8 0, %y
+  %ymask = and i8 %y, 7
+  %negymask = and i8 %negy, 7
+  %shl = shl i8 %x, %ymask
+  %shr = lshr i8 %x, %negymask
+  store i8 %shr, i8* %p
+  %r = or i8 %shl, %shr
+  ret i8 %r
+}
+
+; TODO:
+; Vectors follow the same rules.
+
+define <2 x i32> @rotl_safe_v2i32(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @rotl_safe_v2i32(
+; CHECK-NEXT: [[NEGY:%.*]] = sub <2 x i32> zeroinitializer, [[Y:%.*]]
+; CHECK-NEXT: [[YMASK:%.*]] = and <2 x i32> [[Y]], <i32 31, i32 31>
+; CHECK-NEXT: [[NEGYMASK:%.*]] = and <2 x i32> [[NEGY]], <i32 31, i32 31>
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[X:%.*]], [[YMASK]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[X]], [[NEGYMASK]]
+; CHECK-NEXT: [[R:%.*]] = or <2 x i32> [[SHR]], [[SHL]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+  %negy = sub <2 x i32> zeroinitializer, %y
+  %ymask = and <2 x i32> %y, <i32 31, i32 31>
+  %negymask = and <2 x i32> %negy, <i32 31, i32 31>
+  %shl = shl <2 x i32> %x, %ymask
+  %shr = lshr <2 x i32> %x, %negymask
+  %r = or <2 x i32> %shr, %shl
+  ret <2 x i32> %r
+}
+
+; TODO:
+; Vectors follow the same rules.
+
+define <3 x i16> @rotr_safe_v3i16(<3 x i16> %x, <3 x i16> %y) {
+; CHECK-LABEL: @rotr_safe_v3i16(
+; CHECK-NEXT: [[NEGY:%.*]] = sub <3 x i16> zeroinitializer, [[Y:%.*]]
+; CHECK-NEXT: [[YMASK:%.*]] = and <3 x i16> [[Y]], <i16 15, i16 15, i16 15>
+; CHECK-NEXT: [[NEGYMASK:%.*]] = and <3 x i16> [[NEGY]], <i16 15, i16 15, i16 15>
+; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> [[X:%.*]], [[YMASK]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr <3 x i16> [[X]], [[NEGYMASK]]
+; CHECK-NEXT: [[R:%.*]] = or <3 x i16> [[SHR]], [[SHL]]
+; CHECK-NEXT: ret <3 x i16> [[R]]
+;
+  %negy = sub <3 x i16> zeroinitializer, %y
+  %ymask = and <3 x i16> %y, <i16 15, i16 15, i16 15>
+  %negymask = and <3 x i16> %negy, <i16 15, i16 15, i16 15>
+  %shl = shl <3 x i16> %x, %ymask
+  %shr = lshr <3 x i16> %x, %negymask
+  %r = or <3 x i16> %shr, %shl
+  ret <3 x i16> %r
+}
+
+; These may be UB-free rotate left/right patterns that are narrowed to a smaller bitwidth.
 ; See PR34046, PR16726, and PR39624 for motivating examples:
 ; https://bugs.llvm.org/show_bug.cgi?id=34046
 ; https://bugs.llvm.org/show_bug.cgi?id=16726
-- 
2.11.0
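The funnel-shift intrinsics are overloaded on vector types as well, so the vector safe-rotate tests should canonicalize element-wise to the same form. A sketch for rotl_safe_v2i32, using the <2 x i32> overload (the @rotl_safe_v2i32_fsh name is only illustrative):

  declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)

  define <2 x i32> @rotl_safe_v2i32_fsh(<2 x i32> %x, <2 x i32> %y) {
    ; per-element rotate-left of %x by %y (amount taken modulo 32 in each lane)
    %r = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %y)
    ret <2 x i32> %r
  }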