From dfe4fd9cebe745c0faee876c45c2b29e5ce089bf Mon Sep 17 00:00:00 2001
From: Hao Liu
Date: Tue, 24 Dec 2013 09:00:21 +0000
Subject: [PATCH] [AArch64] Add patterns to match normal shift nodes: shl, sra and srl.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@197969 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64InstrNEON.td     |  59 ++++++++++
 test/CodeGen/AArch64/neon-shl-ashr-lshr.ll | 185 +++++++++++++++++++++++++++++
 2 files changed, 244 insertions(+)
 create mode 100644 test/CodeGen/AArch64/neon-shl-ashr-lshr.ll

diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index cd063d3d2fe..95e54b8ecf1 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -8662,6 +8662,65 @@ def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c", int_aarch64_neon_sha1c>;
 def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p", int_aarch64_neon_sha1p>;
 def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m", int_aarch64_neon_sha1m>;
 
+// Additional patterns to match shl to USHL.
+def : Pat<(v8i8 (shl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
+          (USHLvvv_8B $Rn, $Rm)>;
+def : Pat<(v4i16 (shl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
+          (USHLvvv_4H $Rn, $Rm)>;
+def : Pat<(v2i32 (shl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
+          (USHLvvv_2S $Rn, $Rm)>;
+def : Pat<(v1i64 (shl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
+          (USHLddd $Rn, $Rm)>;
+def : Pat<(v16i8 (shl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
+          (USHLvvv_16B $Rn, $Rm)>;
+def : Pat<(v8i16 (shl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
+          (USHLvvv_8H $Rn, $Rm)>;
+def : Pat<(v4i32 (shl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
+          (USHLvvv_4S $Rn, $Rm)>;
+def : Pat<(v2i64 (shl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
+          (USHLvvv_2D $Rn, $Rm)>;
+
+// Additional patterns to match sra and srl.
+// For a vector right shift by vector, the shift amounts of SSHL/USHL are
+// negative. Negate the vector of shift amounts first.
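+// As an illustrative sketch (example values only, not taken from the ISA
+// text): lowering (v2i32 (srl X, <1, 2>)) first emits a NEG to build the
+// shift vector <-1, -2>; the USHL that follows then shifts lane 0 of X
+// right by 1 and lane 1 right by 2, matching the semantics of srl.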
+def : Pat<(v8i8 (srl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), + (USHLvvv_8B $Rn, (NEG8b $Rm))>; +def : Pat<(v4i16 (srl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))), + (USHLvvv_4H $Rn, (NEG4h $Rm))>; +def : Pat<(v2i32 (srl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))), + (USHLvvv_2S $Rn, (NEG2s $Rm))>; +def : Pat<(v1i64 (srl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), + (USHLddd $Rn, (NEGdd $Rm))>; +def : Pat<(v16i8 (srl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))), + (USHLvvv_16B $Rn, (NEG16b $Rm))>; +def : Pat<(v8i16 (srl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))), + (USHLvvv_8H $Rn, (NEG8h $Rm))>; +def : Pat<(v4i32 (srl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))), + (USHLvvv_4S $Rn, (NEG4s $Rm))>; +def : Pat<(v2i64 (srl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))), + (USHLvvv_2D $Rn, (NEG2d $Rm))>; + +def : Pat<(v8i8 (sra (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), + (SSHLvvv_8B $Rn, (NEG8b $Rm))>; +def : Pat<(v4i16 (sra (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))), + (SSHLvvv_4H $Rn, (NEG4h $Rm))>; +def : Pat<(v2i32 (sra (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))), + (SSHLvvv_2S $Rn, (NEG2s $Rm))>; +def : Pat<(v1i64 (sra (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), + (SSHLddd $Rn, (NEGdd $Rm))>; +def : Pat<(v16i8 (sra (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))), + (SSHLvvv_16B $Rn, (NEG16b $Rm))>; +def : Pat<(v8i16 (sra (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))), + (SSHLvvv_8H $Rn, (NEG8h $Rm))>; +def : Pat<(v4i32 (sra (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))), + (SSHLvvv_4S $Rn, (NEG4s $Rm))>; +def : Pat<(v2i64 (sra (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))), + (SSHLvvv_2D $Rn, (NEG2d $Rm))>; + // // Patterns for handling half-precision values // diff --git a/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll b/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll new file mode 100644 index 00000000000..13912f417c4 --- /dev/null +++ b/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll @@ -0,0 +1,185 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define <8 x i8> @shl.v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: shl.v8i8: +; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %c = shl <8 x i8> %a, %b + ret <8 x i8> %c +} + +define <4 x i16> @shl.v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: shl.v4i16: +; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h + %c = shl <4 x i16> %a, %b + ret <4 x i16> %c +} + +define <2 x i32> @shl.v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: shl.v2i32: +; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %c = shl <2 x i32> %a, %b + ret <2 x i32> %c +} + +define <1 x i64> @shl.v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: shl.v1i64: +; CHECK: ushl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %c = shl <1 x i64> %a, %b + ret <1 x i64> %c +} + +define <16 x i8> @shl.v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: shl.v16i8: +; CHECK: ushl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %c = shl <16 x i8> %a, %b + ret <16 x i8> %c +} + +define <8 x i16> @shl.v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: shl.v8i16: +; CHECK: ushl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h + %c = shl <8 x i16> %a, %b + ret <8 x i16> %c +} + +define <4 x i32> @shl.v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shl.v4i32: +; CHECK: ushl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %c = shl <4 x i32> %a, %b + ret <4 x i32> %c +} + +define <2 x i64> @shl.v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: shl.v2i64: +; CHECK: ushl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %c = shl <2 x i64> %a, %b + ret <2 x i64> %c +} + +define <8 x 
i8> @lshr.v8i8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: lshr.v8i8:
+; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+  %c = lshr <8 x i8> %a, %b
+  ret <8 x i8> %c
+}
+
+define <4 x i16> @lshr.v4i16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: lshr.v4i16:
+; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
+; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
+  %c = lshr <4 x i16> %a, %b
+  ret <4 x i16> %c
+}
+
+define <2 x i32> @lshr.v2i32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: lshr.v2i32:
+; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %c = lshr <2 x i32> %a, %b
+  ret <2 x i32> %c
+}
+
+define <1 x i64> @lshr.v1i64(<1 x i64> %a, <1 x i64> %b) {
+; CHECK-LABEL: lshr.v1i64:
+; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: ushl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %c = lshr <1 x i64> %a, %b
+  ret <1 x i64> %c
+}
+
+define <16 x i8> @lshr.v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: lshr.v16i8:
+; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: ushl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+  %c = lshr <16 x i8> %a, %b
+  ret <16 x i8> %c
+}
+
+define <8 x i16> @lshr.v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: lshr.v8i16:
+; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
+; CHECK: ushl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
+  %c = lshr <8 x i16> %a, %b
+  ret <8 x i16> %c
+}
+
+define <4 x i32> @lshr.v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: lshr.v4i32:
+; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+; CHECK: ushl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %c = lshr <4 x i32> %a, %b
+  ret <4 x i32> %c
+}
+
+define <2 x i64> @lshr.v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: lshr.v2i64:
+; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+; CHECK: ushl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %c = lshr <2 x i64> %a, %b
+  ret <2 x i64> %c
+}
+
+define <8 x i8> @ashr.v8i8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: ashr.v8i8:
+; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+; CHECK: sshl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+  %c = ashr <8 x i8> %a, %b
+  ret <8 x i8> %c
+}
+
+define <4 x i16> @ashr.v4i16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: ashr.v4i16:
+; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
+; CHECK: sshl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
+  %c = ashr <4 x i16> %a, %b
+  ret <4 x i16> %c
+}
+
+define <2 x i32> @ashr.v2i32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: ashr.v2i32:
+; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+; CHECK: sshl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
+  %c = ashr <2 x i32> %a, %b
+  ret <2 x i32> %c
+}
+
+define <1 x i64> @ashr.v1i64(<1 x i64> %a, <1 x i64> %b) {
+; CHECK-LABEL: ashr.v1i64:
+; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: sshl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %c = ashr <1 x i64> %a, %b
+  ret <1 x i64> %c
+}
+
+define <16 x i8> @ashr.v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: ashr.v16i8:
+; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: sshl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+  %c = ashr <16 x i8> %a, %b
+  ret <16 x i8> %c
+}
+
+define <8 x i16> @ashr.v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: ashr.v8i16:
+; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
+; CHECK: sshl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
+  %c = ashr <8 x i16> %a, %b
+  ret <8 x i16> %c
+}
+
+define <4 x i32> @ashr.v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: ashr.v4i32:
+; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+; CHECK: sshl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %c = ashr <4 x i32> %a, %b
+  ret <4 x i32> %c
+}
+
+define <2 x i64> @ashr.v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: ashr.v2i64:
+; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+; CHECK: sshl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %c = ashr <2 x i64> %a, %b
+  ret <2 x i64> %c
+}
\ No newline at end of file
-- 
2.11.0