From 28f0327565edb3e9c189b1b2225f489dc42ddb8f Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 7 Nov 2011 08:26:24 +0000
Subject: [PATCH] Add AVX2 variable shift instructions and intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143915 91177308-0d34-0410-b5e6-96231b3b80d8
---
Reviewer notes (text between "---" and the diffstat is ignored by git-am):
- Adds IR intrinsics, instruction definitions and CodeGen tests for the
  AVX2 variable shifts vpsllv{d,q}, vpsrlv{d,q} and vpsravd, each in a
  128-bit and a 256-bit form.
- NOTE(review): in this copy of the patch the angle-bracket argument lists
  of the avx2_var_shift multiclass (the template header and the
  rr/rm/Yrr/Yrm AVX28I defs) were missing. They have been reconstructed from
  the multiclass parameters and the hunk line count (40 added lines);
  confirm against upstream r143915 before applying.
- NOTE(review): leading indentation inside the hunks was also lost and has
  been restored by convention; context lines may need whitespace-tolerant
  application.

 include/llvm/IntrinsicsX86.td           | 36 +++++++++++++++
 lib/Target/X86/X86InstrSSE.td           | 40 +++++++++++++++++
 test/CodeGen/X86/avx2-intrinsics-x86.ll | 80 +++++++++++++++++++++++++++++++++
 3 files changed, 156 insertions(+)

diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index 42209b8291d..cba6599d660 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -1771,6 +1771,42 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
         Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty], []>;
 }
 
+// Variable bit shift ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+  def int_x86_avx2_psllv_d : GCCBuiltin<"__builtin_ia32_psllv4si">,
+              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+                        [IntrNoMem]>;
+  def int_x86_avx2_psllv_d_256 : GCCBuiltin<"__builtin_ia32_psllv8si">,
+              Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+                        [IntrNoMem]>;
+  def int_x86_avx2_psllv_q : GCCBuiltin<"__builtin_ia32_psllv2di">,
+              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+                        [IntrNoMem]>;
+  def int_x86_avx2_psllv_q_256 : GCCBuiltin<"__builtin_ia32_psllv4di">,
+              Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+                        [IntrNoMem]>;
+
+  def int_x86_avx2_psrlv_d : GCCBuiltin<"__builtin_ia32_psrlv4si">,
+              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+                        [IntrNoMem]>;
+  def int_x86_avx2_psrlv_d_256 : GCCBuiltin<"__builtin_ia32_psrlv8si">,
+              Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+                        [IntrNoMem]>;
+  def int_x86_avx2_psrlv_q : GCCBuiltin<"__builtin_ia32_psrlv2di">,
+              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+                        [IntrNoMem]>;
+  def int_x86_avx2_psrlv_q_256 : GCCBuiltin<"__builtin_ia32_psrlv4di">,
+              Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+                        [IntrNoMem]>;
+
+  def int_x86_avx2_psrav_d : GCCBuiltin<"__builtin_ia32_psrav4si">,
+              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+                        [IntrNoMem]>;
+  def int_x86_avx2_psrav_d_256 : GCCBuiltin<"__builtin_ia32_psrav8si">,
+              Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+                        [IntrNoMem]>;
+}
+
 // Misc.
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
   def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">,
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index b46e5d10e04..db4382a0c1a 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7601,3 +7601,43 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
                                 int_x86_avx2_maskstore_q,
                                 int_x86_avx2_maskstore_q_256,
                                 memopv2i64, memopv4i64>, VEX_W;
+
+
+//===----------------------------------------------------------------------===//
+// Variable Bit Shifts
+//
+multiclass avx2_var_shift<bits<8> opc, string OpcodeStr,
+                          PatFrag pf128, PatFrag pf256,
+                          Intrinsic Int128, Intrinsic Int256> {
+  def rr  : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
+             (ins VR128:$src1, VR128:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2))]>, VEX_4V;
+  def rm  : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
+             (ins VR128:$src1, i128mem:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set VR128:$dst, (Int128 VR128:$src1, (pf128 addr:$src2)))]>,
+             VEX_4V;
+  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
+             (ins VR256:$src1, VR256:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2))]>, VEX_4V;
+  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
+             (ins VR256:$src1, i256mem:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set VR256:$dst, (Int256 VR256:$src1, (pf256 addr:$src2)))]>,
+             VEX_4V;
+}
+
+defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", memopv4i32, memopv8i32,
+                              int_x86_avx2_psllv_d, int_x86_avx2_psllv_d_256>;
+defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", memopv2i64, memopv4i64,
+                              int_x86_avx2_psllv_q, int_x86_avx2_psllv_q_256>,
+                              VEX_W;
+defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", memopv4i32, memopv8i32,
+                              int_x86_avx2_psrlv_d, int_x86_avx2_psrlv_d_256>;
+defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", memopv2i64, memopv4i64,
+                              int_x86_avx2_psrlv_q, int_x86_avx2_psrlv_q_256>,
+                              VEX_W;
+defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", memopv4i32, memopv8i32,
+                              int_x86_avx2_psrav_d, int_x86_avx2_psrav_d_256>;
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index da03c386273..bab7fb81e53 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -966,3 +966,83 @@ define void @test_x86_avx2_maskstore_d_256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2
   ret void
 }
 declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind
+
+
+define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpsllvd
+  %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpsllvd
+  %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <2 x i64> @test_x86_avx2_psllv_q(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpsllvq
+  %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psllv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
+  ; CHECK: vpsllvq
+  %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
+
+
+define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpsrlvd
+  %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpsrlvd
+  %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <2 x i64> @test_x86_avx2_psrlv_q(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpsrlvq
+  %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrlv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
+  ; CHECK: vpsrlvq
+  %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
+
+
+define <4 x i32> @test_x86_avx2_psrav_d(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpsravd
+  %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpsravd
+  %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
-- 
2.11.0