From a9a5240720a1c1ca7a0e18754fb2d1c8cad6c7b1 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Tue, 6 Sep 2016 00:31:10 +0000
Subject: [PATCH] [AVX-512] Fix v8i64 shift by immediate lowering on 32-bit
 targets.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@280684 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp        |  3 ++-
 test/CodeGen/X86/avx512-calling-conv.ll   |  7 +++----
 test/CodeGen/X86/masked_gather_scatter.ll | 18 +++++++++---------
 test/CodeGen/X86/vector-shuffle-512-v8.ll |  8 ++++----
 4 files changed, 18 insertions(+), 18 deletions(-)
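Editor's note, not part of the original commit: the IR below is a minimal
sketch of the pattern this patch affects. On 32-bit targets, i64 is
legalized as high and low halves, so a uniform shift of <8 x i64> by an
immediate previously missed the splat-immediate special case and was
lowered by loading a splatted 63 constant and using the variable-count
vpsllvq; with this change it lowers to the immediate form vpsllq $63, as
the updated CHECK lines in the diffs below show. The function name and RUN
line here are illustrative assumptions (llc, -mtriple, and -mcpu=knl are
real options, but the original tests' exact RUN lines are not shown in
this patch).

; Illustrative sketch only, not from the patch.
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl
define <8 x i64> @shl_v8i64_by_imm(<8 x i64> %x) {
  ; With this fix, the uniform immediate shift below compiles to
  ; "vpsllq $63, %zmm0, %zmm0" instead of loading a splatted 63 from
  ; the constant pool and shifting with the variable-count vpsllvq.
  %r = shl <8 x i64> %x, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
  ret <8 x i64> %r
}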
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ccf0fca3d58..862758e97bd 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -20215,7 +20215,8 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
 
   // Special case in 32-bit mode, where i64 is expanded into high and low parts.
   if (!Subtarget.is64Bit() && !Subtarget.hasXOP() &&
-      (VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64))) {
+      (VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64) ||
+       (Subtarget.hasAVX512() && VT == MVT::v8i64))) {
 
     // Peek through any splat that was introduced for i64 shift vectorization.
     int SplatIndex = -1;
diff --git a/test/CodeGen/X86/avx512-calling-conv.ll b/test/CodeGen/X86/avx512-calling-conv.ll
index 963a3867e68..004761c42fd 100644
--- a/test/CodeGen/X86/avx512-calling-conv.ll
+++ b/test/CodeGen/X86/avx512-calling-conv.ll
@@ -83,10 +83,9 @@ define <8 x i1> @test3(<8 x i1>%a, <8 x i1>%b) {
 ; KNL_X32-LABEL: test3:
 ; KNL_X32:       ## BB#0:
 ; KNL_X32-NEXT:    vpmovsxwq %xmm1, %zmm1
-; KNL_X32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [63,0,63,0,63,0,63,0,63,0,63,0,63,0,63,0]
-; KNL_X32-NEXT:    vpsllvq %zmm2, %zmm1, %zmm1
+; KNL_X32-NEXT:    vpsllq $63, %zmm1, %zmm1
 ; KNL_X32-NEXT:    vpmovsxwq %xmm0, %zmm0
-; KNL_X32-NEXT:    vpsllvq %zmm2, %zmm0, %zmm0
+; KNL_X32-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; KNL_X32-NEXT:    vptestmq %zmm0, %zmm0, %k1
 ; KNL_X32-NEXT:    vptestmq %zmm1, %zmm1, %k1 {%k1}
 ; KNL_X32-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
@@ -319,7 +318,7 @@ define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
 ; KNL_X32-NEXT:    ## kill: %XMM0 %XMM0 %YMM0
 ; KNL_X32-NEXT:    calll _func8xi1
 ; KNL_X32-NEXT:    vpmovsxwq %xmm0, %zmm0
-; KNL_X32-NEXT:    vpsllvq LCPI7_0, %zmm0, %zmm0
+; KNL_X32-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; KNL_X32-NEXT:    movb $85, %al
 ; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    vptestmq %zmm0, %zmm0, %k1 {%k1}
diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll
index 96f62cb640f..4fcd7e9c2a7 100644
--- a/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/test/CodeGen/X86/masked_gather_scatter.ll
@@ -865,7 +865,7 @@ define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x
 ; KNL_32-NEXT:    vinserti64x4 $0, %ymm1, %zmm3, %zmm1
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; KNL_32-NEXT:    vpmovsxdq %ymm0, %zmm0
-; KNL_32-NEXT:    vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
+; KNL_32-NEXT:    vpsllq $63, %zmm1, %zmm1
 ; KNL_32-NEXT:    vptestmq %zmm1, %zmm1, %k1
 ; KNL_32-NEXT:    vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
 ; KNL_32-NEXT:    vmovapd %ymm2, %ymm0
@@ -915,7 +915,7 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x
 ; KNL_32-NEXT:    vpxord %zmm3, %zmm3, %zmm3
 ; KNL_32-NEXT:    vinserti32x4 $0, %xmm1, %zmm3, %zmm1
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT:    vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
+; KNL_32-NEXT:    vpsllq $63, %zmm1, %zmm1
 ; KNL_32-NEXT:    vptestmq %zmm1, %zmm1, %k1
 ; KNL_32-NEXT:    vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
 ; KNL_32-NEXT:    vmovapd %xmm2, %xmm0
@@ -1020,7 +1020,7 @@ define void @test19(<4 x double>%a1, double* %ptr, <4 x i1>%mask, <4 x i64> %ind
 ; KNL_32-NEXT:    vpxord %zmm3, %zmm3, %zmm3
 ; KNL_32-NEXT:    vinserti64x4 $0, %ymm1, %zmm3, %zmm1
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT:    vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
+; KNL_32-NEXT:    vpsllq $63, %zmm1, %zmm1
 ; KNL_32-NEXT:    vptestmq %zmm1, %zmm1, %k1
 ; KNL_32-NEXT:    vscatterqpd %zmm0, (%eax,%zmm2,8) {%k1}
 ; KNL_32-NEXT:    retl
@@ -1117,7 +1117,7 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
 ; KNL_32-NEXT:    vpxord %zmm3, %zmm3, %zmm3
 ; KNL_32-NEXT:    vinserti32x4 $0, %xmm2, %zmm3, %zmm2
 ; KNL_32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; KNL_32-NEXT:    vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
+; KNL_32-NEXT:    vpsllq $63, %zmm2, %zmm2
 ; KNL_32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; KNL_32-NEXT:    vpscatterqd %ymm0, (,%zmm1) {%k1}
 ; KNL_32-NEXT:    retl
@@ -1236,7 +1236,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
 ; KNL_32-NEXT:    vpxord %zmm3, %zmm3, %zmm3
 ; KNL_32-NEXT:    vinserti32x4 $0, %xmm1, %zmm3, %zmm1
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT:    vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
+; KNL_32-NEXT:    vpsllq $63, %zmm1, %zmm1
 ; KNL_32-NEXT:    vptestmq %zmm1, %zmm1, %k1
 ; KNL_32-NEXT:    vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
 ; KNL_32-NEXT:    vmovdqa %xmm2, %xmm0
@@ -1280,7 +1280,7 @@ define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; KNL_32-NEXT:    vpxord %zmm1, %zmm1, %zmm1
 ; KNL_32-NEXT:    vinserti32x4 $0, {{\.LCPI.*}}, %zmm1, %zmm1
-; KNL_32-NEXT:    vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
+; KNL_32-NEXT:    vpsllq $63, %zmm1, %zmm1
 ; KNL_32-NEXT:    vptestmq %zmm1, %zmm1, %k1
 ; KNL_32-NEXT:    vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
 ; KNL_32-NEXT:    vmovdqa %xmm1, %xmm0
@@ -1327,7 +1327,7 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %
 ; KNL_32-NEXT:    vpxord %zmm3, %zmm3, %zmm3
 ; KNL_32-NEXT:    vinserti32x4 $0, %xmm1, %zmm3, %zmm1
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT:    vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
+; KNL_32-NEXT:    vpsllq $63, %zmm1, %zmm1
 ; KNL_32-NEXT:    vptestmq %zmm1, %zmm1, %k1
 ; KNL_32-NEXT:    vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
 ; KNL_32-NEXT:    vmovdqa %xmm2, %xmm0
@@ -1374,7 +1374,7 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; KNL_32-NEXT:    vpxord %zmm2, %zmm2, %zmm2
 ; KNL_32-NEXT:    vinserti32x4 $0, {{\.LCPI.*}}, %zmm2, %zmm2
-; KNL_32-NEXT:    vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
+; KNL_32-NEXT:    vpsllq $63, %zmm2, %zmm2
 ; KNL_32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; KNL_32-NEXT:    vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
 ; KNL_32-NEXT:    vmovdqa %xmm1, %xmm0
@@ -1465,7 +1465,7 @@ define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) {
 ; KNL_32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; KNL_32-NEXT:    vpxord %zmm2, %zmm2, %zmm2
 ; KNL_32-NEXT:    vinserti32x4 $0, {{\.LCPI.*}}, %zmm2, %zmm2
-; KNL_32-NEXT:    vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
+; KNL_32-NEXT:    vpsllq $63, %zmm2, %zmm2
 ; KNL_32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; KNL_32-NEXT:    vpscatterqd %ymm0, (,%zmm1) {%k1}
 ; KNL_32-NEXT:    retl
diff --git a/test/CodeGen/X86/vector-shuffle-512-v8.ll b/test/CodeGen/X86/vector-shuffle-512-v8.ll
index 08c105a9238..b1de06569ea 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -2082,7 +2082,7 @@ define <8 x double> @test_vshuff64x2_512_maskz(<8 x double> %x, <8 x double> %x1
 ; AVX512F-32-LABEL: test_vshuff64x2_512_maskz:
 ; AVX512F-32:       # BB#0:
 ; AVX512F-32-NEXT:    vpmovsxwq %xmm2, %zmm2
-; AVX512F-32-NEXT:    vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
+; AVX512F-32-NEXT:    vpsllq $63, %zmm2, %zmm2
 ; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[2,3,0,1]
 ; AVX512F-32-NEXT:    retl
@@ -2103,7 +2103,7 @@ define <8 x i64> @test_vshufi64x2_512_mask(<8 x i64> %x, <8 x i64> %x1, <8 x i1>
 ; AVX512F-32-LABEL: test_vshufi64x2_512_mask:
 ; AVX512F-32:       # BB#0:
 ; AVX512F-32-NEXT:    vpmovsxwq %xmm2, %zmm2
-; AVX512F-32-NEXT:    vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
+; AVX512F-32-NEXT:    vpsllq $63, %zmm2, %zmm2
 ; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],zmm1[2,3,0,1]
 ; AVX512F-32-NEXT:    retl
@@ -2140,7 +2140,7 @@ define <8 x double> @test_vshuff64x2_512_mem_mask(<8 x double> %x, <8 x double>
 ; AVX512F-32-LABEL: test_vshuff64x2_512_mem_mask:
 ; AVX512F-32:       # BB#0:
 ; AVX512F-32-NEXT:    vpmovsxwq %xmm1, %zmm1
-; AVX512F-32-NEXT:    vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
+; AVX512F-32-NEXT:    vpsllq $63, %zmm1, %zmm1
 ; AVX512F-32-NEXT:    vptestmq %zmm1, %zmm1, %k1
 ; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],mem[2,3,0,1]
@@ -2163,7 +2163,7 @@ define <8 x double> @test_vshuff64x2_512_mem_maskz(<8 x double> %x, <8 x double>
 ; AVX512F-32-LABEL: test_vshuff64x2_512_mem_maskz:
 ; AVX512F-32:       # BB#0:
 ; AVX512F-32-NEXT:    vpmovsxwq %xmm1, %zmm1
-; AVX512F-32-NEXT:    vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
+; AVX512F-32-NEXT:    vpsllq $63, %zmm1, %zmm1
 ; AVX512F-32-NEXT:    vptestmq %zmm1, %zmm1, %k1
 ; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],mem[2,3,0,1]
-- 
2.11.0