From 9215f302aa992901ec632af0de03134fec438c0b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 31 Dec 2017 17:07:47 +0000 Subject: [PATCH] [X86][SSE] Don't vectorize splat buildvector of binops (PR30780) Don't combine buildvector(binop(),binop(),binop(),binop()) -> binop(buildvector(), buildvector()) if it's a splat - keep the binop scalar and just splat the result to avoid large vector constants. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@321607 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 ++++ test/CodeGen/X86/build-vector-128.ll | 23 ++++++++------------ test/CodeGen/X86/build-vector-256.ll | 4 ++-- test/CodeGen/X86/vector-pcmp.ll | 41 ++++++++++-------------------------- 4 files changed, 26 insertions(+), 46 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 33327ca6b79..19bff03f26a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7723,6 +7723,10 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op, case ISD::AND: case ISD::XOR: case ISD::OR: + // Don't do this if the buildvector is a splat - we'd replace one + // constant with an entire vector. 
+ if (Op->getSplatValue()) + return SDValue(); if (!TLI.isOperationLegalOrPromote(Opcode, VT)) return SDValue(); break; diff --git a/test/CodeGen/X86/build-vector-128.ll b/test/CodeGen/X86/build-vector-128.ll index 06f78b7bd0b..6c0c2d30c31 100644 --- a/test/CodeGen/X86/build-vector-128.ll +++ b/test/CodeGen/X86/build-vector-128.ll @@ -468,18 +468,12 @@ define <4 x i32> @test_buildvector_v4i32_splat_zext_i8(i8 %in) { ; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE-32-NEXT: retl ; -; SSE2-64-LABEL: test_buildvector_v4i32_splat_zext_i8: -; SSE2-64: # %bb.0: -; SSE2-64-NEXT: movd %edi, %xmm0 -; SSE2-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] -; SSE2-64-NEXT: pand {{.*}}(%rip), %xmm0 -; SSE2-64-NEXT: retq -; -; SSE41-64-LABEL: test_buildvector_v4i32_splat_zext_i8: -; SSE41-64: # %bb.0: -; SSE41-64-NEXT: movd %edi, %xmm0 -; SSE41-64-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero -; SSE41-64-NEXT: retq +; SSE-64-LABEL: test_buildvector_v4i32_splat_zext_i8: +; SSE-64: # %bb.0: +; SSE-64-NEXT: movzbl %dil, %eax +; SSE-64-NEXT: movd %eax, %xmm0 +; SSE-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; SSE-64-NEXT: retq ; ; AVX1-32-LABEL: test_buildvector_v4i32_splat_zext_i8: ; AVX1-32: # %bb.0: @@ -490,8 +484,9 @@ define <4 x i32> @test_buildvector_v4i32_splat_zext_i8(i8 %in) { ; ; AVX1-64-LABEL: test_buildvector_v4i32_splat_zext_i8: ; AVX1-64: # %bb.0: -; AVX1-64-NEXT: vmovd %edi, %xmm0 -; AVX1-64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero +; AVX1-64-NEXT: movzbl %dil, %eax +; AVX1-64-NEXT: vmovd %eax, %xmm0 +; AVX1-64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; AVX1-64-NEXT: retq ; ; AVX2-32-LABEL: test_buildvector_v4i32_splat_zext_i8: diff --git a/test/CodeGen/X86/build-vector-256.ll b/test/CodeGen/X86/build-vector-256.ll index 6e1b73a7c68..d2d7a194c70 100644 --- a/test/CodeGen/X86/build-vector-256.ll +++ 
b/test/CodeGen/X86/build-vector-256.ll @@ -461,10 +461,10 @@ define <8 x i32> @test_buildvector_v8i32_splat_zext_i8(i8 %in) { ; ; AVX1-64-LABEL: test_buildvector_v8i32_splat_zext_i8: ; AVX1-64: # %bb.0: -; AVX1-64-NEXT: vmovd %edi, %xmm0 +; AVX1-64-NEXT: movzbl %dil, %eax +; AVX1-64-NEXT: vmovd %eax, %xmm0 ; AVX1-64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; AVX1-64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 ; AVX1-64-NEXT: retq ; ; AVX2-32-LABEL: test_buildvector_v8i32_splat_zext_i8: diff --git a/test/CodeGen/X86/vector-pcmp.ll b/test/CodeGen/X86/vector-pcmp.ll index 782c72e2a4d..b06b667ec07 100644 --- a/test/CodeGen/X86/vector-pcmp.ll +++ b/test/CodeGen/X86/vector-pcmp.ll @@ -84,47 +84,28 @@ define <2 x i64> @test_pcmpgtq(<2 x i64> %x) { } define <1 x i128> @test_strange_type(<1 x i128> %x) { -; SSE2-LABEL: test_strange_type: -; SSE2: # %bb.0: -; SSE2-NEXT: sarq $63, %rsi -; SSE2-NEXT: movq %rsi, %xmm0 -; SSE2-NEXT: notq %rsi -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] -; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: movq %xmm1, %rax -; SSE2-NEXT: movq %rsi, %rdx -; SSE2-NEXT: retq -; -; SSE42-LABEL: test_strange_type: -; SSE42: # %bb.0: -; SSE42-NEXT: sarq $63, %rsi -; SSE42-NEXT: movq %rsi, %xmm0 -; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] -; SSE42-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE42-NEXT: pxor %xmm0, %xmm1 -; SSE42-NEXT: movq %xmm1, %rax -; SSE42-NEXT: pextrq $1, %xmm1, %rdx -; SSE42-NEXT: retq +; SSE-LABEL: test_strange_type: +; SSE: # %bb.0: +; SSE-NEXT: sarq $63, %rsi +; SSE-NEXT: notq %rsi +; SSE-NEXT: movq %rsi, %rax +; SSE-NEXT: movq %rsi, %rdx +; SSE-NEXT: retq ; ; AVX1-LABEL: test_strange_type: ; AVX1: # %bb.0: ; AVX1-NEXT: sarq $63, %rsi -; AVX1-NEXT: vmovq %rsi, %xmm0 -; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovq %xmm0, %rax -; AVX1-NEXT: 
vpextrq $1, %xmm0, %rdx +; AVX1-NEXT: notq %rsi +; AVX1-NEXT: movq %rsi, %rax +; AVX1-NEXT: movq %rsi, %rdx ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_strange_type: ; AVX2: # %bb.0: ; AVX2-NEXT: sarq $63, %rsi +; AVX2-NEXT: notq %rsi ; AVX2-NEXT: vmovq %rsi, %xmm0 ; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovq %xmm0, %rax ; AVX2-NEXT: vpextrq $1, %xmm0, %rdx ; AVX2-NEXT: retq -- 2.11.0