From f18303af3f681e8123710d6fd2ad90ba00d263da Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 3 Jul 2019 14:34:16 +0000 Subject: [PATCH] [X86][AVX] Combine vpermi(bitcast(x)) -> bitcast(vpermi(x)) iff the number of elements doesn't change. This gets around an issue with combineX86ShuffleChain not being able to hint which domain is preferred for shuffles that can be done with either. Fixes regression introduced in rL365041 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365044 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 16 ++++++++++++++++ test/CodeGen/X86/avx512-shuffles/partial_permute.ll | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2912c249283..3e75756cffd 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -31946,6 +31946,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, // Which shuffle domains are permitted? // Permit domain crossing at higher combine depths. + // TODO: Should we indicate which domain is preferred if both are allowed? bool AllowFloatDomain = FloatDomain || (Depth > 3); bool AllowIntDomain = (!FloatDomain || (Depth > 3)) && Subtarget.hasSSE2() && (!MaskVT.is256BitVector() || Subtarget.hasAVX2()); @@ -33057,6 +33058,21 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, } return SDValue(); } + case X86ISD::VPERMI: { + // vpermi(bitcast(x)) -> bitcast(vpermi(x)) for same number of elements. + // TODO: Remove when we have preferred domains in combineX86ShuffleChain. + SDValue N0 = N.getOperand(0); + SDValue N1 = N.getOperand(1); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + if (N0.getOpcode() == ISD::BITCAST && + N0.getOperand(0).getScalarValueSizeInBits() == EltSizeInBits) { + SDValue Src = N0.getOperand(0); + EVT SrcVT = Src.getValueType(); + SDValue Res = DAG.getNode(X86ISD::VPERMI, DL, SrcVT, Src, N1); + return DAG.getBitcast(VT, Res); + } + return SDValue(); + } case X86ISD::PSHUFD: case X86ISD::PSHUFLW: case X86ISD::PSHUFHW: diff --git a/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/test/CodeGen/X86/avx512-shuffles/partial_permute.ll index de581aa3fb9..b3e154b3107 100644 --- a/test/CodeGen/X86/avx512-shuffles/partial_permute.ll +++ b/test/CodeGen/X86/avx512-shuffles/partial_permute.ll @@ -2216,9 +2216,9 @@ define <2 x i64> @test_masked_8xi64_to_2xi64_perm_mask0(<8 x i64> %vec, <2 x i64 define <2 x i64> @test_masked_z_8xi64_to_2xi64_perm_mask0(<8 x i64> %vec, <2 x i64> %mask) { ; CHECK-LABEL: test_masked_z_8xi64_to_2xi64_perm_mask0: ; CHECK: # %bb.0: -; CHECK-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,0,2,3,7,4,6,7] ; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} +; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,2,3,7,4,6,7] +; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <2 x i32> -- 2.11.0