From c5aadce1551b95108781c7d15f65f1bdb47beb03 Mon Sep 17 00:00:00 2001
From: Hans Wennborg
Date: Tue, 7 Aug 2018 07:40:45 +0000
Subject: [PATCH] Merging r338915:

------------------------------------------------------------------------
r338915 | ctopper | 2018-08-03 22:14:18 +0200 (Fri, 03 Aug 2018) | 5 lines

[SelectionDAG] Teach LegalizeVectorTypes to widen the mask input to a masked store.

The mask operand is visited before the data operand so we need to be able to widen it.

Fixes PR38436.
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_70@339106 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 39 +++++++++++-----
 test/CodeGen/X86/masked_memop.ll                 | 59 ++++++++++++++++++++++++
 2 files changed, 87 insertions(+), 11 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f5d9dd234af..b21249d01ef 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3641,26 +3641,43 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
 }
 
 SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
-  assert(OpNo == 3 && "Can widen only data operand of mstore");
+  assert((OpNo == 2 || OpNo == 3) &&
+         "Can widen only data or mask operand of mstore");
   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
   SDValue Mask = MST->getMask();
   EVT MaskVT = Mask.getValueType();
   SDValue StVal = MST->getValue();
-  // Widen the value
-  SDValue WideVal = GetWidenedVector(StVal);
   SDLoc dl(N);
 
-  // The mask should be widened as well.
-  EVT WideVT = WideVal.getValueType();
-  EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
-                                    MaskVT.getVectorElementType(),
-                                    WideVT.getVectorNumElements());
-  Mask = ModifyToType(Mask, WideMaskVT, true);
+  if (OpNo == 3) {
+    // Widen the value
+    StVal = GetWidenedVector(StVal);
+
+    // The mask should be widened as well.
+    EVT WideVT = StVal.getValueType();
+    EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+                                      MaskVT.getVectorElementType(),
+                                      WideVT.getVectorNumElements());
+    Mask = ModifyToType(Mask, WideMaskVT, true);
+  } else {
+    EVT WideMaskVT = TLI.getTypeToTransformTo(*DAG.getContext(), MaskVT);
+    Mask = ModifyToType(Mask, WideMaskVT, true);
+
+    EVT ValueVT = StVal.getValueType();
+    if (getTypeAction(ValueVT) == TargetLowering::TypeWidenVector)
+      StVal = GetWidenedVector(StVal);
+    else {
+      EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
+                                    ValueVT.getVectorElementType(),
+                                    WideMaskVT.getVectorNumElements());
+      StVal = ModifyToType(StVal, WideVT);
+    }
+  }
 
   assert(Mask.getValueType().getVectorNumElements() ==
-         WideVal.getValueType().getVectorNumElements() &&
+         StVal.getValueType().getVectorNumElements() &&
          "Mask and data vectors should have the same number of elements");
-  return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
+  return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(),
                             Mask, MST->getMemoryVT(), MST->getMemOperand(),
                             false, MST->isCompressingStore());
 }
diff --git a/test/CodeGen/X86/masked_memop.ll b/test/CodeGen/X86/masked_memop.ll
index 812d9f50fe3..36da9386fb0 100644
--- a/test/CodeGen/X86/masked_memop.ll
+++ b/test/CodeGen/X86/masked_memop.ll
@@ -1310,6 +1310,65 @@ define void @trunc_mask(<4 x float> %x, <4 x float>* %ptr, <4 x float> %y, <4 x
   ret void
 }
 
+; This needs to be widened to v4i32.
+; This used to assert in type legalization. PR38436
+; FIXME: The codegen for AVX512 should use KSHIFT to zero the upper bits of the mask.
+define void @widen_masked_store(<3 x i32> %v, <3 x i32>* %p, <3 x i1> %mask) {
+; AVX1-LABEL: widen_masked_store:
+; AVX1:       ## %bb.0:
+; AVX1-NEXT:    vmovd %edx, %xmm1
+; AVX1-NEXT:    vmovd %esi, %xmm2
+; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; AVX1-NEXT:    vmovd %ecx, %xmm2
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
+; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
+; AVX1-NEXT:    vmaskmovps %xmm0, %xmm1, (%rdi)
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: widen_masked_store:
+; AVX2:       ## %bb.0:
+; AVX2-NEXT:    vmovd %edx, %xmm1
+; AVX2-NEXT:    vmovd %esi, %xmm2
+; AVX2-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; AVX2-NEXT:    vmovd %ecx, %xmm2
+; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
+; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
+; AVX2-NEXT:    vpmaskmovd %xmm0, %xmm1, (%rdi)
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: widen_masked_store:
+; AVX512F:       ## %bb.0:
+; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    vpslld $31, %xmm1, %xmm1
+; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k1
+; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
+; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k0
+; AVX512F-NEXT:    kshiftlw $12, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $12, %k0, %k1
+; AVX512F-NEXT:    vmovdqu32 %zmm0, (%rdi) {%k1}
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; SKX-LABEL: widen_masked_store:
+; SKX:       ## %bb.0:
+; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
+; SKX-NEXT:    vptestmd %xmm1, %xmm1, %k1
+; SKX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; SKX-NEXT:    vmovdqa32 %xmm1, %xmm1 {%k1} {z}
+; SKX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; SKX-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
+; SKX-NEXT:    vptestmd %xmm1, %xmm1, %k1
+; SKX-NEXT:    vmovdqa32 %xmm0, (%rdi) {%k1}
+; SKX-NEXT:    retq
+  call void @llvm.masked.store.v3i32(<3 x i32> %v, <3 x i32>* %p, i32 16, <3 x i1> %mask)
+  ret void
+}
+declare void @llvm.masked.store.v3i32(<3 x i32>, <3 x i32>*, i32, <3 x i1>)
+
 declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
 declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
 declare <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>*, i32, <4 x i1>, <4 x i64>)
-- 
2.11.0
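
Note on why the widening is sound: ModifyToType(Mask, WideMaskVT, true) pads the new mask lanes with zeroes (the final argument appears to be its fill-with-zeroes flag), so a <3 x i32> store widened to <4 x i32> can never write through the fourth lane. The following is a minimal standalone C++ sketch of that masked-store semantics, illustrative only; maskedStore is a hypothetical helper, not LLVM's API:

// Sketch (not LLVM code): emulates masked-store semantics to show why
// widening a v3 store to v4 is safe when the extra mask lane is false.
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdio>

// Hypothetical helper: writes mem[i] = data[i] only where mask[i] is true.
template <std::size_t N>
void maskedStore(std::array<int, N> &mem, const std::array<int, N> &data,
                 const std::array<bool, N> &mask) {
  for (std::size_t i = 0; i < N; ++i)
    if (mask[i])
      mem[i] = data[i];
}

int main() {
  // A <3 x i32> store widened to <4 x i32>: the data is padded with an
  // arbitrary value, the mask with false -- mirroring what the patch does
  // with ModifyToType(Mask, WideMaskVT, true).
  std::array<int, 4> mem = {9, 9, 9, 9};
  std::array<int, 4> data = {1, 2, 3, /*pad, value irrelevant*/ 0};
  std::array<bool, 4> mask = {true, false, true, /*pad*/ false};

  maskedStore(mem, data, mask);

  assert(mem[0] == 1 && mem[1] == 9 && mem[2] == 3);
  assert(mem[3] == 9 && "the padded lane must never be written");
  std::printf("%d %d %d %d\n", mem[0], mem[1], mem[2], mem[3]);
  return 0;
}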