From cacf9441bd73957c86f4b753ef6b9179b3fb2c87 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 3 Oct 2017 12:41:39 +0000 Subject: [PATCH] [X86][SSE] Add support for decoding PACKSS/PACKUS shuffles masks with UNDEF git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314792 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 16 ++++++++++++---- test/CodeGen/X86/avx2-vbroadcast.ll | 10 ++++------ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5f82c852de6..b02cb8b2f17 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5967,13 +5967,13 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask, // If we know input saturation won't happen we can treat this // as a truncation shuffle. if (Opcode == X86ISD::PACKSS) { - if (DAG.ComputeNumSignBits(N0) <= NumBitsPerElt || - DAG.ComputeNumSignBits(N1) <= NumBitsPerElt) + if ((!N0.isUndef() && DAG.ComputeNumSignBits(N0) <= NumBitsPerElt) || + (!N1.isUndef() && DAG.ComputeNumSignBits(N1) <= NumBitsPerElt)) return false; } else { APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt); - if (!DAG.MaskedValueIsZero(N0, ZeroMask) || - !DAG.MaskedValueIsZero(N1, ZeroMask)) + if ((!N0.isUndef() && !DAG.MaskedValueIsZero(N0, ZeroMask)) || + (!N1.isUndef() && !DAG.MaskedValueIsZero(N1, ZeroMask))) return false; } @@ -6042,6 +6042,14 @@ static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs, for (int i = 0, e = Inputs.size(); i < e; ++i) { int lo = UsedInputs.size() * MaskWidth; int hi = lo + MaskWidth; + + // Strip UNDEF input usage. + if (Inputs[i].isUndef()) + for (int &M : Mask) + if ((lo <= M) && (M < hi)) + M = SM_SentinelUndef; + + // Check for unused inputs. 
if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) { UsedInputs.push_back(Inputs[i]); continue; diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll index 8daafb0e905..0b3a9194214 100644 --- a/test/CodeGen/X86/avx2-vbroadcast.ll +++ b/test/CodeGen/X86/avx2-vbroadcast.ll @@ -267,16 +267,14 @@ define <16 x i16> @broadcast_mem_v4i16_v16i16(<4 x i16>* %ptr) { ; X32-AVX2-LABEL: broadcast_mem_v4i16_v16i16: ; X32-AVX2: ## BB#0: ; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; X32-AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; X32-AVX2-NEXT: vpbroadcastq %xmm0, %ymm0 +; X32-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X32-AVX2-NEXT: vbroadcastsd %xmm0, %ymm0 ; X32-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: broadcast_mem_v4i16_v16i16: ; X64-AVX2: ## BB#0: -; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; X64-AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpbroadcastq %xmm0, %ymm0 +; X64-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X64-AVX2-NEXT: vbroadcastsd %xmm0, %ymm0 ; X64-AVX2-NEXT: retq ; ; X32-AVX512VL-LABEL: broadcast_mem_v4i16_v16i16: -- 2.11.0