From 714162bb4f96cff3959a08a2a9a8c9ad583fde02 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 7 Dec 2016 11:19:00 +0000 Subject: [PATCH] [X86][XOP] Fix VPERMIL2 non-constant pool shuffle decoding (PR31296) The non-constant-pool version of DecodeVPERMIL2PMask was not offsetting shuffle indices correctly for the second input. I've updated the code to match the implementation in the constant-pool version. Annoyingly, this bug stayed hidden for so long because it's tricky to combine shuffles into useful variable shuffle masks that don't become constant-pool entries. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288898 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/Utils/X86ShuffleDecode.cpp | 14 ++++++++------ test/CodeGen/X86/vector-shuffle-combining-xop.ll | 10 ++++++++-- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 3c04bf4899f..1be5aec849f 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -548,10 +548,11 @@ void DecodeVPERMIL2PMask(MVT VT, unsigned M2Z, ArrayRef RawMask, unsigned VecSize = VT.getSizeInBits(); unsigned EltSize = VT.getScalarSizeInBits(); unsigned NumLanes = VecSize / 128; - unsigned NumEltsPerLane = VT.getVectorNumElements() / NumLanes; - assert((VecSize == 128 || VecSize == 256) && - "Unexpected vector size"); + unsigned NumElts = VT.getVectorNumElements(); + unsigned NumEltsPerLane = NumElts / NumLanes; + assert((VecSize == 128 || VecSize == 256) && "Unexpected vector size"); assert((EltSize == 32 || EltSize == 64) && "Unexpected element size"); + assert((NumElts == RawMask.size()) && "Unexpected mask size"); for (unsigned i = 0, e = RawMask.size(); i < e; ++i) { // VPERMIL2 Operation. 
@@ -572,14 +573,15 @@ void DecodeVPERMIL2PMask(MVT VT, unsigned M2Z, ArrayRef RawMask, continue; } - unsigned Index = i & ~(NumEltsPerLane - 1); + int Index = i & ~(NumEltsPerLane - 1); if (EltSize == 64) Index += (Selector >> 1) & 0x1; else Index += Selector & 0x3; - unsigned SrcOffset = (Selector >> 2) & 1; - ShuffleMask.push_back((int)(SrcOffset + Index)); + int Src = (Selector >> 2) & 0x1; + Index += Src * NumElts; + ShuffleMask.push_back(Index); } } diff --git a/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/test/CodeGen/X86/vector-shuffle-combining-xop.ll index 41c1866de0d..d7073d6f67a 100644 --- a/test/CodeGen/X86/vector-shuffle-combining-xop.ll +++ b/test/CodeGen/X86/vector-shuffle-combining-xop.ll @@ -345,12 +345,18 @@ define <16 x i8> @constant_fold_vpperm() { define <4 x float> @PR31296(i8* %in) { ; X32-LABEL: PR31296: ; X32: # BB#0: # %entry -; X32-NEXT: vmovaps {{.*#+}} xmm0 = [1.000000e+00,0.000000e+00,0.000000e+00,1.000000e+00] +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32-NEXT: vmovaps {{.*#+}} xmm1 = <0,1,u,u> +; X32-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0,0,1] ; X32-NEXT: retl ; ; X64-LABEL: PR31296: ; X64: # BB#0: # %entry -; X64-NEXT: vmovaps {{.*#+}} xmm0 = [1.000000e+00,0.000000e+00,0.000000e+00,1.000000e+00] +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: vmovq %rax, %xmm0 +; X64-NEXT: vmovaps {{.*#+}} xmm1 = <0,1,u,u> +; X64-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0,0,1] ; X64-NEXT: retq entry: %0 = getelementptr i8, i8* %in, i32 0 -- 2.11.0