From: Michael Zuckerman Date: Wed, 13 Sep 2017 18:28:09 +0000 (+0000) Subject: Refactoring the stride 4 code in the X86interleavedaccess NFC X-Git-Tag: android-x86-7.1-r4~11137 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=c208bb49457f0ce2f71060a0ba93ee4bbde2d58a;p=android-x86%2Fexternal-llvm.git Refactoring the stride 4 code in the X86interleavedaccess NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313166 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InterleavedAccess.cpp b/lib/Target/X86/X86InterleavedAccess.cpp index f680cfaf995..1ced9a9083f 100644 --- a/lib/Target/X86/X86InterleavedAccess.cpp +++ b/lib/Target/X86/X86InterleavedAccess.cpp @@ -210,6 +210,14 @@ static void createConcatShuffleMask(int NumElements, Mask.push_back(i + Offset + NumElements); } +// Rescale the vector type: halve the number of elements and +// double the scalar (element) bit width. +static MVT scaleVectorType(MVT VT) { + unsigned ScalarSize = VT.getVectorElementType().getScalarSizeInBits() * 2; + return MVT::getVectorVT(MVT::getIntegerVT(ScalarSize), + VT.getVectorNumElements() / 2); +} + void X86InterleavedAccessGroup::interleave8bitStride4( ArrayRef Matrix, SmallVectorImpl &TransposedMatrix, unsigned numberOfElement) { @@ -220,49 +228,32 @@ void X86InterleavedAccessGroup::interleave8bitStride4( // Matrix[2]= y0 y1 y2 y3 y4 ... y31 // Matrix[3]= k0 k1 k2 k3 k4 ... 
k31 - Type *VecTyepVt = VectorType::get(Type::getInt8Ty(Shuffles[0]->getContext()), - numberOfElement); - Type *VecTyepVtHalf = VectorType::get( - Type::getInt16Ty(Shuffles[0]->getContext()), numberOfElement / 2); - MVT VT = MVT::getVT(VecTyepVt); - MVT HalfVT = MVT::getVT(VecTyepVtHalf); + MVT VT = MVT::getVectorVT(MVT::i8, numberOfElement); + MVT HalfVT = scaleVectorType(VT); TransposedMatrix.resize(4); - - SmallVector MaskHighTemp; - SmallVector MaskLowTemp; + SmallVector MaskHigh; + SmallVector MaskLow; SmallVector MaskHighTemp1; SmallVector MaskLowTemp1; - SmallVector MaskHighTemp2; - SmallVector MaskLowTemp2; + SmallVector MaskHighWord; + SmallVector MaskLowWord; SmallVector ConcatLow; SmallVector ConcatHigh; // MaskHighTemp and MaskLowTemp built in the vpunpckhbw and vpunpcklbw X86 // shuffle pattern. - createUnpackShuffleMask(VT, MaskHighTemp, false, false); - createUnpackShuffleMask(VT, MaskLowTemp, true, false); - ArrayRef MaskHigh = makeArrayRef(MaskHighTemp); - ArrayRef MaskLow = makeArrayRef(MaskLowTemp); - - // ConcatHigh and ConcatLow built in the vperm2i128 and vinserti128 X86 - // shuffle pattern. - - createConcatShuffleMask(32, ConcatLow, true); - createConcatShuffleMask(32, ConcatHigh, false); - ArrayRef MaskConcatLow = makeArrayRef(ConcatLow); - ArrayRef MaskConcatHigh = makeArrayRef(ConcatHigh); + createUnpackShuffleMask(VT, MaskHigh, false, false); + createUnpackShuffleMask(VT, MaskLow, true, false); // MaskHighTemp1 and MaskLowTemp1 built in the vpunpckhdw and vpunpckldw X86 // shuffle pattern. 
createUnpackShuffleMask(HalfVT, MaskLowTemp1, true, false); createUnpackShuffleMask(HalfVT, MaskHighTemp1, false, false); - scaleShuffleMask(2, makeArrayRef(MaskHighTemp1), MaskHighTemp2); - scaleShuffleMask(2, makeArrayRef(MaskLowTemp1), MaskLowTemp2); - ArrayRef MaskHighWord = makeArrayRef(MaskHighTemp2); - ArrayRef MaskLowWord = makeArrayRef(MaskLowTemp2); + scaleShuffleMask(2, MaskHighTemp1, MaskHighWord); + scaleShuffleMask(2, MaskLowTemp1, MaskLowWord); // IntrVec1Low = c0 m0 c1 m1 ... c7 m7 | c16 m16 c17 m17 ... c23 m23 // IntrVec1High = c8 m8 c9 m9 ... c15 m15 | c24 m24 c25 m25 ... c31 m31 @@ -299,16 +290,22 @@ void X86InterleavedAccessGroup::interleave8bitStride4( TransposedMatrix[3] = High1; return; } - // cmyk0 cmyk1 cmyk2 cmyk3 | cmyk4 cmyk5 cmyk6 cmyk7 - // cmyk8 cmyk9 cmyk10 cmyk11 | cmyk12 cmyk13 cmyk14 cmyk15 + + // cmyk0 cmyk1 cmyk2 cmyk3 | cmyk4 cmyk5 cmyk6 cmyk7 + // cmyk8 cmyk9 cmyk10 cmyk11 | cmyk12 cmyk13 cmyk14 cmyk15 // cmyk16 cmyk17 cmyk18 cmyk19 | cmyk20 cmyk21 cmyk22 cmyk23 // cmyk24 cmyk25 cmyk26 cmyk27 | cmyk28 cmyk29 cmyk30 cmyk31 - TransposedMatrix[0] = Builder.CreateShuffleVector(Low, High, MaskConcatLow); - TransposedMatrix[1] = Builder.CreateShuffleVector(Low1, High1, MaskConcatLow); - TransposedMatrix[2] = Builder.CreateShuffleVector(Low, High, MaskConcatHigh); - TransposedMatrix[3] = - Builder.CreateShuffleVector(Low1, High1, MaskConcatHigh); + // ConcatHigh and ConcatLow built in the vperm2i128 and vinserti128 X86 + // shuffle pattern. 
+ SmallVector ConcatHigh12, ConcatHigh13; + createConcatShuffleMask(numberOfElement, ConcatLow, true); + createConcatShuffleMask(numberOfElement, ConcatHigh, false); + + TransposedMatrix[0] = Builder.CreateShuffleVector(Low, High, ConcatLow); + TransposedMatrix[1] = Builder.CreateShuffleVector(Low1, High1, ConcatLow); + TransposedMatrix[2] = Builder.CreateShuffleVector(Low, High, ConcatHigh); + TransposedMatrix[3] = Builder.CreateShuffleVector(Low1, High1, ConcatHigh); } // createShuffleStride returns shuffle mask of size N. @@ -606,3 +603,4 @@ bool X86TargetLowering::lowerInterleavedStore(StoreInst *SI, return Grp.isSupported() && Grp.lowerIntoOptimizedSequence(); } +