From 3d5e161fe45f9553009c0377a5689324aab1c64a Mon Sep 17 00:00:00 2001 From: Silviu Baranga Date: Wed, 5 Sep 2012 08:57:21 +0000 Subject: [PATCH] Fixed the DAG combiner to better handle the folding of AND nodes for vector types. The previous code was making the assumption that the length of the bitmask returned by isConstantSplat was equal to the size of the vector type. Now we first make sure that the splat value has at least the length of the vector lane type, then we only use as many fields as we have available in the splat value. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163203 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 12 +++++++++++- test/CodeGen/ARM/vector-extend-narrow.ll | 11 +++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ee6c2a39f3e..45c38968ea2 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2496,8 +2496,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // lanes of the constant together. EVT VT = Vector->getValueType(0); unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); + + // If the splat value has been compressed to a bitlength lower + // than the size of the vector lane, we need to re-expand it to + // the lane size. + if (BitWidth > SplatBitSize) + for (SplatValue = SplatValue.zextOrTrunc(BitWidth); + SplatBitSize < BitWidth; + SplatBitSize = SplatBitSize * 2) + SplatValue |= SplatValue.shl(SplatBitSize); + Constant = APInt::getAllOnesValue(BitWidth); - for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i) + for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); } } diff --git a/test/CodeGen/ARM/vector-extend-narrow.ll b/test/CodeGen/ARM/vector-extend-narrow.ll index 8fd3db29197..22af7976212 100644 --- a/test/CodeGen/ARM/vector-extend-narrow.ll +++ b/test/CodeGen/ARM/vector-extend-narrow.ll @@ -62,3 +62,14 @@ define <4 x i8> @i(<4 x i8>* %x) { %2 = sdiv <4 x i8> zeroinitializer, %1 ret <4 x i8> %2 } +; CHECK: j: +define <4 x i32> @j(<4 x i8>* %in) nounwind { + ; CHECK: vld1 + ; CHECK: vmovl.u8 + ; CHECK: vmovl.u16 + ; CHECK-NOT: vand + %1 = load <4 x i8>* %in, align 4 + %2 = zext <4 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +} + -- 2.11.0