return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
}
-static SDValue insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG, const SDLoc &dl) {
- assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
- return insertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
-}
-
/// Widen a vector to a larger size with the same scalar type, with the new
/// elements either zero or undef.
static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
}
-/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
-/// instructions. This is used because creating CONCAT_VECTOR nodes of
-/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
-/// large BUILD_VECTORS.
-static SDValue concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
- unsigned NumElems, SelectionDAG &DAG,
- const SDLoc &dl) {
- SDValue V = insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
- return insert128BitVector(V, V2, NumElems / 2, DAG, dl);
-}
-
-static SDValue concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
- unsigned NumElems, SelectionDAG &DAG,
- const SDLoc &dl) {
- SDValue V = insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
- return insert256BitVector(V, V2, NumElems / 2, DAG, dl);
-}
-
static SDValue concatSubVectors(SDValue V1, SDValue V2, EVT VT,
unsigned NumElems, SelectionDAG &DAG,
const SDLoc &dl, unsigned VectorWidth) {
// 256-bit AVX can use the vinsertf128 instruction
// to create 256-bit vectors from two other 128-bit ones.
-static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
+                                      const X86Subtarget &Subtarget) {
   SDLoc dl(Op);
   MVT ResVT = Op.getSimpleValueType();
   assert((ResVT.is256BitVector() ||
           ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide");
-  SDValue V1 = Op.getOperand(0);
-  SDValue V2 = Op.getOperand(1);
-  unsigned NumElems = ResVT.getVectorNumElements();
-  if (ResVT.is256BitVector())
-    return concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
+  // Classify each operand in one pass: undef operands are skipped entirely,
+  // all-zeros build vectors are counted in NumZero, and anything else is
+  // counted in NumNonZero and recorded by position in the NonZeros bitmask.
+  unsigned NumOperands = Op.getNumOperands();
+  unsigned NumZero = 0;
+  unsigned NumNonZero = 0;
+  unsigned NonZeros = 0;
+  for (unsigned i = 0; i != NumOperands; ++i) {
+    SDValue SubVec = Op.getOperand(i);
+    if (SubVec.isUndef())
+      continue;
+    if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
+      ++NumZero;
+    else {
+      assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
+      NonZeros |= 1 << i;
+      ++NumNonZero;
+    }
+  }
+
+  // If there are zero or one non-zeros we can handle this very simply.
+  if (NumNonZero <= 1) {
+    // Base vector: all-zeros if any operand was a zero vector, undef otherwise.
+    SDValue Vec = NumZero ? getZeroVector(ResVT, Subtarget, DAG, dl)
+                          : DAG.getUNDEF(ResVT);
+    if (!NumNonZero)
+      return Vec;
+    // Insert the single non-zero operand at its element offset. NonZeros has
+    // exactly one set bit here, so countTrailingZeros gives its operand index.
+    unsigned Idx = countTrailingZeros(NonZeros);
+    SDValue SubVec = Op.getOperand(Idx);
+    unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements();
+    return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, SubVec,
+                       DAG.getIntPtrConstant(Idx * SubVecNumElts, dl));
+  }
-  if (Op.getNumOperands() == 4) {
+  // More than two operands: concatenate each half into a half-width vector
+  // and concatenate the two halves, letting the cases above apply per half.
+  if (NumOperands > 2) {
     MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(),
                                   ResVT.getVectorNumElements()/2);
-    SDValue V3 = Op.getOperand(2);
-    SDValue V4 = Op.getOperand(3);
-    return concat256BitVectors(
-        concat128BitVectors(V1, V2, HalfVT, NumElems / 2, DAG, dl),
-        concat128BitVectors(V3, V4, HalfVT, NumElems / 2, DAG, dl), ResVT,
-        NumElems, DAG, dl);
+    ArrayRef<SDUse> Ops = Op->ops();
+    SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
+                             Ops.slice(0, NumOperands/2));
+    SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
+                             Ops.slice(NumOperands/2));
+    return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
   }
-  return concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
+
+  assert(NumNonZero == 2 && "Simple cases not handled?");
+
+  // Two operands, both non-zero: insert operand 0 into the low half and
+  // operand 1 into the high half of an undef result vector.
+  SDValue Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT,
+                            DAG.getUNDEF(ResVT), Op.getOperand(0),
+                            DAG.getIntPtrConstant(0, dl));
+  unsigned NumElems = ResVT.getVectorNumElements();
+  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Op.getOperand(1),
+                     DAG.getIntPtrConstant(NumElems/2, dl));
 }
// Return true if all the operands of the given CONCAT_VECTORS node are zeros
return SDValue();
}
+// TODO: Merge this with LowerAVXCONCAT_VECTORS?
static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG & DAG) {
// from two other 128-bit ones.
// 512-bit vector may contain 2 256-bit vectors or 4 128-bit vectors
- return LowerAVXCONCAT_VECTORS(Op, DAG);
+ return LowerAVXCONCAT_VECTORS(Op, DAG, Subtarget);
}
//===----------------------------------------------------------------------===//
def : Pat<(masked_store addr:$dst, Mask,
(_.info512.VT (insert_subvector undef,
- (_.info256.VT (insert_subvector undef,
- (_.info128.VT _.info128.RC:$src),
- (iPTR 0))),
+ (_.info128.VT _.info128.RC:$src),
(iPTR 0)))),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
def : Pat<(masked_store addr:$dst, Mask,
(_.info512.VT (insert_subvector undef,
- (_.info256.VT (insert_subvector undef,
- (_.info128.VT _.info128.RC:$src),
- (iPTR 0))),
+ (_.info128.VT _.info128.RC:$src),
(iPTR 0)))),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
(_.info512.VT (insert_subvector undef,
- (_.info256.VT (insert_subvector undef,
- (_.info128.VT (X86vzmovl _.info128.RC:$src)),
- (iPTR 0))),
+ (_.info128.VT (X86vzmovl _.info128.RC:$src)),
(iPTR 0))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
(_.info512.VT (insert_subvector undef,
- (_.info256.VT (insert_subvector undef,
- (_.info128.VT (X86vzmovl _.info128.RC:$src)),
- (iPTR 0))),
+ (_.info128.VT (X86vzmovl _.info128.RC:$src)),
(iPTR 0))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,