Info.flags |= MachineMemOperand::MOStore;
break;
}
+ case GATHER:
+ case GATHER_AVX2: { // Both gather kinds fill in the same memory-access info.
+ Info.ptrVal = nullptr; // No pointer value is recorded for this access.
+ MVT DataVT = MVT::getVT(I.getType()); // VT of I's own (result) type.
+ MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType()); // VT of argument operand 2.
+ unsigned NumElts = std::min(DataVT.getVectorNumElements(),
+ IndexVT.getVectorNumElements()); // Smaller of the data and index element counts.
+ Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts); // Data element type, NumElts lanes.
+ Info.align = 1; // Alignment is recorded as 1.
+ Info.flags |= MachineMemOperand::MOLoad; // Flag the access as a load.
+ break;
+ }
default:
return false;
}
if (!C)
return SDValue();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
- EVT MaskVT = Mask.getValueType();
+ EVT MaskVT = Mask.getValueType().changeVectorElementTypeToInteger();
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
- SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
- SDValue Segment = DAG.getRegister(0, MVT::i32);
// If source is undef or we know it won't be used, use a zero vector
// to break register dependency.
// TODO: use undef instead and let BreakFalseDeps deal with it?
if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
- SDValue Ops[] = {Src, Base, Scale, Index, Disp, Segment, Mask, Chain};
- SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
- SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
- return DAG.getMergeValues(RetOps, dl);
+
+ MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
+
+ SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale };
+ SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
+ VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand());
+ return DAG.getMergeValues({ Res, Res.getValue(2) }, dl);
}
-static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
- SDValue Src, SDValue Mask, SDValue Base,
- SDValue Index, SDValue ScaleOp, SDValue Chain,
- const X86Subtarget &Subtarget) {
+static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG, // Builds an X86MaskedGatherSDNode for Op and returns two of its results merged.
+ SDValue Src, SDValue Mask, SDValue Base,
+ SDValue Index, SDValue ScaleOp, SDValue Chain,
+ const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
- SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
- SDValue Segment = DAG.getRegister(0, MVT::i32);
// If source is undef or we know it won't be used, use a zero vector
// to break register dependency.
// TODO: use undef instead and let BreakFalseDeps deal with it?
if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
- SDValue Ops[] = {Src, Mask, Base, Scale, Index, Disp, Segment, Chain};
- SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
- SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
- return DAG.getMergeValues(RetOps, dl);
+
+ MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op); // Op is expected to be a MemIntrinsicSDNode; its memory info is reused below.
+
+ SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale }; // Operand order: chain first, then source, mask, base, index, scale.
+ SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
+ VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand()); // Memory VT and memory operand come from the original node.
+ return DAG.getMergeValues({ Res, Res.getValue(2) }, dl); // Value 0 has Op's value type; value 2 is the MVT::Other entry of VTs.
}
static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Index = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
- return getGatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale,
+ return getGatherNode(Op, DAG, Src, Mask, Base, Index, Scale,
Chain, Subtarget);
}
case SCATTER: {
* the alphabetical order.
*/
static const IntrinsicData IntrinsicsWithChain[] = {
- X86_INTRINSIC_DATA(avx2_gather_d_d, GATHER_AVX2, X86::VPGATHERDDrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_d_d_256, GATHER_AVX2, X86::VPGATHERDDYrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_d_pd, GATHER_AVX2, X86::VGATHERDPDrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_d_pd_256, GATHER_AVX2, X86::VGATHERDPDYrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_d_ps, GATHER_AVX2, X86::VGATHERDPSrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_d_ps_256, GATHER_AVX2, X86::VGATHERDPSYrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_d_q, GATHER_AVX2, X86::VPGATHERDQrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_d_q_256, GATHER_AVX2, X86::VPGATHERDQYrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_q_d, GATHER_AVX2, X86::VPGATHERQDrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_q_d_256, GATHER_AVX2, X86::VPGATHERQDYrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_q_pd, GATHER_AVX2, X86::VGATHERQPDrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_q_pd_256, GATHER_AVX2, X86::VGATHERQPDYrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_q_ps, GATHER_AVX2, X86::VGATHERQPSrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_q_ps_256, GATHER_AVX2, X86::VGATHERQPSYrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_q_q, GATHER_AVX2, X86::VPGATHERQQrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_q_q_256, GATHER_AVX2, X86::VPGATHERQQYrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_d, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_d_256, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_pd, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_pd_256, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_ps, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_ps_256, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_q, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_q_256, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_d, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_d_256, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_pd, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_pd_256, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_ps, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_ps_256, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_q, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_q_256, GATHER_AVX2, 0, 0),
- X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
- X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
- X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
- X86_INTRINSIC_DATA(avx512_gather_dps_512, GATHER, X86::VGATHERDPSZrm, 0),
- X86_INTRINSIC_DATA(avx512_gather_qpd_512, GATHER, X86::VGATHERQPDZrm, 0),
- X86_INTRINSIC_DATA(avx512_gather_qpi_512, GATHER, X86::VPGATHERQDZrm, 0),
- X86_INTRINSIC_DATA(avx512_gather_qpq_512, GATHER, X86::VPGATHERQQZrm, 0),
- X86_INTRINSIC_DATA(avx512_gather_qps_512, GATHER, X86::VGATHERQPSZrm, 0),
- X86_INTRINSIC_DATA(avx512_gather3div2_df, GATHER, X86::VGATHERQPDZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3div2_di, GATHER, X86::VPGATHERQQZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3div4_df, GATHER, X86::VGATHERQPDZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3div4_di, GATHER, X86::VPGATHERQQZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3div4_sf, GATHER, X86::VGATHERQPSZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3div4_si, GATHER, X86::VPGATHERQDZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3div8_sf, GATHER, X86::VGATHERQPSZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3div8_si, GATHER, X86::VPGATHERQDZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3siv2_df, GATHER, X86::VGATHERDPDZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3siv2_di, GATHER, X86::VPGATHERDQZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3siv4_df, GATHER, X86::VGATHERDPDZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3siv4_di, GATHER, X86::VPGATHERDQZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3siv4_sf, GATHER, X86::VGATHERDPSZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3siv4_si, GATHER, X86::VPGATHERDDZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3siv8_sf, GATHER, X86::VGATHERDPSZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3siv8_si, GATHER, X86::VPGATHERDDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather_dps_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather_qpd_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather_qpi_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather_qpq_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather_qps_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div2_df, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div2_di, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div4_df, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div4_di, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div4_sf, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div4_si, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div8_sf, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div8_si, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv2_df, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv2_di, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv4_df, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv4_di, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv4_sf, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv4_si, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv8_sf, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv8_si, GATHER, 0, 0),
X86_INTRINSIC_DATA(avx512_gatherpf_dpd_512, PREFETCH,
X86::VGATHERPF0DPDm, X86::VGATHERPF1DPDm),
X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),
- X86_INTRINSIC_DATA(avx512_mask_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather_dps_512, GATHER, X86::VGATHERDPSZrm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather_qpd_512, GATHER, X86::VGATHERQPDZrm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather_qpi_512, GATHER, X86::VPGATHERQDZrm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather_qpq_512, GATHER, X86::VPGATHERQQZrm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather_qps_512, GATHER, X86::VGATHERQPSZrm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3div2_df, GATHER, X86::VGATHERQPDZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3div2_di, GATHER, X86::VPGATHERQQZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3div4_df, GATHER, X86::VGATHERQPDZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3div4_di, GATHER, X86::VPGATHERQQZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3div4_sf, GATHER, X86::VGATHERQPSZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3div4_si, GATHER, X86::VPGATHERQDZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3div8_sf, GATHER, X86::VGATHERQPSZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3div8_si, GATHER, X86::VPGATHERQDZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3siv2_df, GATHER, X86::VGATHERDPDZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3siv2_di, GATHER, X86::VPGATHERDQZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3siv4_df, GATHER, X86::VGATHERDPDZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3siv4_di, GATHER, X86::VPGATHERDQZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3siv4_sf, GATHER, X86::VGATHERDPSZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3siv4_si, GATHER, X86::VPGATHERDDZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3siv8_sf, GATHER, X86::VGATHERDPSZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3siv8_si, GATHER, X86::VPGATHERDDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dpd_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dpi_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dpq_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dps_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qpd_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qpi_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qpq_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qps_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div2_df, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div2_di, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_df, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_di, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_sf, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_si, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div8_sf, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div8_si, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv2_df, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv2_di, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_df, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_di, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_sf, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_si, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv8_sf, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv8_si, GATHER, 0, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
X86ISD::VTRUNC, 0),