if (Subtarget.hasVLX()) {
// Fast v2f32 SINT_TO_FP( v2i32 ) custom conversion.
setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom); // LowerFP_TO_INT matches a v2f32 source. NOTE(review): it also asserts hasDQI(); presumably the enclosing condition guarantees that - confirm.
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom); // Unsigned counterpart; dispatched to the same LowerFP_TO_INT routine.
}
}
if (Subtarget.hasVLX()) {
}
SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op,
+ const X86Subtarget &Subtarget, // Only consulted by the vector path's feature assert.
SelectionDAG &DAG) const {
- assert(!Op.getSimpleValueType().isVector());
-
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
+ MVT VT = Op.getSimpleValueType(); // Type of the conversion's integer result.
+
+ if (VT.isVector()) { // Vector results are fully handled in this branch; every path returns.
+ assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
+ SDValue Src = Op.getOperand(0); // Floating-point source vector.
+ SDLoc dl(Op);
+ if (VT == MVT::v2i64 && Src.getSimpleValueType() == MVT::v2f32) { // Only v2f32 -> v2i64 is lowered here.
+ return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI,
+ dl, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
+ DAG.getUNDEF(MVT::v2f32))); // Source is padded to v4f32 with an undef upper half.
+ }
+
+ return SDValue(); // Any other vector combination yields an empty SDValue.
+ }
+
+ assert(!VT.isVector()); // Always true here: the vector branch above returns on all paths.
+
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
IsSigned, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
if (StackSlot.getNode())
// Load the result.
- return DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot,
- MachinePointerInfo());
+ return DAG.getLoad(VT, SDLoc(Op), FIST, StackSlot, MachinePointerInfo()); // Uses the cached VT instead of re-querying Op.
// The node is the result.
return FIST;
case ISD::SIGN_EXTEND_VECTOR_INREG:
return LowerEXTEND_VECTOR_INREG(Op, Subtarget, DAG);
case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
+ case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, Subtarget, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::LOAD: return LowerExtendedLoad(Op, Subtarget, DAG);
case ISD::FABS:
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: fptosi_2f32_to_2i64:
-; AVX: # BB#0:
-; AVX-NEXT: vcvttss2si %xmm0, %rax
-; AVX-NEXT: vmovq %rax, %xmm1
-; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; AVX-NEXT: vcvttss2si %xmm0, %rax
-; AVX-NEXT: vmovq %rax, %xmm0
-; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX-NEXT: retq
+; VEX-LABEL: fptosi_2f32_to_2i64:
+; VEX: # BB#0:
+; VEX-NEXT: vcvttss2si %xmm0, %rax
+; VEX-NEXT: vmovq %rax, %xmm1
+; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; VEX-NEXT: vcvttss2si %xmm0, %rax
+; VEX-NEXT: vmovq %rax, %xmm0
+; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; VEX-NEXT: retq
+;
+; AVX512F-LABEL: fptosi_2f32_to_2i64:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vcvttss2si %xmm0, %rax
+; AVX512F-NEXT: vmovq %rax, %xmm1
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX512F-NEXT: vcvttss2si %xmm0, %rax
+; AVX512F-NEXT: vmovq %rax, %xmm0
+; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: fptosi_2f32_to_2i64:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
+; AVX512VL-NEXT: vmovq %rax, %xmm1
+; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
+; AVX512VL-NEXT: vmovq %rax, %xmm0
+; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-NEXT: retq
+;
+; AVX512DQ-LABEL: fptosi_2f32_to_2i64:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vcvttss2si %xmm0, %rax
+; AVX512DQ-NEXT: vmovq %rax, %xmm1
+; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX512DQ-NEXT: vcvttss2si %xmm0, %rax
+; AVX512DQ-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512DQ-NEXT: retq
+;
+; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64:
+; AVX512VLDQ: # BB#0:
+; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: retq
%shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
%cvt = fptosi <2 x float> %shuf to <2 x i64>
ret <2 x i64> %cvt
; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; VEX-NEXT: retq
;
-; AVX512-LABEL: fptoui_2f32_to_2i64:
-; AVX512: # BB#0:
-; AVX512-NEXT: vcvttss2usi %xmm0, %rax
-; AVX512-NEXT: vmovq %rax, %xmm1
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; AVX512-NEXT: vcvttss2usi %xmm0, %rax
-; AVX512-NEXT: vmovq %rax, %xmm0
-; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512-NEXT: retq
+; AVX512F-LABEL: fptoui_2f32_to_2i64:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512F-NEXT: vmovq %rax, %xmm1
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512F-NEXT: vmovq %rax, %xmm0
+; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: fptoui_2f32_to_2i64:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512VL-NEXT: vmovq %rax, %xmm1
+; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512VL-NEXT: vmovq %rax, %xmm0
+; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-NEXT: retq
+;
+; AVX512DQ-LABEL: fptoui_2f32_to_2i64:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512DQ-NEXT: vmovq %rax, %xmm1
+; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX512DQ-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512DQ-NEXT: vmovq %rax, %xmm0
+; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512DQ-NEXT: retq
+;
+; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64:
+; AVX512VLDQ: # BB#0:
+; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: retq
%shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
%cvt = fptoui <2 x float> %shuf to <2 x i64>
ret <2 x i64> %cvt