From c2a5d16d1e3b8c49f5bbb1ff87a76ac4f88edb89 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 23 Sep 2016 06:24:43 +0000 Subject: [PATCH] [AVX-512] Split X86ISD::VFPROUND and X86ISD::VFPEXT into separate opcodes for each type constraint. This revealed that scalar intrinsics could create nodes with a rounding mode of FROUND_CUR_DIRECTION, but the patterns didn't check for it. It just worked because isel doesn't check operand count and we had a pattern without the rounding mode argument at all. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282231 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 ++++ lib/Target/X86/X86ISelLowering.h | 4 ++-- lib/Target/X86/X86InstrAVX512.td | 18 ++++++++++-------- lib/Target/X86/X86InstrFragmentsSIMD.td | 19 ++++--------------- lib/Target/X86/X86IntrinsicsInfo.h | 8 ++++---- 5 files changed, 24 insertions(+), 29 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1722a4a745d..ce4c2aee8d1 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -22514,7 +22514,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS"; case X86ISD::VINSERT: return "X86ISD::VINSERT"; case X86ISD::VFPEXT: return "X86ISD::VFPEXT"; + case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND"; + case X86ISD::VFPEXTS_RND: return "X86ISD::VFPEXTS_RND"; case X86ISD::VFPROUND: return "X86ISD::VFPROUND"; + case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND"; + case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND"; case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD"; case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD"; case X86ISD::CVT2MASK: return "X86ISD::CVT2MASK"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 6478ceef4ba..6a9f5364842 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -297,10 +297,10 @@ namespace llvm { VTRUNCUS, VTRUNCS, // Vector FP extend. - VFPEXT, + VFPEXT, VFPEXT_RND, VFPEXTS_RND, // Vector FP round. - VFPROUND, + VFPROUND, VFPROUND_RND, VFPROUNDS_RND, // Vector signed/unsigned integer to double. CVTDQ2PD, CVTUDQ2PD, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 6a01081c949..ed89864e7fd 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -5725,14 +5725,16 @@ multiclass avx512_cvt_fp_scalar opc, string OpcodeStr, X86VectorVTInfo _ (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode (_.VT _.RC:$src1), - (_Src.VT _Src.RC:$src2)))>, + (_Src.VT _Src.RC:$src2), + (i32 FROUND_CURRENT)))>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>; defm rm : AVX512_maskable_scalar, + (_Src.ScalarLdFrag addr:$src2))), + (i32 FROUND_CURRENT)))>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>; } @@ -5759,29 +5761,29 @@ multiclass avx512_cvt_fp_rc_scalar opc, string OpcodeStr, X86VectorVTInf EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_B, EVEX_RC; } -multiclass avx512_cvt_fp_scalar_sd2ss opc, string OpcodeStr, SDNode OpNode, +multiclass avx512_cvt_fp_scalar_sd2ss opc, string OpcodeStr, SDNode OpNodeRnd, X86VectorVTInfo _src, X86VectorVTInfo _dst> { let Predicates = [HasAVX512] in { - defm Z : avx512_cvt_fp_scalar, + defm Z : avx512_cvt_fp_scalar, avx512_cvt_fp_rc_scalar, VEX_W, EVEX_CD8<64, CD8VT1>, EVEX_V512, XD; } } -multiclass avx512_cvt_fp_scalar_ss2sd opc, string OpcodeStr, SDNode OpNode, +multiclass avx512_cvt_fp_scalar_ss2sd opc, string OpcodeStr, SDNode OpNodeRnd, X86VectorVTInfo _src, X86VectorVTInfo _dst> { let Predicates = [HasAVX512] in { - defm Z : avx512_cvt_fp_scalar, + defm Z : avx512_cvt_fp_scalar, avx512_cvt_fp_sae_scalar, EVEX_CD8<32, CD8VT1>, XS, EVEX_V512; } } -defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86fround, +defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86froundRnd, f64x_info, f32x_info>; -defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpext, +defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpextRnd,f32x_info, f64x_info >; def : Pat<(f64 (fpextend FR32X:$src)), diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 09cc8553494..25934deef3d 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -143,25 +143,14 @@ def X86vfpround: SDNode<"X86ISD::VFPROUND", SDTCVecEltisVT<1, f64>, SDTCisSameSizeAs<0, 1>]>>; -def X86fround: SDNode<"X86ISD::VFPROUND", - SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>, - SDTCisSameAs<0, 1>, - SDTCVecEltisVT<2, f64>, - SDTCisSameSizeAs<0, 2>]>>; -def X86froundRnd: SDNode<"X86ISD::VFPROUND", +def X86froundRnd: SDNode<"X86ISD::VFPROUNDS_RND", SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>, SDTCisSameAs<0, 1>, SDTCVecEltisVT<2, f64>, SDTCisSameSizeAs<0, 2>, SDTCisVT<3, i32>]>>; -def X86fpext : SDNode<"X86ISD::VFPEXT", - SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>, - SDTCisSameAs<0, 1>, - SDTCVecEltisVT<2, f32>, - SDTCisSameSizeAs<0, 2>]>>; - -def X86fpextRnd : SDNode<"X86ISD::VFPEXT", +def X86fpextRnd : SDNode<"X86ISD::VFPEXTS_RND", SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f64>, SDTCisSameAs<0, 1>, SDTCVecEltisVT<2, f32>, @@ -567,12 +556,12 @@ def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH", SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>, SDTCVecEltisVT<1, f32>, SDTCisVT<2, i32>]> >; -def X86vfpextRnd : SDNode<"X86ISD::VFPEXT", +def X86vfpextRnd : SDNode<"X86ISD::VFPEXT_RND", SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>, SDTCVecEltisVT<1, f32>, SDTCisOpSmallerThanOp<1, 0>, SDTCisVT<2, i32>]>>; -def X86vfproundRnd: SDNode<"X86ISD::VFPROUND", +def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND", SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>, SDTCVecEltisVT<1, f64>, SDTCisOpSmallerThanOp<0, 1>, diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 94379f73104..3f1a6d78886 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -502,7 +502,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_256, CVTPD2PS, ISD::FP_ROUND, 0), X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS, - ISD::FP_ROUND, X86ISD::VFPROUND), + ISD::FP_ROUND, X86ISD::VFPROUND_RND), X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_128, INTR_TYPE_1OP_MASK, X86ISD::CVTP2SI, 0), X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_256, INTR_TYPE_1OP_MASK, @@ -532,7 +532,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_256, INTR_TYPE_1OP_MASK, ISD::FP_EXTEND, 0), X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_512, INTR_TYPE_1OP_MASK, - ISD::FP_EXTEND, X86ISD::VFPEXT), + ISD::FP_EXTEND, X86ISD::VFPEXT_RND), X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_128, INTR_TYPE_1OP_MASK, X86ISD::CVTP2SI, 0), X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_256, INTR_TYPE_1OP_MASK, @@ -564,9 +564,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_512, INTR_TYPE_1OP_MASK, ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND), X86_INTRINSIC_DATA(avx512_mask_cvtsd2ss_round, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::VFPROUND, 0), + X86ISD::VFPROUNDS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::VFPEXT, 0), + X86ISD::VFPEXTS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, INTR_TYPE_1OP_MASK, ISD::FP_TO_SINT, 0), X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_256, INTR_TYPE_1OP_MASK, -- 2.11.0