OSDN Git Service

[X86] Allow int_x86_sse2_cvtps2dq and int_x86_avx_cvt_ps2dq_256 to select EVEX encode...
authorCraig Topper <craig.topper@intel.com>
Sat, 24 Feb 2018 18:58:07 +0000 (18:58 +0000)
committerCraig Topper <craig.topper@intel.com>
Sat, 24 Feb 2018 18:58:07 +0000 (18:58 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@326041 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86InstrMMX.td
lib/Target/X86/X86InstrSSE.td
lib/Target/X86/X86IntrinsicsInfo.h
test/CodeGen/X86/avx-intrinsics-x86.ll
test/CodeGen/X86/avx-schedule.ll
test/CodeGen/X86/sse2-intrinsics-x86.ll

index 46dfdb9..8f3afb9 100644 (file)
@@ -668,7 +668,7 @@ def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
 def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
           (MMX_MOVFR642Qrr FR64:$src)>;
 def : Pat<(x86mmx (MMX_X86movdq2q
-                   (bc_v2i64 (v4i32 (int_x86_sse2_cvtps2dq VR128:$src))))),
+                   (bc_v2i64 (v4i32 (X86cvtp2Int (v4f32 VR128:$src)))))),
           (MMX_CVTPS2PIirr VR128:$src)>;
 def : Pat<(x86mmx (MMX_X86movdq2q
                    (bc_v2i64 (v4i32 (fp_to_sint (v4f32 VR128:$src)))))),
index d514888..a7c7a4f 100644 (file)
@@ -1782,34 +1782,36 @@ def : Pat<(v4f32 (X86Movss
           (CVTSI2SSrr_Int VR128:$dst, GR32:$src)>;
 } // Predicates = [UseSSE1]
 
+let Predicates = [HasAVX, NoVLX] in {
 // Convert packed single/double fp to doubleword
 def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
+                       [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))],
                        IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>, VEX_WIG;
 def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
-                         (int_x86_sse2_cvtps2dq (loadv4f32 addr:$src)))],
+                         (v4i32 (X86cvtp2Int (loadv4f32 addr:$src))))],
                        IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>, VEX_WIG;
 def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                         "cvtps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR256:$dst,
-                          (int_x86_avx_cvt_ps2dq_256 VR256:$src))],
+                          (v8i32 (X86cvtp2Int (v8f32 VR256:$src))))],
                         IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>, VEX_WIG;
 def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                         "cvtps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR256:$dst,
-                          (int_x86_avx_cvt_ps2dq_256 (loadv8f32 addr:$src)))],
+                          (v8i32 (X86cvtp2Int (loadv8f32 addr:$src))))],
                         IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>, VEX_WIG;
+}
 def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtps2dq\t{$src, $dst|$dst, $src}",
-                     [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
+                     [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))],
                      IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
 def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      "cvtps2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
-                       (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
+                       (v4i32 (X86cvtp2Int (memopv4f32 addr:$src))))],
                      IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
 
 
index 5181d34..3e3a863 100644 (file)
@@ -374,6 +374,7 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx_cmp_ps_256,    INTR_TYPE_3OP, X86ISD::CMPP, 0),
   X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0),
   X86_INTRINSIC_DATA(avx_cvt_pd2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
+  X86_INTRINSIC_DATA(avx_cvt_ps2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
   X86_INTRINSIC_DATA(avx_cvtdq2_ps_256, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0),
   X86_INTRINSIC_DATA(avx_cvtt_pd2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
   X86_INTRINSIC_DATA(avx_cvtt_ps2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
@@ -1602,6 +1603,7 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(sse2_cvtdq2ps,     INTR_TYPE_1OP, ISD::SINT_TO_FP, 0),
   X86_INTRINSIC_DATA(sse2_cvtpd2dq,     INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
   X86_INTRINSIC_DATA(sse2_cvtpd2ps,     INTR_TYPE_1OP, X86ISD::VFPROUND, 0),
+  X86_INTRINSIC_DATA(sse2_cvtps2dq,     INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
   X86_INTRINSIC_DATA(sse2_cvttpd2dq,    INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
   X86_INTRINSIC_DATA(sse2_cvttps2dq,    INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
   X86_INTRINSIC_DATA(sse2_max_pd,       INTR_TYPE_2OP, X86ISD::FMAX, 0),
index 41bf89e..6c18efd 100644 (file)
@@ -178,10 +178,15 @@ declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
 
 
 define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
-; CHECK-LABEL: test_x86_avx_cvt_ps2dq_256:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vcvtps2dq %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x5b,0xc0]
-; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+; AVX-LABEL: test_x86_avx_cvt_ps2dq_256:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vcvtps2dq %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x5b,0xc0]
+; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512VL-LABEL: test_x86_avx_cvt_ps2dq_256:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vcvtps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5b,0xc0]
+; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
   ret <8 x i32> %res
 }
index ad06954..9aaca64 100644 (file)
@@ -1408,7 +1408,7 @@ define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) {
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcvtps2dq %ymm0, %ymm0 # sched: [4:0.33]
 ; SKX-NEXT:    vcvtps2dq (%rdi), %ymm1 # sched: [11:0.50]
-; SKX-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
+; SKX-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_cvtps2dq:
index e3c02b6..f14356f 100644 (file)
@@ -369,10 +369,15 @@ define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
 ; SSE-NEXT:    cvtps2dq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x5b,0xc0]
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
-; VCHECK-LABEL: test_x86_sse2_cvtps2dq:
-; VCHECK:       ## %bb.0:
-; VCHECK-NEXT:    vcvtps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5b,0xc0]
-; VCHECK-NEXT:    retl ## encoding: [0xc3]
+; AVX2-LABEL: test_x86_sse2_cvtps2dq:
+; AVX2:       ## %bb.0:
+; AVX2-NEXT:    vcvtps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5b,0xc0]
+; AVX2-NEXT:    retl ## encoding: [0xc3]
+;
+; SKX-LABEL: test_x86_sse2_cvtps2dq:
+; SKX:       ## %bb.0:
+; SKX-NEXT:    vcvtps2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5b,0xc0]
+; SKX-NEXT:    retl ## encoding: [0xc3]
   %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
   ret <4 x i32> %res
 }