[X86] Allow legacy vcvtps2ph intrinsics to select EVEX encoded instructions. Rely on EVEX->VEX compression to convert back.

author Craig Topper <craig.topper@intel.com>

Wed, 8 Nov 2017 04:00:30 +0000 (04:00 +0000)

committer Craig Topper <craig.topper@intel.com>

Wed, 8 Nov 2017 04:00:30 +0000 (04:00 +0000)
author Craig Topper <craig.topper@intel.com>
Wed, 8 Nov 2017 04:00:30 +0000 (04:00 +0000)
committer Craig Topper <craig.topper@intel.com>
Wed, 8 Nov 2017 04:00:30 +0000 (04:00 +0000)
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index eb5a3dd..955a40e 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7698,11 +7698,11 @@ multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop> {
               T8PD, VEX, Sched<[WriteCvtF2FLd]>;
  }
  
-multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
+multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop> {
    def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
                 (ins RC:$src1, i32u8imm:$src2),
                 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-               [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>,
+               [(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>,
                 TAPD, VEX, Sched<[WriteCvtF2F]>;
    let hasSideEffects = 0, mayStore = 1,
        SchedRW = [WriteCvtF2FLd, WriteRMW] in
@@ -7715,6 +7715,8 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
  let Predicates = [HasF16C, NoVLX] in {
    defm VCVTPH2PS  : f16c_ph2ps<VR128, f64mem>;
    defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem>, VEX_L;
+  defm VCVTPS2PH  : f16c_ps2ph<VR128, f64mem>;
+  defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem>, VEX_L;
  
    // Pattern match vcvtph2ps of a scalar i64 load.
    def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
@@ -7724,23 +7726,17 @@ let Predicates = [HasF16C, NoVLX] in {
    def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
                (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
              (VCVTPH2PSrm addr:$src)>;
-}
-
-let Predicates = [HasF16C] in {
-  defm VCVTPS2PH  : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
-  defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L;
  
-  def : Pat<(store (f64 (extractelt (bc_v2f64 (v8i16
-                  (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
-                   addr:$dst),
-                   (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
-  def : Pat<(store (i64 (extractelt (bc_v2i64 (v8i16
-                  (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
-                   addr:$dst),
-                   (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
-  def : Pat<(store (v8i16 (int_x86_vcvtps2ph_256 VR256:$src1, i32:$src2)),
-                   addr:$dst),
-                   (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>;
+  def : Pat<(store (f64 (extractelt
+                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))),
+                         (iPTR 0))), addr:$dst),
+            (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
+  def : Pat<(store (i64 (extractelt
+                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))),
+                         (iPTR 0))), addr:$dst),
+            (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
+  def : Pat<(store (v8i16 (X86cvtps2ph VR256:$src1, i32:$src2)), addr:$dst),
+            (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>;
  }
  
  // Patterns for  matching conversions from float to half-float and vice versa.
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h

index 0ed9d2f..9edac22 100644 (file)
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1588,6 +1588,8 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
    X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
    X86_INTRINSIC_DATA(vcvtph2ps_128,     INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
    X86_INTRINSIC_DATA(vcvtph2ps_256,     INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
+  X86_INTRINSIC_DATA(vcvtps2ph_128,     INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),
+  X86_INTRINSIC_DATA(vcvtps2ph_256,     INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),
    X86_INTRINSIC_DATA(xop_vpcomb,        INTR_TYPE_3OP, X86ISD::VPCOM, 0),
    X86_INTRINSIC_DATA(xop_vpcomd,        INTR_TYPE_3OP, X86ISD::VPCOM, 0),
    X86_INTRINSIC_DATA(xop_vpcomq,        INTR_TYPE_3OP, X86ISD::VPCOM, 0),
diff --git a/test/CodeGen/X86/f16c-intrinsics.ll b/test/CodeGen/X86/f16c-intrinsics.ll

index 18b9d8b..c0fa94c 100644 (file)
--- a/test/CodeGen/X86/f16c-intrinsics.ll
+++ b/test/CodeGen/X86/f16c-intrinsics.ll
@@ -121,12 +121,12 @@ define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0) {
  ;
  ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128:
  ; X32-AVX512VL:       # BB#0:
-; X32-AVX512VL-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
+; X32-AVX512VL-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
  ; X32-AVX512VL-NEXT:    retl # encoding: [0xc3]
  ;
  ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128:
  ; X64-AVX512VL:       # BB#0:
-; X64-AVX512VL-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
+; X64-AVX512VL-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
  ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
    %res = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1]
    ret <8 x i16> %res
@@ -148,13 +148,13 @@ define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0) {
  ;
  ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_256:
  ; X32-AVX512VL:       # BB#0:
-; X32-AVX512VL-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
+; X32-AVX512VL-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
  ; X32-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
  ; X32-AVX512VL-NEXT:    retl # encoding: [0xc3]
  ;
  ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_256:
  ; X64-AVX512VL:       # BB#0:
-; X64-AVX512VL-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
+; X64-AVX512VL-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
  ; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
  ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
    %res = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1]
@@ -238,13 +238,13 @@ define void @test_x86_vcvtps2ph_256_m(<8 x i16>* nocapture %d, <8 x float> %a) n
  ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_256_m:
  ; X32-AVX512VL:       # BB#0: # %entry
  ; X32-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X32-AVX512VL-NEXT:    vcvtps2ph $3, %ymm0, (%eax) # encoding: [0xc4,0xe3,0x7d,0x1d,0x00,0x03]
+; X32-AVX512VL-NEXT:    vcvtps2ph $3, %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0x00,0x03]
  ; X32-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
  ; X32-AVX512VL-NEXT:    retl # encoding: [0xc3]
  ;
  ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_256_m:
  ; X64-AVX512VL:       # BB#0: # %entry
-; X64-AVX512VL-NEXT:    vcvtps2ph $3, %ymm0, (%rdi) # encoding: [0xc4,0xe3,0x7d,0x1d,0x07,0x03]
+; X64-AVX512VL-NEXT:    vcvtps2ph $3, %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0x07,0x03]
  ; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
  ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  entry:
@@ -268,7 +268,7 @@ define void @test_x86_vcvtps2ph_128_m(<4 x i16>* nocapture %d, <4 x float> %a) n
  ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m:
  ; X32-AVX512VL:       # BB#0: # %entry
  ; X32-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X32-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
+; X32-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
  ; X32-AVX512VL-NEXT:    vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
  ; X32-AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
  ; X32-AVX512VL-NEXT:    vpmovdw %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x33,0x00]
@@ -276,7 +276,7 @@ define void @test_x86_vcvtps2ph_128_m(<4 x i16>* nocapture %d, <4 x float> %a) n
  ;
  ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m:
  ; X64-AVX512VL:       # BB#0: # %entry
-; X64-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
+; X64-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
  ; X64-AVX512VL-NEXT:    vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
  ; X64-AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
  ; X64-AVX512VL-NEXT:    vpmovdw %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x33,0x07]
@@ -303,12 +303,14 @@ define void @test_x86_vcvtps2ph_128_m2(double* nocapture %hf4x16, <4 x float> %f
  ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2:
  ; X32-AVX512VL:       # BB#0: # %entry
  ; X32-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X32-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, (%eax) # encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
+; X32-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
+; X32-AVX512VL-NEXT:    vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
  ; X32-AVX512VL-NEXT:    retl # encoding: [0xc3]
  ;
  ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2:
  ; X64-AVX512VL:       # BB#0: # %entry
-; X64-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, (%rdi) # encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
+; X64-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
+; X64-AVX512VL-NEXT:    vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
  ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  entry:
    %0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3)
@@ -333,12 +335,14 @@ define void @test_x86_vcvtps2ph_128_m3(i64* nocapture %hf4x16, <4 x float> %f4x3
  ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3:
  ; X32-AVX512VL:       # BB#0: # %entry
  ; X32-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X32-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, (%eax) # encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
+; X32-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
+; X32-AVX512VL-NEXT:    vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
  ; X32-AVX512VL-NEXT:    retl # encoding: [0xc3]
  ;
  ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3:
  ; X64-AVX512VL:       # BB#0: # %entry
-; X64-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, (%rdi) # encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
+; X64-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
+; X64-AVX512VL-NEXT:    vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
  ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  entry:
    %0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3)
author	Craig Topper <craig.topper@intel.com>
	Wed, 8 Nov 2017 04:00:30 +0000 (04:00 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Wed, 8 Nov 2017 04:00:30 +0000 (04:00 +0000)
lib/Target/X86/X86InstrSSE.td		patch | blob | history
lib/Target/X86/X86IntrinsicsInfo.h		patch | blob | history
test/CodeGen/X86/f16c-intrinsics.ll		patch | blob | history