[X86] Add patterns to fold EVEX store with EVEX encoded vcvtps2ph instructions. Remove the store pattern from the `mr` instruction definition in the avx512_cvtps2ph multiclass.

author Craig Topper <craig.topper@intel.com>
Wed, 8 Nov 2017 04:00:31 +0000 (04:00 +0000)

committer Craig Topper <craig.topper@intel.com>
Wed, 8 Nov 2017 04:00:31 +0000 (04:00 +0000)
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td

index 4aa57f4..84b44ac 100644 (file)
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -7225,17 +7225,16 @@ multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                     (X86cvtps2ph (_src.VT _src.RC:$src1),
                                  (i32 imm:$src2)),
                     NoItinerary, 0, 0>, AVX512AIi8Base;
-  def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
-             (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
-             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-             [(store (_dest.VT (X86cvtps2ph (_src.VT _src.RC:$src1),
-                                     (i32 imm:$src2))),
-                                     addr:$dst)]>;
-  let hasSideEffects = 0, mayStore = 1 in
-  def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
-             (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
-             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
-              []>, EVEX_K;
+  let hasSideEffects = 0, mayStore = 1 in {
+    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
+               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
+               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+               []>;
+    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
+               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
+               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
+                []>, EVEX_K;
+  }
  }
  multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
    let hasSideEffects = 0 in
@@ -7255,6 +7254,19 @@ let Predicates = [HasAVX512] in {
      defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem>,
                          EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
    }
+
+  def : Pat<(store (f64 (extractelt
+                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
+                         (iPTR 0))), addr:$dst),
+            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
+  def : Pat<(store (i64 (extractelt
+                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
+                         (iPTR 0))), addr:$dst),
+            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
+  def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
+            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
+  def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
+            (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
  }
  
  // Patterns for matching conversions from float to half-float and vice versa.
diff --git a/test/CodeGen/X86/f16c-intrinsics.ll b/test/CodeGen/X86/f16c-intrinsics.ll

index c0fa94c..64f8fd0 100644 (file)
--- a/test/CodeGen/X86/f16c-intrinsics.ll
+++ b/test/CodeGen/X86/f16c-intrinsics.ll
@@ -303,14 +303,12 @@ define void @test_x86_vcvtps2ph_128_m2(double* nocapture %hf4x16, <4 x float> %f
  ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2:
  ; X32-AVX512VL:       # BB#0: # %entry
  ; X32-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X32-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
-; X32-AVX512VL-NEXT:    vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
+; X32-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
  ; X32-AVX512VL-NEXT:    retl # encoding: [0xc3]
  ;
  ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2:
  ; X64-AVX512VL:       # BB#0: # %entry
-; X64-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
-; X64-AVX512VL-NEXT:    vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
+; X64-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
  ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  entry:
    %0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3)
@@ -335,14 +333,12 @@ define void @test_x86_vcvtps2ph_128_m3(i64* nocapture %hf4x16, <4 x float> %f4x3
  ; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3:
  ; X32-AVX512VL:       # BB#0: # %entry
  ; X32-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X32-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
-; X32-AVX512VL-NEXT:    vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
+; X32-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
  ; X32-AVX512VL-NEXT:    retl # encoding: [0xc3]
  ;
  ; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3:
  ; X64-AVX512VL:       # BB#0: # %entry
-; X64-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
-; X64-AVX512VL-NEXT:    vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
+; X64-AVX512VL-NEXT:    vcvtps2ph $3, %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
  ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  entry:
    %0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3)
author	Craig Topper <craig.topper@intel.com>
	Wed, 8 Nov 2017 04:00:31 +0000 (04:00 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Wed, 8 Nov 2017 04:00:31 +0000 (04:00 +0000)
lib/Target/X86/X86InstrAVX512.td		patch | blob | history
test/CodeGen/X86/f16c-intrinsics.ll		patch | blob | history