[X86] Remove cvtps2ph xmm->xmm from store folding tables. Add the evex versions of cvtps2ph ymm->xmm and zmm->ymm to the store folding tables.

author Craig Topper <craig.topper@intel.com>

Sun, 7 Jan 2018 06:24:23 +0000 (06:24 +0000)

committer Craig Topper <craig.topper@intel.com>

Sun, 7 Jan 2018 06:24:23 +0000 (06:24 +0000)
author Craig Topper <craig.topper@intel.com>
Sun, 7 Jan 2018 06:24:23 +0000 (06:24 +0000)
committer Craig Topper <craig.topper@intel.com>
Sun, 7 Jan 2018 06:24:23 +0000 (06:24 +0000)
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp

index da7f97f..be91886 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -540,8 +540,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
      { X86::VMOVDQU64Z128rr,    X86::VMOVDQU64Z128mr,  TB_FOLDED_STORE },
  
      // F16C foldable instructions
-    { X86::VCVTPS2PHrr,        X86::VCVTPS2PHmr,      TB_FOLDED_STORE },
-    { X86::VCVTPS2PHYrr,       X86::VCVTPS2PHYmr,     TB_FOLDED_STORE }
+    { X86::VCVTPS2PHYrr,       X86::VCVTPS2PHYmr,     TB_FOLDED_STORE },
+    { X86::VCVTPS2PHZ256rr,    X86::VCVTPS2PHZ256mr,  TB_FOLDED_STORE },
+    { X86::VCVTPS2PHZrr,       X86::VCVTPS2PHZmr,     TB_FOLDED_STORE },
    };
  
    for (X86MemoryFoldTableEntry Entry : MemoryFoldTable0) {
diff --git a/test/CodeGen/X86/stack-folding-fp-avx1.ll b/test/CodeGen/X86/stack-folding-fp-avx1.ll

index 4165aea..481fb95 100644 (file)
--- a/test/CodeGen/X86/stack-folding-fp-avx1.ll
+++ b/test/CodeGen/X86/stack-folding-fp-avx1.ll
@@ -535,15 +535,6 @@ define <4 x double> @stack_fold_cvtps2pd_ymm_int(<4 x float> %a0) {
  }
  declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
  
-define <8 x i16> @stack_fold_cvtps2ph(<4 x float> %a0) {
-  ;CHECK-LABEL: stack_fold_cvtps2ph
-  ;CHECK:   vcvtps2ph $0, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
-  %1 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0)
-  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
-  ret <8 x i16> %1
-}
-declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32) nounwind readonly
-
  define <8 x i16> @stack_fold_cvtps2ph_ymm(<8 x float> %a0) {
    ;CHECK-LABEL: stack_fold_cvtps2ph_ymm
    ;CHECK:   vcvtps2ph $0, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
diff --git a/test/CodeGen/X86/stack-folding-fp-avx512.ll b/test/CodeGen/X86/stack-folding-fp-avx512.ll

index 7bd4602..5b05756 100644 (file)
--- a/test/CodeGen/X86/stack-folding-fp-avx512.ll
+++ b/test/CodeGen/X86/stack-folding-fp-avx512.ll
@@ -208,6 +208,15 @@ define <8 x float> @stack_fold_cvtpd2ps(<8 x double> %a0) {
    ret <8 x float> %2
  }
  
+define <16 x i16> @stack_fold_cvtps2ph(<16 x float> %a0) {
+  ;CHECK-LABEL: stack_fold_cvtps2ph
+  ;CHECK:   vcvtps2ph $0, {{%zmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 32-byte Folded Spill
+  %1 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 0, <16 x i16> undef, i16 -1)
+  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ret <16 x i16> %1
+}
+declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
+
  define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
    ;CHECK-LABEL: stack_fold_insertps
    ;CHECK:       vinsertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
author	Craig Topper <craig.topper@intel.com>
	Sun, 7 Jan 2018 06:24:23 +0000 (06:24 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Sun, 7 Jan 2018 06:24:23 +0000 (06:24 +0000)
lib/Target/X86/X86InstrInfo.cpp		patch | blob | history
test/CodeGen/X86/stack-folding-fp-avx1.ll		patch | blob | history
test/CodeGen/X86/stack-folding-fp-avx512.ll		patch | blob | history