OSDN Git Service

[X86] Add EVEX vcvtph2ps to the load folding tables.
authorCraig Topper <craig.topper@intel.com>
Sun, 7 Jan 2018 06:24:24 +0000 (06:24 +0000)
committerCraig Topper <craig.topper@intel.com>
Sun, 7 Jan 2018 06:24:24 +0000 (06:24 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@321951 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86InstrInfo.cpp
test/CodeGen/X86/stack-folding-fp-avx512.ll

index be91886..24a6380 100644 (file)
@@ -1099,8 +1099,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPSRLWZ128ri,         X86::VPSRLWZ128mi,         0 },
 
     // F16C foldable instructions
-    { X86::VCVTPH2PSrr,        X86::VCVTPH2PSrm,            0 },
+    { X86::VCVTPH2PSrr,        X86::VCVTPH2PSrm,            TB_NO_REVERSE },
     { X86::VCVTPH2PSYrr,       X86::VCVTPH2PSYrm,           0 },
+    { X86::VCVTPH2PSZ128rr,    X86::VCVTPH2PSZ128rm,        TB_NO_REVERSE },
+    { X86::VCVTPH2PSZ256rr,    X86::VCVTPH2PSZ256rm,        0 },
+    { X86::VCVTPH2PSZrr,       X86::VCVTPH2PSZrm,           0 },
 
     // AES foldable instructions
     { X86::AESIMCrr,              X86::AESIMCrm,              TB_ALIGN_16 },
index 5b05756..9057f26 100644 (file)
@@ -208,6 +208,15 @@ define <8 x float> @stack_fold_cvtpd2ps(<8 x double> %a0) {
   ret <8 x float> %2
 }
 
+define <16 x float> @stack_fold_cvtph2ps(<16 x i16> %a0) {
+  ;CHECK-LABEL: stack_fold_cvtph2ps
+  ;CHECK:   vcvtph2ps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> undef, i16 -1, i32 4)
+  ret <16 x float> %2
+}
+declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
+
 define <16 x i16> @stack_fold_cvtps2ph(<16 x float> %a0) {
   ;CHECK-LABEL: stack_fold_cvtps2ph
   ;CHECK:   vcvtps2ph $0, {{%zmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 32-byte Folded Spill