From 44156e3f6149e1e9fd44e069d5bd52615b961356 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 13 May 2018 01:54:33 +0000 Subject: [PATCH] [X86] Add some load folding patterns for cvtsi2ss/sd into intrinsic instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@332189 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 20 +++++++++++++ lib/Target/X86/X86InstrSSE.td | 40 +++++++++++++++++++++++++ test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll | 2 +- 3 files changed, 61 insertions(+), 1 deletion(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 81b4a2adf02..94617bc7de6 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -6938,9 +6938,19 @@ def : Pat<(v4f32 (X86Movss def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), + (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))), + (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>; + +def : Pat<(v4f32 (X86Movss + (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))), (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>; +def : Pat<(v4f32 (X86Movss + (v4f32 VR128X:$dst), + (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))), + (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>; + def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))), @@ -6948,8 +6958,18 @@ def : Pat<(v2f64 (X86Movsd def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), + (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))), + (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>; + +def : Pat<(v2f64 (X86Movsd + (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))), (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>; + +def : Pat<(v2f64 (X86Movsd + (v2f64 VR128X:$dst), + (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))), + (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>; } // Predicates = [HasAVX512] // Convert float/double to signed/unsigned int 32/64 with truncation diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 71eebcfa97d..c25b95d1e5a 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1410,9 +1410,19 @@ def : Pat<(v4f32 (X86Movss def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), + (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))), + (VCVTSI642SSrm_Int VR128:$dst, addr:$src)>; + +def : Pat<(v4f32 (X86Movss + (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))), (VCVTSI2SSrr_Int VR128:$dst, GR32:$src)>; +def : Pat<(v4f32 (X86Movss + (v4f32 VR128:$dst), + (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))), + (VCVTSI2SSrm_Int VR128:$dst, addr:$src)>; + def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))), @@ -1420,8 +1430,18 @@ def : Pat<(v2f64 (X86Movsd def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), + (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))), + (VCVTSI642SDrm_Int VR128:$dst, addr:$src)>; + +def : Pat<(v2f64 (X86Movsd + (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))), (VCVTSI2SDrr_Int VR128:$dst, GR32:$src)>; + +def : Pat<(v2f64 (X86Movsd + (v2f64 VR128:$dst), + (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))), + (VCVTSI2SDrm_Int VR128:$dst, addr:$src)>; } // Predicates = [UseAVX] let Predicates = [UseSSE2] in { @@ -1444,8 +1464,18 @@ def : Pat<(v2f64 (X86Movsd def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), + (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))), + (CVTSI642SDrm_Int VR128:$dst, addr:$src)>; + +def : Pat<(v2f64 (X86Movsd + (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))), (CVTSI2SDrr_Int VR128:$dst, GR32:$src)>; + +def : Pat<(v2f64 (X86Movsd + (v2f64 VR128:$dst), + (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))), + (CVTSI2SDrm_Int VR128:$dst, addr:$src)>; } // Predicates = [UseSSE2] let Predicates = [UseSSE1] in { @@ -1456,8 +1486,18 @@ def : Pat<(v4f32 (X86Movss def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), + (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))), + (CVTSI642SSrm_Int VR128:$dst, addr:$src)>; + +def : Pat<(v4f32 (X86Movss + (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))), (CVTSI2SSrr_Int VR128:$dst, GR32:$src)>; + +def : Pat<(v4f32 (X86Movss + (v4f32 VR128:$dst), + (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))), + (CVTSI2SSrm_Int VR128:$dst, addr:$src)>; } // Predicates = [UseSSE1] let Predicates = [HasAVX, NoVLX] in { diff --git a/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll b/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll index 31411d5f37b..7d1a25b0ac4 100644 --- a/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll +++ b/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s +; RUN: llc < %s -disable-peephole -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) { ; CHECK-LABEL: test_x86_sse2_psll_dq_bs: -- 2.11.0