From 20a68803919cf78110c062c1e1061e08c02ed6d2 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 1 Jul 2019 07:09:23 +0000 Subject: [PATCH] [X86] Add MOVHPDrm/MOVLPDrm patterns that use VZEXT_LOAD. We already had patterns that used scalar_to_vector+load, but the loaded operand can also appear as a vzload, so add patterns that match that form as well. Found while investigating combining scalar_to_vector+load to vzload. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364726 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 6 ++++++ lib/Target/X86/X86InstrSSE.td | 12 ++++++++++++ test/CodeGen/X86/merge-consecutive-loads-128.ll | 3 +-- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index b3343213c11..6452e632f40 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -6349,6 +6349,12 @@ let Predicates = [HasAVX512] in { def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload addr:$src2))), + (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; + + // VMOVLPD patterns + def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload addr:$src2))), + (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; } let SchedRW = [WriteFStore] in { diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ebc284ca091..f1728d8b8de 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -699,11 +699,17 @@ let Predicates = [UseAVX] in { def : Pat<(v2f64 (X86Unpckl VR128:$src1, (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), (VMOVHPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload addr:$src2))), + (VMOVHPDrm VR128:$src1, addr:$src2)>; def : Pat<(store (f64 (extractelt (v2f64 (X86VPermilpi VR128:$src, (i8 1))), (iPTR 0))), addr:$dst), (VMOVHPDmr addr:$dst, VR128:$src)>; + + // MOVLPD patterns 
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload addr:$src2))), + (VMOVLPDrm VR128:$src1, addr:$src2)>; } let Predicates = [UseSSE1] in { @@ -722,11 +728,17 @@ let Predicates = [UseSSE2] in { def : Pat<(v2f64 (X86Unpckl VR128:$src1, (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), (MOVHPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload addr:$src2))), + (MOVHPDrm VR128:$src1, addr:$src2)>; def : Pat<(store (f64 (extractelt (v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))), (iPTR 0))), addr:$dst), (MOVHPDmr addr:$dst, VR128:$src)>; + + // MOVLPD patterns + def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload addr:$src2))), + (MOVLPDrm VR128:$src1, addr:$src2)>; } //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/X86/merge-consecutive-loads-128.ll b/test/CodeGen/X86/merge-consecutive-loads-128.ll index b7c8820709a..679b57569b2 100644 --- a/test/CodeGen/X86/merge-consecutive-loads-128.ll +++ b/test/CodeGen/X86/merge-consecutive-loads-128.ll @@ -1173,8 +1173,7 @@ define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable n ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: merge_4f32_f32_2345_volatile: -- 2.11.0