From c7da972d1d4332eb92279e9c0be9957425f79fac Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@gmail.com>
Date: Tue, 6 Dec 2016 08:08:04 +0000
Subject: [PATCH] [X86] Remove bad pattern that caused 128-bit loads being used
 by scalar sqrt/rcp/rsqrt intrinsics to select the memory form of the
 corresponding instruction and violate the semantics of the intrinsic.

The intrinsics are supposed to pass the upper bits of their input straight through to their output register. This means we need to make sure we still perform the full 128-bit load so that those upper bits are available to pass to the instruction, since the memory form of the instruction only reads 32 or 64 bits.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288781 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrSSE.td           | 3 ---
 test/CodeGen/X86/sse2-intrinsics-x86.ll | 3 ++-
 2 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index b91f707373e..1a8e001808e 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3421,9 +3421,6 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
   // because the high elements of the destination are unchanged in SSE.
   def : Pat<(Intr VR128:$src),
             (!cast<Instruction>(NAME#Suffix##r_Int) VR128:$src, VR128:$src)>;
-  def : Pat<(Intr (load addr:$src)),
-            (vt (COPY_TO_REGCLASS(!cast<Instruction>(NAME#Suffix##m)
-                                      addr:$src), VR128))>;
   }
   // We don't want to fold scalar loads into these instructions unless
   // optimizing for size. This is because the folded instruction will have a
diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll
index 22f25c19b0b..747bee127e3 100644
--- a/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -1504,7 +1504,8 @@ define <2 x double> @test_x86_sse2_sqrt_sd_vec_load(<2 x double>* %a0) {
 ; SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load:
 ; SSE:       ## BB#0:
 ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SSE-NEXT:    sqrtsd (%eax), %xmm0 ## encoding: [0xf2,0x0f,0x51,0x00]
+; SSE-NEXT:    movaps (%eax), %xmm0 ## encoding: [0x0f,0x28,0x00]
+; SSE-NEXT:    sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; AVX2-LABEL: test_x86_sse2_sqrt_sd_vec_load:
-- 
2.11.0