From 3d518c829632d757f5dcf161b9e4b74945bdf75c Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sun, 13 May 2018 00:29:40 +0000
Subject: [PATCH] [X86] Remove and autoupgrade the legacy cvtss2sd intrinsic.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@332187 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/IR/IntrinsicsX86.td                   |  3 -
 lib/IR/AutoUpgrade.cpp                             |  5 ++
 lib/Target/X86/X86InstrSSE.td                      | 20 ++----
 lib/Transforms/Instrumentation/MemorySanitizer.cpp |  1 -
 test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll    | 55 +++++++++++-----
 test/CodeGen/X86/sse2-intrinsics-x86.ll            | 76 ++++------------------
 test/CodeGen/X86/sse_partial_update.ll             |  9 +--
 7 files changed, 67 insertions(+), 102 deletions(-)

diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
index cfee9108fbe..d3b3d1a5e4c 100644
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -488,9 +488,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
   def int_x86_sse2_cvtsd2ss : GCCBuiltin<"__builtin_ia32_cvtsd2ss">,
       Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_cvtss2sd : // TODO: Remove this intrinsic.
-      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
-                 llvm_v4f32_ty], [IntrNoMem]>;
   def int_x86_sse_cvtpd2pi : GCCBuiltin<"__builtin_ia32_cvtpd2pi">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>;
   def int_x86_sse_cvttpd2pi: GCCBuiltin<"__builtin_ia32_cvttpd2pi">,
       Intrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>;
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index e79dbcecc24..9d603146b00 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -258,6 +258,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
          Name == "sse.cvtsi642ss" || // Added in 7.0
          Name == "sse2.cvtsi2sd" || // Added in 7.0
          Name == "sse2.cvtsi642sd" || // Added in 7.0
+         Name == "sse2.cvtss2sd" || // Added in 7.0
          Name == "sse2.cvtdq2pd" || // Added in 3.9
          Name == "sse2.cvtps2pd" || // Added in 3.9
          Name == "avx.cvtdq2.pd.256" || // Added in 3.9
@@ -1559,6 +1560,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
     Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
                                CI->getType()->getVectorElementType());
     Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
+  } else if (IsX86 && Name == "sse2.cvtss2sd") {
+    Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
+    Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
+    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
   } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
                        Name == "sse2.cvtps2pd" ||
                        Name == "avx.cvtdq2.pd.256" ||
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 30e9d5847ec..71eebcfa97d 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1360,35 +1360,29 @@ def : Pat<(fpextend (loadf32 addr:$src)),
 def : Pat<(extloadf32 addr:$src),
           (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>;
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
 def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                        "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                       [(set VR128:$dst,
-                         (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))]>,
-                       XS, VEX_4V, VEX_WIG,
+                       []>, XS, VEX_4V, VEX_WIG,
                        Requires<[HasAVX]>, Sched<[WriteCvtF2F]>;
+let mayLoad = 1 in
 def VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
                        (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                        "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                       [(set VR128:$dst,
-                         (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))]>,
-                       XS, VEX_4V, VEX_WIG,
+                       []>, XS, VEX_4V, VEX_WIG,
                        Requires<[HasAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
 let Constraints = "$src1 = $dst" in {
 // SSE2 instructions with XS prefix
 def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "cvtss2sd\t{$src2, $dst|$dst, $src2}",
-                      [(set VR128:$dst,
-                        (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))]>,
-                      XS, Requires<[UseSSE2]>,
+                      []>, XS, Requires<[UseSSE2]>,
                       Sched<[WriteCvtF2F]>;
+let mayLoad = 1 in
 def CVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                       "cvtss2sd\t{$src2, $dst|$dst, $src2}",
-                      [(set VR128:$dst,
-                        (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))]>,
-                      XS, Requires<[UseSSE2]>,
+                      []>, XS, Requires<[UseSSE2]>,
                       Sched<[WriteCvtF2FLd, ReadAfterLd]>;
 }
 } // isCodeGenOnly = 1
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 4d83f51f123..5c1e263608f 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2563,7 +2563,6 @@ struct MemorySanitizerVisitor : public InstVisitor {
     case Intrinsic::x86_sse2_cvtsd2si64:
     case Intrinsic::x86_sse2_cvtsd2si:
     case Intrinsic::x86_sse2_cvtsd2ss:
-    case Intrinsic::x86_sse2_cvtss2sd:
     case Intrinsic::x86_sse2_cvttsd2si64:
     case Intrinsic::x86_sse2_cvttsd2si:
     case Intrinsic::x86_sse_cvtss2si64:
diff --git a/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll b/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
index 2d51c49796f..31411d5f37b 100644
--- a/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
@@ -259,23 +259,6 @@ declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnon
 define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) {
-; SSE-LABEL: test_x86_sse2_cvtsi2sd:
-; SSE: ## %bb.0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SSE-NEXT: cvtsi2sdl %eax, %xmm0 ## encoding: [0xf2,0x0f,0x2a,0xc0]
-; SSE-NEXT: retl ## encoding: [0xc3]
-;
-; AVX2-LABEL: test_x86_sse2_cvtsi2sd:
-; AVX2: ## %bb.0:
-; AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; AVX2-NEXT: vcvtsi2sdl %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x2a,0xc0]
-; AVX2-NEXT: retl ## encoding: [0xc3]
-;
-; SKX-LABEL: test_x86_sse2_cvtsi2sd:
-; SKX: ## %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SKX-NEXT: vcvtsi2sdl %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc0]
-; SKX-NEXT: retl ## encoding: [0xc3]
 ; CHECK-LABEL: test_x86_sse2_cvtsi2sd:
 ; CHECK: ## %bb.0:
 ; CHECK-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0
@@ -284,3 +267,41 @@ define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) {
   ret <2 x double> %res
 }
 declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
+; CHECK-LABEL: test_x86_sse2_cvtss2sd:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: cvtss2sd %xmm1, %xmm0
+; CHECK-NEXT: retl
+  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, <4 x float>* %p1) {
+; CHECK-LABEL: test_x86_sse2_cvtss2sd_load:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: cvtss2sd %xmm1, %xmm1
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; CHECK-NEXT: retl
+  %a1 = load <4 x float>, <4 x float>* %p1
+  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+
+
+define <2 x double> @test_x86_sse2_cvtss2sd_load_optsize(<2 x double> %a0, <4 x float>* %p1) optsize {
+; CHECK-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: cvtss2sd (%eax), %xmm1
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; CHECK-NEXT: retl
+  %a1 = load <4 x float>, <4 x float>* %p1
+  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll
index 6c06997b58d..085649a530a 100644
--- a/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -457,58 +457,6 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load_optsize(<4 x float> %a0, <2 x do
 }
-
-define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
-; SSE-LABEL: test_x86_sse2_cvtss2sd:
-; SSE: ## %bb.0:
-; SSE-NEXT: cvtss2sd %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5a,0xc1]
-; SSE-NEXT: retl ## encoding: [0xc3]
-;
-; VCHECK-LABEL: test_x86_sse2_cvtss2sd:
-; VCHECK: ## %bb.0:
-; VCHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0xc1]
-; VCHECK-NEXT: retl ## encoding: [0xc3]
-  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
-  ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
-
-
-define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, <4 x float>* %p1) {
-; SSE-LABEL: test_x86_sse2_cvtss2sd_load:
-; SSE: ## %bb.0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SSE-NEXT: cvtss2sd (%eax), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x00]
-; SSE-NEXT: retl ## encoding: [0xc3]
-;
-; VCHECK-LABEL: test_x86_sse2_cvtss2sd_load:
-; VCHECK: ## %bb.0:
-; VCHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; VCHECK-NEXT: vcvtss2sd (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0x00]
-; VCHECK-NEXT: retl ## encoding: [0xc3]
-  %a1 = load <4 x float>, <4 x float>* %p1
-  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
-  ret <2 x double> %res
-}
-
-
-define <2 x double> @test_x86_sse2_cvtss2sd_load_optsize(<2 x double> %a0, <4 x float>* %p1) optsize {
-; SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
-; SSE: ## %bb.0:
-; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SSE-NEXT: cvtss2sd (%eax), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x00]
-; SSE-NEXT: retl ## encoding: [0xc3]
-;
-; VCHECK-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
-; VCHECK: ## %bb.0:
-; VCHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; VCHECK-NEXT: vcvtss2sd (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0x00]
-; VCHECK-NEXT: retl ## encoding: [0xc3]
-  %a1 = load <4 x float>, <4 x float>* %p1
-  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
-  ret <2 x double> %res
-}
-
-
 define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
 ; SSE-LABEL: test_x86_sse2_cvttpd2dq:
 ; SSE: ## %bb.0:
@@ -746,21 +694,21 @@ define <8 x i16> @test_x86_sse2_packssdw_128_fold() {
 ; SSE: ## %bb.0:
 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
 ; SSE-NEXT: ## encoding: [0x0f,0x28,0x05,A,A,A,A]
-; SSE-NEXT: ## fixup A - offset: 3, value: LCPI34_0, kind: FK_Data_4
+; SSE-NEXT: ## fixup A - offset: 3, value: LCPI31_0, kind: FK_Data_4
 ; SSE-NEXT: retl ## encoding: [0xc3]
 ;
 ; AVX2-LABEL: test_x86_sse2_packssdw_128_fold:
 ; AVX2: ## %bb.0:
 ; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
 ; AVX2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
-; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI34_0, kind: FK_Data_4
+; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI31_0, kind: FK_Data_4
 ; AVX2-NEXT: retl ## encoding: [0xc3]
 ;
 ; SKX-LABEL: test_x86_sse2_packssdw_128_fold:
 ; SKX: ## %bb.0:
-; SKX-NEXT: vmovaps LCPI34_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,32767,32767,65535,32768]
+; SKX-NEXT: vmovaps LCPI31_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,32767,32767,65535,32768]
 ; SKX-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
-; SKX-NEXT: ## fixup A - offset: 4, value: LCPI34_0, kind: FK_Data_4
+; SKX-NEXT: ## fixup A - offset: 4, value: LCPI31_0, kind: FK_Data_4
 ; SKX-NEXT: retl ## encoding: [0xc3]
   %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> )
   ret <8 x i16> %res
@@ -793,21 +741,21 @@ define <16 x i8> @test_x86_sse2_packsswb_128_fold() {
 ; SSE: ## %bb.0:
 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
 ; SSE-NEXT: ## encoding: [0x0f,0x28,0x05,A,A,A,A]
-; SSE-NEXT: ## fixup A - offset: 3, value: LCPI36_0, kind: FK_Data_4
+; SSE-NEXT: ## fixup A - offset: 3, value: LCPI33_0, kind: FK_Data_4
 ; SSE-NEXT: retl ## encoding: [0xc3]
 ;
 ; AVX2-LABEL: test_x86_sse2_packsswb_128_fold:
 ; AVX2: ## %bb.0:
 ; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
 ; AVX2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
-; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI36_0, kind: FK_Data_4
+; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI33_0, kind: FK_Data_4
 ; AVX2-NEXT: retl ## encoding: [0xc3]
 ;
 ; SKX-LABEL: test_x86_sse2_packsswb_128_fold:
 ; SKX: ## %bb.0:
-; SKX-NEXT: vmovaps LCPI36_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
+; SKX-NEXT: vmovaps LCPI33_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
 ; SKX-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
-; SKX-NEXT: ## fixup A - offset: 4, value: LCPI36_0, kind: FK_Data_4
+; SKX-NEXT: ## fixup A - offset: 4, value: LCPI33_0, kind: FK_Data_4
 ; SKX-NEXT: retl ## encoding: [0xc3]
   %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> , <8 x i16> zeroinitializer)
   ret <16 x i8> %res
@@ -840,21 +788,21 @@ define <16 x i8> @test_x86_sse2_packuswb_128_fold() {
 ; SSE: ## %bb.0:
 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
 ; SSE-NEXT: ## encoding: [0x0f,0x28,0x05,A,A,A,A]
-; SSE-NEXT: ## fixup A - offset: 3, value: LCPI38_0, kind: FK_Data_4
+; SSE-NEXT: ## fixup A - offset: 3, value: LCPI35_0, kind: FK_Data_4
 ; SSE-NEXT: retl ## encoding: [0xc3]
 ;
 ; AVX2-LABEL: test_x86_sse2_packuswb_128_fold:
 ; AVX2: ## %bb.0:
 ; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
 ; AVX2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
-; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI38_0, kind: FK_Data_4
+; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI35_0, kind: FK_Data_4
 ; AVX2-NEXT: retl ## encoding: [0xc3]
 ;
 ; SKX-LABEL: test_x86_sse2_packuswb_128_fold:
 ; SKX: ## %bb.0:
-; SKX-NEXT: vmovaps LCPI38_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; SKX-NEXT: vmovaps LCPI35_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
 ; SKX-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
-; SKX-NEXT: ## fixup A - offset: 4, value: LCPI38_0, kind: FK_Data_4
+; SKX-NEXT: ## fixup A - offset: 4, value: LCPI35_0, kind: FK_Data_4
 ; SKX-NEXT: retl ## encoding: [0xc3]
   %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> , <8 x i16> zeroinitializer)
   ret <16 x i8> %res
diff --git a/test/CodeGen/X86/sse_partial_update.ll b/test/CodeGen/X86/sse_partial_update.ll
index f1007cc9951..d6929930844 100644
--- a/test/CodeGen/X86/sse_partial_update.ll
+++ b/test/CodeGen/X86/sse_partial_update.ll
@@ -98,8 +98,9 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
 define <2 x double> @load_fold_cvtss2sd_int(<4 x float> *%a) {
 ; CHECK-LABEL: load_fold_cvtss2sd_int:
 ; CHECK: ## %bb.0:
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: cvtss2sd (%rdi), %xmm0
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
+; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
 ; CHECK-NEXT: retq
   %ld = load <4 x float>, <4 x float> *%a
   %x = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> , <4 x float> %ld)
@@ -109,8 +110,8 @@ define <2 x double> @load_fold_cvtss2sd_int(<4 x float> *%a) {
 define <2 x double> @load_fold_cvtss2sd_int_optsize(<4 x float> *%a) optsize {
 ; CHECK-LABEL: load_fold_cvtss2sd_int_optsize:
 ; CHECK: ## %bb.0:
-; CHECK-NEXT: xorps %xmm0, %xmm0
 ; CHECK-NEXT: cvtss2sd (%rdi), %xmm0
+; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
 ; CHECK-NEXT: retq
   %ld = load <4 x float>, <4 x float> *%a
   %x = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> , <4 x float> %ld)
@@ -120,8 +121,8 @@ define <2 x double> @load_fold_cvtss2sd_int_optsize(<4 x float> *%a) optsize {
 define <2 x double> @load_fold_cvtss2sd_int_minsize(<4 x float> *%a) minsize {
 ; CHECK-LABEL: load_fold_cvtss2sd_int_minsize:
 ; CHECK: ## %bb.0:
-; CHECK-NEXT: xorps %xmm0, %xmm0
 ; CHECK-NEXT: cvtss2sd (%rdi), %xmm0
+; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
 ; CHECK-NEXT: retq
   %ld = load <4 x float>, <4 x float> *%a
   %x = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> , <4 x float> %ld)
-- 
2.11.0
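Note (for reference, not part of the diff above): a minimal LLVM IR sketch of what the new sse2.cvtss2sd autoupgrade path is expected to produce for a module that still calls the removed intrinsic. The function names @legacy_caller and @upgraded_caller are illustrative only; the rewrite mirrors the CreateExtractElement/CreateFPExt/CreateInsertElement sequence added in lib/IR/AutoUpgrade.cpp.

; Before the upgrade: bitcode that still uses the removed intrinsic.
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>)

define <2 x double> @legacy_caller(<2 x double> %a0, <4 x float> %a1) {
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1)
  ret <2 x double> %res
}

; After UpgradeIntrinsicCall: the same operation in generic IR, with element 0 of
; %a1 extended to double and inserted into element 0 of %a0.
define <2 x double> @upgraded_caller(<2 x double> %a0, <4 x float> %a1) {
  %elt = extractelement <4 x float> %a1, i64 0
  %ext = fpext float %elt to double
  %res = insertelement <2 x double> %a0, double %ext, i64 0
  ret <2 x double> %res
}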