From: Craig Topper Date: Sat, 11 May 2019 04:19:33 +0000 (+0000) Subject: [X86] Don't emit MOVNTDQA loads from fast-isel without SSE4.1. X-Git-Tag: android-x86-9.0-r1~3550 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=9668e7d9d2d401ac27deea6b08d26d13f688d749;p=android-x86%2Fexternal-llvm.git [X86] Don't emit MOVNTDQA loads from fast-isel without SSE4.1. We were checking for SSE4.1 for FP types, but not integer 128-bit types. Fixes PR41837. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360512 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 6fca1acb009..74464f28c02 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -396,7 +396,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, case MVT::v2i64: case MVT::v8i16: case MVT::v16i8: - if (IsNonTemporal && Alignment >= 16) + if (IsNonTemporal && Alignment >= 16 && HasSSE41) Opc = HasVLX ? X86::VMOVNTDQAZ128rm : HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm; else if (Alignment >= 16) diff --git a/test/CodeGen/X86/fast-isel-nontemporal.ll b/test/CodeGen/X86/fast-isel-nontemporal.ll index db1ebfe6060..37e380b2b48 100644 --- a/test/CodeGen/X86/fast-isel-nontemporal.ll +++ b/test/CodeGen/X86/fast-isel-nontemporal.ll @@ -300,10 +300,20 @@ entry: } define <16 x i8> @test_load_nt16xi8(<16 x i8>* nocapture %ptr) { -; SSE-LABEL: test_load_nt16xi8: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movntdqa (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_load_nt16xi8: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE4A-LABEL: test_load_nt16xi8: +; SSE4A: # %bb.0: # %entry +; SSE4A-NEXT: movdqa (%rdi), %xmm0 +; SSE4A-NEXT: retq +; +; SSE41-LABEL: test_load_nt16xi8: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movntdqa (%rdi), %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_load_nt16xi8: ; AVX: # %bb.0: # %entry @@ -320,10 +330,20 @@ entry: } define <8 x i16> @test_load_nt8xi16(<8 x i16>* nocapture %ptr) { -; SSE-LABEL: test_load_nt8xi16: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movntdqa (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_load_nt8xi16: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE4A-LABEL: test_load_nt8xi16: +; SSE4A: # %bb.0: # %entry +; SSE4A-NEXT: movdqa (%rdi), %xmm0 +; SSE4A-NEXT: retq +; +; SSE41-LABEL: test_load_nt8xi16: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movntdqa (%rdi), %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_load_nt8xi16: ; AVX: # %bb.0: # %entry @@ -340,10 +360,20 @@ entry: } define <4 x i32> @test_load_nt4xi32(<4 x i32>* nocapture %ptr) { -; SSE-LABEL: test_load_nt4xi32: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movntdqa (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_load_nt4xi32: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE4A-LABEL: test_load_nt4xi32: +; SSE4A: # %bb.0: # %entry +; SSE4A-NEXT: movdqa (%rdi), %xmm0 +; SSE4A-NEXT: retq +; +; SSE41-LABEL: test_load_nt4xi32: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movntdqa (%rdi), %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_load_nt4xi32: ; AVX: # %bb.0: # %entry @@ -360,10 +390,20 @@ entry: } define <2 x i64> @test_load_nt2xi64(<2 x i64>* nocapture %ptr) { -; SSE-LABEL: test_load_nt2xi64: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movntdqa (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_load_nt2xi64: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE4A-LABEL: test_load_nt2xi64: +; SSE4A: # %bb.0: # %entry +; SSE4A-NEXT: movdqa (%rdi), %xmm0 +; SSE4A-NEXT: retq +; +; SSE41-LABEL: test_load_nt2xi64: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movntdqa (%rdi), %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_load_nt2xi64: ; AVX: # %bb.0: # %entry