From aa87382d917d31c55f4a155b7814e2a3801af2f7 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Thu, 15 Nov 2018 00:21:41 +0000
Subject: [PATCH] [X86] Don't mark SEXTLOADS with narrow types as Custom with
 -x86-experimental-vector-widening-legalization.

The narrow types end up requesting widening, but generic legalization will
end up scalarizing and using a build_vector to do the widening.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@346916 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp      |  35 ++++-
 test/CodeGen/X86/shrink_vmul-widen.ll   | 258 ++++++++++----------------
 test/CodeGen/X86/vec_int_to_fp-widen.ll |  40 ++---
 test/CodeGen/X86/vector-sext-widen.ll   |  19 +--
 test/CodeGen/X86/vector-zext-widen.ll   |  14 +-
 5 files changed, 139 insertions(+), 227 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 8e86bb78ada..dae29807acf 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -849,9 +849,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     // scalars) and extend in-register to a legal 128-bit vector type. For sext
     // loads these must work with a single scalar load.
     for (MVT VT : MVT::integer_vector_valuetypes()) {
-      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
-      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
-      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
+      if (!ExperimentalVectorWideningLegalization) {
+        // We don't want narrow result types here when widening.
+        setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
+        setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
+        setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
+      }
       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
@@ -860,6 +863,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
     }
 
+    if (ExperimentalVectorWideningLegalization) {
+      // Explicitly code the list so we don't use narrow result types.
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Custom);
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i8, Custom);
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i16, Custom);
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8, Custom);
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Custom);
+      setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i8, Custom);
+    }
+
     for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
@@ -979,17 +993,22 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
     }
 
-    for (MVT VT : MVT::integer_vector_valuetypes()) {
-      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
-      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
-      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
+    if (!ExperimentalVectorWideningLegalization) {
+      // Avoid narrow result types when widening. The legal types are listed
+      // in the next loop.
+ for (MVT VT : MVT::integer_vector_valuetypes()) { + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom); + } } // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal); setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal); - setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal); + if (!ExperimentalVectorWideningLegalization) + setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal); setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal); setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal); setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal); diff --git a/test/CodeGen/X86/shrink_vmul-widen.ll b/test/CodeGen/X86/shrink_vmul-widen.ll index b1278738ee5..5c2468cb992 100644 --- a/test/CodeGen/X86/shrink_vmul-widen.ll +++ b/test/CodeGen/X86/shrink_vmul-widen.ll @@ -37,24 +37,20 @@ define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 ; ; X86-AVX-LABEL: mul_2xi8: ; X86-AVX: # %bb.0: # %entry -; X86-AVX-NEXT: pushl %edi ; X86-AVX-NEXT: pushl %esi ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-AVX-NEXT: movl c, %esi -; X86-AVX-NEXT: movzbl 1(%edx,%ecx), %edi -; X86-AVX-NEXT: movzbl (%edx,%ecx), %edx +; X86-AVX-NEXT: movzwl (%edx,%ecx), %edx ; X86-AVX-NEXT: vmovd %edx, %xmm0 -; X86-AVX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl 1(%eax,%ecx), %edx -; X86-AVX-NEXT: movzbl (%eax,%ecx), %eax +; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; X86-AVX-NEXT: movzwl (%eax,%ecx), %eax ; X86-AVX-NEXT: vmovd %eax, %xmm1 -; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1 -; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0 +; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero +; X86-AVX-NEXT: vpmaddwd %xmm0, %xmm1, %xmm0 ; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4) ; X86-AVX-NEXT: popl %esi -; X86-AVX-NEXT: popl %edi ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: mul_2xi8: @@ -75,15 +71,13 @@ define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 ; X64-AVX-LABEL: mul_2xi8: ; X64-AVX: # %bb.0: # %entry ; X64-AVX-NEXT: movq {{.*}}(%rip), %rax -; X64-AVX-NEXT: movzbl 1(%rdi,%rdx), %ecx -; X64-AVX-NEXT: movzbl (%rdi,%rdx), %edi -; X64-AVX-NEXT: vmovd %edi, %xmm0 -; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl 1(%rsi,%rdx), %ecx -; X64-AVX-NEXT: movzbl (%rsi,%rdx), %esi -; X64-AVX-NEXT: vmovd %esi, %xmm1 -; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 -; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0 +; X64-AVX-NEXT: movzwl (%rdi,%rdx), %ecx +; X64-AVX-NEXT: vmovd %ecx, %xmm0 +; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; X64-AVX-NEXT: movzwl (%rsi,%rdx), %ecx +; X64-AVX-NEXT: vmovd %ecx, %xmm1 +; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero +; X64-AVX-NEXT: vpmaddwd %xmm0, %xmm1, %xmm0 ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rdx,4) ; X64-AVX-NEXT: retq entry: @@ -923,24 +917,20 @@ define void @mul_2xi8_sext(i8* nocapture readonly %a, i8* nocapture readonly 
%b, ; ; X86-AVX-LABEL: mul_2xi8_sext: ; X86-AVX: # %bb.0: # %entry -; X86-AVX-NEXT: pushl %edi ; X86-AVX-NEXT: pushl %esi ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-AVX-NEXT: movl c, %esi -; X86-AVX-NEXT: movsbl 1(%edx,%ecx), %edi -; X86-AVX-NEXT: movsbl (%edx,%ecx), %edx +; X86-AVX-NEXT: movzwl (%edx,%ecx), %edx ; X86-AVX-NEXT: vmovd %edx, %xmm0 -; X86-AVX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 -; X86-AVX-NEXT: movsbl 1(%eax,%ecx), %edx -; X86-AVX-NEXT: movsbl (%eax,%ecx), %eax +; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0 +; X86-AVX-NEXT: movzwl (%eax,%ecx), %eax ; X86-AVX-NEXT: vmovd %eax, %xmm1 -; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1 +; X86-AVX-NEXT: vpmovsxbd %xmm1, %xmm1 ; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0 ; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4) ; X86-AVX-NEXT: popl %esi -; X86-AVX-NEXT: popl %edi ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: mul_2xi8_sext: @@ -963,14 +953,12 @@ define void @mul_2xi8_sext(i8* nocapture readonly %a, i8* nocapture readonly %b, ; X64-AVX-LABEL: mul_2xi8_sext: ; X64-AVX: # %bb.0: # %entry ; X64-AVX-NEXT: movq {{.*}}(%rip), %rax -; X64-AVX-NEXT: movsbl 1(%rdi,%rdx), %ecx -; X64-AVX-NEXT: movsbl (%rdi,%rdx), %edi -; X64-AVX-NEXT: vmovd %edi, %xmm0 -; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 -; X64-AVX-NEXT: movsbl 1(%rsi,%rdx), %ecx -; X64-AVX-NEXT: movsbl (%rsi,%rdx), %esi -; X64-AVX-NEXT: vmovd %esi, %xmm1 -; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 +; X64-AVX-NEXT: movzwl (%rdi,%rdx), %ecx +; X64-AVX-NEXT: vmovd %ecx, %xmm0 +; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0 +; X64-AVX-NEXT: movzwl (%rsi,%rdx), %ecx +; X64-AVX-NEXT: vmovd %ecx, %xmm1 +; X64-AVX-NEXT: vpmovsxbd %xmm1, %xmm1 ; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0 ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rdx,4) ; X64-AVX-NEXT: retq @@ -1023,24 +1011,20 @@ define void @mul_2xi8_sext_zext(i8* nocapture readonly %a, i8* nocapture readonl ; ; X86-AVX-LABEL: mul_2xi8_sext_zext: ; X86-AVX: # %bb.0: # %entry -; X86-AVX-NEXT: pushl %edi ; X86-AVX-NEXT: pushl %esi ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-AVX-NEXT: movl c, %esi -; X86-AVX-NEXT: movsbl 1(%edx,%ecx), %edi -; X86-AVX-NEXT: movsbl (%edx,%ecx), %edx +; X86-AVX-NEXT: movzwl (%edx,%ecx), %edx ; X86-AVX-NEXT: vmovd %edx, %xmm0 -; X86-AVX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl 1(%eax,%ecx), %edx -; X86-AVX-NEXT: movzbl (%eax,%ecx), %eax +; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0 +; X86-AVX-NEXT: movzwl (%eax,%ecx), %eax ; X86-AVX-NEXT: vmovd %eax, %xmm1 -; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1 +; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero ; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0 ; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4) ; X86-AVX-NEXT: popl %esi -; X86-AVX-NEXT: popl %edi ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: mul_2xi8_sext_zext: @@ -1064,14 +1048,12 @@ define void @mul_2xi8_sext_zext(i8* nocapture readonly %a, i8* nocapture readonl ; X64-AVX-LABEL: mul_2xi8_sext_zext: ; X64-AVX: # %bb.0: # %entry ; X64-AVX-NEXT: movq {{.*}}(%rip), %rax -; X64-AVX-NEXT: movsbl 1(%rdi,%rdx), %ecx -; X64-AVX-NEXT: movsbl (%rdi,%rdx), %edi -; X64-AVX-NEXT: vmovd %edi, %xmm0 -; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl 1(%rsi,%rdx), %ecx -; X64-AVX-NEXT: movzbl (%rsi,%rdx), %esi -; X64-AVX-NEXT: vmovd %esi, %xmm1 -; 
X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 +; X64-AVX-NEXT: movzwl (%rdi,%rdx), %ecx +; X64-AVX-NEXT: vmovd %ecx, %xmm0 +; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0 +; X64-AVX-NEXT: movzwl (%rsi,%rdx), %ecx +; X64-AVX-NEXT: vmovd %ecx, %xmm1 +; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero ; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0 ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rdx,4) ; X64-AVX-NEXT: retq @@ -1118,24 +1100,18 @@ define void @mul_2xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %b ; ; X86-AVX-LABEL: mul_2xi16_sext: ; X86-AVX: # %bb.0: # %entry -; X86-AVX-NEXT: pushl %edi ; X86-AVX-NEXT: pushl %esi ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-AVX-NEXT: movl c, %esi -; X86-AVX-NEXT: movswl 2(%edx,%ecx), %edi -; X86-AVX-NEXT: movswl (%edx,%ecx), %edx -; X86-AVX-NEXT: vmovd %edx, %xmm0 -; X86-AVX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 -; X86-AVX-NEXT: movswl 2(%eax,%ecx), %edx -; X86-AVX-NEXT: movswl (%eax,%ecx), %eax -; X86-AVX-NEXT: vmovd %eax, %xmm1 -; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1 +; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vpmovsxwd %xmm0, %xmm0 +; X86-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vpmovsxwd %xmm1, %xmm1 ; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0 ; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4) ; X86-AVX-NEXT: popl %esi -; X86-AVX-NEXT: popl %edi ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: mul_2xi16_sext: @@ -1153,14 +1129,10 @@ define void @mul_2xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %b ; X64-AVX-LABEL: mul_2xi16_sext: ; X64-AVX: # %bb.0: # %entry ; X64-AVX-NEXT: movq {{.*}}(%rip), %rax -; X64-AVX-NEXT: movswl 2(%rdi,%rdx), %ecx -; X64-AVX-NEXT: movswl (%rdi,%rdx), %edi -; X64-AVX-NEXT: vmovd %edi, %xmm0 -; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 -; X64-AVX-NEXT: movswl 2(%rsi,%rdx), %ecx -; X64-AVX-NEXT: movswl (%rsi,%rdx), %esi -; X64-AVX-NEXT: vmovd %esi, %xmm1 -; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 +; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-AVX-NEXT: vpmovsxwd %xmm0, %xmm0 +; X64-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-AVX-NEXT: vpmovsxwd %xmm1, %xmm1 ; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0 ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rdx,4) ; X64-AVX-NEXT: retq @@ -1213,22 +1185,18 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon ; ; X86-AVX-LABEL: mul_2xi16_sext_zext: ; X86-AVX: # %bb.0: # %entry -; X86-AVX-NEXT: pushl %edi ; X86-AVX-NEXT: pushl %esi ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-AVX-NEXT: movl c, %esi -; X86-AVX-NEXT: movswl 2(%edx,%ecx), %edi -; X86-AVX-NEXT: movswl (%edx,%ecx), %edx -; X86-AVX-NEXT: vmovd %edx, %xmm0 -; X86-AVX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vpmovsxwd %xmm0, %xmm0 ; X86-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero ; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0 ; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4) ; X86-AVX-NEXT: popl %esi -; X86-AVX-NEXT: popl %edi ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: mul_2xi16_sext_zext: @@ -1252,10 +1220,8 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* 
nocapture readon ; X64-AVX-LABEL: mul_2xi16_sext_zext: ; X64-AVX: # %bb.0: # %entry ; X64-AVX-NEXT: movq {{.*}}(%rip), %rax -; X64-AVX-NEXT: movswl 2(%rdi,%rdx), %ecx -; X64-AVX-NEXT: movswl (%rdi,%rdx), %edi -; X64-AVX-NEXT: vmovd %edi, %xmm0 -; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-AVX-NEXT: vpmovsxwd %xmm0, %xmm0 ; X64-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X64-AVX-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero ; X64-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0 @@ -1460,20 +1426,14 @@ define void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) { ; ; X86-AVX-LABEL: mul_2xi8_varconst1: ; X86-AVX: # %bb.0: # %entry -; X86-AVX-NEXT: pushl %esi -; X86-AVX-NEXT: .cfi_def_cfa_offset 8 -; X86-AVX-NEXT: .cfi_offset %esi, -8 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-AVX-NEXT: movl c, %edx -; X86-AVX-NEXT: movzbl 1(%ecx,%eax), %esi -; X86-AVX-NEXT: movzbl (%ecx,%eax), %ecx +; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx ; X86-AVX-NEXT: vmovd %ecx, %xmm0 -; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 +; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0 ; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4) -; X86-AVX-NEXT: popl %esi -; X86-AVX-NEXT: .cfi_def_cfa_offset 4 ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: mul_2xi8_varconst1: @@ -1491,10 +1451,9 @@ define void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) { ; X64-AVX-LABEL: mul_2xi8_varconst1: ; X64-AVX: # %bb.0: # %entry ; X64-AVX-NEXT: movq {{.*}}(%rip), %rax -; X64-AVX-NEXT: movzbl 1(%rdi,%rsi), %ecx -; X64-AVX-NEXT: movzbl (%rdi,%rsi), %edx -; X64-AVX-NEXT: vmovd %edx, %xmm0 -; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx +; X64-AVX-NEXT: vmovd %ecx, %xmm0 +; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq @@ -1534,20 +1493,14 @@ define void @mul_2xi8_varconst2(i8* nocapture readonly %a, i64 %index) { ; ; X86-AVX-LABEL: mul_2xi8_varconst2: ; X86-AVX: # %bb.0: # %entry -; X86-AVX-NEXT: pushl %esi -; X86-AVX-NEXT: .cfi_def_cfa_offset 8 -; X86-AVX-NEXT: .cfi_offset %esi, -8 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-AVX-NEXT: movl c, %edx -; X86-AVX-NEXT: movsbl 1(%ecx,%eax), %esi -; X86-AVX-NEXT: movsbl (%ecx,%eax), %ecx +; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx ; X86-AVX-NEXT: vmovd %ecx, %xmm0 -; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 +; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0 ; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0 ; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4) -; X86-AVX-NEXT: popl %esi -; X86-AVX-NEXT: .cfi_def_cfa_offset 4 ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: mul_2xi8_varconst2: @@ -1566,10 +1519,9 @@ define void @mul_2xi8_varconst2(i8* nocapture readonly %a, i64 %index) { ; X64-AVX-LABEL: mul_2xi8_varconst2: ; X64-AVX: # %bb.0: # %entry ; X64-AVX-NEXT: movq {{.*}}(%rip), %rax -; X64-AVX-NEXT: movsbl 1(%rdi,%rsi), %ecx -; X64-AVX-NEXT: movsbl (%rdi,%rsi), %edx -; X64-AVX-NEXT: vmovd %edx, %xmm0 -; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx +; X64-AVX-NEXT: vmovd %ecx, %xmm0 +; 
X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0 ; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq @@ -1611,20 +1563,14 @@ define void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) { ; ; X86-AVX-LABEL: mul_2xi8_varconst3: ; X86-AVX: # %bb.0: # %entry -; X86-AVX-NEXT: pushl %esi -; X86-AVX-NEXT: .cfi_def_cfa_offset 8 -; X86-AVX-NEXT: .cfi_offset %esi, -8 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-AVX-NEXT: movl c, %edx -; X86-AVX-NEXT: movzbl 1(%ecx,%eax), %esi -; X86-AVX-NEXT: movzbl (%ecx,%eax), %ecx +; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx ; X86-AVX-NEXT: vmovd %ecx, %xmm0 -; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 +; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0 ; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4) -; X86-AVX-NEXT: popl %esi -; X86-AVX-NEXT: .cfi_def_cfa_offset 4 ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: mul_2xi8_varconst3: @@ -1645,10 +1591,9 @@ define void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) { ; X64-AVX-LABEL: mul_2xi8_varconst3: ; X64-AVX: # %bb.0: # %entry ; X64-AVX-NEXT: movq {{.*}}(%rip), %rax -; X64-AVX-NEXT: movzbl 1(%rdi,%rsi), %ecx -; X64-AVX-NEXT: movzbl (%rdi,%rsi), %edx -; X64-AVX-NEXT: vmovd %edx, %xmm0 -; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx +; X64-AVX-NEXT: vmovd %ecx, %xmm0 +; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq @@ -1690,20 +1635,14 @@ define void @mul_2xi8_varconst4(i8* nocapture readonly %a, i64 %index) { ; ; X86-AVX-LABEL: mul_2xi8_varconst4: ; X86-AVX: # %bb.0: # %entry -; X86-AVX-NEXT: pushl %esi -; X86-AVX-NEXT: .cfi_def_cfa_offset 8 -; X86-AVX-NEXT: .cfi_offset %esi, -8 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-AVX-NEXT: movl c, %edx -; X86-AVX-NEXT: movzbl 1(%ecx,%eax), %esi -; X86-AVX-NEXT: movzbl (%ecx,%eax), %ecx +; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx ; X86-AVX-NEXT: vmovd %ecx, %xmm0 -; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 +; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0 ; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4) -; X86-AVX-NEXT: popl %esi -; X86-AVX-NEXT: .cfi_def_cfa_offset 4 ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: mul_2xi8_varconst4: @@ -1724,10 +1663,9 @@ define void @mul_2xi8_varconst4(i8* nocapture readonly %a, i64 %index) { ; X64-AVX-LABEL: mul_2xi8_varconst4: ; X64-AVX: # %bb.0: # %entry ; X64-AVX-NEXT: movq {{.*}}(%rip), %rax -; X64-AVX-NEXT: movzbl 1(%rdi,%rsi), %ecx -; X64-AVX-NEXT: movzbl (%rdi,%rsi), %edx -; X64-AVX-NEXT: vmovd %edx, %xmm0 -; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx +; X64-AVX-NEXT: vmovd %ecx, %xmm0 +; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq @@ -1769,20 +1707,14 @@ define void @mul_2xi8_varconst5(i8* nocapture readonly %a, i64 %index) { ; ; 
X86-AVX-LABEL: mul_2xi8_varconst5: ; X86-AVX: # %bb.0: # %entry -; X86-AVX-NEXT: pushl %esi -; X86-AVX-NEXT: .cfi_def_cfa_offset 8 -; X86-AVX-NEXT: .cfi_offset %esi, -8 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-AVX-NEXT: movl c, %edx -; X86-AVX-NEXT: movsbl 1(%ecx,%eax), %esi -; X86-AVX-NEXT: movsbl (%ecx,%eax), %ecx +; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx ; X86-AVX-NEXT: vmovd %ecx, %xmm0 -; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 +; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0 ; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0 ; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4) -; X86-AVX-NEXT: popl %esi -; X86-AVX-NEXT: .cfi_def_cfa_offset 4 ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: mul_2xi8_varconst5: @@ -1803,10 +1735,9 @@ define void @mul_2xi8_varconst5(i8* nocapture readonly %a, i64 %index) { ; X64-AVX-LABEL: mul_2xi8_varconst5: ; X64-AVX: # %bb.0: # %entry ; X64-AVX-NEXT: movq {{.*}}(%rip), %rax -; X64-AVX-NEXT: movsbl 1(%rdi,%rsi), %ecx -; X64-AVX-NEXT: movsbl (%rdi,%rsi), %edx -; X64-AVX-NEXT: vmovd %edx, %xmm0 -; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx +; X64-AVX-NEXT: vmovd %ecx, %xmm0 +; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0 ; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq @@ -1848,20 +1779,14 @@ define void @mul_2xi8_varconst6(i8* nocapture readonly %a, i64 %index) { ; ; X86-AVX-LABEL: mul_2xi8_varconst6: ; X86-AVX: # %bb.0: # %entry -; X86-AVX-NEXT: pushl %esi -; X86-AVX-NEXT: .cfi_def_cfa_offset 8 -; X86-AVX-NEXT: .cfi_offset %esi, -8 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-AVX-NEXT: movl c, %edx -; X86-AVX-NEXT: movsbl 1(%ecx,%eax), %esi -; X86-AVX-NEXT: movsbl (%ecx,%eax), %ecx +; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx ; X86-AVX-NEXT: vmovd %ecx, %xmm0 -; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 +; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0 ; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0 ; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4) -; X86-AVX-NEXT: popl %esi -; X86-AVX-NEXT: .cfi_def_cfa_offset 4 ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: mul_2xi8_varconst6: @@ -1882,10 +1807,9 @@ define void @mul_2xi8_varconst6(i8* nocapture readonly %a, i64 %index) { ; X64-AVX-LABEL: mul_2xi8_varconst6: ; X64-AVX: # %bb.0: # %entry ; X64-AVX-NEXT: movq {{.*}}(%rip), %rax -; X64-AVX-NEXT: movsbl 1(%rdi,%rsi), %ecx -; X64-AVX-NEXT: movsbl (%rdi,%rsi), %edx -; X64-AVX-NEXT: vmovd %edx, %xmm0 -; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx +; X64-AVX-NEXT: vmovd %ecx, %xmm0 +; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0 ; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq @@ -1988,20 +1912,13 @@ define void @mul_2xi16_varconst2(i8* nocapture readonly %a, i64 %index) { ; ; X86-AVX-LABEL: mul_2xi16_varconst2: ; X86-AVX: # %bb.0: # %entry -; X86-AVX-NEXT: pushl %esi -; X86-AVX-NEXT: .cfi_def_cfa_offset 8 -; X86-AVX-NEXT: .cfi_offset %esi, -8 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-AVX-NEXT: movl c, %edx -; X86-AVX-NEXT: movswl 2(%ecx,%eax), %esi -; X86-AVX-NEXT: movswl (%ecx,%eax), %ecx -; X86-AVX-NEXT: vmovd %ecx, %xmm0 -; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vpmovsxwd %xmm0, %xmm0 ; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0 ; X86-AVX-NEXT: vmovq %xmm0, 
(%edx,%eax,4) -; X86-AVX-NEXT: popl %esi -; X86-AVX-NEXT: .cfi_def_cfa_offset 4 ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: mul_2xi16_varconst2: @@ -2019,10 +1936,8 @@ define void @mul_2xi16_varconst2(i8* nocapture readonly %a, i64 %index) { ; X64-AVX-LABEL: mul_2xi16_varconst2: ; X64-AVX: # %bb.0: # %entry ; X64-AVX-NEXT: movq {{.*}}(%rip), %rax -; X64-AVX-NEXT: movswl 2(%rdi,%rsi), %ecx -; X64-AVX-NEXT: movswl (%rdi,%rsi), %edx -; X64-AVX-NEXT: vmovd %edx, %xmm0 -; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-AVX-NEXT: vpmovsxwd %xmm0, %xmm0 ; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq @@ -2137,20 +2052,13 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) { ; ; X86-AVX-LABEL: mul_2xi16_varconst4: ; X86-AVX: # %bb.0: # %entry -; X86-AVX-NEXT: pushl %esi -; X86-AVX-NEXT: .cfi_def_cfa_offset 8 -; X86-AVX-NEXT: .cfi_offset %esi, -8 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-AVX-NEXT: movl c, %edx -; X86-AVX-NEXT: movswl 2(%ecx,%eax), %esi -; X86-AVX-NEXT: movswl (%ecx,%eax), %ecx -; X86-AVX-NEXT: vmovd %ecx, %xmm0 -; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vpmovsxwd %xmm0, %xmm0 ; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0 ; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4) -; X86-AVX-NEXT: popl %esi -; X86-AVX-NEXT: .cfi_def_cfa_offset 4 ; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: mul_2xi16_varconst4: @@ -2172,10 +2080,8 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) { ; X64-AVX-LABEL: mul_2xi16_varconst4: ; X64-AVX: # %bb.0: # %entry ; X64-AVX-NEXT: movq {{.*}}(%rip), %rax -; X64-AVX-NEXT: movswl 2(%rdi,%rsi), %ecx -; X64-AVX-NEXT: movswl (%rdi,%rsi), %edx -; X64-AVX-NEXT: vmovd %edx, %xmm0 -; X64-AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-AVX-NEXT: vpmovsxwd %xmm0, %xmm0 ; X64-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq diff --git a/test/CodeGen/X86/vec_int_to_fp-widen.ll b/test/CodeGen/X86/vec_int_to_fp-widen.ll index 97a5e5a96ac..b87a15442db 100644 --- a/test/CodeGen/X86/vec_int_to_fp-widen.ll +++ b/test/CodeGen/X86/vec_int_to_fp-widen.ll @@ -3168,19 +3168,15 @@ define <2 x double> @sitofp_load_2i16_to_2f64(<2 x i16> *%a) { ; ; SSE41-LABEL: sitofp_load_2i16_to_2f64: ; SSE41: # %bb.0: -; SSE41-NEXT: movswl 2(%rdi), %eax -; SSE41-NEXT: movswl (%rdi), %ecx -; SSE41-NEXT: movd %ecx, %xmm0 -; SSE41-NEXT: pinsrd $1, %eax, %xmm0 +; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 ; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: sitofp_load_2i16_to_2f64: ; AVX: # %bb.0: -; AVX-NEXT: movswl 2(%rdi), %eax -; AVX-NEXT: movswl (%rdi), %ecx -; AVX-NEXT: vmovd %ecx, %xmm0 -; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX-NEXT: retq %ld = load <2 x i16>, <2 x i16> *%a @@ -3201,19 +3197,17 @@ define <2 x double> @sitofp_load_2i8_to_2f64(<2 x i8> *%a) { ; ; SSE41-LABEL: sitofp_load_2i8_to_2f64: ; SSE41: # %bb.0: -; SSE41-NEXT: movsbl 1(%rdi), %eax -; SSE41-NEXT: movsbl (%rdi), %ecx -; SSE41-NEXT: movd %ecx, %xmm0 -; SSE41-NEXT: pinsrd $1, %eax, %xmm0 +; SSE41-NEXT: movzwl 
(%rdi), %eax +; SSE41-NEXT: movd %eax, %xmm0 +; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 ; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: sitofp_load_2i8_to_2f64: ; AVX: # %bb.0: -; AVX-NEXT: movsbl 1(%rdi), %eax -; AVX-NEXT: movsbl (%rdi), %ecx -; AVX-NEXT: vmovd %ecx, %xmm0 -; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: movzwl (%rdi), %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX-NEXT: retq %ld = load <2 x i8>, <2 x i8> *%a @@ -3606,19 +3600,17 @@ define <2 x double> @uitofp_load_2i8_to_2f64(<2 x i8> *%a) { ; ; SSE41-LABEL: uitofp_load_2i8_to_2f64: ; SSE41: # %bb.0: -; SSE41-NEXT: movzbl 1(%rdi), %eax -; SSE41-NEXT: movzbl (%rdi), %ecx -; SSE41-NEXT: movd %ecx, %xmm0 -; SSE41-NEXT: pinsrd $1, %eax, %xmm0 +; SSE41-NEXT: movzwl (%rdi), %eax +; SSE41-NEXT: movd %eax, %xmm0 +; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: uitofp_load_2i8_to_2f64: ; AVX: # %bb.0: -; AVX-NEXT: movzbl 1(%rdi), %eax -; AVX-NEXT: movzbl (%rdi), %ecx -; AVX-NEXT: vmovd %ecx, %xmm0 -; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: movzwl (%rdi), %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX-NEXT: retq %ld = load <2 x i8>, <2 x i8> *%a diff --git a/test/CodeGen/X86/vector-sext-widen.ll b/test/CodeGen/X86/vector-sext-widen.ll index 3986600d9ba..d0969c6607e 100644 --- a/test/CodeGen/X86/vector-sext-widen.ll +++ b/test/CodeGen/X86/vector-sext-widen.ll @@ -5062,29 +5062,26 @@ define <2 x i32> @sext_2i8_to_2i32(<2 x i8>* %addr) { ; ; SSE41-LABEL: sext_2i8_to_2i32: ; SSE41: # %bb.0: -; SSE41-NEXT: movsbl 1(%rdi), %eax -; SSE41-NEXT: movsbl (%rdi), %ecx -; SSE41-NEXT: movd %ecx, %xmm0 -; SSE41-NEXT: pinsrd $1, %eax, %xmm0 +; SSE41-NEXT: movzwl (%rdi), %eax +; SSE41-NEXT: movd %eax, %xmm0 +; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 ; SSE41-NEXT: paddd %xmm0, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: sext_2i8_to_2i32: ; AVX: # %bb.0: -; AVX-NEXT: movsbl 1(%rdi), %eax -; AVX-NEXT: movsbl (%rdi), %ecx -; AVX-NEXT: vmovd %ecx, %xmm0 -; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: movzwl (%rdi), %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 ; AVX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; X32-SSE41-LABEL: sext_2i8_to_2i32: ; X32-SSE41: # %bb.0: ; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE41-NEXT: movsbl 1(%eax), %ecx -; X32-SSE41-NEXT: movsbl (%eax), %eax +; X32-SSE41-NEXT: movzwl (%eax), %eax ; X32-SSE41-NEXT: movd %eax, %xmm0 -; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0 +; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm0 ; X32-SSE41-NEXT: paddd %xmm0, %xmm0 ; X32-SSE41-NEXT: retl %x = load <2 x i8>, <2 x i8>* %addr, align 1 diff --git a/test/CodeGen/X86/vector-zext-widen.ll b/test/CodeGen/X86/vector-zext-widen.ll index 853be12f4e8..bc8896f0116 100644 --- a/test/CodeGen/X86/vector-zext-widen.ll +++ b/test/CodeGen/X86/vector-zext-widen.ll @@ -2272,19 +2272,17 @@ define <2 x i32> @zext_2i8_to_2i32(<2 x i8>* %addr) { ; ; SSE41-LABEL: zext_2i8_to_2i32: ; SSE41: # %bb.0: -; SSE41-NEXT: movzbl 1(%rdi), %eax -; SSE41-NEXT: movzbl (%rdi), %ecx -; SSE41-NEXT: movd %ecx, %xmm0 -; SSE41-NEXT: pinsrd $1, %eax, %xmm0 +; SSE41-NEXT: movzwl (%rdi), %eax +; SSE41-NEXT: movd %eax, %xmm0 +; SSE41-NEXT: 
pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; SSE41-NEXT: paddd %xmm0, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: zext_2i8_to_2i32: ; AVX: # %bb.0: -; AVX-NEXT: movzbl 1(%rdi), %eax -; AVX-NEXT: movzbl (%rdi), %ecx -; AVX-NEXT: vmovd %ecx, %xmm0 -; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: movzwl (%rdi), %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; AVX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq %x = load <2 x i8>, <2 x i8>* %addr, align 1 -- 2.11.0
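
For reference, a minimal IR sketch of the pattern the tests above exercise, distilled from the sext_2i8_to_2i32 test in vector-sext-widen.ll; the function name is illustrative and the snippet is not part of the patch:

; A <2 x i8> load feeding a sign extension to <2 x i32>, the shape covered by
; the sext_2i8_to_2i32 and sitofp_load_2i8_to_2f64 tests in this patch.
define <2 x i32> @sext_load_2i8_to_2i32(<2 x i8>* %addr) {
  %x = load <2 x i8>, <2 x i8>* %addr, align 1
  %y = sext <2 x i8> %x to <2 x i32>
  ret <2 x i32> %y
}

Per the updated SSE41 and AVX CHECK lines, this shape now selects a movzwl scalar load, a movd, and pmovsxbd under -x86-experimental-vector-widening-legalization, rather than two movsbl loads combined with pinsrd.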