From 7e3dc40fab4df617b6a17777ca9c76dc6c687d6e Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Wed, 27 Aug 2014 11:39:47 +0000
Subject: [PATCH] [x86] Fix a regression introduced with r213897 for 32-bit
 targets where we stopped efficiently lowering sextload using the SSE41
 instructions for that operation.

This is a consequence of a bad predicate I used thinking of the memory
access needs. The code actually handles the cases where the predicate
doesn't apply, and handles them much better. =] Simple fix and a test
case added.

Fixes PR20767.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216538 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp |  6 ++----
 test/CodeGen/X86/vec_sext.ll       | 15 +++++++++++++--
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index f63beab44fd..8abbc1d4e1c 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1016,10 +1016,8 @@ void X86TargetLowering::resetOperationActions() {
     // scalars) and extend in-register to a legal 128-bit vector type. For sext
     // loads these must work with a single scalar load.
     setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Custom);
-    if (Subtarget->is64Bit()) {
-      setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Custom);
-      setLoadExtAction(ISD::SEXTLOAD, MVT::v8i8, Custom);
-    }
+    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Custom);
+    setLoadExtAction(ISD::SEXTLOAD, MVT::v8i8, Custom);
     setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Custom);
     setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Custom);
     setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, Custom);
diff --git a/test/CodeGen/X86/vec_sext.ll b/test/CodeGen/X86/vec_sext.ll
index 776ddec2e63..0fd66a30939 100644
--- a/test/CodeGen/X86/vec_sext.ll
+++ b/test/CodeGen/X86/vec_sext.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64
-; PR 9267
+; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
 
 define<4 x i32> @func_16_32() {
   %F = load <4 x i16>* undef
@@ -67,3 +67,14 @@ define<4 x i64> @const_16_64() {
   ret <4 x i64> %G
 }
 
+define <4 x i32> @sextload(<4 x i16>* %ptr) {
+; From PR20767 - make sure that we correctly use SSE4.1 to do sign extension
+; loads for both 32-bit and 64-bit x86 targets.
+; CHECK-LABEL: sextload:
+; CHECK: vpmovsxwd {{.*}}, %xmm0
+; CHECK-NEXT: ret
+entry:
+  %l = load<4 x i16>* %ptr
+  %m = sext<4 x i16> %l to <4 x i32>
+  ret <4 x i32> %m
+}
-- 
2.11.0