From 537a9f7685a8cd9d58fd7e448d89e513c1a45fb9 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 2 Nov 2011 22:52:45 +0000 Subject: [PATCH] Try to lower memset/memcpy/memmove to vector instructions on ARM where the alignment permits. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143582 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 28 +++++++++++++++++++++++++ lib/Target/ARM/ARMISelLowering.h | 7 ++++++- test/CodeGen/ARM/2011-10-26-memset-with-neon.ll | 20 ++++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/ARM/2011-10-26-memset-with-neon.ll diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 31e522d4d70..222a399c28c 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -8127,6 +8127,34 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { } } +static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, + unsigned AlignCheck) { + return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) && + (DstAlign == 0 || DstAlign % AlignCheck == 0)); +} + +EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, + bool NonScalarIntSafe, + bool MemcpyStrSrc, + MachineFunction &MF) const { + const Function *F = MF.getFunction(); + + // See if we can use NEON instructions for this... + if (NonScalarIntSafe && + !F->hasFnAttr(Attribute::NoImplicitFloat) && + Subtarget->hasNEON()) { + if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) { + return MVT::v4i32; + } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) { + return MVT::v2i32; + } + } + + // Let the target-independent logic figure it out. + return MVT::Other; +} + static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { if (V < 0) return false; diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 5da9b27fca6..43e43dd35bf 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -266,9 +266,14 @@ namespace llvm { /// allowsUnalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses. of the specified type. - /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON? virtual bool allowsUnalignedMemoryAccesses(EVT VT) const; + virtual EVT getOptimalMemOpType(uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, + bool NonScalarIntSafe, + bool MemcpyStrSrc, + MachineFunction &MF) const; + /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; diff --git a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll new file mode 100644 index 00000000000..3c9216cde7b --- /dev/null +++ b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s + +; Should trigger a NEON store. +; CHECK: vstr.64 +define void @f_0_12(i8* nocapture %c) nounwind optsize { +entry: + call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false) + ret void +} + +; Trigger multiple NEON stores. +; CHECK: vstmia +; CHECK-NEXT: vstmia +define void @f_0_40(i8* nocapture %c) nounwind optsize { +entry: + call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 40, i32 16, i1 false) + ret void +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind -- 2.11.0