From: Clement Courbet Date: Fri, 21 Apr 2017 09:20:55 +0000 (+0000) Subject: use repmovsb when optimizing for minsize X-Git-Tag: android-x86-7.1-r4~17377 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=69190edbb93a67fb7a83bcb18c5e7bbc83239836;p=android-x86%2Fexternal-llvm.git use repmovsb when optimizing for minsize git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300960 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index f09040bfe88..1a72a0ba3a6 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -181,6 +181,24 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( return Chain; } +namespace { + +// Represents a cover of a buffer of SizeVal bytes with blocks of size +// AVT, as well as how many bytes remain (BytesLeft is always smaller than +// the block size). +struct RepMovsRepeats { + RepMovsRepeats(const uint64_t SizeVal, const MVT& AVT) { + const unsigned UBytes = AVT.getSizeInBits() / 8; + Count = SizeVal / UBytes; + BytesLeft = SizeVal % UBytes; + } + + unsigned Count; + unsigned BytesLeft; +}; + +} // namespace + SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, @@ -231,14 +249,18 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( // QWORD aligned AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; - unsigned UBytes = AVT.getSizeInBits() / 8; - unsigned CountVal = SizeVal / UBytes; - SDValue Count = DAG.getIntPtrConstant(CountVal, dl); - unsigned BytesLeft = SizeVal % UBytes; + RepMovsRepeats Repeats(SizeVal, AVT); + if (Repeats.BytesLeft > 0 && + DAG.getMachineFunction().getFunction()->optForMinSize()) { + // When aggressively optimizing for size, avoid generating the code to handle + // BytesLeft. 
+ AVT = MVT::i8; + Repeats = RepMovsRepeats(SizeVal, AVT); + } SDValue InFlag; Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX, - Count, InFlag); + DAG.getIntPtrConstant(Repeats.Count, dl), InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI, Dst, InFlag); @@ -253,9 +275,9 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( SmallVector<SDValue, 4> Results; Results.push_back(RepMovs); - if (BytesLeft) { + if (Repeats.BytesLeft) { // Handle the last 1 - 7 bytes. - unsigned Offset = SizeVal - BytesLeft; + unsigned Offset = SizeVal - Repeats.BytesLeft; EVT DstVT = Dst.getValueType(); EVT SrcVT = Src.getValueType(); EVT SizeVT = Size.getValueType(); @@ -266,7 +288,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)), - DAG.getConstant(BytesLeft, dl, SizeVT), + DAG.getConstant(Repeats.BytesLeft, dl, + SizeVT), Align, isVolatile, AlwaysInline, false, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); diff --git a/test/CodeGen/X86/memcpy-struct-by-value.ll b/test/CodeGen/X86/memcpy-struct-by-value.ll index bf889161023..3c01adbb83f 100644 --- a/test/CodeGen/X86/memcpy-struct-by-value.ll +++ b/test/CodeGen/X86/memcpy-struct-by-value.ll @@ -17,3 +17,29 @@ define void @test1(%struct.large* nocapture %x) nounwind { ; FAST: rep;movsb ; HASWELL: rep;movsb } + +define void @test2(%struct.large* nocapture %x) nounwind minsize { + call void @foo(%struct.large* align 8 byval %x) + ret void + +; ALL-LABEL: test2: +; NOFAST: rep;movsq +; GENERIC: rep;movsq +; FAST: rep;movsb +; HASWELL: rep;movsb +} + +%struct.large_oddsize = type { [4095 x i8] } + +declare void @foo_oddsize(%struct.large_oddsize* align 8 byval) nounwind + +define void @test3(%struct.large_oddsize* nocapture %x) nounwind minsize { + call void @foo_oddsize(%struct.large_oddsize* align 8 byval %x) + ret void + +; ALL-LABEL: test3: +; 
NOFAST: rep;movsb +; GENERIC: rep;movsb +; FAST: rep;movsb +; HASWELL: rep;movsb +}