From d8f57105010cc7e78026e511d5def873fc91e0e7 Mon Sep 17 00:00:00 2001 From: Haicheng Wu Date: Sun, 18 Feb 2018 13:51:33 +0000 Subject: [PATCH] [AArch64] Coalesce Copy Zero during instruction selection Add special case for copy of zero to avoid a double copy. Differential Revision: https://reviews.llvm.org/D36104 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@325459 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 30 ++++++++++++++- test/CodeGen/AArch64/arm64-addr-type-promotion.ll | 1 + test/CodeGen/AArch64/arm64-cse.ll | 2 +- test/CodeGen/AArch64/copy-zero-reg.ll | 47 +++++++++++++++++++++++ test/CodeGen/AArch64/i128-fast-isel-fallback.ll | 2 +- 5 files changed, 79 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/AArch64/copy-zero-reg.ll diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 07922959fbb..c4fc79ccd3e 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -2777,7 +2777,35 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { } break; } - + case ISD::CopyToReg: { + // Special case for copy of zero to avoid a double copy. + SDNode *CopyVal = Node->getOperand(2).getNode(); + ConstantSDNode *CopyValConst = dyn_cast<ConstantSDNode>(CopyVal); + if (!CopyValConst || !CopyValConst->isNullValue()) + break; + const SDValue &Dest = Node->getOperand(1); + if (!TargetRegisterInfo::isVirtualRegister( + cast<RegisterSDNode>(Dest)->getReg())) + break; + unsigned ZeroReg; + EVT ZeroVT = CopyValConst->getValueType(0); + if (ZeroVT == MVT::i32) + ZeroReg = AArch64::WZR; + else if (ZeroVT == MVT::i64) + ZeroReg = AArch64::XZR; + else + break; + unsigned NumOperands = Node->getNumOperands(); + SDValue ZeroRegVal = CurDAG->getRegister(ZeroReg, ZeroVT); + // Replace the source operand (#2) with ZeroRegVal. + SDValue Ops[] = {Node->getOperand(0), Node->getOperand(1), ZeroRegVal, + (NumOperands == 4) ? 
Node->getOperand(3) : SDValue()}; + SDValue New = + CurDAG->getNode(ISD::CopyToReg, SDLoc(Node), Node->getVTList(), + makeArrayRef(Ops, NumOperands)); + ReplaceNode(Node, New.getNode()); + return; + } case ISD::FrameIndex: { // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. int FI = cast<FrameIndexSDNode>(Node)->getIndex(); diff --git a/test/CodeGen/AArch64/arm64-addr-type-promotion.ll b/test/CodeGen/AArch64/arm64-addr-type-promotion.ll index 0009fe52e17..7e5f7dbfc40 100644 --- a/test/CodeGen/AArch64/arm64-addr-type-promotion.ll +++ b/test/CodeGen/AArch64/arm64-addr-type-promotion.ll @@ -28,6 +28,7 @@ define zeroext i8 @fullGtU(i32 %i1, i32 %i2) { ; Next BB ; CHECK: ldrb [[LOADEDVAL3:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #2] ; CHECK-NEXT: ldrb [[LOADEDVAL4:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #2] +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: cmp [[LOADEDVAL3]], [[LOADEDVAL4]] entry: %idxprom = sext i32 %i1 to i64 diff --git a/test/CodeGen/AArch64/arm64-cse.ll b/test/CodeGen/AArch64/arm64-cse.ll index 030857df777..39cb3f753d3 100644 --- a/test/CodeGen/AArch64/arm64-cse.ll +++ b/test/CodeGen/AArch64/arm64-cse.ll @@ -10,7 +10,7 @@ entry: ; CHECK: subs ; CHECK-NOT: cmp ; CHECK-NOT: sub -; CHECK: b.ge +; CHECK: b.lt ; CHECK: sub ; CHECK: sub ; CHECK-NOT: sub diff --git a/test/CodeGen/AArch64/copy-zero-reg.ll b/test/CodeGen/AArch64/copy-zero-reg.ll new file mode 100644 index 00000000000..a31d6b31cdf --- /dev/null +++ b/test/CodeGen/AArch64/copy-zero-reg.ll @@ -0,0 +1,47 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s + +; Verify there is no tiny block having only one mov wzr instruction between for.body.lr.ph and sw.epilog.loopexit +define void @unroll_by_2(i32 %trip_count, i32* %p) { +; CHECK-LABEL: unroll_by_2 +; CHECK: // %for.body.lr.ph +; CHECK: mov w{{[0-9]+}}, wzr +; CHECK: b.eq +; CHECK-NOT: mov w{{[0-9]+}}, wzr +; CHECK: // %for.body.lr.ph.new +; CHECK: // %for.body +; CHECK: // %sw.epilog.loopexit +; CHECK: // %for.body.epil +; 
CHECK: // %exit +; CHECK-NEXT: ret +for.body.lr.ph: + %xtraiter = and i32 %trip_count, 1 + %cmp = icmp eq i32 %trip_count, 1 + br i1 %cmp, label %sw.epilog.loopexit, label %for.body.lr.ph.new + +for.body.lr.ph.new: + %unroll_iter = sub nsw i32 %trip_count, %xtraiter + br label %for.body + +for.body: + %indvars = phi i32 [ 0, %for.body.lr.ph.new ], [ %indvars.next, %for.body ] + %niter = phi i32 [ %unroll_iter, %for.body.lr.ph.new ], [ %niter.nsub, %for.body ] + %array = getelementptr inbounds i32, i32 * %p, i32 %indvars + store i32 %niter, i32* %array + %indvars.next = add i32 %indvars, 2 + %niter.nsub = add i32 %niter, -2 + %niter.ncmp = icmp eq i32 %niter.nsub, 0 + br i1 %niter.ncmp, label %sw.epilog.loopexit, label %for.body + +sw.epilog.loopexit: + %indvars.unr = phi i32 [ 0, %for.body.lr.ph ], [ %indvars.next, %for.body ] + %lcmp.mod = icmp eq i32 %xtraiter, 0 + br i1 %lcmp.mod, label %exit, label %for.body.epil + +for.body.epil: + %array.epil = getelementptr inbounds i32, i32* %p, i32 %indvars.unr + store i32 %indvars.unr, i32* %array.epil + br label %exit + +exit: + ret void +} diff --git a/test/CodeGen/AArch64/i128-fast-isel-fallback.ll b/test/CodeGen/AArch64/i128-fast-isel-fallback.ll index 80c83bd4823..9d445d9a792 100644 --- a/test/CodeGen/AArch64/i128-fast-isel-fallback.ll +++ b/test/CodeGen/AArch64/i128-fast-isel-fallback.ll @@ -10,7 +10,7 @@ define void @test1() { ; registers that make up the i128 pair ; CHECK: mov x0, xzr -; CHECK: mov x1, x0 +; CHECK: mov x1, xzr ; CHECK: bl _test2 } -- 2.11.0