From 3ab10c1918df0d00e31eef7a6f4de6ece022282d Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 5 Jan 2015 18:52:29 +0000 Subject: [PATCH] [PowerPC] Remove zexts after i32 ctlz The 64-bit semantics of cntlzw are not special: the 32-bit leading-zero count is stored as a 64-bit value in the range [0,32]. As a result, it is always zero extended, and it can be added to the PPCISelDAGToDAG peephole optimization as a frontier instruction for the removal of unnecessary zero extensions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225192 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 7 +++++++ lib/Target/PowerPC/PPCInstr64Bit.td | 5 ++++- test/CodeGen/PowerPC/rm-zext.ll | 24 ++++++++++++++++++++---- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 3e787bedd27..ab3a01d6b39 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -3736,6 +3736,12 @@ static bool PeepholePPC64ZExtGather(SDValue Op32, return true; } + // CNTLZW always produces a 64-bit value in [0,32], and so is zero extended. + if (Op32.getMachineOpcode() == PPC::CNTLZW) { + ToPromote.insert(Op32.getNode()); + return true; + } + // Next, check for those instructions we can look through. 
// Assuming the mask does not wrap around, then the higher-order bits are @@ -3925,6 +3931,7 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() { case PPC::LIS: NewOpcode = PPC::LIS8; break; case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break; case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break; + case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break; case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break; case PPC::OR: NewOpcode = PPC::OR8; break; case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 29b13f8a53a..1b6fcbeec73 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -551,7 +551,10 @@ defm SRAD : XForm_6rc<31, 794, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), "srad", "$rA, $rS, $rB", IIC_IntRotateD, [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64; -let Interpretation64Bit = 1, isCodeGenOnly = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { +defm CNTLZW8 : XForm_11r<31, 26, (outs g8rc:$rA), (ins g8rc:$rS), + "cntlzw", "$rA, $rS", IIC_IntGeneral, []>; + defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$rA), (ins g8rc:$rS), "extsb", "$rA, $rS", IIC_IntSimple, [(set i64:$rA, (sext_inreg i64:$rS, i8))]>; diff --git a/test/CodeGen/PowerPC/rm-zext.ll b/test/CodeGen/PowerPC/rm-zext.ll index 291b7919280..33995e114d2 100644 --- a/test/CodeGen/PowerPC/rm-zext.ll +++ b/test/CodeGen/PowerPC/rm-zext.ll @@ -40,10 +40,10 @@ entry: } ; Function Attrs: nounwind readnone -declare i32 @llvm.bswap.i32(i32) #1 +declare i32 @llvm.bswap.i32(i32) #0 ; Function Attrs: nounwind readonly -define zeroext i32 @bs32(i32* nocapture readonly %x) #0 { +define zeroext i32 @bs32(i32* nocapture readonly %x) #1 { entry: %0 = load i32* %x, align 4 %1 = tail call i32 @llvm.bswap.i32(i32 %0) @@ -55,7 +55,7 @@ entry: } ; Function Attrs: nounwind readonly -define zeroext i16 @bs16(i16* nocapture readonly %x) #0 { +define zeroext i16 @bs16(i16* nocapture readonly %x) #1 { 
entry: %0 = load i16* %x, align 2 %1 = tail call i16 @llvm.bswap.i16(i16 %0) @@ -67,7 +67,23 @@ entry: } ; Function Attrs: nounwind readnone -declare i16 @llvm.bswap.i16(i16) #1 +declare i16 @llvm.bswap.i16(i16) #0 + +; Function Attrs: nounwind readnone +define zeroext i32 @ctlz32(i32 zeroext %x) #0 { +entry: + %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) + ret i32 %0 + +; CHECK-LABEL: @ctlz32 +; CHECK-NOT: rldicl 3, {{[0-9]+}}, 0, 32 +; CHECK: blr +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.ctlz.i32(i32, i1) #0 + attributes #0 = { nounwind readnone } +attributes #1 = { nounwind readonly } -- 2.11.0