From c108e2cef7689c3c5ce4bc5edd5fe0c32f735b02 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Tue, 8 Jan 2019 10:12:36 +0000 Subject: [PATCH] [ARM] Add missing patterns for DSP muls Using a PatLeaf for sext_16_node allowed matching smulbb and smlabb instructions once the operands had been sign extended. But we also need to use sext_inreg operands along with sext_16_node to catch a few more cases that enable us to remove the unnecessary sxth. Differential Revision: https://reviews.llvm.org/D55992 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350613 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 95 +++++++++----------- lib/Target/ARM/ARMInstrThumb2.td | 45 +++++----- test/CodeGen/ARM/smul.ll | 181 ++++++++++++++++++++++++++++++++------- 3 files changed, 209 insertions(+), 112 deletions(-) diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 4f42601a894..46bdeba6bbd 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -426,24 +426,22 @@ def imm16_31 : ImmLeafComputeNumSignBits(SDValue(N,0)) >= 17) - return true; - - if (N->getOpcode() != ISD::SRA) - return false; - if (N->getOperand(0).getOpcode() != ISD::SHL) - return false; - - auto *ShiftVal = dyn_cast(N->getOperand(1)); - if (!ShiftVal || ShiftVal->getZExtValue() != 16) - return false; + return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17; +}]>; - ShiftVal = dyn_cast(N->getOperand(0)->getOperand(1)); - if (!ShiftVal || ShiftVal->getZExtValue() != 16) - return false; +def sext_bottom_16 : PatFrag<(ops node:$a), + (sext_inreg node:$a, i16)>; +def sext_top_16 : PatFrag<(ops node:$a), + (i32 (sra node:$a, (i32 16)))>; - return true; -}]>; +def bb_mul : PatFrag<(ops node:$a, node:$b), + (mul (sext_bottom_16 node:$a), (sext_bottom_16 node:$b))>; +def bt_mul : PatFrag<(ops node:$a, node:$b), + (mul (sext_bottom_16 node:$a), (sra node:$b, (i32 16)))>; +def tb_mul : PatFrag<(ops node:$a, node:$b), + (mul (sra node:$a, (i32 
16)), (sext_bottom_16 node:$b))>; +def tt_mul : PatFrag<(ops node:$a, node:$b), + (mul (sra node:$a, (i32 16)), (sra node:$b, (i32 16)))>; /// Split a 32-bit immediate into two 16 bit parts. def hi16 : SDNodeXForm { def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), - (sext_inreg GPR:$Rm, i16)))]>, + [(set GPR:$Rd, (bb_mul GPR:$Rn, GPR:$Rm))]>, Requires<[IsARM, HasV5TE]>, Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), - (sra GPR:$Rm, (i32 16))))]>, + [(set GPR:$Rd, (bt_mul GPR:$Rn, GPR:$Rm))]>, Requires<[IsARM, HasV5TE]>, Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)), - (sext_inreg GPR:$Rm, i16)))]>, + [(set GPR:$Rd, (tb_mul GPR:$Rn, GPR:$Rm))]>, Requires<[IsARM, HasV5TE]>, Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)), - (sra GPR:$Rm, (i32 16))))]>, + [(set GPR:$Rd, (tt_mul GPR:$Rn, GPR:$Rm))]>, Requires<[IsARM, HasV5TE]>, Sched<[WriteMUL16, ReadMUL, ReadMUL]>; @@ -4287,35 +4281,31 @@ multiclass AI_smla { (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra", [(set GPRnopc:$Rd, (add GPR:$Ra, - (mul (sext_inreg GPRnopc:$Rn, i16), - (sext_inreg GPRnopc:$Rm, i16))))]>, + (bb_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>, Requires<[IsARM, HasV5TE, UseMulOps]>, Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, 
!strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPRnopc:$Rd, - (add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16), - (sra GPRnopc:$Rm, (i32 16)))))]>, + [(set GPRnopc:$Rd, (add GPR:$Ra, + (bt_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>, Requires<[IsARM, HasV5TE, UseMulOps]>, Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPRnopc:$Rd, - (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)), - (sext_inreg GPRnopc:$Rm, i16))))]>, + [(set GPRnopc:$Rd, (add GPR:$Ra, + (tb_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>, Requires<[IsARM, HasV5TE, UseMulOps]>, Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPRnopc:$Rd, - (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)), - (sra GPRnopc:$Rm, (i32 16)))))]>, + [(set GPRnopc:$Rd, (add GPR:$Ra, + (tt_mul GPRnopc:$Rn, GPRnopc:$Rm)))]>, Requires<[IsARM, HasV5TE, UseMulOps]>, Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; @@ -5816,26 +5806,21 @@ def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>; // smul* and smla* def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b), - (SMULBB GPR:$a, GPR:$b)>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; -def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))), - (SMULBT GPR:$a, GPR:$b)>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; -def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b), - (SMULTB GPR:$a, GPR:$b)>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; -def : ARMV5MOPat<(add GPR:$acc, - (mul sext_16_node:$a, sext_16_node:$b)), - (SMLABB GPR:$a, GPR:$b, GPR:$acc)>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; -def : ARMV5MOPat<(add GPR:$acc, - (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))), - (SMLABT GPR:$a, GPR:$b, GPR:$acc)>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; 
-def : ARMV5MOPat<(add GPR:$acc, - (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), - (SMLATB GPR:$a, GPR:$b, GPR:$acc)>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; + (SMULBB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul sext_16_node:$a, (sext_bottom_16 GPR:$b)), + (SMULBB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul sext_16_node:$a, (sext_top_16 GPR:$b)), + (SMULBT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul (sext_top_16 GPR:$a), sext_16_node:$b), + (SMULTB GPR:$a, GPR:$b)>; +def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, sext_16_node:$b)), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sext_bottom_16 GPR:$b))), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sext_top_16 GPR:$b))), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5MOPat<(add GPR:$acc, (mul (sext_top_16 GPR:$a), sext_16_node:$b)), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; def : ARMV5TEPat<(int_arm_smulbb GPR:$a, GPR:$b), (SMULBB GPR:$a, GPR:$b)>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 18a7ee4c419..7a6673b49d5 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -2732,28 +2732,25 @@ class T2ThreeRegSMUL op22_20, bits<2> op5_4, string opc, } def t2SMULBB : T2ThreeRegSMUL<0b001, 0b00, "smulbb", - [(set rGPR:$Rd, (mul (sext_inreg rGPR:$Rn, i16), - (sext_inreg rGPR:$Rm, i16)))]>; + [(set rGPR:$Rd, (bb_mul rGPR:$Rn, rGPR:$Rm))]>; def t2SMULBT : T2ThreeRegSMUL<0b001, 0b01, "smulbt", - [(set rGPR:$Rd, (mul (sext_inreg rGPR:$Rn, i16), - (sra rGPR:$Rm, (i32 16))))]>; + [(set rGPR:$Rd, (bt_mul rGPR:$Rn, rGPR:$Rm))]>; def t2SMULTB : T2ThreeRegSMUL<0b001, 0b10, "smultb", - [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)), - (sext_inreg rGPR:$Rm, i16)))]>; + [(set rGPR:$Rd, (tb_mul rGPR:$Rn, rGPR:$Rm))]>; def t2SMULTT : T2ThreeRegSMUL<0b001, 0b11, "smultt", - [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)), - (sra rGPR:$Rm, (i32 16))))]>; + [(set rGPR:$Rd, 
(tt_mul rGPR:$Rn, rGPR:$Rm))]>; def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb", [(set rGPR:$Rd, (ARMsmulwb rGPR:$Rn, rGPR:$Rm))]>; def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt", [(set rGPR:$Rd, (ARMsmulwt rGPR:$Rn, rGPR:$Rm))]>; -def : Thumb2DSPPat<(mul sext_16_node:$Rm, sext_16_node:$Rn), - (t2SMULBB rGPR:$Rm, rGPR:$Rn)>; -def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16))), +def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sext_bottom_16 rGPR:$Rm)), + (t2SMULBB rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sext_top_16 rGPR:$Rm)), (t2SMULBT rGPR:$Rn, rGPR:$Rm)>; -def : Thumb2DSPPat<(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm), +def : Thumb2DSPPat<(mul (sext_top_16 rGPR:$Rn), sext_16_node:$Rm), (t2SMULTB rGPR:$Rn, rGPR:$Rm)>; + def : Thumb2DSPPat<(int_arm_smulbb rGPR:$Rn, rGPR:$Rm), (t2SMULBB rGPR:$Rn, rGPR:$Rm)>; def : Thumb2DSPPat<(int_arm_smulbt rGPR:$Rn, rGPR:$Rm), @@ -2781,18 +2778,13 @@ class T2FourRegSMLA op22_20, bits<2> op5_4, string opc, } def t2SMLABB : T2FourRegSMLA<0b001, 0b00, "smlabb", - [(set rGPR:$Rd, (add rGPR:$Ra, - (mul (sext_inreg rGPR:$Rn, i16), - (sext_inreg rGPR:$Rm, i16))))]>; + [(set rGPR:$Rd, (add rGPR:$Ra, (bb_mul rGPR:$Rn, rGPR:$Rm)))]>; def t2SMLABT : T2FourRegSMLA<0b001, 0b01, "smlabt", - [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sext_inreg rGPR:$Rn, i16), - (sra rGPR:$Rm, (i32 16)))))]>; + [(set rGPR:$Rd, (add rGPR:$Ra, (bt_mul rGPR:$Rn, rGPR:$Rm)))]>; def t2SMLATB : T2FourRegSMLA<0b001, 0b10, "smlatb", - [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)), - (sext_inreg rGPR:$Rm, i16))))]>; + [(set rGPR:$Rd, (add rGPR:$Ra, (tb_mul rGPR:$Rn, rGPR:$Rm)))]>; def t2SMLATT : T2FourRegSMLA<0b001, 0b11, "smlatt", - [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)), - (sra rGPR:$Rm, (i32 16)))))]>; + [(set rGPR:$Rd, (add rGPR:$Ra, (tt_mul rGPR:$Rn, rGPR:$Rm)))]>; def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb", [(set rGPR:$Rd, (add rGPR:$Ra, (ARMsmulwb rGPR:$Rn, rGPR:$Rm)))]>; 
def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt", @@ -2800,11 +2792,14 @@ def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt", def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, sext_16_node:$Rm)), (t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; -def : Thumb2DSPMulPat<(add rGPR:$Ra, - (mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16)))), +def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, + (sext_bottom_16 rGPR:$Rm))), + (t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; +def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, + (sext_top_16 rGPR:$Rm))), (t2SMLABT rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; -def : Thumb2DSPMulPat<(add rGPR:$Ra, - (mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)), +def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul (sext_top_16 rGPR:$Rn), + sext_16_node:$Rm)), (t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; def : Thumb2DSPPat<(int_arm_smlabb GPR:$a, GPR:$b, GPR:$acc), diff --git a/test/CodeGen/ARM/smul.ll b/test/CodeGen/ARM/smul.ll index 2b7be41ddb2..7091f8d1914 100644 --- a/test/CodeGen/ARM/smul.ll +++ b/test/CodeGen/ARM/smul.ll @@ -1,14 +1,14 @@ -; RUN: llc -mtriple=arm-eabi -mcpu=generic %s -o /dev/null +; RUN: llc -mtriple=arm-eabi -mcpu=generic %s -o - | FileCheck %s --check-prefix=DISABLED ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s ; RUN: llc -mtriple=thumb--none-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s ; RUN: llc -mtriple=thumbv6t2-none-eabi %s -o - | FileCheck %s -; RUN: llc -mtriple=thumbv6-none-eabi %s -o - | FileCheck %s -check-prefix=CHECK-THUMBV6 +; RUN: llc -mtriple=thumbv6-none-eabi %s -o - | FileCheck %s -check-prefix=DISABLED define i32 @f1(i16 %x, i32 %y) { ; CHECK-LABEL: f1: ; CHECK-NOT: sxth ; CHECK: {{smulbt r0, r0, r1|smultb r0, r1, r0}} -; CHECK-THUMBV6-NOT: {{smulbt|smultb}} +; DISABLED-NOT: {{smulbt|smultb}} %tmp1 = sext i16 %x to i32 %tmp2 = ashr i32 %y, 16 %tmp3 = mul i32 %tmp2, %tmp1 @@ -18,7 +18,7 @@ define i32 @f1(i16 %x, i32 %y) { define i32 @f2(i32 %x, i32 %y) { ; CHECK-LABEL: f2: ; CHECK: 
smultt -; CHECK-THUMBV6-NOT: smultt +; DISABLED-NOT: smultt %tmp1 = ashr i32 %x, 16 %tmp3 = ashr i32 %y, 16 %tmp4 = mul i32 %tmp3, %tmp1 @@ -29,7 +29,7 @@ define i32 @f3(i32 %a, i16 %x, i32 %y) { ; CHECK-LABEL: f3: ; CHECK-NOT: sxth ; CHECK: {{smlabt r0, r1, r2, r0|smlatb r0, r2, r1, r0}} -; CHECK-THUMBV6-NOT: {{smlabt|smlatb}} +; DISABLED-NOT: {{smlabt|smlatb}} %tmp = sext i16 %x to i32 %tmp2 = ashr i32 %y, 16 %tmp3 = mul i32 %tmp2, %tmp @@ -40,7 +40,7 @@ define i32 @f3(i32 %a, i16 %x, i32 %y) { define i32 @f4(i32 %a, i32 %x, i32 %y) { ; CHECK-LABEL: f4: ; CHECK: smlatt -; CHECK-THUMBV6-NOT: smlatt +; DISABLED-NOT: smlatt %tmp1 = ashr i32 %x, 16 %tmp3 = ashr i32 %y, 16 %tmp4 = mul i32 %tmp3, %tmp1 @@ -52,7 +52,7 @@ define i32 @f5(i32 %a, i16 %x, i16 %y) { ; CHECK-LABEL: f5: ; CHECK-NOT: sxth ; CHECK: smlabb -; CHECK-THUMBV6-NOT: smlabb +; DISABLED-NOT: smlabb %tmp1 = sext i16 %x to i32 %tmp3 = sext i16 %y to i32 %tmp4 = mul i32 %tmp3, %tmp1 @@ -64,7 +64,7 @@ define i32 @f6(i32 %a, i32 %x, i16 %y) { ; CHECK-LABEL: f6: ; CHECK-NOT: sxth ; CHECK: {{smlatb r0, r1, r2, r0|smlabt r0, r2, r1, r0}} -; CHECK-THUMBV6-NOT: {{smlatb|smlabt}} +; DISABLED-NOT: {{smlatb|smlabt}} %tmp1 = sext i16 %y to i32 %tmp2 = ashr i32 %x, 16 %tmp3 = mul i32 %tmp2, %tmp1 @@ -75,7 +75,7 @@ define i32 @f6(i32 %a, i32 %x, i16 %y) { define i32 @f7(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: f7: ; CHECK: smlawb r0, r0, r1, r2 -; CHECK-THUMBV6-NOT: smlawb +; DISABLED-NOT: smlawb %shl = shl i32 %b, 16 %shr = ashr exact i32 %shl, 16 %conv = sext i32 %a to i64 @@ -91,7 +91,7 @@ define i32 @f8(i32 %a, i16 signext %b, i32 %c) { ; CHECK-LABEL: f8: ; CHECK-NOT: sxth ; CHECK: smlawb r0, r0, r1, r2 -; CHECK-THUMBV6-NOT: smlawb +; DISABLED-NOT: smlawb %conv = sext i32 %a to i64 %conv1 = sext i16 %b to i64 %mul = mul nsw i64 %conv1, %conv @@ -104,7 +104,7 @@ define i32 @f8(i32 %a, i16 signext %b, i32 %c) { define i32 @f9(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: f9: ; CHECK: smlawt r0, r0, r1, r2 -; 
CHECK-THUMBV6-NOT: smlawt +; DISABLED-NOT: smlawt %conv = sext i32 %a to i64 %shr = ashr i32 %b, 16 %conv1 = sext i32 %shr to i64 @@ -118,7 +118,7 @@ define i32 @f9(i32 %a, i32 %b, i32 %c) { define i32 @f10(i32 %a, i32 %b) { ; CHECK-LABEL: f10: ; CHECK: smulwb r0, r0, r1 -; CHECK-THUMBV6-NOT: smulwb +; DISABLED-NOT: smulwb %shl = shl i32 %b, 16 %shr = ashr exact i32 %shl, 16 %conv = sext i32 %a to i64 @@ -133,7 +133,7 @@ define i32 @f11(i32 %a, i16 signext %b) { ; CHECK-LABEL: f11: ; CHECK-NOT: sxth ; CHECK: smulwb r0, r0, r1 -; CHECK-THUMBV6-NOT: smulwb +; DISABLED-NOT: smulwb %conv = sext i32 %a to i64 %conv1 = sext i16 %b to i64 %mul = mul nsw i64 %conv1, %conv @@ -145,7 +145,7 @@ define i32 @f11(i32 %a, i16 signext %b) { define i32 @f12(i32 %a, i32 %b) { ; CHECK-LABEL: f12: ; CHECK: smulwt r0, r0, r1 -; CHECK-THUMBV6-NOT: smulwt +; DISABLED-NOT: smulwt %conv = sext i32 %a to i64 %shr = ashr i32 %b, 16 %conv1 = sext i32 %shr to i64 @@ -159,7 +159,7 @@ define i32 @f13(i32 %x, i16 %y) { ; CHECK-LABEL: f13: ; CHECK-NOT: sxth ; CHECK: {{smultb r0, r0, r1|smulbt r0, r1, r0}} -; CHECK-THUMBV6-NOT: {{smultb|smulbt}} +; DISABLED-NOT: {{smultb|smulbt}} %tmp1 = sext i16 %y to i32 %tmp2 = ashr i32 %x, 16 %tmp3 = mul i32 %tmp2, %tmp1 @@ -169,11 +169,11 @@ define i32 @f13(i32 %x, i16 %y) { define i32 @f14(i32 %x, i32 %y) { ; CHECK-LABEL: f14: ; CHECK-NOT: sxth -; CHECK: {{smultb r0, r0, r1|smulbt r0, r1, r0}} -; CHECK-THUMBV6-NOT: {{smultb|smulbt}} - %tmp1 = shl i32 %y, 16 +; CHECK: {{smultb r0, r1, r0|smulbt r0, r0, r1}} +; DISABLED-NOT: {{smultb|smulbt}} + %tmp1 = shl i32 %x, 16 %tmp2 = ashr i32 %tmp1, 16 - %tmp3 = ashr i32 %x, 16 + %tmp3 = ashr i32 %y, 16 %tmp4 = mul i32 %tmp3, %tmp2 ret i32 %tmp4 } @@ -182,7 +182,7 @@ define i32 @f15(i32 %x, i32 %y) { ; CHECK-LABEL: f15: ; CHECK-NOT: sxth ; CHECK: {{smulbt r0, r0, r1|smultb r0, r1, r0}} -; CHECK-THUMBV6-NOT: {{smulbt|smultb}} +; DISABLED-NOT: {{smulbt|smultb}} %tmp1 = shl i32 %x, 16 %tmp2 = ashr i32 %tmp1, 16 %tmp3 = 
ashr i32 %y, 16 @@ -194,7 +194,7 @@ define i32 @f16(i16 %x, i16 %y) { ; CHECK-LABEL: f16: ; CHECK-NOT: sxth ; CHECK: smulbb -; CHECK-THUMBV6-NOT: smulbb +; DISABLED-NOT: smulbb %tmp1 = sext i16 %x to i32 %tmp2 = sext i16 %x to i32 %tmp3 = mul i32 %tmp1, %tmp2 @@ -203,8 +203,9 @@ define i32 @f16(i16 %x, i16 %y) { define i32 @f17(i32 %x, i32 %y) { ; CHECK-LABEL: f17: +; CHECK-NOT: sxth ; CHECK: smulbb -; CHECK-THUMBV6-NOT: smulbb +; DISABLED-NOT: smulbb %tmp1 = shl i32 %x, 16 %tmp2 = shl i32 %y, 16 %tmp3 = ashr i32 %tmp1, 16 @@ -215,8 +216,9 @@ define i32 @f17(i32 %x, i32 %y) { define i32 @f18(i32 %a, i32 %x, i32 %y) { ; CHECK-LABEL: f18: +; CHECK-NOT: sxth ; CHECK: {{smlabt r0, r1, r2, r0|smlatb r0, r2, r1, r0}} -; CHECK-THUMBV6-NOT: {{smlabt|smlatb}} +; DISABLED-NOT: {{smlabt|smlatb}} %tmp0 = shl i32 %x, 16 %tmp1 = ashr i32 %tmp0, 16 %tmp2 = ashr i32 %y, 16 @@ -227,20 +229,21 @@ define i32 @f18(i32 %a, i32 %x, i32 %y) { define i32 @f19(i32 %a, i32 %x, i32 %y) { ; CHECK-LABEL: f19: -; CHECK: {{smlatb r0, r1, r2, r0|smlabt r0, r2, r1, r0}} -; CHECK-THUMBV6-NOT: {{smlatb|smlabt}} - %tmp0 = shl i32 %y, 16 +; CHECK: {{smlatb r0, r2, r1, r0|smlabt r0, r1, r2, r0}} +; DISABLED-NOT: {{smlatb|smlabt}} + %tmp0 = shl i32 %x, 16 %tmp1 = ashr i32 %tmp0, 16 - %tmp2 = ashr i32 %x, 16 - %tmp3 = mul i32 %tmp2, %tmp1 + %tmp2 = ashr i32 %y, 16 + %tmp3 = mul i32 %tmp1, %tmp2 %tmp5 = add i32 %tmp3, %a ret i32 %tmp5 } define i32 @f20(i32 %a, i32 %x, i32 %y) { ; CHECK-LABEL: f20: +; CHECK-NOT: sxth ; CHECK: smlabb -; CHECK-THUMBV6-NOT: smlabb +; DISABLED-NOT: smlabb %tmp1 = shl i32 %x, 16 %tmp2 = ashr i32 %tmp1, 16 %tmp3 = shl i32 %y, 16 @@ -254,7 +257,7 @@ define i32 @f21(i32 %a, i32 %x, i16 %y) { ; CHECK-LABEL: f21 ; CHECK-NOT: sxth ; CHECK: smlabb -; CHECK-THUMBV6-NOT: smlabb +; DISABLED-NOT: smlabb %tmp1 = shl i32 %x, 16 %tmp2 = ashr i32 %tmp1, 16 %tmp3 = sext i16 %y to i32 @@ -263,12 +266,26 @@ define i32 @f21(i32 %a, i32 %x, i16 %y) { ret i32 %tmp5 } +define i32 @f21_b(i32 %a, i32 
%x, i16 %y) { +; CHECK-LABEL: f21_b +; CHECK-NOT: sxth +; CHECK: smlabb +; DISABLED-NOT: smlabb + %tmp1 = shl i32 %x, 16 + %tmp2 = ashr i32 %tmp1, 16 + %tmp3 = sext i16 %y to i32 + %tmp4 = mul i32 %tmp3, %tmp2 + %tmp5 = add i32 %a, %tmp4 + ret i32 %tmp5 +} + @global_b = external global i16, align 2 define i32 @f22(i32 %a) { ; CHECK-LABEL: f22: +; CHECK-NOT: sxth ; CHECK: smulwb r0, r0, r1 -; CHECK-THUMBV6-NOT: smulwb +; DISABLED-NOT: smulwb %b = load i16, i16* @global_b, align 2 %sext = sext i16 %b to i64 %conv = sext i32 %a to i64 @@ -280,8 +297,9 @@ define i32 @f22(i32 %a) { define i32 @f23(i32 %a, i32 %c) { ; CHECK-LABEL: f23: +; CHECK-NOT: sxth ; CHECK: smlawb r0, r0, r2, r1 -; CHECK-THUMBV6-NOT: smlawb +; DISABLED-NOT: smlawb %b = load i16, i16* @global_b, align 2 %sext = sext i16 %b to i64 %conv = sext i32 %a to i64 @@ -291,3 +309,102 @@ define i32 @f23(i32 %a, i32 %c) { %add = add nsw i32 %conv5, %c ret i32 %add } + +; CHECK-LABEL: f24 +; CHECK-NOT: sxth +; CHECK: smulbb +define i32 @f24(i16* %a, i32* %b, i32* %c) { + %ld.0 = load i16, i16* %a, align 2 + %ld.1 = load i32, i32* %b, align 4 + %conv.0 = sext i16 %ld.0 to i32 + %shift = shl i32 %ld.1, 16 + %conv.1 = ashr i32 %shift, 16 + %mul.0 = mul i32 %conv.0, %conv.1 + store i32 %ld.1, i32* %c + ret i32 %mul.0 +} + +; CHECK-LABEL: f25 +; CHECK-NOT: sxth +; CHECK: smulbb +define i32 @f25(i16* %a, i32 %b, i32* %c) { + %ld.0 = load i16, i16* %a, align 2 + %conv.0 = sext i16 %ld.0 to i32 + %shift = shl i32 %b, 16 + %conv.1 = ashr i32 %shift, 16 + %mul.0 = mul i32 %conv.0, %conv.1 + store i32 %b, i32* %c + ret i32 %mul.0 +} + +; CHECK-LABEL: f25_b +; CHECK-NOT: sxth +; CHECK: smulbb +define i32 @f25_b(i16* %a, i32 %b, i32* %c) { + %ld.0 = load i16, i16* %a, align 2 + %conv.0 = sext i16 %ld.0 to i32 + %shift = shl i32 %b, 16 + %conv.1 = ashr i32 %shift, 16 + %mul.0 = mul i32 %conv.1, %conv.0 + store i32 %b, i32* %c + ret i32 %mul.0 +} + +; CHECK-LABEL: f26 +; CHECK-NOT: sxth +; CHECK: {{smulbt | smultb}} +define 
i32 @f26(i16* %a, i32 %b, i32* %c) { + %ld.0 = load i16, i16* %a, align 2 + %conv.0 = sext i16 %ld.0 to i32 + %conv.1 = ashr i32 %b, 16 + %mul.0 = mul i32 %conv.0, %conv.1 + store i32 %b, i32* %c + ret i32 %mul.0 +} + +; CHECK-LABEL: f26_b +; CHECK-NOT: sxth +; CHECK: {{smulbt | smultb}} +define i32 @f26_b(i16* %a, i32 %b, i32* %c) { + %ld.0 = load i16, i16* %a, align 2 + %conv.0 = sext i16 %ld.0 to i32 + %conv.1 = ashr i32 %b, 16 + %mul.0 = mul i32 %conv.1, %conv.0 + store i32 %b, i32* %c + ret i32 %mul.0 +} + +; CHECK-LABEL: f27 +; CHECK-NOT: sxth +; CHECK: smulbb +; CHECK: {{smlabt | smlatb}} +define i32 @f27(i16* %a, i32* %b) { + %ld.0 = load i16, i16* %a, align 2 + %ld.1 = load i32, i32* %b, align 4 + %conv.0 = sext i16 %ld.0 to i32 + %shift = shl i32 %ld.1, 16 + %conv.1 = ashr i32 %shift, 16 + %conv.2 = ashr i32 %ld.1, 16 + %mul.0 = mul i32 %conv.0, %conv.1 + %mul.1 = mul i32 %conv.0, %conv.2 + %add = add i32 %mul.0, %mul.1 + ret i32 %add +} + +; CHECK-LABEL: f27_b +; CHECK-NOT: sxth +; CHECK: smulbb +; CHECK: {{smlabt | smlatb}} +define i32 @f27_b(i16* %a, i32* %b) { + %ld.0 = load i16, i16* %a, align 2 + %ld.1 = load i32, i32* %b, align 4 + %conv.0 = sext i16 %ld.0 to i32 + %shift = shl i32 %ld.1, 16 + %conv.1 = ashr i32 %shift, 16 + %conv.2 = ashr i32 %ld.1, 16 + %mul.0 = mul i32 %conv.0, %conv.1 + %mul.1 = mul i32 %conv.2, %conv.0 + %add = add i32 %mul.0, %mul.1 + ret i32 %add +} + -- 2.11.0