From 504c369213efb263136bb048e79af3516511c040 Mon Sep 17 00:00:00 2001 From: Scott Michel Date: Mon, 17 Dec 2007 22:32:34 +0000 Subject: [PATCH] - Restore some i8 functionality in CellSPU - New test case: nand.ll git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45130 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 7 + lib/Target/CellSPU/SPUISelLowering.cpp | 14 +- lib/Target/CellSPU/SPUInstrInfo.cpp | 6 +- lib/Target/CellSPU/SPUInstrInfo.td | 414 ++++++++++++++++++++++++++++++--- lib/Target/CellSPU/SPUOperands.td | 50 +++- lib/Target/CellSPU/SPURegisterInfo.cpp | 9 +- lib/Target/CellSPU/SPURegisterInfo.td | 34 +++ test/CodeGen/CellSPU/and_ops.ll | 22 +- test/CodeGen/CellSPU/nand.ll | 119 ++++++++++ 9 files changed, 612 insertions(+), 63 deletions(-) create mode 100644 test/CodeGen/CellSPU/nand.ll diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index ab02a812d68..ba406f42936 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -63,6 +63,13 @@ namespace { } #endif + //! ConstantSDNode predicate for i32 unsigned 10-bit immediate values + bool + isI32IntU10Immediate(ConstantSDNode *CN) + { + return isU10Constant((int) CN->getValue()); + } + //! ConstantSDNode predicate for i16 sign-extended, 10-bit immediate values bool isI16IntS10Immediate(ConstantSDNode *CN) diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 3a23c6fec9e..d7091eb9b88 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -119,11 +119,13 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // Set up the SPU's register classes: // NOTE: i8 register class is not registered because we cannot determine when // we need to zero or sign extend for custom-lowered loads and stores. 
- addRegisterClass(MVT::i16, SPU::R16CRegisterClass); - addRegisterClass(MVT::i32, SPU::R32CRegisterClass); - addRegisterClass(MVT::i64, SPU::R64CRegisterClass); - addRegisterClass(MVT::f32, SPU::R32FPRegisterClass); - addRegisterClass(MVT::f64, SPU::R64FPRegisterClass); + // NOTE: Ignore the previous note. For now. :-) + addRegisterClass(MVT::i8, SPU::R8CRegisterClass); + addRegisterClass(MVT::i16, SPU::R16CRegisterClass); + addRegisterClass(MVT::i32, SPU::R32CRegisterClass); + addRegisterClass(MVT::i64, SPU::R64CRegisterClass); + addRegisterClass(MVT::f32, SPU::R32FPRegisterClass); + addRegisterClass(MVT::f64, SPU::R64FPRegisterClass); addRegisterClass(MVT::i128, SPU::GPRCRegisterClass); // SPU has no sign or zero extended loads for i1, i8, i16: @@ -925,7 +927,7 @@ LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex) } case MVT::i8: if (!isVarArg && ArgRegIdx < NumArgRegs) { - unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass); + unsigned VReg = RegMap->createVirtualRegister(&SPU::R8CRegClass); MF.addLiveIn(ArgRegs[ArgRegIdx], VReg); ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8); ++ArgRegIdx; diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index ea7e107c490..5846aad72e2 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -54,11 +54,11 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI, case SPU::ORIr64: case SPU::ORHIv8i16: case SPU::ORHIr16: - // case SPU::ORHI1To2: + case SPU::ORHI1To2: case SPU::ORBIv16i8: - //case SPU::ORBIr8: + case SPU::ORBIr8: case SPU::ORI2To4: - // case SPU::ORI1To4: + case SPU::ORI1To4: case SPU::AHIvec: case SPU::AHIr16: case SPU::AIvec: diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 2ec14d241b5..792041ea376 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -111,6 +111,11 @@ let isLoad = 1 in { "lqd\t$rT, $src", LoadStore, 
[(set R16C:$rT, (load dform_addr:$src))]>; + def LQDr8: + RI10Form<0b00101100, (outs R8C:$rT), (ins memri10:$src), + "lqd\t$rT, $src", LoadStore, + [(set R8C:$rT, (load dform_addr:$src))]>; + def LQAv16i8: RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src), "lqa\t$rT, $src", LoadStore, @@ -171,6 +176,11 @@ let isLoad = 1 in { "lqa\t$rT, $src", LoadStore, [(set R16C:$rT, (load aform_addr:$src))]>; + def LQAr8: + RI16Form<0b100001100, (outs R8C:$rT), (ins addr256k:$src), + "lqa\t$rT, $src", LoadStore, + [(set R8C:$rT, (load aform_addr:$src))]>; + def LQXv16i8: RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src), "lqx\t$rT, $src", LoadStore, @@ -231,14 +241,17 @@ let isLoad = 1 in { "lqx\t$rT, $src", LoadStore, [(set R16C:$rT, (load xform_addr:$src))]>; + def LQXr8: + RRForm<0b00100011100, (outs R8C:$rT), (ins memrr:$src), + "lqx\t$rT, $src", LoadStore, + [(set R8C:$rT, (load xform_addr:$src))]>; + /* Load quadword, PC relative: Not much use at this point in time. Might be of use later for relocatable code. 
def LQR : RI16Form<0b111001100, (outs VECREG:$rT), (ins s16imm:$disp), "lqr\t$rT, $disp", LoadStore, [(set VECREG:$rT, (load iaddr:$disp))]>; */ - - // Catch-all for unaligned loads: } //===----------------------------------------------------------------------===// @@ -295,6 +308,10 @@ let isStore = 1 in { "stqd\t$rT, $src", LoadStore, [(store R16C:$rT, dform_addr:$src)]>; + def STQDr8 : RI10Form<0b00100100, (outs), (ins R8C:$rT, memri10:$src), + "stqd\t$rT, $src", LoadStore, + [(store R8C:$rT, dform_addr:$src)]>; + def STQAv16i8 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, addr256k:$src), "stqa\t$rT, $src", LoadStore, [(store (v16i8 VECREG:$rT), aform_addr:$src)]>; @@ -340,6 +357,14 @@ let isStore = 1 in { "stqa\t$rT, $src", LoadStore, [(store R64FP:$rT, aform_addr:$src)]>; + def STQAr16 : RI10Form<0b00100100, (outs), (ins R16C:$rT, addr256k:$src), + "stqa\t$rT, $src", LoadStore, + [(store R16C:$rT, aform_addr:$src)]>; + + def STQAr8 : RI10Form<0b00100100, (outs), (ins R8C:$rT, addr256k:$src), + "stqa\t$rT, $src", LoadStore, + [(store R8C:$rT, aform_addr:$src)]>; + def STQXv16i8 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memrr:$src), "stqx\t$rT, $src", LoadStore, [(store (v16i8 VECREG:$rT), xform_addr:$src)]>; @@ -368,26 +393,36 @@ let isStore = 1 in { "stqx\t$rT, $src", LoadStore, [(store GPRC:$rT, xform_addr:$src)]>; - def STQXr64 : RI10Form<0b00100100, (outs), (ins R64C:$rT, memrr:$src), + def STQXr64: + RI10Form<0b00100100, (outs), (ins R64C:$rT, memrr:$src), "stqx\t$rT, $src", LoadStore, [(store R64C:$rT, xform_addr:$src)]>; - def STQXr32 : RI10Form<0b00100100, (outs), (ins R32C:$rT, memrr:$src), + def STQXr32: + RI10Form<0b00100100, (outs), (ins R32C:$rT, memrr:$src), "stqx\t$rT, $src", LoadStore, [(store R32C:$rT, xform_addr:$src)]>; // Floating Point - def STQXf32 : RI10Form<0b00100100, (outs), (ins R32FP:$rT, memrr:$src), + def STQXf32: + RI10Form<0b00100100, (outs), (ins R32FP:$rT, memrr:$src), "stqx\t$rT, $src", LoadStore, [(store R32FP:$rT, 
xform_addr:$src)]>; - def STQXf64 : RI10Form<0b00100100, (outs), (ins R64FP:$rT, memrr:$src), + def STQXf64: + RI10Form<0b00100100, (outs), (ins R64FP:$rT, memrr:$src), "stqx\t$rT, $src", LoadStore, [(store R64FP:$rT, xform_addr:$src)]>; - def STQXr16 : RI10Form<0b00100100, (outs), (ins R16C:$rT, memrr:$src), + def STQXr16: + RI10Form<0b00100100, (outs), (ins R16C:$rT, memrr:$src), "stqx\t$rT, $src", LoadStore, [(store R16C:$rT, xform_addr:$src)]>; + + def STQXr8: + RI10Form<0b00100100, (outs), (ins R8C:$rT, memrr:$src), + "stqx\t$rT, $src", LoadStore, + [(store R8C:$rT, xform_addr:$src)]>; /* Store quadword, PC relative: Not much use at this point in time. Might be useful for relocatable code. @@ -448,6 +483,13 @@ def ILHr16: "ilh\t$rT, $val", ImmLoad, [(set R16C:$rT, immSExt16:$val)]>; +// Cell SPU doesn't have a native 8-bit immediate load, but ILH works ("with +// the right constant") +def ILHr8: + RI16Form<0b110000010, (outs R8C:$rT), (ins s16imm_i8:$val), + "ilh\t$rT, $val", ImmLoad, + [(set R8C:$rT, immSExt8:$val)]>; + // IL does sign extension! 
def ILr64: RI16Form<0b100000010, (outs R64C:$rT), (ins s16imm_i64:$val), @@ -626,25 +668,32 @@ def Ar32 : RRForm<0b00000011000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), "a\t$rT, $rA, $rB", IntegerOp, [(set R32C:$rT, (add R32C:$rA, R32C:$rB))]>; +def Ar8: + RRForm<0b00000011000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB), + "a\t$rT, $rA, $rB", IntegerOp, + [(set R8C:$rT, (add R8C:$rA, R8C:$rB))]>; + def AIvec: RI10Form<0b00111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), "ai\t$rT, $rA, $val", IntegerOp, [(set (v4i32 VECREG:$rT), (add (v4i32 VECREG:$rA), v4i32SExt10Imm:$val))]>; -def AIr32 : RI10Form<0b00111000, (outs R32C:$rT), - (ins R32C:$rA, s10imm_i32:$val), - "ai\t$rT, $rA, $val", IntegerOp, - [(set R32C:$rT, (add R32C:$rA, i32ImmSExt10:$val))]>; +def AIr32: + RI10Form<0b00111000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), + "ai\t$rT, $rA, $val", IntegerOp, + [(set R32C:$rT, (add R32C:$rA, i32ImmSExt10:$val))]>; -def SFHvec : RRForm<0b00010010000, (outs VECREG:$rT), - (ins VECREG:$rA, VECREG:$rB), - "sfh\t$rT, $rA, $rB", IntegerOp, - [(set (v8i16 VECREG:$rT), (sub (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>; +def SFHvec: + RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "sfh\t$rT, $rA, $rB", IntegerOp, + [(set (v8i16 VECREG:$rT), (sub (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; -def SFHr16 : RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), - "sfh\t$rT, $rA, $rB", IntegerOp, - [(set R16C:$rT, (sub R16C:$rA, R16C:$rB))]>; +def SFHr16: + RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), + "sfh\t$rT, $rA, $rB", IntegerOp, + [(set R16C:$rT, (sub R16C:$rA, R16C:$rB))]>; def SFHIvec: RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), @@ -977,6 +1026,11 @@ def XSBHr16: "xsbh\t$rDst, $rSrc", IntegerOp, [(set R16C:$rDst, (sext_inreg R16C:$rSrc, i8))]>; +def XSBHr8: + RRForm_1<0b01101101010, (outs R16C:$rDst), (ins R8C:$rSrc), + "xsbh\t$rDst, $rSrc", IntegerOp, + [(set R16C:$rDst, 
(sext R8C:$rSrc))]>; + // 32-bit form for XSBH: used to sign extend 8-bit quantities to 16-bit // quantities to 32-bit quantities via a 32-bit register (see the sext 8->32 // pattern below). Intentionally doesn't match a pattern because we want the @@ -1070,6 +1124,11 @@ def ANDr16: "and\t$rT, $rA, $rB", IntegerOp, [(set R16C:$rT, (and R16C:$rA, R16C:$rB))]>; +def ANDr8: + RRForm<0b10000011000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB), + "and\t$rT, $rA, $rB", IntegerOp, + [(set R8C:$rT, (and R8C:$rA, R8C:$rB))]>; + // Hacked form of AND to zero-extend 16-bit quantities to 32-bit // quantities -- see 16->32 zext pattern. // @@ -1112,12 +1171,22 @@ def ANDCr16: "andc\t$rT, $rA, $rB", IntegerOp, [(set R16C:$rT, (and R16C:$rA, (not R16C:$rB)))]>; +def ANDCr8: + RRForm<0b10000011010, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB), + "andc\t$rT, $rA, $rB", IntegerOp, + [(set R8C:$rT, (and R8C:$rA, (not R8C:$rB)))]>; + def ANDBIv16i8: RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), "andbi\t$rT, $rA, $val", IntegerOp, [(set (v16i8 VECREG:$rT), (and (v16i8 VECREG:$rA), (v16i8 v16i8U8Imm:$val)))]>; +def ANDBIr8: + RI10Form<0b01101000, (outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val), + "andbi\t$rT, $rA, $val", IntegerOp, + [(set R8C:$rT, (and R8C:$rA, immU8:$val))]>; + def ANDHIv8i16: RI10Form<0b10101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), "andhi\t$rT, $rA, $val", IntegerOp, @@ -1127,7 +1196,12 @@ def ANDHIv8i16: def ANDHIr16: RI10Form<0b10101000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), "andhi\t$rT, $rA, $val", IntegerOp, - [(set R16C:$rT, (and R16C:$rA, i16ImmU10:$val))]>; + [(set R16C:$rT, (and R16C:$rA, i16ImmUns10:$val))]>; + +def ANDHI1To2: + RI10Form<0b10101000, (outs R16C:$rT), (ins R8C:$rA, s10imm:$val), + "andhi\t$rT, $rA, $val", IntegerOp, + [(set R16C:$rT, (and (zext R8C:$rA), i16ImmSExt10:$val))]>; def ANDIv4i32: RI10Form<0b00101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), @@ -1140,6 +1214,13 @@ def ANDIr32: "andi\t$rT, 
$rA, $val", IntegerOp, [(set R32C:$rT, (and R32C:$rA, i32ImmSExt10:$val))]>; +// Hacked form of ANDI to zero-extend i8 quantities to i32. See the zext 8->32 +// pattern below. +def ANDI1To4: + RI10Form<0b10101000, (outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val), + "andi\t$rT, $rA, $val", IntegerOp, + [(set R32C:$rT, (and (zext R8C:$rA), i32ImmSExt10:$val))]>; + // Hacked form of ANDI to zero-extend i16 quantities to i32. See the // zext 16->32 pattern below. // @@ -1199,7 +1280,20 @@ def ORr16: "or\t$rT, $rA, $rB", IntegerOp, [(set R16C:$rT, (or R16C:$rA, R16C:$rB))]>; +def ORr8: + RRForm<0b10000010000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB), + "or\t$rT, $rA, $rB", IntegerOp, + [(set R8C:$rT, (or R8C:$rA, R8C:$rB))]>; + // ORv*_*: Used in scalar->vector promotions: +def ORv16i8_i8: + RRForm<0b10000010000, (outs VECREG:$rT), (ins R8C:$rA, R8C:$rB), + "or\t$rT, $rA, $rB", IntegerOp, + [/* no pattern */]>; + +def : Pat<(v16i8 (SPUpromote_scalar R8C:$rA)), + (ORv16i8_i8 R8C:$rA, R8C:$rA)>; + def ORv8i16_i16: RRForm<0b10000010000, (outs VECREG:$rT), (ins R16C:$rA, R16C:$rB), "or\t$rT, $rA, $rB", IntegerOp, @@ -1241,6 +1335,14 @@ def : Pat<(v2f64 (SPUpromote_scalar R64FP:$rA)), (ORv2f64_f64 R64FP:$rA, R64FP:$rA)>; // ORi*_v*: Used to extract vector element 0 (the preferred slot) +def ORi8_v16i8: + RRForm<0b10000010000, (outs R8C:$rT), (ins VECREG:$rA, VECREG:$rB), + "or\t$rT, $rA, $rB", IntegerOp, + [/* no pattern */]>; + +def : Pat<(SPUextract_elt0 (v16i8 VECREG:$rA)), + (ORi8_v16i8 VECREG:$rA, VECREG:$rA)>; + def ORi16_v8i16: RRForm<0b10000010000, (outs R16C:$rT), (ins VECREG:$rA, VECREG:$rB), "or\t$rT, $rA, $rB", IntegerOp, @@ -1325,6 +1427,11 @@ def ORCr16: "orc\t$rT, $rA, $rB", IntegerOp, [(set R16C:$rT, (or R16C:$rA, (not R16C:$rB)))]>; +def ORCr8: + RRForm<0b10010010000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB), + "orc\t$rT, $rA, $rB", IntegerOp, + [(set R8C:$rT, (or R8C:$rA, (not R8C:$rB)))]>; + // OR byte immediate def ORBIv16i8: RI10Form<0b01100000, (outs 
VECREG:$rT), (ins VECREG:$rA, u10imm:$val), @@ -1332,29 +1439,40 @@ def ORBIv16i8: [(set (v16i8 VECREG:$rT), (or (v16i8 VECREG:$rA), (v16i8 v16i8U8Imm:$val)))]>; +def ORBIr8: + RI10Form<0b01100000, (outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val), + "orbi\t$rT, $rA, $val", IntegerOp, + [(set R8C:$rT, (or R8C:$rA, immU8:$val))]>; + // OR halfword immediate def ORHIv8i16: - RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), "orhi\t$rT, $rA, $val", IntegerOp, [(set (v8i16 VECREG:$rT), (or (v8i16 VECREG:$rA), - v8i16SExt10Imm:$val))]>; + v8i16Uns10Imm:$val))]>; def ORHIr16: - RI10Form<0b10100000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), + RI10Form<0b10100000, (outs R16C:$rT), (ins R16C:$rA, u10imm:$val), + "orhi\t$rT, $rA, $val", IntegerOp, + [(set R16C:$rT, (or R16C:$rA, i16ImmUns10:$val))]>; + +// Hacked form of ORHI used to promote 8-bit registers to 16-bit +def ORHI1To2: + RI10Form<0b10100000, (outs R16C:$rT), (ins R8C:$rA, s10imm:$val), "orhi\t$rT, $rA, $val", IntegerOp, - [(set R16C:$rT, (or R16C:$rA, i16ImmSExt10:$val))]>; + [(set R16C:$rT, (or (anyext R8C:$rA), i16ImmSExt10:$val))]>; // Bitwise "or" with immediate def ORIv4i32: - RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), "ori\t$rT, $rA, $val", IntegerOp, [(set (v4i32 VECREG:$rT), (or (v4i32 VECREG:$rA), - v4i32SExt10Imm:$val))]>; + v4i32Uns10Imm:$val))]>; def ORIr32: - RI10Form<0b00100000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), + RI10Form<0b00100000, (outs R32C:$rT), (ins R32C:$rA, u10imm_i32:$val), "ori\t$rT, $rA, $val", IntegerOp, - [(set R32C:$rT, (or R32C:$rA, i32ImmSExt10:$val))]>; + [(set R32C:$rT, (or R32C:$rA, i32ImmUns10:$val))]>; // Hacked forms of or immediate to copy one 32- and 64-bit FP register // to another. Do not match patterns. 
@@ -1381,15 +1499,24 @@ def ORI2To4: "ori\t$rT, $rA, $val", IntegerOp, [(set R32C:$rT, (or (anyext R16C:$rA), i32ImmSExt10:$val))]>; +// ORI1To4: Hacked version of the ORI instruction to extend 8-bit quantities +// to 32-bit quantities. Used exclusively to match "anyext" conversions (vide +// infra "anyext 8->32" pattern.) +def ORI1To4: + RI10Form<0b00100000, (outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val), + "ori\t$rT, $rA, $val", IntegerOp, + [(set R32C:$rT, (or (anyext R8C:$rA), i32ImmSExt10:$val))]>; + // ORX: "or" across the vector: or's $rA's word slots leaving the result in // $rT[0], slots 1-3 are zeroed. // -// Needs to match an intrinsic pattern. +// FIXME: Needs to match an intrinsic pattern. def ORXv4i32: RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), "orx\t$rT, $rA, $rB", IntegerOp, []>; +// XOR: def XORv16i8: RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), "xor\t$rT, $rA, $rB", IntegerOp, @@ -1441,11 +1568,21 @@ def XORr16: "xor\t$rT, $rA, $rB", IntegerOp, [(set R16C:$rT, (xor R16C:$rA, R16C:$rB))]>; +def XORr8: + RRForm<0b10010010000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB), + "xor\t$rT, $rA, $rB", IntegerOp, + [(set R8C:$rT, (xor R8C:$rA, R8C:$rB))]>; + def XORBIv16i8: RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), "xorbi\t$rT, $rA, $val", IntegerOp, [(set (v16i8 VECREG:$rT), (xor (v16i8 VECREG:$rA), v16i8U8Imm:$val))]>; +def XORBIr8: + RI10Form<0b01100000, (outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val), + "xorbi\t$rT, $rA, $val", IntegerOp, + [(set R8C:$rT, (xor R8C:$rA, immU8:$val))]>; + def XORHIv8i16: RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), "xorhi\t$rT, $rA, $val", IntegerOp, @@ -1497,6 +1634,11 @@ def NANDr16: "nand\t$rT, $rA, $rB", IntegerOp, [(set R16C:$rT, (not (and R16C:$rA, R16C:$rB)))]>; +def NANDr8: + RRForm<0b10010010000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB), + "nand\t$rT, $rA, $rB", IntegerOp, + [(set R8C:$rT, (not (and R8C:$rA, 
R8C:$rB)))]>; + // NOR: def NORv16i8: RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), @@ -1526,6 +1668,11 @@ def NORr16: "nor\t$rT, $rA, $rB", IntegerOp, [(set R16C:$rT, (not (or R16C:$rA, R16C:$rB)))]>; +def NORr8: + RRForm<0b10010010000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB), + "nor\t$rT, $rA, $rB", IntegerOp, + [(set R8C:$rT, (not (or R8C:$rA, R8C:$rB)))]>; + // EQV: Equivalence (1 for each same bit, otherwise 0) def EQVv16i8: RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), @@ -1593,6 +1740,18 @@ def : Pat<(xor R16C:$rA, (not R16C:$rB)), def : Pat<(xor (not R16C:$rA), R16C:$rB), (EQVr16 R16C:$rA, R16C:$rB)>; +def EQVr8: + RRForm<0b10010010000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB), + "eqv\t$rT, $rA, $rB", IntegerOp, + [(set R8C:$rT, (or (and R8C:$rA, R8C:$rB), + (and (not R8C:$rA), (not R8C:$rB))))]>; + +def : Pat<(xor R8C:$rA, (not R8C:$rB)), + (EQVr8 R8C:$rA, R8C:$rB)>; + +def : Pat<(xor (not R8C:$rA), R8C:$rB), + (EQVr8 R8C:$rA, R8C:$rB)>; + // gcc optimizes (p & q) | (~p & ~q) -> ~(p | q) | (p & q), so match that // pattern also: def : Pat<(or (vnot (or (v16i8 VECREG:$rA), (v16i8 VECREG:$rB))), @@ -1613,6 +1772,9 @@ def : Pat<(or (not (or R32C:$rA, R32C:$rB)), (and R32C:$rA, R32C:$rB)), def : Pat<(or (not (or R16C:$rA, R16C:$rB)), (and R16C:$rA, R16C:$rB)), (EQVr16 R16C:$rA, R16C:$rB)>; +def : Pat<(or (not (or R8C:$rA, R8C:$rB)), (and R8C:$rA, R8C:$rB)), + (EQVr8 R8C:$rA, R8C:$rB)>; + // Select bits: def SELBv16i8: RRRForm<0b1000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), @@ -1901,6 +2063,43 @@ def : Pat<(or (and (not R16C:$rC), R16C:$rA), def : Pat<(or (and (not R16C:$rC), R16C:$rA), (and R16C:$rC, R16C:$rB)), (SELBr16 R16C:$rA, R16C:$rB, R16C:$rC)>; + +def SELBr8: + RRRForm<0b1000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB, R8C:$rC), + "selb\t$rT, $rA, $rB, $rC", IntegerOp, + []>; + +def : Pat<(or (and R8C:$rA, R8C:$rC), + (and R8C:$rB, (not R8C:$rC))), + (SELBr8 R8C:$rA, R8C:$rB, R8C:$rC)>; + 
+def : Pat<(or (and R8C:$rC, R8C:$rA), + (and R8C:$rB, (not R8C:$rC))), + (SELBr8 R8C:$rA, R8C:$rB, R8C:$rC)>; + +def : Pat<(or (and R8C:$rA, R8C:$rC), + (and (not R8C:$rC), R8C:$rB)), + (SELBr8 R8C:$rA, R8C:$rB, R8C:$rC)>; + +def : Pat<(or (and R8C:$rC, R8C:$rA), + (and (not R8C:$rC), R8C:$rB)), + (SELBr8 R8C:$rA, R8C:$rB, R8C:$rC)>; + +def : Pat<(or (and R8C:$rA, (not R8C:$rC)), + (and R8C:$rB, R8C:$rC)), + (SELBr8 R8C:$rA, R8C:$rB, R8C:$rC)>; + +def : Pat<(or (and R8C:$rA, (not R8C:$rC)), + (and R8C:$rC, R8C:$rB)), + (SELBr8 R8C:$rA, R8C:$rB, R8C:$rC)>; + +def : Pat<(or (and (not R8C:$rC), R8C:$rA), + (and R8C:$rB, R8C:$rC)), + (SELBr8 R8C:$rA, R8C:$rB, R8C:$rC)>; + +def : Pat<(or (and (not R8C:$rC), R8C:$rA), + (and R8C:$rC, R8C:$rB)), + (SELBr8 R8C:$rA, R8C:$rB, R8C:$rC)>; //===----------------------------------------------------------------------===// // Vector shuffle... @@ -1958,10 +2157,13 @@ def SHLHr16_r32: [(set R16C:$rT, (shl R16C:$rA, R32C:$rB))]>; def SHLHIv8i16: - RI7Form<0b11111010000, (outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val), + RI7Form<0b11111010000, (outs VECREG:$rT), (ins VECREG:$rA, u7imm_i8:$val), "shlhi\t$rT, $rA, $val", RotateShift, [(set (v8i16 VECREG:$rT), - (SPUvec_shl_v8i16 (v8i16 VECREG:$rA), (i16 uimm7:$val)))]>; + (SPUvec_shl_v8i16 (v8i16 VECREG:$rA), (i8 uimm7:$val)))]>; + +def : Pat<(SPUvec_shl_v8i16 (v8i16 VECREG:$rA), (i16 uimm7:$val)), + (SHLHIv8i16 VECREG:$rA, imm:$val)>; def : Pat<(SPUvec_shl_v8i16 (v8i16 VECREG:$rA), (i32 uimm7:$val)), (SHLHIv8i16 VECREG:$rA, imm:$val)>; @@ -1970,6 +2172,9 @@ def SHLHIr16: RI7Form<0b11111010000, (outs R16C:$rT), (ins R16C:$rA, u7imm_i32:$val), "shlhi\t$rT, $rA, $val", RotateShift, [(set R16C:$rT, (shl R16C:$rA, (i32 uimm7:$val)))]>; + +def : Pat<(shl R16C:$rA, (i8 uimm7:$val)), + (SHLHIr16 R16C:$rA, uimm7:$val)>; def : Pat<(shl R16C:$rA, (i16 uimm7:$val)), (SHLHIr16 R16C:$rA, uimm7:$val)>; @@ -1986,10 +2191,13 @@ def SHLr32: [(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>; def SHLIv4i32: - 
RI7Form<0b11111010000, (outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val), + RI7Form<0b11111010000, (outs VECREG:$rT), (ins VECREG:$rA, u7imm_i8:$val), "shli\t$rT, $rA, $val", RotateShift, [(set (v4i32 VECREG:$rT), - (SPUvec_shl_v4i32 (v4i32 VECREG:$rA), (i16 uimm7:$val)))]>; + (SPUvec_shl_v4i32 (v4i32 VECREG:$rA), (i8 uimm7:$val)))]>; + +def: Pat<(SPUvec_shl_v4i32 (v4i32 VECREG:$rA), (i16 uimm7:$val)), + (SHLIv4i32 VECREG:$rA, uimm7:$val)>; def: Pat<(SPUvec_shl_v4i32 (v4i32 VECREG:$rA), (i32 uimm7:$val)), (SHLIv4i32 VECREG:$rA, uimm7:$val)>; @@ -2002,6 +2210,9 @@ def SHLIr32: def : Pat<(shl R32C:$rA, (i16 uimm7:$val)), (SHLIr32 R32C:$rA, uimm7:$val)>; +def : Pat<(shl R32C:$rA, (i8 uimm7:$val)), + (SHLIr32 R32C:$rA, uimm7:$val)>; + // SHLQBI vec form: Note that this will shift the entire vector (the 128-bit // register) to the left. Vector form is here to ensure type correctness. def SHLQBIvec: @@ -2044,11 +2255,27 @@ def ROTHr16_r32: "roth\t$rT, $rA, $rB", RotateShift, [(set R16C:$rT, (rotl R16C:$rA, R32C:$rB))]>; +// The rotate amount is in the same bits whether we've got an 8-bit, 16-bit or +// 32-bit register +def ROTHr16_r8: + RRForm<0b00111010000, (outs R16C:$rT), (ins R16C:$rA, R8C:$rB), + "roth\t$rT, $rA, $rB", RotateShift, + [(set R16C:$rT, (rotl R16C:$rA, (i32 (zext R8C:$rB))))]>; + +def : Pat<(rotl R16C:$rA, (i32 (sext R8C:$rB))), + (ROTHr16_r8 R16C:$rA, R8C:$rB)>; + +def : Pat<(rotl R16C:$rA, (i32 (zext R8C:$rB))), + (ROTHr16_r8 R16C:$rA, R8C:$rB)>; + +def : Pat<(rotl R16C:$rA, (i32 (anyext R8C:$rB))), + (ROTHr16_r8 R16C:$rA, R8C:$rB)>; + def ROTHIv8i16: - RI7Form<0b00111110000, (outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val), + RI7Form<0b00111110000, (outs VECREG:$rT), (ins VECREG:$rA, u7imm_i8:$val), "rothi\t$rT, $rA, $val", RotateShift, [(set (v8i16 VECREG:$rT), - (SPUvec_rotl_v8i16 VECREG:$rA, (i16 uimm7:$val)))]>; + (SPUvec_rotl_v8i16 VECREG:$rA, (i8 uimm7:$val)))]>; def : Pat<(SPUvec_rotl_v8i16 VECREG:$rA, (i16 uimm7:$val)), (ROTHIv8i16 VECREG:$rA, 
imm:$val)>; @@ -2066,6 +2293,11 @@ def ROTHIr16_i32: "rothi\t$rT, $rA, $val", RotateShift, [(set R16C:$rT, (rotl R16C:$rA, (i32 uimm7:$val)))]>; +def ROTHIr16_i8: + RI7Form<0b00111110000, (outs R16C:$rT), (ins R16C:$rA, u7imm_i8:$val), + "rothi\t$rT, $rA, $val", RotateShift, + [(set R16C:$rT, (rotl R16C:$rA, (i8 uimm7:$val)))]>; + def ROTv4i32: RRForm<0b00011010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), "rot\t$rT, $rA, $rB", RotateShift, @@ -2077,6 +2309,30 @@ def ROTr32: "rot\t$rT, $rA, $rB", RotateShift, [(set R32C:$rT, (rotl R32C:$rA, R32C:$rB))]>; +// The rotate amount is in the same bits whether we've got an 8-bit, 16-bit or +// 32-bit register +def ROTr32_r16_anyext: + RRForm<0b00011010000, (outs R32C:$rT), (ins R32C:$rA, R16C:$rB), + "rot\t$rT, $rA, $rB", RotateShift, + [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R16C:$rB))))]>; + +def : Pat<(rotl R32C:$rA, (i32 (zext R16C:$rB))), + (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>; + +def : Pat<(rotl R32C:$rA, (i32 (sext R16C:$rB))), + (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>; + +def ROTr32_r8_anyext: + RRForm<0b00011010000, (outs R32C:$rT), (ins R32C:$rA, R8C:$rB), + "rot\t$rT, $rA, $rB", RotateShift, + [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R8C:$rB))))]>; + +def : Pat<(rotl R32C:$rA, (i32 (zext R8C:$rB))), + (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>; + +def : Pat<(rotl R32C:$rA, (i32 (sext R8C:$rB))), + (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>; + def ROTIv4i32: RI7Form<0b00011110000, (outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val), "roti\t$rT, $rA, $val", RotateShift, @@ -2086,6 +2342,9 @@ def ROTIv4i32: def : Pat<(SPUvec_rotl_v4i32 (v4i32 VECREG:$rA), (i16 uimm7:$val)), (ROTIv4i32 VECREG:$rA, imm:$val)>; +def : Pat<(SPUvec_rotl_v4i32 (v4i32 VECREG:$rA), (i8 uimm7:$val)), + (ROTIv4i32 VECREG:$rA, imm:$val)>; + def ROTIr32: RI7Form<0b00011110000, (outs R32C:$rT), (ins R32C:$rA, u7imm_i32:$val), "roti\t$rT, $rA, $val", RotateShift, @@ -2096,6 +2355,11 @@ def ROTIr32_i16: "roti\t$rT, $rA, $val", 
RotateShift, [(set R32C:$rT, (rotl R32C:$rA, (i16 uimm7:$val)))]>; +def ROTIr32_i8: + RI7Form<0b00111110000, (outs R32C:$rT), (ins R32C:$rA, u7imm_i8:$val), + "roti\t$rT, $rA, $val", RotateShift, + [(set R32C:$rT, (rotl R32C:$rA, (i8 uimm7:$val)))]>; + // ROTQBY* vector forms: This rotates the entire vector, but vector registers // are used here for type checking (instances where ROTQBI is used actually // use vector registers) @@ -2155,9 +2419,9 @@ def : Pat<(SPUvec_srl_v8i16 (v8i16 VECREG:$rA), R16C:$rB), (ROTHMv8i16 VECREG:$rA, (SFIr32 (XSHWr16 R16C:$rB), 0))>; -def : Pat<(SPUvec_srl_v8i16 (v8i16 VECREG:$rA), /* R8C */ R16C:$rB), +def : Pat<(SPUvec_srl_v8i16 (v8i16 VECREG:$rA), R8C:$rB), (ROTHMv8i16 VECREG:$rA, - (SFIr32 (XSHWr16 /* (XSBHr8 R8C */ R16C:$rB) /*)*/, 0))>; + (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>; // ROTHM r16 form: Rotate 16-bit quantity to right, zero fill at the left // Note: This instruction doesn't match a pattern because rB must be negated @@ -2174,9 +2438,9 @@ def : Pat<(srl R16C:$rA, R16C:$rB), (ROTHMr16 R16C:$rA, (SFIr32 (XSHWr16 R16C:$rB), 0))>; -def : Pat<(srl R16C:$rA, /* R8C */ R16C:$rB), +def : Pat<(srl R16C:$rA, R8C:$rB), (ROTHMr16 R16C:$rA, - (SFIr32 (XSHWr16 /* (XSBHr8 R8C */ R16C:$rB) /* ) */, 0))>; + (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>; // ROTHMI v8i16 form: See the comment for ROTHM v8i16. 
The difference here is // that the immediate can be complemented, so that the user doesn't have to @@ -2189,6 +2453,9 @@ def ROTHMIv8i16: def: Pat<(SPUvec_srl_v8i16 (v8i16 VECREG:$rA), (i16 imm:$val)), (ROTHMIv8i16 VECREG:$rA, imm:$val)>; + +def: Pat<(SPUvec_srl_v8i16 (v8i16 VECREG:$rA), (i8 imm:$val)), + (ROTHMIv8i16 VECREG:$rA, imm:$val)>; def ROTHMIr16: RI7Form<0b10111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm:$val), @@ -2198,6 +2465,9 @@ def ROTHMIr16: def: Pat<(srl R16C:$rA, (i16 uimm7:$val)), (ROTHMIr16 R16C:$rA, uimm7:$val)>; +def: Pat<(srl R16C:$rA, (i8 uimm7:$val)), + (ROTHMIr16 R16C:$rA, uimm7:$val)>; + // ROTM v4i32 form: See the ROTHM v8i16 comments. def ROTMv4i32: RRForm<0b10011010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), @@ -2227,6 +2497,10 @@ def : Pat<(srl R32C:$rA, R16C:$rB), (ROTMr32 R32C:$rA, (SFIr32 (XSHWr16 R16C:$rB), 0))>; +def : Pat<(srl R32C:$rA, R8C:$rB), + (ROTMr32 R32C:$rA, + (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; + // ROTMI v4i32 form: See the comment for ROTHM v8i16. def ROTMIv4i32: RI7Form<0b10011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), @@ -2236,6 +2510,9 @@ def ROTMIv4i32: def : Pat<(SPUvec_srl_v4i32 VECREG:$rA, (i16 uimm7:$val)), (ROTMIv4i32 VECREG:$rA, uimm7:$val)>; + +def : Pat<(SPUvec_srl_v4i32 VECREG:$rA, (i8 uimm7:$val)), + (ROTMIv4i32 VECREG:$rA, uimm7:$val)>; // ROTMI r32 form: know how to complement the immediate value. def ROTMIr32: @@ -2246,6 +2523,9 @@ def ROTMIr32: def : Pat<(srl R32C:$rA, (i16 imm:$val)), (ROTMIr32 R32C:$rA, uimm7:$val)>; +def : Pat<(srl R32C:$rA, (i8 imm:$val)), + (ROTMIr32 R32C:$rA, uimm7:$val)>; + // ROTQMBYvec: This is a vector form merely so that when used in an // instruction pattern, type checking will succeed. This instruction assumes // that the user knew to complement $rB. 
@@ -2291,6 +2571,10 @@ def : Pat<(SPUvec_sra_v8i16 VECREG:$rA, R16C:$rB), (ROTMAHv8i16 VECREG:$rA, (SFIr32 (XSHWr16 R16C:$rB), 0))>; +def : Pat<(SPUvec_sra_v8i16 VECREG:$rA, R8C:$rB), + (ROTMAHv8i16 VECREG:$rA, + (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; + def ROTMAHr16: RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB), "rotmah\t$rT, $rA, $rB", RotateShift, @@ -2303,6 +2587,10 @@ def : Pat<(sra R16C:$rA, R16C:$rB), (ROTMAHr16 R16C:$rA, (SFIr32 (XSHWr16 R16C:$rB), 0))>; +def : Pat<(sra R16C:$rA, R8C:$rB), + (ROTMAHr16 R16C:$rA, + (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; + def ROTMAHIv8i16: RRForm<0b01111110000, (outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val), "rotmahi\t$rT, $rA, $val", RotateShift, @@ -2312,6 +2600,9 @@ def ROTMAHIv8i16: def : Pat<(SPUvec_sra_v8i16 (v8i16 VECREG:$rA), (i16 uimm7:$val)), (ROTMAHIv8i16 (v8i16 VECREG:$rA), (i32 uimm7:$val))>; +def : Pat<(SPUvec_sra_v8i16 (v8i16 VECREG:$rA), (i8 uimm7:$val)), + (ROTMAHIv8i16 (v8i16 VECREG:$rA), (i32 uimm7:$val))>; + def ROTMAHIr16: RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val), "rotmahi\t$rT, $rA, $val", RotateShift, @@ -2320,6 +2611,9 @@ def ROTMAHIr16: def : Pat<(sra R16C:$rA, (i32 imm:$val)), (ROTMAHIr16 R16C:$rA, uimm7:$val)>; +def : Pat<(sra R16C:$rA, (i8 imm:$val)), + (ROTMAHIr16 R16C:$rA, uimm7:$val)>; + def ROTMAv4i32: RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), "rotma\t$rT, $rA, $rB", RotateShift, @@ -2332,6 +2626,10 @@ def : Pat<(SPUvec_sra_v4i32 VECREG:$rA, R16C:$rB), (ROTMAv4i32 (v4i32 VECREG:$rA), (SFIr32 (XSHWr16 R16C:$rB), 0))>; +def : Pat<(SPUvec_sra_v4i32 VECREG:$rA, R8C:$rB), + (ROTMAv4i32 (v4i32 VECREG:$rA), + (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; + def ROTMAr32: RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), "rotma\t$rT, $rA, $rB", RotateShift, @@ -2344,6 +2642,10 @@ def : Pat<(sra R32C:$rA, R16C:$rB), (ROTMAr32 R32C:$rA, (SFIr32 (XSHWr16 R16C:$rB), 0))>; +def : Pat<(sra R32C:$rA, 
R8C:$rB), + (ROTMAr32 R32C:$rA, + (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; + def ROTMAIv4i32: RRForm<0b01011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), "rotmai\t$rT, $rA, $val", RotateShift, @@ -2361,6 +2663,9 @@ def ROTMAIr32: def : Pat<(sra R32C:$rA, (i16 uimm7:$val)), (ROTMAIr32 R32C:$rA, uimm7:$val)>; +def : Pat<(sra R32C:$rA, (i8 uimm7:$val)), + (ROTMAIr32 R32C:$rA, uimm7:$val)>; + //===----------------------------------------------------------------------===// // Branch and conditionals: //===----------------------------------------------------------------------===// @@ -2401,12 +2706,21 @@ let isTerminator = 1, isBarrier = 1 in { } // Comparison operators: +def CEQBr8: + RRForm<0b00001011110, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB), + "ceqb\t$rT, $rA, $rB", ByteOp, + [/* no pattern to match */]>; def CEQBv16i8: RRForm<0b00001011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), "ceqb\t$rT, $rA, $rB", ByteOp, [/* no pattern to match: intrinsic */]>; +def CEQBIr8: + RI10Form<0b01111110, (outs R8C:$rT), (ins R8C:$rA, s7imm:$val), + "ceqbi\t$rT, $rA, $val", ByteOp, + [/* no pattern to match: intrinsic */]>; + def CEQBIv16i8: RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, s7imm:$val), "ceqbi\t$rT, $rA, $val", ByteOp, @@ -3075,6 +3389,10 @@ def : Pat<(SPUFPconstant (f32 fpimm:$imm)), def : Pat<(v4i32 v4i32Imm:$imm), (IOHLvec (v4i32 (ILHUv4i32 (HI16_vec v4i32Imm:$imm))), (LO16_vec v4i32Imm:$imm))>; + +// 8-bit constants +def : Pat<(i8 imm:$imm), + (ILHr8 imm:$imm)>; //===----------------------------------------------------------------------===// // Call instruction patterns: @@ -3095,14 +3413,34 @@ def : Pat<(SPUextract_i1_zext R32C:$rSrc), def : Pat<(sext_inreg R32C:$rSrc, i8), (XSHWr32 (XSBHr32 R32C:$rSrc))>; +def : Pat<(i32 (sext R8C:$rSrc)), + (XSHWr16 (XSBHr8 R8C:$rSrc))>; + def : Pat<(SPUextract_i8_sext VECREG:$rSrc), (XSHWr32 (XSBHr32 (ORi32_v4i32 (v4i32 VECREG:$rSrc), (v4i32 VECREG:$rSrc))))>; +// zext 8->16: Zero extend bytes 
to halfwords +def : Pat<(i16 (zext R8C:$rSrc)), + (ANDHI1To2 R8C:$rSrc, 0xff)>; + +// zext 8->32 from preferred slot in load/store def : Pat<(SPUextract_i8_zext VECREG:$rSrc), (ANDIr32 (ORi32_v4i32 (v4i32 VECREG:$rSrc), (v4i32 VECREG:$rSrc)), 0xff)>; +// zext 8->32: Zero extend bytes to words +def : Pat<(i32 (zext R8C:$rSrc)), + (ANDI1To4 R8C:$rSrc, 0xff)>; + +// anyext 8->16: Extend 8->16 bits, irrespective of sign +def : Pat<(i16 (anyext R8C:$rSrc)), + (ORHI1To2 R8C:$rSrc, 0)>; + +// anyext 8->32: Extend 8->32 bits, irrespective of sign +def : Pat<(i32 (anyext R8C:$rSrc)), + (ORI1To4 R8C:$rSrc, 0)>; + // zext 16->32: Zero extend halfwords to words (note that we have to juggle the // 0xffff constant since it will not fit into an immediate.) def : Pat<(i32 (zext R16C:$rSrc)), diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td index 6f079eab647..d55e8683104 100644 --- a/lib/Target/CellSPU/SPUOperands.td +++ b/lib/Target/CellSPU/SPUOperands.td @@ -99,15 +99,21 @@ def i32ImmSExt10 : PatLeaf<(imm), [{ return isI32IntS10Immediate(N); }]>; +// i32ImmUns10 predicate - True if the i32 immediate fits in a 10-bit unsigned +// field. Used by RI10Form instructions like 'ldq'. +def i32ImmUns10 : PatLeaf<(imm), [{ + return isI32IntU10Immediate(N); +}]>; + // i16ImmSExt10 predicate - True if the i16 immediate fits in a 10-bit sign // extended field. Used by RI10Form instructions like 'ldq'. def i16ImmSExt10 : PatLeaf<(imm), [{ return isI16IntS10Immediate(N); }]>; -// i16ImmU10 predicate - True if the i16 immediate fits into a 10-bit unsigned +// i16ImmUns10 predicate - True if the i16 immediate fits into a 10-bit unsigned // value. Used by RI10Form instructions. 
-def i16ImmU10 : PatLeaf<(imm), [{ +def i16ImmUns10 : PatLeaf<(imm), [{ return isI16IntU10Immediate(N); }]>; @@ -261,9 +267,21 @@ def v8i16SExt10Imm: PatLeaf<(build_vector), [{ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).Val != 0; }], v8i16SExt10Imm_xform>; +// v8i16Uns10Imm_xform function: convert build_vector to 16-bit unsigned +// immediate constant load for v8i16 vectors. +def v8i16Uns10Imm_xform: SDNodeXForm; + +// v8i16Uns10Imm: Predicate test for 16-bit unsigned immediate constant +// load, works in conjunction with its transform function. +def v8i16Uns10Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).Val != 0; +}], v8i16Uns10Imm_xform>; + // v8i16SExt16Imm_xform function: convert build_vector to 16-bit sign extended // immediate constant load for v8i16 vectors. -def v8i16SExt16Imm_xform: SDNodeXForm; @@ -271,7 +289,7 @@ def v8i16SExt16Imm_xform: SDNodeXForm; +}], v8i16Uns16Imm_xform>; // v4i32SExt10Imm_xform function: convert build_vector to 10-bit sign extended // immediate constant load for v4i32 vectors. @@ -285,6 +303,18 @@ def v4i32SExt10Imm: PatLeaf<(build_vector), [{ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).Val != 0; }], v4i32SExt10Imm_xform>; +// v4i32Uns10Imm_xform function: convert build_vector to 10-bit unsigned +// immediate constant load for v4i32 vectors. +def v4i32Uns10Imm_xform: SDNodeXForm; + +// v4i32Uns10Imm: Predicate test for 10-bit unsigned immediate constant +// load, works in conjunction with its transform function. +def v4i32Uns10Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).Val != 0; +}], v4i32Uns10Imm_xform>; + // v4i32SExt16Imm_xform function: convert build_vector to 16-bit sign extended // immediate constant load for v4i32 vectors. 
def v4i32SExt16Imm_xform: SDNodeXForm { let PrintMethod = "printU7ImmOperand"; } +def u7imm_i8: Operand { + let PrintMethod = "printU7ImmOperand"; +} + def u7imm_i32: Operand { let PrintMethod = "printU7ImmOperand"; } @@ -412,6 +446,10 @@ def u10imm: Operand { let PrintMethod = "printU10ImmOperand"; } +def u10imm_i8: Operand { + let PrintMethod = "printU10ImmOperand"; +} + def u10imm_i32: Operand { let PrintMethod = "printU10ImmOperand"; } @@ -420,6 +458,10 @@ def s16imm : Operand { let PrintMethod = "printS16ImmOperand"; } +def s16imm_i8: Operand { + let PrintMethod = "printS16ImmOperand"; +} + def s16imm_i32: Operand { let PrintMethod = "printS16ImmOperand"; } diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index af2a270472f..7822d1e3b89 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -328,7 +328,9 @@ void SPURegisterInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, /* do what loadRegFromStackSlot does here... 
*/ } else { unsigned Opc = 0; - if (RC == SPU::R16CRegisterClass) { + if (RC == SPU::R8CRegisterClass) { + /* do brilliance here */ + } else if (RC == SPU::R16CRegisterClass) { /* Opc = PPC::LWZ; */ } else if (RC == SPU::R32CRegisterClass) { /* Opc = PPC::LD; */ @@ -369,10 +371,9 @@ void SPURegisterInfo::copyRegToReg(MachineBasicBlock &MBB, abort(); } - /* if (DestRC == SPU::R8CRegisterClass) { + if (DestRC == SPU::R8CRegisterClass) { BuildMI(MBB, MI, TII.get(SPU::ORBIr8), DestReg).addReg(SrcReg).addImm(0); - } else */ - if (DestRC == SPU::R16CRegisterClass) { + } else if (DestRC == SPU::R16CRegisterClass) { BuildMI(MBB, MI, TII.get(SPU::ORHIr16), DestReg).addReg(SrcReg).addImm(0); } else if (DestRC == SPU::R32CRegisterClass) { BuildMI(MBB, MI, TII.get(SPU::ORIr32), DestReg).addReg(SrcReg).addImm(0); diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td index 537922524c2..de64439826d 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.td +++ b/lib/Target/CellSPU/SPURegisterInfo.td @@ -359,6 +359,40 @@ def R16C : RegisterClass<"SPU", [i16], 128, }]; } +// The SPU's registers as 8-bit wide (byte) "preferred slot": +def R8C : RegisterClass<"SPU", [i8], 128, + [ + /* volatile register */ + R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, + R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, + R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, + R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, + R77, R78, R79, + /* non-volatile register: take hint from PPC and allocate in reverse order */ + R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, + R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, + R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, + R86, R85, R84, R83, R82, R81, R80, + /* environment 
ptr, SP, LR */ + R2, R1, R0 ]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + R8CClass::iterator + R8CClass::allocation_order_begin(const MachineFunction &MF) const { + return begin(); + } + R8CClass::iterator + R8CClass::allocation_order_end(const MachineFunction &MF) const { + return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr) + } + }]; +} + // The SPU's registers as vector registers: def VECREG : RegisterClass<"SPU", [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128, [ diff --git a/test/CodeGen/CellSPU/and_ops.ll b/test/CodeGen/CellSPU/and_ops.ll index 5c88d7ed645..f23355ee53c 100644 --- a/test/CodeGen/CellSPU/and_ops.ll +++ b/test/CodeGen/CellSPU/and_ops.ll @@ -1,9 +1,9 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s -; RUN: grep and %t1.s | count 227 +; RUN: grep and %t1.s | count 232 ; RUN: grep andc %t1.s | count 85 ; RUN: grep andi %t1.s | count 36 -; RUN: grep andhi %t1.s | count 31 -; RUN: grep andbi %t1.s | count 1 +; RUN: grep andhi %t1.s | count 30 +; RUN: grep andbi %t1.s | count 4 ; AND instruction generation: define <4 x i32> @and_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { @@ -258,13 +258,19 @@ define <16 x i8> @and_v16i8(<16 x i8> %in) { } define i8 @and_u8(i8 zeroext %in) zeroext { - ; ANDI generated: - %tmp37 = and i8 %in, 37 ; [#uses=1] + ; ANDBI generated: + %tmp37 = and i8 %in, 37 ret i8 %tmp37 } -define i8 @and_i8(i8 signext %in) signext { - ; ANDHI generated - %tmp38 = and i8 %in, 37 ; [#uses=1] +define i8 @and_sext8(i8 signext %in) signext { + ; ANDBI generated + %tmp38 = and i8 %in, 37 + ret i8 %tmp38 +} + +define i8 @and_i8(i8 %in) { + ; ANDBI generated + %tmp38 = and i8 %in, 205 ret i8 %tmp38 } diff --git a/test/CodeGen/CellSPU/nand.ll b/test/CodeGen/CellSPU/nand.ll new file mode 100644 index 00000000000..091f4b2edcc --- /dev/null +++ b/test/CodeGen/CellSPU/nand.ll @@ -0,0 +1,119 
@@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep nand %t1.s | count 90 +; RUN: grep and %t1.s | count 94 +; RUN: grep xsbh %t1.s | count 2 +; RUN: grep xshw %t1.s | count 4 + +define <4 x i32> @nand_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = and <4 x i32> %arg2, %arg1 ; <<4 x i32>> [#uses=1] + %B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 > + ret <4 x i32> %B +} + +define <4 x i32> @nand_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { + %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 > + ret <4 x i32> %B +} + +define <8 x i16> @nand_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = and <8 x i16> %arg2, %arg1 ; <<8 x i16>> [#uses=1] + %B = xor <8 x i16> %A, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + ret <8 x i16> %B +} + +define <8 x i16> @nand_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) { + %A = and <8 x i16> %arg1, %arg2 ; <<8 x i16>> [#uses=1] + %B = xor <8 x i16> %A, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + ret <8 x i16> %B +} + +define <16 x i8> @nand_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = and <16 x i8> %arg2, %arg1 ; <<16 x i8>> [#uses=1] + %B = xor <16 x i8> %A, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + ret <16 x i8> %B +} + +define <16 x i8> @nand_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { + %A = and <16 x i8> %arg1, %arg2 ; <<16 x i8>> [#uses=1] + %B = xor <16 x i8> %A, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + ret <16 x i8> %B +} + +define i32 @nand_i32_1(i32 %arg1, i32 %arg2) { + %A = and i32 %arg2, %arg1 ; [#uses=1] + %B = xor i32 %A, -1 ; [#uses=1] + ret i32 %B +} + +define i32 @nand_i32_2(i32 %arg1, i32 %arg2) { + %A = and i32 %arg1, %arg2 ; [#uses=1] + %B = xor i32 %A, -1 ; [#uses=1] + ret i32 %B +} + +define i16 @nand_i16_1(i16 
signext %arg1, i16 signext %arg2) signext { + %A = and i16 %arg2, %arg1 ; [#uses=1] + %B = xor i16 %A, -1 ; [#uses=1] + ret i16 %B +} + +define i16 @nand_i16_2(i16 signext %arg1, i16 signext %arg2) signext { + %A = and i16 %arg1, %arg2 ; [#uses=1] + %B = xor i16 %A, -1 ; [#uses=1] + ret i16 %B +} + +define i16 @nand_i16u_1(i16 zeroext %arg1, i16 zeroext %arg2) zeroext { + %A = and i16 %arg2, %arg1 ; [#uses=1] + %B = xor i16 %A, -1 ; [#uses=1] + ret i16 %B +} + +define i16 @nand_i16u_2(i16 zeroext %arg1, i16 zeroext %arg2) zeroext { + %A = and i16 %arg1, %arg2 ; [#uses=1] + %B = xor i16 %A, -1 ; [#uses=1] + ret i16 %B +} + +define i8 @nand_i8u_1(i8 zeroext %arg1, i8 zeroext %arg2) zeroext { + %A = and i8 %arg2, %arg1 ; [#uses=1] + %B = xor i8 %A, -1 ; [#uses=1] + ret i8 %B +} + +define i8 @nand_i8u_2(i8 zeroext %arg1, i8 zeroext %arg2) zeroext { + %A = and i8 %arg1, %arg2 ; [#uses=1] + %B = xor i8 %A, -1 ; [#uses=1] + ret i8 %B +} + +define i8 @nand_i8_1(i8 signext %arg1, i8 signext %arg2) signext { + %A = and i8 %arg2, %arg1 ; [#uses=1] + %B = xor i8 %A, -1 ; [#uses=1] + ret i8 %B +} + +define i8 @nand_i8_2(i8 signext %arg1, i8 signext %arg2) signext { + %A = and i8 %arg1, %arg2 ; [#uses=1] + %B = xor i8 %A, -1 ; [#uses=1] + ret i8 %B +} + +define i8 @nand_i8_3(i8 %arg1, i8 %arg2) { + %A = and i8 %arg2, %arg1 ; [#uses=1] + %B = xor i8 %A, -1 ; [#uses=1] + ret i8 %B +} + +define i8 @nand_i8_4(i8 %arg1, i8 %arg2) { + %A = and i8 %arg1, %arg2 ; [#uses=1] + %B = xor i8 %A, -1 ; [#uses=1] + ret i8 %B +} -- 2.11.0