From 06eabdeb15ce46ab4a331d6d760ce919fe249a71 Mon Sep 17 00:00:00 2001 From: Scott Michel Date: Sat, 27 Dec 2008 04:51:36 +0000 Subject: [PATCH] - Remove Tilmann's custom truncate lowering: it completely hosed over DAGcombine's ability to find reasons to remove truncates when they were not needed. Consequently, the CellSPU backend would produce correct, but _really slow and horrible_, code. Replaced with instruction sequences that do the equivalent truncation in SPUInstrInfo.td. - Re-examine how unaligned loads and stores work. Generated unaligned load code has been tested on the CellSPU hardware; see the i32operations.c and i64operations.c in CodeGen/CellSPU/useful-harnesses. (While they may be toy test code, they do prove that some real-world code compiles correctly.) - Fix truncating stores in bug 3193 (note: unpack_df.ll will still make llc fault because i64 ult is not yet implemented.) - Add i64 eq and neq for setcc and select/setcc; start a new instruction information file for them in SPU64InstrInfo.td. Additional i64 operations should be added to this file and not to SPUInstrInfo.td. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61447 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp | 8 +- lib/Target/CellSPU/SPU64InstrInfo.td | 77 ++ lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 66 +- lib/Target/CellSPU/SPUISelLowering.cpp | 797 +++++++++++---------- lib/Target/CellSPU/SPUISelLowering.h | 6 +- lib/Target/CellSPU/SPUInstrFormats.td | 5 +- lib/Target/CellSPU/SPUInstrInfo.cpp | 55 +- lib/Target/CellSPU/SPUInstrInfo.td | 726 ++++++++++++++----- lib/Target/CellSPU/SPUNodes.td | 14 +- lib/Target/CellSPU/SPUOperands.td | 10 +- lib/Target/CellSPU/SPURegisterInfo.cpp | 5 - lib/Target/CellSPU/SPUTargetAsmInfo.cpp | 7 + test/CodeGen/CellSPU/call_indirect.ll | 2 +- test/CodeGen/CellSPU/icmp64.ll | 144 ++++ test/CodeGen/CellSPU/stores.ll | 60 ++ test/CodeGen/CellSPU/struct_1.ll | 2 +- test/CodeGen/CellSPU/trunc.ll | 41 +- .../CellSPU/useful-harnesses/i32operations.c | 69 ++ .../CellSPU/useful-harnesses/i64operations.c | 68 ++ 19 files changed, 1509 insertions(+), 653 deletions(-) create mode 100644 lib/Target/CellSPU/SPU64InstrInfo.td create mode 100644 test/CodeGen/CellSPU/icmp64.ll create mode 100644 test/CodeGen/CellSPU/useful-harnesses/i32operations.c create mode 100644 test/CodeGen/CellSPU/useful-harnesses/i64operations.c
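The useful-harnesses tests referenced above reduce to checks of the following shape (a minimal sketch only; function names and test values are illustrative, not the actual contents of i64operations.c): wrap the interesting i64 comparisons in noinline functions so llc has to emit the new setcc/select sequences, run the binary on SPU hardware, and compare the printed answers against a host build.

/* Minimal sketch in the spirit of i64operations.c; names and values are
 * illustrative, not the actual harness contents. */
#include <stdio.h>
#include <stdint.h>

/* noinline keeps the compiler from constant-folding the comparisons away */
__attribute__((noinline)) int i64_eq(int64_t a, int64_t b)  { return a == b; }
__attribute__((noinline)) int i64_neq(int64_t a, int64_t b) { return a != b; }

int main(void) {
  int64_t a = 0x0001020304050607LL;
  int64_t b = 0x0001020304050607LL;
  int64_t c = 0x1001020304050607LL;   /* differs from a in the high word */

  printf("i64_eq(a, b)  = %d (expect 1)\n", i64_eq(a, b));
  printf("i64_eq(a, c)  = %d (expect 0)\n", i64_eq(a, c));
  printf("i64_neq(a, b) = %d (expect 0)\n", i64_neq(a, b));
  printf("i64_neq(a, c) = %d (expect 1)\n", i64_neq(a, c));
  return 0;
}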
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp index 589a2600050..98aa084d504 100644 --- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp @@ -117,7 +117,7 @@ namespace { } void - printMemRegImmS7(const MachineInstr *MI, unsigned OpNo) + printShufAddr(const MachineInstr *MI, unsigned OpNo) { char value = MI->getOperand(OpNo).getImm(); O << (int) value; @@ -183,16 +183,16 @@ namespace { } void - printMemRegImmS10(const MachineInstr *MI, unsigned OpNo) + printDFormAddr(const MachineInstr *MI, unsigned OpNo) { const MachineOperand &MO = MI->getOperand(OpNo); assert(MO.isImm() && - "printMemRegImmS10 first operand is not immedate"); + "printDFormAddr first operand is not immediate"); int64_t value = int64_t(MI->getOperand(OpNo).getImm()); int16_t value16 = int16_t(value); assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1) && "Invalid dform s10 offset argument"); - O << value16 << "("; + O << (value16 & ~0xf) << "("; printOperand(MI, OpNo+1); O << ")"; } diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td new file mode 100644 index 00000000000..6d679bac724 --- /dev/null +++ b/lib/Target/CellSPU/SPU64InstrInfo.td @@ -0,0 +1,77 @@ +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ // 64-bit comparisons: // // 1. The instruction sequences for vector versus scalar differ by a // constant. // // 2. There are no "immediate" forms, since loading a 64-bit constant // may require a constant pool load. // // 3. i64 setcc results are i32, which are subsequently converted to a FSM // mask when used in a select pattern. // // 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask // (TODO) // // M00$E Kan be Pretty N@sTi!!!!! (apologies to Monty!) //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ // selb instruction definition for i64. Note that the selection mask is // a vector, produced by various forms of FSM: def SELBr64_cond: SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC), [/* no pattern */]>; class CodeFrag<dag frag> { dag Fragment = frag; } class I64SELECTNegCond<PatFrag cond, CodeFrag cmpare>: Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse), (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 cmpare.Fragment))>; class I64SETCCNegCond<PatFrag cond, CodeFrag cmpare>: Pat<(cond R64C:$rA, R64C:$rB), (XORIr32 cmpare.Fragment, -1)>; // The i64 seteq fragment that does the scalar->vector conversion and // comparison: def CEQr64compare: CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))), 0x0000000c)>; // The i64 seteq fragment that does the vector comparison def CEQv2i64compare: CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0x0000000f)>; // i64 seteq (equality): the setcc result is i32, which is converted to a // vector FSM mask when used in a select pattern. // // v2i64 seteq (equality): the setcc result is v4i32 multiclass CompareEqual64 { // Plain old comparison, converts back to i32 scalar def r64: CodeFrag<(ORi32_v4i32 CEQr64compare.Fragment)>; def v2i64: CodeFrag<(ORi32_v4i32 CEQv2i64compare.Fragment)>; // SELB mask from FSM: def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQr64compare.Fragment))>; def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQv2i64compare.Fragment))>; } defm I64EQ: CompareEqual64; def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>; def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>; def I64Select: Pat<(select R32C:$rC, R64C:$rB, R64C:$rA), (SELBr64_cond R64C:$rA, R64C:$rB, (FSMr32 R32C:$rC))>; def : I64SETCCNegCond<setne, I64EQr64>; def : I64SELECTNegCond<setne, I64EQr64>; \ No newline at end of file
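For the scalar i64 seteq fragment above, the sequence is: promote both i64 operands into v2i64 (the doubleword lands in word slots 0 and 1), ceq compares the quadwords word-by-word, gb gathers the low bit of each word comparison into a 4-bit value (word 0 becoming the most significant bit), and cgti tests that value against 0x0c. A rough host-side model of that arithmetic follows; it is a sketch under the assumption that the two word slots outside the promoted doubleword hold the same bytes in both operands, so they compare equal exactly when the doublewords match:

/* Host-side model of (CGTI (GB (CEQ ...)), 0x0c) from CEQr64compare.
 * Assumption: the word slots outside the promoted doubleword mirror it,
 * so all four gather bits are set when the doublewords are equal.
 * Slot/bit convention: word 0 supplies gather bit 3 (the leftmost). */
#include <stdint.h>

static int gather_eq_bits(const uint32_t a[4], const uint32_t b[4]) {
  int bits = 0;
  for (int i = 0; i < 4; ++i)            /* ceq word-wise, then gb */
    bits |= (a[i] == b[i]) << (3 - i);
  return bits;                           /* 0..15 */
}

int i64_seteq_model(uint64_t x, uint64_t y) {
  uint32_t a[4] = { (uint32_t)(x >> 32), (uint32_t)x,
                    (uint32_t)(x >> 32), (uint32_t)x };
  uint32_t b[4] = { (uint32_t)(y >> 32), (uint32_t)y,
                    (uint32_t)(y >> 32), (uint32_t)y };
  /* cgti ..., 0x0c: under the mirroring assumption the only gather
     values are 0x0, 0x5, 0xa, 0xf, and only the all-equal 0xf > 0x0c */
  return gather_eq_bits(a, b) > 0x0c;
}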
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 9ac0e2e256c..f51aba2fda6 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -165,24 +165,23 @@ namespace { MVT VT; unsigned ldresult_ins; /// LDRESULT instruction (0 = undefined) bool ldresult_imm; /// LDRESULT instruction requires immediate? - int prefslot_byte; /// Byte offset of the "preferred" slot + unsigned lrinst; /// LR instruction }; const valtype_map_s valtype_map[] = { - { MVT::i1, 0, false, 3 }, - { MVT::i8, SPU::ORBIr8, true, 3 }, - { MVT::i16, SPU::ORHIr16, true, 2 }, - { MVT::i32, SPU::ORIr32, true, 0 }, - { MVT::i64, SPU::ORr64, false, 0 }, - { MVT::f32, SPU::ORf32, false, 0 }, - { MVT::f64, SPU::ORf64, false, 0 }, + { MVT::i8, SPU::ORBIr8, true, SPU::LRr8 }, + { MVT::i16, SPU::ORHIr16, true, SPU::LRr16 }, + { MVT::i32, SPU::ORIr32, true, SPU::LRr32 }, + { MVT::i64, SPU::ORr64, false, SPU::LRr64 }, + { MVT::f32, SPU::ORf32, false, SPU::LRf32 }, + { MVT::f64, SPU::ORf64, false, SPU::LRf64 }, // vector types... (sigh!) - { MVT::v16i8, 0, false, 0 }, - { MVT::v8i16, 0, false, 0 }, - { MVT::v4i32, 0, false, 0 }, - { MVT::v2i64, 0, false, 0 }, - { MVT::v4f32, 0, false, 0 }, - { MVT::v2f64, 0, false, 0 } + { MVT::v16i8, 0, false, SPU::LRv16i8 }, + { MVT::v8i16, 0, false, SPU::LRv8i16 }, + { MVT::v4i32, 0, false, SPU::LRv4i32 }, + { MVT::v2i64, 0, false, SPU::LRv2i64 }, + { MVT::v4f32, 0, false, SPU::LRv4f32 }, + { MVT::v2f64, 0, false, SPU::LRv2f64 } }; const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]); @@ -686,31 +685,32 @@ SPUDAGToDAGISel::Select(SDValue Op) { Result = CurDAG->getTargetNode(Opc, VT, MVT::Other, Arg, Arg, Chain); } - Chain = SDValue(Result, 1); - return Result; } else if (Opc == SPUISD::IndirectAddr) { - SDValue Op0 = Op.getOperand(0); - if (Op0.getOpcode() == SPUISD::LDRESULT) { - /* || Op0.getOpcode() == SPUISD::AFormAddr) */ - // (IndirectAddr (LDRESULT, imm)) - SDValue Op1 = Op.getOperand(1); - MVT VT = Op.getValueType(); - - DEBUG(cerr << "CellSPU: IndirectAddr(LDRESULT, imm):\nOp0 = "); - DEBUG(Op.getOperand(0).getNode()->dump(CurDAG)); - DEBUG(cerr << "\nOp1 = "); - DEBUG(Op.getOperand(1).getNode()->dump(CurDAG)); - DEBUG(cerr << "\n"); - + // Look at the operands: SelectCode() will catch the cases that aren't + // specifically handled here. + // + // SPUInstrInfo catches the following patterns: + // (SPUindirect (SPUhi ...), (SPUlo ...)) + // (SPUindirect $sp, imm) + MVT VT = Op.getValueType(); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + RegisterSDNode *RN; + + if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo) + || (Op0.getOpcode() == ISD::Register + && ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0 + && RN->getReg() != SPU::R1))) { + NewOpc = SPU::Ar32; if (Op1.getOpcode() == ISD::Constant) { ConstantSDNode *CN = cast<ConstantSDNode>(Op1); - Op1 = CurDAG->getTargetConstant(CN->getZExtValue(), VT); + Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT); NewOpc = (isI32IntS10Immediate(CN) ? SPU::AIr32 : SPU::Ar32); - Ops[0] = Op0; - Ops[1] = Op1; - n_ops = 2; } + Ops[0] = Op0; + Ops[1] = Op1; + n_ops = 2; } } diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index c3c31e0f470..e975d0d039c 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -15,6 +15,7 @@ #include "SPUISelLowering.h" #include "SPUTargetMachine.h" #include "SPUFrameInfo.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -77,37 +78,6 @@ namespace { return retval; } - - //! Predicate that returns true if operand is a memory target - /*! - \arg Op Operand to test - \return true if the operand is a memory target (i.e., global - address, external symbol, constant pool) or an A-form - address.
- */ - bool isMemoryOperand(const SDValue &Op) - { - const unsigned Opc = Op.getOpcode(); - return (Opc == ISD::GlobalAddress - || Opc == ISD::GlobalTLSAddress - || Opc == ISD::JumpTable - || Opc == ISD::ConstantPool - || Opc == ISD::ExternalSymbol - || Opc == ISD::TargetGlobalAddress - || Opc == ISD::TargetGlobalTLSAddress - || Opc == ISD::TargetJumpTable - || Opc == ISD::TargetConstantPool - || Opc == ISD::TargetExternalSymbol - || Opc == SPUISD::AFormAddr); - } - - //! Predicate that returns true if the operand is an indirect target - bool isIndirectOperand(const SDValue &Op) - { - const unsigned Opc = Op.getOpcode(); - return (Opc == ISD::Register - || Opc == SPUISD::LDRESULT); - } } SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) @@ -135,20 +105,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom); - setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom); - setTruncStoreAction(MVT::i8, MVT::i8, Custom); - setTruncStoreAction(MVT::i16, MVT::i8, Custom); - setTruncStoreAction(MVT::i32, MVT::i8, Custom); - setTruncStoreAction(MVT::i64, MVT::i8, Custom); - setTruncStoreAction(MVT::i128, MVT::i8, Custom); - - setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom); - setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom); - - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); // SPU constant load actions are custom lowered: setOperationAction(ISD::Constant, MVT::i64, Custom); @@ -160,11 +118,33 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) ++sctype) { MVT VT = (MVT::SimpleValueType)sctype; - setOperationAction(ISD::LOAD, VT, Custom); - setOperationAction(ISD::STORE, VT, Custom); + setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::STORE, VT, Custom); + setLoadExtAction(ISD::EXTLOAD, VT, Custom); + setLoadExtAction(ISD::ZEXTLOAD, VT, Custom); + setLoadExtAction(ISD::SEXTLOAD, VT, Custom); + + for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) { + MVT StoreVT = (MVT::SimpleValueType) stype; + setTruncStoreAction(VT, StoreVT, Expand); + } + } + + for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64; + ++sctype) { + MVT VT = (MVT::SimpleValueType) sctype; + + setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::STORE, VT, Custom); + + for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) { + MVT StoreVT = (MVT::SimpleValueType) stype; + setTruncStoreAction(VT, StoreVT, Expand); + } } - // Custom lower BRCOND for i8 to "promote" the result to i16 + // Custom lower BRCOND for i8 to "promote" the result to whatever the result + // operand happens to be: setOperationAction(ISD::BRCOND, MVT::Other, Custom); // Expand the jumptable branches @@ -176,14 +156,12 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::SELECT_CC, MVT::i8, Custom); setOperationAction(ISD::SELECT_CC, MVT::i16, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); -#if 0 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); -#endif // SPU has no intrinsics for these particular operations: setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); - // PowerPC has no SREM/UREM instructions + // SPU has no SREM/UREM 
instructions setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); setOperationAction(ISD::SREM, MVT::i64, Expand); setOperationAction(ISD::UREM, MVT::i64, Expand); @@ -232,14 +210,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::MUL, MVT::i32, Custom); setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall - // SMUL_LOHI, UMUL_LOHI -#if 0 - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); -#endif - // Need to custom handle (some) common i8, i64 math ops setOperationAction(ISD::ADD, MVT::i64, Custom); setOperationAction(ISD::SUB, MVT::i8, Custom); @@ -265,12 +235,12 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::SELECT, MVT::i8, Legal); setOperationAction(ISD::SELECT, MVT::i16, Legal); setOperationAction(ISD::SELECT, MVT::i32, Legal); - setOperationAction(ISD::SELECT, MVT::i64, Expand); + setOperationAction(ISD::SELECT, MVT::i64, Legal); setOperationAction(ISD::SETCC, MVT::i8, Legal); setOperationAction(ISD::SETCC, MVT::i16, Legal); - setOperationAction(ISD::SETCC, MVT::i32, Legal); - setOperationAction(ISD::SETCC, MVT::i64, Expand); + setOperationAction(ISD::SETCC, MVT::i32, Custom); + setOperationAction(ISD::SETCC, MVT::i64, Custom); // Zero extension and sign extension for i64 have to be // custom legalized @@ -278,10 +248,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom); setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom); - // Custom lower truncates - setOperationAction(ISD::TRUNCATE, MVT::i8, Custom); - setOperationAction(ISD::TRUNCATE, MVT::i16, Custom); - setOperationAction(ISD::TRUNCATE, MVT::i32, Custom); + // Custom lower i128 -> i64 truncates setOperationAction(ISD::TRUNCATE, MVT::i64, Custom); // SPU has a legal FP -> signed INT instruction @@ -292,7 +259,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // FDIV on SPU requires custom lowering setOperationAction(ISD::FDIV, MVT::f32, Custom); - //setOperationAction(ISD::FDIV, MVT::f64, Custom); + setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall // SPU has [U|S]INT_TO_FP setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); @@ -402,7 +369,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); setShiftAmountType(MVT::i32); - setBooleanContents(ZeroOrOneBooleanContent); + setBooleanContents(ZeroOrNegativeOneBooleanContent); setStackPointerRegisterToSaveRestore(SPU::R1); @@ -435,7 +402,7 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB"; node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK"; node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; - node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR"; + node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC"; node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT"; node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY"; node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU"; @@ -471,9 +438,14 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const return ((i != node_names.end()) ?
i->second : 0); } +//===----------------------------------------------------------------------===// +// Return the Cell SPU's SETCC result type +//===----------------------------------------------------------------------===// + MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const { MVT VT = Op.getValueType(); - return (VT.isInteger() ? VT : MVT(MVT::i32)); + // i8, i16 and i32 are valid SETCC result types + return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32); } //===----------------------------------------------------------------------===// @@ -486,105 +458,6 @@ MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const { // LowerOperation implementation //===----------------------------------------------------------------------===// -/// Aligned load common code for CellSPU -/*! - \param[in] Op The SelectionDAG load or store operand - \param[in] DAG The selection DAG - \param[in] ST CellSPU subtarget information structure - \param[in,out] alignment Caller initializes this to the load or store node's - value from getAlignment(), may be updated while generating the aligned load - \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned - offset (divisible by 16, modulo 16 == 0) - \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the - offset of the preferred slot (modulo 16 != 0) - \param[in,out] VT Caller initializes this value type to the load or store - node's loaded or stored value type; may be updated if an i1-extended load or - store. - \param[out] was16aligned true if the base pointer had 16-byte alignment, - otherwise false. Can help to determine if the chunk needs to be rotated. - - Both load and store lowering load a block of data aligned on a 16-byte - boundary. This is the common aligned load code shared between both. - */ -static SDValue -AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST, - LSBaseSDNode *LSN, - unsigned &alignment, int &alignOffs, int &prefSlotOffs, - MVT &VT, bool &was16aligned) -{ - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - const valtype_map_s *vtm = getValueTypeMapEntry(VT); - SDValue basePtr = LSN->getBasePtr(); - SDValue chain = LSN->getChain(); - - if (basePtr.getOpcode() == ISD::ADD) { - SDValue Op1 = basePtr.getNode()->getOperand(1); - - if (Op1.getOpcode() == ISD::Constant - || Op1.getOpcode() == ISD::TargetConstant) { - const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1)); - - alignOffs = (int) CN->getZExtValue(); - prefSlotOffs = (int) (alignOffs & 0xf); - - // Adjust the rotation amount to ensure that the final result ends up in - // the preferred slot: - prefSlotOffs -= vtm->prefslot_byte; - basePtr = basePtr.getOperand(0); - - // Loading from memory, can we adjust alignment?
- if (basePtr.getOpcode() == SPUISD::AFormAddr) { - SDValue APtr = basePtr.getOperand(0); - if (APtr.getOpcode() == ISD::TargetGlobalAddress) { - GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr); - alignment = GSDN->getGlobal()->getAlignment(); - } - } - } else { - alignOffs = 0; - prefSlotOffs = -vtm->prefslot_byte; - } - } else if (basePtr.getOpcode() == ISD::FrameIndex) { - FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr); - alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize()); - prefSlotOffs = (int) (alignOffs & 0xf); - prefSlotOffs -= vtm->prefslot_byte; - } else { - alignOffs = 0; - prefSlotOffs = -vtm->prefslot_byte; - } - - if (alignment == 16) { - // Realign the base pointer as a D-Form address: - if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) { - basePtr = DAG.getNode(ISD::ADD, PtrVT, - basePtr, - DAG.getConstant((alignOffs & ~0xf), PtrVT)); - } - - // Emit the vector load: - was16aligned = true; - return DAG.getLoad(MVT::v16i8, chain, basePtr, - LSN->getSrcValue(), LSN->getSrcValueOffset(), - LSN->isVolatile(), 16); - } - - // Unaligned load or we're using the "large memory" model, which means that - // we have to be very pessimistic: - if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) { - basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, - DAG.getConstant(0, PtrVT)); - } - - // Add the offset - basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, - DAG.getConstant((alignOffs & ~0xf), PtrVT)); - was16aligned = false; - return DAG.getLoad(MVT::v16i8, chain, basePtr, - LSN->getSrcValue(), LSN->getSrcValueOffset(), - LSN->isVolatile(), 16); -} - /// Custom lower loads for CellSPU /*! All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements @@ -605,43 +478,110 @@ static SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { LoadSDNode *LN = cast<LoadSDNode>(Op); SDValue the_chain = LN->getChain(); + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); MVT InVT = LN->getMemoryVT(); MVT OutVT = Op.getValueType(); ISD::LoadExtType ExtType = LN->getExtensionType(); unsigned alignment = LN->getAlignment(); - SDValue Ops[8]; + const valtype_map_s *vtm = getValueTypeMapEntry(InVT); switch (LN->getAddressingMode()) { case ISD::UNINDEXED: { - int offset, rotamt; - bool was16aligned; - SDValue result = - AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, InVT, - was16aligned); - - if (result.getNode() == 0) - return result; - - the_chain = result.getValue(1); - // Rotate the chunk if necessary - if (rotamt < 0) - rotamt += 16; - if (rotamt != 0 || !was16aligned) { - SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other); - - Ops[0] = result; - if (was16aligned) { - Ops[1] = DAG.getConstant(rotamt, MVT::i16); + SDValue result; + SDValue basePtr = LN->getBasePtr(); + SDValue rotate; + + if (alignment == 16) { + ConstantSDNode *CN; + + // Special cases for a known aligned load to simplify the base pointer + // and the rotation amount: + if (basePtr.getOpcode() == ISD::ADD + && (CN = dyn_cast<ConstantSDNode> (basePtr.getOperand(1))) != 0) { + // Known offset into basePtr + int64_t offset = CN->getSExtValue(); + int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte); + + if (rotamt < 0) + rotamt += 16; + + rotate = DAG.getConstant(rotamt, MVT::i16); + + // Simplify the base pointer for this case: + basePtr = basePtr.getOperand(0); + if ((offset & ~0xf) > 0) { + basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, + basePtr, + DAG.getConstant((offset & ~0xf), PtrVT)); + } + } else if ((basePtr.getOpcode() == SPUISD::AFormAddr) + || (basePtr.getOpcode()
== SPUISD::IndirectAddr + && basePtr.getOperand(0).getOpcode() == SPUISD::Hi + && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) { + // Plain aligned a-form address: rotate into preferred slot + // Same for (SPUindirect (SPUhi ...), (SPUlo ...)) + int64_t rotamt = -vtm->prefslot_byte; + if (rotamt < 0) + rotamt += 16; + rotate = DAG.getConstant(rotamt, MVT::i16); } else { - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - LoadSDNode *LN1 = cast<LoadSDNode>(result); - Ops[1] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(), + // Offset the rotate amount by the basePtr and the preferred slot + // byte offset + int64_t rotamt = -vtm->prefslot_byte; + if (rotamt < 0) + rotamt += 16; + rotate = DAG.getNode(ISD::ADD, PtrVT, + basePtr, DAG.getConstant(rotamt, PtrVT)); } + } else { + // Unaligned load: must be more pessimistic about addressing modes: + if (basePtr.getOpcode() == ISD::ADD) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); + SDValue Flag; + + SDValue Op0 = basePtr.getOperand(0); + SDValue Op1 = basePtr.getOperand(1); + + if (isa<ConstantSDNode>(Op1)) { + // Convert the (add <ptr>, <const>) to an indirect address contained + // in a register. Note that this is done because we need to avoid + // creating a 0(reg) d-form address due to the SPU's block loads. + basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1); + the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag); + basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT); + } else { + // Convert the (add <ptr>, <ptr>) to an indirect address, which + // will likely be lowered as a reg(reg) x-form address. + basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1); + } + } else { + basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + } - result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8, Ops, 2); + // Offset the rotate amount by the basePtr and the preferred slot + // byte offset + rotate = DAG.getNode(ISD::ADD, PtrVT, + basePtr, + DAG.getConstant(-vtm->prefslot_byte, PtrVT)); } + // Re-emit as a v16i8 vector load + result = DAG.getLoad(MVT::v16i8, the_chain, basePtr, + LN->getSrcValue(), LN->getSrcValueOffset(), + LN->isVolatile(), 16); + + // Update the chain + the_chain = result.getValue(1); + + // Rotate into the preferred slot: + result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8, + result.getValue(0), rotate); + // Convert the loaded v16i8 vector to the appropriate vector type // specified by the operand: MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits())); @@ -704,23 +644,86 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { switch (SN->getAddressingMode()) { case ISD::UNINDEXED: { - int chunk_offset, slot_offset; - bool was16aligned; - // The vector type we really want to load from the 16-byte chunk.
MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())), stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits())); - SDValue alignLoadVec = - AlignedLoad(Op, DAG, ST, SN, alignment, - chunk_offset, slot_offset, VT, was16aligned); + SDValue alignLoadVec; + SDValue basePtr = SN->getBasePtr(); + SDValue the_chain = SN->getChain(); + SDValue insertEltOffs; + + if (alignment == 16) { + ConstantSDNode *CN; + + // Special cases for a known aligned load to simplify the base pointer + // and insertion byte: + if (basePtr.getOpcode() == ISD::ADD + && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) { + // Known offset into basePtr + int64_t offset = CN->getSExtValue(); + + // Simplify the base pointer for this case: + basePtr = basePtr.getOperand(0); + insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT, + basePtr, + DAG.getConstant((offset & 0xf), PtrVT)); + + if ((offset & ~0xf) > 0) { + basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, + basePtr, + DAG.getConstant((offset & ~0xf), PtrVT)); + } + } else { + // Otherwise, assume it's at byte 0 of basePtr + insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + } + } else { + // Unaligned load: must be more pessimistic about addressing modes: + if (basePtr.getOpcode() == ISD::ADD) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); + SDValue Flag; + + SDValue Op0 = basePtr.getOperand(0); + SDValue Op1 = basePtr.getOperand(1); + + if (isa<ConstantSDNode>(Op1)) { + // Convert the (add <ptr>, <const>) to an indirect address contained + // in a register. Note that this is done because we need to avoid + // creating a 0(reg) d-form address due to the SPU's block loads. + basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1); + the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag); + basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT); + } else { + // Convert the (add <ptr>, <ptr>) to an indirect address, which + // will likely be lowered as a reg(reg) x-form address. + basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1); + } + } else { + basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + } + + // Insertion point is solely determined by basePtr's contents + insertEltOffs = DAG.getNode(ISD::ADD, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + } + + // Re-emit as a v16i8 vector load + alignLoadVec = DAG.getLoad(MVT::v16i8, the_chain, basePtr, + SN->getSrcValue(), SN->getSrcValueOffset(), + SN->isVolatile(), 16); - if (alignLoadVec.getNode() == 0) - return alignLoadVec; + // Update the chain + the_chain = alignLoadVec.getValue(1); LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec); - SDValue basePtr = LN->getBasePtr(); - SDValue the_chain = alignLoadVec.getValue(1); SDValue theValue = SN->getValue(); SDValue result; @@ -732,29 +735,20 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { theValue = theValue.getOperand(0); } - chunk_offset &= 0xf; - - SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT); - SDValue insertEltPtr; - // If the base pointer is already a D-form address, then just create // a new D-form address with a slot offset and the original base pointer. // Otherwise generate a D-form address with the slot offset relative // to the stack pointer, which is always aligned.
- DEBUG(cerr << "CellSPU LowerSTORE: basePtr = "); - DEBUG(basePtr.getNode()->dump(&DAG)); - DEBUG(cerr << "\n"); - - if (basePtr.getOpcode() == SPUISD::IndirectAddr || - (basePtr.getOpcode() == ISD::ADD - && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) { - insertEltPtr = basePtr; - } else { - insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs); - } +#if !defined(NDEBUG) + if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { + cerr << "CellSPU LowerSTORE: basePtr = "; + basePtr.getNode()->dump(&DAG); + cerr << "\n"; + } +#endif SDValue insertEltOp = - DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltPtr); + DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltOffs); SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue); @@ -919,22 +913,31 @@ LowerConstantFP(SDValue Op, SelectionDAG &DAG) { return SDValue(); } -//! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16) static SDValue -LowerBRCOND(SDValue Op, SelectionDAG &DAG) -{ +LowerBRCOND(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { SDValue Cond = Op.getOperand(1); MVT CondVT = Cond.getValueType(); - MVT CondNVT; + unsigned CondOpc; if (CondVT == MVT::i8) { - CondNVT = MVT::i16; + SDValue CondOp0 = Cond.getOperand(0); + if (Cond.getOpcode() == ISD::TRUNCATE) { + // Use the truncate's value type and ANY_EXTEND the condition (DAGcombine + // will then remove the truncate) + CondVT = CondOp0.getValueType(); + CondOpc = ISD::ANY_EXTEND; + } else { + CondVT = MVT::i32; // default to something reasonable + CondOpc = ISD::ZERO_EXTEND; + } + + Cond = DAG.getNode(CondOpc, CondVT, Op.getOperand(1)); + return DAG.getNode(ISD::BRCOND, Op.getValueType(), - Op.getOperand(0), - DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)), - Op.getOperand(2)); - } else - return SDValue(); // Unchanged + Op.getOperand(0), Cond, Op.getOperand(2)); + } + + return SDValue(); // Unchanged } static SDValue @@ -1896,7 +1899,7 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { case MVT::i64: case MVT::f32: case MVT::f64: - return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0); + return DAG.getNode(SPUISD::PREFSLOT2VEC, Op.getValueType(), Op0, Op0); } } @@ -2274,9 +2277,11 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { return result; } -static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) +static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, + const TargetLowering &TLI) { SDValue N0 = Op.getOperand(0); // Everything has at least one operand + MVT ShiftVT = TLI.getShiftAmountTy(); assert(Op.getValueType() == MVT::i8); switch (Opc) { @@ -2290,11 +2295,11 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) SDValue N1 = Op.getOperand(1); N0 = (N0.getOpcode() != ISD::Constant ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) - : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(), + : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(), MVT::i16)); N1 = (N1.getOpcode() != ISD::Constant ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1) - : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), + : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(), MVT::i16)); return DAG.getNode(ISD::TRUNCATE, MVT::i8, DAG.getNode(Opc, MVT::i16, N0, N1)); @@ -2307,13 +2312,13 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0) : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(), MVT::i16)); - N1Opc = N1.getValueType().bitsLT(MVT::i32) + N1Opc = N1.getValueType().bitsLT(ShiftVT) ?
ISD::ZERO_EXTEND : ISD::TRUNCATE; N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(N1Opc, MVT::i32, N1) + ? DAG.getNode(N1Opc, ShiftVT, N1) : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), - MVT::i32)); + TLI.getShiftAmountTy())); SDValue ExpandArg = DAG.getNode(ISD::OR, MVT::i16, N0, DAG.getNode(ISD::SHL, MVT::i16, @@ -2328,14 +2333,13 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) N0 = (N0.getOpcode() != ISD::Constant ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0) : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(), - MVT::i16)); - N1Opc = N1.getValueType().bitsLT(MVT::i16) + MVT::i32)); + N1Opc = N1.getValueType().bitsLT(ShiftVT) ? ISD::ZERO_EXTEND : ISD::TRUNCATE; N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(N1Opc, MVT::i16, N1) - : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), - MVT::i16)); + ? DAG.getNode(N1Opc, ShiftVT, N1) + : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT)); return DAG.getNode(ISD::TRUNCATE, MVT::i8, DAG.getNode(Opc, MVT::i16, N0, N1)); } @@ -2344,15 +2348,15 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) unsigned N1Opc; N0 = (N0.getOpcode() != ISD::Constant ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) - : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(), + : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(), MVT::i16)); - N1Opc = N1.getValueType().bitsLT(MVT::i16) + N1Opc = N1.getValueType().bitsLT(ShiftVT) ? ISD::SIGN_EXTEND : ISD::TRUNCATE; N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(N1Opc, MVT::i16, N1) + ? DAG.getNode(N1Opc, ShiftVT, N1) : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), - MVT::i16)); + ShiftVT)); return DAG.getNode(ISD::TRUNCATE, MVT::i8, DAG.getNode(Opc, MVT::i16, N0, N1)); } @@ -2366,7 +2370,7 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE; N1 = (N1.getOpcode() != ISD::Constant ?
DAG.getNode(N1Opc, MVT::i16, N1) - : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), + : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(), MVT::i16)); return DAG.getNode(ISD::TRUNCATE, MVT::i8, DAG.getNode(Opc, MVT::i16, N0, N1)); @@ -2397,7 +2401,7 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n"); SDValue PromoteScalar = - DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0); + DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0); if (Opc != ISD::SIGN_EXTEND) { // Use a shuffle to zero extend the i32 to i64 directly: @@ -2438,9 +2442,9 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) // Turn operands into vectors to satisfy type checking (shufb works on // vectors) SDValue Op0 = - DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0)); + DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0)); SDValue Op1 = - DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1)); + DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1)); SmallVector<SDValue, 16> ShufBytes; // Create the shuffle mask for "rotating" the borrow up one register slot @@ -2467,9 +2471,9 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) // Turn operands into vectors to satisfy type checking (shufb works on // vectors) SDValue Op0 = - DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0)); + DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0)); SDValue Op1 = - DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1)); + DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1)); SmallVector<SDValue, 16> ShufBytes; // Create the shuffle mask for "rotating" the borrow up one register slot @@ -2495,7 +2499,7 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) case ISD::SHL: { SDValue ShiftAmt = Op.getOperand(1); MVT ShiftAmtVT = ShiftAmt.getValueType(); - SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0); + SDValue Op0Vec = DAG.getNode(SPUISD::PREFSLOT2VEC, VecVT, Op0); SDValue MaskLower = DAG.getNode(SPUISD::SELB, VecVT, Op0Vec, @@ -2540,7 +2544,7 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) case ISD::SRA: { // Promote Op0 to vector SDValue Op0 = - DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0)); + DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0)); SDValue ShiftAmt = Op.getOperand(1); MVT ShiftVT = ShiftAmt.getValueType(); @@ -2669,7 +2673,7 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { SDValue N = Op.getOperand(0); SDValue Elt0 = DAG.getConstant(0, MVT::i32); - SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N); + SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N); SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0); @@ -2686,7 +2690,7 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16); SDValue Shift1 = DAG.getConstant(8, MVT::i32); - SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N); + SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N); SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote); // CNTB_result becomes the chain to which all of the virtual registers @@ -2720,7 +2724,7 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { SDValue Shift1 = DAG.getConstant(16, MVT::i32); SDValue Shift2 = DAG.getConstant(8, MVT::i32); - SDValue
Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N); + SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N); SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote); // CNTB_result becomes the chain to which all of the virtual registers @@ -2760,6 +2764,32 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { return SDValue(); } +//! Lower ISD::SETCC +/*! + Lower i64 condition code handling. + */ + +static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getValueType(); + SDValue lhs = Op.getOperand(0); + SDValue rhs = Op.getOperand(1); + SDValue condition = Op.getOperand(2); + + if (VT == MVT::i32 && lhs.getValueType() == MVT::i64) { + // Expand the i64 comparisons to what Cell can actually support, + // which is eq, ugt and sgt: +#if 0 + CondCodeSDNode *ccvalue = dyn_cast<CondCodeSDNode>(condition); + + switch (ccvalue->get()) { + case + } +#endif + } + + return SDValue(); +} + //! Lower ISD::SELECT_CC /*! ISD::SELECT_CC can (generally) be implemented directly on the SPU using the @@ -2772,7 +2802,8 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { assumption, given the simplistic uses so far. */ -static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { MVT VT = Op.getValueType(); SDValue lhs = Op.getOperand(0); SDValue rhs = Op.getOperand(1); @@ -2780,12 +2811,20 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { SDValue falseval = Op.getOperand(3); SDValue condition = Op.getOperand(4); + // NOTE: SELB's arguments: $rA, $rB, $mask + // + // SELB selects bits from $rA where bits in $mask are 0, bits from $rB + // where bits in $mask are 1. CCond will be inverted, having 1s where the + // condition was true and 0s where the condition was false. Hence, the + // arguments to SELB get reversed. + // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up // with another "cannot select select_cc" assert: - SDValue compare = DAG.getNode(ISD::SETCC, VT, lhs, rhs, condition); - return DAG.getNode(SPUISD::SELB, VT, trueval, falseval, compare); + SDValue compare = DAG.getNode(ISD::SETCC, TLI.getSetCCResultType(Op), + lhs, rhs, condition); + return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare); } //!
Custom lower ISD::TRUNCATE @@ -2799,89 +2838,29 @@ static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) MVT Op0VT = Op0.getValueType(); MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits())); - SDValue PromoteScalar = DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0); + // Create shuffle mask + if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) { + // least significant doubleword of quadword + unsigned maskHigh = 0x08090a0b; + unsigned maskLow = 0x0c0d0e0f; + // Use a shuffle to perform the truncation + SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + DAG.getConstant(maskHigh, MVT::i32), + DAG.getConstant(maskLow, MVT::i32), + DAG.getConstant(maskHigh, MVT::i32), + DAG.getConstant(maskLow, MVT::i32)); - unsigned maskLow; - unsigned maskHigh; - // Create shuffle mask - switch (Op0VT.getSimpleVT()) { - case MVT::i128: - switch (simpleVT) { - case MVT::i64: - // least significant doubleword of quadword - maskHigh = 0x08090a0b; - maskLow = 0x0c0d0e0f; - break; - case MVT::i32: - // least significant word of quadword - maskHigh = maskLow = 0x0c0d0e0f; - break; - case MVT::i16: - // least significant halfword of quadword - maskHigh = maskLow = 0x0e0f0e0f; - break; - case MVT::i8: - // least significant byte of quadword - maskHigh = maskLow = 0x0f0f0f0f; - break; - default: - cerr << "Truncation to illegal type!"; - abort(); - } - break; - case MVT::i64: - switch (simpleVT) { - case MVT::i32: - // least significant word of doubleword - maskHigh = maskLow = 0x04050607; - break; - case MVT::i16: - // least significant halfword of doubleword - maskHigh = maskLow = 0x06070607; - break; - case MVT::i8: - // least significant byte of doubleword - maskHigh = maskLow = 0x07070707; - break; - default: - cerr << "Truncation to illegal type!"; - abort(); - } - break; - case MVT::i32: - case MVT::i16: - switch (simpleVT) { - case MVT::i16: - // least significant halfword of word - maskHigh = maskLow = 0x02030203; - break; - case MVT::i8: - // least significant byte of word/halfword - maskHigh = maskLow = 0x03030303; - break; - default: - cerr << "Truncation to illegal type!"; - abort(); - } - break; - default: - cerr << "Trying to lower truncation from illegal type!"; - abort(); - } + SDValue PromoteScalar = DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0); - // Use a shuffle to perform the truncation - SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - DAG.getConstant(maskHigh, MVT::i32), - DAG.getConstant(maskLow, MVT::i32), - DAG.getConstant(maskHigh, MVT::i32), - DAG.getConstant(maskLow, MVT::i32)); + SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT, + PromoteScalar, PromoteScalar, shufMask); - SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT, - PromoteScalar, PromoteScalar, shufMask); + return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, + DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle)); + } - return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, - DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle)); + return SDValue(); // Leave the truncate unmolested } //! 
Custom (target-specific) lowering entry point @@ -2921,7 +2900,7 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) case ISD::ConstantFP: return LowerConstantFP(Op, DAG); case ISD::BRCOND: - return LowerBRCOND(Op, DAG); + return LowerBRCOND(Op, DAG, *this); case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex); case ISD::CALL: @@ -2942,7 +2921,7 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) case ISD::SHL: case ISD::SRA: { if (VT == MVT::i8) - return LowerI8Math(Op, DAG, Opc); + return LowerI8Math(Op, DAG, Opc, *this); else if (VT == MVT::i64) return LowerI64Math(Op, DAG, Opc); break; @@ -2971,7 +2950,7 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) if (VT.isVector()) return LowerVectorMUL(Op, DAG); else if (VT == MVT::i8) - return LowerI8Math(Op, DAG, Opc); + return LowerI8Math(Op, DAG, Opc, *this); else return LowerMUL(Op, DAG, VT, Opc); @@ -2990,10 +2969,13 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) return LowerCTPOP(Op, DAG); case ISD::SELECT_CC: - return LowerSELECT_CC(Op, DAG); + return LowerSELECT_CC(Op, DAG, *this); case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); + + case ISD::SETCC: + return LowerSETCC(Op, DAG); } return SDValue(); @@ -3036,7 +3018,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const SelectionDAG &DAG = DCI.DAG; SDValue Op0 = N->getOperand(0); // everything has at least one operand MVT NodeVT = N->getValueType(0); // The node's value type - MVT Op0VT = Op0.getValueType(); // The first operand's result + MVT Op0VT = Op0.getValueType(); // The first operand's result SDValue Result; // Initially, empty result switch (N->getOpcode()) { default: break; case ISD::ADD: { SDValue Op1 = N->getOperand(1); - if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) { - SDValue Op01 = Op0.getOperand(1); - if (Op01.getOpcode() == ISD::Constant - || Op01.getOpcode() == ISD::TargetConstant) { - // (add <const>, (SPUindirect <arg>, <const>)) -> - // (SPUindirect <arg>, <const + const>) - ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1); - ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01); - SDValue combinedConst = - DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT); + if (Op0.getOpcode() == SPUISD::IndirectAddr + || Op1.getOpcode() == SPUISD::IndirectAddr) { + // Normalize the operands to reduce repeated code + SDValue IndirectArg = Op0, AddArg = Op1; + + if (Op1.getOpcode() == SPUISD::IndirectAddr) { + IndirectArg = Op1; + AddArg = Op0; + } + + if (isa<ConstantSDNode>(AddArg)) { + ConstantSDNode *CN0 = cast<ConstantSDNode> (AddArg); + SDValue IndOp1 = IndirectArg.getOperand(1); + + if (CN0->isNullValue()) { + // (add (SPUindirect <arg>, <const>), 0) -> + // (SPUindirect <arg>, <const>) #if !defined(NDEBUG) - if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { + if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { cerr << "\n" - << "Replace: (add " << CN0->getZExtValue() << ", " - << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n" + << "Replace: (add (SPUindirect <arg>, <const>), 0)\n" + << "With: (SPUindirect <arg>, <const>)\n"; + } +#endif + + return IndirectArg; + } else if (isa<ConstantSDNode>(IndOp1)) { + // (add (SPUindirect <arg>, <const>), <const>) -> + // (SPUindirect <arg>, <const + const>) + ConstantSDNode *CN1 = cast<ConstantSDNode> (IndOp1); + int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue(); + SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT); + +#if !defined(NDEBUG) + if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { + cerr << "\n" + << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue() + << "), " <<
CN0->getSExtValue() << ")\n" << "With: (SPUindirect <arg>, " << combinedConst << ")\n"; } #endif - return DAG.getNode(SPUISD::IndirectAddr, Op0VT, - Op0.getOperand(0), combinedConst); - } - } else if (isa<ConstantSDNode>(Op0) - && Op1.getOpcode() == SPUISD::IndirectAddr) { - SDValue Op11 = Op1.getOperand(1); - if (Op11.getOpcode() == ISD::Constant - || Op11.getOpcode() == ISD::TargetConstant) { - // (add (SPUindirect <arg>, <const>), <const>) -> - // (SPUindirect <arg>, <const + const>) - ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0); - ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11); - SDValue combinedConst = - DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT); - - DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", " - << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n"); - DEBUG(cerr << "With: (SPUindirect <arg>, " - << CN0->getZExtValue() + CN1->getZExtValue() << ")\n"); - - return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(), - Op1.getOperand(0), combinedConst); + return DAG.getNode(SPUISD::IndirectAddr, Op0VT, + IndirectArg, combinedValue); + } } } break; @@ -3127,6 +3113,25 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const return Op0; } + } else if (Op0.getOpcode() == ISD::ADD) { + SDValue Op1 = N->getOperand(1); + if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) { + // (SPUindirect (add <arg>, <arg>), 0) -> + // (SPUindirect <arg>, <arg>) + if (CN1->isNullValue()) { + +#if !defined(NDEBUG) + if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { + cerr << "\n" + << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n" + << "With: (SPUindirect <arg>, <arg>)\n"; + } +#endif + + return DAG.getNode(SPUISD::IndirectAddr, Op0VT, + Op0.getOperand(0), Op0.getOperand(1)); + } + } } break; } @@ -3136,19 +3141,19 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const case SPUISD::VEC_SRL: case SPUISD::VEC_SRA: case SPUISD::ROTQUAD_RZ_BYTES: - case SPUISD::ROTQUAD_RZ_BITS: { + case SPUISD::ROTQUAD_RZ_BITS: + case SPUISD::ROTBYTES_LEFT: { SDValue Op1 = N->getOperand(1); - if (isa<ConstantSDNode>(Op1)) { - // Kill degenerate vector shifts: - ConstantSDNode *CN = cast<ConstantSDNode>(Op1); - if (CN->getZExtValue() == 0) { + // Kill degenerate vector shifts: + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) { + if (CN->isNullValue()) { Result = Op0; } } break; } - case SPUISD::PROMOTE_SCALAR: { + case SPUISD::PREFSLOT2VEC: { switch (Op0.getOpcode()) { default: break; @@ -3263,7 +3268,7 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, case CNTB: #endif - case SPUISD::PROMOTE_SCALAR: { + case SPUISD::PREFSLOT2VEC: { SDValue Op0 = Op.getOperand(0); MVT Op0VT = Op0.getValueType(); unsigned Op0VTBits = Op0VT.getSizeInBits(); @@ -3306,7 +3311,25 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, #endif } } + +unsigned +SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, + unsigned Depth) const { + switch (Op.getOpcode()) { + default: + return 1; + case ISD::SETCC: { + MVT VT = Op.getValueType(); + + if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) { + VT = MVT::i32; + } + return VT.getSizeInBits(); + } + } +} + // LowerAsmOperandForConstraint void SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op, diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index dd1f97f8d35..8d2e9945455 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -39,7 +39,7 @@ namespace llvm { SHUFB, ///< Vector shuffle (permute) SHUFFLE_MASK, ///< Shuffle mask CNTB, ///< Count leading ones in bytes - PROMOTE_SCALAR, ///<
Promote scalar->vector + PREFSLOT2VEC, ///< Promote scalar->vector VEC2PREFSLOT, ///< Extract element 0 MPY, ///< 16-bit Multiply (low parts of a 32-bit) MPYU, ///< Multiply Unsigned @@ -58,6 +58,7 @@ namespace llvm { ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI) SELB, ///< Select bits -> (b & mask) | (a & ~mask) + GATHER_BITS, ///< Gather bits from bytes/words/halfwords ADD_EXTENDED, ///< Add extended, with carry CARRY_GENERATE, ///< Carry generate for ADD_EXTENDED SUB_EXTENDED, ///< Subtract extended, with borrow @@ -120,6 +121,9 @@ namespace llvm { const SelectionDAG &DAG, unsigned Depth = 0) const; + virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, + unsigned Depth = 0) const; + ConstraintType getConstraintType(const std::string &ConstraintLetter) const; std::pair<unsigned, const TargetRegisterClass*> diff --git a/lib/Target/CellSPU/SPUInstrFormats.td b/lib/Target/CellSPU/SPUInstrFormats.td index f423dfa3420..710196467bc 100644 --- a/lib/Target/CellSPU/SPUInstrFormats.td +++ b/lib/Target/CellSPU/SPUInstrFormats.td @@ -120,9 +120,8 @@ class CVTIntFPForm opcode, dag OOL, dag IOL, string asmstr, } let RA = 0 in { - class BICondForm<bits<11> opcode, string asmstr, list<dag> pattern> - : RRForm + class BICondForm<bits<11> opcode, dag OOL, dag IOL, string asmstr, list<dag> pattern> + : RRForm<opcode, OOL, IOL, asmstr, BranchResolv, pattern> { } let RT = 0 in { diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 442d49141b1..37a58705795 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -34,10 +34,14 @@ namespace { inline bool isCondBranch(const MachineInstr *I) { unsigned opc = I->getOpcode(); - return (opc == SPU::BRNZ - || opc == SPU::BRZ - || opc == SPU::BRHNZ - || opc == SPU::BRHZ); + return (opc == SPU::BRNZr32 + || opc == SPU::BRNZv4i32 + || opc == SPU::BRZr32 + || opc == SPU::BRZv4i32 + || opc == SPU::BRHNZr16 + || opc == SPU::BRHNZv8i16 + || opc == SPU::BRHZr16 + || opc == SPU::BRHZv8i16); } } @@ -103,6 +107,19 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI, return true; } break; + case SPU::LRr8: + case SPU::LRr16: + case SPU::LRr32: + case SPU::LRf32: + case SPU::LRr64: + case SPU::LRf64: + case SPU::LRr128: + case SPU::LRv16i8: + case SPU::LRv8i16: + case SPU::LRv4i32: + case SPU::LRv4f32: + case SPU::LRv2i64: + case SPU::LRv2f64: case SPU::ORv16i8_i8: case SPU::ORv8i16_i16: case SPU::ORv4i32_i32: @@ -114,7 +131,18 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI, case SPU::ORi32_v4i32: case SPU::ORi64_v2i64: case SPU::ORf32_v4f32: - case SPU::ORf64_v2f64: + case SPU::ORf64_v2f64: { + assert(MI.getNumOperands() == 2 && + MI.getOperand(0).isReg() && + MI.getOperand(1).isReg() && + "invalid SPU OR<type>_<type> instruction!"); + if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) { + sourceReg = MI.getOperand(0).getReg(); + destReg = MI.getOperand(0).getReg(); + return true; + } + break; + } case SPU::ORv16i8: case SPU::ORv8i16: case SPU::ORv4i32: @@ -198,18 +226,14 @@ SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI, case SPU::STQDr8: { const MachineOperand MOp1 = MI->getOperand(1); const MachineOperand MOp2 = MI->getOperand(2); - if (MOp1.isImm() - && (MOp2.isFI() - || (MOp2.isReg() && MOp2.getReg() == SPU::R1))) { - if (MOp2.isFI()) - FrameIndex = MOp2.getIndex(); - else - FrameIndex = MOp1.getImm() / SPUFrameInfo::stackSlotSize(); + if (MOp1.isImm() && MOp2.isFI()) { + FrameIndex = MOp2.getIndex(); return MI->getOperand(0).getReg(); } break; } - case SPU::STQXv16i8: +#if 0 + case SPU::STQXv16i8: case SPU::STQXv8i16:
case SPU::STQXv4i32: case SPU::STQXv4f32: case SPU::STQXv2i64: case SPU::STQXv2f64: case SPU::STQXr128: case SPU::STQXr64: case SPU::STQXr32: case SPU::STQXr16: case SPU::STQXr8: { const MachineOperand MOp1 = MI->getOperand(1); const MachineOperand MOp2 = MI->getOperand(2); if (MOp1.isImm() && MOp2.isReg() && MOp2.getReg() == SPU::R1) { FrameIndex = MOp1.getImm() / SPUFrameInfo::stackSlotSize(); return MI->getOperand(0).getReg(); } break; +#endif } return 0; } @@ -292,6 +317,8 @@ SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, opc = (isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16); } else if (RC == SPU::R8CRegisterClass) { opc = (isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8); + } else if (RC == SPU::VECREGRegisterClass) { + opc = (isValidFrameIdx) ? SPU::STQDv16i8 : SPU::STQXv16i8; } else { assert(0 && "Unknown regclass!"); abort(); } @@ -366,6 +393,8 @@ SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, opc = (isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16); } else if (RC == SPU::R8CRegisterClass) { opc = (isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8); + } else if (RC == SPU::VECREGRegisterClass) { + opc = (isValidFrameIdx) ? SPU::LQDv16i8 : SPU::LQXv16i8; } else { assert(0 && "Unknown regclass in loadRegFromStackSlot!"); abort(); } diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 2338a0318ba..08d767684af 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -1,10 +1,10 @@ //==- SPUInstrInfo.td - Describe the Cell SPU Instructions -*- tablegen -*-==// -// +// The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // Cell SPU Instructions: //===----------------------------------------------------------------------===// @@ -49,14 +49,14 @@ def DWARF_LOC : Pseudo<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$fi let canFoldAsLoad = 1 in { class LoadDFormVec<ValueType vectype> - : RI10Form<0b00101100, (outs VECREG:$rT), (ins memri10:$src), + : RI10Form<0b00101100, (outs VECREG:$rT), (ins dformaddr:$src), "lqd\t$rT, $src", LoadStore, [(set (vectype VECREG:$rT), (load dform_addr:$src))]> { } class LoadDForm<RegisterClass rclass> - : RI10Form<0b00101100, (outs rclass:$rT), (ins memri10:$src), + : RI10Form<0b00101100, (outs rclass:$rT), (ins dformaddr:$src), "lqd\t$rT, $src", LoadStore, [(set rclass:$rT, (load dform_addr:$src))]> @@ -161,14 +161,14 @@ let canFoldAsLoad = 1 in { // Stores: //===----------------------------------------------------------------------===// class StoreDFormVec<ValueType vectype> - : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memri10:$src), + : RI10Form<0b00100100, (outs), (ins VECREG:$rT, dformaddr:$src), "stqd\t$rT, $src", LoadStore, [(store (vectype VECREG:$rT), dform_addr:$src)]> { } class StoreDForm<RegisterClass rclass> - : RI10Form<0b00100100, (outs), (ins rclass:$rT, memri10:$src), + : RI10Form<0b00100100, (outs), (ins rclass:$rT, dformaddr:$src), "stqd\t$rT, $src", LoadStore, [(store rclass:$rT, dform_addr:$src)]> @@ -269,7 +269,7 @@ def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp), // Generate Controls for Insertion: //===----------------------------------------------------------------------===// -def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins memri7:$src), +def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src), "cbd\t$rT, $src", ShuffleOp, [(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; def CBX: RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src), "cbx\t$rT, $src", ShuffleOp, [(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; -def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins memri7:$src), +def CHD: RI7Form<0b10101111100, (outs VECREG:$rT),
(ins shufaddr:$src), "chd\t$rT, $src", ShuffleOp, [(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; @@ -285,7 +285,7 @@ def CHX: RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src), "chx\t$rT, $src", ShuffleOp, [(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; -def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins memri7:$src), +def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src), "cwd\t$rT, $src", ShuffleOp, [(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; @@ -293,7 +293,7 @@ def CWX: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src), "cwx\t$rT, $src", ShuffleOp, [(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; -def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins memri7:$src), +def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src), "cwd\t$rT, $src", ShuffleOp, [(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; @@ -301,7 +301,7 @@ def CWXf32: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src), "cwx\t$rT, $src", ShuffleOp, [(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; -def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins memri7:$src), +def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src), "cdd\t$rT, $src", ShuffleOp, [(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; @@ -309,7 +309,7 @@ def CDX: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src), "cdx\t$rT, $src", ShuffleOp, [(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; -def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins memri7:$src), +def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src), "cdd\t$rT, $src", ShuffleOp, [(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; @@ -421,6 +421,7 @@ multiclass ImmLoadAddress def f32: ILARegInst; def f64: ILARegInst; + def hi: ILARegInst; def lo: ILARegInst; def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val), @@ -481,37 +482,77 @@ multiclass FormSelectMaskBytesImm defm FSMBI : FormSelectMaskBytesImm; // fsmb: Form select mask for bytes. N.B. Input operand, $rA, is 16-bits -def FSMB: - RRForm_1<0b01101101100, (outs VECREG:$rT), (ins R16C:$rA), - "fsmb\t$rT, $rA", SelectOp, - [(set (v16i8 VECREG:$rT), (SPUselmask R16C:$rA))]>; +class FSMBInst pattern>: + RRForm_1<0b01101101100, OOL, IOL, "fsmb\t$rT, $rA", SelectOp, + pattern>; + +class FSMBRegInst: + FSMBInst<(outs VECREG:$rT), (ins rclass:$rA), + [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>; + +class FSMBVecInst: + FSMBInst<(outs VECREG:$rT), (ins VECREG:$rA), + [(set (vectype VECREG:$rT), + (SPUselmask (vectype VECREG:$rA)))]>; + +multiclass FormSelectMaskBits { + def v16i8_r16: FSMBRegInst; + def v16i8: FSMBVecInst; +} + +defm FSMB: FormSelectMaskBits; // fsmh: Form select mask for halfwords. 
N.B., Input operand, $rA, is // only 8-bits wide (even though it's input as 16-bits here) -def FSMH: - RRForm_1<0b10101101100, (outs VECREG:$rT), (ins R16C:$rA), - "fsmh\t$rT, $rA", SelectOp, - [(set (v8i16 VECREG:$rT), (SPUselmask R16C:$rA))]>; + +class FSMHInst pattern>: + RRForm_1<0b10101101100, OOL, IOL, "fsmh\t$rT, $rA", SelectOp, + pattern>; + +class FSMHRegInst: + FSMHInst<(outs VECREG:$rT), (ins rclass:$rA), + [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>; + +class FSMHVecInst: + FSMHInst<(outs VECREG:$rT), (ins VECREG:$rA), + [(set (vectype VECREG:$rT), + (SPUselmask (vectype VECREG:$rA)))]>; + +multiclass FormSelectMaskHalfword { + def v8i16_r16: FSMHRegInst; + def v8i16: FSMHVecInst; +} + +defm FSMH: FormSelectMaskHalfword; // fsm: Form select mask for words. Like the other fsm* instructions, // only the lower 4 bits of $rA are significant. -class FSMInst: - RRForm_1<0b00101101100, (outs VECREG:$rT), (ins rclass:$rA), - "fsm\t$rT, $rA", - SelectOp, - [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>; + +class FSMInst pattern>: + RRForm_1<0b00101101100, OOL, IOL, "fsm\t$rT, $rA", SelectOp, + pattern>; + +class FSMRegInst: + FSMInst<(outs VECREG:$rT), (ins rclass:$rA), + [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>; + +class FSMVecInst: + FSMInst<(outs VECREG:$rT), (ins VECREG:$rA), + [(set (vectype VECREG:$rT), (SPUselmask (vectype VECREG:$rA)))]>; multiclass FormSelectMaskWord { - def r32 : FSMInst; - def r16 : FSMInst; + def v4i32: FSMVecInst; + + def r32 : FSMRegInst; + def r16 : FSMRegInst; } defm FSM : FormSelectMaskWord; // Special case when used for i64 math operations multiclass FormSelectMaskWord64 { - def r32 : FSMInst; - def r16 : FSMInst; + def r32 : FSMRegInst; + def r16 : FSMRegInst; } defm FSM64 : FormSelectMaskWord64; @@ -736,7 +777,7 @@ defm BG : BorrowGenerate; // BGX: Borrow generate, extended. def BGXvec: RRForm<0b11000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, - VECREG:$rCarry), + VECREG:$rCarry), "bgx\t$rT, $rA, $rB", IntegerOp, []>, RegConstraint<"$rCarry = $rT">, @@ -898,20 +939,31 @@ def MPYHHAUr32: []>; // clz: Count leading zeroes -def CLZv4i32: - RRForm_1<0b10100101010, (outs VECREG:$rT), (ins VECREG:$rA), - "clz\t$rT, $rA", IntegerOp, - [/* intrinsic */]>; +class CLZInst pattern>: + RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA", + IntegerOp, pattern>; -def CLZr32: - RRForm_1<0b10100101010, (outs R32C:$rT), (ins R32C:$rA), - "clz\t$rT, $rA", IntegerOp, - [(set R32C:$rT, (ctlz R32C:$rA))]>; +class CLZRegInst: + CLZInst<(outs rclass:$rT), (ins rclass:$rA), + [(set rclass:$rT, (ctlz rclass:$rA))]>; + +class CLZVecInst: + CLZInst<(outs VECREG:$rT), (ins VECREG:$rA), + [(set (vectype VECREG:$rT), (ctlz (vectype VECREG:$rA)))]>; + +multiclass CountLeadingZeroes { + def v4i32 : CLZVecInst; + def r32 : CLZRegInst; +} + +defm CLZ : CountLeadingZeroes; // cntb: Count ones in bytes (aka "population count") +// // NOTE: This instruction is really a vector instruction, but the custom // lowering code uses it in unorthodox ways to support CTPOP for other // data types! 
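As a rough C model of that trick (illustrative names, not code from this patch): CTPOP on a wider type is just the sum of the per-byte population counts that cntb computes in parallel.

#include <stdint.h>

/* Model of cntb on a single byte: count its one-bits. */
static unsigned cntb_model(uint8_t byte) {
  unsigned n = 0;
  while (byte) { n += byte & 1u; byte >>= 1; }
  return n;
}

/* CTPOP for an i32, modeled as the sum of its four per-byte counts. */
unsigned ctpop32_model(uint32_t x) {
  unsigned total = 0;
  for (int i = 0; i < 4; ++i)
    total += cntb_model((uint8_t)(x >> (8 * i)));
  return total;
}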
+ def CNTBv16i8: RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA), "cntb\t$rT, $rA", IntegerOp, @@ -927,26 +979,88 @@ def CNTBv4i32 : "cntb\t$rT, $rA", IntegerOp, [(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>; -// gbb: Gather all low order bits from each byte in $rA into a single 16-bit -// quantity stored into $rT -def GBB: - RRForm_1<0b01001101100, (outs R16C:$rT), (ins VECREG:$rA), - "gbb\t$rT, $rA", GatherOp, - []>; +// gbb: Gather the low order bits from each byte in $rA into a single 16-bit +// quantity stored into $rT's slot 0, upper 16 bits are zeroed, as are +// slots 1-3. +// +// Note: This instruction "pairs" with the fsmb instruction for all of the +// various types defined here. +// +// Note 2: The "VecInst" and "RegInst" forms refer to the result being either +// a vector or register. + +class GBBInst pattern>: + RRForm_1<0b01001101100, OOL, IOL, "gbb\t$rT, $rA", GatherOp, pattern>; + +class GBBRegInst: + GBBInst<(outs rclass:$rT), (ins VECREG:$rA), + [(set rclass:$rT, (SPUgatherbits (vectype VECREG:$rA)))]>; + +class GBBVecInst: + GBBInst<(outs VECREG:$rT), (ins VECREG:$rA), + [(set (vectype VECREG:$rT), (SPUgatherbits (vectype VECREG:$rA)))]>; + +multiclass GatherBitsFromBytes { + def v16i8_r32: GBBRegInst; + def v16i8_r16: GBBRegInst; + def v16i8: GBBVecInst; +} + +defm GBB: GatherBitsFromBytes; // gbh: Gather all low order bits from each halfword in $rA into a single -// 8-bit quantity stored in $rT -def GBH: - RRForm_1<0b10001101100, (outs R16C:$rT), (ins VECREG:$rA), - "gbh\t$rT, $rA", GatherOp, - []>; +// 8-bit quantity stored in $rT's slot 0, with the upper bits of $rT set to 0 +// and slots 1-3 also set to 0. +// +// See notes for GBBInst, above. + +class GBHInst pattern>: + RRForm_1<0b10001101100, OOL, IOL, "gbh\t$rT, $rA", GatherOp, + pattern>; + +class GBHRegInst: + GBHInst<(outs rclass:$rT), (ins VECREG:$rA), + [(set rclass:$rT, (SPUgatherbits (vectype VECREG:$rA)))]>; + +class GBHVecInst: + GBHInst<(outs VECREG:$rT), (ins VECREG:$rA), + [(set (vectype VECREG:$rT), + (SPUgatherbits (vectype VECREG:$rA)))]>; + +multiclass GatherBitsHalfword { + def v8i16_r32: GBHRegInst; + def v8i16_r16: GBHRegInst; + def v8i16: GBHVecInst; +} + +defm GBH: GatherBitsHalfword; // gb: Gather all low order bits from each word in $rA into a single -// 4-bit quantity stored in $rT -def GB: - RRForm_1<0b00001101100, (outs R16C:$rT), (ins VECREG:$rA), - "gb\t$rT, $rA", GatherOp, - []>; +// 4-bit quantity stored in $rT's slot 0, upper bits in $rT set to 0, +// as well as slots 1-3. +// +// See notes for gbb, above. 
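A short C sketch of the gather/expand pairing described above (names are illustrative and not part of this patch): gb packs bit 0 of each word into a 4-bit mask in the preferred slot, and fsm is its inverse, expanding each mask bit back into an all-ones or all-zeros word.

#include <stdint.h>

/* gb model: word 0's low bit lands in the most significant of the
   four result bits; the upper 28 bits of the result stay zero. */
uint32_t gb_model(const uint32_t v[4]) {
  uint32_t mask = 0;
  for (int i = 0; i < 4; ++i)
    mask = (mask << 1) | (v[i] & 1u);
  return mask;
}

/* fsm model: expand the low four mask bits to full words. */
void fsm_model(uint32_t mask, uint32_t v[4]) {
  for (int i = 0; i < 4; ++i)
    v[i] = (mask & (1u << (3 - i))) ? 0xFFFFFFFFu : 0u;
}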
+ +class GBInst pattern>: + RRForm_1<0b00001101100, OOL, IOL, "gb\t$rT, $rA", GatherOp, + pattern>; + +class GBRegInst: + GBInst<(outs rclass:$rT), (ins VECREG:$rA), + [(set rclass:$rT, (SPUgatherbits (vectype VECREG:$rA)))]>; + +class GBVecInst: + GBInst<(outs VECREG:$rT), (ins VECREG:$rA), + [(set (vectype VECREG:$rT), + (SPUgatherbits (vectype VECREG:$rA)))]>; + +multiclass GatherBitsWord { + def v4i32_r32: GBRegInst; + def v4i32_r16: GBRegInst; + def v4i32: GBVecInst; +} + +defm GB: GatherBitsWord; // avgb: average bytes def AVGB: @@ -976,30 +1090,26 @@ class XSBHVecInst: XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc), [(set (v8i16 VECREG:$rDst), (sext (vectype VECREG:$rSrc)))]>; -class XSBHRegInst: +class XSBHInRegInst: XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc), [(set rclass:$rDst, (sext_inreg rclass:$rSrc, i8))]>; multiclass ExtendByteHalfword { def v16i8: XSBHVecInst; - def r16: XSBHRegInst; + def r16: XSBHInRegInst; + def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc), + [(set R16C:$rDst, (sext R8C:$rSrc))]>; // 32-bit form for XSBH: used to sign extend 8-bit quantities to 16-bit // quantities to 32-bit quantities via a 32-bit register (see the sext 8->32 // pattern below). Intentionally doesn't match a pattern because we want the // sext 8->32 pattern to do the work for us, namely because we need the extra // XSHWr32. - def r32: XSBHRegInst; + def r32: XSBHInRegInst; } defm XSBH : ExtendByteHalfword; -// Sign-extend, but take an 8-bit register to a 16-bit register (not done as -// sext_inreg) -def XSBHr8: - XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc), - [(set R16C:$rDst, (sext R8C:$rSrc))]>; - // Sign extend halfwords to words: def XSHWvec: RRForm_1<0b01101101010, (outs VECREG:$rDest), (ins VECREG:$rSrc), @@ -1208,13 +1318,44 @@ class ORRegInst: ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), [(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>; +// ORCvtForm: OR conversion form +// +// This is used to "convert" the preferred slot to its vector equivalent, as +// well as convert a vector back to its preferred slot. +// +// These are effectively no-ops, but need to exist for proper type conversion +// and type coercion. 
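In C terms, these conversions reinterpret the same 128-bit register under a different type without moving any bits; a minimal sketch with assumed names, where the union stands in for an SPU register:

#include <stdint.h>
#include <string.h>

typedef union {
  uint32_t v4i32[4];   /* vector view */
  uint32_t prefslot;   /* scalar view: word element 0 */
} spu_reg_model;

/* Promote a scalar into the preferred slot: a plain copy. */
spu_reg_model prefslot2vec_model(uint32_t scalar) {
  spu_reg_model r;
  memset(&r, 0, sizeof r);
  r.prefslot = scalar;
  return r;
}

/* Extract the preferred slot: again, no data movement. */
uint32_t vec2prefslot_model(spu_reg_model r) {
  return r.v4i32[0];
}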
+ +class ORCvtForm + : SPUInstr { + bits<7> RA; + bits<7> RT; + + let Pattern = [/* no pattern */]; + + let Inst{0-10} = 0b10000010000; + let Inst{11-17} = RA; + let Inst{18-24} = RA; + let Inst{25-31} = RT; +} + class ORPromoteScalar: - ORInst<(outs VECREG:$rT), (ins rclass:$rA, rclass:$rB), - [/* no pattern */]>; + ORCvtForm<(outs VECREG:$rT), (ins rclass:$rA)>; class ORExtractElt: - ORInst<(outs rclass:$rT), (ins VECREG:$rA, VECREG:$rB), - [/* no pattern */]>; + ORCvtForm<(outs rclass:$rT), (ins VECREG:$rA)>; + +class ORCvtRegGPRC: + ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>; + +class ORCvtVecGPRC: + ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>; + +class ORCvtGPRCReg: + ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>; + +class ORCvtGPRCVec: + ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>; multiclass BitwiseOr { @@ -1229,7 +1370,7 @@ multiclass BitwiseOr (v4i32 VECREG:$rB)))))]>; def v2f64: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v2f64 VECREG:$rT), + [(set (v2f64 VECREG:$rT), (v2f64 (bitconvert (or (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)))))]>; @@ -1260,48 +1401,115 @@ multiclass BitwiseOr def i64_v2i64: ORExtractElt; def f32_v4f32: ORExtractElt; def f64_v2f64: ORExtractElt; + + // Conversion from GPRC to register + def i128_r64: ORCvtRegGPRC; + def i128_f64: ORCvtRegGPRC; + def i128_r32: ORCvtRegGPRC; + def i128_f32: ORCvtRegGPRC; + def i128_r16: ORCvtRegGPRC; + def i128_r8: ORCvtRegGPRC; + + // Conversion from GPRC to vector + def i128_vec: ORCvtVecGPRC; + + // Conversion from register to GPRC + def r64_i128: ORCvtGPRCReg; + def f64_i128: ORCvtGPRCReg; + def r32_i128: ORCvtGPRCReg; + def f32_i128: ORCvtGPRCReg; + def r16_i128: ORCvtGPRCReg; + def r8_i128: ORCvtGPRCReg; + + // Conversion from vector to GPRC + def vec_i128: ORCvtGPRCVec; } defm OR : BitwiseOr; -// scalar->vector promotion patterns: -def : Pat<(v16i8 (SPUpromote_scalar R8C:$rA)), - (ORv16i8_i8 R8C:$rA, R8C:$rA)>; +// scalar->vector promotion patterns (preferred slot to vector): +def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)), + (ORv16i8_i8 R8C:$rA)>; -def : Pat<(v8i16 (SPUpromote_scalar R16C:$rA)), - (ORv8i16_i16 R16C:$rA, R16C:$rA)>; +def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)), + (ORv8i16_i16 R16C:$rA)>; -def : Pat<(v4i32 (SPUpromote_scalar R32C:$rA)), - (ORv4i32_i32 R32C:$rA, R32C:$rA)>; +def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)), + (ORv4i32_i32 R32C:$rA)>; -def : Pat<(v2i64 (SPUpromote_scalar R64C:$rA)), - (ORv2i64_i64 R64C:$rA, R64C:$rA)>; +def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)), + (ORv2i64_i64 R64C:$rA)>; -def : Pat<(v4f32 (SPUpromote_scalar R32FP:$rA)), - (ORv4f32_f32 R32FP:$rA, R32FP:$rA)>; +def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)), + (ORv4f32_f32 R32FP:$rA)>; -def : Pat<(v2f64 (SPUpromote_scalar R64FP:$rA)), - (ORv2f64_f64 R64FP:$rA, R64FP:$rA)>; +def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)), + (ORv2f64_f64 R64FP:$rA)>; -// ORi*_v*: Used to extract vector element 0 (the preferred slot) +// ORi*_v*: Used to extract vector element 0 (the preferred slot), otherwise +// known as converting the vector back to its preferred slot def : Pat<(SPUvec2prefslot (v16i8 VECREG:$rA)), - (ORi8_v16i8 VECREG:$rA, VECREG:$rA)>; + (ORi8_v16i8 VECREG:$rA)>; def : Pat<(SPUvec2prefslot (v8i16 VECREG:$rA)), - (ORi16_v8i16 VECREG:$rA, VECREG:$rA)>; + (ORi16_v8i16 VECREG:$rA)>; def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)), - (ORi32_v4i32 VECREG:$rA, VECREG:$rA)>; + (ORi32_v4i32 VECREG:$rA)>; def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)), - (ORi64_v2i64 VECREG:$rA, VECREG:$rA)>; + (ORi64_v2i64 
VECREG:$rA)>; def : Pat<(SPUvec2prefslot (v4f32 VECREG:$rA)), - (ORf32_v4f32 VECREG:$rA, VECREG:$rA)>; + (ORf32_v4f32 VECREG:$rA)>; def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)), - (ORf64_v2f64 VECREG:$rA, VECREG:$rA)>; + (ORf64_v2f64 VECREG:$rA)>; + +// Load Register: This is an assembler alias for a bitwise OR of a register +// against itself. It's here because it brings some clarity to assembly +// language output. + +let hasCtrlDep = 1 in { + class LRInst + : SPUInstr { + bits<7> RA; + bits<7> RT; + + let Pattern = [/*no pattern*/]; + + let Inst{0-10} = 0b10000010000; /* It's an OR operation */ + let Inst{11-17} = RA; + let Inst{18-24} = RA; + let Inst{25-31} = RT; + } + + class LRVecInst: + LRInst<(outs VECREG:$rT), (ins VECREG:$rA)>; + + class LRRegInst: + LRInst<(outs rclass:$rT), (ins rclass:$rA)>; + + multiclass LoadRegister { + def v2i64: LRVecInst; + def v2f64: LRVecInst; + def v4i32: LRVecInst; + def v4f32: LRVecInst; + def v8i16: LRVecInst; + def v16i8: LRVecInst; + + def r128: LRRegInst; + def r64: LRRegInst; + def f64: LRRegInst; + def r32: LRRegInst; + def f32: LRRegInst; + def r16: LRRegInst; + def r8: LRRegInst; + } + + defm LR: LoadRegister; +} // ORC: Bitwise "or" with complement (c = a | ~b) @@ -1585,12 +1793,24 @@ class SELBVecInst: (and (vnot (vectype VECREG:$rC)), (vectype VECREG:$rA))))]>; +class SELBVecCondInst: + SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC), + [(set (vectype VECREG:$rT), + (select R32C:$rC, + (vectype VECREG:$rB), + (vectype VECREG:$rA)))]>; + class SELBRegInst: SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC), [(set rclass:$rT, (or (and rclass:$rA, rclass:$rC), (and rclass:$rB, (not rclass:$rC))))]>; +class SELBRegCondInst: + SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC), + [(set rclass:$rT, + (select rcond:$rC, rclass:$rB, rclass:$rA))]>; + multiclass SelectBits { def v16i8: SELBVecInst; @@ -1603,6 +1823,16 @@ multiclass SelectBits def r32: SELBRegInst; def r16: SELBRegInst; def r8: SELBRegInst; + + def v16i8_cond: SELBVecCondInst; + def v8i16_cond: SELBVecCondInst; + def v4i32_cond: SELBVecCondInst; + def v2i64_cond: SELBVecCondInst; + + // SELBr64_cond is defined further down, look for i64 comparisons + def r32_cond: SELBRegCondInst; + def r16_cond: SELBRegCondInst; + def r8_cond: SELBRegCondInst; } defm SELB : SelectBits; @@ -1625,14 +1855,6 @@ def : SPUselbPatReg; def : SPUselbPatReg; def : SPUselbPatReg; -class SelectConditional: - Pat<(select rclass:$rCond, rclass:$rTrue, rclass:$rFalse), - (inst rclass:$rFalse, rclass:$rTrue, rclass:$rCond)>; - -def : SelectConditional; -def : SelectConditional; -def : SelectConditional; - // EQV: Equivalence (1 for each same bit, otherwise 0) // // Note: There are a lot of ways to match this bit operator and these patterns @@ -1753,6 +1975,10 @@ class SHUFBVecInst: (resultvec VECREG:$rB), (maskvec VECREG:$rC)))]>; +class SHUFBGPRCInst: + SHUFBInst<(outs VECREG:$rT), (ins GPRC:$rA, GPRC:$rB, VECREG:$rC), + [/* no pattern */]>; + multiclass ShuffleBytes { def v16i8 : SHUFBVecInst; @@ -1769,6 +1995,8 @@ multiclass ShuffleBytes def v2f64 : SHUFBVecInst; def v2f64_m32 : SHUFBVecInst; + + def gprc : SHUFBGPRCInst; } defm SHUFB : ShuffleBytes; @@ -2027,7 +2255,7 @@ defm ROTHI: RotateLeftHalfwordImm; def : Pat<(SPUvec_rotl VECREG:$rA, (i32 uimm7:$val)), (ROTHIv8i16 VECREG:$rA, imm:$val)>; - + //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ // Rotate word: 
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ @@ -2207,7 +2435,7 @@ multiclass RotateQuadByBitCount } defm ROTQBI: RotateQuadByBitCount; - + class ROTQBIIInst pattern>: RI7Form<0b00011111100, OOL, IOL, "rotqbii\t$rT, $rA, $val", RotateShift, pattern>; @@ -2298,7 +2526,7 @@ def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i32 imm:$val)), def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i16 imm:$val)), (ROTHMIv8i16 VECREG:$rA, imm:$val)>; - + def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i8 imm:$val)), (ROTHMIv8i16 VECREG:$rA, imm:$val)>; @@ -2359,7 +2587,7 @@ def ROTMIv4i32: def : Pat<(SPUvec_srl VECREG:$rA, (i16 uimm7:$val)), (ROTMIv4i32 VECREG:$rA, uimm7:$val)>; - + def : Pat<(SPUvec_srl VECREG:$rA, (i8 uimm7:$val)), (ROTMIv4i32 VECREG:$rA, uimm7:$val)>; @@ -2682,7 +2910,7 @@ let isTerminator = 1, isBarrier = 1 in { "hgt\t$rA, $rB", BranchResolv, [/* no pattern to match */]>; - def HGTIr32: + def HGTIr32: RI10Form_2<0b11110010, (outs), (ins R32C:$rA, s10imm:$val), "hgti\t$rA, $val", BranchResolv, [/* no pattern to match */]>; @@ -2698,9 +2926,9 @@ let isTerminator = 1, isBarrier = 1 in { [/* no pattern to match */]>; } -//------------------------------------------------------------------------ -// Comparison operators: -//------------------------------------------------------------------------ +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Comparison operators for i8, i16 and i32: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ class CEQBInst pattern> : RRForm<0b00001011110, OOL, IOL, "ceqb\t$rT, $rA, $rB", @@ -2990,8 +3218,14 @@ defm CLGTI : CmpLGtrWordImm; // define a pattern to generate the right code, as a binary operator // (in a manner of speaking.) // -// N.B.: This only matches the setcc set of conditionals. Special pattern -// matching is used for select conditionals. +// Notes: +// 1. This only matches the setcc set of conditionals. Special pattern +// matching is used for select conditionals. +// +// 2. The "DAG" versions of these classes are almost exclusively used for +// i64 comparisons. See the tblgen fundamentals documentation for what +// ".ResultInstrs[0]" means; see TargetSelectionDAG.td and the Pattern +// class for where ResultInstrs originates. 
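The negated-condition classes below synthesize comparisons the SPU lacks by complementing the mask of a comparison it has, e.g. setne as the complement of ceq (an xori with -1). Roughly, in C (illustrative names only):

#include <stdint.h>

/* ceq produces an all-ones/all-zeros mask. */
uint32_t ceq_model(uint32_t a, uint32_t b) {
  return (a == b) ? 0xFFFFFFFFu : 0u;
}

/* setne = complement of the ceq mask (xori rT, rA, -1). */
uint32_t setne_model(uint32_t a, uint32_t b) {
  return ceq_model(a, b) ^ 0xFFFFFFFFu;
}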
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ class SETCCNegCondReg; -def : SETCCNegCondReg; +def : SETCCNegCondReg; def : SETCCNegCondImm; -def : SETCCNegCondReg; +def : SETCCNegCondReg; def : SETCCNegCondImm; def : SETCCNegCondReg; @@ -3128,8 +3362,8 @@ class SELECTBinOpReg: Pat<(select (inttype (cond rclass:$rA, rclass:$rB)), - rclass:$rFalse, rclass:$rTrue), - (selinstr rclass:$rTrue, rclass:$rFalse, + rclass:$rTrue, rclass:$rFalse), + (selinstr rclass:$rFalse, rclass:$rTrue, (binop (cmpOp1 rclass:$rA, rclass:$rB), (cmpOp2 rclass:$rA, rclass:$rB)))>; @@ -3226,54 +3460,129 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>; // Various branches: - def BRNZ: - RI16Form<0b010000100, (outs), (ins R32C:$rCond, brtarget:$dest), - "brnz\t$rCond,$dest", - BranchResolv, - [(brcond R32C:$rCond, bb:$dest)]>; - - def BRZ: - RI16Form<0b000000100, (outs), (ins R32C:$rT, brtarget:$dest), - "brz\t$rT,$dest", - BranchResolv, - [/* no pattern */]>; + class BRNZInst pattern>: + RI16Form<0b010000100, (outs), IOL, "brnz\t$rCond,$dest", + BranchResolv, pattern>; - def BRHNZ: - RI16Form<0b011000100, (outs), (ins R16C:$rCond, brtarget:$dest), - "brhnz\t$rCond,$dest", - BranchResolv, - [(brcond R16C:$rCond, bb:$dest)]>; + class BRNZRegInst: + BRNZInst<(ins rclass:$rCond, brtarget:$dest), + [(brcond rclass:$rCond, bb:$dest)]>; - def BRHZ: - RI16Form<0b001000100, (outs), (ins R16C:$rT, brtarget:$dest), - "brhz\t$rT,$dest", - BranchResolv, - [/* no pattern */]>; - -/* - def BINZ: - BICondForm<0b10010100100, "binz\t$rA, $func", - [(SPUbinz R32C:$rA, R32C:$func)]>; - - def BIZ: - BICondForm<0b00010100100, "biz\t$rA, $func", - [(SPUbiz R32C:$rA, R32C:$func)]>; -*/ + class BRNZVecInst: + BRNZInst<(ins VECREG:$rCond, brtarget:$dest), + [(brcond (vectype VECREG:$rCond), bb:$dest)]>; + + multiclass BranchNotZero { + def v4i32 : BRNZVecInst; + def r32 : BRNZRegInst; + } + + defm BRNZ : BranchNotZero; + + class BRZInst pattern>: + RI16Form<0b000000100, (outs), IOL, "brz\t$rT,$dest", + BranchResolv, pattern>; + + class BRZRegInst: + BRZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>; + + class BRZVecInst: + BRZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>; + + multiclass BranchZero { + def v4i32: BRZVecInst; + def r32: BRZRegInst; + } + + defm BRZ: BranchZero; + + // Note: LLVM doesn't do branch conditional, indirect. 
Otherwise these would + // be useful: + /* + class BINZInst pattern>: + BICondForm<0b10010100100, (outs), IOL, "binz\t$rA, $dest", pattern>; + + class BINZRegInst: + BINZInst<(ins rclass:$rA, brtarget:$dest), + [(brcond rclass:$rA, R32C:$dest)]>; + + class BINZVecInst: + BINZInst<(ins VECREG:$rA, R32C:$dest), + [(brcond (vectype VECREG:$rA), R32C:$dest)]>; + + multiclass BranchNotZeroIndirect { + def v4i32: BINZVecInst; + def r32: BINZRegInst; + } + + defm BINZ: BranchNotZeroIndirect; + + class BIZInst pattern>: + BICondForm<0b00010100100, (outs), IOL, "biz\t$rA, $func", pattern>; + + class BIZRegInst: + BIZInst<(ins rclass:$rA, R32C:$func), [/* no pattern */]>; + + class BIZVecInst: + BIZInst<(ins VECREG:$rA, R32C:$func), [/* no pattern */]>; + + multiclass BranchZeroIndirect { + def v4i32: BIZVecInst; + def r32: BIZRegInst; + } + + defm BIZ: BranchZeroIndirect; + */ + + class BRHNZInst pattern>: + RI16Form<0b011000100, (outs), IOL, "brhnz\t$rCond,$dest", BranchResolv, + pattern>; + + class BRHNZRegInst: + BRHNZInst<(ins rclass:$rCond, brtarget:$dest), + [(brcond rclass:$rCond, bb:$dest)]>; + + class BRHNZVecInst: + BRHNZInst<(ins VECREG:$rCond, brtarget:$dest), [/* no pattern */]>; + + multiclass BranchNotZeroHalfword { + def v8i16: BRHNZVecInst; + def r16: BRHNZRegInst; + } + + defm BRHNZ: BranchNotZeroHalfword; + + class BRHZInst pattern>: + RI16Form<0b001000100, (outs), IOL, "brhz\t$rT,$dest", BranchResolv, + pattern>; + + class BRHZRegInst: + BRHZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>; + + class BRHZVecInst: + BRHZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>; + + multiclass BranchZeroHalfword { + def v8i16: BRHZVecInst; + def r16: BRHZRegInst; + } + + defm BRHZ: BranchZeroHalfword; } //===----------------------------------------------------------------------===// // setcc and brcond patterns: //===----------------------------------------------------------------------===// -def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest), - (BRHZ R16C:$rA, bb:$dest)>; -def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest), - (BRHNZ R16C:$rA, bb:$dest)>; +def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest), + (BRHZr16 R16C:$rA, bb:$dest)>; +def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest), + (BRHNZr16 R16C:$rA, bb:$dest)>; -def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest), - (BRZ R32C:$rA, bb:$dest)>; -def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest), - (BRNZ R32C:$rA, bb:$dest)>; +def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest), + (BRZr32 R32C:$rA, bb:$dest)>; +def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest), + (BRNZr32 R32C:$rA, bb:$dest)>; multiclass BranchCondEQ { @@ -3290,8 +3599,8 @@ multiclass BranchCondEQ (brinst32 (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>; } -defm BRCONDeq : BranchCondEQ; -defm BRCONDne : BranchCondEQ; +defm BRCONDeq : BranchCondEQ; +defm BRCONDne : BranchCondEQ; multiclass BranchCondLGT { @@ -3308,8 +3617,8 @@ multiclass BranchCondLGT (brinst32 (CLGTr32 R32C:$rA, R32C:$rB), bb:$dest)>; } -defm BRCONDugt : BranchCondLGT; -defm BRCONDule : BranchCondLGT; +defm BRCONDugt : BranchCondLGT; +defm BRCONDule : BranchCondLGT; multiclass BranchCondLGTEQ @@ -3335,8 +3644,8 @@ multiclass BranchCondLGTEQ; } -defm BRCONDuge : BranchCondLGTEQ; -defm BRCONDult : BranchCondLGTEQ; +defm BRCONDuge : BranchCondLGTEQ; +defm BRCONDult : BranchCondLGTEQ; multiclass BranchCondGT { @@ -3353,8 +3662,8 @@ multiclass BranchCondGT (brinst32 (CGTr32 R32C:$rA, R32C:$rB), bb:$dest)>; } -defm BRCONDgt : BranchCondGT; -defm BRCONDle : 
BranchCondGT; +defm BRCONDgt : BranchCondGT; +defm BRCONDle : BranchCondGT; multiclass BranchCondGTEQ @@ -3380,8 +3689,8 @@ multiclass BranchCondGTEQ; } -defm BRCONDge : BranchCondGTEQ; -defm BRCONDlt : BranchCondGTEQ; +defm BRCONDge : BranchCondGTEQ; +defm BRCONDlt : BranchCondGTEQ; let isTerminator = 1, isBarrier = 1 in { let isReturn = 1 in { @@ -3397,10 +3706,12 @@ let isTerminator = 1, isBarrier = 1 in { class FAInst pattern>: RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB", SPrecFP, pattern>; + class FAVecInst: FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), [(set (vectype VECREG:$rT), (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; + multiclass SFPAdd { def v4f32: FAVecInst; @@ -3548,7 +3859,7 @@ def FSCRRf32 : // floating reciprocal absolute square root estimate (frsqest) // The following are probably just intrinsics -// status and control register write +// status and control register write // status and control register read //-------------------------------------- @@ -3603,7 +3914,7 @@ def FMSf32 : // = c - a * b // NOTE: subtraction order // fsub a b = a - b -// fs a b = b - a? +// fs a b = b - a? def FNMSf32 : RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), "fnms\t$rT, $rA, $rB, $rC", SPrecFP, @@ -3612,9 +3923,9 @@ def FNMSf32 : def FNMSv4f32 : RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), "fnms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), - (fsub (v4f32 VECREG:$rC), - (fmul (v4f32 VECREG:$rA), + [(set (v4f32 VECREG:$rT), + (fsub (v4f32 VECREG:$rC), + (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>; //-------------------------------------- @@ -3625,7 +3936,7 @@ def CSiFv4f32: "csflt\t$rT, $rA, 0", SPrecFP, [(set (v4f32 VECREG:$rT), (sint_to_fp (v4i32 VECREG:$rA)))]>; -// Convert signed integer to floating point +// Convert signed integer to floating point def CSiFf32 : CVTIntFPForm<0b0101101110, (outs R32FP:$rT), (ins R32C:$rA), "csflt\t$rT, $rA, 0", SPrecFP, @@ -3642,7 +3953,7 @@ def CUiFf32 : "cuflt\t$rT, $rA, 0", SPrecFP, [(set R32FP:$rT, (uint_to_fp R32C:$rA))]>; -// Convert float to unsigned int +// Convert float to unsigned int // Assume that scale = 0 def CFUiv4f32 : @@ -3655,7 +3966,7 @@ def CFUif32 : "cfltu\t$rT, $rA, 0", SPrecFP, [(set R32C:$rT, (fp_to_uint R32FP:$rA))]>; -// Convert float to signed int +// Convert float to signed int // Assume that scale = 0 def CFSiv4f32 : @@ -3788,9 +4099,9 @@ def FNMSv2f64 : RRForm<0b01111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), "dfnms\t$rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), - (fsub (v2f64 VECREG:$rC), - (fmul (v2f64 VECREG:$rA), + [(set (v2f64 VECREG:$rT), + (fsub (v2f64 VECREG:$rC), + (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB))))]>, RegConstraint<"$rC = $rT">, NoEncode<"$rC">; @@ -3813,9 +4124,9 @@ def FNMAv2f64 : RRForm<0b11111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), "dfnma\t$rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), - (fneg (fadd (v2f64 VECREG:$rC), - (fmul (v2f64 VECREG:$rA), + [(set (v2f64 VECREG:$rT), + (fneg (fadd (v2f64 VECREG:$rC), + (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))))]>, RegConstraint<"$rC = $rT">, NoEncode<"$rC">; @@ -3825,7 +4136,7 @@ def FNMAv2f64 : //===----------------------------------------------------------------------==// def : Pat<(fneg (v4f32 VECREG:$rA)), - (XORfnegvec (v4f32 VECREG:$rA), + (XORfnegvec (v4f32 VECREG:$rA), (v4f32 (ILHUv4i32 0x8000)))>; def : Pat<(fneg R32FP:$rA), @@ -3944,7 +4255,7 @@ def : Pat<(f32 
fpimm:$imm), def : Pat<(v4i32 v4i32Imm:$imm), (IOHLv4i32 (v4i32 (ILHUv4i32 (HI16_vec v4i32Imm:$imm))), (LO16_vec v4i32Imm:$imm))>; - + // 8-bit constants def : Pat<(i8 imm:$imm), (ILHr8 imm:$imm)>; @@ -4001,6 +4312,69 @@ def : Pat<(i32 (anyext R16C:$rSrc)), (ORIi16i32 R16C:$rSrc, 0)>; //===----------------------------------------------------------------------===// +// Truncates: +// These truncates are for the SPU's supported types (i8, i16, i32). i64 and +// above are custom lowered. +//===----------------------------------------------------------------------===// + +def : Pat<(i8 (trunc GPRC:$src)), + (ORi8_v16i8 + (SHUFBgprc GPRC:$src, GPRC:$src, + (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)))>; + +def : Pat<(i8 (trunc R64C:$src)), + (ORi8_v16i8 + (SHUFBv2i64_m32 + (ORv2i64_i64 R64C:$src), + (ORv2i64_i64 R64C:$src), + (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)))>; + +def : Pat<(i8 (trunc R32C:$src)), + (ORi8_v16i8 + (SHUFBv4i32_m32 + (ORv4i32_i32 R32C:$src), + (ORv4i32_i32 R32C:$src), + (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>; + +def : Pat<(i8 (trunc R16C:$src)), + (ORi8_v16i8 + (SHUFBv4i32_m32 + (ORv8i16_i16 R16C:$src), + (ORv8i16_i16 R16C:$src), + (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>; + +def : Pat<(i16 (trunc GPRC:$src)), + (ORi16_v8i16 + (SHUFBgprc GPRC:$src, GPRC:$src, + (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)))>; + +def : Pat<(i16 (trunc R64C:$src)), + (ORi16_v8i16 + (SHUFBv2i64_m32 + (ORv2i64_i64 R64C:$src), + (ORv2i64_i64 R64C:$src), + (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)))>; + +def : Pat<(i16 (trunc R32C:$src)), + (ORi16_v8i16 + (SHUFBv4i32_m32 + (ORv4i32_i32 R32C:$src), + (ORv4i32_i32 R32C:$src), + (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)))>; + +def : Pat<(i32 (trunc GPRC:$src)), + (ORi32_v4i32 + (SHUFBgprc GPRC:$src, GPRC:$src, + (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)))>; + +def : Pat<(i32 (trunc R64C:$src)), + (ORi32_v4i32 + (SHUFBv2i64_m32 + (ORv2i64_i64 R64C:$src), + (ORv2i64_i64 R64C:$src), + (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)))>; + +//===----------------------------------------------------------------------===// // Address generation: SPU, like PPC, has to split addresses into high and // low parts in order to load them into a register. //===----------------------------------------------------------------------===// @@ -4047,3 +4421,5 @@ def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)), // Instrinsics: include "CellSDKIntrinsics.td" +// 64-bit "instructions"/support +include "SPU64InstrInfo.td" diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td index 1ed1e3ba51e..b22c6b5d9fe 100644 --- a/lib/Target/CellSPU/SPUNodes.td +++ b/lib/Target/CellSPU/SPUNodes.td @@ -66,6 +66,13 @@ def SPUselb_type: SDTypeProfile<1, 3, [ def SPUvecshift_type: SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, SDTCisInt<2>]>; +// SPU gather bits: +// This instruction looks at each vector (word|halfword|byte) slot's low bit +// and forms a mask in the low order bits of the first word's preferred slot. 
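Each of the trunc patterns added above builds a shufb control word whose bytes select the low-order bytes of the wider source into the narrow type's preferred slot; a C model of the byte-select behavior (illustrative only, omitting the special control-byte forms that produce constants):

#include <stdint.h>

/* shufb model: control bytes 0x00-0x1f pick one byte out of the
   32-byte concatenation of the two source registers, using the SPU's
   big-endian byte numbering. */
void shufb_model(const uint8_t a[16], const uint8_t b[16],
                 const uint8_t ctl[16], uint8_t out[16]) {
  for (int i = 0; i < 16; ++i) {
    uint8_t c = ctl[i] & 0x1f;
    out[i] = (c < 16) ? a[c] : b[c - 16];
  }
}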
+def SPUgatherbits_type: SDTypeProfile<1, 1, [ + /* no type constraints defined */ +]>; + //===----------------------------------------------------------------------===// // Synthetic/pseudo-instructions //===----------------------------------------------------------------------===// @@ -137,14 +144,17 @@ def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>; // SPU select bits instruction def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>; +// SPU gather bits instruction: +def SPUgatherbits: SDNode<"SPUISD::GATHER_BITS", SPUgatherbits_type, []>; + // SPU floating point interpolate def SPUinterpolate : SDNode<"SPUISD::FPInterp", SDTFPBinOp, []>; // SPU floating point reciprocal estimate (used for fdiv) def SPUreciprocalEst: SDNode<"SPUISD::FPRecipEst", SDTFPUnaryOp, []>; -def SDTpromote_scalar: SDTypeProfile<1, 1, []>; -def SPUpromote_scalar: SDNode<"SPUISD::PROMOTE_SCALAR", SDTpromote_scalar, []>; +def SDTprefslot2vec: SDTypeProfile<1, 1, []>; +def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>; def SPU_vec_demote : SDTypeProfile<1, 1, []>; def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>; diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td index d788f837fc0..802628f8996 100644 --- a/lib/Target/CellSPU/SPUOperands.td +++ b/lib/Target/CellSPU/SPUOperands.td @@ -609,15 +609,15 @@ def symbolLSA: Operand { let PrintMethod = "printSymbolLSA"; } -// memory s7imm(reg) operaand -def memri7 : Operand { - let PrintMethod = "printMemRegImmS7"; +// Shuffle address memory operand [s7imm(reg) d-format] +def shufaddr : Operand { + let PrintMethod = "printShufAddr"; let MIOperandInfo = (ops s7imm:$imm, ptr_rc:$reg); } // memory s10imm(reg) operand -def memri10 : Operand { - let PrintMethod = "printMemRegImmS10"; +def dformaddr : Operand { + let PrintMethod = "printDFormAddr"; let MIOperandInfo = (ops s10imm:$imm, ptr_rc:$reg); } diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index beea0dfb02c..cf4089fa29e 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -403,11 +403,6 @@ SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const void SPURegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { -#if 0 - // Save and clear the LR state. - SPUFunctionInfo *FI = MF.getInfo(); - FI->setUsesLR(MF.getRegInfo().isPhysRegUsed(LR)); -#endif // Mark LR and SP unused, since the prolog spills them to stack and // we don't want anyone else to spill them for us. // diff --git a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp index 2bc0ffdb7ef..72752555e49 100644 --- a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp +++ b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp @@ -26,6 +26,13 @@ SPULinuxTargetAsmInfo::SPULinuxTargetAsmInfo(const SPUTargetMachine &TM) : PrivateGlobalPrefix = ".L"; // This corresponds to what the gcc SPU compiler emits, for consistency. 
CStringSection = ".rodata.str"; + + // BSS section needs to be emitted as ".section" + BSSSection = "\t.section\t.bss"; + BSSSection_ = getUnnamedSection("\t.section\t.bss", + SectionFlags::Writeable | SectionFlags::BSS, + true); + } /// PreferredEHDataFormat - This hook allows the target to select data diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll index 4b0a957feb2..9be714ebc9b 100644 --- a/test/CodeGen/CellSPU/call_indirect.ll +++ b/test/CodeGen/CellSPU/call_indirect.ll @@ -2,7 +2,7 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s ; RUN: grep bisl %t1.s | count 7 ; RUN: grep ila %t1.s | count 1 -; RUN: grep rotqbyi %t1.s | count 4 +; RUN: grep rotqby %t1.s | count 6 ; RUN: grep lqa %t1.s | count 1 ; RUN: grep lqd %t1.s | count 12 ; RUN: grep dispatch_tab %t1.s | count 5 diff --git a/test/CodeGen/CellSPU/icmp64.ll b/test/CodeGen/CellSPU/icmp64.ll new file mode 100644 index 00000000000..d2b4fc096ee --- /dev/null +++ b/test/CodeGen/CellSPU/icmp64.ll @@ -0,0 +1,144 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep ceq %t1.s | count 4 +; RUN: grep cgti %t1.s | count 4 +; RUN: grep gb %t1.s | count 4 +; RUN: grep fsm %t1.s | count 2 +; RUN: grep xori %t1.s | count 1 +; RUN: grep selb %t1.s | count 2 + +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2 +; $3 = %arg1, $4 = %val1, $5 = %val2 +; +; i64 integer comparisons: +define i64 @icmp_eq_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp eq i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_eq_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp eq i64 %arg1, %arg2 + ret i1 %A +} + +define i64 @icmp_ne_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp ne i64 %arg1, %arg2 + %B = select i1 %A, i64 %val1, i64 %val2 + ret i64 %B +} + +define i1 @icmp_ne_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +entry: + %A = icmp ne i64 %arg1, %arg2 + ret i1 %A +} + +;; define i64 @icmp_ugt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +;; entry: +;; %A = icmp ugt i64 %arg1, %arg2 +;; %B = select i1 %A, i64 %val1, i64 %val2 +;; ret i64 %B +;; } +;; +;; define i1 @icmp_ugt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +;; entry: +;; %A = icmp ugt i64 %arg1, %arg2 +;; ret i1 %A +;; } +;; +;; define i64 @icmp_uge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +;; entry: +;; %A = icmp uge i64 %arg1, %arg2 +;; %B = select i1 %A, i64 %val1, i64 %val2 +;; ret i64 %B +;; } +;; +;; define i1 @icmp_uge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +;; entry: +;; %A = icmp uge i64 %arg1, %arg2 +;; ret i1 %A +;; } +;; +;; define i64 @icmp_ult_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +;; entry: +;; %A = icmp ult i64 %arg1, %arg2 +;; %B = select i1 %A, i64 %val1, i64 %val2 +;; ret i64 %B +;; } +;; +;; define i1 @icmp_ult_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +;; entry: +;; %A = icmp ult i64 %arg1, %arg2 +;; ret i1 %A +;; } +;; +;; define i64 @icmp_ule_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +;; entry: +;; %A = icmp ule i64 %arg1, %arg2 +;; %B = select i1 %A, i64 %val1, i64 %val2 +;; ret i64 %B +;; } +;; +;; 
define i1 @icmp_ule_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +;; entry: +;; %A = icmp ule i64 %arg1, %arg2 +;; ret i1 %A +;; } +;; +;; define i64 @icmp_sgt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +;; entry: +;; %A = icmp sgt i64 %arg1, %arg2 +;; %B = select i1 %A, i64 %val1, i64 %val2 +;; ret i64 %B +;; } +;; +;; define i1 @icmp_sgt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +;; entry: +;; %A = icmp sgt i64 %arg1, %arg2 +;; ret i1 %A +;; } +;; +;; define i64 @icmp_sge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +;; entry: +;; %A = icmp sge i64 %arg1, %arg2 +;; %B = select i1 %A, i64 %val1, i64 %val2 +;; ret i64 %B +;; } +;; +;; define i1 @icmp_sge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +;; entry: +;; %A = icmp sge i64 %arg1, %arg2 +;; ret i1 %A +;; } +;; +;; define i64 @icmp_slt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +;; entry: +;; %A = icmp slt i64 %arg1, %arg2 +;; %B = select i1 %A, i64 %val1, i64 %val2 +;; ret i64 %B +;; } +;; +;; define i1 @icmp_slt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +;; entry: +;; %A = icmp slt i64 %arg1, %arg2 +;; ret i1 %A +;; } +;; +;; define i64 @icmp_sle_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +;; entry: +;; %A = icmp sle i64 %arg1, %arg2 +;; %B = select i1 %A, i64 %val1, i64 %val2 +;; ret i64 %B +;; } +;; +;; define i1 @icmp_sle_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind { +;; entry: +;; %A = icmp sle i64 %arg1, %arg2 +;; ret i1 %A +;; } diff --git a/test/CodeGen/CellSPU/stores.ll b/test/CodeGen/CellSPU/stores.ll index 28d2e5b0a89..f2f35ef4dbc 100644 --- a/test/CodeGen/CellSPU/stores.ll +++ b/test/CodeGen/CellSPU/stores.ll @@ -3,8 +3,17 @@ ; RUN: grep {stqd.*16(\$3)} %t1.s | count 4 ; RUN: grep 16256 %t1.s | count 2 ; RUN: grep 16384 %t1.s | count 1 +; RUN: grep 771 %t1.s | count 4 +; RUN: grep 515 %t1.s | count 2 +; RUN: grep 1799 %t1.s | count 2 +; RUN: grep 1543 %t1.s | count 5 +; RUN: grep 1029 %t1.s | count 3 ; RUN: grep {shli.*, 4} %t1.s | count 4 ; RUN: grep stqx %t1.s | count 4 +; RUN: grep ilhu %t1.s | count 11 +; RUN: grep iohl %t1.s | count 8 +; RUN: grep shufb %t1.s | count 15 +; RUN: grep frds %t1.s | count 1 ; ModuleID = 'stores.bc' target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" @@ -89,3 +98,54 @@ entry: store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %arrayidx ret void } + +; Test truncating stores: + +define zeroext i8 @tstore_i16_i8(i16 signext %val, i8* %dest) nounwind { +entry: + %conv = trunc i16 %val to i8 + store i8 %conv, i8* %dest + ret i8 %conv +} + +define zeroext i8 @tstore_i32_i8(i32 %val, i8* %dest) nounwind { +entry: + %conv = trunc i32 %val to i8 + store i8 %conv, i8* %dest + ret i8 %conv +} + +define signext i16 @tstore_i32_i16(i32 %val, i16* %dest) nounwind { +entry: + %conv = trunc i32 %val to i16 + store i16 %conv, i16* %dest + ret i16 %conv +} + +define zeroext i8 @tstore_i64_i8(i64 %val, i8* %dest) nounwind { +entry: + %conv = trunc i64 %val to i8 + store i8 %conv, i8* %dest + ret i8 %conv +} + +define signext i16 @tstore_i64_i16(i64 %val, i16* %dest) nounwind { +entry: + %conv = trunc i64 %val to i16 + store i16 %conv, i16* %dest + ret i16 %conv +} + +define i32 @tstore_i64_i32(i64 %val, i32* %dest) nounwind { +entry: + %conv = trunc i64 %val to 
i32 + store i32 %conv, i32* %dest + ret i32 %conv +} + +define float @tstore_f64_f32(double %val, float* %dest) nounwind { +entry: + %conv = fptrunc double %val to float + store float %conv, float* %dest + ret float %conv +} diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll index 3df7267ff27..82d319dd105 100644 --- a/test/CodeGen/CellSPU/struct_1.ll +++ b/test/CodeGen/CellSPU/struct_1.ll @@ -35,7 +35,7 @@ target triple = "spu" ; int i2; // offset 12 [ignored] ; unsigned char c4; // offset 16 [ignored] ; unsigned char c5; // offset 17 [ignored] -; unsigned char c6; // offset 18 [ignored] +; unsigned char c6; // offset 18 (rotate left by 14 bytes to byte 3) ; unsigned char c7; // offset 19 (no rotate, in preferred slot) ; int i3; // offset 20 [ignored] ; int i4; // offset 24 [ignored] diff --git a/test/CodeGen/CellSPU/trunc.ll b/test/CodeGen/CellSPU/trunc.ll index 845feed8b33..1c6e1f6cb14 100644 --- a/test/CodeGen/CellSPU/trunc.ll +++ b/test/CodeGen/CellSPU/trunc.ll @@ -1,16 +1,12 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s -; RUN: grep shufb %t1.s | count 9 +; RUN: grep shufb %t1.s | count 10 ; RUN: grep {ilhu.*1799} %t1.s | count 1 -; RUN: grep {ilhu.*771} %t1.s | count 3 +; RUN: grep {ilhu.*771} %t1.s | count 1 ; RUN: grep {ilhu.*1543} %t1.s | count 1 ; RUN: grep {ilhu.*1029} %t1.s | count 1 -; RUN: grep {ilhu.*515} %t1.s | count 1 -; RUN: grep {iohl.*1799} %t1.s | count 1 -; RUN: grep {iohl.*771} %t1.s | count 3 -; RUN: grep {iohl.*1543} %t1.s | count 2 -; RUN: grep {iohl.*515} %t1.s | count 1 -; RUN: grep xsbh %t1.s | count 6 -; RUN: grep sfh %t1.s | count 5 +; RUN: grep {ilhu.*515} %t1.s | count 2 +; RUN: grep xsbh %t1.s | count 2 +; RUN: grep sfh %t1.s | count 1 ; ModuleID = 'trunc.bc' target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128" @@ -41,23 +37,22 @@ target triple = "spu" ; ret i64 %0 ;} -define i8 @trunc_i64_i8(i64 %u, i8 %v) nounwind readnone { +define <16 x i8> @trunc_i64_i8(i64 %u, <16 x i8> %v) nounwind readnone { entry: %0 = trunc i64 %u to i8 - %1 = sub i8 %0, %v - ret i8 %1 + %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 10 + ret <16 x i8> %tmp1 } -define i16 @trunc_i64_i16(i64 %u, i16 %v) nounwind readnone { +define <8 x i16> @trunc_i64_i16(i64 %u, <8 x i16> %v) nounwind readnone { entry: %0 = trunc i64 %u to i16 - %1 = sub i16 %0, %v - ret i16 %1 + %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 6 + ret <8 x i16> %tmp1 } define i32 @trunc_i64_i32(i64 %u, i32 %v) nounwind readnone { entry: %0 = trunc i64 %u to i32 - %1 = sub i32 %0, %v - ret i32 %1 + ret i32 %0 } define i8 @trunc_i32_i8(i32 %u, i8 %v) nounwind readnone { @@ -66,16 +61,16 @@ entry: %1 = sub i8 %0, %v ret i8 %1 } -define i16 @trunc_i32_i16(i32 %u, i16 %v) nounwind readnone { +define <8 x i16> @trunc_i32_i16(i32 %u, <8 x i16> %v) nounwind readnone { entry: %0 = trunc i32 %u to i16 - %1 = sub i16 %0, %v - ret i16 %1 + %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 3 + ret <8 x i16> %tmp1 } -define i8 @trunc_i16_i8(i16 %u, i8 %v) nounwind readnone { +define <16 x i8> @trunc_i16_i8(i16 %u, <16 x i8> %v) nounwind readnone { entry: %0 = trunc i16 %u to i8 - %1 = sub i8 %0, %v - ret i8 %1 + %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 5 + ret <16 x i8> %tmp1 } diff --git a/test/CodeGen/CellSPU/useful-harnesses/i32operations.c b/test/CodeGen/CellSPU/useful-harnesses/i32operations.c new file mode 100644 index 00000000000..12fc30bf65d --- /dev/null +++ 
b/test/CodeGen/CellSPU/useful-harnesses/i32operations.c @@ -0,0 +1,69 @@ +#include <stdio.h> + +typedef unsigned int uint32_t; +typedef int int32_t; + +const char *boolstring(int val) { + return val ? "true" : "false"; +} + +int i32_eq(int32_t a, int32_t b) { + return (a == b); +} + +int i32_neq(int32_t a, int32_t b) { + return (a != b); +} + +int32_t i32_eq_select(int32_t a, int32_t b, int32_t c, int32_t d) { + return ((a == b) ? c : d); +} + +int32_t i32_neq_select(int32_t a, int32_t b, int32_t c, int32_t d) { + return ((a != b) ? c : d); +} + +struct pred_s { + const char *name; + int (*predfunc)(int32_t, int32_t); + int (*selfunc)(int32_t, int32_t, int32_t, int32_t); +}; + +struct pred_s preds[] = { + { "eq", i32_eq, i32_eq_select }, + { "neq", i32_neq, i32_neq_select } +}; + +int main(void) { + int i; + int32_t a = 1234567890; + int32_t b = 345678901; + int32_t c = 1234500000; + int32_t d = 10001; + int32_t e = 10000; + + printf("a = %12d (0x%08x)\n", a, a); + printf("b = %12d (0x%08x)\n", b, b); + printf("c = %12d (0x%08x)\n", c, c); + printf("d = %12d (0x%08x)\n", d, d); + printf("e = %12d (0x%08x)\n", e, e); + printf("----------------------------------------\n"); + + for (i = 0; i < sizeof(preds)/sizeof(preds[0]); ++i) { + printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a))); + printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a))); + printf("a %s b = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, b))); + printf("a %s c = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, c))); + printf("d %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(d, e))); + printf("e %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(e, e))); + + printf("a %s a ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, a, c, d)); + printf("a %s a ? c : d == c (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, a, c, d) == c)); + printf("a %s b ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, b, c, d)); + printf("a %s b ? c : d == d (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, b, c, d) == d)); + + printf("----------------------------------------\n"); + } + + return 0; +} diff --git a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c new file mode 100644 index 00000000000..7b86070095f --- /dev/null +++ b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c @@ -0,0 +1,68 @@ +#include <stdio.h> + +typedef unsigned long long int uint64_t; +typedef long long int int64_t; + +const char *boolstring(int val) { + return val ? "true" : "false"; +} + +int i64_eq(int64_t a, int64_t b) { + return (a == b); +} + +int i64_neq(int64_t a, int64_t b) { + return (a != b); +} + +int64_t i64_eq_select(int64_t a, int64_t b, int64_t c, int64_t d) { + return ((a == b) ? c : d); +} + +int64_t i64_neq_select(int64_t a, int64_t b, int64_t c, int64_t d) { + return ((a != b) ? 
c : d); +} + +struct pred_s { + const char *name; + int (*predfunc)(int64_t, int64_t); + int64_t (*selfunc)(int64_t, int64_t, int64_t, int64_t); +}; + +struct pred_s preds[] = { + { "eq", i64_eq, i64_eq_select }, + { "neq", i64_neq, i64_neq_select } +}; + +int main(void) { + int i; + int64_t a = 1234567890000LL; + int64_t b = 2345678901234LL; + int64_t c = 1234567890001LL; + int64_t d = 10001LL; + int64_t e = 10000LL; + + printf("a = %16lld (0x%016llx)\n", a, a); + printf("b = %16lld (0x%016llx)\n", b, b); + printf("c = %16lld (0x%016llx)\n", c, c); + printf("d = %16lld (0x%016llx)\n", d, d); + printf("e = %16lld (0x%016llx)\n", e, e); + printf("----------------------------------------\n"); + + for (i = 0; i < sizeof(preds)/sizeof(preds[0]); ++i) { + printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a))); + printf("a %s b = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, b))); + printf("a %s c = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, c))); + printf("d %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(d, e))); + printf("e %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(e, e))); + + printf("a %s a ? c : d = %lld\n", preds[i].name, (*preds[i].selfunc)(a, a, c, d)); + printf("a %s a ? c : d == c (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, a, c, d) == c)); + printf("a %s b ? c : d = %lld\n", preds[i].name, (*preds[i].selfunc)(a, b, c, d)); + printf("a %s b ? c : d == d (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, b, c, d) == d)); + + printf("----------------------------------------\n"); + } + + return 0; +} -- 2.11.0
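The commented-out cases in icmp64.ll cover the i64 predicates that are not implemented yet; when they land, the harness above can grow the same way. A hypothetical extension for unsigned greater-than (names not part of this patch):

int i64_ugt(int64_t a, int64_t b) {
  return ((uint64_t) a > (uint64_t) b);
}

int64_t i64_ugt_select(int64_t a, int64_t b, int64_t c, int64_t d) {
  return (((uint64_t) a > (uint64_t) b) ? c : d);
}

/* ... plus the matching preds[] entry: { "ugt", i64_ugt, i64_ugt_select }, */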