From 564427eabdc4cb25e1bc9eb15c589e9451fc29a2 Mon Sep 17 00:00:00 2001 From: Scott Michel Date: Wed, 5 Dec 2007 01:24:05 +0000 Subject: [PATCH] Main CellSPU backend files checked in. Intrinsics and autoconf files remain. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44595 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/CellSPU/SPUMachineFunction.h | 45 ++ lib/Target/CellSPU/SPUNodes.td | 219 ++++++++ lib/Target/CellSPU/SPUOperands.td | 574 +++++++++++++++++++++ lib/Target/CellSPU/SPURegisterInfo.cpp | 863 ++++++++++++++++++++++++++++++++ lib/Target/CellSPU/SPURegisterInfo.h | 137 +++++ lib/Target/CellSPU/SPURegisterInfo.td | 393 +++++++++++++++ lib/Target/CellSPU/SPUSchedule.td | 59 +++ lib/Target/CellSPU/SPUSubtarget.cpp | 42 ++ lib/Target/CellSPU/SPUSubtarget.h | 95 ++++ lib/Target/CellSPU/SPUTargetAsmInfo.cpp | 56 +++ lib/Target/CellSPU/SPUTargetAsmInfo.h | 32 ++ lib/Target/CellSPU/SPUTargetMachine.cpp | 87 ++++ lib/Target/CellSPU/SPUTargetMachine.h | 95 ++++ 13 files changed, 2697 insertions(+) create mode 100644 lib/Target/CellSPU/SPUMachineFunction.h create mode 100644 lib/Target/CellSPU/SPUNodes.td create mode 100644 lib/Target/CellSPU/SPUOperands.td create mode 100644 lib/Target/CellSPU/SPURegisterInfo.cpp create mode 100644 lib/Target/CellSPU/SPURegisterInfo.h create mode 100644 lib/Target/CellSPU/SPURegisterInfo.td create mode 100644 lib/Target/CellSPU/SPUSchedule.td create mode 100644 lib/Target/CellSPU/SPUSubtarget.cpp create mode 100644 lib/Target/CellSPU/SPUSubtarget.h create mode 100644 lib/Target/CellSPU/SPUTargetAsmInfo.cpp create mode 100644 lib/Target/CellSPU/SPUTargetAsmInfo.h create mode 100644 lib/Target/CellSPU/SPUTargetMachine.cpp create mode 100644 lib/Target/CellSPU/SPUTargetMachine.h diff --git a/lib/Target/CellSPU/SPUMachineFunction.h b/lib/Target/CellSPU/SPUMachineFunction.h new file mode 100644 index 00000000000..8c98d5a3508 --- /dev/null +++ b/lib/Target/CellSPU/SPUMachineFunction.h @@ -0,0 +1,45 @@ +//===-- 
SPUMachineFunctionInfo.h - Private data used for CellSPU --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by a team from the Computer Systems Research +// Department at The Aerospace Corporation. +// +// See README.txt for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the IBM Cell SPU specific subclass of MachineFunctionInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef SPU_MACHINE_FUNCTION_INFO_H +#define SPU_MACHINE_FUNCTION_INFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +/// SPUFunctionInfo - Cell SPU target-specific information for each +/// MachineFunction +class SPUFunctionInfo : public MachineFunctionInfo { +private: + /// UsesLR - Indicates whether LR is used in the current function. + /// + bool UsesLR; + +public: + SPUFunctionInfo(MachineFunction& MF) + : UsesLR(false) + {} + + void setUsesLR(bool U) { UsesLR = U; } + bool usesLR() { return UsesLR; } + +}; + +} // end of namespace llvm + + +#endif + diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td new file mode 100644 index 00000000000..eaf2f49b23e --- /dev/null +++ b/lib/Target/CellSPU/SPUNodes.td @@ -0,0 +1,219 @@ +//=- SPUNodes.h - Specialized SelectionDAG nodes used for CellSPU -*- C++ -*-=// +// +// This file was developed by a team from the Computer Systems Research +// Department at The Aerospace Corporation. +// +// See README.txt for details. 
+//===----------------------------------------------------------------------===// +// +// Type profiles and SelectionDAG nodes used by CellSPU +// +//===----------------------------------------------------------------------===// + +// Type profile for a call sequence +def SDT_SPUCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>; + +// SPU_GenControl: Type profile for generating control words for insertions +def SPU_GenControl : SDTypeProfile<1, 1, []>; +def SPUvecinsmask : SDNode<"SPUISD::INSERT_MASK", SPU_GenControl, []>; + +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq, + [SDNPHasChain, SDNPOutFlag]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq, + [SDNPHasChain, SDNPOutFlag]>; +//===----------------------------------------------------------------------===// +// Operand constraints: +//===----------------------------------------------------------------------===// + +def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; +def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +// Operand type constraints for vector shuffle/permute operations +def SDT_SPUshuffle : SDTypeProfile<1, 3, [ + SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2> +]>; + +// Unary, binary v16i8 operator type constraints: +def SPUv16i8_unop: SDTypeProfile<1, 1, [ + SDTCisVT<0, v16i8>, SDTCisSameAs<0, 1>]>; + +def SPUv16i8_binop: SDTypeProfile<1, 2, [ + SDTCisVT<0, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; + +// Binary v8i16 operator type constraints: +def SPUv8i16_unop: SDTypeProfile<1, 1, [ + SDTCisVT<0, v8i16>, SDTCisSameAs<0, 1>]>; + +def SPUv8i16_binop: SDTypeProfile<1, 2, [ + SDTCisVT<0, v8i16>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; + +// Binary v4i32 operator type constraints: +def SPUv4i32_unop: SDTypeProfile<1, 1, [ + SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>]>; + +def SPUv4i32_binop: SDTypeProfile<1, 2, [ + SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; + +// FSMBI type 
constraints: There are several variations for the various +// vector types (this avoids having to bit_convert all over the place.) +def SPUfsmbi_type_v16i8: SDTypeProfile<1, 1, [ + SDTCisVT<0, v16i8>, SDTCisVT<1, i32>]>; + +def SPUfsmbi_type_v8i16: SDTypeProfile<1, 1, [ + SDTCisVT<0, v8i16>, SDTCisVT<1, i32>]>; + +def SPUfsmbi_type_v4i32: SDTypeProfile<1, 1, [ + SDTCisVT<0, v4i32>, SDTCisVT<1, i32>]>; + +// SELB type constraints: +def SPUselb_type_v16i8: SDTypeProfile<1, 3, [ + SDTCisVT<0, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, + SDTCisSameAs<0, 3> ]>; + +def SPUselb_type_v8i16: SDTypeProfile<1, 3, [ + SDTCisVT<0, v8i16>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, + SDTCisSameAs<0, 3> ]>; + +def SPUselb_type_v4i32: SDTypeProfile<1, 3, [ + SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, + SDTCisSameAs<0, 3> ]>; + +// SPU Vector shift pseudo-instruction type constraints +def SPUvecshift_type_v16i8: SDTypeProfile<1, 2, [ + SDTCisVT<0, v16i8>, SDTCisSameAs<0, 1>, SDTCisInt<2>]>; + +def SPUvecshift_type_v8i16: SDTypeProfile<1, 2, [ + SDTCisVT<0, v8i16>, SDTCisSameAs<0, 1>, SDTCisInt<2>]>; + +def SPUvecshift_type_v4i32: SDTypeProfile<1, 2, [ + SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisInt<2>]>; + +//===----------------------------------------------------------------------===// +// Synthetic/pseudo-instructions +//===----------------------------------------------------------------------===// + +// SPU CNTB: +def SPUcntb_v16i8: SDNode<"SPUISD::CNTB", SPUv16i8_unop, []>; +def SPUcntb_v8i16: SDNode<"SPUISD::CNTB", SPUv8i16_unop, []>; +def SPUcntb_v4i32: SDNode<"SPUISD::CNTB", SPUv4i32_unop, []>; + +// SPU vector shuffle node, matched by the SPUISD::SHUFB enum (see +// SPUISelLowering.h): +def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>; + +// SPU 16-bit multiply +def SPUmpy_v16i8: SDNode<"SPUISD::MPY", SPUv16i8_binop, []>; +def SPUmpy_v8i16: SDNode<"SPUISD::MPY", SPUv8i16_binop, []>; +def SPUmpy_v4i32: SDNode<"SPUISD::MPY", SPUv4i32_binop, 
[]>; + +// SPU multiply unsigned, used in instruction lowering for v4i32 +// multiplies: +def SPUmpyu_v4i32: SDNode<"SPUISD::MPYU", SPUv4i32_binop, []>; +def SPUmpyu_i32: SDNode<"SPUISD::MPYU", SDTIntBinOp, []>; + +// SPU 16-bit multiply high x low, shift result 16-bits +// Used to compute intermediate products for 32-bit multiplies +def SPUmpyh_v4i32: SDNode<"SPUISD::MPYH", SPUv4i32_binop, []>; +def SPUmpyh_i32: SDNode<"SPUISD::MPYH", SDTIntBinOp, []>; + +// SPU 16-bit multiply high x high, 32-bit product +// Used to compute intermediate products for 16-bit multiplies +def SPUmpyhh_v8i16: SDNode<"SPUISD::MPYHH", SPUv8i16_binop, []>; + +// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only): +def SPUvec_shl_v8i16: SDNode<"SPUISD::VEC_SHL", SPUvecshift_type_v8i16, []>; +def SPUvec_srl_v8i16: SDNode<"SPUISD::VEC_SRL", SPUvecshift_type_v8i16, []>; +def SPUvec_sra_v8i16: SDNode<"SPUISD::VEC_SRA", SPUvecshift_type_v8i16, []>; + +def SPUvec_shl_v4i32: SDNode<"SPUISD::VEC_SHL", SPUvecshift_type_v4i32, []>; +def SPUvec_srl_v4i32: SDNode<"SPUISD::VEC_SRL", SPUvecshift_type_v4i32, []>; +def SPUvec_sra_v4i32: SDNode<"SPUISD::VEC_SRA", SPUvecshift_type_v4i32, []>; + +def SPUvec_rotl_v8i16: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type_v8i16, []>; +def SPUvec_rotl_v4i32: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type_v4i32, []>; + +def SPUvec_rotr_v8i16: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type_v8i16, []>; +def SPUvec_rotr_v4i32: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type_v4i32, []>; + +def SPUrotbytes_right_zfill: SDNode<"SPUISD::ROTBYTES_RIGHT_Z", + SPUvecshift_type_v16i8, []>; +def SPUrotbytes_right_sfill: SDNode<"SPUISD::ROTBYTES_RIGHT_S", + SPUvecshift_type_v16i8, []>; +def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT", + SPUvecshift_type_v16i8, []>; + +def SPUrotbytes_left_chained : SDNode<"SPUISD::ROTBYTES_LEFT_CHAINED", + SPUvecshift_type_v16i8, [SDNPHasChain]>; + +// SPU form select mask for bytes, immediate +def SPUfsmbi_v16i8: SDNode<"SPUISD::FSMBI", 
SPUfsmbi_type_v16i8, []>; +def SPUfsmbi_v8i16: SDNode<"SPUISD::FSMBI", SPUfsmbi_type_v8i16, []>; +def SPUfsmbi_v4i32: SDNode<"SPUISD::FSMBI", SPUfsmbi_type_v4i32, []>; + +// SPU select bits instruction +def SPUselb_v16i8: SDNode<"SPUISD::SELB", SPUselb_type_v16i8, []>; +def SPUselb_v8i16: SDNode<"SPUISD::SELB", SPUselb_type_v8i16, []>; +def SPUselb_v4i32: SDNode<"SPUISD::SELB", SPUselb_type_v4i32, []>; + +// SPU single precision floating point constant load +def SPUFPconstant: SDNode<"SPUISD::SFPConstant", SDTFPUnaryOp, []>; + +// SPU floating point interpolate +def SPUinterpolate : SDNode<"SPUISD::FPInterp", SDTFPBinOp, []>; + +// SPU floating point reciprocal estimate (used for fdiv) +def SPUreciprocalEst: SDNode<"SPUISD::FPRecipEst", SDTFPUnaryOp, []>; + +def SDT_vec_promote : SDTypeProfile<1, 1, []>; +def SPUpromote_scalar: SDNode<"SPUISD::PROMOTE_SCALAR", SDT_vec_promote, []>; + +def SPU_vec_demote : SDTypeProfile<1, 1, []>; +def SPUextract_elt0: SDNode<"SPUISD::EXTRACT_ELT0", SPU_vec_demote, []>; +def SPU_vec_demote_chained : SDTypeProfile<1, 2, []>; +def SPUextract_elt0_chained: SDNode<"SPUISD::EXTRACT_ELT0_CHAINED", + SPU_vec_demote_chained, [SDNPHasChain]>; +def SPUextract_i1_sext: SDNode<"SPUISD::EXTRACT_I1_SEXT", SPU_vec_demote, []>; +def SPUextract_i1_zext: SDNode<"SPUISD::EXTRACT_I1_ZEXT", SPU_vec_demote, []>; +def SPUextract_i8_sext: SDNode<"SPUISD::EXTRACT_I8_SEXT", SPU_vec_demote, []>; +def SPUextract_i8_zext: SDNode<"SPUISD::EXTRACT_I8_ZEXT", SPU_vec_demote, []>; + +// Address high and low components, used for [r+r] type addressing +def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>; +def SPUlo : SDNode<"SPUISD::Lo", SDTIntBinOp, []>; + +// PC-relative address +def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>; + +// D-Form "imm($reg)" addresses +def SPUdform : SDNode<"SPUISD::DFormAddr", SDTIntBinOp, []>; + +// SPU 32-bit sign-extension to 64-bits +def SPUsext32_to_64: SDNode<"SPUISD::SEXT32TO64", SDTIntExtendOp, []>; + +// Branches: + 
+def SPUbrnz : SDNode<"SPUISD::BR_NOTZERO", SDTBrcond, [SDNPHasChain]>; +def SPUbrz : SDNode<"SPUISD::BR_ZERO", SDTBrcond, [SDNPHasChain]>; +/* def SPUbinz : SDNode<"SPUISD::BR_NOTZERO", SDTBrind, [SDNPHasChain]>; +def SPUbiz : SDNode<"SPUISD::BR_ZERO", SPUBrind, [SDNPHasChain]>; */ + +//===----------------------------------------------------------------------===// +// Constraints: (taken from PPCInstrInfo.td) +//===----------------------------------------------------------------------===// + +class RegConstraint { + string Constraints = C; +} + +class NoEncode { + string DisableEncoding = E; +} + +//===----------------------------------------------------------------------===// +// Return (flag isn't quite what it means: the operations are flagged so that +// instruction scheduling doesn't disassociate them.) +//===----------------------------------------------------------------------===// + +def retflag : SDNode<"SPUISD::RET_FLAG", SDTRet, + [SDNPHasChain, SDNPOptInFlag]>; diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td new file mode 100644 index 00000000000..af445666ac1 --- /dev/null +++ b/lib/Target/CellSPU/SPUOperands.td @@ -0,0 +1,574 @@ +//===- SPUOperands.td - Cell SPU Instruction Operands ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by The Aerospace Corporation. +// +//===----------------------------------------------------------------------===// +// Cell SPU Instruction Operands: +//===----------------------------------------------------------------------===// + +def LO16 : SDNodeXFormgetValue(); + // Transformation function: get the low 16 bits. + return getI32Imm(val & 0xffff); +}]>; + +def LO16_vec : SDNodeXFormgetOpcode() == ISD::BUILD_VECTOR + && "LO16_vec got something other than a BUILD_VECTOR"); + + // Get first constant operand... 
+ for (unsigned i = 0, e = N->getNumOperands(); OpVal.Val == 0 && i != e; ++i) { + if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; + if (OpVal.Val == 0) + OpVal = N->getOperand(i); + } + + assert(OpVal.Val != 0 && "LO16_vec did not locate a node"); + ConstantSDNode *CN = dyn_cast(OpVal); + return getI32Imm((unsigned)CN->getValue() & 0xffff); +}]>; + +// Transform an immediate, returning the high 16 bits shifted down: +def HI16 : SDNodeXFormgetValue() >> 16); +}]>; + +// Transformation function: shift the high 16 bit immediate from a build_vector +// node into the low 16 bits, and return a 16-bit constant. +def HI16_vec : SDNodeXFormgetOpcode() == ISD::BUILD_VECTOR + && "HI16_vec got something other than a BUILD_VECTOR"); + + // Get first constant operand... + for (unsigned i = 0, e = N->getNumOperands(); OpVal.Val == 0 && i != e; ++i) { + if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; + if (OpVal.Val == 0) + OpVal = N->getOperand(i); + } + + assert(OpVal.Val != 0 && "HI16_vec did not locate a node"); + ConstantSDNode *CN = dyn_cast(OpVal); + return getI32Imm((unsigned)CN->getValue() >> 16); +}]>; + +// simm7 predicate - True if the immediate fits in a 7-bit signed +// field. +def simm7: PatLeaf<(imm), [{ + int sextVal = ((((int) N->getValue()) << 25) >> 25); + return (sextVal >= -64 && sextVal <= 63); +}]>; + +// uimm7 predicate - True if the immediate fits in a 7-bit unsigned +// field. +def uimm7: PatLeaf<(imm), [{ + return (N->getValue() <= 0x7f); +}]>; + +// immSExt8 predicate - True if the immediate fits in an 8-bit sign extended +// field. +def immSExt8 : PatLeaf<(imm), [{ + int Value = (int) N->getValue(); + int Value8 = (Value << 24) >> 24; + return (Value < 0xff && (Value8 >= -128 && Value8 < 127)); +}]>; + +// immU8: immediate, unsigned 8-bit quantity +def immU8 : PatLeaf<(imm), [{ + return (N->getValue() <= 0xff); +}]>; + +// i64ImmSExt10 predicate - True if the i64 immediate fits in a 10-bit sign +// extended field.
Used by RI10Form instructions like 'ldq'. +def i64ImmSExt10 : PatLeaf<(imm), [{ + return isI64IntS10Immediate(N); +}]>; + +// i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign +// extended field. Used by RI10Form instructions like 'ldq'. +def i32ImmSExt10 : PatLeaf<(imm), [{ + return isI32IntS10Immediate(N); +}]>; + +// i16ImmSExt10 predicate - True if the i16 immediate fits in a 10-bit sign +// extended field. Used by RI10Form instructions like 'ldq'. +def i16ImmSExt10 : PatLeaf<(imm), [{ + return isI16IntS10Immediate(N); +}]>; + +def immSExt16 : PatLeaf<(imm), [{ + // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended + // field. + short Ignored; + return isIntS16Immediate(N, Ignored); +}]>; + +def immZExt16 : PatLeaf<(imm), [{ + // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended + // field. + return (uint64_t)N->getValue() == (unsigned short)N->getValue(); +}], LO16>; + +def immU16 : PatLeaf<(imm), [{ + // immU16 predicate - True if the immediate fits into a 16-bit unsigned field. + return (uint64_t)N->getValue() == (N->getValue() & 0xffff); +}]>; + +def imm18 : PatLeaf<(imm), [{ + // imm18 predicate: True if the immediate fits into an 18-bit unsigned field.
+ int Value = (int) N->getValue(); + return ((Value & ((1 << 19) - 1)) == Value); +}]>; + +def hi16 : PatLeaf<(imm), [{ + // hi16 predicate - returns true if the immediate has all zeros in the + // low order bits and is a 32-bit constant: + if (N->getValueType(0) == MVT::i32) { + uint32_t val = N->getValue(); + return ((val & 0xffff0000) == val); + } + + return false; +}], HI16>; + +//===----------------------------------------------------------------------===// +// Floating point operands: +//===----------------------------------------------------------------------===// + +// Transform a float, returning the high 16 bits shifted down, as if +// the float was really an unsigned integer: +def HI16_f32 : SDNodeXFormgetValueAPF(); + float fval = apf.convertToFloat(); + unsigned val = *((unsigned *) &fval); + return getI32Imm(val >> 16); +}]>; + +// Transformation function on floats: get the low 16 bits as if the float was +// an unsigned integer. +def LO16_f32 : SDNodeXFormgetValueAPF(); + float fval = apf.convertToFloat(); + unsigned val = *((unsigned *) &fval); + return getI32Imm(val & 0xffff); +}]>; + +def FPimm_sext16 : SDNodeXFormgetValueAPF(); + float fval = apf.convertToFloat(); + unsigned val = *((unsigned *) &fval); + return getI32Imm((int) ((val << 16) >> 16)); +}]>; + +def FPimm_u18 : SDNodeXFormgetValueAPF(); + float fval = apf.convertToFloat(); + unsigned val = *((unsigned *) &fval); + return getI32Imm(val & ((1 << 19) - 1)); +}]>; + +def fpimmSExt16 : PatLeaf<(fpimm), [{ + short Ignored; + return isFPS16Immediate(N, Ignored); +}], FPimm_sext16>; + +// Does the SFP constant only have upp 16 bits set? +def hi16_f32 : PatLeaf<(fpimm), [{ + if (N->getValueType(0) == MVT::f32) { + const APFloat &apf = N->getValueAPF(); + float fval = apf.convertToFloat(); + uint32_t val = *((unsigned *) &fval); + return ((val & 0xffff0000) == val); + } + + return false; +}], HI16_f32>; + +// Does the SFP constant fit into 18 bits? 
+def fpimm18 : PatLeaf<(fpimm), [{ + if (N->getValueType(0) == MVT::f32) { + const APFloat &apf = N->getValueAPF(); + float fval = apf.convertToFloat(); + uint32_t Value = *((uint32_t *) &fval); + return ((Value & ((1 << 19) - 1)) == Value); + } + + return false; +}], FPimm_u18>; + +//===----------------------------------------------------------------------===// +// 64-bit operands: +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// build_vector operands: +//===----------------------------------------------------------------------===// + +// v16i8SExt8Imm_xform function: convert build_vector to 8-bit sign extended +// immediate constant load for v16i8 vectors. N.B.: The incoming constant has +// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a). +def v16i8SExt8Imm_xform: SDNodeXForm; + +// v16i8SExt8Imm: Predicate test for 8-bit sign extended immediate constant +// load, works in conjunction with its transform function. N.B.: This relies the +// incoming constant being a 16-bit quantity, where the upper and lower bytes +// are EXACTLY the same (e.g., 0x2a2a) +def v16i8SExt8Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).Val != 0; +}], v16i8SExt8Imm_xform>; + +// v16i8U8Imm_xform function: convert build_vector to unsigned 8-bit +// immediate constant load for v16i8 vectors. N.B.: The incoming constant has +// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a). +def v16i8U8Imm_xform: SDNodeXForm; + +// v16i8U8Imm: Predicate test for unsigned 8-bit immediate constant +// load, works in conjunction with its transform function. 
N.B.: This relies on the +// incoming constant being a 16-bit quantity, where the upper and lower bytes +// are EXACTLY the same (e.g., 0x2a2a) +def v16i8U8Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).Val != 0; +}], v16i8U8Imm_xform>; + +// v8i16SExt8Imm_xform function: convert build_vector to 8-bit sign extended +// immediate constant load for v8i16 vectors. +def v8i16SExt8Imm_xform: SDNodeXForm; + +// v8i16SExt8Imm: Predicate test for 8-bit sign extended immediate constant +// load, works in conjunction with its transform function. +def v8i16SExt8Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16).Val != 0; +}], v8i16SExt8Imm_xform>; + +// v8i16SExt10Imm_xform function: convert build_vector to 10-bit sign extended +// immediate constant load for v8i16 vectors. +def v8i16SExt10Imm_xform: SDNodeXForm; + +// v8i16SExt10Imm: Predicate test for 10-bit sign extended immediate constant +// load, works in conjunction with its transform function. +def v8i16SExt10Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).Val != 0; +}], v8i16SExt10Imm_xform>; + +// v8i16SExt16Imm_xform function: convert build_vector to 16-bit sign extended +// immediate constant load for v8i16 vectors. +def v8i16SExt16Imm_xform: SDNodeXForm; + +// v8i16SExt16Imm: Predicate test for 16-bit sign extended immediate constant +// load, works in conjunction with its transform function. +def v8i16SExt16Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16).Val != 0; +}], v8i16SExt16Imm_xform>; + +// v4i32SExt10Imm_xform function: convert build_vector to 10-bit sign extended +// immediate constant load for v4i32 vectors. +def v4i32SExt10Imm_xform: SDNodeXForm; + +// v4i32SExt10Imm: Predicate test for 10-bit sign extended immediate constant +// load, works in conjunction with its transform function.
+def v4i32SExt10Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).Val != 0; +}], v4i32SExt10Imm_xform>; + +// v4i32SExt16Imm_xform function: convert build_vector to 16-bit sign extended +// immediate constant load for v4i32 vectors. +def v4i32SExt16Imm_xform: SDNodeXForm; + +// v4i32SExt16Imm: Predicate test for 16-bit sign extended immediate constant +// load, works in conjunction with its transform function. +def v4i32SExt16Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32).Val != 0; +}], v4i32SExt16Imm_xform>; + +// v4i32Uns18Imm_xform function: convert build_vector to 18-bit unsigned +// immediate constant load for v4i32 vectors. +def v4i32Uns18Imm_xform: SDNodeXForm; + +// v4i32Uns18Imm: Predicate test for 18-bit unsigned immediate constant load, +// works in conjunction with its transform function. +def v4i32Uns18Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32).Val != 0; +}], v4i32Uns18Imm_xform>; + +// ILHUvec_get_imm xform function: convert build_vector to ILHUvec imm constant +// load. +def ILHUvec_get_imm: SDNodeXForm; + +/// immILHUvec: Predicate test for a ILHU constant vector. +def immILHUvec: PatLeaf<(build_vector), [{ + return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32).Val != 0; +}], ILHUvec_get_imm>; + +// Catch-all for any other i32 vector constants +def v4i32_get_imm: SDNodeXForm; + +def v4i32Imm: PatLeaf<(build_vector), [{ + return SPU::get_v4i32_imm(N, *CurDAG).Val != 0; +}], v4i32_get_imm>; + +// v2i64SExt10Imm_xform function: convert build_vector to 10-bit sign extended +// immediate constant load for v2i64 vectors. +def v2i64SExt10Imm_xform: SDNodeXForm; + +// v2i64SExt10Imm: Predicate test for 10-bit sign extended immediate constant +// load, works in conjunction with its transform function. 
+def v2i64SExt10Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64).Val != 0; +}], v2i64SExt10Imm_xform>; + +// v2i64SExt16Imm_xform function: convert build_vector to 16-bit sign extended +// immediate constant load for v2i64 vectors. +def v2i64SExt16Imm_xform: SDNodeXForm; + +// v2i64SExt16Imm: Predicate test for 16-bit sign extended immediate constant +// load, works in conjunction with its transform function. +def v2i64SExt16Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64).Val != 0; +}], v2i64SExt16Imm_xform>; + +// v2i64Uns18Imm_xform function: convert build_vector to 18-bit unsigned +// immediate constant load for v2i64 vectors. +def v2i64Uns18Imm_xform: SDNodeXForm; + +// v2i64Uns18Imm: Predicate test for 18-bit unsigned immediate constant load, +// works in conjunction with its transform function. +def v2i64Uns18Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64).Val != 0; +}], v2i64Uns18Imm_xform>; + +/// immILHUvec: Predicate test for a ILHU constant vector. +def immILHUvec_i64: PatLeaf<(build_vector), [{ + return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i64).Val != 0; +}], ILHUvec_get_imm>; + +// Catch-all for any other i32 vector constants +def v2i64_get_imm: SDNodeXForm; + +def v2i64Imm: PatLeaf<(build_vector), [{ + return SPU::get_v2i64_imm(N, *CurDAG).Val != 0; +}], v2i64_get_imm>; + +//===----------------------------------------------------------------------===// +// Operand Definitions. 
+ +def s7imm: Operand { + let PrintMethod = "printS7ImmOperand"; +} + +def u7imm: Operand { + let PrintMethod = "printU7ImmOperand"; +} + +def u7imm_i32: Operand { + let PrintMethod = "printU7ImmOperand"; +} + +// Halfword, signed 10-bit constant +def s10imm : Operand { + let PrintMethod = "printS10ImmOperand"; +} + +def s10imm_i32: Operand { + let PrintMethod = "printS10ImmOperand"; +} + +def s10imm_i64: Operand { + let PrintMethod = "printS10ImmOperand"; +} + +// Unsigned 10-bit integers: +def u10imm: Operand { + let PrintMethod = "printU10ImmOperand"; +} + +def u10imm_i32: Operand { + let PrintMethod = "printU10ImmOperand"; +} + +def s16imm : Operand { + let PrintMethod = "printS16ImmOperand"; +} + +def s16imm_i32: Operand { + let PrintMethod = "printS16ImmOperand"; +} + +def s16imm_i64: Operand { + let PrintMethod = "printS16ImmOperand"; +} + +def s16imm_f32: Operand { + let PrintMethod = "printS16ImmOperand"; +} + +def s16imm_f64: Operand { + let PrintMethod = "printS16ImmOperand"; +} + +def u16imm : Operand { + let PrintMethod = "printU16ImmOperand"; +} + +def f16imm : Operand { + let PrintMethod = "printU16ImmOperand"; +} + +def s18imm : Operand { + let PrintMethod = "printS18ImmOperand"; +} + +def u18imm : Operand { + let PrintMethod = "printU18ImmOperand"; +} + +def u18imm_i64 : Operand { + let PrintMethod = "printU18ImmOperand"; +} + +def f18imm : Operand { + let PrintMethod = "printU18ImmOperand"; +} + +def f18imm_f64 : Operand { + let PrintMethod = "printU18ImmOperand"; +} + +// Negated 7-bit halfword rotate immediate operands +def rothNeg7imm : Operand { + let PrintMethod = "printROTHNeg7Imm"; +} + +def rothNeg7imm_i16 : Operand { + let PrintMethod = "printROTHNeg7Imm"; +} + +// Negated 7-bit word rotate immediate operands +def rotNeg7imm : Operand { + let PrintMethod = "printROTNeg7Imm"; +} + +def rotNeg7imm_i16 : Operand { + let PrintMethod = "printROTNeg7Imm"; +} + +// Floating point immediate operands +def f32imm : Operand; + +def target : Operand 
{ + let PrintMethod = "printBranchOperand"; +} + +// Absolute address call target +def calltarget : Operand { + let PrintMethod = "printCallOperand"; + let MIOperandInfo = (ops u18imm:$calldest); +} + +// Relative call target +def relcalltarget : Operand { + let PrintMethod = "printPCRelativeOperand"; + let MIOperandInfo = (ops s16imm:$calldest); +} + +// Branch targets: +def brtarget : Operand { + let PrintMethod = "printPCRelativeOperand"; +} + +// Indirect call target +def indcalltarget : Operand { + let PrintMethod = "printCallOperand"; + let MIOperandInfo = (ops ptr_rc:$calldest); +} + +def symbolHi: Operand { + let PrintMethod = "printSymbolHi"; +} + +def symbolLo: Operand { + let PrintMethod = "printSymbolLo"; +} + +def symbolLSA: Operand { + let PrintMethod = "printSymbolLSA"; +} + +// memory s7imm(reg) operand +def memri7 : Operand { + let PrintMethod = "printMemRegImmS7"; + let MIOperandInfo = (ops s7imm:$imm, ptr_rc:$reg); +} + +// memory s10imm(reg) operand +def memri10 : Operand { + let PrintMethod = "printMemRegImmS10"; + let MIOperandInfo = (ops s10imm:$imm, ptr_rc:$reg); +} + +// 256K local store address +// N.B.: The tblgen code generator expects to have two operands, an offset +// and a pointer. Of these, only the immediate is actually used.
+def addr256k : Operand { + let PrintMethod = "printAddr256K"; + let MIOperandInfo = (ops s16imm:$imm, ptr_rc:$reg); +} + +// memory s18imm(reg) operand +def memri18 : Operand { + let PrintMethod = "printMemRegImmS18"; + let MIOperandInfo = (ops s18imm:$imm, ptr_rc:$reg); +} + +// memory register + register operand +def memrr : Operand { + let PrintMethod = "printMemRegReg"; + let MIOperandInfo = (ops ptr_rc:$reg_a, ptr_rc:$reg_b); +} + +// Define SPU-specific addressing modes: These come in four basic +// flavors: +// +// D-form : [r+I10] (10-bit signed offset + reg) +// X-form : [r+r] (reg+reg) +// A-form : abs (256K LSA offset) +// D-form(2): [r+I7] (7-bit signed offset + reg) + +def dform_addr : ComplexPattern; +def xform_addr : ComplexPattern; +def aform_addr : ComplexPattern; +def dform2_addr : ComplexPattern; diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp new file mode 100644 index 00000000000..546b91cab7e --- /dev/null +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -0,0 +1,863 @@ +//===- SPURegisterInfo.cpp - Cell SPU Register Information ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by a team from the Computer Systems Research +// Department at The Aerospace Corporation. +// +// See README.txt for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Cell SPU implementation of the MRegisterInfo class.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "reginfo" +#include "SPU.h" +#include "SPURegisterInfo.h" +#include "SPURegisterNames.h" +#include "SPUInstrBuilder.h" +#include "SPUSubtarget.h" +#include "SPUMachineFunction.h" +#include "SPUFrameInfo.h" +#include "llvm/Constants.h" +#include "llvm/Type.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include +#include + +using namespace llvm; + +/// getRegisterNumbering - Given the enum value for some register, e.g. +/// SPU::R14, return the number that it corresponds to (e.g. 14).
+unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) { + using namespace SPU; + switch (RegEnum) { + case SPU::R0: return 0; + case SPU::R1: return 1; + case SPU::R2: return 2; + case SPU::R3: return 3; + case SPU::R4: return 4; + case SPU::R5: return 5; + case SPU::R6: return 6; + case SPU::R7: return 7; + case SPU::R8: return 8; + case SPU::R9: return 9; + case SPU::R10: return 10; + case SPU::R11: return 11; + case SPU::R12: return 12; + case SPU::R13: return 13; + case SPU::R14: return 14; + case SPU::R15: return 15; + case SPU::R16: return 16; + case SPU::R17: return 17; + case SPU::R18: return 18; + case SPU::R19: return 19; + case SPU::R20: return 20; + case SPU::R21: return 21; + case SPU::R22: return 22; + case SPU::R23: return 23; + case SPU::R24: return 24; + case SPU::R25: return 25; + case SPU::R26: return 26; + case SPU::R27: return 27; + case SPU::R28: return 28; + case SPU::R29: return 29; + case SPU::R30: return 30; + case SPU::R31: return 31; + case SPU::R32: return 32; + case SPU::R33: return 33; + case SPU::R34: return 34; + case SPU::R35: return 35; + case SPU::R36: return 36; + case SPU::R37: return 37; + case SPU::R38: return 38; + case SPU::R39: return 39; + case SPU::R40: return 40; + case SPU::R41: return 41; + case SPU::R42: return 42; + case SPU::R43: return 43; + case SPU::R44: return 44; + case SPU::R45: return 45; + case SPU::R46: return 46; + case SPU::R47: return 47; + case SPU::R48: return 48; + case SPU::R49: return 49; + case SPU::R50: return 50; + case SPU::R51: return 51; + case SPU::R52: return 52; + case SPU::R53: return 53; + case SPU::R54: return 54; + case SPU::R55: return 55; + case SPU::R56: return 56; + case SPU::R57: return 57; + case SPU::R58: return 58; + case SPU::R59: return 59; + case SPU::R60: return 60; + case SPU::R61: return 61; + case SPU::R62: return 62; + case SPU::R63: return 63; + case SPU::R64: return 64; + case SPU::R65: return 65; + case SPU::R66: return 66; + case SPU::R67: return 67; + 
case SPU::R68: return 68; + case SPU::R69: return 69; + case SPU::R70: return 70; + case SPU::R71: return 71; + case SPU::R72: return 72; + case SPU::R73: return 73; + case SPU::R74: return 74; + case SPU::R75: return 75; + case SPU::R76: return 76; + case SPU::R77: return 77; + case SPU::R78: return 78; + case SPU::R79: return 79; + case SPU::R80: return 80; + case SPU::R81: return 81; + case SPU::R82: return 82; + case SPU::R83: return 83; + case SPU::R84: return 84; + case SPU::R85: return 85; + case SPU::R86: return 86; + case SPU::R87: return 87; + case SPU::R88: return 88; + case SPU::R89: return 89; + case SPU::R90: return 90; + case SPU::R91: return 91; + case SPU::R92: return 92; + case SPU::R93: return 93; + case SPU::R94: return 94; + case SPU::R95: return 95; + case SPU::R96: return 96; + case SPU::R97: return 97; + case SPU::R98: return 98; + case SPU::R99: return 99; + case SPU::R100: return 100; + case SPU::R101: return 101; + case SPU::R102: return 102; + case SPU::R103: return 103; + case SPU::R104: return 104; + case SPU::R105: return 105; + case SPU::R106: return 106; + case SPU::R107: return 107; + case SPU::R108: return 108; + case SPU::R109: return 109; + case SPU::R110: return 110; + case SPU::R111: return 111; + case SPU::R112: return 112; + case SPU::R113: return 113; + case SPU::R114: return 114; + case SPU::R115: return 115; + case SPU::R116: return 116; + case SPU::R117: return 117; + case SPU::R118: return 118; + case SPU::R119: return 119; + case SPU::R120: return 120; + case SPU::R121: return 121; + case SPU::R122: return 122; + case SPU::R123: return 123; + case SPU::R124: return 124; + case SPU::R125: return 125; + case SPU::R126: return 126; + case SPU::R127: return 127; + default: + std::cerr << "Unhandled reg in SPURegisterInfo::getRegisterNumbering!\n"; + abort(); + } +} + +SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget, + const TargetInstrInfo &tii) : + SPUGenRegisterInfo(SPU::ADJCALLSTACKDOWN, 
SPU::ADJCALLSTACKUP), + Subtarget(subtarget), + TII(tii) +{ +} + +void +SPURegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, int FrameIdx, + const TargetRegisterClass *RC) const +{ + MachineOpCode opc; + if (RC == SPU::GPRCRegisterClass) { + opc = (FrameIdx < SPUFrameInfo::maxFrameOffset()) + ? SPU::STQDr128 + : SPU::STQXr128; + } else if (RC == SPU::R64CRegisterClass) { + opc = (FrameIdx < SPUFrameInfo::maxFrameOffset()) + ? SPU::STQDr64 + : SPU::STQXr64; + } else if (RC == SPU::R64FPRegisterClass) { + opc = (FrameIdx < SPUFrameInfo::maxFrameOffset()) + ? SPU::STQDr64 + : SPU::STQXr64; + } else if (RC == SPU::R32CRegisterClass) { + opc = (FrameIdx < SPUFrameInfo::maxFrameOffset()) + ? SPU::STQDr32 + : SPU::STQXr32; + } else if (RC == SPU::R32FPRegisterClass) { + opc = (FrameIdx < SPUFrameInfo::maxFrameOffset()) + ? SPU::STQDr32 + : SPU::STQXr32; + } else if (RC == SPU::R16CRegisterClass) { + opc = (FrameIdx < SPUFrameInfo::maxFrameOffset()) ? 
+ SPU::STQDr16 + : SPU::STQXr16; + } else { + assert(0 && "Unknown regclass!"); + abort(); + } + + addFrameReference(BuildMI(MBB, MI, TII.get(opc)).addReg(SrcReg), FrameIdx); +} + +void SPURegisterInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, + SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const { + cerr << "storeRegToAddr() invoked!\n"; + abort(); + + if (Addr[0].isFrameIndex()) { + /* do what storeRegToStackSlot does here */ + } else { + unsigned Opc = 0; + if (RC == SPU::GPRCRegisterClass) { + /* Opc = PPC::STW; */ + } else if (RC == SPU::R16CRegisterClass) { + /* Opc = PPC::STD; */ + } else if (RC == SPU::R32CRegisterClass) { + /* Opc = PPC::STFD; */ + } else if (RC == SPU::R32FPRegisterClass) { + /* Opc = PPC::STFD; */ + } else if (RC == SPU::R64FPRegisterClass) { + /* Opc = PPC::STFS; */ + } else if (RC == SPU::VECREGRegisterClass) { + /* Opc = PPC::STVX; */ + } else { + assert(0 && "Unknown regclass!"); + abort(); + } + MachineInstrBuilder MIB = BuildMI(TII.get(Opc)) + .addReg(SrcReg, false, false, true); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) { + MachineOperand &MO = Addr[i]; + if (MO.isRegister()) + MIB.addReg(MO.getReg()); + else if (MO.isImmediate()) + MIB.addImm(MO.getImmedValue()); + else + MIB.addFrameIndex(MO.getFrameIndex()); + } + NewMIs.push_back(MIB); + } +} + +void +SPURegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC) const +{ + MachineOpCode opc; + if (RC == SPU::GPRCRegisterClass) { + opc = (FrameIdx < SPUFrameInfo::maxFrameOffset()) + ? SPU::LQDr128 + : SPU::LQXr128; + } else if (RC == SPU::R64CRegisterClass) { + opc = (FrameIdx < SPUFrameInfo::maxFrameOffset()) + ? SPU::LQDr64 + : SPU::LQXr64; + } else if (RC == SPU::R64FPRegisterClass) { + opc = (FrameIdx < SPUFrameInfo::maxFrameOffset()) + ? 
SPU::LQDr64 + : SPU::LQXr64; + } else if (RC == SPU::R32CRegisterClass) { + opc = (FrameIdx < SPUFrameInfo::maxFrameOffset()) + ? SPU::LQDr32 + : SPU::LQXr32; + } else if (RC == SPU::R32FPRegisterClass) { + opc = (FrameIdx < SPUFrameInfo::maxFrameOffset()) + ? SPU::LQDr32 + : SPU::LQXr32; + } else if (RC == SPU::R16CRegisterClass) { + opc = (FrameIdx < SPUFrameInfo::maxFrameOffset()) + ? SPU::LQDr16 + : SPU::LQXr16; + } else { + assert(0 && "Unknown regclass in loadRegFromStackSlot!"); + abort(); + } + + addFrameReference(BuildMI(MBB, MI, TII.get(opc)).addReg(DestReg), FrameIdx); +} + +/*! + \note We are really pessimistic here about what kind of a load we're doing. + */ +void SPURegisterInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) + const { + cerr << "loadRegToAddr() invoked!\n"; + abort(); + + if (Addr[0].isFrameIndex()) { + /* do what loadRegFromStackSlot does here... */ + } else { + unsigned Opc = 0; + if (RC == SPU::R16CRegisterClass) { + /* Opc = PPC::LWZ; */ + } else if (RC == SPU::R32CRegisterClass) { + /* Opc = PPC::LD; */ + } else if (RC == SPU::R32FPRegisterClass) { + /* Opc = PPC::LFD; */ + } else if (RC == SPU::R64FPRegisterClass) { + /* Opc = PPC::LFS; */ + } else if (RC == SPU::VECREGRegisterClass) { + /* Opc = PPC::LVX; */ + } else if (RC == SPU::GPRCRegisterClass) { + /* Opc = something else! 
*/ + } else { + assert(0 && "Unknown regclass!"); + abort(); + } + MachineInstrBuilder MIB = BuildMI(TII.get(Opc), DestReg); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) { + MachineOperand &MO = Addr[i]; + if (MO.isRegister()) + MIB.addReg(MO.getReg()); + else if (MO.isImmediate()) + MIB.addImm(MO.getImmedValue()); + else + MIB.addFrameIndex(MO.getFrameIndex()); + } + NewMIs.push_back(MIB); + } +} + +void SPURegisterInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const +{ + if (DestRC != SrcRC) { + cerr << "SPURegisterInfo::copyRegToReg(): DestRC != SrcRC not supported!\n"; + abort(); + } + + /* if (DestRC == SPU::R8CRegisterClass) { + BuildMI(MBB, MI, TII.get(SPU::ORBIr8), DestReg).addReg(SrcReg).addImm(0); + } else */ + if (DestRC == SPU::R16CRegisterClass) { + BuildMI(MBB, MI, TII.get(SPU::ORHIr16), DestReg).addReg(SrcReg).addImm(0); + } else if (DestRC == SPU::R32CRegisterClass) { + BuildMI(MBB, MI, TII.get(SPU::ORIr32), DestReg).addReg(SrcReg).addImm(0); + } else if (DestRC == SPU::R32FPRegisterClass) { + BuildMI(MBB, MI, TII.get(SPU::ORIf32), DestReg).addReg(SrcReg).addImm(0); + } else if (DestRC == SPU::R64CRegisterClass) { + BuildMI(MBB, MI, TII.get(SPU::ORIr64), DestReg).addReg(SrcReg).addImm(0); + } else if (DestRC == SPU::R64FPRegisterClass) { + BuildMI(MBB, MI, TII.get(SPU::ORIf64), DestReg).addReg(SrcReg).addImm(0); + } else if (DestRC == SPU::GPRCRegisterClass) { + BuildMI(MBB, MI, TII.get(SPU::ORgprc), DestReg).addReg(SrcReg) + .addReg(SrcReg); + } else if (DestRC == SPU::VECREGRegisterClass) { + BuildMI(MBB, MI, TII.get(SPU::ORv4i32), DestReg).addReg(SrcReg) + .addReg(SrcReg); + } else { + std::cerr << "Attempt to copy unknown/unsupported register class!\n"; + abort(); + } +} + +void SPURegisterInfo::reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, + const 
MachineInstr *Orig) const { + MachineInstr *MI = Orig->clone(); + MI->getOperand(0).setReg(DestReg); + MBB.insert(I, MI); +} + +// SPU's 128-bit registers used for argument passing: +static const unsigned SPU_ArgRegs[] = { + SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9, + SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16, + SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23, + SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30, + SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37, + SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44, + SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51, + SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58, + SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65, + SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72, + SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79 +}; + +const unsigned * +SPURegisterInfo::getArgRegs() +{ + return SPU_ArgRegs; +} + +const unsigned +SPURegisterInfo::getNumArgRegs() +{ + return sizeof(SPU_ArgRegs) / sizeof(SPU_ArgRegs[0]); +} + +const unsigned * +SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const +{ + // Cell ABI calling convention + static const unsigned SPU_CalleeSaveRegs[] = { + SPU::R80, SPU::R81, SPU::R82, SPU::R83, + SPU::R84, SPU::R85, SPU::R86, SPU::R87, + SPU::R88, SPU::R89, SPU::R90, SPU::R91, + SPU::R92, SPU::R93, SPU::R94, SPU::R95, + SPU::R96, SPU::R97, SPU::R98, SPU::R99, + SPU::R100, SPU::R101, SPU::R102, SPU::R103, + SPU::R104, SPU::R105, SPU::R106, SPU::R107, + SPU::R108, SPU::R109, SPU::R110, SPU::R111, + SPU::R112, SPU::R113, SPU::R114, SPU::R115, + SPU::R116, SPU::R117, SPU::R118, SPU::R119, + SPU::R120, SPU::R121, SPU::R122, SPU::R123, + SPU::R124, SPU::R125, SPU::R126, SPU::R127, + SPU::R2, /* environment pointer */ + SPU::R1, /* stack pointer */ + SPU::R0, /* link 
register */ + 0 /* end */ + }; + + return SPU_CalleeSaveRegs; +} + +const TargetRegisterClass* const* +SPURegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const +{ + // Cell ABI Calling Convention + static const TargetRegisterClass * const SPU_CalleeSaveRegClasses[] = { + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, + &SPU::GPRCRegClass, /* environment pointer */ + &SPU::GPRCRegClass, /* stack pointer */ + &SPU::GPRCRegClass, /* link register */ + 0 /* end */ + }; + + return SPU_CalleeSaveRegClasses; +} + +/*! + R0 (link register), R1 (stack pointer) and R2 (environment pointer -- this is + generally unused) are the Cell's reserved registers + */ +BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + Reserved.set(SPU::R0); // LR + Reserved.set(SPU::R1); // SP + Reserved.set(SPU::R2); // environment pointer + return Reserved; +} + +/// foldMemoryOperand - SPU, like PPC, can only fold spills into +/// copy instructions, turning them into load/store instructions. 
+MachineInstr * +SPURegisterInfo::foldMemoryOperand(MachineInstr *MI, unsigned OpNum, + int FrameIndex) const +{ +#if SOMEDAY_SCOTT_LOOKS_AT_ME_AGAIN + unsigned Opc = MI->getOpcode(); + MachineInstr *NewMI = 0; + + if ((Opc == SPU::ORr32 + || Opc == SPU::ORv4i32) + && MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { + if (OpNum == 0) { // move -> store + unsigned InReg = MI->getOperand(1).getReg(); + if (FrameIndex < SPUFrameInfo::maxFrameOffset()) { + NewMI = addFrameReference(BuildMI(TII.get(SPU::STQDr32)).addReg(InReg), + FrameIndex); + } + } else { // move -> load + unsigned OutReg = MI->getOperand(0).getReg(); + Opc = (FrameIndex < SPUFrameInfo::maxFrameOffset()) ? SPU::STQDr32 : SPU::STQXr32; + NewMI = addFrameReference(BuildMI(TII.get(Opc), OutReg), FrameIndex); + } + } + + if (NewMI) + NewMI->copyKillDeadInfo(MI); + + return NewMI; +#else + return 0; +#endif +} + +/// General-purpose load/store fold to operand code +MachineInstr * +SPURegisterInfo::foldMemoryOperand(MachineInstr *MI, unsigned OpNum, + MachineInstr *LoadMI) const +{ + return 0; +} + +//===----------------------------------------------------------------------===// +// Stack Frame Processing methods +//===----------------------------------------------------------------------===// + +// needsFP - Return true if the specified function should have a dedicated frame +// pointer register. This is true if the function has variable sized allocas or +// if frame pointer elimination is disabled. +// +static bool needsFP(const MachineFunction &MF) { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return NoFramePointerElim || MFI->hasVarSizedObjects(); +} + +//-------------------------------------------------------------------------- +// hasFP - Return true if the specified function actually has a dedicated frame +// pointer register. This is true if the function needs a frame pointer and has +// a non-zero stack size. 
+bool +SPURegisterInfo::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MFI->getStackSize() && needsFP(MF); +} + +//-------------------------------------------------------------------------- +void +SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) + const +{ + // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. + MBB.erase(I); +} + +void +SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + RegScavenger *RS) const +{ + assert(SPAdj == 0 && "Unexpected SP adjacency == 0"); + + unsigned i = 0; + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + while (!MI.getOperand(i).isFrameIndex()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + MachineOperand &SPOp = MI.getOperand(i); + int FrameIndex = SPOp.getFrameIndex(); + + // Now add the frame object offset to the offset from r1. + int Offset = MFI->getObjectOffset(FrameIndex); + + // Most instructions, except for generated FrameIndex additions using AIr32, + // have the immediate in operand 1. AIr32, in this case, has the immediate + // in operand 2. + unsigned OpNo = (MI.getOpcode() != SPU::AIr32 ? 1 : 2); + MachineOperand &MO = MI.getOperand(OpNo); + + // Offset is biased by $lr's slot at the bottom. 
+ Offset += MO.getImmedValue() + MFI->getStackSize() + + SPUFrameInfo::minStackSize(); + assert((Offset & 0xf) == 0 + && "16-byte alignment violated in SPURegisterInfo::eliminateFrameIndex"); + + // Replace the FrameIndex with base register with $sp (aka $r1) + SPOp.ChangeToRegister(SPU::R1, false); + if (Offset > SPUFrameInfo::maxFrameOffset() + || Offset < SPUFrameInfo::minFrameOffset()) { + cerr << "Large stack adjustment (" + << Offset + << ") in SPURegisterInfo::eliminateFrameIndex."; + } else { + MO.ChangeToImmediate(Offset); + } +} + +/// determineFrameLayout - Determine the size of the frame and maximum call +/// frame size. +void +SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const +{ + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Get the number of bytes to allocate from the FrameInfo + unsigned FrameSize = MFI->getStackSize(); + + // Get the alignments provided by the target, and the maximum alignment + // (if any) of the fixed frame objects. + unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment()); + assert(isPowerOf2_32(Align) && "Alignment is not power of 2"); + unsigned AlignMask = Align - 1; + + // Get the maximum call frame size of all the calls. + unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); + + // If we have dynamic alloca then maxCallFrameSize needs to be aligned so + // that allocations will be aligned. + if (MFI->hasVarSizedObjects()) + maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; + + // Update maximum call frame size. + MFI->setMaxCallFrameSize(maxCallFrameSize); + + // Include call frame size in total. + FrameSize += maxCallFrameSize; + + // Make sure the frame is aligned. + FrameSize = (FrameSize + AlignMask) & ~AlignMask; + + // Update frame info. 
+ MFI->setStackSize(FrameSize); +} + +void SPURegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) + const { +#if 0 + // Save and clear the LR state. + SPUFunctionInfo *FI = MF.getInfo(); + FI->setUsesLR(MF.isPhysRegUsed(LR)); +#endif + // Mark LR and SP unused, since the prolog spills them to stack and + // we don't want anyone else to spill them for us. + // + // Also, unless R2 is really used someday, don't spill it automatically. + MF.setPhysRegUnused(SPU::R0); + MF.setPhysRegUnused(SPU::R1); + MF.setPhysRegUnused(SPU::R2); +} + +void SPURegisterInfo::emitPrologue(MachineFunction &MF) const +{ + MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); + + // Prepare for debug frame info. + bool hasDebugInfo = MMI && MMI->hasDebugInfo(); + unsigned FrameLabelId = 0; + + // Move MBBI back to the beginning of the function. + MBBI = MBB.begin(); + + // Work out frame sizes. + determineFrameLayout(MF); + int FrameSize = MFI->getStackSize(); + + assert((FrameSize & 0xf) == 0 + && "SPURegisterInfo::emitPrologue: FrameSize not aligned"); + + if (FrameSize > 0) { + FrameSize = -(FrameSize + SPUFrameInfo::minStackSize()); + if (hasDebugInfo) { + // Mark effective beginning of when frame pointer becomes valid. 
+ FrameLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, TII.get(ISD::LABEL)).addImm(FrameLabelId);
+ }
+
+ // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
+ // for the ABI
+ BuildMI(MBB, MBBI, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
+ .addReg(SPU::R1);
+ if (isS10Constant(FrameSize)) {
+ // Spill $sp to adjusted $sp
+ BuildMI(MBB, MBBI, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
+ .addReg(SPU::R1);
+ // Adjust $sp by required amount
+ BuildMI(MBB, MBBI, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
+ .addImm(FrameSize);
+ } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
+ // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
+ // $r2 to adjust $sp:
+ BuildMI(MBB, MBBI, TII.get(SPU::STQDr128), SPU::R2)
+ .addImm(-16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, TII.get(SPU::ILr32), SPU::R2)
+ .addImm(FrameSize);
+ BuildMI(MBB, MBBI, TII.get(SPU::STQDr32), SPU::R1)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, TII.get(SPU::Ar32), SPU::R1)
+ .addReg(SPU::R1)
+ .addReg(SPU::R2);
+ BuildMI(MBB, MBBI, TII.get(SPU::SFIr32), SPU::R2)
+ .addReg(SPU::R2)
+ .addImm(16);
+ BuildMI(MBB, MBBI, TII.get(SPU::LQXr128), SPU::R2)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ } else {
+ cerr << "Unhandled frame size: " << FrameSize << "\n";
+ abort();
+ }
+
+ if (hasDebugInfo) {
+ std::vector &Moves = MMI->getFrameMoves();
+
+ // Show update of SP.
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+
+ // Add callee saved registers to move list. 
+ const std::vector &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == SPU::R0) continue;
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
+ }
+
+ // Mark effective beginning of when frame pointer is ready.
+ unsigned ReadyLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, TII.get(ISD::LABEL)).addImm(ReadyLabelId);
+
+ MachineLocation FPDst(SPU::R1);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+ }
+ } else {
+ // This is a leaf function -- insert a branch hint iff there are
+ // a sufficient number of instructions in the basic block. Note that
+ // this is just a best guess based on the basic block's size.
+ if (MBB.size() >= (unsigned) SPUFrameInfo::branchHintPenalty()) {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ // Insert terminator label
+ unsigned BranchLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, TII.get(SPU::LABEL)).addImm(BranchLabelId);
+ }
+ }
+}
+
+void
+SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+{
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int FrameSize = MFI->getStackSize();
+ int LinkSlotOffset = SPUFrameInfo::stackSlotSize();
+
+ assert(MBBI->getOpcode() == SPU::RET &&
+ "Can only insert epilog into returning blocks");
+ assert((FrameSize & 0xf) == 0
+ && "SPURegisterInfo::emitEpilogue: FrameSize not aligned");
+ if (FrameSize > 0) {
+ FrameSize = FrameSize + SPUFrameInfo::minStackSize();
+ if (isS10Constant(FrameSize + LinkSlotOffset)) {
+ // Reload $lr, adjust $sp by required amount
+ // Note: We do this to slightly improve dual issue -- not by much, but it
+ // is an opportunity for dual issue. 
+ BuildMI(MBB, MBBI, TII.get(SPU::LQDr128), SPU::R0) + .addImm(FrameSize + LinkSlotOffset) + .addReg(SPU::R1); + BuildMI(MBB, MBBI, TII.get(SPU::AIr32), SPU::R1) + .addReg(SPU::R1) + .addImm(FrameSize); + } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) { + // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use + // $r2 to adjust $sp: + BuildMI(MBB, MBBI, TII.get(SPU::STQDr128), SPU::R2) + .addImm(16) + .addReg(SPU::R1); + BuildMI(MBB, MBBI, TII.get(SPU::ILr32), SPU::R2) + .addImm(FrameSize); + BuildMI(MBB, MBBI, TII.get(SPU::Ar32), SPU::R1) + .addReg(SPU::R1) + .addReg(SPU::R2); + BuildMI(MBB, MBBI, TII.get(SPU::LQDr128), SPU::R0) + .addImm(16) + .addReg(SPU::R2); + BuildMI(MBB, MBBI, TII.get(SPU::SFIr32), SPU::R2). + addReg(SPU::R2) + .addImm(16); + BuildMI(MBB, MBBI, TII.get(SPU::LQXr128), SPU::R2) + .addReg(SPU::R2) + .addReg(SPU::R1); + } else { + cerr << "Unhandled frame size: " << FrameSize << "\n"; + abort(); + } + } +} + +unsigned +SPURegisterInfo::getRARegister() const +{ + return SPU::R0; +} + +unsigned +SPURegisterInfo::getFrameRegister(MachineFunction &MF) const +{ + return SPU::R1; +} + +void +SPURegisterInfo::getInitialFrameState(std::vector &Moves) const +{ + // Initial state of the frame pointer is R1. 
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(SPU::R1, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+
+int
+SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ // FIXME: Most probably dwarf numbers differ for Linux and Darwin
+ return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+#include "SPUGenRegisterInfo.inc"
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
new file mode 100644
index 00000000000..a3006a933fe
--- /dev/null
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -0,0 +1,137 @@
+//===- SPURegisterInfo.h - Cell SPU Register Information Impl ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by The Aerospace Corporation.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell SPU implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_REGISTERINFO_H
+#define SPU_REGISTERINFO_H
+
+#include "SPU.h"
+#include "SPUGenRegisterInfo.h.inc"
+
+namespace llvm {
+ class SPUSubtarget;
+ class TargetInstrInfo;
+ class Type;
+
+ class SPURegisterInfo : public SPUGenRegisterInfo {
+ private:
+ const SPUSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ //! Predicate: Does the machine function use the link register?
+ bool usesLR(MachineFunction &MF) const;
+
+ public:
+ SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii);
+
+ //! Translate a register's enum value to a register number
+ /*!
+ This method translates a register's enum value to its register number,
+ e.g. SPU::R14 -> 14.
+ */
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ //! Store a register to a stack slot, based on its register class. 
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ //! Store a register to an address, based on its register class
+ void storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ SmallVectorImpl &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl &NewMIs) const;
+
+ //! Load a register from a stack slot, based on its register class.
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ //! Load a register from an address, based on its register class
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl &NewMIs) const;
+
+ //! Copy a register to another
+ void copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, const MachineInstr *Orig) const;
+
+ //! Fold spills into load/store instructions
+ virtual MachineInstr* foldMemoryOperand(MachineInstr* MI, unsigned OpNum,
+ int FrameIndex) const;
+
+ //! Fold any load/store to an operand
+ virtual MachineInstr* foldMemoryOperand(MachineInstr* MI, unsigned OpNum,
+ MachineInstr* LoadMI) const;
+
+ //! Return the array of callee-saved registers
+ virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const;
+
+ //! Return the register class array of the callee-saved registers
+ virtual const TargetRegisterClass* const *
+ getCalleeSavedRegClasses(const MachineFunction *MF) const;
+
+ //! Return the reserved registers
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ //! 
Predicate: Target has dedicated frame pointer
+ bool hasFP(const MachineFunction &MF) const;
+ //! Eliminate the call frame setup pseudo-instructions
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+ //! Convert frame indices into machine operands
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int,
+ RegScavenger *RS) const;
+ //! Determine the frame's layout
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+ //! Emit the function prologue
+ void emitPrologue(MachineFunction &MF) const;
+ //! Emit the function epilogue
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ //! Get return address register (LR, aka R0)
+ unsigned getRARegister() const;
+ //! Get the stack frame register (SP, aka R1)
+ unsigned getFrameRegister(MachineFunction &MF) const;
+ //! Perform target-specific stack frame setup.
+ void getInitialFrameState(std::vector &Moves) const;
+
+ //------------------------------------------------------------------------
+ // New methods added:
+ //------------------------------------------------------------------------
+
+ //! Return the array of argument passing registers
+ /*!
+ \note The size of this array is returned by getNumArgRegs().
+ */
+ static const unsigned *getArgRegs();
+
+ //! Return the size of the argument passing register array
+ static const unsigned getNumArgRegs();
+
+ //! 
Get DWARF debugging register number + int getDwarfRegNum(unsigned RegNum, bool isEH) const; + }; +} // end namespace llvm + +#endif diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td new file mode 100644 index 00000000000..f7156a5d534 --- /dev/null +++ b/lib/Target/CellSPU/SPURegisterInfo.td @@ -0,0 +1,393 @@ +//===- SPURegisterInfo.td - The Cell SPU Register File -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by The Aerospace Corporation. No distribution rights +// yet determined... +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +class SPUReg : Register { + let Namespace = "SPU"; +} + +// The SPU's register are all 128-bits wide, which makes specifying the +// registers relatively easy, if relatively mundane: + +class SPUVecReg num, string n> : SPUReg { + field bits<7> Num = num; +} + +def R0 : SPUVecReg<0, "$lr">, DwarfRegNum<[0]>; +def R1 : SPUVecReg<1, "$sp">, DwarfRegNum<[1]>; +def R2 : SPUVecReg<2, "$2">, DwarfRegNum<[2]>; +def R3 : SPUVecReg<3, "$3">, DwarfRegNum<[3]>; +def R4 : SPUVecReg<4, "$4">, DwarfRegNum<[4]>; +def R5 : SPUVecReg<5, "$5">, DwarfRegNum<[5]>; +def R6 : SPUVecReg<6, "$6">, DwarfRegNum<[6]>; +def R7 : SPUVecReg<7, "$7">, DwarfRegNum<[7]>; +def R8 : SPUVecReg<8, "$8">, DwarfRegNum<[8]>; +def R9 : SPUVecReg<9, "$9">, DwarfRegNum<[9]>; +def R10 : SPUVecReg<10, "$10">, DwarfRegNum<[10]>; +def R11 : SPUVecReg<11, "$11">, DwarfRegNum<[11]>; +def R12 : SPUVecReg<12, "$12">, DwarfRegNum<[12]>; +def R13 : SPUVecReg<13, "$13">, DwarfRegNum<[13]>; +def R14 : SPUVecReg<14, "$14">, DwarfRegNum<[14]>; +def R15 : SPUVecReg<15, "$15">, DwarfRegNum<[15]>; +def R16 : SPUVecReg<16, "$16">, DwarfRegNum<[16]>; +def R17 : SPUVecReg<17, "$17">, DwarfRegNum<[17]>; +def R18 : SPUVecReg<18, "$18">, DwarfRegNum<[18]>; +def R19 : SPUVecReg<19, 
"$19">, DwarfRegNum<[19]>; +def R20 : SPUVecReg<20, "$20">, DwarfRegNum<[20]>; +def R21 : SPUVecReg<21, "$21">, DwarfRegNum<[21]>; +def R22 : SPUVecReg<22, "$22">, DwarfRegNum<[22]>; +def R23 : SPUVecReg<23, "$23">, DwarfRegNum<[23]>; +def R24 : SPUVecReg<24, "$24">, DwarfRegNum<[24]>; +def R25 : SPUVecReg<25, "$25">, DwarfRegNum<[25]>; +def R26 : SPUVecReg<26, "$26">, DwarfRegNum<[26]>; +def R27 : SPUVecReg<27, "$27">, DwarfRegNum<[27]>; +def R28 : SPUVecReg<28, "$28">, DwarfRegNum<[28]>; +def R29 : SPUVecReg<29, "$29">, DwarfRegNum<[29]>; +def R30 : SPUVecReg<30, "$30">, DwarfRegNum<[30]>; +def R31 : SPUVecReg<31, "$31">, DwarfRegNum<[31]>; +def R32 : SPUVecReg<32, "$32">, DwarfRegNum<[32]>; +def R33 : SPUVecReg<33, "$33">, DwarfRegNum<[33]>; +def R34 : SPUVecReg<34, "$34">, DwarfRegNum<[34]>; +def R35 : SPUVecReg<35, "$35">, DwarfRegNum<[35]>; +def R36 : SPUVecReg<36, "$36">, DwarfRegNum<[36]>; +def R37 : SPUVecReg<37, "$37">, DwarfRegNum<[37]>; +def R38 : SPUVecReg<38, "$38">, DwarfRegNum<[38]>; +def R39 : SPUVecReg<39, "$39">, DwarfRegNum<[39]>; +def R40 : SPUVecReg<40, "$40">, DwarfRegNum<[40]>; +def R41 : SPUVecReg<41, "$41">, DwarfRegNum<[41]>; +def R42 : SPUVecReg<42, "$42">, DwarfRegNum<[42]>; +def R43 : SPUVecReg<43, "$43">, DwarfRegNum<[43]>; +def R44 : SPUVecReg<44, "$44">, DwarfRegNum<[44]>; +def R45 : SPUVecReg<45, "$45">, DwarfRegNum<[45]>; +def R46 : SPUVecReg<46, "$46">, DwarfRegNum<[46]>; +def R47 : SPUVecReg<47, "$47">, DwarfRegNum<[47]>; +def R48 : SPUVecReg<48, "$48">, DwarfRegNum<[48]>; +def R49 : SPUVecReg<49, "$49">, DwarfRegNum<[49]>; +def R50 : SPUVecReg<50, "$50">, DwarfRegNum<[50]>; +def R51 : SPUVecReg<51, "$51">, DwarfRegNum<[51]>; +def R52 : SPUVecReg<52, "$52">, DwarfRegNum<[52]>; +def R53 : SPUVecReg<53, "$53">, DwarfRegNum<[53]>; +def R54 : SPUVecReg<54, "$54">, DwarfRegNum<[54]>; +def R55 : SPUVecReg<55, "$55">, DwarfRegNum<[55]>; +def R56 : SPUVecReg<56, "$56">, DwarfRegNum<[56]>; +def R57 : SPUVecReg<57, "$57">, 
DwarfRegNum<[57]>; +def R58 : SPUVecReg<58, "$58">, DwarfRegNum<[58]>; +def R59 : SPUVecReg<59, "$59">, DwarfRegNum<[59]>; +def R60 : SPUVecReg<60, "$60">, DwarfRegNum<[60]>; +def R61 : SPUVecReg<61, "$61">, DwarfRegNum<[61]>; +def R62 : SPUVecReg<62, "$62">, DwarfRegNum<[62]>; +def R63 : SPUVecReg<63, "$63">, DwarfRegNum<[63]>; +def R64 : SPUVecReg<64, "$64">, DwarfRegNum<[64]>; +def R65 : SPUVecReg<65, "$65">, DwarfRegNum<[65]>; +def R66 : SPUVecReg<66, "$66">, DwarfRegNum<[66]>; +def R67 : SPUVecReg<67, "$67">, DwarfRegNum<[67]>; +def R68 : SPUVecReg<68, "$68">, DwarfRegNum<[68]>; +def R69 : SPUVecReg<69, "$69">, DwarfRegNum<[69]>; +def R70 : SPUVecReg<70, "$70">, DwarfRegNum<[70]>; +def R71 : SPUVecReg<71, "$71">, DwarfRegNum<[71]>; +def R72 : SPUVecReg<72, "$72">, DwarfRegNum<[72]>; +def R73 : SPUVecReg<73, "$73">, DwarfRegNum<[73]>; +def R74 : SPUVecReg<74, "$74">, DwarfRegNum<[74]>; +def R75 : SPUVecReg<75, "$75">, DwarfRegNum<[75]>; +def R76 : SPUVecReg<76, "$76">, DwarfRegNum<[76]>; +def R77 : SPUVecReg<77, "$77">, DwarfRegNum<[77]>; +def R78 : SPUVecReg<78, "$78">, DwarfRegNum<[78]>; +def R79 : SPUVecReg<79, "$79">, DwarfRegNum<[79]>; +def R80 : SPUVecReg<80, "$80">, DwarfRegNum<[80]>; +def R81 : SPUVecReg<81, "$81">, DwarfRegNum<[81]>; +def R82 : SPUVecReg<82, "$82">, DwarfRegNum<[82]>; +def R83 : SPUVecReg<83, "$83">, DwarfRegNum<[83]>; +def R84 : SPUVecReg<84, "$84">, DwarfRegNum<[84]>; +def R85 : SPUVecReg<85, "$85">, DwarfRegNum<[85]>; +def R86 : SPUVecReg<86, "$86">, DwarfRegNum<[86]>; +def R87 : SPUVecReg<87, "$87">, DwarfRegNum<[87]>; +def R88 : SPUVecReg<88, "$88">, DwarfRegNum<[88]>; +def R89 : SPUVecReg<89, "$89">, DwarfRegNum<[89]>; +def R90 : SPUVecReg<90, "$90">, DwarfRegNum<[90]>; +def R91 : SPUVecReg<91, "$91">, DwarfRegNum<[91]>; +def R92 : SPUVecReg<92, "$92">, DwarfRegNum<[92]>; +def R93 : SPUVecReg<93, "$93">, DwarfRegNum<[93]>; +def R94 : SPUVecReg<94, "$94">, DwarfRegNum<[94]>; +def R95 : SPUVecReg<95, "$95">, DwarfRegNum<[95]>; +def 
R96 : SPUVecReg<96, "$96">, DwarfRegNum<[96]>; +def R97 : SPUVecReg<97, "$97">, DwarfRegNum<[97]>; +def R98 : SPUVecReg<98, "$98">, DwarfRegNum<[98]>; +def R99 : SPUVecReg<99, "$99">, DwarfRegNum<[99]>; +def R100 : SPUVecReg<100, "$100">, DwarfRegNum<[100]>; +def R101 : SPUVecReg<101, "$101">, DwarfRegNum<[101]>; +def R102 : SPUVecReg<102, "$102">, DwarfRegNum<[102]>; +def R103 : SPUVecReg<103, "$103">, DwarfRegNum<[103]>; +def R104 : SPUVecReg<104, "$104">, DwarfRegNum<[104]>; +def R105 : SPUVecReg<105, "$105">, DwarfRegNum<[105]>; +def R106 : SPUVecReg<106, "$106">, DwarfRegNum<[106]>; +def R107 : SPUVecReg<107, "$107">, DwarfRegNum<[107]>; +def R108 : SPUVecReg<108, "$108">, DwarfRegNum<[108]>; +def R109 : SPUVecReg<109, "$109">, DwarfRegNum<[109]>; +def R110 : SPUVecReg<110, "$110">, DwarfRegNum<[110]>; +def R111 : SPUVecReg<111, "$111">, DwarfRegNum<[111]>; +def R112 : SPUVecReg<112, "$112">, DwarfRegNum<[112]>; +def R113 : SPUVecReg<113, "$113">, DwarfRegNum<[113]>; +def R114 : SPUVecReg<114, "$114">, DwarfRegNum<[114]>; +def R115 : SPUVecReg<115, "$115">, DwarfRegNum<[115]>; +def R116 : SPUVecReg<116, "$116">, DwarfRegNum<[116]>; +def R117 : SPUVecReg<117, "$117">, DwarfRegNum<[117]>; +def R118 : SPUVecReg<118, "$118">, DwarfRegNum<[118]>; +def R119 : SPUVecReg<119, "$119">, DwarfRegNum<[119]>; +def R120 : SPUVecReg<120, "$120">, DwarfRegNum<[120]>; +def R121 : SPUVecReg<121, "$121">, DwarfRegNum<[121]>; +def R122 : SPUVecReg<122, "$122">, DwarfRegNum<[122]>; +def R123 : SPUVecReg<123, "$123">, DwarfRegNum<[123]>; +def R124 : SPUVecReg<124, "$124">, DwarfRegNum<[124]>; +def R125 : SPUVecReg<125, "$125">, DwarfRegNum<[125]>; +def R126 : SPUVecReg<126, "$126">, DwarfRegNum<[126]>; +def R127 : SPUVecReg<127, "$127">, DwarfRegNum<[127]>; + +/* Need floating point status register here: */ +/* def FPCSR : ... 
*/ + +// The SPU's registers as 128-bit wide entities, and can function as general +// purpose registers, where the operands are in the "preferred slot": +def GPRC : RegisterClass<"SPU", [i128], 128, + [ + /* volatile register */ + R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, + R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, + R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, + R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, + R77, R78, R79, + /* non-volatile register: take hint from PPC and allocate in reverse order */ + R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, + R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, + R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, + R86, R85, R84, R83, R82, R81, R80, + /* environment ptr, SP, LR */ + R2, R1, R0 ]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + GPRCClass::iterator + GPRCClass::allocation_order_begin(const MachineFunction &MF) const { + return begin(); + } + GPRCClass::iterator + GPRCClass::allocation_order_end(const MachineFunction &MF) const { + return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr) + } + }]; +} + +// The SPU's registers as 64-bit wide (double word integer) "preferred slot": +def R64C : RegisterClass<"SPU", [i64], 128, + [ + /* volatile register */ + R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, + R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, + R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, + R62, R63, R64, R65, R66, R67, R68, R69, R70, 
R71, R72, R73, R74, R75, R76, + R77, R78, R79, + /* non-volatile register: take hint from PPC and allocate in reverse order */ + R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, + R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, + R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, + R86, R85, R84, R83, R82, R81, R80, + /* environment ptr, SP, LR */ + R2, R1, R0 ]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + R64CClass::iterator + R64CClass::allocation_order_begin(const MachineFunction &MF) const { + return begin(); + } + R64CClass::iterator + R64CClass::allocation_order_end(const MachineFunction &MF) const { + return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr) + } + }]; +} + +// The SPU's registers as 64-bit wide (double word) FP "preferred slot": +def R64FP : RegisterClass<"SPU", [f64], 128, + [ + /* volatile register */ + R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, + R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, + R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, + R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, + R77, R78, R79, + /* non-volatile register: take hint from PPC and allocate in reverse order */ + R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, + R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, + R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, + R86, R85, R84, R83, R82, R81, R80, + /* environment ptr, SP, LR */ + R2, R1, R0 ]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator 
allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + R64FPClass::iterator + R64FPClass::allocation_order_begin(const MachineFunction &MF) const { + return begin(); + } + R64FPClass::iterator + R64FPClass::allocation_order_end(const MachineFunction &MF) const { + return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr) + } + }]; +} + +// The SPU's registers as 32-bit wide (word) "preferred slot": +def R32C : RegisterClass<"SPU", [i32], 128, + [ + /* volatile register */ + R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, + R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, + R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, + R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, + R77, R78, R79, + /* non-volatile register: take hint from PPC and allocate in reverse order */ + R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, + R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, + R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, + R86, R85, R84, R83, R82, R81, R80, + /* environment ptr, SP, LR */ + R2, R1, R0 ]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + R32CClass::iterator + R32CClass::allocation_order_begin(const MachineFunction &MF) const { + return begin(); + } + R32CClass::iterator + R32CClass::allocation_order_end(const MachineFunction &MF) const { + return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr) + } + }]; +} + +// The SPU's registers as single precision floating point "preferred slot": +def R32FP : RegisterClass<"SPU", [f32], 128, + [ + /* volatile register */ + R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, 
R16, + R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, + R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, + R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, + R77, R78, R79, + /* non-volatile register: take hint from PPC and allocate in reverse order */ + R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, + R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, + R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, + R86, R85, R84, R83, R82, R81, R80, + /* environment ptr, SP, LR */ + R2, R1, R0 ]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + R32FPClass::iterator + R32FPClass::allocation_order_begin(const MachineFunction &MF) const { + return begin(); + } + R32FPClass::iterator + R32FPClass::allocation_order_end(const MachineFunction &MF) const { + return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr) + } + }]; +} + +// The SPU's registers as 16-bit wide (halfword) "preferred slot": +def R16C : RegisterClass<"SPU", [i16], 128, + [ + /* volatile register */ + R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, + R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, + R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, + R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, + R77, R78, R79, + /* non-volatile register: take hint from PPC and allocate in reverse order */ + R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, + R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, + R101, 
R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, + R86, R85, R84, R83, R82, R81, R80, + /* environment ptr, SP, LR */ + R2, R1, R0 ]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + R16CClass::iterator + R16CClass::allocation_order_begin(const MachineFunction &MF) const { + return begin(); + } + R16CClass::iterator + R16CClass::allocation_order_end(const MachineFunction &MF) const { + return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr) + } + }]; +} + +// The SPU's registers as vector registers: +def VECREG : RegisterClass<"SPU", [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128, + [ + /* volatile register */ + R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, + R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, + R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, + R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, + R77, R78, R79, + /* non-volatile register: take hint from PPC and allocate in reverse order */ + R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, + R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, + R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, + R86, R85, R84, R83, R82, R81, R80, + /* environment ptr, SP, LR */ + R2, R1, R0 ]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + VECREGClass::iterator + VECREGClass::allocation_order_begin(const MachineFunction &MF) const { + return begin(); + } + VECREGClass::iterator + VECREGClass::allocation_order_end(const MachineFunction &MF) const { + return 
end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr) + } + }]; +} diff --git a/lib/Target/CellSPU/SPUSchedule.td b/lib/Target/CellSPU/SPUSchedule.td new file mode 100644 index 00000000000..06e3d20e37f --- /dev/null +++ b/lib/Target/CellSPU/SPUSchedule.td @@ -0,0 +1,59 @@ +//===- SPUSchedule.td - Cell Scheduling Definitions --------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by a team from the Computer Systems Research +// Department at The Aerospace Corporation. +// +// See README.txt for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Even pipeline: + +def EVEN_UNIT : FuncUnit; // Even execution unit: (PC & 0x7 == 000) +def ODD_UNIT : FuncUnit; // Odd execution unit: (PC & 0x7 == 100) + +//===----------------------------------------------------------------------===// +// Instruction Itinerary classes used for Cell SPU +//===----------------------------------------------------------------------===// + +def LoadStore : InstrItinClass; // ODD_UNIT +def BranchHints : InstrItinClass; // ODD_UNIT +def BranchResolv : InstrItinClass; // ODD_UNIT +def ChanOpSPR : InstrItinClass; // ODD_UNIT +def ShuffleOp : InstrItinClass; // ODD_UNIT +def SelectOp : InstrItinClass; // ODD_UNIT +def GatherOp : InstrItinClass; // ODD_UNIT +def LoadNOP : InstrItinClass; // ODD_UNIT +def ExecNOP : InstrItinClass; // EVEN_UNIT +def SPrecFP : InstrItinClass; // EVEN_UNIT +def DPrecFP : InstrItinClass; // EVEN_UNIT +def FPInt : InstrItinClass; // EVEN_UNIT (FP<->integer) +def ByteOp : InstrItinClass; // EVEN_UNIT +def IntegerOp : InstrItinClass; // EVEN_UNIT +def IntegerMulDiv: InstrItinClass; // EVEN_UNIT +def RotateShift : InstrItinClass; // EVEN_UNIT +def ImmLoad : InstrItinClass; // EVEN_UNIT + +/* Note: The itinerary for the Cell SPU is somewhat contrived... 
*/ +def SPUItineraries : ProcessorItineraries<[ + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> + ]>; diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp new file mode 100644 index 00000000000..6786f7f8646 --- /dev/null +++ b/lib/Target/CellSPU/SPUSubtarget.cpp @@ -0,0 +1,42 @@ +//===- SPUSubtarget.cpp - STI Cell SPU Subtarget Information --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by a team from the Computer Systems Research +// Department at The Aerospace Corporation. +// +// See README.txt for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CellSPU-specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#include "SPUSubtarget.h" +#include "SPU.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetMachine.h" +#include "SPUGenSubtarget.inc" + +using namespace llvm; + +SPUSubtarget::SPUSubtarget(const TargetMachine &tm, const Module &M, + const std::string &FS) : + TM(tm), + StackAlignment(16), + ProcDirective(SPU::DEFAULT_PROC), + UseLargeMem(false) +{ + // Should be the target SPU processor type. For now, since there's only + // one, simply default to the current "v0" default: + std::string default_cpu("v0"); + + // Parse features string. + ParseSubtargetFeatures(FS, default_cpu); +} + +/// SetJITMode - This is called to inform the subtarget info that we are +/// producing code for the JIT. 
+void SPUSubtarget::SetJITMode() { +} diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h new file mode 100644 index 00000000000..d0172b1770e --- /dev/null +++ b/lib/Target/CellSPU/SPUSubtarget.h @@ -0,0 +1,95 @@ +//=====-- SPUSubtarget.h - Define Subtarget for the Cell SPU -----*- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by a team from the Computer Systems Research +// Department at The Aerospace Corporation. +// +// See README.txt for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Cell SPU-specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#ifndef POWERPCSUBTARGET_H +#define POWERPCSUBTARGET_H + +#include "llvm/Target/TargetInstrItineraries.h" +#include "llvm/Target/TargetSubtarget.h" + +#include <string> + +namespace llvm { + class Module; + class GlobalValue; + class TargetMachine; + + namespace SPU { + enum { + DEFAULT_PROC + }; + } + + class SPUSubtarget : public TargetSubtarget { + protected: + const TargetMachine &TM; + + /// stackAlignment - The minimum alignment known to hold of the stack frame + /// on entry to the function and which must be maintained by every function. + unsigned StackAlignment; + + /// Selected instruction itineraries (one entry per itinerary class.) + InstrItineraryData InstrItins; + + /// Which SPU processor (this isn't really used, but it's there to keep + /// the C compiler happy) + unsigned ProcDirective; + + /// Use (assume) large memory -- effectively disables the LQA/STQA + /// instructions that assume 259K local store. + bool UseLargeMem; + + public: + /// This constructor initializes the data members to match that + /// of the specified module.
+ /// + SPUSubtarget(const TargetMachine &TM, const Module &M, + const std::string &FS); + + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. Definition of function is auto generated by tblgen. + void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); + + /// SetJITMode - This is called to inform the subtarget info that we are + /// producing code for the JIT. + void SetJITMode(); + + /// getStackAlignment - Returns the minimum alignment known to hold of the + /// stack frame on entry to the function and which must be maintained by + /// every function for this subtarget. + unsigned getStackAlignment() const { return StackAlignment; } + + /// getInstrItins - Return the instruction itineraies based on subtarget + /// selection. + const InstrItineraryData &getInstrItineraryData() const { + return InstrItins; + } + + /// Use large memory addressing predicate + bool usingLargeMem() const { + return UseLargeMem; + } + + /// getTargetDataString - Return the pointer size and type alignment + /// properties of this subtarget. + const char *getTargetDataString() const { + return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128" + "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v128:128:128" + "-s:128:128"; + } + }; +} // End llvm namespace + +#endif diff --git a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp new file mode 100644 index 00000000000..027d73e27be --- /dev/null +++ b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp @@ -0,0 +1,56 @@ +//===-- SPUTargetAsmInfo.cpp - Cell SPU asm properties ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by a team from the Computer Systems Research +// Department at The Aerospace Corporation. +// +// See README.txt for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the SPUTargetAsmInfo properties. 
+// +//===----------------------------------------------------------------------===// + +#include "SPUTargetAsmInfo.h" +#include "SPUTargetMachine.h" +#include "llvm/Function.h" +using namespace llvm; + +SPUTargetAsmInfo::SPUTargetAsmInfo(const SPUTargetMachine &TM) { + CommentString = "#"; + GlobalPrefix = ""; + PrivateGlobalPrefix = ".L"; + ZeroDirective = "\t.space\t"; + SetDirective = "\t.set"; + Data64bitsDirective = "\t.quad\t"; + AlignmentIsInBytes = false; + SwitchToSectionDirective = "\t.section\t"; + ConstantPoolSection = "\t.const\t"; + JumpTableDataSection = ".const"; + CStringSection = "\t.cstring"; + LCOMMDirective = "\t.lcomm\t"; + StaticCtorsSection = ".mod_init_func"; + StaticDtorsSection = ".mod_term_func"; + FourByteConstantSection = ".const"; + SixteenByteConstantSection = "\t.section\t.rodata.cst16,\"aM\",@progbits,16"; + UsedDirective = "\t.no_dead_strip\t"; + WeakRefDirective = "\t.weak_reference\t"; + InlineAsmStart = "# InlineAsm Start"; + InlineAsmEnd = "# InlineAsm End"; + + NeedsSet = true; + /* FIXME: Need actual assembler syntax for DWARF info: */ + DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug"; + DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug"; + DwarfLineSection = ".section __DWARF,__debug_line,regular,debug"; + DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug"; + DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug"; + DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug"; + DwarfStrSection = ".section __DWARF,__debug_str,regular,debug"; + DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug"; + DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug"; + DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug"; + DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug"; +} diff --git a/lib/Target/CellSPU/SPUTargetAsmInfo.h b/lib/Target/CellSPU/SPUTargetAsmInfo.h new file mode 100644 index 
00000000000..868abfbbdb3 --- /dev/null +++ b/lib/Target/CellSPU/SPUTargetAsmInfo.h @@ -0,0 +1,32 @@ +//=====-- SPUTargetAsmInfo.h - Cell SPU asm properties --------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by a team from the Computer Systems Research +// Department at The Aerospace Corporation. +// +// See README.txt for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the SPUTargetAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef PPCTARGETASMINFO_H +#define PPCTARGETASMINFO_H + +#include "llvm/Target/TargetAsmInfo.h" + +namespace llvm { + + // Forward declaration. + class SPUTargetMachine; + + struct SPUTargetAsmInfo : public TargetAsmInfo { + SPUTargetAsmInfo(const SPUTargetMachine &TM); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp new file mode 100644 index 00000000000..068fd00348c --- /dev/null +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -0,0 +1,87 @@ +//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by a team from the Computer Systems Research +// Department at The Aerospace Corporation. +// +// See README.txt for details. +// +//===----------------------------------------------------------------------===// +// +// Top-level implementation for the Cell SPU target. 
+// +//===----------------------------------------------------------------------===// + +#include "SPU.h" +#include "SPURegisterNames.h" +#include "SPUTargetAsmInfo.h" +#include "SPUTargetMachine.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Target/TargetMachineRegistry.h" + +using namespace llvm; + +namespace { + // Register the targets + RegisterTarget<SPUTargetMachine> + CELLSPU("cellspu", " STI CBEA Cell SPU"); +} + +const std::pair<unsigned int, int> * +SPUFrameInfo::getCalleeSaveSpillSlots(unsigned &NumEntries) const { + NumEntries = 1; + return &LR[0]; +} + +const TargetAsmInfo * +SPUTargetMachine::createTargetAsmInfo() const +{ + return new SPUTargetAsmInfo(*this); +} + +unsigned +SPUTargetMachine::getModuleMatchQuality(const Module &M) +{ + // We strongly match "spu-*" or "cellspu-*". + std::string TT = M.getTargetTriple(); + if ((TT.size() == 3 && std::string(TT.begin(), TT.begin()+3) == "spu") + || (TT.size() == 7 && std::string(TT.begin(), TT.begin()+7) == "cellspu") + || (TT.size() >= 4 && std::string(TT.begin(), TT.begin()+4) == "spu-") + || (TT.size() >= 8 && std::string(TT.begin(), TT.begin()+8) == "cellspu-")) + return 20; + + return 0; // No match at all... +} + +SPUTargetMachine::SPUTargetMachine(const Module &M, const std::string &FS) + : Subtarget(*this, M, FS), + DataLayout(Subtarget.getTargetDataString()), + InstrInfo(*this), + FrameInfo(*this), + TLInfo(*this), + InstrItins(Subtarget.getInstrItineraryData()) +{ + // For the time being, use static relocations, since there's really no + // support for PIC yet. + setRelocationModel(Reloc::Static); +} + +//===----------------------------------------------------------------------===// + // Pass Pipeline Configuration +//===----------------------------------------------------------------------===// + +bool +SPUTargetMachine::addInstSelector(FunctionPassManager &PM, bool Fast) +{ + // Install an instruction selector.
+ PM.add(createSPUISelDag(*this)); + return false; +} + +bool SPUTargetMachine::addAssemblyEmitter(FunctionPassManager &PM, bool Fast, + std::ostream &Out) { + PM.add(createSPUAsmPrinterPass(Out, *this)); + return false; +} diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h new file mode 100644 index 00000000000..53a00eaf149 --- /dev/null +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -0,0 +1,95 @@ +//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU ----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by a team from the Computer Systems Research +// Department at The Aerospace Corporation. +// +// See README.txt for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the CellSPU-specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#ifndef SPU_TARGETMACHINE_H +#define SPU_TARGETMACHINE_H + +#include "SPUSubtarget.h" +#include "SPUInstrInfo.h" +#include "SPUISelLowering.h" +#include "SPUFrameInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" + +namespace llvm { +class PassManager; +class GlobalValue; +class TargetFrameInfo; + +/// SPUTargetMachine +/// +class SPUTargetMachine : public LLVMTargetMachine { + SPUSubtarget Subtarget; + const TargetData DataLayout; + SPUInstrInfo InstrInfo; + SPUFrameInfo FrameInfo; + SPUTargetLowering TLInfo; + InstrItineraryData InstrItins; + +protected: + virtual const TargetAsmInfo *createTargetAsmInfo() const; + +public: + SPUTargetMachine(const Module &M, const std::string &FS); + + /// Return the subtarget implementation object + virtual const SPUSubtarget *getSubtargetImpl() const { + return &Subtarget; + } + virtual const SPUInstrInfo *getInstrInfo() const { + return &InstrInfo; + } + virtual const TargetFrameInfo *getFrameInfo() const { + return &FrameInfo; + } + /*! 
+ \note Cell SPU does not support JIT today. It could support JIT at some + point. + */ + virtual TargetJITInfo *getJITInfo() { + return NULL; + } + + //! Module match function + /*! + Module matching function called by TargetMachineRegistry(). + */ + static unsigned getModuleMatchQuality(const Module &M); + + virtual SPUTargetLowering *getTargetLowering() const { + return const_cast<SPUTargetLowering*>(&TLInfo); + } + + virtual const MRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + + virtual const TargetData *getTargetData() const { + return &DataLayout; + } + + virtual const InstrItineraryData getInstrItineraryData() const { + return InstrItins; + } + + // Pass Pipeline Configuration + virtual bool addInstSelector(FunctionPassManager &PM, bool Fast); + virtual bool addAssemblyEmitter(FunctionPassManager &PM, bool Fast, + std::ostream &Out); +}; + +} // end namespace llvm + +#endif -- 2.11.0