diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index bdd658a8413..97422c6eda3 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -1,9 +1,8 @@
 //===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,15 +14,35 @@
 #ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
 #define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
 
-#include "PPC.h"
 #include "PPCInstrInfo.h"
-#include "PPCRegisterInfo.h"
 #include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/MachineValueType.h"
+#include <utility>
 
 namespace llvm {
 
+  namespace PPCISD {
+
+    // When adding a NEW PPCISD node please add it to the correct position in
+    // the enum. The order of elements in this enum matters!
+    // Values that are added after this entry:
+    //     STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE
+    // are considered memory opcodes and are treated differently than entries
+    // that come before it. For example, ADD or MUL should be placed before
+    // the ISD::FIRST_TARGET_MEMORY_OPCODE while a LOAD or STORE should come
+    // after it.
     enum NodeType : unsigned {
       // Start the numbering where the builtin ops and target ops leave off.
       FIRST_NUMBER = ISD::BUILTIN_OP_END,
@@ -47,13 +66,20 @@ namespace llvm {
     FCTIDZ, FCTIWZ,
 
     /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
-    /// unsigned integers.
+    /// unsigned integers with round toward zero.
     FCTIDUZ, FCTIWUZ,
 
+    /// Floating-point-to-integer conversion instructions.
+    FP_TO_UINT_IN_VSR, FP_TO_SINT_IN_VSR,
+
     /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
     /// VSFRC that is sign-extended from ByteWidth to a 64-bit integer.
     VEXTS,
 
+    /// SExtVElems, takes an input vector of a smaller type and sign
+    /// extends to an output vector of a larger type.
+    SExtVElems,
+
     /// Reciprocal estimate instructions (unary FP ops).
    FRE, FRSQRTE,
 
@@ -69,14 +95,22 @@ namespace llvm {
     ///
     XXSPLT,
 
-    /// XXINSERT - The PPC VSX insert instruction
+    /// VECINSERT - The PPC vector insert instruction
+    ///
+    VECINSERT,
+
+    /// XXREVERSE - The PPC VSX reverse instruction
     ///
-    XXINSERT,
+    XXREVERSE,
 
-    /// VECSHL - The PPC VSX shift left instruction
+    /// VECSHL - The PPC vector shift left instruction
     ///
     VECSHL,
 
+    /// XXPERMDI - The PPC XXPERMDI instruction
+    ///
+    XXPERMDI,
+
     /// The CMPB instruction (takes two operands of i32 or i64).
     CMPB,
 
@@ -104,11 +138,19 @@ namespace llvm {
     /// at function entry, used for PIC code.
     GlobalBaseReg,
 
-    /// These nodes represent the 32-bit PPC shifts that operate on 6-bit
-    /// shift amounts. These nodes are generated by the multi-precision shift
-    /// code.
+    /// These nodes represent PPC shifts.
+    ///
+    /// For scalar types, only the last `n + 1` bits of the shift amounts
+    /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc.
+    /// for exact behaviors.
+    ///
+    /// For vector types, only the last n bits are used. See vsld.
     SRL, SRA, SHL,
 
+    /// EXTSWSLI = The PPC extswsli instruction, which sign-extends a word
+    /// and shifts left by an immediate.
+    EXTSWSLI,
+
     /// The combination of sra[wd]i and addze used to implement signed
     /// integer division by a power of 2. The first operand is the dividend,
     /// and the second is the constant shift amount (representing the
@@ -117,7 +159,7 @@ namespace llvm {
 
     /// CALL - A direct function call.
     /// CALL_NOP is a call with the special NOP which follows 64-bit
-    /// SVR4 calls.
+    /// SVR4 calls and 32-bit/64-bit AIX calls.
     CALL, CALL_NOP,
 
     /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
@@ -149,6 +191,18 @@ namespace llvm {
     /// Direct move from a GPR to a VSX register (zero)
     MTVSRZ,
 
+    /// Direct move of 2 consecutive GPRs to a VSX register.
+    BUILD_FP128,
+
+    /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
+    /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is
+    /// unsupported for this target.
+    /// Merge 2 GPRs to a single SPE register.
+    BUILD_SPE64,
+
+    /// Extract SPE register component, second argument is high or low.
+    EXTRACT_SPE,
+
     /// Extract a subvector from signed integer vector and convert to FP.
     /// It is primarily used to convert a (widened) illegal integer vector
     /// type to a legal floating point vector type.
@@ -218,14 +272,14 @@ namespace llvm {
     CR6UNSET,
 
     /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
-    /// on PPC32.
+    /// for non-position-independent code on PPC32.
     PPC32_GOT,
 
     /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
-    /// local dynamic TLS on PPC32.
+    /// local dynamic TLS and position-independent code on PPC32.
     PPC32_PICGOT,
 
-    /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
+    /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
     /// TLS model, produces an ADDIS8 instruction that adds the GOT
     /// base to sym\@got\@tprel\@ha.
     ADDIS_GOT_TPREL_HA,
@@ -244,18 +298,18 @@ namespace llvm {
     /// TLS sequence.
     ADD_TLS,
 
-    /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS
+    /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS
     /// model, produces an ADDIS8 instruction that adds the GOT base
     /// register to sym\@got\@tlsgd\@ha.
    ADDIS_TLSGD_HA,
 
-    /// %X3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
+    /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
     /// model, produces an ADDI8 instruction that adds G8RReg to
     /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by
     /// ADDIS_TLSGD_L_ADDR until after register assignment.
     ADDI_TLSGD_L,
 
-    /// %X3 = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
+    /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS
     /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by
     /// ADDIS_TLSGD_L_ADDR until after register assignment.
     GET_TLS_ADDR,
@@ -265,18 +319,18 @@ namespace llvm {
     /// register assignment.
     ADDI_TLSGD_L_ADDR,
 
-    /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
+    /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
     /// model, produces an ADDIS8 instruction that adds the GOT base
     /// register to sym\@got\@tlsld\@ha.
     ADDIS_TLSLD_HA,
 
-    /// %X3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
+    /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
     /// model, produces an ADDI8 instruction that adds G8RReg to
     /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by
     /// ADDIS_TLSLD_L_ADDR until after register assignment.
     ADDI_TLSLD_L,
 
-    /// %X3 = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
+    /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS
     /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by
     /// ADDIS_TLSLD_L_ADDR until after register assignment.
     GET_TLSLD_ADDR,
@@ -286,7 +340,7 @@ namespace llvm {
     /// following register assignment.
     ADDI_TLSLD_L_ADDR,
 
-    /// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS
+    /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS
     /// model, produces an ADDIS8 instruction that adds X3 to
     /// sym\@dtprel\@ha.
     ADDIS_DTPREL_HA,
@@ -326,6 +380,21 @@ namespace llvm {
     /// An SDNode for swaps that are not associated with any loads/stores
     /// and thereby have no chain.
     SWAP_NO_CHAIN,
+
+    /// An SDNode for Power9 vector absolute value difference.
+    /// operand #0 vector
+    /// operand #1 vector
+    /// operand #2 constant i32 0 or 1, to indicate whether we need to patch
+    /// the most significant bit for signed i32
+    ///
+    /// Power9 VABSD* instructions are designed to support unsigned integer
+    /// vectors (byte/halfword/word); to use them for signed integer vectors,
+    /// we have to flip their sign bits first. Flipping the sign bits of
+    /// byte/halfword integer vectors would be inefficient, but for word
+    /// integer vectors we can leverage XVNEGSP to do it efficiently, e.g.:
+    ///   abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000)
+    ///                 => VABSDUW((XVNEGSP a), (XVNEGSP b))
+    VABSD,
 
     /// QVFPERM = This corresponds to the QPX qvfperm instruction.
     QVFPERM,
@@ -343,6 +412,9 @@ namespace llvm {
     /// representation.
     QBFLT,
 
+    /// Custom extend v4f32 to v2f64.
+    FP_EXTEND_LH,
+
     /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
     /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
     /// the GPRC input, then stores it through Ptr. Type can be either i16 or
@@ -384,24 +456,38 @@ namespace llvm {
     /// an xxswapd.
     LXVD2X,
 
+    /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
+    /// v2f32 value into the lower half of a VSR register.
+    LD_VSX_LH,
+
     /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
     /// Maps directly to an stxvd2x instruction that will be preceded by
     /// an xxswapd.
    STXVD2X,
 
+    /// Store scalar integers from VSR.
+    ST_VSR_SCAL_INT,
+
     /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
     /// The 4xf32 load used for v4i1 constants.
     QVLFSb,
 
+    /// ATOMIC_CMP_SWAP - exactly like the target-independent nodes, except
+    /// that they ensure the compare input is zero-extended for sub-word
+    /// versions, because the atomic loads zero-extend.
+    ATOMIC_CMP_SWAP_8, ATOMIC_CMP_SWAP_16,
+
     /// GPRC = TOC_ENTRY GA, TOC
     /// Loads the entry for GA from the TOC, where the TOC base is given by
     /// the last operand.
     TOC_ENTRY
   };
-  }
+
+  } // end namespace PPCISD
 
   /// Define some predicates that are used for node matching.
   namespace PPC {
+
     /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
     /// VPKUHUM instruction.
     bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
@@ -431,7 +517,32 @@ namespace llvm {
     /// a VMRGEW or VMRGOW instruction.
     bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG);
-
+    /// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable
+    /// for an XXSLDWI instruction.
+    bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+                              bool &Swap, bool IsLE);
+
+    /// isXXBRHShuffleMask - Return true if this is a shuffle mask suitable
+    /// for an XXBRH instruction.
+    bool isXXBRHShuffleMask(ShuffleVectorSDNode *N);
+
+    /// isXXBRWShuffleMask - Return true if this is a shuffle mask suitable
+    /// for an XXBRW instruction.
+    bool isXXBRWShuffleMask(ShuffleVectorSDNode *N);
+
+    /// isXXBRDShuffleMask - Return true if this is a shuffle mask suitable
+    /// for an XXBRD instruction.
+    bool isXXBRDShuffleMask(ShuffleVectorSDNode *N);
+
+    /// isXXBRQShuffleMask - Return true if this is a shuffle mask suitable
+    /// for an XXBRQ instruction.
+    bool isXXBRQShuffleMask(ShuffleVectorSDNode *N);
+
+    /// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable
+    /// for an XXPERMDI instruction.
+    bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+                               bool &Swap, bool IsLE);
+
     /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
     /// shift amount, otherwise return -1.
     int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
@@ -465,7 +576,8 @@ namespace llvm {
     /// If this is a qvaligni shuffle mask, return the shift
     /// amount, otherwise return -1.
     int isQVALIGNIShuffleMask(SDNode *N);
-  }
+
+  } // end namespace PPC
 
   class PPCTargetLowering : public TargetLowering {
     const PPCSubtarget &Subtarget;
@@ -478,6 +590,11 @@ namespace llvm {
     /// DAG node.
     const char *getTargetNodeName(unsigned Opcode) const override;
 
+    bool isSelectSupported(SelectSupportKind Kind) const override {
+      // PowerPC does not support scalar condition selects on vectors.
+      return (Kind != SelectSupportKind::ScalarCondVectorVal);
+    }
+
     /// getPreferredVectorAction - The code we generate when vector types are
     /// legalized by promoting the integer element type is often much worse
     /// than code we generate if we widen the type for applicable vector types.
@@ -486,14 +603,17 @@ namespace llvm {
     /// of v4i8's and shuffle them. This will turn into a mess of 8 extending
     /// loads, moves back into VSRs (or memory ops if we don't have moves) and
     /// then the VPERM for the shuffle. All in all a very slow sequence.
-    TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
+    TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
       const override {
       if (VT.getScalarSizeInBits() % 8 == 0)
         return TypeWidenVector;
       return TargetLoweringBase::getPreferredVectorAction(VT);
     }
 
+    bool useSoftFloat() const override;
+
+    bool hasSPE() const;
+
     MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
       return MVT::i32;
     }
@@ -514,10 +634,16 @@ namespace llvm {
       return true;
     }
 
+    bool preferIncOfAddToSubOfNot(EVT VT) const override;
+
+    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
+      return VT.isScalarInteger();
+    }
+
     bool supportSplitCSR(MachineFunction *MF) const override {
       return
-      MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
-      MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
+      MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
+      MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
     }
 
     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
@@ -543,18 +669,27 @@ namespace llvm {
                                    ISD::MemIndexedMode &AM,
                                    SelectionDAG &DAG) const override;
 
+    /// SelectAddressEVXRegReg - Given the specified address, check to see if
+    /// it can be more efficiently represented as [r+imm].
+    bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index,
+                                SelectionDAG &DAG) const;
+
     /// SelectAddressRegReg - Given the specified address, check to see if it
-    /// can be represented as an indexed [r+r] operation. Returns false if it
-    /// can be more efficiently represented with [r+imm].
+    /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment
+    /// is non-zero, only accept displacements that are not suitable for [r+imm].
+    /// Returns false if it can be represented by [r+imm], which is preferred.
     bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
-                             SelectionDAG &DAG) const;
+                             SelectionDAG &DAG,
+                             unsigned EncodingAlignment = 0) const;
 
     /// SelectAddressRegImm - Returns true if the address N can be represented
     /// by a base register plus a signed 16-bit displacement [r+imm], and if it
-    /// is not better represented as reg+reg. If Aligned is true, only accept
-    /// displacements suitable for STD and friends, i.e. multiples of 4.
+    /// is not better represented as reg+reg. If \p EncodingAlignment is
+    /// non-zero, only accept displacements suitable for instruction encoding
+    /// requirements, i.e. multiples of 4 for DS form.
     bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
-                             SelectionDAG &DAG, bool Aligned) const;
+                             SelectionDAG &DAG,
+                             unsigned EncodingAlignment) const;
 
     /// SelectAddressRegRegOnly - Given the specified address, force it to be
     /// represented as an indexed [r+r] operation.
@@ -579,14 +714,14 @@ namespace llvm {
 
     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
-                          std::vector<SDNode *> *Created) const override;
+                          SmallVectorImpl<SDNode *> &Created) const override;
 
     unsigned getRegisterByName(const char* RegName, EVT VT,
                                SelectionDAG &DAG) const override;
 
     void computeKnownBitsForTargetNode(const SDValue Op,
-                                       APInt &KnownZero,
-                                       APInt &KnownOne,
+                                       KnownBits &Known,
+                                       const APInt &DemandedElts,
                                        const SelectionDAG &DAG,
                                        unsigned Depth = 0) const override;
@@ -596,10 +731,10 @@ namespace llvm {
       return true;
     }
 
-    Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
-                                  bool IsStore, bool IsLoad) const override;
-    Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
-                                   bool IsStore, bool IsLoad) const override;
+    Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst,
+                                  AtomicOrdering Ord) const override;
+    Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst,
+                                   AtomicOrdering Ord) const override;
 
     MachineBasicBlock *
     EmitInstrWithCustomInserter(MachineInstr &MI,
@@ -665,7 +800,8 @@ namespace llvm {
     /// isLegalAddressingMode - Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
 
     /// isLegalICmpImmediate - Return true if the specified immediate is a legal
     /// icmp immediate, that is, the target has icmp instructions which can
@@ -687,17 +823,25 @@ namespace llvm {
 
     bool isZExtFree(SDValue Val, EVT VT2) const override;
 
-    bool isFPExtFree(EVT VT) const override;
+    bool isFPExtFree(EVT DestVT, EVT SrcVT) const override;
 
-    /// \brief Returns true if it is beneficial to convert a load of a constant
+    /// Returns true if it is beneficial to convert a load of a constant
     /// to just the constant itself.
     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                            Type *Ty) const override;
 
+    bool convertSelectOfConstantsToMath(EVT VT) const override {
+      return true;
+    }
+
+    // Returns true if the address of the global is stored in a TOC entry.
+    bool isAccessedAsGotIndirect(SDValue N) const;
+
     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
 
     bool getTgtMemIntrinsic(IntrinsicInfo &Info,
                             const CallInst &I,
+                            MachineFunction &MF,
                             unsigned Intrinsic) const override;
 
     /// getOptimalMemOpType - Returns the target specific optimal type for load
@@ -714,14 +858,14 @@ namespace llvm {
     EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                             bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
-                            MachineFunction &MF) const override;
+                            const AttributeList &FuncAttributes) const override;
 
     /// Is unaligned memory access allowed for the given type, and is it fast
     /// relative to software emulation.
-    bool allowsMisalignedMemoryAccesses(EVT VT,
-                                        unsigned AddrSpace,
-                                        unsigned Align = 1,
-                                        bool *Fast = nullptr) const override;
+    bool allowsMisalignedMemoryAccesses(
+        EVT VT, unsigned AddrSpace, unsigned Align = 1,
+        MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+        bool *Fast = nullptr) const override;
 
     /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
     /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
@@ -741,7 +885,7 @@ namespace llvm {
     FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
                              const TargetLibraryInfo *LibInfo) const override;
 
-    /// \brief Returns true if an argument of type Ty needs to be passed in a
+    /// Returns true if an argument of type Ty needs to be passed in a
     /// contiguous block of registers in calling convention CallConv.
     bool functionArgumentNeedsConsecutiveRegisters(
       Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override {
@@ -769,21 +913,30 @@ namespace llvm {
 
     bool useLoadStackGuardNode() const override;
     void insertSSPDeclarations(Module &M) const override;
 
+    bool isFPImmLegal(const APFloat &Imm, EVT VT,
+                      bool ForCodeSize) const override;
+
+    unsigned getJumpTableEncoding() const override;
+    bool isJumpTableRelative() const override;
+    SDValue getPICJumpTableRelocBase(SDValue Table,
+                                     SelectionDAG &DAG) const override;
+    const MCExpr *getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+                                               unsigned JTI,
+                                               MCContext &Ctx) const override;
+
   private:
     struct ReuseLoadInfo {
       SDValue Ptr;
       SDValue Chain;
       SDValue ResChain;
       MachinePointerInfo MPI;
-      bool IsDereferenceable;
-      bool IsInvariant;
-      unsigned Alignment;
+      bool IsDereferenceable = false;
+      bool IsInvariant = false;
+      unsigned Alignment = 0;
       AAMDNodes AAInfo;
-      const MDNode *Ranges;
+      const MDNode *Ranges = nullptr;
 
-      ReuseLoadInfo()
-        : IsDereferenceable(false), IsInvariant(false), Alignment(0),
-          Ranges(nullptr) {}
+      ReuseLoadInfo() = default;
 
       MachineMemOperand::Flags MMOFlags() const {
         MachineMemOperand::Flags F = MachineMemOperand::MONone;
@@ -795,6 +948,11 @@ namespace llvm {
       }
     };
 
+    bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+      // Addrspacecasts are always noops.
+      return true;
+    }
+
     bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
                              SelectionDAG &DAG,
                              ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
@@ -805,9 +963,16 @@ namespace llvm {
                                          SelectionDAG &DAG, const SDLoc &dl) const;
     SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG,
                                      const SDLoc &dl) const;
+
+    bool directMoveIsProfitable(const SDValue &Op) const;
     SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
                                      const SDLoc &dl) const;
 
+    SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
+                                 const SDLoc &dl) const;
+
+    SDValue LowerTRUNCATEVector(SDValue Op, SelectionDAG &DAG) const;
+
     SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
     SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
 
@@ -822,7 +987,7 @@ namespace llvm {
     IsEligibleForTailCallOptimization_64SVR4(
                                     SDValue Callee, CallingConv::ID CalleeCC,
-                                    ImmutableCallSite *CS,
+                                    ImmutableCallSite CS,
                                     bool isVarArg,
                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
                                     const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -866,9 +1031,15 @@ namespace llvm {
     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
@@ -886,7 +1057,7 @@ namespace llvm {
                             SDValue &Callee, int SPDiff, unsigned NumBytes,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             SmallVectorImpl<SDValue> &InVals,
-                            ImmutableCallSite *CS) const;
+                            ImmutableCallSite CS) const;
 
     SDValue
     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
@@ -894,15 +1065,13 @@ namespace llvm {
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &dl, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals) const override;
 
-    SDValue
-    LowerCall(TargetLowering::CallLoweringInfo &CLI,
-              SmallVectorImpl<SDValue> &InVals) const override;
+    SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+                      SmallVectorImpl<SDValue> &InVals) const override;
 
-    bool
-    CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
-                   bool isVarArg,
-                   const SmallVectorImpl<ISD::OutputArg> &Outs,
-                   LLVMContext &Context) const override;
+    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+                        bool isVarArg,
+                        const SmallVectorImpl<ISD::OutputArg> &Outs,
+                        LLVMContext &Context) const override;
 
     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -939,7 +1108,7 @@ namespace llvm {
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             const SDLoc &dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals,
-                            ImmutableCallSite *CS) const;
+                            ImmutableCallSite CS) const;
     SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                              CallingConv::ID CallConv, bool isVarArg,
                              bool isTailCall, bool isPatchPoint,
@@ -948,7 +1117,7 @@ namespace llvm {
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             const SDLoc &dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals,
-                            ImmutableCallSite *CS) const;
+                            ImmutableCallSite CS) const;
     SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee,
                              CallingConv::ID CallConv, bool isVarArg,
                              bool isTailCall, bool isPatchPoint,
@@ -957,54 +1126,81 @@ namespace llvm {
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             const SDLoc &dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals,
-                            ImmutableCallSite *CS) const;
+                            ImmutableCallSite CS) const;
+    SDValue LowerCall_AIX(SDValue Chain, SDValue Callee,
+                          CallingConv::ID CallConv, bool isVarArg,
+                          bool isTailCall, bool isPatchPoint,
+                          const SmallVectorImpl<ISD::OutputArg> &Outs,
+                          const SmallVectorImpl<SDValue> &OutVals,
+                          const SmallVectorImpl<ISD::InputArg> &Ins,
+                          const SDLoc &dl, SelectionDAG &DAG,
+                          SmallVectorImpl<SDValue> &InVals,
+                          ImmutableCallSite CS) const;
 
     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
 
     SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineStoreFPToInt(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
-
-    SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
-                             unsigned &RefinementSteps,
-                             bool &UseOneConstNR) const override;
-    SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
-                             unsigned &RefinementSteps) const override;
+    SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
+
+    /// ConvertSETCCToSubtract - looks at a SETCC that compares integers. It
+    /// replaces the SETCC with integer subtraction when (1) there is a legal
+    /// way of doing it and (2) keeping the result of the comparison in a GPR
+    /// has a performance benefit.
+    SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const;
+
+    SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+                            int &RefinementSteps, bool &UseOneConstNR,
+                            bool Reciprocal) const override;
+    SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+                             int &RefinementSteps) const override;
     unsigned combineRepeatedFPDivisors() const override;
 
     CCAssignFn *useFastISelCCs(unsigned Flag) const;
-  };
+
+    SDValue
+    combineElementTruncationToVectorTruncation(SDNode *N,
+                                               DAGCombinerInfo &DCI) const;
+
+    /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be
+    /// handled by the VINSERTH instruction introduced in ISA 3.0. This is
+    /// essentially any shuffle of v8i16 vectors that just inserts one element
+    /// from one vector into the other.
+    SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
+
+    /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be
+    /// handled by the VINSERTB instruction introduced in ISA 3.0. This is
+    /// essentially the v16i8 vector version of VINSERTH.
+    SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
+
+    // Return whether the call instruction can potentially be optimized to a
+    // tail call. This will cause the optimizers to attempt to move, or
+    // duplicate, return instructions to help enable tail call optimizations.
+    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
+    bool hasBitPreservingFPLogic(EVT VT) const override;
+    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+  }; // end class PPCTargetLowering
 
   namespace PPC {
+
     FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
                              const TargetLibraryInfo *LibInfo);
-  }
-
-  bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                  CCValAssign::LocInfo &LocInfo,
-                                  ISD::ArgFlagsTy &ArgFlags,
-                                  CCState &State);
-
-  bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
-                                         MVT &LocVT,
-                                         CCValAssign::LocInfo &LocInfo,
-                                         ISD::ArgFlagsTy &ArgFlags,
-                                         CCState &State);
-
-  bool
-  CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
-                                              MVT &LocVT,
-                                              CCValAssign::LocInfo &LocInfo,
-                                              ISD::ArgFlagsTy &ArgFlags,
-                                              CCState &State);
-
-  bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
-                                           MVT &LocVT,
-                                           CCValAssign::LocInfo &LocInfo,
-                                           ISD::ArgFlagsTy &ArgFlags,
-                                           CCState &State);
-}
-
-#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+
+  } // end namespace PPC
+
+  bool isIntS16Immediate(SDNode *N, int16_t &Imm);
+  bool isIntS16Immediate(SDValue Op, int16_t &Imm);
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
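The VABSD doc comment in the diff above compresses the signed-word trick into two rewrite arrows. As a minimal scalar model in plain C++ of why that identity holds (the absdu helper, main, and the sample values are illustrative assumptions, not the actual DAG combine in PPCISelLowering.cpp): adding 0x80000000 modulo 2^32 flips only the top bit of a word, which is the same bit xvnegsp flips, and flipping the top bit converts signed ordering into unsigned ordering, so an unsigned absolute difference then yields the signed absolute difference (the real combine must still guard the INT_MIN overflow cases).

    #include <cstdint>
    #include <cstdio>

    // Unsigned absolute difference of one 32-bit lane, mirroring what the
    // Power9 vabsduw instruction computes per word.
    static uint32_t absdu(uint32_t a, uint32_t b) {
      return a > b ? a - b : b - a;
    }

    int main() {
      int32_t a = -7, b = 5;
      // Bias both operands by 0x80000000 (equivalently, XOR the sign bit,
      // which is what xvnegsp does to each word) so that signed order
      // becomes unsigned order, then take the unsigned difference.
      uint32_t got = absdu((uint32_t)a + 0x80000000u,
                           (uint32_t)b + 0x80000000u);
      printf("%u\n", got); // prints 12, i.e. |a - b| for the signed inputs
      return 0;
    }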