PowerPC/SPE: Fix load/store handling for SPE

[android-x86/external-llvm.git] / lib / Target / PowerPC / PPCISelLowering.h
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h

index bdd658a..97422c6 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -1,9 +1,8 @@
  //===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
  //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  //
  //===----------------------------------------------------------------------===//
  //
@@ -15,15 +14,35 @@
  #ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
  #define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
  
-#include "PPC.h"
  #include "PPCInstrInfo.h"
-#include "PPCRegisterInfo.h"
  #include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
  #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/MachineValueType.h"
+#include <utility>
  
  namespace llvm {
+
    namespace PPCISD {
+
+    // When adding a NEW PPCISD node please add it to the correct position in
+    // the enum. The order of elements in this enum matters!
+    // Values that are added after this entry:
+    //     STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE
+    // are considered memory opcodes and are treated differently than entries
+    // that come before it. For example, ADD or MUL should be placed before
+    // the ISD::FIRST_TARGET_MEMORY_OPCODE while a LOAD or STORE should come
+    // after it.
      enum NodeType : unsigned {
        // Start the numbering where the builtin ops and target ops leave off.
        FIRST_NUMBER = ISD::BUILTIN_OP_END,
@@ -47,13 +66,20 @@ namespace llvm {
        FCTIDZ, FCTIWZ,
  
        /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
-      /// unsigned integers.
+      /// unsigned integers with round toward zero.
        FCTIDUZ, FCTIWUZ,
  
+      /// Floating-point-to-interger conversion instructions
+      FP_TO_UINT_IN_VSR, FP_TO_SINT_IN_VSR,
+
        /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
        /// VSFRC that is sign-extended from ByteWidth to a 64-byte integer.
        VEXTS,
  
+      /// SExtVElems, takes an input vector of a smaller type and sign
+      /// extends to an output vector of a larger type.
+      SExtVElems,
+
        /// Reciprocal estimate instructions (unary FP ops).
        FRE, FRSQRTE,
  
@@ -69,14 +95,22 @@ namespace llvm {
        ///
        XXSPLT,
  
-      /// XXINSERT - The PPC VSX insert instruction
+      /// VECINSERT - The PPC vector insert instruction
+      ///
+      VECINSERT,
+
+      /// XXREVERSE - The PPC VSX reverse instruction
        ///
-      XXINSERT,
+      XXREVERSE,
  
-      /// VECSHL - The PPC VSX shift left instruction
+      /// VECSHL - The PPC vector shift left instruction
        ///
        VECSHL,
  
+      /// XXPERMDI - The PPC XXPERMDI instruction
+      ///
+      XXPERMDI,
+
        /// The CMPB instruction (takes two operands of i32 or i64).
        CMPB,
  
@@ -104,11 +138,19 @@ namespace llvm {
        /// at function entry, used for PIC code.
        GlobalBaseReg,
  
-      /// These nodes represent the 32-bit PPC shifts that operate on 6-bit
-      /// shift amounts.  These nodes are generated by the multi-precision shift
-      /// code.
+      /// These nodes represent PPC shifts.
+      ///
+      /// For scalar types, only the last `n + 1` bits of the shift amounts
+      /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc.
+      /// for exact behaviors.
+      ///
+      /// For vector types, only the last n bits are used. See vsld.
        SRL, SRA, SHL,
  
+      /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
+      /// word and shift left immediate.
+      EXTSWSLI,
+
        /// The combination of sra[wd]i and addze used to implemented signed
        /// integer division by a power of 2. The first operand is the dividend,
        /// and the second is the constant shift amount (representing the
@@ -117,7 +159,7 @@ namespace llvm {
  
        /// CALL - A direct function call.
        /// CALL_NOP is a call with the special NOP which follows 64-bit
-      /// SVR4 calls.
+      /// SVR4 calls and 32-bit/64-bit AIX calls.
        CALL, CALL_NOP,
  
        /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
@@ -149,6 +191,18 @@ namespace llvm {
        /// Direct move from a GPR to a VSX register (zero)
        MTVSRZ,
  
+      /// Direct move of 2 consecutive GPR to a VSX register.
+      BUILD_FP128,
+
+      /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
+      /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is
+      /// unsupported for this target.
+      /// Merge 2 GPRs to a single SPE register.
+      BUILD_SPE64,
+
+      /// Extract SPE register component, second argument is high or low.
+      EXTRACT_SPE,
+
        /// Extract a subvector from signed integer vector and convert to FP.
        /// It is primarily used to convert a (widened) illegal integer vector
        /// type to a legal floating point vector type.
@@ -218,14 +272,14 @@ namespace llvm {
        CR6UNSET,
  
        /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
-      /// on PPC32.
+      /// for non-position independent code on PPC32.
        PPC32_GOT,
  
        /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
-      /// local dynamic TLS on PPC32.
+      /// local dynamic TLS and position indendepent code on PPC32.
        PPC32_PICGOT,
  
-      /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
+      /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
        /// TLS model, produces an ADDIS8 instruction that adds the GOT
        /// base to sym\@got\@tprel\@ha.
        ADDIS_GOT_TPREL_HA,
@@ -244,18 +298,18 @@ namespace llvm {
        /// TLS sequence.
        ADD_TLS,
  
-      /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS
+      /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS
        /// model, produces an ADDIS8 instruction that adds the GOT base
        /// register to sym\@got\@tlsgd\@ha.
        ADDIS_TLSGD_HA,
  
-      /// %X3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
+      /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
        /// model, produces an ADDI8 instruction that adds G8RReg to
        /// sym\@got\@tlsgd\@l and stores the result in X3.  Hidden by
        /// ADDIS_TLSGD_L_ADDR until after register assignment.
        ADDI_TLSGD_L,
  
-      /// %X3 = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
+      /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS
        /// model, produces a call to __tls_get_addr(sym\@tlsgd).  Hidden by
        /// ADDIS_TLSGD_L_ADDR until after register assignment.
        GET_TLS_ADDR,
@@ -265,18 +319,18 @@ namespace llvm {
        /// register assignment.
        ADDI_TLSGD_L_ADDR,
  
-      /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
+      /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
        /// model, produces an ADDIS8 instruction that adds the GOT base
        /// register to sym\@got\@tlsld\@ha.
        ADDIS_TLSLD_HA,
  
-      /// %X3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
+      /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
        /// model, produces an ADDI8 instruction that adds G8RReg to
        /// sym\@got\@tlsld\@l and stores the result in X3.  Hidden by
        /// ADDIS_TLSLD_L_ADDR until after register assignment.
        ADDI_TLSLD_L,
  
-      /// %X3 = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
+      /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS
        /// model, produces a call to __tls_get_addr(sym\@tlsld).  Hidden by
        /// ADDIS_TLSLD_L_ADDR until after register assignment.
        GET_TLSLD_ADDR,
@@ -286,7 +340,7 @@ namespace llvm {
        /// following register assignment.
        ADDI_TLSLD_L_ADDR,
  
-      /// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS
+      /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS
        /// model, produces an ADDIS8 instruction that adds X3 to
        /// sym\@dtprel\@ha.
        ADDIS_DTPREL_HA,
@@ -326,6 +380,21 @@ namespace llvm {
        /// An SDNode for swaps that are not associated with any loads/stores
        /// and thereby have no chain.
        SWAP_NO_CHAIN,
+      
+      /// An SDNode for Power9 vector absolute value difference.
+      /// operand #0 vector
+      /// operand #1 vector
+      /// operand #2 constant i32 0 or 1, to indicate whether needs to patch
+      /// the most significant bit for signed i32
+      ///
+      /// Power9 VABSD* instructions are designed to support unsigned integer
+      /// vectors (byte/halfword/word), if we want to make use of them for signed
+      /// integer vectors, we have to flip their sign bits first. To flip sign bit
+      /// for byte/halfword integer vector would become inefficient, but for word
+      /// integer vector, we can leverage XVNEGSP to make it efficiently. eg:
+      /// abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000) 
+      ///               => VABSDUW((XVNEGSP a), (XVNEGSP b))
+      VABSD,
  
        /// QVFPERM = This corresponds to the QPX qvfperm instruction.
        QVFPERM,
@@ -343,6 +412,9 @@ namespace llvm {
        /// representation.
        QBFLT,
  
+      /// Custom extend v4f32 to v2f64.
+      FP_EXTEND_LH,
+
        /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
        /// byte-swapping store instruction.  It byte-swaps the low "Type" bits of
        /// the GPRC input, then stores it through Ptr.  Type can be either i16 or
@@ -384,24 +456,38 @@ namespace llvm {
        /// an xxswapd.
        LXVD2X,
  
+      /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
+      /// v2f32 value into the lower half of a VSR register.
+      LD_VSX_LH,
+
        /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
        /// Maps directly to an stxvd2x instruction that will be preceded by
        /// an xxswapd.
        STXVD2X,
  
+      /// Store scalar integers from VSR.
+      ST_VSR_SCAL_INT,
+
        /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
        /// The 4xf32 load used for v4i1 constants.
        QVLFSb,
  
+      /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
+      /// except they ensure that the compare input is zero-extended for
+      /// sub-word versions because the atomic loads zero-extend.
+      ATOMIC_CMP_SWAP_8, ATOMIC_CMP_SWAP_16,
+
        /// GPRC = TOC_ENTRY GA, TOC
        /// Loads the entry for GA from the TOC, where the TOC base is given by
        /// the last operand.
        TOC_ENTRY
      };
-  }
+
+  } // end namespace PPCISD
  
    /// Define some predicates that are used for node matching.
    namespace PPC {
+
      /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
      /// VPKUHUM instruction.
      bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
@@ -431,7 +517,32 @@ namespace llvm {
      /// a VMRGEW or VMRGOW instruction
      bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                               unsigned ShuffleKind, SelectionDAG &DAG);
-  
+    /// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable
+    /// for a XXSLDWI instruction.
+    bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+                              bool &Swap, bool IsLE);
+
+    /// isXXBRHShuffleMask - Return true if this is a shuffle mask suitable
+    /// for a XXBRH instruction.
+    bool isXXBRHShuffleMask(ShuffleVectorSDNode *N);
+
+    /// isXXBRWShuffleMask - Return true if this is a shuffle mask suitable
+    /// for a XXBRW instruction.
+    bool isXXBRWShuffleMask(ShuffleVectorSDNode *N);
+
+    /// isXXBRDShuffleMask - Return true if this is a shuffle mask suitable
+    /// for a XXBRD instruction.
+    bool isXXBRDShuffleMask(ShuffleVectorSDNode *N);
+
+    /// isXXBRQShuffleMask - Return true if this is a shuffle mask suitable
+    /// for a XXBRQ instruction.
+    bool isXXBRQShuffleMask(ShuffleVectorSDNode *N);
+
+    /// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable
+    /// for a XXPERMDI instruction.
+    bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+                              bool &Swap, bool IsLE);
+
      /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
      /// shift amount, otherwise return -1.
      int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
@@ -465,7 +576,8 @@ namespace llvm {
      /// If this is a qvaligni shuffle mask, return the shift
      /// amount, otherwise return -1.
      int isQVALIGNIShuffleMask(SDNode *N);
-  }
+
+  } // end namespace PPC
  
    class PPCTargetLowering : public TargetLowering {
      const PPCSubtarget &Subtarget;
@@ -478,6 +590,11 @@ namespace llvm {
      /// DAG node.
      const char *getTargetNodeName(unsigned Opcode) const override;
  
+    bool isSelectSupported(SelectSupportKind Kind) const override {
+      // PowerPC does not support scalar condition selects on vectors.
+      return (Kind != SelectSupportKind::ScalarCondVectorVal);
+    }
+
      /// getPreferredVectorAction - The code we generate when vector types are
      /// legalized by promoting the integer element type is often much worse
      /// than code we generate if we widen the type for applicable vector types.
@@ -486,14 +603,17 @@ namespace llvm {
      /// of v4i8's and shuffle them. This will turn into a mess of 8 extending
      /// loads, moves back into VSR's (or memory ops if we don't have moves) and
      /// then the VPERM for the shuffle. All in all a very slow sequence.
-    TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
+    TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
        const override {
        if (VT.getScalarSizeInBits() % 8 == 0)
          return TypeWidenVector;
        return TargetLoweringBase::getPreferredVectorAction(VT);
      }
+
      bool useSoftFloat() const override;
  
+    bool hasSPE() const;
+
      MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
        return MVT::i32;
      }
@@ -514,10 +634,16 @@ namespace llvm {
        return true;
      }
  
+    bool preferIncOfAddToSubOfNot(EVT VT) const override;
+
+    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
+      return VT.isScalarInteger();
+    }
+
      bool supportSplitCSR(MachineFunction *MF) const override {
        return
-        MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
-        MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
+        MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
+        MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
      }
  
      void initializeSplitCSR(MachineBasicBlock *Entry) const override;
@@ -543,18 +669,27 @@ namespace llvm {
                                     ISD::MemIndexedMode &AM,
                                     SelectionDAG &DAG) const override;
  
+    /// SelectAddressEVXRegReg - Given the specified addressed, check to see if
+    /// it can be more efficiently represented as [r+imm].
+    bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index,
+                                SelectionDAG &DAG) const;
+
      /// SelectAddressRegReg - Given the specified addressed, check to see if it
-    /// can be represented as an indexed [r+r] operation.  Returns false if it
-    /// can be more efficiently represented with [r+imm].
+    /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment
+    /// is non-zero, only accept displacement which is not suitable for [r+imm].
+    /// Returns false if it can be represented by [r+imm], which are preferred.
      bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
-                             SelectionDAG &DAG) const;
+                             SelectionDAG &DAG,
+                             unsigned EncodingAlignment = 0) const;
  
      /// SelectAddressRegImm - Returns true if the address N can be represented
      /// by a base register plus a signed 16-bit displacement [r+imm], and if it
-    /// is not better represented as reg+reg.  If Aligned is true, only accept
-    /// displacements suitable for STD and friends, i.e. multiples of 4.
+    /// is not better represented as reg+reg. If \p EncodingAlignment is
+    /// non-zero, only accept displacements suitable for instruction encoding
+    /// requirement, i.e. multiples of 4 for DS form.
      bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
-                             SelectionDAG &DAG, bool Aligned) const;
+                             SelectionDAG &DAG,
+                             unsigned EncodingAlignment) const;
  
      /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
      /// represented as an indexed [r+r] operation.
@@ -579,14 +714,14 @@ namespace llvm {
      SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
  
      SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
-                          std::vector<SDNode *> *Created) const override;
+                          SmallVectorImpl<SDNode *> &Created) const override;
  
      unsigned getRegisterByName(const char* RegName, EVT VT,
                                 SelectionDAG &DAG) const override;
  
      void computeKnownBitsForTargetNode(const SDValue Op,
-                                       APInt &KnownZero,
-                                       APInt &KnownOne,
+                                       KnownBits &Known,
+                                       const APInt &DemandedElts,
                                         const SelectionDAG &DAG,
                                         unsigned Depth = 0) const override;
  
@@ -596,10 +731,10 @@ namespace llvm {
        return true;
      }
  
-    Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
-                                  bool IsStore, bool IsLoad) const override;
-    Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
-                                   bool IsStore, bool IsLoad) const override;
+    Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst,
+                                  AtomicOrdering Ord) const override;
+    Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst,
+                                   AtomicOrdering Ord) const override;
  
      MachineBasicBlock *
      EmitInstrWithCustomInserter(MachineInstr &MI,
@@ -665,7 +800,8 @@ namespace llvm {
      /// isLegalAddressingMode - Return true if the addressing mode represented
      /// by AM is legal for this target, for a load/store of the specified type.
      bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
  
      /// isLegalICmpImmediate - Return true if the specified immediate is legal
      /// icmp immediate, that is the target has icmp instructions which can
@@ -687,17 +823,25 @@ namespace llvm {
  
      bool isZExtFree(SDValue Val, EVT VT2) const override;
  
-    bool isFPExtFree(EVT VT) const override;
+    bool isFPExtFree(EVT DestVT, EVT SrcVT) const override;
  
-    /// \brief Returns true if it is beneficial to convert a load of a constant
+    /// Returns true if it is beneficial to convert a load of a constant
      /// to just the constant itself.
      bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                             Type *Ty) const override;
  
+    bool convertSelectOfConstantsToMath(EVT VT) const override {
+      return true;
+    }
+
+    // Returns true if the address of the global is stored in TOC entry.
+    bool isAccessedAsGotIndirect(SDValue N) const;
+
      bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
  
      bool getTgtMemIntrinsic(IntrinsicInfo &Info,
                              const CallInst &I,
+                            MachineFunction &MF,
                              unsigned Intrinsic) const override;
  
      /// getOptimalMemOpType - Returns the target specific optimal type for load
@@ -714,14 +858,14 @@ namespace llvm {
      EVT
      getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                          bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
-                        MachineFunction &MF) const override;
+                        const AttributeList &FuncAttributes) const override;
  
      /// Is unaligned memory access allowed for the given type, and is it fast
      /// relative to software emulation.
-    bool allowsMisalignedMemoryAccesses(EVT VT,
-                                        unsigned AddrSpace,
-                                        unsigned Align = 1,
-                                        bool *Fast = nullptr) const override;
+    bool allowsMisalignedMemoryAccesses(
+        EVT VT, unsigned AddrSpace, unsigned Align = 1,
+        MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+        bool *Fast = nullptr) const override;
  
      /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
      /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
@@ -741,7 +885,7 @@ namespace llvm {
      FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
                               const TargetLibraryInfo *LibInfo) const override;
  
-    /// \brief Returns true if an argument of type Ty needs to be passed in a
+    /// Returns true if an argument of type Ty needs to be passed in a
      /// contiguous block of registers in calling convention CallConv.
      bool functionArgumentNeedsConsecutiveRegisters(
        Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override {
@@ -769,21 +913,30 @@ namespace llvm {
      bool useLoadStackGuardNode() const override;
      void insertSSPDeclarations(Module &M) const override;
  
+    bool isFPImmLegal(const APFloat &Imm, EVT VT,
+                      bool ForCodeSize) const override;
+
+    unsigned getJumpTableEncoding() const override;
+    bool isJumpTableRelative() const override;
+    SDValue getPICJumpTableRelocBase(SDValue Table,
+                                     SelectionDAG &DAG) const override;
+    const MCExpr *getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+                                               unsigned JTI,
+                                               MCContext &Ctx) const override;
+
    private:
      struct ReuseLoadInfo {
        SDValue Ptr;
        SDValue Chain;
        SDValue ResChain;
        MachinePointerInfo MPI;
-      bool IsDereferenceable;
-      bool IsInvariant;
-      unsigned Alignment;
+      bool IsDereferenceable = false;
+      bool IsInvariant = false;
+      unsigned Alignment = 0;
        AAMDNodes AAInfo;
-      const MDNode *Ranges;
+      const MDNode *Ranges = nullptr;
  
-      ReuseLoadInfo()
-          : IsDereferenceable(false), IsInvariant(false), Alignment(0),
-            Ranges(nullptr) {}
+      ReuseLoadInfo() = default;
  
        MachineMemOperand::Flags MMOFlags() const {
          MachineMemOperand::Flags F = MachineMemOperand::MONone;
@@ -795,6 +948,11 @@ namespace llvm {
        }
      };
  
+    bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+      // Addrspacecasts are always noops.
+      return true;
+    }
+
      bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
                               SelectionDAG &DAG,
                               ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
@@ -805,9 +963,16 @@ namespace llvm {
                                  SelectionDAG &DAG, const SDLoc &dl) const;
      SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG,
                                       const SDLoc &dl) const;
+
+    bool directMoveIsProfitable(const SDValue &Op) const;
      SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
                                       const SDLoc &dl) const;
  
+    SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
+                                 const SDLoc &dl) const;
+
+    SDValue LowerTRUNCATEVector(SDValue Op, SelectionDAG &DAG) const;
+
      SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
      SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
  
@@ -822,7 +987,7 @@ namespace llvm {
      IsEligibleForTailCallOptimization_64SVR4(
                                      SDValue Callee,
                                      CallingConv::ID CalleeCC,
-                                    ImmutableCallSite *CS,
+                                    ImmutableCallSite CS,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -866,9 +1031,15 @@ namespace llvm {
      SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  
      SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
@@ -886,7 +1057,7 @@ namespace llvm {
                         SDValue &Callee, int SPDiff, unsigned NumBytes,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         SmallVectorImpl<SDValue> &InVals,
-                       ImmutableCallSite *CS) const;
+                       ImmutableCallSite CS) const;
  
      SDValue
      LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
@@ -894,15 +1065,13 @@ namespace llvm {
                           const SDLoc &dl, SelectionDAG &DAG,
                           SmallVectorImpl<SDValue> &InVals) const override;
  
-    SDValue
-      LowerCall(TargetLowering::CallLoweringInfo &CLI,
-                SmallVectorImpl<SDValue> &InVals) const override;
+    SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+                      SmallVectorImpl<SDValue> &InVals) const override;
  
-    bool
-      CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
-                   bool isVarArg,
-                   const SmallVectorImpl<ISD::OutputArg> &Outs,
-                   LLVMContext &Context) const override;
+    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+                        bool isVarArg,
+                        const SmallVectorImpl<ISD::OutputArg> &Outs,
+                        LLVMContext &Context) const override;
  
      SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -939,7 +1108,7 @@ namespace llvm {
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &dl, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals,
-                             ImmutableCallSite *CS) const;
+                             ImmutableCallSite CS) const;
      SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                               CallingConv::ID CallConv, bool isVarArg,
                               bool isTailCall, bool isPatchPoint,
@@ -948,7 +1117,7 @@ namespace llvm {
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &dl, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals,
-                             ImmutableCallSite *CS) const;
+                             ImmutableCallSite CS) const;
      SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee,
                               CallingConv::ID CallConv, bool isVarArg,
                               bool isTailCall, bool isPatchPoint,
@@ -957,54 +1126,81 @@ namespace llvm {
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &dl, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals,
-                             ImmutableCallSite *CS) const;
+                             ImmutableCallSite CS) const;
+    SDValue LowerCall_AIX(SDValue Chain, SDValue Callee,
+                          CallingConv::ID CallConv, bool isVarArg,
+                          bool isTailCall, bool isPatchPoint,
+                          const SmallVectorImpl<ISD::OutputArg> &Outs,
+                          const SmallVectorImpl<SDValue> &OutVals,
+                          const SmallVectorImpl<ISD::InputArg> &Ins,
+                          const SDLoc &dl, SelectionDAG &DAG,
+                          SmallVectorImpl<SDValue> &InVals,
+                          ImmutableCallSite CS) const;
  
      SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
      SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  
      SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
      SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const;
      SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineStoreFPToInt(SDNode *N, DAGCombinerInfo &DCI) const;
      SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
-
-    SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
-                             unsigned &RefinementSteps,
-                             bool &UseOneConstNR) const override;
-    SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
-                             unsigned &RefinementSteps) const override;
+    SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
+
+    /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
+    /// SETCC with integer subtraction when (1) there is a legal way of doing it
+    /// (2) keeping the result of comparison in GPR has performance benefit.
+    SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const;
+
+    SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+                            int &RefinementSteps, bool &UseOneConstNR,
+                            bool Reciprocal) const override;
+    SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+                             int &RefinementSteps) const override;
      unsigned combineRepeatedFPDivisors() const override;
  
-    CCAssignFn *useFastISelCCs(unsigned Flag) const;
-  };
+    SDValue
+    combineElementTruncationToVectorTruncation(SDNode *N,
+                                               DAGCombinerInfo &DCI) const;
+
+    /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be
+    /// handled by the VINSERTH instruction introduced in ISA 3.0. This is
+    /// essentially any shuffle of v8i16 vectors that just inserts one element
+    /// from one vector into the other.
+    SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
+
+    /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be
+    /// handled by the VINSERTB instruction introduced in ISA 3.0. This is
+    /// essentially v16i8 vector version of VINSERTH.
+    SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
+
+    // Return whether the call instruction can potentially be optimized to a
+    // tail call. This will cause the optimizers to attempt to move, or
+    // duplicate return instructions to help enable tail call optimizations.
+    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
+    bool hasBitPreservingFPLogic(EVT VT) const override;
+    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+  }; // end class PPCTargetLowering
  
    namespace PPC {
+
      FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
                               const TargetLibraryInfo *LibInfo);
-  }
-
-  bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                  CCValAssign::LocInfo &LocInfo,
-                                  ISD::ArgFlagsTy &ArgFlags,
-                                  CCState &State);
-
-  bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
-                                         MVT &LocVT,
-                                         CCValAssign::LocInfo &LocInfo,
-                                         ISD::ArgFlagsTy &ArgFlags,
-                                         CCState &State);
-
-  bool 
-  CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
-                                                 MVT &LocVT,
-                                                 CCValAssign::LocInfo &LocInfo,
-                                                 ISD::ArgFlagsTy &ArgFlags,
-                                                 CCState &State);
-
-  bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
-                                           MVT &LocVT,
-                                           CCValAssign::LocInfo &LocInfo,
-                                           ISD::ArgFlagsTy &ArgFlags,
-                                           CCState &State);
-}
-
-#endif   // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+
+  } // end namespace PPC
+
+  bool isIntS16Immediate(SDNode *N, int16_t &Imm);
+  bool isIntS16Immediate(SDValue Op, int16_t &Imm);
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H