//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the interfaces that Hexagon uses to lower LLVM code
// into a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "HexagonISelLowering.h"
#include "Hexagon.h"
#include "HexagonMachineFunctionInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "HexagonTargetObjectFile.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "hexagon-lowering"

static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
  cl::init(true), cl::Hidden,
  cl::desc("Control jump table emission on Hexagon target"));

static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched",
  cl::Hidden, cl::ZeroOrMore, cl::init(false),
  cl::desc("Enable Hexagon SDNode scheduling"));

static cl::opt<bool> EnableFastMath("ffast-math",
  cl::Hidden, cl::ZeroOrMore, cl::init(false),
  cl::desc("Enable Fast Math processing"));

static cl::opt<int> MinimumJumpTables("minimum-jump-tables",
  cl::Hidden, cl::ZeroOrMore, cl::init(5),
  cl::desc("Set minimum jump tables"));

static cl::opt<int> MaxStoresPerMemcpyCL("max-store-memcpy",
  cl::Hidden, cl::ZeroOrMore, cl::init(6),
  cl::desc("Max #stores to inline memcpy"));

static cl::opt<int> MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os",
  cl::Hidden, cl::ZeroOrMore, cl::init(4),
  cl::desc("Max #stores to inline memcpy"));

static cl::opt<int> MaxStoresPerMemmoveCL("max-store-memmove",
  cl::Hidden, cl::ZeroOrMore, cl::init(6),
  cl::desc("Max #stores to inline memmove"));

static cl::opt<int> MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os",
  cl::Hidden, cl::ZeroOrMore, cl::init(4),
  cl::desc("Max #stores to inline memmove"));

static cl::opt<int> MaxStoresPerMemsetCL("max-store-memset",
  cl::Hidden, cl::ZeroOrMore, cl::init(8),
  cl::desc("Max #stores to inline memset"));

static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",
  cl::Hidden, cl::ZeroOrMore, cl::init(4),
  cl::desc("Max #stores to inline memset"));

static cl::opt<bool> AlignLoads("hexagon-align-loads",
  cl::Hidden, cl::init(false),
  cl::desc("Rewrite unaligned loads as a pair of aligned loads"));

namespace {

  class HexagonCCState : public CCState {
    unsigned NumNamedVarArgParams = 0;

  public:
    HexagonCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
                   SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
                   unsigned NumNamedArgs)
        : CCState(CC, IsVarArg, MF, locs, C),
          NumNamedVarArgParams(NumNamedArgs) {}

    unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
  };

} // end anonymous namespace

// Implement calling convention for Hexagon.

static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                       CCValAssign::LocInfo &LocInfo,
                       ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  static const MCPhysReg ArgRegs[] = {
    Hexagon::R0, Hexagon::R1, Hexagon::R2,
    Hexagon::R3, Hexagon::R4, Hexagon::R5
  };
  const unsigned NumArgRegs = array_lengthof(ArgRegs);
  unsigned RegNum = State.getFirstUnallocated(ArgRegs);

  // RegNum is an index into ArgRegs: skip a register if RegNum is odd.
  if (RegNum != NumArgRegs && RegNum % 2 == 1)
    State.AllocateReg(ArgRegs[RegNum]);

  // Always return false here, as this function only makes sure that the first
  // unallocated register has an even register number and does not actually
  // allocate a register for the current argument.
  return false;
}
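
// Illustrative example (not from the original source): for a hypothetical
// callee "void f(int a, long long b)", 'a' is allocated to R0. CC_SkipOdd
// then marks R1 as allocated, so the 64-bit 'b' lands in the even-aligned
// register pair R3:2 rather than straddling R2:R1.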

#include "HexagonGenCallingConv.inc"

// Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
void HexagonTargetLowering::promoteLdStType(MVT VT, MVT PromotedLdStVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT, Promote);
    AddPromotedToType(ISD::LOAD, VT, PromotedLdStVT);

    setOperationAction(ISD::STORE, VT, Promote);
    AddPromotedToType(ISD::STORE, VT, PromotedLdStVT);
  }
}
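
// Illustrative example (not from the original source): after
// promoteLdStType(MVT::v4i8, MVT::i32), a load of v4i8 is legalized as an
// i32 load and the loaded value is bitcast back to v4i8, so no additional
// memory operation is introduced.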

SDValue
HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
      const {
  return SDValue();
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter. Sometimes what we are copying is the end of a
/// larger object, the part that does not fit in registers.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       /*isVolatile=*/false, /*AlwaysInline=*/false,
                       /*isTailCall=*/false,
                       MachinePointerInfo(), MachinePointerInfo());
}

bool
HexagonTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  if (MF.getSubtarget<HexagonSubtarget>().useHVXOps())
    return CCInfo.CheckReturn(Outs, RetCC_Hexagon_HVX);
  return CCInfo.CheckReturn(Outs, RetCC_Hexagon);
}

// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
// passed by value, the function prototype is modified to return void and
// the value is stored in memory pointed to by a pointer passed by the caller.
SDValue
HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                   bool IsVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   const SDLoc &dl, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to locations.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze return values of ISD::RET.
  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon_HVX);
  else
    CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];

    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);

    // Guarantee that all emitted copies are stuck together with flags.
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps);
}

bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  // If either no tail call or told not to tail call at all, don't.
  auto Attr =
      CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
  if (!CI->isTailCall() || Attr.getValueAsString() == "true")
    return false;

  return true;
}

/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/Glue are the input chain/glue to use, and that TheCall is the call
/// being lowered. Returns an SDNode with the same number of values as the
/// ISD::CALL.
SDValue HexagonTargetLowering::LowerCallResult(
    SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;

  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon_HVX);
  else
    CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    SDValue RetVal;
    if (RVLocs[i].getValVT() == MVT::i1) {
      // Return values of type MVT::i1 require special handling. The reason
      // is that MVT::i1 is associated with the PredRegs register class, but
      // values of that type are still returned in R0. Generate an explicit
      // copy into a predicate register from R0, and treat the value of the
      // predicate register as the call result.
      auto &MRI = DAG.getMachineFunction().getRegInfo();
      SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
                                       MVT::i32, Glue);
      // FR0 = (Value, Chain, Glue)
      unsigned PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
      SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR,
                                     FR0.getValue(0), FR0.getValue(2));
      // TPR = (Chain, Glue)
      // Don't glue this CopyFromReg, because it copies from a virtual
      // register. If it is glued to the call, InstrEmitter will add it
      // as an implicit def to the call (EmitMachineNode).
      RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1);
      Glue = TPR.getValue(1);
      Chain = TPR.getValue(0);
    } else {
      RetVal = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
                                  RVLocs[i].getValVT(), Glue);
      Glue = RetVal.getValue(2);
      Chain = RetVal.getValue(1);
    }
    InVals.push_back(RetVal.getValue(0));
  }

  return Chain;
}
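
// A sketch of the node chain built above for an MVT::i1 return value
// (illustrative, not from the original source):
//   FR0 = CopyFromReg R0, i32     ; physical return register, glued to call
//   TPR = CopyToReg PredR, FR0    ; move the value into a predicate vreg
//   Ret = CopyFromReg PredR, i1   ; unglued read used as the call result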

/// LowerCall - Function arguments are copied from virtual regs to
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
SDValue
HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  bool DoesNotReturn = CLI.DoesNotReturn;

  bool IsStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  auto PtrVT = getPointerTy(MF.getDataLayout());

  unsigned NumParams = CLI.CS.getInstruction()
                           ? CLI.CS.getFunctionType()->getNumParams()
                           : 0;
  if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
                        NumParams);

  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_HVX);
  else
    CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);

  auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
  if (Attr.getValueAsString() == "true")
    CLI.IsTailCall = false;

  if (CLI.IsTailCall) {
    bool StructAttrFlag = MF.getFunction().hasStructRetAttr();
    CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                         IsVarArg, IsStructRet, StructAttrFlag, Outs,
                         OutVals, Ins, DAG);
    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
      CCValAssign &VA = ArgLocs[i];
      if (VA.isMemLoc()) {
        CLI.IsTailCall = false;
        break;
      }
    }
    DEBUG(dbgs() << (CLI.IsTailCall ? "Eligible for Tail Call\n"
                                    : "Argument must be passed on stack. "
                                      "Not eligible for Tail Call\n"));
  }

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  SDValue StackPtr =
      DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);

  bool NeedsArgAlign = false;
  unsigned LargestAlignSeen = 0;
  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    // Record if we need > 8 byte alignment on an argument.
    bool ArgAlign = Subtarget.isHVXVectorType(VA.getValVT());
    NeedsArgAlign |= ArgAlign;

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default:
      // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
      llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getBitcast(VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isMemLoc()) {
      unsigned LocMemOffset = VA.getLocMemOffset();
      SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
                                        StackPtr.getValueType());
      MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);

      LargestAlignSeen = std::max(LargestAlignSeen,
                                  VA.getLocVT().getStoreSizeInBits() >> 3);
      if (Flags.isByVal()) {
        // The argument is a struct passed by value. According to LLVM, "Arg"
        // is a pointer.
        MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
                                                        Flags, DAG, dl));
      } else {
        MachinePointerInfo LocPI = MachinePointerInfo::getStack(
            DAG.getMachineFunction(), LocMemOffset);
        SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI);
        MemOpChains.push_back(S);
      }
      continue;
    }

    // Arguments that can be passed in a register must be kept in the
    // RegsToPass vector.
    if (VA.isRegLoc())
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
  }

  if (NeedsArgAlign && Subtarget.hasV60TOps()) {
    DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
    unsigned VecAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
    LargestAlignSeen = std::max(LargestAlignSeen, VecAlign);
    MFI.ensureMaxAlignment(LargestAlignSeen);
  }

  // Transform all store nodes into one single node because all store
  // nodes are independent of each other.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  SDValue Glue;
  if (!CLI.IsTailCall) {
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
    Glue = Chain.getValue(1);
  }

  // Build a sequence of copy-to-reg nodes chained together with token
  // chain and flag operands which copy the outgoing args into registers.
  // The Glue is necessary since all emitted instructions must be
  // stuck together.
  if (!CLI.IsTailCall) {
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, Glue);
      Glue = Chain.getValue(1);
    }
  } else {
    // For tail calls lower the arguments to the 'real' stack slot.
    //
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots, and
    // the alias isn't otherwise explicit. This is slightly more conservative
    // than necessary, because it means that each store effectively depends
    // on every argument instead of just those arguments it would clobber.
    //
    // Do not flag preceding copytoreg stuff together with the following stuff.
    Glue = SDValue();
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, Glue);
      Glue = Chain.getValue(1);
    }
    Glue = SDValue();
  }

  bool LongCalls = MF.getSubtarget<HexagonSubtarget>().useLongCalls();
  unsigned Flags = LongCalls ? HexagonII::HMOTF_ConstExtended : 0;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT, 0, Flags);
  } else if (ExternalSymbolSDNode *S =
             dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, Flags);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));
  }

  const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (Glue.getNode())
    Ops.push_back(Glue);

  if (CLI.IsTailCall) {
    MFI.setHasTailCall();
    return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
  }

  // Set this here because we need to know this for "hasFP" in frame lowering.
  // The target-independent code calls getFrameRegister before setting it, and
  // getFrameRegister uses hasFP to determine whether the function has FP.
  MFI.setHasCalls(true);

  unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL;
  Chain = DAG.getNode(OpCode, dl, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(0, dl, true), Glue, dl);
  Glue = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, Glue, CallConv, IsVarArg, Ins, dl, DAG,
                         InVals, OutVals, Callee);
}
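
// For a non-tail call the nodes above are emitted in this order (sketch,
// not from the original source): TokenFactor for the outgoing stores,
// CALLSEQ_START, a CopyToReg per register argument, CALL (or CALLnr),
// CALLSEQ_END, and finally the CopyFromReg nodes added by LowerCallResult.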

/// Returns true by value, base pointer and offset pointer and addressing
/// mode by reference if this node can be combined with a load / store to
/// form a post-indexed load / store.
bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
      SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM,
      SelectionDAG &DAG) const {
  LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N);
  if (!LSN)
    return false;
  EVT VT = LSN->getMemoryVT();
  if (!VT.isSimple())
    return false;
  bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
                     VT == MVT::i64 || VT == MVT::v2i16 || VT == MVT::v2i32 ||
                     VT == MVT::v4i8 || VT == MVT::v4i16 || VT == MVT::v8i8 ||
                     Subtarget.isHVXVectorType(VT.getSimpleVT());
  if (!IsLegalType)
    return false;

  if (Op->getOpcode() != ISD::ADD)
    return false;
  Base = Op->getOperand(0);
  Offset = Op->getOperand(1);
  if (!isa<ConstantSDNode>(Offset.getNode()))
    return false;
  AM = ISD::POST_INC;

  int32_t V = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
  return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, V);
}
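
// Illustrative example (not from the original source): for an i32 load from
// %base whose address is also updated by (add %base, 4), this hook reports
// Base=%base, Offset=4, AM=POST_INC, and instruction selection can then form
// a post-increment load such as "r1 = memw(r0++#4)", 4 being a valid
// auto-increment immediate for i32.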

SDValue
HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  unsigned LR = HRI.getRARegister();

  if (Op.getOpcode() != ISD::INLINEASM || HMFI.hasClobberLR())
    return Op;

  unsigned NumOps = Op.getNumOperands();
  if (Op.getOperand(NumOps-1).getValueType() == MVT::Glue)
    --NumOps;  // Ignore the flag operand.

  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
    unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
    unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
    ++i;  // Skip the ID value.

    switch (InlineAsm::getKind(Flags)) {
    default:
      llvm_unreachable("Bad flags!");
    case InlineAsm::Kind_RegUse:
    case InlineAsm::Kind_Imm:
    case InlineAsm::Kind_Mem:
      i += NumVals;
      break;
    case InlineAsm::Kind_Clobber:
    case InlineAsm::Kind_RegDef:
    case InlineAsm::Kind_RegDefEarlyClobber: {
      for (; NumVals; --NumVals, ++i) {
        unsigned Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
        if (Reg != LR)
          continue;
        HMFI.setHasClobberLR(true);
        return Op;
      }
      break;
    }
    }
  }

  return Op;
}

// Need to transform ISD::PREFETCH into something that doesn't inherit
// all of the properties of ISD::PREFETCH, specifically SDNPMayLoad and
// SDNPMayStore.
SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Addr = Op.getOperand(1);
  // Lower it to DCFETCH($reg, #0). A "pat" will try to merge the offset in,
  // if the "reg" is fed by an "add".
  SDLoc DL(Op);
  SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
  return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
}

// Custom-handle ISD::READCYCLECOUNTER because the target-independent SDNode
// is marked as having side-effects, while the register read on Hexagon does
// not have any. TableGen refuses to accept the direct pattern from that node
// to the A4_tfrcpp.
SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDLoc dl(Op);
  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
  return DAG.getNode(HexagonISD::READCYCLE, dl, VTs, Chain);
}

SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  // Lower the hexagon_prefetch builtin to DCFETCH, as above.
  if (IntNo == Intrinsic::hexagon_prefetch) {
    SDValue Addr = Op.getOperand(2);
    SDLoc DL(Op);
    SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
    return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
  }
  return SDValue();
}

SDValue
HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  SDValue Align = Op.getOperand(2);
  SDLoc dl(Op);

  ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align);
  assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC");

  unsigned A = AlignConst->getSExtValue();
  auto &HFI = *Subtarget.getFrameLowering();
  // "Zero" means natural stack alignment.
  if (A == 0)
    A = HFI.getStackAlignment();

  DEBUG({
    dbgs() << __func__ << " Align: " << A << " Size: ";
    Size.getNode()->dump(&DAG);
    dbgs() << "\n";
  });

  SDValue AC = DAG.getConstant(A, dl, MVT::i32);
  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
  SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);

  DAG.ReplaceAllUsesOfValueWith(Op, AA);
  return AA;
}
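
// Illustrative example (not from the original source): an IR-level
// "alloca i8, i32 %n, align 32" arrives here with Size=%n and Align=32 and
// becomes a HexagonISD::ALLOCA node with the constant operand AC=32; an
// incoming alignment of 0 would first have been replaced by the natural
// stack alignment.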

SDValue HexagonTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
                        MF.getFunction().getFunctionType()->getNumParams());

  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_HVX);
  else
    CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);

  // For LLVM, in the case when returning a struct by value (> 8 bytes),
  // the first argument is a pointer that points to the location on the
  // caller's stack where the return value will be stored. For Hexagon, the
  // location on the caller's stack is passed only when the struct size is
  // smaller than or equal to 8 bytes. Otherwise no address is passed into
  // the callee and the callee returns the result directly through R0/R1.

  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    ISD::ArgFlagsTy Flags = Ins[i].Flags;
    bool ByVal = Flags.isByVal();

    // Arguments passed in registers:
    // 1. 32- and 64-bit values and HVX vectors are passed directly,
    // 2. Large structs are passed via an address, and the address is
    //    passed in a register.
    if (VA.isRegLoc() && ByVal && Flags.getByValSize() <= 8)
      llvm_unreachable("ByValSize must be bigger than 8 bytes");

    bool InReg = VA.isRegLoc() &&
                 (!ByVal || (ByVal && Flags.getByValSize() > 8));

    if (InReg) {
      MVT RegVT = VA.getLocVT();
      if (VA.getLocInfo() == CCValAssign::BCvt)
        RegVT = VA.getValVT();

      const TargetRegisterClass *RC = getRegClassFor(RegVT);
      unsigned VReg = MRI.createVirtualRegister(RC);
      SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);

      // Treat values of type MVT::i1 specially: they are passed in
      // registers of type i32, but they need to remain as values of
      // type i1 for consistency of the argument lowering.
      if (VA.getValVT() == MVT::i1) {
        assert(RegVT.getSizeInBits() <= 32);
        SDValue T = DAG.getNode(ISD::AND, dl, RegVT,
                                Copy, DAG.getConstant(1, dl, RegVT));
        Copy = DAG.getSetCC(dl, MVT::i1, T, DAG.getConstant(0, dl, RegVT),
                            ISD::SETNE);
      } else {
#ifndef NDEBUG
        unsigned RegSize = RegVT.getSizeInBits();
        assert(RegSize == 32 || RegSize == 64 ||
               Subtarget.isHVXVectorType(RegVT));
#endif
      }
      InVals.push_back(Copy);
      MRI.addLiveIn(VA.getLocReg(), VReg);
    } else {
      assert(VA.isMemLoc() && "Argument should be passed in memory");

      // If it's a byval parameter, then we need to compute the
      // "real" size, not the size of the pointer.
      unsigned ObjSize = Flags.isByVal()
                            ? Flags.getByValSize()
                            : VA.getLocVT().getStoreSizeInBits() / 8;

      // Create the frame index object for this incoming parameter.
      int Offset = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
      int FI = MFI.CreateFixedObject(ObjSize, Offset, true);
      SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);

      if (Flags.isByVal()) {
        // If it's a pass-by-value aggregate, then do not dereference the stack
        // location. Instead, we should generate a reference to the stack
        // location.
        InVals.push_back(FIN);
      } else {
        SDValue L = DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                MachinePointerInfo::getFixedStack(MF, FI, 0));
        InVals.push_back(L);
      }
    }
  }

  if (IsVarArg) {
    // This will point to the next argument passed via stack.
    int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
    int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
    HMFI.setVarArgsFrameIndex(FI);
  }

  return Chain;
}

SDValue
HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  // VASTART stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  MachineFunction &MF = DAG.getMachineFunction();
  HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
  SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

static bool isSExtFree(SDValue N) {
  // A sign-extend of a truncate of a sign-extend is free.
  if (N.getOpcode() == ISD::TRUNCATE &&
      N.getOperand(0).getOpcode() == ISD::AssertSext)
    return true;
  // We have sign-extended loads.
  if (N.getOpcode() == ISD::LOAD)
    return true;
  return false;
}

SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue Cmp = Op.getOperand(2);
  ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get();

  EVT VT = Op.getValueType();
  EVT LHSVT = LHS.getValueType();
  EVT RHSVT = RHS.getValueType();

  if (LHSVT == MVT::v2i16) {
    assert(CC == ISD::SETEQ || CC == ISD::SETNE ||
           ISD::isSignedIntSetCC(CC) || ISD::isUnsignedIntSetCC(CC));
    unsigned ExtOpc = ISD::isSignedIntSetCC(CC) ? ISD::SIGN_EXTEND
                                                : ISD::ZERO_EXTEND;
    SDValue LX = DAG.getNode(ExtOpc, dl, MVT::v2i32, LHS);
    SDValue RX = DAG.getNode(ExtOpc, dl, MVT::v2i32, RHS);
    SDValue SC = DAG.getNode(ISD::SETCC, dl, MVT::v2i1, LX, RX, Cmp);
    return SC;
  }

  // Treat all other vector types as legal.
  if (VT.isVector())
    return Op;

  // Equals and not equals should use sign-extend, not zero-extend, since
  // we can represent small negative values in the compare instructions.
  // The LLVM default is to use zero-extend arbitrarily in these cases.
  if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
      (RHSVT == MVT::i8 || RHSVT == MVT::i16) &&
      (LHSVT == MVT::i8 || LHSVT == MVT::i16)) {
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
    if (C && C->getAPIntValue().isNegative()) {
      LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS);
      RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS);
      return DAG.getNode(ISD::SETCC, dl, Op.getValueType(),
                         LHS, RHS, Op.getOperand(2));
    }
    if (isSExtFree(LHS) || isSExtFree(RHS)) {
      LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS);
      RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS);
      return DAG.getNode(ISD::SETCC, dl, Op.getValueType(),
                         LHS, RHS, Op.getOperand(2));
    }
  }
  return SDValue();
}
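
// Illustrative example (not from the original source): for "seteq i8 %x, -1"
// the default zero-extension would turn the constant into 255, while
// sign-extending both sides keeps it as -1, a small negative value that the
// compare-with-immediate instructions can encode directly.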

SDValue
HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue PredOp = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
  EVT OpVT = Op1.getValueType();
  SDLoc DL(Op);

  if (OpVT == MVT::v2i16) {
    SDValue X1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op1);
    SDValue X2 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op2);
    SDValue SL = DAG.getNode(ISD::VSELECT, DL, MVT::v2i32, PredOp, X1, X2);
    SDValue TR = DAG.getNode(ISD::TRUNCATE, DL, MVT::v2i16, SL);
    return TR;
  }

  return SDValue();
}

static Constant *convert_i1_to_i8(const Constant *ConstVal) {
  SmallVector<Constant *, 128> NewConst;
  const ConstantVector *CV = dyn_cast<ConstantVector>(ConstVal);
  if (!CV)
    return nullptr;

  LLVMContext &Ctx = ConstVal->getContext();
  IRBuilder<> IRB(Ctx);
  unsigned NumVectorElements = CV->getNumOperands();
  assert(isPowerOf2_32(NumVectorElements) &&
         "conversion only supported for pow2 VectorSize!");

  for (unsigned i = 0; i < NumVectorElements / 8; ++i) {
    uint8_t x = 0;
    for (unsigned j = 0; j < 8; ++j) {
      uint8_t y = CV->getOperand(i * 8 + j)->getUniqueInteger().getZExtValue();
      x |= y << j;
    }
    assert((x == 0 || x == 255) && "Either all 0's or all 1's expected!");
    NewConst.push_back(IRB.getInt8(x));
  }
  return ConstantVector::get(NewConst);
}
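
// Illustrative example (not from the original source): an all-ones <8 x i1>
// constant packs into the single byte 255, and an all-zeros one into 0; the
// assert above rejects any group of eight i1 elements with mixed values, so
// only byte-uniform predicate constants are converted.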

SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
  EVT ValTy = Op.getValueType();
  ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
  Constant *CVal = nullptr;
  bool isVTi1Type = false;
  if (const Constant *ConstVal = dyn_cast<Constant>(CPN->getConstVal())) {
    Type *CValTy = ConstVal->getType();
    if (CValTy->isVectorTy() &&
        CValTy->getVectorElementType()->isIntegerTy(1)) {
      CVal = convert_i1_to_i8(ConstVal);
      isVTi1Type = (CVal != nullptr);
    }
  }
  unsigned Align = CPN->getAlignment();
  bool IsPositionIndependent = isPositionIndependent();
  unsigned char TF = IsPositionIndependent ? HexagonII::MO_PCREL : 0;

  unsigned Offset = 0;
  SDValue T;
  if (CPN->isMachineConstantPoolEntry())
    T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, Offset,
                                  TF);
  else if (isVTi1Type)
    T = DAG.getTargetConstantPool(CVal, ValTy, Align, Offset, TF);
  else
    T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, Offset, TF);

  assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF &&
         "Inconsistent target flag encountered");

  if (IsPositionIndependent)
    return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
  return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
}

SDValue
HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  int Idx = cast<JumpTableSDNode>(Op)->getIndex();
  if (isPositionIndependent()) {
    SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
    return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T);
  }

  SDValue T = DAG.getTargetJumpTable(Idx, VT);
  return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T);
}

SDValue
HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return LR, which contains the return address. Mark it an implicit live-in.
  unsigned Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}

SDValue
HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
                                         HRI.getFrameRegister(), VT);
  while (Depth--)
    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
                            MachinePointerInfo());
  return FrameAddr;
}

SDValue
HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
  SDLoc dl(Op);
  return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
}

SDValue
HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  auto *GAN = cast<GlobalAddressSDNode>(Op);
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  auto *GV = GAN->getGlobal();
  int64_t Offset = GAN->getOffset();

  auto &HLOF = *HTM.getObjFileLowering();
  Reloc::Model RM = HTM.getRelocationModel();

  if (RM == Reloc::Static) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
    const GlobalObject *GO = GV->getBaseObject();
    if (GO && HLOF.isGlobalInSmallSection(GO, HTM))
      return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
    return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
  }

  bool UsePCRel = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
  if (UsePCRel) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset,
                                            HexagonII::MO_PCREL);
    return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA);
  }

  // Use GOT index.
  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
  SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT);
  SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
  return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off);
}

SDValue
HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  SDLoc dl(Op);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  Reloc::Model RM = HTM.getRelocationModel();
  if (RM == Reloc::Static) {
    SDValue A = DAG.getTargetBlockAddress(BA, PtrVT);
    return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A);
  }

  SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL);
  return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A);
}

SDValue
HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
      const {
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT,
                                               HexagonII::MO_PCREL);
  return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym);
}

SDValue
HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
      GlobalAddressSDNode *GA, SDValue Glue, EVT PtrVT, unsigned ReturnReg,
      unsigned char OperandFlags) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDLoc dl(GA);
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
                                           GA->getValueType(0),
                                           GA->getOffset(),
                                           OperandFlags);
  // Create Operands for the call. The Operands should have the following:
  // 1. Chain SDValue
  // 2. Callee which in this case is the Global address value.
  // 3. Registers live into the call. In this case it is R0, as we
  //    have just one argument to be passed.
  // 4. Glue.
  // Note: The order is important.

  const auto &HRI = *Subtarget.getRegisterInfo();
  const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallingConv::C);
  assert(Mask && "Missing call preserved mask for calling convention");
  SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT),
                    DAG.getRegisterMask(Mask), Glue };
  Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);

  // Inform MFI that function has calls.
  MFI.setAdjustsStack(true);

  Glue = Chain.getValue(1);
  return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
}

//
// Lower using the initial-exec model for TLS addresses.
//
SDValue
HexagonTargetLowering::LowerToTLSInitialExecModel(GlobalAddressSDNode *GA,
      SelectionDAG &DAG) const {
  SDLoc dl(GA);
  int64_t Offset = GA->getOffset();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Get the thread pointer.
  SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);

  bool IsPositionIndependent = isPositionIndependent();
  unsigned char TF =
      IsPositionIndependent ? HexagonII::MO_IEGOT : HexagonII::MO_IE;

  // First generate the TLS symbol address.
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT,
                                           Offset, TF);

  SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);

  if (IsPositionIndependent) {
    // Generate the GOT pointer in case of position independent code.
    SDValue GOT = LowerGLOBAL_OFFSET_TABLE(Sym, DAG);

    // Add the TLS symbol address to the GOT pointer. This gives
    // a GOT-relative relocation for the symbol.
    Sym = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
  }

  // Load the offset value for the TLS symbol. This offset is relative to
  // the thread pointer.
  SDValue LoadOffset =
      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Sym, MachinePointerInfo());

  // The address of the thread-local variable is the thread pointer plus
  // the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, TP, LoadOffset);
}

//
// Lower using the local-exec model for TLS addresses.
//
SDValue
HexagonTargetLowering::LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
      SelectionDAG &DAG) const {
  SDLoc dl(GA);
  int64_t Offset = GA->getOffset();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Get the thread pointer.
  SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
  // Generate the TLS symbol address.
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
                                           HexagonII::MO_TPREL);
  SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);

  // The address of the thread-local variable is the thread pointer plus
  // the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, TP, Sym);
}

//
// Lower using the general-dynamic model for TLS addresses.
//
SDValue
HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
      SelectionDAG &DAG) const {
  SDLoc dl(GA);
  int64_t Offset = GA->getOffset();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // First generate the TLS symbol address.
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
                                           HexagonII::MO_GDGOT);

  // Then, generate the GOT pointer.
  SDValue GOT = LowerGLOBAL_OFFSET_TABLE(TGA, DAG);

  // Add the TLS symbol and the GOT pointer.
  SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
  SDValue Chain = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);

  // Copy over the argument to R0.
  SDValue InFlag;
  Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag);
  InFlag = Chain.getValue(1);

  unsigned Flags =
      static_cast<const HexagonSubtarget &>(DAG.getSubtarget()).useLongCalls()
          ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended
          : HexagonII::MO_GDPLT;

  return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT,
                           Hexagon::R0, Flags);
}

//
// Lower TLS addresses.
//
// For now, for dynamic models, we only support the general-dynamic model.
//
SDValue
HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);

  switch (HTM.getTLSModel(GA->getGlobal())) {
    case TLSModel::GeneralDynamic:
    case TLSModel::LocalDynamic:
      return LowerToTLSGeneralDynamicModel(GA, DAG);
    case TLSModel::InitialExec:
      return LowerToTLSInitialExecModel(GA, DAG);
    case TLSModel::LocalExec:
      return LowerToTLSLocalExecModel(GA, DAG);
  }
  llvm_unreachable("Bogus TLS model");
}

//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//

HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
                                             const HexagonSubtarget &ST)
    : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
      Subtarget(ST) {
  bool IsV4 = !Subtarget.hasV5TOps();
  auto &HRI = *Subtarget.getRegisterInfo();

  setPrefLoopAlignment(4);
  setPrefFunctionAlignment(4);
  setMinFunctionAlignment(2);
  setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
  setBooleanContents(TargetLoweringBase::UndefinedBooleanContent);
  setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent);

  setMaxAtomicSizeInBitsSupported(64);
  setMinCmpXchgSizeInBits(32);

  if (EnableHexSDNodeSched)
    setSchedulingPreference(Sched::VLIW);
  else
    setSchedulingPreference(Sched::Source);

  // Limits for inline expansion of memcpy/memmove.
  MaxStoresPerMemcpy = MaxStoresPerMemcpyCL;
  MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL;
  MaxStoresPerMemmove = MaxStoresPerMemmoveCL;
  MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL;
  MaxStoresPerMemset = MaxStoresPerMemsetCL;
  MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL;

  //
  // Set up register classes.
  //

  addRegisterClass(MVT::i1,    &Hexagon::PredRegsRegClass);
  addRegisterClass(MVT::v2i1,  &Hexagon::PredRegsRegClass);  // bbbbaaaa
  addRegisterClass(MVT::v4i1,  &Hexagon::PredRegsRegClass);  // ddccbbaa
  addRegisterClass(MVT::v8i1,  &Hexagon::PredRegsRegClass);  // hgfedcba
  addRegisterClass(MVT::i32,   &Hexagon::IntRegsRegClass);
  addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
  addRegisterClass(MVT::v4i8,  &Hexagon::IntRegsRegClass);
  addRegisterClass(MVT::i64,   &Hexagon::DoubleRegsRegClass);
  addRegisterClass(MVT::v8i8,  &Hexagon::DoubleRegsRegClass);
  addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
  addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);

  if (Subtarget.hasV5TOps()) {
    addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
    addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
  }

  //
  // Handling of scalar operations.
  //
  // All operations default to "legal", except:
  // - indexed loads and stores (pre-/post-incremented),
  // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
  //   ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
  //   FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP,
  //   FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG,
  // which default to "expand" for at least one type.

  // Misc operations.
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);  // Default: expand
  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);  // Default: expand

  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Custom legalize GlobalAddress nodes into CONST32.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  // Hexagon needs to optimize cases with negative constants.
  setOperationAction(ISD::SETCC, MVT::i8, Custom);
  setOperationAction(ISD::SETCC, MVT::i16, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);

  if (EmitJumpTables)
    setMinimumJumpTableEntries(MinimumJumpTables);
  else
    setMinimumJumpTableEntries(std::numeric_limits<int>::max());
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Hexagon has instructions for add/sub with carry. The problem with
  // modeling these instructions is that they produce 2 results: Rdd and Px.
  // To model the update of Px, we will have to use Defs[p0..p3] which will
  // cause any predicate live range to spill. So, we pretend we don't have
  // these instructions.
  setOperationAction(ISD::ADDE, MVT::i8, Expand);
  setOperationAction(ISD::ADDE, MVT::i16, Expand);
  setOperationAction(ISD::ADDE, MVT::i32, Expand);
  setOperationAction(ISD::ADDE, MVT::i64, Expand);
  setOperationAction(ISD::SUBE, MVT::i8, Expand);
  setOperationAction(ISD::SUBE, MVT::i16, Expand);
  setOperationAction(ISD::SUBE, MVT::i32, Expand);
  setOperationAction(ISD::SUBE, MVT::i64, Expand);
  setOperationAction(ISD::ADDC, MVT::i8, Expand);
  setOperationAction(ISD::ADDC, MVT::i16, Expand);
  setOperationAction(ISD::ADDC, MVT::i32, Expand);
  setOperationAction(ISD::ADDC, MVT::i64, Expand);
  setOperationAction(ISD::SUBC, MVT::i8, Expand);
  setOperationAction(ISD::SUBC, MVT::i16, Expand);
  setOperationAction(ISD::SUBC, MVT::i32, Expand);
  setOperationAction(ISD::SUBC, MVT::i64, Expand);

  // Only add and sub that detect overflow are the saturating ones.
  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::UADDO, VT, Expand);
    setOperationAction(ISD::SADDO, VT, Expand);
    setOperationAction(ISD::USUBO, VT, Expand);
    setOperationAction(ISD::SSUBO, VT, Expand);
  }

  setOperationAction(ISD::CTLZ, MVT::i8, Promote);
  setOperationAction(ISD::CTLZ, MVT::i16, Promote);
  setOperationAction(ISD::CTTZ, MVT::i8, Promote);
  setOperationAction(ISD::CTTZ, MVT::i16, Promote);

  // In V5, popcount can count # of 1s in i64 but returns i32.
  // On V4 it will be expanded (set later).
  setOperationAction(ISD::CTPOP, MVT::i8, Promote);
  setOperationAction(ISD::CTPOP, MVT::i16, Promote);
  setOperationAction(ISD::CTPOP, MVT::i32, Promote);
  setOperationAction(ISD::CTPOP, MVT::i64, Legal);

  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
  setOperationAction(ISD::BSWAP, MVT::i32, Legal);
  setOperationAction(ISD::BSWAP, MVT::i64, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);

  for (unsigned IntExpOp :
       { ISD::SDIV,      ISD::UDIV,      ISD::SREM,      ISD::UREM,
         ISD::SDIVREM,   ISD::UDIVREM,   ISD::ROTL,      ISD::ROTR,
         ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
         ISD::SMUL_LOHI, ISD::UMUL_LOHI }) {
    setOperationAction(IntExpOp, MVT::i32, Expand);
    setOperationAction(IntExpOp, MVT::i64, Expand);
  }

  for (unsigned FPExpOp :
       {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
        ISD::FPOW, ISD::FCOPYSIGN}) {
    setOperationAction(FPExpOp, MVT::f32, Expand);
    setOperationAction(FPExpOp, MVT::f64, Expand);
  }

  // No extending loads from i32.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
  }
  // Turn FP truncstore into trunc + store.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  // Turn FP extload into load/fpextend.
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);

  // Expand BR_CC and SELECT_CC for all integer and fp types.
  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::BR_CC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  }
  for (MVT VT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::BR_CC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  }
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  //
  // Handling of vector operations.
  //

  promoteLdStType(MVT::v4i8, MVT::i32);
  promoteLdStType(MVT::v2i16, MVT::i32);
  promoteLdStType(MVT::v8i8, MVT::i64);
  promoteLdStType(MVT::v4i16, MVT::i64);
  promoteLdStType(MVT::v2i32, MVT::i64);

  // Set the action for vector operations to "expand", then override it with
  // either "custom" or "legal" for specific cases.
  static const unsigned VectExpOps[] = {
    // Integer arithmetic:
    ISD::ADD,     ISD::SUB,     ISD::MUL,     ISD::SDIV,      ISD::UDIV,
    ISD::SREM,    ISD::UREM,    ISD::SDIVREM, ISD::UDIVREM,   ISD::ADDC,
    ISD::SUBC,    ISD::SADDO,   ISD::UADDO,   ISD::SSUBO,     ISD::USUBO,
    ISD::SMUL_LOHI,             ISD::UMUL_LOHI,
    // Logical/bit:
    ISD::AND,     ISD::OR,      ISD::XOR,     ISD::ROTL,      ISD::ROTR,
    ISD::CTPOP,   ISD::CTLZ,    ISD::CTTZ,
    // Floating point arithmetic/math functions:
    ISD::FADD,    ISD::FSUB,    ISD::FMUL,    ISD::FMA,       ISD::FDIV,
    ISD::FREM,    ISD::FNEG,    ISD::FABS,    ISD::FSQRT,     ISD::FSIN,
    ISD::FCOS,    ISD::FPOW,    ISD::FLOG,    ISD::FLOG2,
    ISD::FLOG10,  ISD::FEXP,    ISD::FEXP2,   ISD::FCEIL,     ISD::FTRUNC,
    ISD::FRINT,   ISD::FNEARBYINT,            ISD::FROUND,    ISD::FFLOOR,
    ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS,
    // Misc:
    ISD::BR_CC,   ISD::SELECT_CC,             ISD::ConstantPool,
    // Vector:
    ISD::BUILD_VECTOR,          ISD::SCALAR_TO_VECTOR,
    ISD::EXTRACT_VECTOR_ELT,    ISD::INSERT_VECTOR_ELT,
    ISD::EXTRACT_SUBVECTOR,     ISD::INSERT_SUBVECTOR,
    ISD::CONCAT_VECTORS,        ISD::VECTOR_SHUFFLE
  };

  for (MVT VT : MVT::vector_valuetypes()) {
    for (unsigned VectExpOp : VectExpOps)
      setOperationAction(VectExpOp, VT, Expand);

    // Expand all extending loads and truncating stores:
    for (MVT TargetVT : MVT::vector_valuetypes()) {
      if (TargetVT == VT)
        continue;
      setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, TargetVT, VT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, TargetVT, VT, Expand);
      setTruncStoreAction(VT, TargetVT, Expand);
    }

    // Normalize all inputs to SELECT to be vectors of i32.
    if (VT.getVectorElementType() != MVT::i32) {
      MVT VT32 = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType(ISD::SELECT, VT, VT32);
    }
    setOperationAction(ISD::SRA, VT, Custom);
    setOperationAction(ISD::SHL, VT, Custom);
    setOperationAction(ISD::SRL, VT, Custom);
  }

  // Extending loads from (native) vectors of i8 into (native) vectors of i16
  // are legal.
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);

  // Types natively supported:
  for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
                       MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
    setOperationAction(ISD::BUILD_VECTOR, NativeVT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, NativeVT, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, NativeVT, Custom);
    setOperationAction(ISD::INSERT_SUBVECTOR, NativeVT, Custom);
    setOperationAction(ISD::CONCAT_VECTORS, NativeVT, Custom);

    setOperationAction(ISD::ADD, NativeVT, Legal);
    setOperationAction(ISD::SUB, NativeVT, Legal);
    setOperationAction(ISD::MUL, NativeVT, Legal);
    setOperationAction(ISD::AND, NativeVT, Legal);
    setOperationAction(ISD::OR, NativeVT, Legal);
    setOperationAction(ISD::XOR, NativeVT, Legal);
  }

  // Custom lower unaligned loads.
  for (MVT VecVT : {MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
                    MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
    setOperationAction(ISD::LOAD, VecVT, Custom);
  }

  for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v2i32, MVT::v4i16}) {
    setCondCodeAction(ISD::SETLT, VT, Expand);
    setCondCodeAction(ISD::SETLE, VT, Expand);
    setCondCodeAction(ISD::SETULT, VT, Expand);
    setCondCodeAction(ISD::SETULE, VT, Expand);
  }

  // Custom-lower bitcasts from i8 to v8i1.
  setOperationAction(ISD::BITCAST, MVT::i8, Custom);
  setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
  setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);

  //
  // Subtarget-specific operation actions.
  //
  if (Subtarget.hasV5TOps()) {
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FADD, MVT::f64, Expand);
    setOperationAction(ISD::FSUB, MVT::f64, Expand);
    setOperationAction(ISD::FMUL, MVT::f64, Expand);

    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);

    setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
    setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
    setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
    setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
    setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
    setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
    setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  } else { // V4
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
    setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
    setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);

    setOperationAction(ISD::CTPOP, MVT::i8, Expand);
    setOperationAction(ISD::CTPOP, MVT::i16, Expand);
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
1563 // Expand these operations for both f32 and f64:
1564 for (unsigned FPExpOpV4 :
1565 {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FABS, ISD::FNEG, ISD::FMA}) {
1566 setOperationAction(FPExpOpV4, MVT::f32, Expand);
1567 setOperationAction(FPExpOpV4, MVT::f64, Expand);
1568 }
1570 for (ISD::CondCode FPExpCCV4 :
1571 {ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE,
1572 ISD::SETUO, ISD::SETO}) {
1573 setCondCodeAction(FPExpCCV4, MVT::f32, Expand);
1574 setCondCodeAction(FPExpCCV4, MVT::f64, Expand);
1575 }
1576 }
1578 // Handling of indexed loads/stores: default is "expand".
1580 for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::v2i16,
1581 MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) {
1582 setIndexedLoadAction(ISD::POST_INC, VT, Legal);
1583 setIndexedStoreAction(ISD::POST_INC, VT, Legal);
1584 }
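// The post-increment forms map directly onto Hexagon addressing modes,
// e.g. "r1 = memh(r0++#2)" implements a post-incremented i16 load.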
1586 if (Subtarget.useHVXOps())
1587 initializeHVXLowering();
1589 computeRegisterProperties(&HRI);
1592 // Library calls for unsupported operations
1594 bool FastMath = EnableFastMath;
1596 setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
1597 setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
1598 setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
1599 setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
1600 setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
1601 setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
1602 setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
1603 setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
1605 setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
1606 setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
1607 setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
1608 setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
1609 setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
1610 setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
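// The names follow the libgcc convention: si/di/ti denote 32/64/128-bit
// integers and sf/df denote float/double, with the "__hexagon_" prefix
// selecting the Hexagon runtime's implementations.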
1613 // Handle single-precision floating point operations on V4.
1614 if (FastMath) {
1615 setLibcallName(RTLIB::ADD_F32, "__hexagon_fast_addsf3");
1616 setLibcallName(RTLIB::SUB_F32, "__hexagon_fast_subsf3");
1617 setLibcallName(RTLIB::MUL_F32, "__hexagon_fast_mulsf3");
1618 setLibcallName(RTLIB::OGT_F32, "__hexagon_fast_gtsf2");
1619 setLibcallName(RTLIB::OLT_F32, "__hexagon_fast_ltsf2");
1620 // Double-precision compares.
1621 setLibcallName(RTLIB::OGT_F64, "__hexagon_fast_gtdf2");
1622 setLibcallName(RTLIB::OLT_F64, "__hexagon_fast_ltdf2");
1623 } else {
1624 setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
1625 setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
1626 setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
1627 setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
1628 setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
1629 // Double-precision compares.
1630 setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
1631 setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
1632 }
1635 // This is the only fast library function for sqrtd.
1636 if (FastMath)
1637 setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");
1639 // Prefix is: nothing for "slow-math",
1640 // "fast2_" for V4 fast-math and V5+ fast-math double-precision
1641 // (actually, keep fast-math and fast-math2 separate for now)
1642 if (FastMath) {
1643 setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
1644 setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
1645 setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
1646 setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
1647 // Calling __hexagon_fast2_divsf3 with fast-math on V5 (ok).
1648 setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
1649 } else {
1650 setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
1651 setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
1652 setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
1653 setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
1654 setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
1655 }
1657 if (Subtarget.hasV5TOps()) {
1658 if (FastMath)
1659 setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
1660 else
1661 setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
1662 } else {
1663 // V4
1664 setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
1665 setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
1666 setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
1667 setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
1668 setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
1669 setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
1670 setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
1671 setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
1672 setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
1673 setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
1674 setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
1675 setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
1676 setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
1677 setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
1678 setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
1679 setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
1680 setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
1681 setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
1682 setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
1683 setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
1684 setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
1685 setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
1686 setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
1687 setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
1688 setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
1689 setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
1690 setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
1691 setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
1692 setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
1693 setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
1694 }
1696 // These cause problems when the shift amount is non-constant.
1697 setLibcallName(RTLIB::SHL_I128, nullptr);
1698 setLibcallName(RTLIB::SRL_I128, nullptr);
1699 setLibcallName(RTLIB::SRA_I128, nullptr);
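// Setting a libcall name to nullptr removes the libcall entirely,
// forcing the legalizer to expand 128-bit shifts inline instead of
// emitting a call.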
1700 }
1702 const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
1703 switch ((HexagonISD::NodeType)Opcode) {
1704 case HexagonISD::ALLOCA: return "HexagonISD::ALLOCA";
1705 case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT";
1706 case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL";
1707 case HexagonISD::BARRIER: return "HexagonISD::BARRIER";
1708 case HexagonISD::CALL: return "HexagonISD::CALL";
1709 case HexagonISD::CALLnr: return "HexagonISD::CALLnr";
1710 case HexagonISD::CALLR: return "HexagonISD::CALLR";
1711 case HexagonISD::COMBINE: return "HexagonISD::COMBINE";
1712 case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
1713 case HexagonISD::CONST32: return "HexagonISD::CONST32";
1714 case HexagonISD::CP: return "HexagonISD::CP";
1715 case HexagonISD::DCFETCH: return "HexagonISD::DCFETCH";
1716 case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN";
1717 case HexagonISD::TSTBIT: return "HexagonISD::TSTBIT";
1718 case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU";
1719 case HexagonISD::INSERT: return "HexagonISD::INSERT";
1720 case HexagonISD::JT: return "HexagonISD::JT";
1721 case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
1722 case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
1723 case HexagonISD::VASL: return "HexagonISD::VASL";
1724 case HexagonISD::VASR: return "HexagonISD::VASR";
1725 case HexagonISD::VLSR: return "HexagonISD::VLSR";
1726 case HexagonISD::VSPLAT: return "HexagonISD::VSPLAT";
1727 case HexagonISD::VEXTRACTW: return "HexagonISD::VEXTRACTW";
1728 case HexagonISD::VINSERTW0: return "HexagonISD::VINSERTW0";
1729 case HexagonISD::VROR: return "HexagonISD::VROR";
1730 case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE";
1731 case HexagonISD::VZERO: return "HexagonISD::VZERO";
1732 case HexagonISD::D2P: return "HexagonISD::D2P";
1733 case HexagonISD::P2D: return "HexagonISD::P2D";
1734 case HexagonISD::V2Q: return "HexagonISD::V2Q";
1735 case HexagonISD::Q2V: return "HexagonISD::Q2V";
1736 case HexagonISD::QCAT: return "HexagonISD::QCAT";
1737 case HexagonISD::QTRUE: return "HexagonISD::QTRUE";
1738 case HexagonISD::QFALSE: return "HexagonISD::QFALSE";
1739 case HexagonISD::TYPECAST: return "HexagonISD::TYPECAST";
1740 case HexagonISD::VALIGN: return "HexagonISD::VALIGN";
1741 case HexagonISD::VALIGNADDR: return "HexagonISD::VALIGNADDR";
1742 case HexagonISD::OP_END: break;
1743 }
1744 return nullptr;
1745 }
1747 /// Given an intrinsic, checks if on the target the intrinsic will need to map
1748 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1749 /// true and store the intrinsic information into the IntrinsicInfo that was
1750 /// passed to the function.
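// The V6 vgather intrinsics handled below both read memory (the
// gathered elements) and write it (the destination area), so they are
// reported with load, store, and volatile flags.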
1751 bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1752 const CallInst &I,
1753 MachineFunction &MF,
1754 unsigned Intrinsic) const {
1755 switch (Intrinsic) {
1756 case Intrinsic::hexagon_V6_vgathermw:
1757 case Intrinsic::hexagon_V6_vgathermw_128B:
1758 case Intrinsic::hexagon_V6_vgathermh:
1759 case Intrinsic::hexagon_V6_vgathermh_128B:
1760 case Intrinsic::hexagon_V6_vgathermhw:
1761 case Intrinsic::hexagon_V6_vgathermhw_128B:
1762 case Intrinsic::hexagon_V6_vgathermwq:
1763 case Intrinsic::hexagon_V6_vgathermwq_128B:
1764 case Intrinsic::hexagon_V6_vgathermhq:
1765 case Intrinsic::hexagon_V6_vgathermhq_128B:
1766 case Intrinsic::hexagon_V6_vgathermhwq:
1767 case Intrinsic::hexagon_V6_vgathermhwq_128B: {
1768 const Module &M = *I.getParent()->getParent()->getParent();
1769 Info.opc = ISD::INTRINSIC_W_CHAIN;
1770 Type *VecTy = I.getArgOperand(1)->getType();
1771 Info.memVT = MVT::getVT(VecTy);
1772 Info.ptrVal = I.getArgOperand(0);
1773 Info.offset = 0;
1774 Info.align = M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8;
1775 Info.flags = MachineMemOperand::MOLoad |
1776 MachineMemOperand::MOStore |
1777 MachineMemOperand::MOVolatile;
1778 return true;
1779 }
1780 default:
1781 break;
1782 }
1783 return false;
1784 }
1786 bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
1787 return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
1788 }
1790 bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
1791 if (!VT1.isSimple() || !VT2.isSimple())
1792 return false;
1793 return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
1794 }
1796 bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
1797 return isOperationLegalOrCustom(ISD::FMA, VT);
1798 }
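// Note on isTruncateFree above: a 64-bit value occupies a register
// pair, so truncating i64 to i32 is just a use of the low subregister
// and generates no code.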
1800 // Should we expand the build vector with shuffles?
1801 bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
1802 unsigned DefinedValues) const {
1803 return false;
1804 }
1806 bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
1807 EVT VT) const {
1808 return true;
1809 }
1811 TargetLoweringBase::LegalizeTypeAction
1812 HexagonTargetLowering::getPreferredVectorAction(EVT VT) const {
1813 if (VT.getVectorNumElements() == 1)
1814 return TargetLoweringBase::TypeScalarizeVector;
1816 // Always widen vectors of i1.
1817 MVT ElemTy = VT.getSimpleVT().getVectorElementType();
1818 if (ElemTy == MVT::i1)
1819 return TargetLoweringBase::TypeWidenVector;
1821 if (Subtarget.useHVXOps()) {
1822 // If the size of VT is at least half of the vector length,
1823 // widen the vector. Note: the threshold was not selected in
1824 // any scientific way.
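// For example, with 512-bit HVX vectors a 256-bit v16i16 would be
// widened to v32i16 here rather than split.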
1825 ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
1826 if (llvm::find(Tys, ElemTy) != Tys.end()) {
1827 unsigned HwWidth = 8*Subtarget.getVectorLength();
1828 unsigned VecWidth = VT.getSizeInBits();
1829 if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
1830 return TargetLoweringBase::TypeWidenVector;
1831 }
1832 }
1833 return TargetLoweringBase::TypeSplitVector;
1834 }
1836 std::pair<SDValue, int>
1837 HexagonTargetLowering::getBaseAndOffset(SDValue Addr) const {
1838 if (Addr.getOpcode() == ISD::ADD) {
1839 SDValue Op1 = Addr.getOperand(1);
1840 if (auto *CN = dyn_cast<const ConstantSDNode>(Op1.getNode()))
1841 return { Addr.getOperand(0), CN->getSExtValue() };
1842 }
1843 return { Addr, 0 };
1844 }
1846 // Lower a vector shuffle (V1, V2, V3). V1 and V2 are the two vectors
1847 // to select data from, V3 is the permutation.
1848 SDValue
1849 HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
1850 const {
1851 const auto *SVN = cast<ShuffleVectorSDNode>(Op);
1852 ArrayRef<int> AM = SVN->getMask();
1853 assert(AM.size() <= 8 && "Unexpected shuffle mask");
1854 unsigned VecLen = AM.size();
1856 MVT VecTy = ty(Op);
1857 assert(!Subtarget.isHVXVectorType(VecTy, true) &&
1858 "HVX shuffles should be legal");
1859 assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length");
1861 SDValue Op0 = Op.getOperand(0);
1862 SDValue Op1 = Op.getOperand(1);
1863 const SDLoc &dl(Op);
1865 // If the inputs are not the same as the output, bail. This is not an
1866 // error situation, but complicates the handling and the default expansion
1867 // (into BUILD_VECTOR) should be adequate.
1868 if (ty(Op0) != VecTy || ty(Op1) != VecTy)
1871 // Normalize the mask so that the first non-negative index comes from
1872 // the first operand.
1873 SmallVector<int,8> Mask(AM.begin(), AM.end());
1874 unsigned F = llvm::find_if(AM, [](int M) { return M >= 0; }) - AM.data();
1875 if (F == AM.size())
1876 return DAG.getUNDEF(VecTy);
1877 if (AM[F] >= int(VecLen)) {
1878 ShuffleVectorSDNode::commuteMask(Mask);
1879 std::swap(Op0, Op1);
1880 }
1882 // Express the shuffle mask in terms of bytes.
1883 SmallVector<int,8> ByteMask;
1884 unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8;
1885 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
1886 int M = Mask[i];
1887 if (M < 0) {
1888 for (unsigned j = 0; j != ElemBytes; ++j)
1889 ByteMask.push_back(-1);
1890 } else {
1891 for (unsigned j = 0; j != ElemBytes; ++j)
1892 ByteMask.push_back(M*ElemBytes + j);
1893 }
1894 }
1895 assert(ByteMask.size() <= 8);
1897 // All non-undef (non-negative) indexes are well within [0..127], so they
1898 // fit in a single byte. Build two 64-bit words:
1899 // - MaskIdx where each byte is the corresponding index (for non-negative
1900 // indexes), and 0xFF for negative indexes, and
1901 // - MaskUnd that has 0xFF for each negative index.
1902 uint64_t MaskIdx = 0;
1903 uint64_t MaskUnd = 0;
1904 for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) {
1905 unsigned S = 8*i;
1906 uint64_t M = ByteMask[i] & 0xFF;
1907 if (M == 0xFF)
1908 MaskUnd |= M << S;
1909 MaskIdx |= M << S;
1910 }
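// Worked example: the v4i8 shuffle mask {1,3,5,7} yields
// ByteMask {1,3,5,7} and MaskIdx == 0x07050301, which matches the
// S2_vtrunohb pattern below (the odd bytes of the Op1:Op0 pair).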
1912 if (ByteMask.size() == 4) {
1913 // Identity.
1914 if (MaskIdx == (0x03020100 | MaskUnd))
1915 return Op0;
1916 // Byte swap.
1917 if (MaskIdx == (0x00010203 | MaskUnd)) {
1918 SDValue T0 = DAG.getBitcast(MVT::i32, Op0);
1919 SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0);
1920 return DAG.getBitcast(VecTy, T1);
1921 }
1923 // Byte packs.
1924 SDValue Concat10 = DAG.getNode(HexagonISD::COMBINE, dl,
1925 typeJoin({ty(Op1), ty(Op0)}), {Op1, Op0});
1926 if (MaskIdx == (0x06040200 | MaskUnd))
1927 return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG);
1928 if (MaskIdx == (0x07050301 | MaskUnd))
1929 return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG);
1931 SDValue Concat01 = DAG.getNode(HexagonISD::COMBINE, dl,
1932 typeJoin({ty(Op0), ty(Op1)}), {Op0, Op1});
1933 if (MaskIdx == (0x02000604 | MaskUnd))
1934 return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG);
1935 if (MaskIdx == (0x03010705 | MaskUnd))
1936 return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG);
1937 }
1939 if (ByteMask.size() == 8) {
1940 // Identity.
1941 if (MaskIdx == (0x0706050403020100ull | MaskUnd))
1942 return Op0;
1943 // Byte swap.
1944 if (MaskIdx == (0x0001020304050607ull | MaskUnd)) {
1945 SDValue T0 = DAG.getBitcast(MVT::i64, Op0);
1946 SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0);
1947 return DAG.getBitcast(VecTy, T1);
1948 }
1950 // Halfword picks.
1951 if (MaskIdx == (0x0d0c050409080100ull | MaskUnd))
1952 return getInstr(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG);
1953 if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd))
1954 return getInstr(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG);
1955 if (MaskIdx == (0x0d0c090805040100ull | MaskUnd))
1956 return getInstr(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG);
1957 if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd))
1958 return getInstr(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG);
1959 if (MaskIdx == (0x0706030205040100ull | MaskUnd)) {
1960 VectorPair P = opSplit(Op0, dl, DAG);
1961 return getInstr(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG);
1962 }
1964 // Byte packs.
1965 if (MaskIdx == (0x0e060c040a020800ull | MaskUnd))
1966 return getInstr(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG);
1967 if (MaskIdx == (0x0f070d050b030901ull | MaskUnd))
1968 return getInstr(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG);
1969 }
1971 return SDValue();
1972 }
1974 // Create a Hexagon-specific node for shifting a vector by an integer.
1975 SDValue
1976 HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
1977 const {
1978 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) {
1979 if (SDValue S = BVN->getSplatValue()) {
1980 unsigned NewOpc;
1981 switch (Op.getOpcode()) {
1982 case ISD::SHL:
1983 NewOpc = HexagonISD::VASL;
1984 break;
1985 case ISD::SRA:
1986 NewOpc = HexagonISD::VASR;
1987 break;
1988 case ISD::SRL:
1989 NewOpc = HexagonISD::VLSR;
1990 break;
1991 default:
1992 llvm_unreachable("Unexpected shift opcode");
1993 }
1994 return DAG.getNode(NewOpc, SDLoc(Op), ty(Op), Op.getOperand(0), S);
1995 }
1996 }
1998 return SDValue();
1999 }
2001 SDValue
2002 HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
2003 return getVectorShiftByInt(Op, DAG);
2004 }
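// Note: only shifts by a splatted (uniform) amount become
// VASL/VASR/VLSR; e.g. (shl v2i16 X, <4,4>) is turned into
// (HexagonISD::VASL X, 4), while a non-uniform shift amount falls back
// to the default expansion.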
2006 SDValue
2007 HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
2008 MVT ResTy = ty(Op);
2009 SDValue InpV = Op.getOperand(0);
2010 MVT InpTy = ty(InpV);
2011 assert(ResTy.getSizeInBits() == InpTy.getSizeInBits());
2012 const SDLoc &dl(Op);
2014 // Handle conversion from i8 to v8i1.
2015 if (ResTy == MVT::v8i1) {
2016 SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
2017 SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
2018 return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
2019 }
2021 return SDValue();
2022 }
2024 bool
2025 HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
2026 MVT VecTy, SelectionDAG &DAG,
2027 MutableArrayRef<ConstantInt*> Consts) const {
2028 MVT ElemTy = VecTy.getVectorElementType();
2029 unsigned ElemWidth = ElemTy.getSizeInBits();
2030 IntegerType *IntTy = IntegerType::get(*DAG.getContext(), ElemWidth);
2031 bool AllConst = true;
2033 for (unsigned i = 0, e = Values.size(); i != e; ++i) {
2034 SDValue V = Values[i];
2035 if (V.isUndef()) {
2036 Consts[i] = ConstantInt::get(IntTy, 0);
2037 continue;
2038 }
2039 // Make sure to always cast to IntTy.
2040 if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) {
2041 const ConstantInt *CI = CN->getConstantIntValue();
2042 Consts[i] = ConstantInt::get(IntTy, CI->getValue().getSExtValue());
2043 } else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) {
2044 const ConstantFP *CF = CN->getConstantFPValue();
2045 APInt A = CF->getValueAPF().bitcastToAPInt();
2046 Consts[i] = ConstantInt::get(IntTy, A.getZExtValue());
2047 } else {
2048 AllConst = false;
2049 }
2050 }
2051 return AllConst;
2052 }
2054 SDValue
2055 HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
2056 MVT VecTy, SelectionDAG &DAG) const {
2057 MVT ElemTy = VecTy.getVectorElementType();
2058 assert(VecTy.getVectorNumElements() == Elem.size());
2060 SmallVector<ConstantInt*,4> Consts(Elem.size());
2061 bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
2063 unsigned First, Num = Elem.size();
2064 for (First = 0; First != Num; ++First)
2065 if (!isUndef(Elem[First]))
2066 break;
2067 if (First == Num)
2068 return DAG.getUNDEF(VecTy);
2070 if (AllConst &&
2071 llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
2072 return getZero(dl, VecTy, DAG);
2074 if (ElemTy == MVT::i16) {
2075 assert(Elem.size() == 2);
2076 if (AllConst) {
2077 uint32_t V = (Consts[0]->getZExtValue() & 0xFFFF) |
2078 Consts[1]->getZExtValue() << 16;
2079 return DAG.getBitcast(MVT::v2i16, DAG.getConstant(V, dl, MVT::i32));
2080 }
2081 SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32,
2082 {Elem[1], Elem[0]}, DAG);
2083 return DAG.getBitcast(MVT::v2i16, N);
2084 }
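// For example, BUILD_VECTOR <i16 1, i16 2> folds to the 32-bit constant
// 0x00020001, bitcast back to v2i16.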
2086 if (ElemTy == MVT::i8) {
2087 // First try generating a constant.
2088 if (AllConst) {
2089 int32_t V = (Consts[0]->getZExtValue() & 0xFF) |
2090 (Consts[1]->getZExtValue() & 0xFF) << 8 |
2091 (Consts[2]->getZExtValue() & 0xFF) << 16 |
2092 Consts[3]->getZExtValue() << 24;
2093 return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32));
2094 }
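// For example, <i8 1, i8 2, i8 3, i8 4> folds to 0x04030201, with
// element 0 in the lowest byte.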
2096 // Then try splat.
2097 bool IsSplat = true;
2098 for (unsigned i = 0; i != Num; ++i) {
2099 if (i == First)
2100 continue;
2101 if (Elem[i] == Elem[First] || isUndef(Elem[i]))
2102 continue;
2103 IsSplat = false;
2104 break;
2105 }
2106 if (IsSplat) {
2107 // Legalize the operand to VSPLAT.
2108 SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
2109 return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Ext);
2110 }
2112 // Generate
2113 // (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) |
2114 // (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16
2115 assert(Elem.size() == 4);
2116 SDValue Vs[4];
2117 for (unsigned i = 0; i != 4; ++i) {
2118 Vs[i] = DAG.getZExtOrTrunc(Elem[i], dl, MVT::i32);
2119 Vs[i] = DAG.getZeroExtendInReg(Vs[i], dl, MVT::i8);
2120 }
2121 SDValue S8 = DAG.getConstant(8, dl, MVT::i32);
2122 SDValue T0 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[1], S8});
2123 SDValue T1 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[3], S8});
2124 SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0});
2125 SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1});
2127 SDValue R = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG);
2128 return DAG.getBitcast(MVT::v4i8, R);
2129 }
2131 #ifndef NDEBUG
2132 dbgs() << "VecTy: " << EVT(VecTy).getEVTString() << '\n';
2133 #endif
2134 llvm_unreachable("Unexpected vector element type");
2135 }
2137 SDValue
2138 HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
2139 MVT VecTy, SelectionDAG &DAG) const {
2140 MVT ElemTy = VecTy.getVectorElementType();
2141 assert(VecTy.getVectorNumElements() == Elem.size());
2143 SmallVector<ConstantInt*,8> Consts(Elem.size());
2144 bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
2146 unsigned First, Num = Elem.size();
2147 for (First = 0; First != Num; ++First)
2148 if (!isUndef(Elem[First]))
2149 break;
2150 if (First == Num)
2151 return DAG.getUNDEF(VecTy);
2153 if (AllConst &&
2154 llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
2155 return getZero(dl, VecTy, DAG);
2157 // First try splat if possible.
2158 if (ElemTy == MVT::i16) {
2159 bool IsSplat = true;
2160 for (unsigned i = 0; i != Num; ++i) {
2161 if (i == First)
2162 continue;
2163 if (Elem[i] == Elem[First] || isUndef(Elem[i]))
2164 continue;
2165 IsSplat = false;
2166 break;
2167 }
2168 if (IsSplat) {
2169 // Legalize the operand to VSPLAT.
2170 SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
2171 return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Ext);
2172 }
2173 }
2175 // Then try constant.
2176 if (AllConst) {
2177 uint64_t Val = 0;
2178 unsigned W = ElemTy.getSizeInBits();
2179 uint64_t Mask = (ElemTy == MVT::i8) ? 0xFFull
2180 : (ElemTy == MVT::i16) ? 0xFFFFull : 0xFFFFFFFFull;
2181 for (unsigned i = 0; i != Num; ++i)
2182 Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask);
2183 SDValue V0 = DAG.getConstant(Val, dl, MVT::i64);
2184 return DAG.getBitcast(VecTy, V0);
2185 }
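// For example, v8i8 <1,2,3,4,5,6,7,8> folds to the i64 constant
// 0x0807060504030201.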
2187 // Build two 32-bit vectors and concatenate.
2188 MVT HalfTy = MVT::getVectorVT(ElemTy, Num/2);
2189 SDValue L = (ElemTy == MVT::i32)
2190 ? Elem[0]
2191 : buildVector32(Elem.take_front(Num/2), dl, HalfTy, DAG);
2192 SDValue H = (ElemTy == MVT::i32)
2193 ? Elem[1]
2194 : buildVector32(Elem.drop_front(Num/2), dl, HalfTy, DAG);
2195 return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, {H, L});
2196 }
2198 SDValue
2199 HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
2200 const SDLoc &dl, MVT ValTy, MVT ResTy,
2201 SelectionDAG &DAG) const {
2202 MVT VecTy = ty(VecV);
2203 assert(!ValTy.isVector() ||
2204 VecTy.getVectorElementType() == ValTy.getVectorElementType());
2205 unsigned VecWidth = VecTy.getSizeInBits();
2206 unsigned ValWidth = ValTy.getSizeInBits();
2207 unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits();
2208 assert((VecWidth % ElemWidth) == 0);
2209 auto *IdxN = dyn_cast<ConstantSDNode>(IdxV);
2211 // Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
2212 // without any coprocessors).
2213 if (ElemWidth == 1) {
2214 assert(VecWidth == VecTy.getVectorNumElements() && "Sanity failure");
2215 assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);
2216 // Check if this is an extract of the lowest bit.
2217 if (IdxN) {
2218 // Extracting the lowest bit is a no-op, but it changes the type,
2219 // so it must be kept as an operation to avoid errors related to
2220 // type mismatches.
2221 if (IdxN->isNullValue() && ValTy.getSizeInBits() == 1)
2222 return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
2223 }
2225 // If the value extracted is a single bit, use tstbit.
2226 if (ValWidth == 1) {
2227 SDValue A0 = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
2228 return DAG.getNode(HexagonISD::TSTBIT, dl, MVT::i1, A0, IdxV);
2229 }
2231 // Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in
2232 // a predicate register. The elements of the vector are repeated
2233 // in the register (if necessary) so that the total number is 8.
2234 // The extracted subvector will need to be expanded in such a way.
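// E.g. a v2i1 value occupies all 8 predicate bits, with each of its two
// elements replicated four times.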
2235 unsigned Scale = VecWidth / ValWidth;
2237 // Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
2238 // position 0.
2239 assert(ty(IdxV) == MVT::i32);
2240 SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2241 DAG.getConstant(8*Scale, dl, MVT::i32));
2242 SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
2243 SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
2244 while (Scale > 1) {
2245 // The longest possible subvector is at most 32 bits, so it is always
2246 // contained in the low subregister.
2247 T1 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, T1);
2248 T1 = expandPredicate(T1, dl, DAG);
2249 Scale /= 2;
2250 }
2252 return DAG.getNode(HexagonISD::D2P, dl, ResTy, T1);
2253 }
2255 assert(VecWidth == 32 || VecWidth == 64);
2257 // Cast everything to scalar integer types.
2258 MVT ScalarTy = tyScalar(VecTy);
2259 VecV = DAG.getBitcast(ScalarTy, VecV);
2261 SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
2263 SDValue ExtV;
2264 if (IdxN) {
2265 unsigned Off = IdxN->getZExtValue() * ElemWidth;
2266 if (VecWidth == 64 && ValWidth == 32) {
2267 assert(Off == 0 || Off == 32);
2268 unsigned SubIdx = Off == 0 ? Hexagon::isub_lo : Hexagon::isub_hi;
2269 ExtV = DAG.getTargetExtractSubreg(SubIdx, dl, MVT::i32, VecV);
2270 } else if (Off == 0 && (ValWidth % 8) == 0) {
2271 ExtV = DAG.getZeroExtendInReg(VecV, dl, tyScalar(ValTy));
2272 } else {
2273 SDValue OffV = DAG.getConstant(Off, dl, MVT::i32);
2274 // The return type of EXTRACTU must be the same as the type of the
2275 // input vector.
2276 ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
2277 {VecV, WidthV, OffV});
2278 }
2279 } else {
2280 if (ty(IdxV) != MVT::i32)
2281 IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
2282 SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2283 DAG.getConstant(ElemWidth, dl, MVT::i32));
2284 ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
2285 {VecV, WidthV, OffV});
2286 }
2288 // Cast ExtV to the requested result type.
2289 ExtV = DAG.getZExtOrTrunc(ExtV, dl, tyScalar(ResTy));
2290 ExtV = DAG.getBitcast(ResTy, ExtV);
2291 return ExtV;
2292 }
2294 SDValue
2295 HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
2296 const SDLoc &dl, MVT ValTy,
2297 SelectionDAG &DAG) const {
2298 MVT VecTy = ty(VecV);
2299 if (VecTy.getVectorElementType() == MVT::i1) {
2300 MVT ValTy = ty(ValV);
2301 assert(ValTy.getVectorElementType() == MVT::i1);
2302 SDValue ValR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, ValV);
2303 unsigned VecLen = VecTy.getVectorNumElements();
2304 unsigned Scale = VecLen / ValTy.getVectorNumElements();
2305 assert(Scale > 1);
2307 for (unsigned R = Scale; R > 1; R /= 2) {
2308 ValR = contractPredicate(ValR, dl, DAG);
2309 ValR = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
2310 DAG.getUNDEF(MVT::i32), ValR);
2311 }
2312 // The longest possible subvector is at most 32 bits, so it is always
2313 // contained in the low subregister.
2314 ValR = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, ValR);
2316 unsigned ValBytes = 64 / Scale;
2317 SDValue Width = DAG.getConstant(ValBytes*8, dl, MVT::i32);
2318 SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2319 DAG.getConstant(8, dl, MVT::i32));
2320 SDValue VecR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
2321 SDValue Ins = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
2322 {VecR, ValR, Width, Idx});
2323 return DAG.getNode(HexagonISD::D2P, dl, VecTy, Ins);
2324 }
2326 unsigned VecWidth = VecTy.getSizeInBits();
2327 unsigned ValWidth = ValTy.getSizeInBits();
2328 assert(VecWidth == 32 || VecWidth == 64);
2329 assert((VecWidth % ValWidth) == 0);
2331 // Cast everything to scalar integer types.
2332 MVT ScalarTy = MVT::getIntegerVT(VecWidth);
2333 // The actual type of ValV may be different than ValTy (which is related
2334 // to the vector type).
2335 unsigned VW = ty(ValV).getSizeInBits();
2336 ValV = DAG.getBitcast(MVT::getIntegerVT(VW), ValV);
2337 VecV = DAG.getBitcast(ScalarTy, VecV);
2338 if (VW != VecWidth)
2339 ValV = DAG.getAnyExtOrTrunc(ValV, dl, ScalarTy);
2341 SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
2343 SDValue InsV;
2344 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(IdxV)) {
2345 unsigned W = C->getZExtValue() * ValWidth;
2346 SDValue OffV = DAG.getConstant(W, dl, MVT::i32);
2347 InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
2348 {VecV, ValV, WidthV, OffV});
2349 } else {
2350 if (ty(IdxV) != MVT::i32)
2351 IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
2352 SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, WidthV);
2353 InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
2354 {VecV, ValV, WidthV, OffV});
2355 }
2357 return DAG.getNode(ISD::BITCAST, dl, VecTy, InsV);
2358 }
2360 SDValue
2361 HexagonTargetLowering::expandPredicate(SDValue Vec32, const SDLoc &dl,
2362 SelectionDAG &DAG) const {
2363 assert(ty(Vec32).getSizeInBits() == 32);
2364 if (isUndef(Vec32))
2365 return DAG.getUNDEF(MVT::i64);
2366 return getInstr(Hexagon::S2_vsxtbh, dl, MVT::i64, {Vec32}, DAG);
2367 }
2369 SDValue
2370 HexagonTargetLowering::contractPredicate(SDValue Vec64, const SDLoc &dl,
2371 SelectionDAG &DAG) const {
2372 assert(ty(Vec64).getSizeInBits() == 64);
2373 if (isUndef(Vec64))
2374 return DAG.getUNDEF(MVT::i32);
2375 return getInstr(Hexagon::S2_vtrunehb, dl, MVT::i32, {Vec64}, DAG);
2376 }
2378 SDValue
2379 HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
2380 const {
2381 if (Ty.isVector()) {
2382 assert(Ty.isInteger() && "Only integer vectors are supported here");
2383 unsigned W = Ty.getSizeInBits();
2384 if (W <= 64)
2385 return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W)));
2386 return DAG.getNode(HexagonISD::VZERO, dl, Ty);
2387 }
2389 if (Ty.isInteger())
2390 return DAG.getConstant(0, dl, Ty);
2391 if (Ty.isFloatingPoint())
2392 return DAG.getConstantFP(0.0, dl, Ty);
2393 llvm_unreachable("Invalid type for zero");
2394 }
2396 SDValue
2397 HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
2398 MVT VecTy = ty(Op);
2399 unsigned BW = VecTy.getSizeInBits();
2400 const SDLoc &dl(Op);
2401 SmallVector<SDValue,8> Ops;
2402 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
2403 Ops.push_back(Op.getOperand(i));
2405 if (BW == 32)
2406 return buildVector32(Ops, dl, VecTy, DAG);
2407 if (BW == 64)
2408 return buildVector64(Ops, dl, VecTy, DAG);
2410 if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) {
2411 // For each i1 element in the resulting predicate register, put 1
2412 // shifted by the index of the element into a general-purpose register,
2413 // then or them together and transfer it back into a predicate register.
2414 SDValue Rs[8];
2415 SDValue Z = getZero(dl, MVT::i32, DAG);
2416 // Always produce 8 bits, repeat inputs if necessary.
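// E.g. for v2i1, Rep == 4 and each input element drives four of the
// eight predicate bits.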
2417 unsigned Rep = 8 / VecTy.getVectorNumElements();
2418 for (unsigned i = 0; i != 8; ++i) {
2419 SDValue S = DAG.getConstant(1ull << i, dl, MVT::i32);
2420 Rs[i] = DAG.getSelect(dl, MVT::i32, Ops[i/Rep], S, Z);
2421 }
2422 for (ArrayRef<SDValue> A(Rs); A.size() != 1; A = A.drop_back(A.size()/2)) {
2423 for (unsigned i = 0, e = A.size()/2; i != e; ++i)
2424 Rs[i] = DAG.getNode(ISD::OR, dl, MVT::i32, Rs[2*i], Rs[2*i+1]);
2425 }
2426 // Move the value directly to a predicate register.
2427 return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Rs[0]}, DAG);
2428 }
2430 return SDValue();
2431 }
2433 SDValue
2434 HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
2435 SelectionDAG &DAG) const {
2436 MVT VecTy = ty(Op);
2437 const SDLoc &dl(Op);
2438 if (VecTy.getSizeInBits() == 64) {
2439 assert(Op.getNumOperands() == 2);
2440 return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, Op.getOperand(1),
2441 Op.getOperand(0));
2442 }
2444 MVT ElemTy = VecTy.getVectorElementType();
2445 if (ElemTy == MVT::i1) {
2446 assert(VecTy == MVT::v2i1 || VecTy == MVT::v4i1 || VecTy == MVT::v8i1);
2447 MVT OpTy = ty(Op.getOperand(0));
2448 // Scale is how many times the operands need to be contracted to match
2449 // the representation in the target register.
2450 unsigned Scale = VecTy.getVectorNumElements() / OpTy.getVectorNumElements();
2451 assert(Scale == Op.getNumOperands() && Scale > 1);
2453 // First, convert all bool vectors to integers, then generate pairwise
2454 // inserts to form values of doubled length. Up until there are only
2455 // two values left to concatenate, all of these values will fit in a
2456 // 32-bit integer, so keep them as i32 to use 32-bit inserts.
2457 SmallVector<SDValue,4> Words[2];
2458 unsigned IdxW = 0;
2460 for (SDValue P : Op.getNode()->op_values()) {
2461 SDValue W = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, P);
2462 for (unsigned R = Scale; R > 1; R /= 2) {
2463 W = contractPredicate(W, dl, DAG);
2464 W = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
2465 DAG.getUNDEF(MVT::i32), W);
2466 }
2467 W = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, W);
2468 Words[IdxW].push_back(W);
2469 }
2471 while (Scale > 2) {
2472 SDValue WidthV = DAG.getConstant(64 / Scale, dl, MVT::i32);
2473 Words[IdxW ^ 1].clear();
2475 for (unsigned i = 0, e = Words[IdxW].size(); i != e; i += 2) {
2476 SDValue W0 = Words[IdxW][i], W1 = Words[IdxW][i+1];
2477 // Insert W1 into W0 right next to the significant bits of W0.
2478 SDValue T = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
2479 {W0, W1, WidthV, WidthV});
2480 Words[IdxW ^ 1].push_back(T);
2481 }
2482 IdxW ^= 1;
2483 Scale /= 2;
2484 }
2486 // Another sanity check. At this point there should only be two words
2487 // left, and Scale should be 2.
2488 assert(Scale == 2 && Words[IdxW].size() == 2);
2490 SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
2491 Words[IdxW][1], Words[IdxW][0]);
2492 return DAG.getNode(HexagonISD::D2P, dl, VecTy, WW);
2493 }
2495 return SDValue();
2496 }
2498 SDValue
2499 HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
2500 SelectionDAG &DAG) const {
2501 SDValue Vec = Op.getOperand(0);
2502 MVT ElemTy = ty(Vec).getVectorElementType();
2503 return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ElemTy, ty(Op), DAG);
2504 }
2506 SDValue
2507 HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
2508 SelectionDAG &DAG) const {
2509 return extractVector(Op.getOperand(0), Op.getOperand(1), SDLoc(Op),
2510 ty(Op), ty(Op), DAG);
2511 }
2513 SDValue
2514 HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
2515 SelectionDAG &DAG) const {
2516 return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
2517 SDLoc(Op), ty(Op).getVectorElementType(), DAG);
2518 }
2520 SDValue
2521 HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
2522 SelectionDAG &DAG) const {
2523 SDValue ValV = Op.getOperand(1);
2524 return insertVector(Op.getOperand(0), ValV, Op.getOperand(2),
2525 SDLoc(Op), ty(ValV), DAG);
2526 }
2528 bool
2529 HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
2530 // Assuming the caller does not have either a signext or zeroext modifier, and
2531 // only one value is accepted, any reasonable truncation is allowed.
2532 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
2533 return false;
2535 // FIXME: in principle up to 64-bit could be made safe, but it would be very
2536 // fragile at the moment: any support for multiple value returns would be
2537 // liable to disallow tail calls involving i64 -> iN truncation in many cases.
2538 return Ty1->getPrimitiveSizeInBits() <= 32;
2539 }
2541 SDValue
2542 HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
2543 const {
2544 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
2545 unsigned HaveAlign = LN->getAlignment();
2546 MVT LoadTy = ty(Op);
2547 unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy);
2548 if (HaveAlign >= NeedAlign)
2549 return Op;
2551 const SDLoc &dl(Op);
2552 const DataLayout &DL = DAG.getDataLayout();
2553 LLVMContext &Ctx = *DAG.getContext();
2554 unsigned AS = LN->getAddressSpace();
2556 // If the load aligning is disabled or the load can be broken up into two
2557 // smaller legal loads, do the default (target-independent) expansion.
2558 bool DoDefault = false;
2559 // Handle it in the default way if this is an indexed load.
2560 if (!LN->isUnindexed())
2561 DoDefault = true;
2563 if (!AlignLoads) {
2564 if (allowsMemoryAccess(Ctx, DL, LN->getMemoryVT(), AS, HaveAlign))
2565 return Op;
2566 DoDefault = true;
2567 }
2568 if (!DoDefault && 2*HaveAlign == NeedAlign) {
2569 // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
2570 MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8*HaveAlign)
2571 : MVT::getVectorVT(MVT::i8, HaveAlign);
2572 DoDefault = allowsMemoryAccess(Ctx, DL, PartTy, AS, HaveAlign);
2573 }
2574 if (DoDefault) {
2575 std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG);
2576 return DAG.getMergeValues({P.first, P.second}, dl);
2577 }
2579 // The code below generates two loads, both aligned as NeedAlign, and
2580 // with the distance of NeedAlign between them. For that to cover the
2581 // bits that need to be loaded (and without overlapping), the size of
2582 // the loads should be equal to NeedAlign. This is true for all loadable
2583 // types, but add an assertion in case something changes in the future.
2584 assert(LoadTy.getSizeInBits() == 8*NeedAlign);
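// Example: an unaligned v8i8 load is rewritten as two adjacent 8-byte
// loads from the aligned-down base, with VALIGN selecting the
// addressed bytes using the original pointer.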
2586 unsigned LoadLen = NeedAlign;
2587 SDValue Base = LN->getBasePtr();
2588 SDValue Chain = LN->getChain();
2589 auto BO = getBaseAndOffset(Base);
2590 unsigned BaseOpc = BO.first.getOpcode();
2591 if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == 0)
2592 return Op;
2594 if (BO.second % LoadLen != 0) {
2595 BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first,
2596 DAG.getConstant(BO.second % LoadLen, dl, MVT::i32));
2597 BO.second -= BO.second % LoadLen;
2598 }
2599 SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR)
2600 ? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first,
2601 DAG.getConstant(NeedAlign, dl, MVT::i32))
2602 : BO.first;
2603 SDValue Base0 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second, dl);
2604 SDValue Base1 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second+LoadLen, dl);
2606 MachineMemOperand *WideMMO = nullptr;
2607 if (MachineMemOperand *MMO = LN->getMemOperand()) {
2608 MachineFunction &MF = DAG.getMachineFunction();
2609 WideMMO = MF.getMachineMemOperand(MMO->getPointerInfo(), MMO->getFlags(),
2610 2*LoadLen, LoadLen, MMO->getAAInfo(), MMO->getRanges(),
2611 MMO->getSyncScopeID(), MMO->getOrdering(),
2612 MMO->getFailureOrdering());
2613 }
2615 SDValue Load0 = DAG.getLoad(LoadTy, dl, Chain, Base0, WideMMO);
2616 SDValue Load1 = DAG.getLoad(LoadTy, dl, Chain, Base1, WideMMO);
2618 SDValue Aligned = DAG.getNode(HexagonISD::VALIGN, dl, LoadTy,
2619 {Load1, Load0, BaseNoOff.getOperand(0)});
2620 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2621 Load0.getValue(1), Load1.getValue(1));
2622 SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl);
2623 return M;
2624 }
2626 SDValue
2627 HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
2628 SDValue Chain = Op.getOperand(0);
2629 SDValue Offset = Op.getOperand(1);
2630 SDValue Handler = Op.getOperand(2);
2631 SDLoc dl(Op);
2632 auto PtrVT = getPointerTy(DAG.getDataLayout());
2634 // Mark function as containing a call to EH_RETURN.
2635 HexagonMachineFunctionInfo *FuncInfo =
2636 DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
2637 FuncInfo->setHasEHReturn();
2639 unsigned OffsetReg = Hexagon::R28;
2641 SDValue StoreAddr =
2642 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT),
2643 DAG.getIntPtrConstant(4, dl));
2644 Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
2645 Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
2647 // Not needed: we already use it as an explicit input to EH_RETURN.
2648 // MF.getRegInfo().addLiveOut(OffsetReg);
2650 return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
2651 }
2653 SDValue
2654 HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
2655 unsigned Opc = Op.getOpcode();
2657 // Handle INLINEASM first.
2658 if (Opc == ISD::INLINEASM)
2659 return LowerINLINEASM(Op, DAG);
2661 if (isHvxOperation(Op)) {
2662 // If HVX lowering returns nothing, try the default lowering.
2663 if (SDValue V = LowerHvxOperation(Op, DAG))
2664 return V;
2665 }
2667 switch (Opc) {
2668 default:
2669 #ifndef NDEBUG
2670 Op.getNode()->dumpr(&DAG);
2671 if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
2672 errs() << "Error: check for a non-legal type in this operation\n";
2673 #endif
2674 llvm_unreachable("Should not custom lower this!");
2675 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
2676 case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG);
2677 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
2678 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
2679 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2680 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
2681 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
2682 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
2683 case ISD::LOAD: return LowerUnalignedLoad(Op, DAG);
2684 case ISD::SHL:
2685 case ISD::SRA:
2686 case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG);
2687 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
2688 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
2689 case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
2690 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
2691 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
2692 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
2693 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
2694 case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
2695 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
2696 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
2697 case ISD::VASTART: return LowerVASTART(Op, DAG);
2698 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
2699 case ISD::SETCC: return LowerSETCC(Op, DAG);
2700 case ISD::VSELECT: return LowerVSELECT(Op, DAG);
2701 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2702 case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
2703 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG);
2704 case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
2706 }
2708 return SDValue();
2709 }
2711 void
2712 HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
2713 SmallVectorImpl<SDValue> &Results,
2714 SelectionDAG &DAG) const {
2715 const SDLoc &dl(N);
2716 switch (N->getOpcode()) {
2717 case ISD::SRL:
2718 case ISD::SRA:
2719 case ISD::SHL:
2720 return;
2721 case ISD::BITCAST:
2722 // Handle a bitcast from v8i1 to i8.
2723 if (N->getValueType(0) == MVT::i8) {
2724 SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32,
2725 N->getOperand(0), DAG);
2726 Results.push_back(P);
2727 }
2728 break;
2729 }
2730 }
2732 /// Returns relocation base for the given PIC jumptable.
2733 SDValue
2734 HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2735 SelectionDAG &DAG) const {
2736 int Idx = cast<JumpTableSDNode>(Table)->getIndex();
2737 EVT VT = Table.getValueType();
2738 SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
2739 return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T);
2740 }
2742 //===----------------------------------------------------------------------===//
2743 // Inline Assembly Support
2744 //===----------------------------------------------------------------------===//
2746 TargetLowering::ConstraintType
2747 HexagonTargetLowering::getConstraintType(StringRef Constraint) const {
2748 if (Constraint.size() == 1) {
2749 switch (Constraint[0]) {
2750 case 'q':
2751 case 'v':
2752 if (Subtarget.useHVXOps())
2753 return C_RegisterClass;
2754 break;
2755 case 'a':
2756 return C_RegisterClass;
2757 default:
2758 break;
2759 }
2760 }
2761 return TargetLowering::getConstraintType(Constraint);
2762 }
2764 std::pair<unsigned, const TargetRegisterClass*>
2765 HexagonTargetLowering::getRegForInlineAsmConstraint(
2766 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
2768 if (Constraint.size() == 1) {
2769 switch (Constraint[0]) {
2770 case 'r': // R0-R31
2771 switch (VT.SimpleTy) {
2772 default:
2773 return {0u, nullptr};
2774 case MVT::i1:
2775 case MVT::i8:
2776 case MVT::i16:
2777 case MVT::i32:
2778 case MVT::f32:
2779 return {0u, &Hexagon::IntRegsRegClass};
2780 case MVT::i64:
2781 case MVT::f64:
2782 return {0u, &Hexagon::DoubleRegsRegClass};
2783 }
2784 break;
2785 case 'a': // M0-M1
2786 if (VT != MVT::i32)
2787 return {0u, nullptr};
2788 return {0u, &Hexagon::ModRegsRegClass};
2789 case 'q': // q0-q3
2790 switch (VT.getSizeInBits()) {
2791 default:
2792 return {0u, nullptr};
2793 case 512:
2794 case 1024:
2795 return {0u, &Hexagon::HvxQRRegClass};
2796 }
2797 break;
2798 case 'v': // V0-V31
2799 switch (VT.getSizeInBits()) {
2800 default:
2801 return {0u, nullptr};
2802 case 512:
2803 return {0u, &Hexagon::HvxVRRegClass};
2804 case 1024:
2805 if (Subtarget.hasV60TOps() && Subtarget.useHVX128BOps())
2806 return {0u, &Hexagon::HvxVRRegClass};
2807 return {0u, &Hexagon::HvxWRRegClass};
2808 case 2048:
2809 return {0u, &Hexagon::HvxWRRegClass};
2810 }
2811 break;
2812 default:
2813 return {0u, nullptr};
2814 }
2815 }
2817 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
2818 }
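// Illustrative use (hypothetical inline asm): asm("%0 = vmem(%1)"
// : "=v"(out) : "v"(in)) binds 512-bit HVX values to HvxVRRegClass via
// the 'v' constraint handled above.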
2820 /// isFPImmLegal - Returns true if the target can instruction select the
2821 /// specified FP immediate natively. If false, the legalizer will
2822 /// materialize the FP immediate as a load from a constant pool.
2823 bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
2824 return Subtarget.hasV5TOps();
2825 }
2827 /// isLegalAddressingMode - Return true if the addressing mode represented by
2828 /// AM is legal for this target, for a load/store of the specified type.
2829 bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
2830 const AddrMode &AM, Type *Ty,
2831 unsigned AS, Instruction *I) const {
2832 if (Ty->isSized()) {
2833 // When LSR detects uses of the same base address to access different
2834 // types (e.g. unions), it will assume a conservative type for these
2835 // uses:
2836 // LSR Use: Kind=Address of void in addrspace(4294967295), ...
2837 // The type Ty passed here would then be "void". Skip the alignment
2838 // checks, but do not return false right away, since that confuses
2839 // LSR into crashing.
2840 unsigned A = DL.getABITypeAlignment(Ty);
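// E.g. for a 4-byte-aligned type the reachable offsets are the
// multiples of 4 in [-4096, 4092]: the offset divided by the alignment
// must fit in the signed 11-bit field checked below.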
2841 // The base offset must be a multiple of the alignment.
2842 if ((AM.BaseOffs % A) != 0)
2843 return false;
2844 // The shifted offset must fit in 11 bits.
2845 if (!isInt<11>(AM.BaseOffs >> Log2_32(A)))
2846 return false;
2847 }
2849 // No global is ever allowed as a base.
2850 if (AM.BaseGV)
2851 return false;
2853 int Scale = AM.Scale;
2854 if (Scale < 0)
2855 Scale = -Scale;
2856 switch (Scale) {
2857 case 0: // No scale reg, "r+i", "r", or just "i".
2858 break;
2859 default: // No scaled addressing mode.
2860 return false;
2861 }
2862 return true;
2863 }
2865 /// Return true if folding a constant offset with the given GlobalAddress is
2866 /// legal. It is frequently not legal in PIC relocation models.
2867 bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
2868 const {
2869 return HTM.getRelocationModel() == Reloc::Static;
2870 }
2872 /// isLegalICmpImmediate - Return true if the specified immediate is legal
2873 /// icmp immediate, that is the target has icmp instructions which can compare
2874 /// a register against the immediate without having to materialize the
2875 /// immediate into a register.
2876 bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
2877 return Imm >= -512 && Imm <= 511;
2878 }
2880 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2881 /// for tail call optimization. Targets which want to do tail call
2882 /// optimization should implement this function.
2883 bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
2884 SDValue Callee,
2885 CallingConv::ID CalleeCC,
2886 bool IsVarArg,
2887 bool IsCalleeStructRet,
2888 bool IsCallerStructRet,
2889 const SmallVectorImpl<ISD::OutputArg> &Outs,
2890 const SmallVectorImpl<SDValue> &OutVals,
2891 const SmallVectorImpl<ISD::InputArg> &Ins,
2892 SelectionDAG& DAG) const {
2893 const Function &CallerF = DAG.getMachineFunction().getFunction();
2894 CallingConv::ID CallerCC = CallerF.getCallingConv();
2895 bool CCMatch = CallerCC == CalleeCC;
2897 // ***************************************************************************
2898 // Look for obvious safe cases to perform tail call optimization that do not
2899 // require ABI changes.
2900 // ***************************************************************************
2902 // If this is a tail call via a function pointer, then don't do it!
2903 if (!isa<GlobalAddressSDNode>(Callee) &&
2904 !isa<ExternalSymbolSDNode>(Callee)) {
2905 return false;
2906 }
2908 // Do not optimize if the calling conventions do not match and the
2909 // conventions used are not C or Fast.
2910 if (!CCMatch) {
2911 bool R = (CallerCC == CallingConv::C || CallerCC == CallingConv::Fast);
2912 bool E = (CalleeCC == CallingConv::C || CalleeCC == CallingConv::Fast);
2913 // If R & E, then ok.
2914 if (!R || !E)
2915 return false;
2916 }
2918 // Do not tail call optimize vararg calls.
2919 if (IsVarArg)
2920 return false;
2922 // Also avoid tail call optimization if either caller or callee uses struct
2923 // return semantics.
2924 if (IsCalleeStructRet || IsCallerStructRet)
2925 return false;
2927 // In addition to the cases above, we also disable tail call optimization
2928 // if the calling convention requires an outgoing argument to be passed
2929 // on the stack. We cannot check that here because at this point that
2930 // information is not available.
2931 return true;
2932 }
2934 /// Returns the target specific optimal type for load and store operations as
2935 /// a result of memset, memcpy, and memmove lowering.
2937 /// If DstAlign is zero, the destination alignment can satisfy any
2938 /// constraint. Similarly, if SrcAlign is zero it means there isn't
2939 /// a need to check it against alignment requirement, probably because the
2940 /// source does not need to be loaded. If 'IsMemset' is true, that means it's
2941 /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
2942 /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
2943 /// does not need to be loaded. It returns EVT::Other if the type should be
2944 /// determined using generic target-independent logic.
2945 EVT HexagonTargetLowering::getOptimalMemOpType(uint64_t Size,
2946 unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset,
2947 bool MemcpyStrSrc, MachineFunction &MF) const {
2949 auto Aligned = [](unsigned GivenA, unsigned MinA) -> bool {
2950 return (GivenA % MinA) == 0;
2951 };
2953 if (Size >= 8 && Aligned(DstAlign, 8) && (IsMemset || Aligned(SrcAlign, 8)))
2954 return MVT::i64;
2955 if (Size >= 4 && Aligned(DstAlign, 4) && (IsMemset || Aligned(SrcAlign, 4)))
2956 return MVT::i32;
2957 if (Size >= 2 && Aligned(DstAlign, 2) && (IsMemset || Aligned(SrcAlign, 2)))
2958 return MVT::i16;
2960 return MVT::Other;
2961 }
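// Example: a 32-byte memcpy with 8-byte-aligned source and destination
// reports MVT::i64 here, so the generic lowering emits four 64-bit
// (memd) copies, subject to the max-stores-per-memcpy limits.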
2963 bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
2964 unsigned AS, unsigned Align, bool *Fast) const {
2965 if (Fast)
2966 *Fast = false;
2967 return Subtarget.isHVXVectorType(VT.getSimpleVT());
2968 }
2970 std::pair<const TargetRegisterClass*, uint8_t>
2971 HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2972 MVT VT) const {
2973 if (Subtarget.isHVXVectorType(VT, true)) {
2974 unsigned BitWidth = VT.getSizeInBits();
2975 unsigned VecWidth = Subtarget.getVectorLength() * 8;
2977 if (VT.getVectorElementType() == MVT::i1)
2978 return std::make_pair(&Hexagon::HvxQRRegClass, 1);
2979 if (BitWidth == VecWidth)
2980 return std::make_pair(&Hexagon::HvxVRRegClass, 1);
2981 assert(BitWidth == 2 * VecWidth);
2982 return std::make_pair(&Hexagon::HvxWRRegClass, 1);
2983 }
2985 return TargetLowering::findRepresentativeClass(TRI, VT);
2986 }
2988 Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
2989 AtomicOrdering Ord) const {
2990 BasicBlock *BB = Builder.GetInsertBlock();
2991 Module *M = BB->getParent()->getParent();
2992 Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
2993 unsigned SZ = Ty->getPrimitiveSizeInBits();
2994 assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
2995 Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
2996 : Intrinsic::hexagon_L4_loadd_locked;
2997 Value *Fn = Intrinsic::getDeclaration(M, IntID);
2998 return Builder.CreateCall(Fn, Addr, "larx");
2999 }
3001 /// Perform a store-conditional operation to Addr. Return the status of the
3002 /// store. This should be 0 if the store succeeded, non-zero otherwise.
3003 Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder,
3004 Value *Val, Value *Addr, AtomicOrdering Ord) const {
3005 BasicBlock *BB = Builder.GetInsertBlock();
3006 Module *M = BB->getParent()->getParent();
3007 Type *Ty = Val->getType();
3008 unsigned SZ = Ty->getPrimitiveSizeInBits();
3009 assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
3010 Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
3011 : Intrinsic::hexagon_S4_stored_locked;
3012 Value *Fn = Intrinsic::getDeclaration(M, IntID);
3013 Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx");
3014 Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), "");
3015 Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext()));
3016 return Ext;
3017 }
3019 TargetLowering::AtomicExpansionKind
3020 HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
3021 // Do not expand loads and stores that don't exceed 64 bits.
3022 return LI->getType()->getPrimitiveSizeInBits() > 64
3023 ? AtomicExpansionKind::LLOnly
3024 : AtomicExpansionKind::None;
3025 }
3027 bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
3028 // Do not expand loads and stores that don't exceed 64 bits.
3029 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
3030 }
3032 bool HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
3033 AtomicCmpXchgInst *AI) const {
3034 const DataLayout &DL = AI->getModule()->getDataLayout();
3035 unsigned Size = DL.getTypeStoreSize(AI->getCompareOperand()->getType());
3036 return Size >= 4 && Size <= 8;
3037 }
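// With the hooks above, AtomicExpandPass rewrites 32- and 64-bit
// cmpxchg (and oversized atomic loads) into retry loops built from the
// locked load/store intrinsics produced by emitLoadLinked and
// emitStoreConditional.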