lib/CodeGen/SelectionDAG/DAGCombiner.cpp

   1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
  11 // both before and after the DAG is legalized.
  12 //
  13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
  14 // primarily intended to handle simplification opportunities that are implicit
  15 // in the LLVM IR and exposed by the various codegen lowering phases.
  16 //
  17 //===----------------------------------------------------------------------===//
  18
  19 #include "llvm/ADT/APFloat.h"
  20 #include "llvm/ADT/APInt.h"
  21 #include "llvm/ADT/ArrayRef.h"
  22 #include "llvm/ADT/DenseMap.h"
  23 #include "llvm/ADT/None.h"
  24 #include "llvm/ADT/Optional.h"
  25 #include "llvm/ADT/STLExtras.h"
  26 #include "llvm/ADT/SetVector.h"
  27 #include "llvm/ADT/SmallBitVector.h"
  28 #include "llvm/ADT/SmallPtrSet.h"
  29 #include "llvm/ADT/SmallSet.h"
  30 #include "llvm/ADT/SmallVector.h"
  31 #include "llvm/ADT/Statistic.h"
  32 #include "llvm/Analysis/AliasAnalysis.h"
  33 #include "llvm/Analysis/MemoryLocation.h"
  34 #include "llvm/CodeGen/DAGCombine.h"
  35 #include "llvm/CodeGen/ISDOpcodes.h"
  36 #include "llvm/CodeGen/MachineFrameInfo.h"
  37 #include "llvm/CodeGen/MachineFunction.h"
  38 #include "llvm/CodeGen/MachineMemOperand.h"
  39 #include "llvm/CodeGen/MachineValueType.h"
  40 #include "llvm/CodeGen/RuntimeLibcalls.h"
  41 #include "llvm/CodeGen/SelectionDAG.h"
  42 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
  43 #include "llvm/CodeGen/SelectionDAGNodes.h"
  44 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
  45 #include "llvm/CodeGen/TargetLowering.h"
  46 #include "llvm/CodeGen/TargetRegisterInfo.h"
  47 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  48 #include "llvm/CodeGen/ValueTypes.h"
  49 #include "llvm/IR/Attributes.h"
  50 #include "llvm/IR/Constant.h"
  51 #include "llvm/IR/DataLayout.h"
  52 #include "llvm/IR/DerivedTypes.h"
  53 #include "llvm/IR/Function.h"
  54 #include "llvm/IR/LLVMContext.h"
  55 #include "llvm/IR/Metadata.h"
  56 #include "llvm/Support/Casting.h"
  57 #include "llvm/Support/CodeGen.h"
  58 #include "llvm/Support/CommandLine.h"
  59 #include "llvm/Support/Compiler.h"
  60 #include "llvm/Support/Debug.h"
  61 #include "llvm/Support/ErrorHandling.h"
  62 #include "llvm/Support/KnownBits.h"
  63 #include "llvm/Support/MathExtras.h"
  64 #include "llvm/Support/raw_ostream.h"
  65 #include "llvm/Target/TargetMachine.h"
  66 #include "llvm/Target/TargetOptions.h"
  67 #include <algorithm>
  68 #include <cassert>
  69 #include <cstdint>
  70 #include <functional>
  71 #include <iterator>
  72 #include <string>
  73 #include <tuple>
  74 #include <utility>
  75 #include <vector>
  76
  77 using namespace llvm;
  78
  79 #define DEBUG_TYPE "dagcombine"
  80
     // Counters maintained by this file; each one is declared together with the
     // human-readable description that accompanies it.
  81 STATISTIC(NodesCombined   , "Number of dag nodes combined");
  82 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
  83 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
  84 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
  85 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
  86 STATISTIC(SlicedLoads, "Number of load sliced");
  87
     /// Hidden option (-combiner-global-alias-analysis) enabling the DAG combiner's
     /// use of IR alias analysis. No explicit initial value is given here.
  88 static cl::opt<bool>
  89 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
  90                  cl::desc("Enable DAG combiner's use of IR alias analysis"));
  91
     /// Hidden option (-combiner-use-tbaa) enabling the DAG combiner's use of TBAA;
     /// initialized to true.
  92 static cl::opt<bool>
  93 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
  94         cl::desc("Enable DAG combiner's use of TBAA"));
  95
  96 #ifndef NDEBUG
     /// Hidden option (-combiner-aa-only-func), compiled in only when NDEBUG is not
     /// defined, naming the one function in which DAG-combiner alias analysis is
     /// used.
  97 static cl::opt<std::string>
  98 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
  99                    cl::desc("Only use DAG-combiner alias analysis in this"
 100                             " function"));
 101 #endif
 102
 103 /// Hidden option to stress test load slicing, i.e., when this option
 104 /// is enabled, load slicing bypasses most of its profitability guards.
 105 static cl::opt<bool>
 106 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
 107                   cl::desc("Bypass the profitability model of load slicing"),
 108                   cl::init(false));
 109
     /// Hidden option (-combiner-split-load-index) stating whether the DAG combiner
     /// may split indexing from loads; initialized to true.
 110 static cl::opt<bool>
 111   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
 112                     cl::desc("DAG combiner may split indexing from loads"));
 113
 114 namespace {
 115
       /// Worklist-driven combiner for a SelectionDAG. Nodes awaiting
       /// simplification are kept on \c Worklist; \c Run processes the worklist at
       /// a given CombineLevel, and the visit*/combine members declared below
       /// implement the folds for the individual node kinds.
 116   class DAGCombiner {
         /// The DAG being combined (bound in the constructor).
 117     SelectionDAG &DAG;
         /// Target lowering information, taken from \c DAG in the constructor.
 118     const TargetLowering &TLI;
         /// Current combine level; the constructor starts at BeforeLegalizeTypes.
 119     CombineLevel Level;
         /// Optimization level handed to the constructor.
 120     CodeGenOpt::Level OptLevel;
         /// NOTE(review): presumably set once operations / types have been
         /// legalized; the code that updates them is outside this view. While
         /// LegalTypes is false, isTypeLegal() accepts every type and
         /// getShiftAmountTy() uses the pointer type for scalar shift amounts.
 121     bool LegalOperations = false;
 122     bool LegalTypes = false;
         /// Result of optForSize() on the function being compiled (set in the
         /// constructor body).
 123     bool ForCodeSize;
 124
 125     /// \brief Worklist of all of the nodes that need to be simplified.
 126     ///
 127     /// This must behave as a stack -- new nodes to process are pushed onto the
 128     /// back and when processing we pop off of the back.
 129     ///
 130     /// The worklist will not contain duplicates but may contain null entries
 131     /// due to nodes being deleted from the underlying DAG.
 132     SmallVector<SDNode *, 64> Worklist;
 133
 134     /// \brief Mapping from an SDNode to its position on the worklist.
 135     ///
 136     /// This is used to find and remove nodes from the worklist (by nulling
 137     /// them) when they are deleted from the underlying DAG. It relies on
 138     /// stable indices of nodes within the worklist.
 139     DenseMap<SDNode *, unsigned> WorklistMap;
 140
 141     /// \brief Set of nodes which have been combined (at least once).
 142     ///
 143     /// This is used to allow us to reliably add any operands of a DAG node
 144     /// which have not yet been combined to the worklist.
 145     SmallPtrSet<SDNode *, 32> CombinedNodes;
 146
 147     // AA - Used for DAG load/store alias analysis.
 148     AliasAnalysis *AA;
 149
 150     /// When an instruction is simplified, add all users of the instruction to
 151     /// the work lists because they might get more simplified now.
 152     void AddUsersToWorklist(SDNode *N) {
 153       for (SDNode *Node : N->uses())
 154         AddToWorklist(Node);
 155     }
 156
 157     /// Call the node-specific routine that folds each particular type of node.
 158     SDValue visit(SDNode *N);
 159
 160   public:
         /// Binds the combiner to \p D, caches its target lowering info, records
         /// \p AA and \p OL, and computes \c MaximumLegalStoreInBits.
 161     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
 162         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
 163           OptLevel(OL), AA(AA) {
 164       ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
 165
           // Scan every value type except MVT::Other and remember the largest
           // bit size among those the target reports as legal.
 166       MaximumLegalStoreInBits = 0;
 167       for (MVT VT : MVT::all_valuetypes())
 168         if (EVT(VT).isSimple() && VT != MVT::Other &&
 169             TLI.isTypeLegal(EVT(VT)) &&
 170             VT.getSizeInBits() >= MaximumLegalStoreInBits)
 171           MaximumLegalStoreInBits = VT.getSizeInBits();
 172     }
 173
 174     /// Add to the worklist making sure its instance is at the back (next to be
 175     /// processed.)
 176     void AddToWorklist(SDNode *N) {
 177       assert(N->getOpcode() != ISD::DELETED_NODE &&
 178              "Deleted Node added to Worklist");
 179
 180       // Skip handle nodes as they can't usefully be combined and confuse the
 181       // zero-use deletion strategy.
 182       if (N->getOpcode() == ISD::HANDLENODE)
 183         return;
 184
           // Only push N when it was not already recorded in WorklistMap; the
           // recorded value is N's index in Worklist.
 185       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
 186         Worklist.push_back(N);
 187     }
 188
 189     /// Remove all instances of N from the worklist.
         /// N is also dropped from \c CombinedNodes, whether or not it was queued.
 190     void removeFromWorklist(SDNode *N) {
 191       CombinedNodes.erase(N);
 192
 193       auto It = WorklistMap.find(N);
 194       if (It == WorklistMap.end())
 195         return; // Not in the worklist.
 196
 197       // Null out the entry rather than erasing it to avoid a linear operation.
 198       Worklist[It->second] = nullptr;
 199       WorklistMap.erase(It);
 200     }
 201
 202     void deleteAndRecombine(SDNode *N);
 203     bool recursivelyDeleteUnusedNodes(SDNode *N);
 204
 205     /// Replaces all uses of the results of one DAG node with new values.
 206     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
 207                       bool AddTo = true);
 208
 209     /// Replaces all uses of the results of one DAG node with new values.
 210     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
 211       return CombineTo(N, &Res, 1, AddTo);
 212     }
 213
 214     /// Replaces all uses of the results of one DAG node with new values.
 215     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
 216                       bool AddTo = true) {
 217       SDValue To[] = { Res0, Res1 };
 218       return CombineTo(N, To, 2, AddTo);
 219     }
 220
 221     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
 222
 223   private:
         /// Largest bit size among the legal value types, computed once by the
         /// constructor.
 224     unsigned MaximumLegalStoreInBits;
 225
 226     /// Check the specified integer node value to see if it can be simplified or
 227     /// if things it uses can be simplified by bit propagation.
 228     /// If so, return true.
 229     bool SimplifyDemandedBits(SDValue Op) {
           // Demand every bit of the scalar value and defer to the overload below.
 230       unsigned BitWidth = Op.getScalarValueSizeInBits();
 231       APInt Demanded = APInt::getAllOnesValue(BitWidth);
 232       return SimplifyDemandedBits(Op, Demanded);
 233     }
 234
         /// Overload taking an explicit mask of demanded bits.
 235     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
 236
 237     bool CombineToPreIndexedLoadStore(SDNode *N);
 238     bool CombineToPostIndexedLoadStore(SDNode *N);
 239     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
 240     bool SliceUpLoad(SDNode *N);
 241
 242     /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
 243     ///   load.
 244     ///
 245     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
 246     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
 247     /// \param EltNo index of the vector element to load.
 248     /// \param OriginalLoad load that EVE came from to be replaced.
 249     /// \returns EVE on success, SDValue() on failure.
 250     SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
 251         SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
 252     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
 253     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
 254     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
 255     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
 256     SDValue PromoteIntBinOp(SDValue Op);
 257     SDValue PromoteIntShiftOp(SDValue Op);
 258     SDValue PromoteExtend(SDValue Op);
 259     bool PromoteLoad(SDValue Op);
 260
 261     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
 262                          SDValue ExtLoad, const SDLoc &DL,
 263                          ISD::NodeType ExtType);
 264
 265     /// Call the node-specific routine that knows how to fold each
 266     /// particular type of node. If that doesn't do anything, try the
 267     /// target-specific DAG combines.
 268     SDValue combine(SDNode *N);
 269
 270     // Visitation implementation - Implement dag node combining for different
 271     // node types.  The semantics are as follows:
 272     // Return Value:
 273     //   SDValue.getNode() == 0 - No change was made
 274     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
 275     //   otherwise              - N should be replaced by the returned Operand.
 276     //
 277     SDValue visitTokenFactor(SDNode *N);
 278     SDValue visitMERGE_VALUES(SDNode *N);
 279     SDValue visitADD(SDNode *N);
 280     SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
 281     SDValue visitSUB(SDNode *N);
 282     SDValue visitADDC(SDNode *N);
 283     SDValue visitUADDO(SDNode *N);
 284     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
 285     SDValue visitSUBC(SDNode *N);
 286     SDValue visitUSUBO(SDNode *N);
 287     SDValue visitADDE(SDNode *N);
 288     SDValue visitADDCARRY(SDNode *N);
 289     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
 290     SDValue visitSUBE(SDNode *N);
 291     SDValue visitSUBCARRY(SDNode *N);
 292     SDValue visitMUL(SDNode *N);
 293     SDValue useDivRem(SDNode *N);
 294     SDValue visitSDIV(SDNode *N);
 295     SDValue visitUDIV(SDNode *N);
 296     SDValue visitREM(SDNode *N);
 297     SDValue visitMULHU(SDNode *N);
 298     SDValue visitMULHS(SDNode *N);
 299     SDValue visitSMUL_LOHI(SDNode *N);
 300     SDValue visitUMUL_LOHI(SDNode *N);
 301     SDValue visitSMULO(SDNode *N);
 302     SDValue visitUMULO(SDNode *N);
 303     SDValue visitIMINMAX(SDNode *N);
 304     SDValue visitAND(SDNode *N);
 305     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
 306     SDValue visitOR(SDNode *N);
 307     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
 308     SDValue visitXOR(SDNode *N);
 309     SDValue SimplifyVBinOp(SDNode *N);
 310     SDValue visitSHL(SDNode *N);
 311     SDValue visitSRA(SDNode *N);
 312     SDValue visitSRL(SDNode *N);
 313     SDValue visitRotate(SDNode *N);
 314     SDValue visitABS(SDNode *N);
 315     SDValue visitBSWAP(SDNode *N);
 316     SDValue visitBITREVERSE(SDNode *N);
 317     SDValue visitCTLZ(SDNode *N);
 318     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
 319     SDValue visitCTTZ(SDNode *N);
 320     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
 321     SDValue visitCTPOP(SDNode *N);
 322     SDValue visitSELECT(SDNode *N);
 323     SDValue visitVSELECT(SDNode *N);
 324     SDValue visitSELECT_CC(SDNode *N);
 325     SDValue visitSETCC(SDNode *N);
 326     SDValue visitSETCCE(SDNode *N);
 327     SDValue visitSETCCCARRY(SDNode *N);
 328     SDValue visitSIGN_EXTEND(SDNode *N);
 329     SDValue visitZERO_EXTEND(SDNode *N);
 330     SDValue visitANY_EXTEND(SDNode *N);
 331     SDValue visitAssertExt(SDNode *N);
 332     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
 333     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
 334     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
 335     SDValue visitTRUNCATE(SDNode *N);
 336     SDValue visitBITCAST(SDNode *N);
 337     SDValue visitBUILD_PAIR(SDNode *N);
 338     SDValue visitFADD(SDNode *N);
 339     SDValue visitFSUB(SDNode *N);
 340     SDValue visitFMUL(SDNode *N);
 341     SDValue visitFMA(SDNode *N);
 342     SDValue visitFDIV(SDNode *N);
 343     SDValue visitFREM(SDNode *N);
 344     SDValue visitFSQRT(SDNode *N);
 345     SDValue visitFCOPYSIGN(SDNode *N);
 346     SDValue visitSINT_TO_FP(SDNode *N);
 347     SDValue visitUINT_TO_FP(SDNode *N);
 348     SDValue visitFP_TO_SINT(SDNode *N);
 349     SDValue visitFP_TO_UINT(SDNode *N);
 350     SDValue visitFP_ROUND(SDNode *N);
 351     SDValue visitFP_ROUND_INREG(SDNode *N);
 352     SDValue visitFP_EXTEND(SDNode *N);
 353     SDValue visitFNEG(SDNode *N);
 354     SDValue visitFABS(SDNode *N);
 355     SDValue visitFCEIL(SDNode *N);
 356     SDValue visitFTRUNC(SDNode *N);
 357     SDValue visitFFLOOR(SDNode *N);
 358     SDValue visitFMINNUM(SDNode *N);
 359     SDValue visitFMAXNUM(SDNode *N);
 360     SDValue visitBRCOND(SDNode *N);
 361     SDValue visitBR_CC(SDNode *N);
 362     SDValue visitLOAD(SDNode *N);
 363
 364     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
 365     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
 366
 367     SDValue visitSTORE(SDNode *N);
 368     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
 369     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
 370     SDValue visitBUILD_VECTOR(SDNode *N);
 371     SDValue visitCONCAT_VECTORS(SDNode *N);
 372     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
 373     SDValue visitVECTOR_SHUFFLE(SDNode *N);
 374     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
 375     SDValue visitINSERT_SUBVECTOR(SDNode *N);
 376     SDValue visitMLOAD(SDNode *N);
 377     SDValue visitMSTORE(SDNode *N);
 378     SDValue visitMGATHER(SDNode *N);
 379     SDValue visitMSCATTER(SDNode *N);
 380     SDValue visitFP_TO_FP16(SDNode *N);
 381     SDValue visitFP16_TO_FP(SDNode *N);
 382
 383     SDValue visitFADDForFMACombine(SDNode *N);
 384     SDValue visitFSUBForFMACombine(SDNode *N);
 385     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
 386
 387     SDValue XformToShuffleWithZero(SDNode *N);
 388     SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
 389                            SDValue RHS);
 390
 391     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
 392
 393     SDValue foldSelectOfConstants(SDNode *N);
 394     SDValue foldVSelectOfConstants(SDNode *N);
 395     SDValue foldBinOpIntoSelect(SDNode *BO);
 396     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
 397     SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
 398     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
 399     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
 400                              SDValue N2, SDValue N3, ISD::CondCode CC,
 401                              bool NotExtCompare = false);
 402     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
 403                                    SDValue N2, SDValue N3, ISD::CondCode CC);
 404     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
 405                               const SDLoc &DL);
 406     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
 407                           const SDLoc &DL, bool foldBooleans = true);
 408
 409     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 410                            SDValue &CC) const;
 411     bool isOneUseSetCC(SDValue N) const;
 412
 413     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
 414                                          unsigned HiOp);
 415     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
 416     SDValue CombineExtLoad(SDNode *N);
 417     SDValue combineRepeatedFPDivisors(SDNode *N);
 418     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
 419     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
 420     SDValue BuildSDIV(SDNode *N);
 421     SDValue BuildSDIVPow2(SDNode *N);
 422     SDValue BuildUDIV(SDNode *N);
 423     SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
 424     SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
 425     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
 426     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
 427     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
 428     SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
 429                                 SDNodeFlags Flags, bool Reciprocal);
 430     SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
 431                                 SDNodeFlags Flags, bool Reciprocal);
 432     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
 433                                bool DemandHighBits = true);
 434     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
 435     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
 436                               SDValue InnerPos, SDValue InnerNeg,
 437                               unsigned PosOpcode, unsigned NegOpcode,
 438                               const SDLoc &DL);
 439     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
 440     SDValue MatchLoadCombine(SDNode *N);
 441     SDValue ReduceLoadWidth(SDNode *N);
 442     SDValue ReduceLoadOpStoreWidth(SDNode *N);
 443     SDValue splitMergedValStore(StoreSDNode *ST);
 444     SDValue TransformFPLoadStorePair(SDNode *N);
 445     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
 446     SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
 447     SDValue reduceBuildVecToShuffle(SDNode *N);
 448     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
 449                                   ArrayRef<int> VectorMask, SDValue VecIn1,
 450                                   SDValue VecIn2, unsigned LeftIdx);
 451     SDValue matchVSelectOpSizesWithSetCC(SDNode *N);
 452
 453     /// Walk up chain skipping non-aliasing memory nodes,
 454     /// looking for aliasing nodes and adding them to the Aliases vector.
 455     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
 456                           SmallVectorImpl<SDValue> &Aliases);
 457
 458     /// Return true if there is any possibility that the two addresses overlap.
 459     bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
 460
 461     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
 462     /// chain (aliasing node.)
 463     SDValue FindBetterChain(SDNode *N, SDValue Chain);
 464
 465     /// Try to replace a store and any possibly adjacent stores on
 466     /// consecutive chains with better chains. Return true only if St is
 467     /// replaced.
 468     ///
 469     /// Notice that other chains may still be replaced even if the function
 470     /// returns false.
 471     bool findBetterNeighborChains(StoreSDNode *St);
 472
 473     /// Match "(X shl/srl V1) & V2" where V2 may not be present.
 474     bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
 475
 476     /// Holds a pointer to an LSBaseSDNode as well as information on where it
 477     /// is located in a sequence of memory operations connected by a chain.
 478     struct MemOpLink {
 479       // Ptr to the mem node.
 480       LSBaseSDNode *MemNode;
 481
 482       // Offset from the base ptr.
 483       int64_t OffsetFromBase;
 484
 485       MemOpLink(LSBaseSDNode *N, int64_t Offset)
 486           : MemNode(N), OffsetFromBase(Offset) {}
 487     };
 488
 489     /// This is a helper function for visitMUL to check the profitability
 490     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
 491     /// MulNode is the original multiply, AddNode is (add x, c1),
 492     /// and ConstNode is c2.
 493     bool isMulAddWithConstProfitable(SDNode *MulNode,
 494                                      SDValue &AddNode,
 495                                      SDValue &ConstNode);
 496
 497     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
 498     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
 499     /// the type of the loaded value to be extended.
 500     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
 501                           EVT LoadResultTy, EVT &ExtVT);
 502
 503     /// Helper function to calculate whether the given Load can have its
 504     /// width reduced to ExtVT.
 505     bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
 506                            EVT &ExtVT, unsigned ShAmt = 0);
 507
 508     /// Used by BackwardsPropagateMask to find suitable loads.
 509     bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
 510                            SmallPtrSetImpl<SDNode*> &NodeWithConsts,
 511                            ConstantSDNode *Mask, SDNode *&UncombinedNode);
 512     /// Attempt to propagate a given AND node back to load leaves so that they
 513     /// can be combined into narrow loads.
 514     bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
 515
 516     /// Helper function for MergeConsecutiveStores which merges the
 517     /// component store chains.
 518     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
 519                                 unsigned NumStores);
 520
 521     /// This is a helper function for MergeConsecutiveStores. When the
 522     /// source elements of the consecutive stores are all constants or
 523     /// all extracted vector elements, try to merge them into one
 524     /// larger store introducing bitcasts if necessary.  \return True
 525     /// if a merged store was created.
 526     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
 527                                          EVT MemVT, unsigned NumStores,
 528                                          bool IsConstantSrc, bool UseVector,
 529                                          bool UseTrunc);
 530
 531     /// This is a helper function for MergeConsecutiveStores. Stores
 532     /// that potentially may be merged with St are placed in
 533     /// StoreNodes.
 534     void getStoreMergeCandidates(StoreSDNode *St,
 535                                  SmallVectorImpl<MemOpLink> &StoreNodes);
 536
 537     /// Helper function for MergeConsecutiveStores. Checks if
 538     /// candidate stores have indirect dependency through their
 539     /// operands. \return True if safe to merge.
 540     bool checkMergeStoreCandidatesForDependencies(
 541         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
 542
 543     /// Merge consecutive store operations into a wide store.
 544     /// This optimization uses wide integers or vectors when possible.
 545     /// \return number of stores that were merged into a merged store (the
 546     /// affected nodes are stored as a prefix in \p StoreNodes).
         /// NOTE(review): the declared return type is bool rather than a count, and
         /// no StoreNodes parameter exists on this signature -- confirm the
         /// intended contract against the definition.
 547     bool MergeConsecutiveStores(StoreSDNode *N);
 548
 549     /// \brief Try to transform a truncation where C is a constant:
 550     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
 551     ///
 552     /// \p N needs to be a truncation and its first operand an AND. Other
 553     /// requirements are checked by the function (e.g. that trunc is
 554     /// single-use) and if missed an empty SDValue is returned.
 555     SDValue distributeTruncateThroughAnd(SDNode *N);
 556
 557   public:
 558     /// Runs the dag combiner on all nodes in the work list
 559     void Run(CombineLevel AtLevel);
 560
         /// Returns the DAG this combiner operates on.
 561     SelectionDAG &getDAG() const { return DAG; }
 562
 563     /// Returns a type large enough to hold any valid shift amount - before type
 564     /// legalization these can be huge.
 565     EVT getShiftAmountTy(EVT LHSTy) {
 566       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
           // Vector shifts use the shifted vector type itself for the amount.
 567       if (LHSTy.isVector())
 568         return LHSTy;
 569       auto &DL = DAG.getDataLayout();
 570       return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
 571                         : TLI.getPointerTy(DL);
 572     }
 573
 574     /// This method returns true if we are running before type legalization or
 575     /// if the specified VT is legal.
 576     bool isTypeLegal(const EVT &VT) {
 577       if (!LegalTypes) return true;
 578       return TLI.isTypeLegal(VT);
 579     }
 580
 581     /// Convenience wrapper around TargetLowering::getSetCCResultType
 582     EVT getSetCCResultType(EVT VT) const {
 583       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 584     }
 585   };
 586
 587 /// DAG update listener that keeps the combiner's worklist in sync with the
 588 /// DAG by dropping every node the DAG reports as deleted.
 589 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
 590   DAGCombiner &DC;
 591
 592 public:
 593   explicit WorklistRemover(DAGCombiner &Combiner)
 594       : SelectionDAG::DAGUpdateListener(Combiner.getDAG()), DC(Combiner) {}
 595   /// Forget \p Deleted; the callback's second argument is not used.
 596   void NodeDeleted(SDNode *Deleted, SDNode * /*Unused*/) override {
 597     DC.removeFromWorklist(Deleted);
 598   }
 599 };
 600
 601 } // end anonymous namespace
 602
 603 //===----------------------------------------------------------------------===//
 604 //  TargetLowering::DAGCombinerInfo implementation
 605 //===----------------------------------------------------------------------===//
 606
 607 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
       // DC is handled as a pointer to the DAGCombiner; forward the request.
 608   static_cast<DAGCombiner *>(DC)->AddToWorklist(N);
 609 }
 610
     /// Forwards to DAGCombiner::CombineTo so that the results of \p N are replaced
     /// by the values in \p To.
     ///
     /// \param N     node whose results are replaced.
     /// \param To    replacement values; passed on as a pointer/length pair.
     /// \param AddTo forwarded unchanged to DAGCombiner::CombineTo.
     /// \returns whatever DAGCombiner::CombineTo returns.
 611 SDValue TargetLowering::DAGCombinerInfo::
 612 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
       // Use data() rather than &To[0]: it yields the same pointer for a non-empty
       // array but does not index element 0 when the array is empty.
 613   return static_cast<DAGCombiner *>(DC)->CombineTo(N, To.data(), To.size(),
                                                       AddTo);
 614 }
 615
 616 SDValue TargetLowering::DAGCombinerInfo::CombineTo(SDNode *N, SDValue Res,
 617                                                    bool AddTo) {
       // Single-result form: hand the request straight to the DAGCombiner.
 618   return static_cast<DAGCombiner *>(DC)->CombineTo(N, Res, AddTo);
 619 }
 620
 621 SDValue TargetLowering::DAGCombinerInfo::CombineTo(SDNode *N, SDValue Res0,
 622                                                    SDValue Res1, bool AddTo) {
       // Two-result form: hand the request straight to the DAGCombiner.
 623   return static_cast<DAGCombiner *>(DC)->CombineTo(N, Res0, Res1, AddTo);
 624 }
 625
 626 void TargetLowering::DAGCombinerInfo::CommitTargetLoweringOpt(
 627     const TargetLowering::TargetLoweringOpt &TLO) {
       // Nothing to return: simply forward the request to the DAGCombiner.
 628   static_cast<DAGCombiner *>(DC)->CommitTargetLoweringOpt(TLO);
 629 }
 630
 631 //===----------------------------------------------------------------------===//
 632 // Helper Functions
 633 //===----------------------------------------------------------------------===//
 634
     /// Drops \p N from the worklist, re-queues the operands that may have become
     /// dead or further simplifiable, and then deletes \p N from the DAG.
 635 void DAGCombiner::deleteAndRecombine(SDNode *N) {
 636   removeFromWorklist(N);
 637
 638   // Operands used by nothing but N are about to become dead, so queue them
 639   // for another visit; dead nodes are then deleted recursively.
 640   for (const SDValue &Operand : N->ops()) {
 641     // A node producing several values is queued as well: one of its values
 642     // may die, e.g. letting index arithmetic be split off an indexed load.
 643     SDNode *Def = Operand.getNode();
 644     if (Def->getNumValues() > 1 || Def->hasOneUse())
 645       AddToWorklist(Def);
 646   }
 647   DAG.DeleteNode(N);
 648 }
 649
 650 /// Return 1 if we can compute the negated form of the specified expression for
 651 /// the same cost as the expression itself, or 2 if we can compute the negated
 652 /// form more cheaply than the expression itself.
 653 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
 654                                const TargetLowering &TLI,
 655                                const TargetOptions *Options,
 656                                unsigned Depth = 0) {
 657   // fneg is removable even if it has multiple uses.
 658   if (Op.getOpcode() == ISD::FNEG) return 2;
 659
 660   // Don't allow anything with multiple uses.
 661   if (!Op.hasOneUse()) return 0;
 662
 663   // Don't recurse exponentially.
 664   if (Depth > 6) return 0;
 665
 666   switch (Op.getOpcode()) {
 667   default: return false;
 668   case ISD::ConstantFP: {
 669     if (!LegalOperations)
 670       return 1;
 671
 672     // Don't invert constant FP values after legalization unless the target says
 673     // the negated constant is legal.
 674     EVT VT = Op.getValueType();
 675     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
 676       TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
 677   }
 678   case ISD::FADD:
 679     // FIXME: determine better conditions for this xform.
 680     if (!Options->UnsafeFPMath) return 0;
 681
 682     // After operation legalization, it might not be legal to create new FSUBs.
 683     if (LegalOperations &&
 684         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
 685       return 0;
 686
 687     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
 688     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
 689                                     Options, Depth + 1))
 690       return V;
 691     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
 692     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
 693                               Depth + 1);
 694   case ISD::FSUB:
 695     // We can't turn -(A-B) into B-A when we honor signed zeros.
 696     if (!Options->NoSignedZerosFPMath &&
 697         !Op.getNode()->getFlags().hasNoSignedZeros())
 698       return 0;
 699
 700     // fold (fneg (fsub A, B)) -> (fsub B, A)
 701     return 1;
 702
 703   case ISD::FMUL:
 704   case ISD::FDIV:
 705     if (Options->HonorSignDependentRoundingFPMath()) return 0;
 706
 707     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
 708     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
 709                                     Options, Depth + 1))
 710       return V;
 711
 712     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
 713                               Depth + 1);
 714
 715   case ISD::FP_EXTEND:
 716   case ISD::FP_ROUND:
 717   case ISD::FSIN:
 718     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
 719                               Depth + 1);
 720   }
 721 }
 722
 723 /// If isNegatibleForFree returns true, return the newly negated expression.
 724 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
 725                                     bool LegalOperations, unsigned Depth = 0) {
 726   const TargetOptions &Options = DAG.getTarget().Options;
 727   // fneg is removable even if it has multiple uses.
 728   if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
 729
 730   // Don't allow anything with multiple uses.
 731   assert(Op.hasOneUse() && "Unknown reuse!");
 732
 733   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
 734
 735   const SDNodeFlags Flags = Op.getNode()->getFlags();
 736
 737   switch (Op.getOpcode()) {
 738   default: llvm_unreachable("Unknown code");
 739   case ISD::ConstantFP: {
 740     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
 741     V.changeSign();
 742     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
 743   }
 744   case ISD::FADD:
 745     // FIXME: determine better conditions for this xform.
 746     assert(Options.UnsafeFPMath);
 747
 748     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
 749     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
 750                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
 751       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 752                          GetNegatedExpression(Op.getOperand(0), DAG,
 753                                               LegalOperations, Depth+1),
 754                          Op.getOperand(1), Flags);
 755     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
 756     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 757                        GetNegatedExpression(Op.getOperand(1), DAG,
 758                                             LegalOperations, Depth+1),
 759                        Op.getOperand(0), Flags);
 760   case ISD::FSUB:
 761     // fold (fneg (fsub 0, B)) -> B
 762     if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
 763       if (N0CFP->isZero())
 764         return Op.getOperand(1);
 765
 766     // fold (fneg (fsub A, B)) -> (fsub B, A)
 767     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 768                        Op.getOperand(1), Op.getOperand(0), Flags);
 769
 770   case ISD::FMUL:
 771   case ISD::FDIV:
 772     assert(!Options.HonorSignDependentRoundingFPMath());
 773
 774     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
 775     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
 776                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
 777       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 778                          GetNegatedExpression(Op.getOperand(0), DAG,
 779                                               LegalOperations, Depth+1),
 780                          Op.getOperand(1), Flags);
 781
 782     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
 783     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 784                        Op.getOperand(0),
 785                        GetNegatedExpression(Op.getOperand(1), DAG,
 786                                             LegalOperations, Depth+1), Flags);
 787
 788   case ISD::FP_EXTEND:
 789   case ISD::FSIN:
 790     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 791                        GetNegatedExpression(Op.getOperand(0), DAG,
 792                                             LegalOperations, Depth+1));
 793   case ISD::FP_ROUND:
 794       return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
 795                          GetNegatedExpression(Op.getOperand(0), DAG,
 796                                               LegalOperations, Depth+1),
 797                          Op.getOperand(1));
 798   }
 799 }
 800
 801 // APInts must be the same size for most operations, this helper
 802 // function zero extends the shorter of the pair so that they match.
 803 // We provide an Offset so that we can create bitwidths that won't overflow.
 804 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
 805   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
 806   LHS = LHS.zextOrSelf(Bits);
 807   RHS = RHS.zextOrSelf(Bits);
 808 }
 809
 810 // Return true if this node is a setcc, or is a select_cc
 811 // that selects between the target values used for true and false, making it
 812 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
 813 // the appropriate nodes based on the type of node we are checking. This
 814 // simplifies life a bit for the callers.
 815 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 816                                     SDValue &CC) const {
 817   if (N.getOpcode() == ISD::SETCC) {
 818     LHS = N.getOperand(0);
 819     RHS = N.getOperand(1);
 820     CC  = N.getOperand(2);
 821     return true;
 822   }
 823
 824   if (N.getOpcode() != ISD::SELECT_CC ||
 825       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
 826       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
 827     return false;
 828
 829   if (TLI.getBooleanContents(N.getValueType()) ==
 830       TargetLowering::UndefinedBooleanContent)
 831     return false;
 832
 833   LHS = N.getOperand(0);
 834   RHS = N.getOperand(1);
 835   CC  = N.getOperand(4);
 836   return true;
 837 }
 838
 839 /// Return true if this is a SetCC-equivalent operation with only one use.
 840 /// If this is true, it allows the users to invert the operation for free when
 841 /// it is profitable to do so.
 842 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
 843   SDValue N0, N1, N2;
 844   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
 845     return true;
 846   return false;
 847 }
 848
 849 // \brief Returns the SDNode if it is a constant float BuildVector
 850 // or constant float.
 851 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
 852   if (isa<ConstantFPSDNode>(N))
 853     return N.getNode();
 854   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
 855     return N.getNode();
 856   return nullptr;
 857 }
 858
 859 // Determines if it is a constant integer or a build vector of constant
 860 // integers (and undefs).
 861 // Do not permit build vector implicit truncation.
 862 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
 863   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
 864     return !(Const->isOpaque() && NoOpaques);
 865   if (N.getOpcode() != ISD::BUILD_VECTOR)
 866     return false;
 867   unsigned BitWidth = N.getScalarValueSizeInBits();
 868   for (const SDValue &Op : N->op_values()) {
 869     if (Op.isUndef())
 870       continue;
 871     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
 872     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
 873         (Const->isOpaque() && NoOpaques))
 874       return false;
 875   }
 876   return true;
 877 }
 878
 879 // Determines if it is a constant null integer or a splatted vector of a
 880 // constant null integer (with no undefs).
 881 // Build vector implicit truncation is not an issue for null values.
 882 static bool isNullConstantOrNullSplatConstant(SDValue N) {
 883   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
 884     return Splat->isNullValue();
 885   return false;
 886 }
 887
 888 // Determines if it is a constant integer of one or a splatted vector of a
 889 // constant integer of one (with no undefs).
 890 // Do not permit build vector implicit truncation.
 891 static bool isOneConstantOrOneSplatConstant(SDValue N) {
 892   unsigned BitWidth = N.getScalarValueSizeInBits();
 893   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
 894     return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
 895   return false;
 896 }
 897
 898 // Determines if it is a constant integer of all ones or a splatted vector of a
 899 // constant integer of all ones (with no undefs).
 900 // Do not permit build vector implicit truncation.
 901 static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
 902   unsigned BitWidth = N.getScalarValueSizeInBits();
 903   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
 904     return Splat->isAllOnesValue() &&
 905            Splat->getAPIntValue().getBitWidth() == BitWidth;
 906   return false;
 907 }
 908
 909 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
 910 // undef's.
 911 static bool isAnyConstantBuildVector(const SDNode *N) {
 912   return ISD::isBuildVectorOfConstantSDNodes(N) ||
 913          ISD::isBuildVectorOfConstantFPSDNodes(N);
 914 }
 915
 916 // Attempt to match a unary predicate against a scalar/splat constant or
 917 // every element of a constant BUILD_VECTOR.
 918 static bool matchUnaryPredicate(SDValue Op,
 919                                 std::function<bool(ConstantSDNode *)> Match) {
 920   if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
 921     return Match(Cst);
 922
 923   if (ISD::BUILD_VECTOR != Op.getOpcode())
 924     return false;
 925
 926   EVT SVT = Op.getValueType().getScalarType();
 927   for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
 928     auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
 929     if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
 930       return false;
 931   }
 932   return true;
 933 }
 934
 935 // Attempt to match a binary predicate against a pair of scalar/splat constants
 936 // or every element of a pair of constant BUILD_VECTORs.
 937 static bool matchBinaryPredicate(
 938     SDValue LHS, SDValue RHS,
 939     std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
 940   if (LHS.getValueType() != RHS.getValueType())
 941     return false;
 942
 943   if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
 944     if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
 945       return Match(LHSCst, RHSCst);
 946
 947   if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
 948       ISD::BUILD_VECTOR != RHS.getOpcode())
 949     return false;
 950
 951   EVT SVT = LHS.getValueType().getScalarType();
 952   for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
 953     auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
 954     auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
 955     if (!LHSCst || !RHSCst)
 956       return false;
 957     if (LHSCst->getValueType(0) != SVT ||
 958         LHSCst->getValueType(0) != RHSCst->getValueType(0))
 959       return false;
 960     if (!Match(LHSCst, RHSCst))
 961       return false;
 962   }
 963   return true;
 964 }
 965
 966 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
 967                                     SDValue N1) {
 968   EVT VT = N0.getValueType();
 969   if (N0.getOpcode() == Opc) {
 970     if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
 971       if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
 972         // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
 973         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
 974           return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
 975         return SDValue();
 976       }
 977       if (N0.hasOneUse()) {
 978         // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
 979         // use
 980         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
 981         if (!OpNode.getNode())
 982           return SDValue();
 983         AddToWorklist(OpNode.getNode());
 984         return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
 985       }
 986     }
 987   }
 988
 989   if (N1.getOpcode() == Opc) {
 990     if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
 991       if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
 992         // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
 993         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
 994           return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
 995         return SDValue();
 996       }
 997       if (N1.hasOneUse()) {
 998         // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
 999         // use
1000         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
1001         if (!OpNode.getNode())
1002           return SDValue();
1003         AddToWorklist(OpNode.getNode());
1004         return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
1005       }
1006     }
1007   }
1008
1009   return SDValue();
1010 }
1011
1012 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1013                                bool AddTo) {
1014   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1015   ++NodesCombined;
1016   DEBUG(dbgs() << "\nReplacing.1 ";
1017         N->dump(&DAG);
1018         dbgs() << "\nWith: ";
1019         To[0].getNode()->dump(&DAG);
1020         dbgs() << " and " << NumTo-1 << " other values\n");
1021   for (unsigned i = 0, e = NumTo; i != e; ++i)
1022     assert((!To[i].getNode() ||
1023             N->getValueType(i) == To[i].getValueType()) &&
1024            "Cannot combine value to value of different type!");
1025
1026   WorklistRemover DeadNodes(*this);
1027   DAG.ReplaceAllUsesWith(N, To);
1028   if (AddTo) {
1029     // Push the new nodes and any users onto the worklist
1030     for (unsigned i = 0, e = NumTo; i != e; ++i) {
1031       if (To[i].getNode()) {
1032         AddToWorklist(To[i].getNode());
1033         AddUsersToWorklist(To[i].getNode());
1034       }
1035     }
1036   }
1037
1038   // Finally, if the node is now dead, remove it from the graph.  The node
1039   // may not be dead if the replacement process recursively simplified to
1040   // something else needing this node.
1041   if (N->use_empty())
1042     deleteAndRecombine(N);
1043   return SDValue(N, 0);
1044 }
1045
1046 void DAGCombiner::
1047 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1048   // Replace all uses.  If any nodes become isomorphic to other nodes and
1049   // are deleted, make sure to remove them from our worklist.
1050   WorklistRemover DeadNodes(*this);
1051   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1052
1053   // Push the new node and any (possibly new) users onto the worklist.
1054   AddToWorklist(TLO.New.getNode());
1055   AddUsersToWorklist(TLO.New.getNode());
1056
1057   // Finally, if the node is now dead, remove it from the graph.  The node
1058   // may not be dead if the replacement process recursively simplified to
1059   // something else needing this node.
1060   if (TLO.Old.getNode()->use_empty())
1061     deleteAndRecombine(TLO.Old.getNode());
1062 }
1063
1064 /// Check the specified integer node value to see if it can be simplified or if
1065 /// things it uses can be simplified by bit propagation. If so, return true.
1066 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
1067   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1068   KnownBits Known;
1069   if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
1070     return false;
1071
1072   // Revisit the node.
1073   AddToWorklist(Op.getNode());
1074
1075   // Replace the old value with the new one.
1076   ++NodesCombined;
1077   DEBUG(dbgs() << "\nReplacing.2 ";
1078         TLO.Old.getNode()->dump(&DAG);
1079         dbgs() << "\nWith: ";
1080         TLO.New.getNode()->dump(&DAG);
1081         dbgs() << '\n');
1082
1083   CommitTargetLoweringOpt(TLO);
1084   return true;
1085 }
1086
1087 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1088   SDLoc DL(Load);
1089   EVT VT = Load->getValueType(0);
1090   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1091
1092   DEBUG(dbgs() << "\nReplacing.9 ";
1093         Load->dump(&DAG);
1094         dbgs() << "\nWith: ";
1095         Trunc.getNode()->dump(&DAG);
1096         dbgs() << '\n');
1097   WorklistRemover DeadNodes(*this);
1098   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1099   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1100   deleteAndRecombine(Load);
1101   AddToWorklist(Trunc.getNode());
1102 }
1103
1104 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1105   Replace = false;
1106   SDLoc DL(Op);
1107   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1108     LoadSDNode *LD = cast<LoadSDNode>(Op);
1109     EVT MemVT = LD->getMemoryVT();
1110     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1111       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1112                                                        : ISD::EXTLOAD)
1113       : LD->getExtensionType();
1114     Replace = true;
1115     return DAG.getExtLoad(ExtType, DL, PVT,
1116                           LD->getChain(), LD->getBasePtr(),
1117                           MemVT, LD->getMemOperand());
1118   }
1119
1120   unsigned Opc = Op.getOpcode();
1121   switch (Opc) {
1122   default: break;
1123   case ISD::AssertSext:
1124     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1125       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1126     break;
1127   case ISD::AssertZext:
1128     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1129       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1130     break;
1131   case ISD::Constant: {
1132     unsigned ExtOpc =
1133       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1134     return DAG.getNode(ExtOpc, DL, PVT, Op);
1135   }
1136   }
1137
1138   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1139     return SDValue();
1140   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1141 }
1142
1143 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1144   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1145     return SDValue();
1146   EVT OldVT = Op.getValueType();
1147   SDLoc DL(Op);
1148   bool Replace = false;
1149   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1150   if (!NewOp.getNode())
1151     return SDValue();
1152   AddToWorklist(NewOp.getNode());
1153
1154   if (Replace)
1155     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1156   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1157                      DAG.getValueType(OldVT));
1158 }
1159
1160 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1161   EVT OldVT = Op.getValueType();
1162   SDLoc DL(Op);
1163   bool Replace = false;
1164   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1165   if (!NewOp.getNode())
1166     return SDValue();
1167   AddToWorklist(NewOp.getNode());
1168
1169   if (Replace)
1170     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1171   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1172 }
1173
1174 /// Promote the specified integer binary operation if the target indicates it is
1175 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1176 /// i32 since i16 instructions are longer.
1177 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1178   if (!LegalOperations)
1179     return SDValue();
1180
1181   EVT VT = Op.getValueType();
1182   if (VT.isVector() || !VT.isInteger())
1183     return SDValue();
1184
1185   // If operation type is 'undesirable', e.g. i16 on x86, consider
1186   // promoting it.
1187   unsigned Opc = Op.getOpcode();
1188   if (TLI.isTypeDesirableForOp(Opc, VT))
1189     return SDValue();
1190
1191   EVT PVT = VT;
1192   // Consult target whether it is a good idea to promote this operation and
1193   // what's the right type to promote it to.
1194   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1195     assert(PVT != VT && "Don't know what type to promote to!");
1196
1197     DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1198
1199     bool Replace0 = false;
1200     SDValue N0 = Op.getOperand(0);
1201     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1202
1203     bool Replace1 = false;
1204     SDValue N1 = Op.getOperand(1);
1205     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1206     SDLoc DL(Op);
1207
1208     SDValue RV =
1209         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1210
1211     // We are always replacing N0/N1's use in N and only need
1212     // additional replacements if there are additional uses.
1213     Replace0 &= !N0->hasOneUse();
1214     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1215
1216     // Combine Op here so it is preserved past replacements.
1217     CombineTo(Op.getNode(), RV);
1218
1219     // If operands have a use ordering, make sure we deal with
1220     // predecessor first.
1221     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1222       std::swap(N0, N1);
1223       std::swap(NN0, NN1);
1224     }
1225
1226     if (Replace0) {
1227       AddToWorklist(NN0.getNode());
1228       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1229     }
1230     if (Replace1) {
1231       AddToWorklist(NN1.getNode());
1232       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1233     }
1234     return Op;
1235   }
1236   return SDValue();
1237 }
1238
1239 /// Promote the specified integer shift operation if the target indicates it is
1240 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1241 /// i32 since i16 instructions are longer.
1242 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1243   if (!LegalOperations)
1244     return SDValue();
1245
1246   EVT VT = Op.getValueType();
1247   if (VT.isVector() || !VT.isInteger())
1248     return SDValue();
1249
1250   // If operation type is 'undesirable', e.g. i16 on x86, consider
1251   // promoting it.
1252   unsigned Opc = Op.getOpcode();
1253   if (TLI.isTypeDesirableForOp(Opc, VT))
1254     return SDValue();
1255
1256   EVT PVT = VT;
1257   // Consult target whether it is a good idea to promote this operation and
1258   // what's the right type to promote it to.
1259   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1260     assert(PVT != VT && "Don't know what type to promote to!");
1261
1262     DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1263
1264     bool Replace = false;
1265     SDValue N0 = Op.getOperand(0);
1266     SDValue N1 = Op.getOperand(1);
1267     if (Opc == ISD::SRA)
1268       N0 = SExtPromoteOperand(N0, PVT);
1269     else if (Opc == ISD::SRL)
1270       N0 = ZExtPromoteOperand(N0, PVT);
1271     else
1272       N0 = PromoteOperand(N0, PVT, Replace);
1273
1274     if (!N0.getNode())
1275       return SDValue();
1276
1277     SDLoc DL(Op);
1278     SDValue RV =
1279         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1280
1281     AddToWorklist(N0.getNode());
1282     if (Replace)
1283       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1284
1285     // Deal with Op being deleted.
1286     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1287       return RV;
1288   }
1289   return SDValue();
1290 }
1291
1292 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1293   if (!LegalOperations)
1294     return SDValue();
1295
1296   EVT VT = Op.getValueType();
1297   if (VT.isVector() || !VT.isInteger())
1298     return SDValue();
1299
1300   // If operation type is 'undesirable', e.g. i16 on x86, consider
1301   // promoting it.
1302   unsigned Opc = Op.getOpcode();
1303   if (TLI.isTypeDesirableForOp(Opc, VT))
1304     return SDValue();
1305
1306   EVT PVT = VT;
1307   // Consult target whether it is a good idea to promote this operation and
1308   // what's the right type to promote it to.
1309   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1310     assert(PVT != VT && "Don't know what type to promote to!");
1311     // fold (aext (aext x)) -> (aext x)
1312     // fold (aext (zext x)) -> (zext x)
1313     // fold (aext (sext x)) -> (sext x)
1314     DEBUG(dbgs() << "\nPromoting ";
1315           Op.getNode()->dump(&DAG));
1316     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1317   }
1318   return SDValue();
1319 }
1320
1321 bool DAGCombiner::PromoteLoad(SDValue Op) {
1322   if (!LegalOperations)
1323     return false;
1324
1325   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1326     return false;
1327
1328   EVT VT = Op.getValueType();
1329   if (VT.isVector() || !VT.isInteger())
1330     return false;
1331
1332   // If operation type is 'undesirable', e.g. i16 on x86, consider
1333   // promoting it.
1334   unsigned Opc = Op.getOpcode();
1335   if (TLI.isTypeDesirableForOp(Opc, VT))
1336     return false;
1337
1338   EVT PVT = VT;
1339   // Consult target whether it is a good idea to promote this operation and
1340   // what's the right type to promote it to.
1341   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1342     assert(PVT != VT && "Don't know what type to promote to!");
1343
1344     SDLoc DL(Op);
1345     SDNode *N = Op.getNode();
1346     LoadSDNode *LD = cast<LoadSDNode>(N);
1347     EVT MemVT = LD->getMemoryVT();
1348     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1349       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1350                                                        : ISD::EXTLOAD)
1351       : LD->getExtensionType();
1352     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1353                                    LD->getChain(), LD->getBasePtr(),
1354                                    MemVT, LD->getMemOperand());
1355     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1356
1357     DEBUG(dbgs() << "\nPromoting ";
1358           N->dump(&DAG);
1359           dbgs() << "\nTo: ";
1360           Result.getNode()->dump(&DAG);
1361           dbgs() << '\n');
1362     WorklistRemover DeadNodes(*this);
1363     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1364     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1365     deleteAndRecombine(N);
1366     AddToWorklist(Result.getNode());
1367     return true;
1368   }
1369   return false;
1370 }
1371
1372 /// \brief Recursively delete a node which has no uses and any operands for
1373 /// which it is the only use.
1374 ///
1375 /// Note that this both deletes the nodes and removes them from the worklist.
1376 /// It also adds any nodes who have had a user deleted to the worklist as they
1377 /// may now have only one use and subject to other combines.
1378 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1379   if (!N->use_empty())
1380     return false;
1381
1382   SmallSetVector<SDNode *, 16> Nodes;
1383   Nodes.insert(N);
1384   do {
1385     N = Nodes.pop_back_val();
1386     if (!N)
1387       continue;
1388
1389     if (N->use_empty()) {
1390       for (const SDValue &ChildN : N->op_values())
1391         Nodes.insert(ChildN.getNode());
1392
1393       removeFromWorklist(N);
1394       DAG.DeleteNode(N);
1395     } else {
1396       AddToWorklist(N);
1397     }
1398   } while (!Nodes.empty());
1399   return true;
1400 }
1401
1402 //===----------------------------------------------------------------------===//
1403 //  Main DAG Combiner implementation
1404 //===----------------------------------------------------------------------===//
1405
1406 void DAGCombiner::Run(CombineLevel AtLevel) {
1407   // set the instance variables, so that the various visit routines may use it.
1408   Level = AtLevel;
1409   LegalOperations = Level >= AfterLegalizeVectorOps;
1410   LegalTypes = Level >= AfterLegalizeTypes;
1411
1412   // Add all the dag nodes to the worklist.
1413   for (SDNode &Node : DAG.allnodes())
1414     AddToWorklist(&Node);
1415
1416   // Create a dummy node (which is not added to allnodes), that adds a reference
1417   // to the root node, preventing it from being deleted, and tracking any
1418   // changes of the root.
1419   HandleSDNode Dummy(DAG.getRoot());
1420
1421   // While the worklist isn't empty, find a node and try to combine it.
1422   while (!WorklistMap.empty()) {
1423     SDNode *N;
1424     // The Worklist holds the SDNodes in order, but it may contain null entries.
1425     do {
1426       N = Worklist.pop_back_val();
1427     } while (!N);
1428
1429     bool GoodWorklistEntry = WorklistMap.erase(N);
1430     (void)GoodWorklistEntry;
1431     assert(GoodWorklistEntry &&
1432            "Found a worklist entry without a corresponding map entry!");
1433
1434     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1435     // N is deleted from the DAG, since they too may now be dead or may have a
1436     // reduced number of uses, allowing other xforms.
1437     if (recursivelyDeleteUnusedNodes(N))
1438       continue;
1439
1440     WorklistRemover DeadNodes(*this);
1441
1442     // If this combine is running after legalizing the DAG, re-legalize any
1443     // nodes pulled off the worklist.
1444     if (Level == AfterLegalizeDAG) {
1445       SmallSetVector<SDNode *, 16> UpdatedNodes;
1446       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1447
1448       for (SDNode *LN : UpdatedNodes) {
1449         AddToWorklist(LN);
1450         AddUsersToWorklist(LN);
1451       }
1452       if (!NIsValid)
1453         continue;
1454     }
1455
1456     DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1457
1458     // Add any operands of the new node which have not yet been combined to the
1459     // worklist as well. Because the worklist uniques things already, this
1460     // won't repeatedly process the same operand.
1461     CombinedNodes.insert(N);
1462     for (const SDValue &ChildN : N->op_values())
1463       if (!CombinedNodes.count(ChildN.getNode()))
1464         AddToWorklist(ChildN.getNode());
1465
1466     SDValue RV = combine(N);
1467
1468     if (!RV.getNode())
1469       continue;
1470
1471     ++NodesCombined;
1472
1473     // If we get back the same node we passed in, rather than a new node or
1474     // zero, we know that the node must have defined multiple values and
1475     // CombineTo was used.  Since CombineTo takes care of the worklist
1476     // mechanics for us, we have no work to do in this case.
1477     if (RV.getNode() == N)
1478       continue;
1479
1480     assert(N->getOpcode() != ISD::DELETED_NODE &&
1481            RV.getOpcode() != ISD::DELETED_NODE &&
1482            "Node was deleted but visit returned new node!");
1483
1484     DEBUG(dbgs() << " ... into: ";
1485           RV.getNode()->dump(&DAG));
1486
1487     if (N->getNumValues() == RV.getNode()->getNumValues())
1488       DAG.ReplaceAllUsesWith(N, RV.getNode());
1489     else {
1490       assert(N->getValueType(0) == RV.getValueType() &&
1491              N->getNumValues() == 1 && "Type mismatch");
1492       DAG.ReplaceAllUsesWith(N, &RV);
1493     }
1494
1495     // Push the new node and any users onto the worklist
1496     AddToWorklist(RV.getNode());
1497     AddUsersToWorklist(RV.getNode());
1498
1499     // Finally, if the node is now dead, remove it from the graph.  The node
1500     // may not be dead if the replacement process recursively simplified to
1501     // something else needing this node. This will also take care of adding any
1502     // operands which have lost a user to the worklist.
1503     recursivelyDeleteUnusedNodes(N);
1504   }
1505
1506   // If the root changed (e.g. it was a dead load, update the root).
1507   DAG.setRoot(Dummy.getValue());
1508   DAG.RemoveDeadNodes();
1509 }
1510
1511 SDValue DAGCombiner::visit(SDNode *N) {
1512   switch (N->getOpcode()) {
1513   default: break;
1514   case ISD::TokenFactor:        return visitTokenFactor(N);
1515   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1516   case ISD::ADD:                return visitADD(N);
1517   case ISD::SUB:                return visitSUB(N);
1518   case ISD::ADDC:               return visitADDC(N);
1519   case ISD::UADDO:              return visitUADDO(N);
1520   case ISD::SUBC:               return visitSUBC(N);
1521   case ISD::USUBO:              return visitUSUBO(N);
1522   case ISD::ADDE:               return visitADDE(N);
1523   case ISD::ADDCARRY:           return visitADDCARRY(N);
1524   case ISD::SUBE:               return visitSUBE(N);
1525   case ISD::SUBCARRY:           return visitSUBCARRY(N);
1526   case ISD::MUL:                return visitMUL(N);
1527   case ISD::SDIV:               return visitSDIV(N);
1528   case ISD::UDIV:               return visitUDIV(N);
1529   case ISD::SREM:
1530   case ISD::UREM:               return visitREM(N);
1531   case ISD::MULHU:              return visitMULHU(N);
1532   case ISD::MULHS:              return visitMULHS(N);
1533   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1534   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1535   case ISD::SMULO:              return visitSMULO(N);
1536   case ISD::UMULO:              return visitUMULO(N);
1537   case ISD::SMIN:
1538   case ISD::SMAX:
1539   case ISD::UMIN:
1540   case ISD::UMAX:               return visitIMINMAX(N);
1541   case ISD::AND:                return visitAND(N);
1542   case ISD::OR:                 return visitOR(N);
1543   case ISD::XOR:                return visitXOR(N);
1544   case ISD::SHL:                return visitSHL(N);
1545   case ISD::SRA:                return visitSRA(N);
1546   case ISD::SRL:                return visitSRL(N);
1547   case ISD::ROTR:
1548   case ISD::ROTL:               return visitRotate(N);
1549   case ISD::ABS:                return visitABS(N);
1550   case ISD::BSWAP:              return visitBSWAP(N);
1551   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1552   case ISD::CTLZ:               return visitCTLZ(N);
1553   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1554   case ISD::CTTZ:               return visitCTTZ(N);
1555   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1556   case ISD::CTPOP:              return visitCTPOP(N);
1557   case ISD::SELECT:             return visitSELECT(N);
1558   case ISD::VSELECT:            return visitVSELECT(N);
1559   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1560   case ISD::SETCC:              return visitSETCC(N);
1561   case ISD::SETCCE:             return visitSETCCE(N);
1562   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1563   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1564   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1565   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1566   case ISD::AssertSext:
1567   case ISD::AssertZext:         return visitAssertExt(N);
1568   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1569   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1570   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1571   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1572   case ISD::BITCAST:            return visitBITCAST(N);
1573   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1574   case ISD::FADD:               return visitFADD(N);
1575   case ISD::FSUB:               return visitFSUB(N);
1576   case ISD::FMUL:               return visitFMUL(N);
1577   case ISD::FMA:                return visitFMA(N);
1578   case ISD::FDIV:               return visitFDIV(N);
1579   case ISD::FREM:               return visitFREM(N);
1580   case ISD::FSQRT:              return visitFSQRT(N);
1581   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1582   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1583   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1584   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1585   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1586   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1587   case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
1588   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1589   case ISD::FNEG:               return visitFNEG(N);
1590   case ISD::FABS:               return visitFABS(N);
1591   case ISD::FFLOOR:             return visitFFLOOR(N);
1592   case ISD::FMINNUM:            return visitFMINNUM(N);
1593   case ISD::FMAXNUM:            return visitFMAXNUM(N);
1594   case ISD::FCEIL:              return visitFCEIL(N);
1595   case ISD::FTRUNC:             return visitFTRUNC(N);
1596   case ISD::BRCOND:             return visitBRCOND(N);
1597   case ISD::BR_CC:              return visitBR_CC(N);
1598   case ISD::LOAD:               return visitLOAD(N);
1599   case ISD::STORE:              return visitSTORE(N);
1600   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1601   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1602   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1603   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1604   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1605   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1606   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1607   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1608   case ISD::MGATHER:            return visitMGATHER(N);
1609   case ISD::MLOAD:              return visitMLOAD(N);
1610   case ISD::MSCATTER:           return visitMSCATTER(N);
1611   case ISD::MSTORE:             return visitMSTORE(N);
1612   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1613   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1614   }
1615   return SDValue();
1616 }
1617
1618 SDValue DAGCombiner::combine(SDNode *N) {
1619   SDValue RV = visit(N);
1620
1621   // If nothing happened, try a target-specific DAG combine.
1622   if (!RV.getNode()) {
1623     assert(N->getOpcode() != ISD::DELETED_NODE &&
1624            "Node was deleted but visit returned NULL!");
1625
1626     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1627         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1628
1629       // Expose the DAG combiner to the target combiner impls.
1630       TargetLowering::DAGCombinerInfo
1631         DagCombineInfo(DAG, Level, false, this);
1632
1633       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1634     }
1635   }
1636
1637   // If nothing happened still, try promoting the operation.
1638   if (!RV.getNode()) {
1639     switch (N->getOpcode()) {
1640     default: break;
1641     case ISD::ADD:
1642     case ISD::SUB:
1643     case ISD::MUL:
1644     case ISD::AND:
1645     case ISD::OR:
1646     case ISD::XOR:
1647       RV = PromoteIntBinOp(SDValue(N, 0));
1648       break;
1649     case ISD::SHL:
1650     case ISD::SRA:
1651     case ISD::SRL:
1652       RV = PromoteIntShiftOp(SDValue(N, 0));
1653       break;
1654     case ISD::SIGN_EXTEND:
1655     case ISD::ZERO_EXTEND:
1656     case ISD::ANY_EXTEND:
1657       RV = PromoteExtend(SDValue(N, 0));
1658       break;
1659     case ISD::LOAD:
1660       if (PromoteLoad(SDValue(N, 0)))
1661         RV = SDValue(N, 0);
1662       break;
1663     }
1664   }
1665
1666   // If N is a commutative binary node, try eliminate it if the commuted
1667   // version is already present in the DAG.
1668   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1669       N->getNumValues() == 1) {
1670     SDValue N0 = N->getOperand(0);
1671     SDValue N1 = N->getOperand(1);
1672
1673     // Constant operands are canonicalized to RHS.
1674     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1675       SDValue Ops[] = {N1, N0};
1676       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1677                                             N->getFlags());
1678       if (CSENode)
1679         return SDValue(CSENode, 0);
1680     }
1681   }
1682
1683   return RV;
1684 }
1685
1686 /// Given a node, return its input chain if it has one, otherwise return a null
1687 /// sd operand.
1688 static SDValue getInputChainForNode(SDNode *N) {
1689   if (unsigned NumOps = N->getNumOperands()) {
1690     if (N->getOperand(0).getValueType() == MVT::Other)
1691       return N->getOperand(0);
1692     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1693       return N->getOperand(NumOps-1);
1694     for (unsigned i = 1; i < NumOps-1; ++i)
1695       if (N->getOperand(i).getValueType() == MVT::Other)
1696         return N->getOperand(i);
1697   }
1698   return SDValue();
1699 }
1700
1701 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1702   // If N has two operands, where one has an input chain equal to the other,
1703   // the 'other' chain is redundant.
1704   if (N->getNumOperands() == 2) {
1705     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1706       return N->getOperand(0);
1707     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1708       return N->getOperand(1);
1709   }
1710
1711   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1712   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
1713   SmallPtrSet<SDNode*, 16> SeenOps;
1714   bool Changed = false;             // If we should replace this token factor.
1715
1716   // Start out with this token factor.
1717   TFs.push_back(N);
1718
1719   // Iterate through token factors.  The TFs grows when new token factors are
1720   // encountered.
1721   for (unsigned i = 0; i < TFs.size(); ++i) {
1722     SDNode *TF = TFs[i];
1723
1724     // Check each of the operands.
1725     for (const SDValue &Op : TF->op_values()) {
1726       switch (Op.getOpcode()) {
1727       case ISD::EntryToken:
1728         // Entry tokens don't need to be added to the list. They are
1729         // redundant.
1730         Changed = true;
1731         break;
1732
1733       case ISD::TokenFactor:
1734         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1735           // Queue up for processing.
1736           TFs.push_back(Op.getNode());
1737           // Clean up in case the token factor is removed.
1738           AddToWorklist(Op.getNode());
1739           Changed = true;
1740           break;
1741         }
1742         LLVM_FALLTHROUGH;
1743
1744       default:
1745         // Only add if it isn't already in the list.
1746         if (SeenOps.insert(Op.getNode()).second)
1747           Ops.push_back(Op);
1748         else
1749           Changed = true;
1750         break;
1751       }
1752     }
1753   }
1754
1755   // Remove Nodes that are chained to another node in the list. Do so
1756   // by walking up chains breath-first stopping when we've seen
1757   // another operand. In general we must climb to the EntryNode, but we can exit
1758   // early if we find all remaining work is associated with just one operand as
1759   // no further pruning is possible.
1760
1761   // List of nodes to search through and original Ops from which they originate.
1762   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1763   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1764   SmallPtrSet<SDNode *, 16> SeenChains;
1765   bool DidPruneOps = false;
1766
1767   unsigned NumLeftToConsider = 0;
1768   for (const SDValue &Op : Ops) {
1769     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1770     OpWorkCount.push_back(1);
1771   }
1772
1773   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1774     // If this is an Op, we can remove the op from the list. Remark any
1775     // search associated with it as from the current OpNumber.
1776     if (SeenOps.count(Op) != 0) {
1777       Changed = true;
1778       DidPruneOps = true;
1779       unsigned OrigOpNumber = 0;
1780       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1781         OrigOpNumber++;
1782       assert((OrigOpNumber != Ops.size()) &&
1783              "expected to find TokenFactor Operand");
1784       // Re-mark worklist from OrigOpNumber to OpNumber
1785       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1786         if (Worklist[i].second == OrigOpNumber) {
1787           Worklist[i].second = OpNumber;
1788         }
1789       }
1790       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1791       OpWorkCount[OrigOpNumber] = 0;
1792       NumLeftToConsider--;
1793     }
1794     // Add if it's a new chain
1795     if (SeenChains.insert(Op).second) {
1796       OpWorkCount[OpNumber]++;
1797       Worklist.push_back(std::make_pair(Op, OpNumber));
1798     }
1799   };
1800
1801   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1802     // We need at least be consider at least 2 Ops to prune.
1803     if (NumLeftToConsider <= 1)
1804       break;
1805     auto CurNode = Worklist[i].first;
1806     auto CurOpNumber = Worklist[i].second;
1807     assert((OpWorkCount[CurOpNumber] > 0) &&
1808            "Node should not appear in worklist");
1809     switch (CurNode->getOpcode()) {
1810     case ISD::EntryToken:
1811       // Hitting EntryToken is the only way for the search to terminate without
1812       // hitting
1813       // another operand's search. Prevent us from marking this operand
1814       // considered.
1815       NumLeftToConsider++;
1816       break;
1817     case ISD::TokenFactor:
1818       for (const SDValue &Op : CurNode->op_values())
1819         AddToWorklist(i, Op.getNode(), CurOpNumber);
1820       break;
1821     case ISD::CopyFromReg:
1822     case ISD::CopyToReg:
1823       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1824       break;
1825     default:
1826       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1827         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1828       break;
1829     }
1830     OpWorkCount[CurOpNumber]--;
1831     if (OpWorkCount[CurOpNumber] == 0)
1832       NumLeftToConsider--;
1833   }
1834
1835   // If we've changed things around then replace token factor.
1836   if (Changed) {
1837     SDValue Result;
1838     if (Ops.empty()) {
1839       // The entry token is the only possible outcome.
1840       Result = DAG.getEntryNode();
1841     } else {
1842       if (DidPruneOps) {
1843         SmallVector<SDValue, 8> PrunedOps;
1844         //
1845         for (const SDValue &Op : Ops) {
1846           if (SeenChains.count(Op.getNode()) == 0)
1847             PrunedOps.push_back(Op);
1848         }
1849         Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
1850       } else {
1851         Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1852       }
1853     }
1854     return Result;
1855   }
1856   return SDValue();
1857 }
1858
1859 /// MERGE_VALUES can always be eliminated.
1860 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1861   WorklistRemover DeadNodes(*this);
1862   // Replacing results may cause a different MERGE_VALUES to suddenly
1863   // be CSE'd with N, and carry its uses with it. Iterate until no
1864   // uses remain, to ensure that the node can be safely deleted.
1865   // First add the users of this node to the work list so that they
1866   // can be tried again once they have new operands.
1867   AddUsersToWorklist(N);
1868   do {
1869     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1870       DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1871   } while (!N->use_empty());
1872   deleteAndRecombine(N);
1873   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1874 }
1875
1876 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1877 /// ConstantSDNode pointer else nullptr.
1878 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1879   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1880   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1881 }
1882
1883 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1884   auto BinOpcode = BO->getOpcode();
1885   assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
1886           BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
1887           BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
1888           BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
1889           BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
1890           BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
1891           BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
1892           BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
1893           BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
1894          "Unexpected binary operator");
1895
1896   // Bail out if any constants are opaque because we can't constant fold those.
1897   SDValue C1 = BO->getOperand(1);
1898   if (!isConstantOrConstantVector(C1, true) &&
1899       !isConstantFPBuildVectorOrConstantFP(C1))
1900     return SDValue();
1901
1902   // Don't do this unless the old select is going away. We want to eliminate the
1903   // binary operator, not replace a binop with a select.
1904   // TODO: Handle ISD::SELECT_CC.
1905   SDValue Sel = BO->getOperand(0);
1906   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1907     return SDValue();
1908
1909   SDValue CT = Sel.getOperand(1);
1910   if (!isConstantOrConstantVector(CT, true) &&
1911       !isConstantFPBuildVectorOrConstantFP(CT))
1912     return SDValue();
1913
1914   SDValue CF = Sel.getOperand(2);
1915   if (!isConstantOrConstantVector(CF, true) &&
1916       !isConstantFPBuildVectorOrConstantFP(CF))
1917     return SDValue();
1918
1919   // We have a select-of-constants followed by a binary operator with a
1920   // constant. Eliminate the binop by pulling the constant math into the select.
1921   // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
1922   EVT VT = Sel.getValueType();
1923   SDLoc DL(Sel);
1924   SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
1925   assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
1926           isConstantFPBuildVectorOrConstantFP(NewCT)) &&
1927          "Failed to constant fold a binop with constant operands");
1928
1929   SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
1930   assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
1931           isConstantFPBuildVectorOrConstantFP(NewCF)) &&
1932          "Failed to constant fold a binop with constant operands");
1933
1934   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1935 }
1936
1937 SDValue DAGCombiner::visitADD(SDNode *N) {
1938   SDValue N0 = N->getOperand(0);
1939   SDValue N1 = N->getOperand(1);
1940   EVT VT = N0.getValueType();
1941   SDLoc DL(N);
1942
1943   // fold vector ops
1944   if (VT.isVector()) {
1945     if (SDValue FoldedVOp = SimplifyVBinOp(N))
1946       return FoldedVOp;
1947
1948     // fold (add x, 0) -> x, vector edition
1949     if (ISD::isBuildVectorAllZeros(N1.getNode()))
1950       return N0;
1951     if (ISD::isBuildVectorAllZeros(N0.getNode()))
1952       return N1;
1953   }
1954
1955   // fold (add x, undef) -> undef
1956   if (N0.isUndef())
1957     return N0;
1958
1959   if (N1.isUndef())
1960     return N1;
1961
1962   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
1963     // canonicalize constant to RHS
1964     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
1965       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
1966     // fold (add c1, c2) -> c1+c2
1967     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
1968                                       N1.getNode());
1969   }
1970
1971   // fold (add x, 0) -> x
1972   if (isNullConstant(N1))
1973     return N0;
1974
1975   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
1976     // fold ((c1-A)+c2) -> (c1+c2)-A
1977     if (N0.getOpcode() == ISD::SUB &&
1978         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
1979       // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
1980       return DAG.getNode(ISD::SUB, DL, VT,
1981                          DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
1982                          N0.getOperand(1));
1983     }
1984
1985     // add (sext i1 X), 1 -> zext (not i1 X)
1986     // We don't transform this pattern:
1987     //   add (zext i1 X), -1 -> sext (not i1 X)
1988     // because most (?) targets generate better code for the zext form.
1989     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
1990         isOneConstantOrOneSplatConstant(N1)) {
1991       SDValue X = N0.getOperand(0);
1992       if ((!LegalOperations ||
1993            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
1994             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
1995           X.getScalarValueSizeInBits() == 1) {
1996         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
1997         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
1998       }
1999     }
2000
2001     // Undo the add -> or combine to merge constant offsets from a frame index.
2002     if (N0.getOpcode() == ISD::OR &&
2003         isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2004         isa<ConstantSDNode>(N0.getOperand(1)) &&
2005         DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2006       SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2007       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2008     }
2009   }
2010
2011   if (SDValue NewSel = foldBinOpIntoSelect(N))
2012     return NewSel;
2013
2014   // reassociate add
2015   if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
2016     return RADD;
2017
2018   // fold ((0-A) + B) -> B-A
2019   if (N0.getOpcode() == ISD::SUB &&
2020       isNullConstantOrNullSplatConstant(N0.getOperand(0)))
2021     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2022
2023   // fold (A + (0-B)) -> A-B
2024   if (N1.getOpcode() == ISD::SUB &&
2025       isNullConstantOrNullSplatConstant(N1.getOperand(0)))
2026     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2027
2028   // fold (A+(B-A)) -> B
2029   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2030     return N1.getOperand(0);
2031
2032   // fold ((B-A)+A) -> B
2033   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2034     return N0.getOperand(0);
2035
2036   // fold (A+(B-(A+C))) to (B-C)
2037   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2038       N0 == N1.getOperand(1).getOperand(0))
2039     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2040                        N1.getOperand(1).getOperand(1));
2041
2042   // fold (A+(B-(C+A))) to (B-C)
2043   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2044       N0 == N1.getOperand(1).getOperand(1))
2045     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2046                        N1.getOperand(1).getOperand(0));
2047
2048   // fold (A+((B-A)+or-C)) to (B+or-C)
2049   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2050       N1.getOperand(0).getOpcode() == ISD::SUB &&
2051       N0 == N1.getOperand(0).getOperand(1))
2052     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2053                        N1.getOperand(1));
2054
2055   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2056   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2057     SDValue N00 = N0.getOperand(0);
2058     SDValue N01 = N0.getOperand(1);
2059     SDValue N10 = N1.getOperand(0);
2060     SDValue N11 = N1.getOperand(1);
2061
2062     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2063       return DAG.getNode(ISD::SUB, DL, VT,
2064                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2065                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2066   }
2067
2068   if (SimplifyDemandedBits(SDValue(N, 0)))
2069     return SDValue(N, 0);
2070
2071   // fold (a+b) -> (a|b) iff a and b share no bits.
2072   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2073       DAG.haveNoCommonBitsSet(N0, N1))
2074     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2075
2076   if (SDValue Combined = visitADDLike(N0, N1, N))
2077     return Combined;
2078
2079   if (SDValue Combined = visitADDLike(N1, N0, N))
2080     return Combined;
2081
2082   return SDValue();
2083 }
2084
2085 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2086   bool Masked = false;
2087
2088   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2089   while (true) {
2090     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2091       V = V.getOperand(0);
2092       continue;
2093     }
2094
2095     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2096       Masked = true;
2097       V = V.getOperand(0);
2098       continue;
2099     }
2100
2101     break;
2102   }
2103
2104   // If this is not a carry, return.
2105   if (V.getResNo() != 1)
2106     return SDValue();
2107
2108   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2109       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2110     return SDValue();
2111
2112   // If the result is masked, then no matter what kind of bool it is we can
2113   // return. If it isn't, then we need to make sure the bool type is either 0 or
2114   // 1 and not other values.
2115   if (Masked ||
2116       TLI.getBooleanContents(V.getValueType()) ==
2117           TargetLoweringBase::ZeroOrOneBooleanContent)
2118     return V;
2119
2120   return SDValue();
2121 }
2122
2123 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2124   EVT VT = N0.getValueType();
2125   SDLoc DL(LocReference);
2126
2127   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2128   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2129       isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
2130     return DAG.getNode(ISD::SUB, DL, VT, N0,
2131                        DAG.getNode(ISD::SHL, DL, VT,
2132                                    N1.getOperand(0).getOperand(1),
2133                                    N1.getOperand(1)));
2134
2135   if (N1.getOpcode() == ISD::AND) {
2136     SDValue AndOp0 = N1.getOperand(0);
2137     unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
2138     unsigned DestBits = VT.getScalarSizeInBits();
2139
2140     // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
2141     // and similar xforms where the inner op is either ~0 or 0.
2142     if (NumSignBits == DestBits &&
2143         isOneConstantOrOneSplatConstant(N1->getOperand(1)))
2144       return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
2145   }
2146
2147   // add (sext i1), X -> sub X, (zext i1)
2148   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2149       N0.getOperand(0).getValueType() == MVT::i1 &&
2150       !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
2151     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2152     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2153   }
2154
2155   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2156   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2157     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2158     if (TN->getVT() == MVT::i1) {
2159       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2160                                  DAG.getConstant(1, DL, VT));
2161       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2162     }
2163   }
2164
2165   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2166   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2167       N1.getResNo() == 0)
2168     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2169                        N0, N1.getOperand(0), N1.getOperand(2));
2170
2171   // (add X, Carry) -> (addcarry X, 0, Carry)
2172   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2173     if (SDValue Carry = getAsCarry(TLI, N1))
2174       return DAG.getNode(ISD::ADDCARRY, DL,
2175                          DAG.getVTList(VT, Carry.getValueType()), N0,
2176                          DAG.getConstant(0, DL, VT), Carry);
2177
2178   return SDValue();
2179 }
2180
2181 SDValue DAGCombiner::visitADDC(SDNode *N) {
2182   SDValue N0 = N->getOperand(0);
2183   SDValue N1 = N->getOperand(1);
2184   EVT VT = N0.getValueType();
2185   SDLoc DL(N);
2186
2187   // If the flag result is dead, turn this into an ADD.
2188   if (!N->hasAnyUseOfValue(1))
2189     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2190                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2191
2192   // canonicalize constant to RHS.
2193   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2194   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2195   if (N0C && !N1C)
2196     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2197
2198   // fold (addc x, 0) -> x + no carry out
2199   if (isNullConstant(N1))
2200     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2201                                         DL, MVT::Glue));
2202
2203   // If it cannot overflow, transform into an add.
2204   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2205     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2206                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2207
2208   return SDValue();
2209 }
2210
2211 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2212   SDValue N0 = N->getOperand(0);
2213   SDValue N1 = N->getOperand(1);
2214   EVT VT = N0.getValueType();
2215   if (VT.isVector())
2216     return SDValue();
2217
2218   EVT CarryVT = N->getValueType(1);
2219   SDLoc DL(N);
2220
2221   // If the flag result is dead, turn this into an ADD.
2222   if (!N->hasAnyUseOfValue(1))
2223     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2224                      DAG.getUNDEF(CarryVT));
2225
2226   // canonicalize constant to RHS.
2227   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2228   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2229   if (N0C && !N1C)
2230     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2231
2232   // fold (uaddo x, 0) -> x + no carry out
2233   if (isNullConstant(N1))
2234     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2235
2236   // If it cannot overflow, transform into an add.
2237   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2238     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2239                      DAG.getConstant(0, DL, CarryVT));
2240
2241   if (SDValue Combined = visitUADDOLike(N0, N1, N))
2242     return Combined;
2243
2244   if (SDValue Combined = visitUADDOLike(N1, N0, N))
2245     return Combined;
2246
2247   return SDValue();
2248 }
2249
2250 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
       // Helper for UADDO combines. N0 and N1 are the two addends in the order
       // to be matched (only N1 is inspected for a pattern); N supplies the
       // result type list and the debug location for any node that is built.
       // Returns the replacement ADDCARRY node, or an empty SDValue.
2251   auto VT = N0.getValueType();
2252
2253   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2254   // If Y + 1 cannot overflow.
2255   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2256     SDValue Y = N1.getOperand(0);
         // Build the constant 1 only to query whether Y + 1 can overflow.
2257     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2258     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2259       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2260                          N1.getOperand(2));
2261   }
2262
2263   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
       // Only done when the target reports ADDCARRY as legal or custom for VT.
       // NOTE(review): getAsCarry is defined elsewhere; presumably it returns a
       // non-empty value only when N1 is a carry flag -- confirm.
2264   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2265     if (SDValue Carry = getAsCarry(TLI, N1))
2266       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2267                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2268
2269   return SDValue();
2270 }
2271
2272 SDValue DAGCombiner::visitADDE(SDNode *N) {
       // Combine an ADDE node (operands: two addends and a carry-in as operand
       // 2). Returns the replacement node, or an empty SDValue if no fold
       // applies.
2273   SDValue N0 = N->getOperand(0);
2274   SDValue N1 = N->getOperand(1);
2275   SDValue CarryIn = N->getOperand(2);
2276
2277   // canonicalize constant to RHS
2278   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2279   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2280   if (N0C && !N1C)
2281     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2282                        N1, N0, CarryIn);
2283
2284   // fold (adde x, y, false) -> (addc x, y)
       // "false" here means the carry-in is a CARRY_FALSE node.
2285   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2286     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2287
2288   return SDValue();
2289 }
2290
2291 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
       // Combine an ADDCARRY node (operands: two addends and a carry-in as
       // operand 2). Folds are tried in order; returns an empty SDValue when
       // none applies.
2292   SDValue N0 = N->getOperand(0);
2293   SDValue N1 = N->getOperand(1);
2294   SDValue CarryIn = N->getOperand(2);
2295   SDLoc DL(N);
2296
2297   // canonicalize constant to RHS
2298   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2299   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2300   if (N0C && !N1C)
2301     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2302
2303   // fold (addcarry x, y, false) -> (uaddo x, y)
       // "false" here means the carry-in is the constant 0.
2304   if (isNullConstant(CarryIn))
2305     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2306
2307   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2308   if (isNullConstant(N0) && isNullConstant(N1)) {
2309     EVT VT = N0.getValueType();
2310     EVT CarryVT = CarryIn.getValueType();
         // Bring the carry-in to the sum type; the AND below keeps only bit 0.
2311     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2312     AddToWorklist(CarryExt.getNode());
2313     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2314                                     DAG.getConstant(1, DL, VT)),
2315                      DAG.getConstant(0, DL, CarryVT));
2316   }
2317
       // visitADDCARRYLike treats its two addends differently, so it is tried
       // with the operands in both orders.
2318   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2319     return Combined;
2320
2321   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2322     return Combined;
2323
2324   return SDValue();
2325 }
2326
2327 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2328                                        SDNode *N) {
       // Helper for ADDCARRY combines. N0 and N1 are the addends in the order
       // to be matched, CarryIn is the incoming carry, and N supplies the
       // result type list and debug location. Returns the replacement ADDCARRY
       // node, or an empty SDValue.
2329   // Iff the flag result is dead:
2330   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
       // For UADDO, N0 must be the sum (result number 0), not the carry.
2331   if ((N0.getOpcode() == ISD::ADD ||
2332        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2333       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2334     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2335                        N0.getOperand(0), N0.getOperand(1), CarryIn);
2336
2337   /**
2338    * When one of the addcarry argument is itself a carry, we may be facing
2339    * a diamond carry propagation. In which case we try to transform the DAG
2340    * to ensure linear carry propagation if that is possible.
2341    *
2342    * We are trying to get:
2343    *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2344    */
       // NOTE(review): getAsCarry is defined elsewhere; presumably Y is the
       // node producing the carry that N1 represents -- confirm.
2345   if (auto Y = getAsCarry(TLI, N1)) {
2346     /**
2347      *            (uaddo A, B)
2348      *             /       \
2349      *          Carry      Sum
2350      *            |          \
2351      *            | (addcarry *, 0, Z)
2352      *            |       /
2353      *             \   Carry
2354      *              |   /
2355      * (addcarry X, *, *)
2356      */
         // Match: Y is a UADDO, and CarryIn is the carry result (result 1) of
         // an ADDCARRY whose first operand is Y's sum and whose second operand
         // is the constant 0.
2357     if (Y.getOpcode() == ISD::UADDO &&
2358         CarryIn.getResNo() == 1 &&
2359         CarryIn.getOpcode() == ISD::ADDCARRY &&
2360         isNullConstant(CarryIn.getOperand(1)) &&
2361         CarryIn.getOperand(0) == Y.getValue(0)) {
           // Fold Z directly into the A + B addition, then feed that node's
           // carry result into a new (addcarry N0, 0, carry).
2362       auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2363                               Y.getOperand(0), Y.getOperand(1),
2364                               CarryIn.getOperand(2));
2365       AddToWorklist(NewY.getNode());
2366       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2367                          DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2368                          NewY.getValue(1));
2369     }
2370   }
2371
2372   return SDValue();
2373 }
2374
2375 // Since it may not be valid to emit a fold to zero for vector initializers
2376 // check if we can before folding.
     //
     // Returns a zero constant of type VT, or an empty SDValue when VT is a
     // vector, LegalOperations is set, and BUILD_VECTOR is not legal for VT.
     // Scalar types always yield the zero constant.
     // NOTE(review): the LegalTypes parameter is not read anywhere in this
     // function.
2377 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2378                              SelectionDAG &DAG, bool LegalOperations,
2379                              bool LegalTypes) {
2380   if (!VT.isVector())
2381     return DAG.getConstant(0, DL, VT);
2382   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2383     return DAG.getConstant(0, DL, VT);
2384   return SDValue();
2385 }
2386
2387 SDValue DAGCombiner::visitSUB(SDNode *N) {
       // Combine a SUB node (N0 - N1). The folds below are tried strictly in
       // order and the first match wins; an empty SDValue is returned when no
       // fold applies.
2388   SDValue N0 = N->getOperand(0);
2389   SDValue N1 = N->getOperand(1);
2390   EVT VT = N0.getValueType();
2391   SDLoc DL(N);
2392
2393   // fold vector ops
2394   if (VT.isVector()) {
2395     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2396       return FoldedVOp;
2397
2398     // fold (sub x, 0) -> x, vector edition
2399     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2400       return N0;
2401   }
2402
2403   // fold (sub x, x) -> 0
2404   // FIXME: Refactor this and xor and other similar operations together.
       // Note: tryFoldToZero may return an empty SDValue for vectors, in which
       // case this function returns without trying the remaining folds.
2405   if (N0 == N1)
2406     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
2407   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2408       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2409     // fold (sub c1, c2) -> c1-c2
2410     return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2411                                       N1.getNode());
2412   }
2413
2414   if (SDValue NewSel = foldBinOpIntoSelect(N))
2415     return NewSel;
2416
2417   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2418
2419   // fold (sub x, c) -> (add x, -c)
       // This returns unconditionally whenever N1C is non-null, so N1C is
       // always null in the code below.
2420   if (N1C) {
2421     return DAG.getNode(ISD::ADD, DL, VT, N0,
2422                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2423   }
2424
       // Folds for negation: (sub 0, X), scalar or splat-vector zero.
2425   if (isNullConstantOrNullSplatConstant(N0)) {
2426     unsigned BitWidth = VT.getScalarSizeInBits();
2427     // Right-shifting everything out but the sign bit followed by negation is
2428     // the same as flipping arithmetic/logical shift type without the negation:
2429     // -(X >>u 31) -> (X >>s 31)
2430     // -(X >>s 31) -> (X >>u 31)
2431     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2432       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2433       if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2434         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2435         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2436           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2437       }
2438     }
2439
2440     // 0 - X --> 0 if the sub is NUW.
2441     if (N->getFlags().hasNoUnsignedWrap())
2442       return N0;
2443
         // True when every bit of N1 other than the sign bit is known zero.
2444     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2445       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2446       // N1 must be 0 because negating the minimum signed value is undefined.
2447       if (N->getFlags().hasNoSignedWrap())
2448         return N0;
2449
2450       // 0 - X --> X if X is 0 or the minimum signed value.
2451       return N1;
2452     }
2453   }
2454
2455   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2456   if (isAllOnesConstantOrAllOnesSplatConstant(N0))
2457     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2458
2459   // fold A-(A-B) -> B
2460   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2461     return N1.getOperand(1);
2462
2463   // fold (A+B)-A -> B
2464   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2465     return N0.getOperand(1);
2466
2467   // fold (A+B)-B -> A
2468   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2469     return N0.getOperand(0);
2470
2471   // fold C2-(A+C1) -> (C2-C1)-A
       // Both constants must be non-opaque constants or constant vectors.
2472   if (N1.getOpcode() == ISD::ADD) {
2473     SDValue N11 = N1.getOperand(1);
2474     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2475         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2476       SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2477       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2478     }
2479   }
2480
2481   // fold ((A+(B+or-C))-B) -> A+or-C
       // The inner opcode (ADD or SUB) is reused for the new node.
2482   if (N0.getOpcode() == ISD::ADD &&
2483       (N0.getOperand(1).getOpcode() == ISD::SUB ||
2484        N0.getOperand(1).getOpcode() == ISD::ADD) &&
2485       N0.getOperand(1).getOperand(0) == N1)
2486     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2487                        N0.getOperand(1).getOperand(1));
2488
2489   // fold ((A+(C+B))-B) -> A+C
2490   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2491       N0.getOperand(1).getOperand(1) == N1)
2492     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2493                        N0.getOperand(1).getOperand(0));
2494
2495   // fold ((A-(B-C))-C) -> A-B
2496   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2497       N0.getOperand(1).getOperand(1) == N1)
2498     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2499                        N0.getOperand(1).getOperand(0));
2500
2501   // If either operand of a sub is undef, the result is undef
2502   if (N0.isUndef())
2503     return N0;
2504   if (N1.isUndef())
2505     return N1;
2506
2507   // If the relocation model supports it, consider symbol offsets.
2508   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2509     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2510       // fold (sub Sym, c) -> Sym-c
           // NOTE(review): N1C is always null here because the
           // (sub x, c) -> (add x, -c) fold above returns for every non-null
           // N1C, so this branch looks unreachable -- confirm.
2511       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2512         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2513                                     GA->getOffset() -
2514                                         (uint64_t)N1C->getSExtValue());
2515       // fold (sub Sym+c1, Sym+c2) -> c1-c2
2516       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2517         if (GA->getGlobal() == GB->getGlobal())
2518           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2519                                  DL, VT);
2520     }
2521
2522   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2523   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2524     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2525     if (TN->getVT() == MVT::i1) {
2526       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2527                                  DAG.getConstant(1, DL, VT));
2528       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2529     }
2530   }
2531
2532   return SDValue();
2533 }
2534
2535 SDValue DAGCombiner::visitSUBC(SDNode *N) {
       // Combine a SUBC node (N0 - N1, with a flag as result 1). Every fold
       // here replaces the flag result with a CARRY_FALSE glue node. Returns
       // an empty SDValue when no fold applies.
2536   SDValue N0 = N->getOperand(0);
2537   SDValue N1 = N->getOperand(1);
2538   EVT VT = N0.getValueType();
2539   SDLoc DL(N);
2540
2541   // If the flag result is dead, turn this into an SUB.
2542   if (!N->hasAnyUseOfValue(1))
2543     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2544                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2545
2546   // fold (subc x, x) -> 0 + no borrow
2547   if (N0 == N1)
2548     return CombineTo(N, DAG.getConstant(0, DL, VT),
2549                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2550
2551   // fold (subc x, 0) -> x + no borrow
2552   if (isNullConstant(N1))
2553     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2554
2555   // Canonicalize (subc -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2556   if (isAllOnesConstant(N0))
2557     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2558                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2559
2560   return SDValue();
2561 }
2562
2563 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
       // Combine a USUBO node (N0 - N1, with a borrow flag of type CarryVT as
       // result 1). Returns an empty SDValue when no fold applies.
2564   SDValue N0 = N->getOperand(0);
2565   SDValue N1 = N->getOperand(1);
2566   EVT VT = N0.getValueType();
       // No folds are attempted for vector types.
2567   if (VT.isVector())
2568     return SDValue();
2569
2570   EVT CarryVT = N->getValueType(1);
2571   SDLoc DL(N);
2572
2573   // If the flag result is dead, turn this into an SUB.
       // The unused flag result is replaced with undef.
2574   if (!N->hasAnyUseOfValue(1))
2575     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2576                      DAG.getUNDEF(CarryVT));
2577
2578   // fold (usubo x, x) -> 0 + no borrow
2579   if (N0 == N1)
2580     return CombineTo(N, DAG.getConstant(0, DL, VT),
2581                      DAG.getConstant(0, DL, CarryVT));
2582
2583   // fold (usubo x, 0) -> x + no borrow
2584   if (isNullConstant(N1))
2585     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2586
2587   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2588   if (isAllOnesConstant(N0))
2589     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2590                      DAG.getConstant(0, DL, CarryVT));
2591
2592   return SDValue();
2593 }
2594
2595 SDValue DAGCombiner::visitSUBE(SDNode *N) {
       // Combine a SUBE node (operands: N0, N1 and a carry-in as operand 2).
       // Returns the replacement SUBC node, or an empty SDValue.
2596   SDValue N0 = N->getOperand(0);
2597   SDValue N1 = N->getOperand(1);
2598   SDValue CarryIn = N->getOperand(2);
2599
2600   // fold (sube x, y, false) -> (subc x, y)
       // "false" here means the carry-in is a CARRY_FALSE node.
2601   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2602     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2603
2604   return SDValue();
2605 }
2606
2607 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
       // Combine a SUBCARRY node (operands: N0, N1 and a carry-in as operand
       // 2). Returns the replacement USUBO node, or an empty SDValue.
2608   SDValue N0 = N->getOperand(0);
2609   SDValue N1 = N->getOperand(1);
2610   SDValue CarryIn = N->getOperand(2);
2611
2612   // fold (subcarry x, y, false) -> (usubo x, y)
       // "false" here means the carry-in is the constant 0.
2613   if (isNullConstant(CarryIn))
2614     return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2615
2616   return SDValue();
2617 }
2618
2619 SDValue DAGCombiner::visitMUL(SDNode *N) {
       // Combine a MUL node. The folds below are tried strictly in order and
       // the first match wins; an empty SDValue is returned when no fold
       // applies.
2620   SDValue N0 = N->getOperand(0);
2621   SDValue N1 = N->getOperand(1);
2622   EVT VT = N0.getValueType();
2623
2624   // fold (mul x, undef) -> 0
2625   if (N0.isUndef() || N1.isUndef())
2626     return DAG.getConstant(0, SDLoc(N), VT);
2627
       // For scalars these describe ConstantSDNode operands; for vectors they
       // describe constant splat operands. The opaque flags are only ever set
       // on the scalar path.
2628   bool N0IsConst = false;
2629   bool N1IsConst = false;
2630   bool N1IsOpaqueConst = false;
2631   bool N0IsOpaqueConst = false;
2632   APInt ConstValue0, ConstValue1;
2633   // fold vector ops
2634   if (VT.isVector()) {
2635     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2636       return FoldedVOp;
2637
2638     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2639     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2640     assert((!N0IsConst ||
2641             ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
2642            "Splat APInt should be element width");
2643     assert((!N1IsConst ||
2644             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
2645            "Splat APInt should be element width");
2646   } else {
2647     N0IsConst = isa<ConstantSDNode>(N0);
2648     if (N0IsConst) {
2649       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2650       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2651     }
2652     N1IsConst = isa<ConstantSDNode>(N1);
2653     if (N1IsConst) {
2654       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2655       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2656     }
2657   }
2658
2659   // fold (mul c1, c2) -> c1*c2
2660   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2661     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2662                                       N0.getNode(), N1.getNode());
2663
2664   // canonicalize constant to RHS (vector doesn't have to splat)
2665   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2666      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2667     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2668   // fold (mul x, 0) -> 0
2669   if (N1IsConst && ConstValue1.isNullValue())
2670     return N1;
2671   // fold (mul x, 1) -> x
2672   if (N1IsConst && ConstValue1.isOneValue())
2673     return N0;
2674
2675   if (SDValue NewSel = foldBinOpIntoSelect(N))
2676     return NewSel;
2677
2678   // fold (mul x, -1) -> 0-x
2679   if (N1IsConst && ConstValue1.isAllOnesValue()) {
2680     SDLoc DL(N);
2681     return DAG.getNode(ISD::SUB, DL, VT,
2682                        DAG.getConstant(0, DL, VT), N0);
2683   }
2684   // fold (mul x, (1 << c)) -> x << c
       // For vector types this is only done while Level is at or before
       // AfterLegalizeVectorOps.
2685   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2686       DAG.isKnownToBeAPowerOfTwo(N1) &&
2687       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
2688     SDLoc DL(N);
2689     SDValue LogBase2 = BuildLogBase2(N1, DL);
2690     AddToWorklist(LogBase2.getNode());
2691
         // Convert the log2 value to the shift-amount type for N0's type.
2692     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2693     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2694     AddToWorklist(Trunc.getNode());
2695     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
2696   }
2697   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
       // The code below emits the first form: 0 - (x << c).
2698   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
2699     unsigned Log2Val = (-ConstValue1).logBase2();
2700     SDLoc DL(N);
2701     // FIXME: If the input is something that is easily negated (e.g. a
2702     // single-use add), we should put the negate there.
2703     return DAG.getNode(ISD::SUB, DL, VT,
2704                        DAG.getConstant(0, DL, VT),
2705                        DAG.getNode(ISD::SHL, DL, VT, N0,
2706                             DAG.getConstant(Log2Val, DL,
2707                                       getShiftAmountTy(N0.getValueType()))));
2708   }
2709
2710   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
       // The rewrite is only performed if the new (shl c2, c1) node comes back
       // as a constant or constant vector.
2711   if (N0.getOpcode() == ISD::SHL &&
2712       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2713       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2714     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
2715     if (isConstantOrConstantVector(C3))
2716       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
2717   }
2718
2719   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2720   // use.
2721   {
         // Sh is the matched single-use shift, Y the other multiplicand; both
         // stay empty if neither operand matches.
2722     SDValue Sh(nullptr, 0), Y(nullptr, 0);
2723
2724     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
2725     if (N0.getOpcode() == ISD::SHL &&
2726         isConstantOrConstantVector(N0.getOperand(1)) &&
2727         N0.getNode()->hasOneUse()) {
2728       Sh = N0; Y = N1;
2729     } else if (N1.getOpcode() == ISD::SHL &&
2730                isConstantOrConstantVector(N1.getOperand(1)) &&
2731                N1.getNode()->hasOneUse()) {
2732       Sh = N1; Y = N0;
2733     }
2734
2735     if (Sh.getNode()) {
2736       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
2737       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
2738     }
2739   }
2740
2741   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
       // c1*c2 is emitted as a MUL node of the two constants rather than being
       // folded here. Gated on isMulAddWithConstProfitable (defined elsewhere).
2742   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
2743       N0.getOpcode() == ISD::ADD &&
2744       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
2745       isMulAddWithConstProfitable(N, N0, N1))
2746       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
2747                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
2748                                      N0.getOperand(0), N1),
2749                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
2750                                      N0.getOperand(1), N1));
2751
2752   // reassociate mul
2753   if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
2754     return RMUL;
2755
2756   return SDValue();
2757 }
2758
2759 /// Return true if divmod libcall is available.
     ///
     /// The libcall is selected from the type of Node's result 0 (i8 through
     /// i128) and from isSigned (SDIVREM_* vs. UDIVREM_*). Returns false for
     /// non-simple types, for any type not listed in the switch, and when TLI
     /// has no name registered for the selected libcall.
2760 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2761                                      const TargetLowering &TLI) {
2762   RTLIB::Libcall LC;
2763   EVT NodeType = Node->getValueType(0);
2764   if (!NodeType.isSimple())
2765     return false;
2766   switch (NodeType.getSimpleVT().SimpleTy) {
2767   default: return false; // No libcall for vector or other unlisted types.
2768   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
2769   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2770   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2771   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2772   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2773   }
2774
2775   return TLI.getLibcallName(LC) != nullptr;
2776 }
2777
2778 /// Issue divrem if both quotient and remainder are needed.
     ///
     /// Node is a div or rem node (signedness is derived from its opcode). The
     /// users of Node's first operand are scanned for div/rem/divrem nodes with
     /// the same two operands; matching div and rem users are replaced with the
     /// corresponding result of a single SDIVREM/UDIVREM node. Returns that
     /// divrem value, or an empty SDValue if nothing was combined.
2779 SDValue DAGCombiner::useDivRem(SDNode *Node) {
2780   if (Node->use_empty())
2781     return SDValue(); // This is a dead node, leave it alone.
2782
2783   unsigned Opcode = Node->getOpcode();
2784   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
2785   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
2786
2787   // DivMod lib calls can still work on non-legal types if using lib-calls.
2788   EVT VT = Node->getValueType(0);
2789   if (VT.isVector() || !VT.isInteger())
2790     return SDValue();
2791
       // An illegal type is only accepted when the target custom-lowers the
       // divrem opcode for it.
2792   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
2793     return SDValue();
2794
2795   // If DIVREM is going to get expanded into a libcall,
2796   // but there is no libcall available, then don't combine.
2797   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
2798       !isDivRemLibcallAvailable(Node, isSigned, TLI))
2799     return SDValue();
2800
2801   // If div is legal, it's better to do the normal expansion
       // OtherOpcode is the counterpart of Node: the rem opcode when Node is a
       // div, and the div opcode when Node is a rem.
2802   unsigned OtherOpcode = 0;
2803   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
2804     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
2805     if (TLI.isOperationLegalOrCustom(Opcode, VT))
2806       return SDValue();
2807   } else {
2808     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
2809     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
2810       return SDValue();
2811   }
2812
2813   SDValue Op0 = Node->getOperand(0);
2814   SDValue Op1 = Node->getOperand(1);
       // Stays empty until a counterpart or an existing divrem user is found.
2815   SDValue combined;
2816   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
2817          UE = Op0.getNode()->use_end(); UI != UE;) {
         // The iterator is advanced before User is examined or replaced.
         // NOTE(review): presumably so that CombineTo on User cannot
         // invalidate UI -- confirm.
2818     SDNode *User = *UI++;
2819     if (User == Node || User->use_empty())
2820       continue;
2821     // Convert the other matching node(s), too;
2822     // otherwise, the DIVREM may get target-legalized into something
2823     // target-specific that we won't be able to recognize.
2824     unsigned UserOpc = User->getOpcode();
2825     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
2826         User->getOperand(0) == Op0 &&
2827         User->getOperand(1) == Op1) {
2828       if (!combined) {
2829         if (UserOpc == OtherOpcode) {
               // First counterpart found: create the divrem node.
2830           SDVTList VTs = DAG.getVTList(VT, VT);
2831           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
2832         } else if (UserOpc == DivRemOpc) {
               // Reuse an already existing divrem node.
2833           combined = SDValue(User, 0);
2834         } else {
               // Same opcode as Node but no divrem exists yet: skip this user.
2835           assert(UserOpc == Opcode);
2836           continue;
2837         }
2838       }
           // Result 0 of the divrem replaces div users, result 1 replaces rem
           // users.
2839       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
2840         CombineTo(User, combined);
2841       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
2842         CombineTo(User, combined.getValue(1));
2843     }
2844   }
2845   return combined;
2846 }
2847
2848 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
       // Simplifications shared by the div and rem visitors. Returns the
       // simplified value, or an empty SDValue if nothing applies.
2849   SDValue N0 = N->getOperand(0);
2850   SDValue N1 = N->getOperand(1);
2851   EVT VT = N->getValueType(0);
2852   SDLoc DL(N);
2853
       // Fold to undef when the DAG reports the operation as undefined for
       // these operands.
2854   if (DAG.isUndef(N->getOpcode(), {N0, N1}))
2855     return DAG.getUNDEF(VT);
2856
2857   // undef / X -> 0
2858   // undef % X -> 0
2859   if (N0.isUndef())
2860     return DAG.getConstant(0, DL, VT);
2861
2862   return SDValue();
2863 }
2864
2865 SDValue DAGCombiner::visitSDIV(SDNode *N) {
       // Combine an SDIV node (N0 /s N1). The folds below are tried strictly in
       // order and the first match wins; an empty SDValue is returned when no
       // fold applies.
2866   SDValue N0 = N->getOperand(0);
2867   SDValue N1 = N->getOperand(1);
2868   EVT VT = N->getValueType(0);
2869
2870   // fold vector ops
2871   if (VT.isVector())
2872     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2873       return FoldedVOp;
2874
2875   SDLoc DL(N);
2876
2877   // fold (sdiv c1, c2) -> c1/c2
       // The result of FoldConstantArithmetic is returned as-is, without
       // checking whether it is empty.
2878   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2879   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2880   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
2881     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
2882   // fold (sdiv X, 1) -> X
2883   if (N1C && N1C->isOne())
2884     return N0;
2885   // fold (sdiv X, -1) -> 0-X
2886   if (N1C && N1C->isAllOnesValue())
2887     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
2888
2889   if (SDValue V = simplifyDivRem(N, DAG))
2890     return V;
2891
2892   if (SDValue NewSel = foldBinOpIntoSelect(N))
2893     return NewSel;
2894
2895   // If we know the sign bits of both operands are zero, strength reduce to a
2896   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
2897   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2898     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
2899
2900   // fold (sdiv X, pow2) -> simple ops after legalize
2901   // FIXME: We check for the exact bit here because the generic lowering gives
2902   // better results in that case. The target-specific lowering should learn how
2903   // to handle exact sdivs efficiently.
       // Matches non-zero, non-opaque divisors c where c or -c is a power of
       // two.
2904   if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
2905       !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() ||
2906                                     (-N1C->getAPIntValue()).isPowerOf2())) {
2907     // Target-specific implementation of sdiv x, pow2.
2908     if (SDValue Res = BuildSDIVPow2(N))
2909       return Res;
2910
         // Number of trailing zero bits of the divisor.
2911     unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
2912
2913     // Splat the sign bit into the register
2914     SDValue SGN =
2915         DAG.getNode(ISD::SRA, DL, VT, N0,
2916                     DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
2917                                     getShiftAmountTy(N0.getValueType())));
2918     AddToWorklist(SGN.getNode());
2919
2920     // Add (N0 < 0) ? abs2 - 1 : 0;
         // A logical shift of the splatted sign by (bits - lg2) leaves the low
         // lg2 bits set when N0 is negative and zero otherwise.
2921     SDValue SRL =
2922         DAG.getNode(ISD::SRL, DL, VT, SGN,
2923                     DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
2924                                     getShiftAmountTy(SGN.getValueType())));
2925     SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
2926     AddToWorklist(SRL.getNode());
2927     AddToWorklist(ADD.getNode());    // Divide by pow2
2928     SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
2929                   DAG.getConstant(lg2, DL,
2930                                   getShiftAmountTy(ADD.getValueType())));
2931
2932     // If we're dividing by a positive value, we're done.  Otherwise, we must
2933     // negate the result.
2934     if (N1C->getAPIntValue().isNonNegative())
2935       return SRA;
2936
2937     AddToWorklist(SRA.getNode());
2938     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
2939   }
2940
2941   // If integer divide is expensive and we satisfy the requirements, emit an
2942   // alternate sequence.  Targets may check function attributes for size/speed
2943   // trade-offs.
2944   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
2945   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2946     if (SDValue Op = BuildSDIV(N))
2947       return Op;
2948
2949   // sdiv, srem -> sdivrem
2950   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
2951   // true.  Otherwise, we break the simplification logic in visitREM().
2952   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2953     if (SDValue DivRem = useDivRem(N))
2954         return DivRem;
2955
2956   return SDValue();
2957 }
2958
2959 SDValue DAGCombiner::visitUDIV(SDNode *N) {
       // Combine a UDIV node (N0 /u N1). The folds below are tried strictly in
       // order and the first match wins; an empty SDValue is returned when no
       // fold applies.
2960   SDValue N0 = N->getOperand(0);
2961   SDValue N1 = N->getOperand(1);
2962   EVT VT = N->getValueType(0);
2963
2964   // fold vector ops
2965   if (VT.isVector())
2966     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2967       return FoldedVOp;
2968
2969   SDLoc DL(N);
2970
2971   // fold (udiv c1, c2) -> c1/c2
       // If the constant fold yields nothing, fall through to the other folds.
2972   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2973   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2974   if (N0C && N1C)
2975     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
2976                                                     N0C, N1C))
2977       return Folded;
2978
2979   if (SDValue V = simplifyDivRem(N, DAG))
2980     return V;
2981
2982   if (SDValue NewSel = foldBinOpIntoSelect(N))
2983     return NewSel;
2984
2985   // fold (udiv x, (1 << c)) -> x >>u c
2986   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2987       DAG.isKnownToBeAPowerOfTwo(N1)) {
2988     SDValue LogBase2 = BuildLogBase2(N1, DL);
2989     AddToWorklist(LogBase2.getNode());
2990
         // Convert the log2 value to the shift-amount type for N0's type.
2991     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2992     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2993     AddToWorklist(Trunc.getNode());
2994     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
2995   }
2996
2997   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
2998   if (N1.getOpcode() == ISD::SHL) {
2999     SDValue N10 = N1.getOperand(0);
3000     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3001         DAG.isKnownToBeAPowerOfTwo(N10)) {
3002       SDValue LogBase2 = BuildLogBase2(N10, DL);
3003       AddToWorklist(LogBase2.getNode());
3004
           // The addition is done in the type of the original shift amount y.
3005       EVT ADDVT = N1.getOperand(1).getValueType();
3006       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3007       AddToWorklist(Trunc.getNode());
3008       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3009       AddToWorklist(Add.getNode());
3010       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3011     }
3012   }
3013
3014   // fold (udiv x, c) -> alternate
3015   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3016   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
3017     if (SDValue Op = BuildUDIV(N))
3018       return Op;
3019
3020   // udiv, urem -> udivrem
3021   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3022   // true.  Otherwise, we break the simplification logic in visitREM().
3023   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3024     if (SDValue DivRem = useDivRem(N))
3025         return DivRem;
3026
3027   return SDValue();
3028 }
3029
3030 // handles ISD::SREM and ISD::UREM
3031 SDValue DAGCombiner::visitREM(SDNode *N) {
3032   unsigned Opcode = N->getOpcode();
3033   SDValue N0 = N->getOperand(0);
3034   SDValue N1 = N->getOperand(1);
3035   EVT VT = N->getValueType(0);
3036   bool isSigned = (Opcode == ISD::SREM);
3037   SDLoc DL(N);
3038
3039   // fold (rem c1, c2) -> c1%c2
3040   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3041   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3042   if (N0C && N1C)
3043     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3044       return Folded;
3045
3046   if (SDValue V = simplifyDivRem(N, DAG))
3047     return V;
3048
3049   if (SDValue NewSel = foldBinOpIntoSelect(N))
3050     return NewSel;
3051
3052   if (isSigned) {
3053     // If we know the sign bits of both operands are zero, strength reduce to a
3054     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3055     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3056       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3057   } else {
3058     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3059     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3060       // fold (urem x, pow2) -> (and x, pow2-1)
3061       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3062       AddToWorklist(Add.getNode());
3063       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3064     }
3065     if (N1.getOpcode() == ISD::SHL &&
3066         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3067       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3068       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3069       AddToWorklist(Add.getNode());
3070       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3071     }
3072   }
3073
3074   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3075
3076   // If X/C can be simplified by the division-by-constant logic, lower
3077   // X%C to the equivalent of X-X/C*C.
3078   // To avoid mangling nodes, this simplification requires that the combine()
3079   // call for the speculative DIV must not cause a DIVREM conversion.  We guard
3080   // against this by skipping the simplification if isIntDivCheap().  When
3081   // div is not cheap, combine will not return a DIVREM.  Regardless,
3082   // checking cheapness here makes sense since the simplification results in
3083   // fatter code.
3084   if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
3085     unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3086     SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
3087     AddToWorklist(Div.getNode());
3088     SDValue OptimizedDiv = combine(Div.getNode());
3089     if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
3090       assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
3091              (OptimizedDiv.getOpcode() != ISD::SDIVREM));
3092       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3093       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3094       AddToWorklist(Mul.getNode());
3095       return Sub;
3096     }
3097   }
3098
3099   // sdiv, srem -> sdivrem
3100   if (SDValue DivRem = useDivRem(N))
3101     return DivRem.getValue(1);
3102
3103   return SDValue();
3104 }
3105
3106 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3107   SDValue N0 = N->getOperand(0);
3108   SDValue N1 = N->getOperand(1);
3109   EVT VT = N->getValueType(0);
3110   SDLoc DL(N);
3111
3112   if (VT.isVector()) {
3113     // fold (mulhs x, 0) -> 0
3114     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3115       return N1;
3116     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3117       return N0;
3118   }
3119
3120   // fold (mulhs x, 0) -> 0
3121   if (isNullConstant(N1))
3122     return N1;
3123   // fold (mulhs x, 1) -> (sra x, size(x)-1)
3124   if (isOneConstant(N1))
3125     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3126                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3127                                        getShiftAmountTy(N0.getValueType())));
3128
3129   // fold (mulhs x, undef) -> 0
3130   if (N0.isUndef() || N1.isUndef())
3131     return DAG.getConstant(0, DL, VT);
3132
3133   // If the type twice as wide is legal, transform the mulhs to a wider multiply
3134   // plus a shift.
3135   if (VT.isSimple() && !VT.isVector()) {
3136     MVT Simple = VT.getSimpleVT();
3137     unsigned SimpleSize = Simple.getSizeInBits();
3138     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3139     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3140       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3141       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3142       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3143       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3144             DAG.getConstant(SimpleSize, DL,
3145                             getShiftAmountTy(N1.getValueType())));
3146       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3147     }
3148   }
3149
3150   return SDValue();
3151 }
3152
3153 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3154   SDValue N0 = N->getOperand(0);
3155   SDValue N1 = N->getOperand(1);
3156   EVT VT = N->getValueType(0);
3157   SDLoc DL(N);
3158
3159   if (VT.isVector()) {
3160     // fold (mulhu x, 0) -> 0
3161     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3162       return N1;
3163     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3164       return N0;
3165   }
3166
3167   // fold (mulhu x, 0) -> 0
3168   if (isNullConstant(N1))
3169     return N1;
3170   // fold (mulhu x, 1) -> 0
3171   if (isOneConstant(N1))
3172     return DAG.getConstant(0, DL, N0.getValueType());
3173   // fold (mulhu x, undef) -> 0
3174   if (N0.isUndef() || N1.isUndef())
3175     return DAG.getConstant(0, DL, VT);
3176
3177   // If the type twice as wide is legal, transform the mulhu to a wider multiply
3178   // plus a shift.
3179   if (VT.isSimple() && !VT.isVector()) {
3180     MVT Simple = VT.getSimpleVT();
3181     unsigned SimpleSize = Simple.getSizeInBits();
3182     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3183     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3184       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3185       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3186       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3187       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3188             DAG.getConstant(SimpleSize, DL,
3189                             getShiftAmountTy(N1.getValueType())));
3190       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3191     }
3192   }
3193
3194   return SDValue();
3195 }
3196
3197 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3198 /// give the opcodes for the two computations that are being performed. Return
3199 /// true if a simplification was made.
3200 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3201                                                 unsigned HiOp) {
3202   // If the high half is not needed, just compute the low half.
3203   bool HiExists = N->hasAnyUseOfValue(1);
3204   if (!HiExists &&
3205       (!LegalOperations ||
3206        TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3207     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3208     return CombineTo(N, Res, Res);
3209   }
3210
3211   // If the low half is not needed, just compute the high half.
3212   bool LoExists = N->hasAnyUseOfValue(0);
3213   if (!LoExists &&
3214       (!LegalOperations ||
3215        TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
3216     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3217     return CombineTo(N, Res, Res);
3218   }
3219
3220   // If both halves are used, return as it is.
3221   if (LoExists && HiExists)
3222     return SDValue();
3223
3224   // If the two computed results can be simplified separately, separate them.
3225   if (LoExists) {
3226     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3227     AddToWorklist(Lo.getNode());
3228     SDValue LoOpt = combine(Lo.getNode());
3229     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3230         (!LegalOperations ||
3231          TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
3232       return CombineTo(N, LoOpt, LoOpt);
3233   }
3234
3235   if (HiExists) {
3236     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3237     AddToWorklist(Hi.getNode());
3238     SDValue HiOpt = combine(Hi.getNode());
3239     if (HiOpt.getNode() && HiOpt != Hi &&
3240         (!LegalOperations ||
3241          TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
3242       return CombineTo(N, HiOpt, HiOpt);
3243   }
3244
3245   return SDValue();
3246 }
3247
3248 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3249   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3250     return Res;
3251
3252   EVT VT = N->getValueType(0);
3253   SDLoc DL(N);
3254
3255   // If the type is twice as wide is legal, transform the mulhu to a wider
3256   // multiply plus a shift.
3257   if (VT.isSimple() && !VT.isVector()) {
3258     MVT Simple = VT.getSimpleVT();
3259     unsigned SimpleSize = Simple.getSizeInBits();
3260     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3261     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3262       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3263       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3264       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3265       // Compute the high part as N1.
3266       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3267             DAG.getConstant(SimpleSize, DL,
3268                             getShiftAmountTy(Lo.getValueType())));
3269       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3270       // Compute the low part as N0.
3271       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3272       return CombineTo(N, Lo, Hi);
3273     }
3274   }
3275
3276   return SDValue();
3277 }
3278
3279 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3280   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3281     return Res;
3282
3283   EVT VT = N->getValueType(0);
3284   SDLoc DL(N);
3285
3286   // If the type is twice as wide is legal, transform the mulhu to a wider
3287   // multiply plus a shift.
3288   if (VT.isSimple() && !VT.isVector()) {
3289     MVT Simple = VT.getSimpleVT();
3290     unsigned SimpleSize = Simple.getSizeInBits();
3291     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3292     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3293       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3294       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3295       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3296       // Compute the high part as N1.
3297       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3298             DAG.getConstant(SimpleSize, DL,
3299                             getShiftAmountTy(Lo.getValueType())));
3300       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3301       // Compute the low part as N0.
3302       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3303       return CombineTo(N, Lo, Hi);
3304     }
3305   }
3306
3307   return SDValue();
3308 }
3309
3310 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3311   // (smulo x, 2) -> (saddo x, x)
3312   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3313     if (C2->getAPIntValue() == 2)
3314       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3315                          N->getOperand(0), N->getOperand(0));
3316
3317   return SDValue();
3318 }
3319
3320 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3321   // (umulo x, 2) -> (uaddo x, x)
3322   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3323     if (C2->getAPIntValue() == 2)
3324       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3325                          N->getOperand(0), N->getOperand(0));
3326
3327   return SDValue();
3328 }
3329
3330 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3331   SDValue N0 = N->getOperand(0);
3332   SDValue N1 = N->getOperand(1);
3333   EVT VT = N0.getValueType();
3334
3335   // fold vector ops
3336   if (VT.isVector())
3337     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3338       return FoldedVOp;
3339
3340   // fold operation with constant operands.
3341   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3342   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3343   if (N0C && N1C)
3344     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3345
3346   // canonicalize constant to RHS
3347   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3348      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3349     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3350
3351   return SDValue();
3352 }
3353
3354 /// If this is a binary operator with two operands of the same opcode, try to
3355 /// simplify it.
3356 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
3357   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3358   EVT VT = N0.getValueType();
3359   assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
3360
3361   // Bail early if none of these transforms apply.
3362   if (N0.getNumOperands() == 0) return SDValue();
3363
3364   // For each of OP in AND/OR/XOR:
3365   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
3366   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
3367   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
3368   // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
3369   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
3370   //
3371   // do not sink logical op inside of a vector extend, since it may combine
3372   // into a vsetcc.
3373   EVT Op0VT = N0.getOperand(0).getValueType();
3374   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
3375        N0.getOpcode() == ISD::SIGN_EXTEND ||
3376        N0.getOpcode() == ISD::BSWAP ||
3377        // Avoid infinite looping with PromoteIntBinOp.
3378        (N0.getOpcode() == ISD::ANY_EXTEND &&
3379         (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
3380        (N0.getOpcode() == ISD::TRUNCATE &&
3381         (!TLI.isZExtFree(VT, Op0VT) ||
3382          !TLI.isTruncateFree(Op0VT, VT)) &&
3383         TLI.isTypeLegal(Op0VT))) &&
3384       !VT.isVector() &&
3385       Op0VT == N1.getOperand(0).getValueType() &&
3386       (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
3387     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3388                                  N0.getOperand(0).getValueType(),
3389                                  N0.getOperand(0), N1.getOperand(0));
3390     AddToWorklist(ORNode.getNode());
3391     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
3392   }
3393
3394   // For each of OP in SHL/SRL/SRA/AND...
3395   //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
3396   //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
3397   //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
3398   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
3399        N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
3400       N0.getOperand(1) == N1.getOperand(1)) {
3401     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3402                                  N0.getOperand(0).getValueType(),
3403                                  N0.getOperand(0), N1.getOperand(0));
3404     AddToWorklist(ORNode.getNode());
3405     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
3406                        ORNode, N0.getOperand(1));
3407   }
3408
3409   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3410   // Only perform this optimization up until type legalization, before
3411   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
3412   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3413   // we don't want to undo this promotion.
3414   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3415   // on scalars.
3416   if ((N0.getOpcode() == ISD::BITCAST ||
3417        N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
3418        Level <= AfterLegalizeTypes) {
3419     SDValue In0 = N0.getOperand(0);
3420     SDValue In1 = N1.getOperand(0);
3421     EVT In0Ty = In0.getValueType();
3422     EVT In1Ty = In1.getValueType();
3423     SDLoc DL(N);
3424     // If both incoming values are integers, and the original types are the
3425     // same.
3426     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
3427       SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
3428       SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
3429       AddToWorklist(Op.getNode());
3430       return BC;
3431     }
3432   }
3433
3434   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3435   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3436   // If both shuffles use the same mask, and both shuffle within a single
3437   // vector, then it is worthwhile to move the swizzle after the operation.
3438   // The type-legalizer generates this pattern when loading illegal
3439   // vector types from memory. In many cases this allows additional shuffle
3440   // optimizations.
3441   // There are other cases where moving the shuffle after the xor/and/or
3442   // is profitable even if shuffles don't perform a swizzle.
3443   // If both shuffles use the same mask, and both shuffles have the same first
3444   // or second operand, then it might still be profitable to move the shuffle
3445   // after the xor/and/or operation.
3446   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3447     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
3448     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
3449
3450     assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
3451            "Inputs to shuffles are not the same type");
3452
3453     // Check that both shuffles use the same mask. The masks are known to be of
3454     // the same length because the result vector type is the same.
3455     // Check also that shuffles have only one use to avoid introducing extra
3456     // instructions.
3457     if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
3458         SVN0->getMask().equals(SVN1->getMask())) {
3459       SDValue ShOp = N0->getOperand(1);
3460
3461       // Don't try to fold this node if it requires introducing a
3462       // build vector of all zeros that might be illegal at this stage.
3463       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3464         if (!LegalTypes)
3465           ShOp = DAG.getConstant(0, SDLoc(N), VT);
3466         else
3467           ShOp = SDValue();
3468       }
3469
3470       // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
3471       // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
3472       // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
3473       if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
3474         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3475                                       N0->getOperand(0), N1->getOperand(0));
3476         AddToWorklist(NewNode.getNode());
3477         return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
3478                                     SVN0->getMask());
3479       }
3480
3481       // Don't try to fold this node if it requires introducing a
3482       // build vector of all zeros that might be illegal at this stage.
3483       ShOp = N0->getOperand(0);
3484       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3485         if (!LegalTypes)
3486           ShOp = DAG.getConstant(0, SDLoc(N), VT);
3487         else
3488           ShOp = SDValue();
3489       }
3490
3491       // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
3492       // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
3493       // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
3494       if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
3495         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3496                                       N0->getOperand(1), N1->getOperand(1));
3497         AddToWorklist(NewNode.getNode());
3498         return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
3499                                     SVN0->getMask());
3500       }
3501     }
3502   }
3503
3504   return SDValue();
3505 }
3506
3507 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
3508 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
3509                                        const SDLoc &DL) {
3510   SDValue LL, LR, RL, RR, N0CC, N1CC;
3511   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
3512       !isSetCCEquivalent(N1, RL, RR, N1CC))
3513     return SDValue();
3514
3515   assert(N0.getValueType() == N1.getValueType() &&
3516          "Unexpected operand types for bitwise logic op");
3517   assert(LL.getValueType() == LR.getValueType() &&
3518          RL.getValueType() == RR.getValueType() &&
3519          "Unexpected operand types for setcc");
3520
3521   // If we're here post-legalization or the logic op type is not i1, the logic
3522   // op type must match a setcc result type. Also, all folds require new
3523   // operations on the left and right operands, so those types must match.
3524   EVT VT = N0.getValueType();
3525   EVT OpVT = LL.getValueType();
3526   if (LegalOperations || VT != MVT::i1)
3527     if (VT != getSetCCResultType(OpVT))
3528       return SDValue();
3529   if (OpVT != RL.getValueType())
3530     return SDValue();
3531
3532   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
3533   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
3534   bool IsInteger = OpVT.isInteger();
3535   if (LR == RR && CC0 == CC1 && IsInteger) {
3536     bool IsZero = isNullConstantOrNullSplatConstant(LR);
3537     bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
3538
3539     // All bits clear?
3540     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
3541     // All sign bits clear?
3542     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
3543     // Any bits set?
3544     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
3545     // Any sign bits set?
3546     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
3547
3548     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
3549     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
3550     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
3551     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
3552     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
3553       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
3554       AddToWorklist(Or.getNode());
3555       return DAG.getSetCC(DL, VT, Or, LR, CC1);
3556     }
3557
3558     // All bits set?
3559     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
3560     // All sign bits set?
3561     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
3562     // Any bits clear?
3563     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
3564     // Any sign bits clear?
3565     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
3566
3567     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
3568     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
3569     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
3570     // (or  (setgt X, -1), (setgt Y  -1)) --> (setgt (and X, Y), -1)
3571     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
3572       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
3573       AddToWorklist(And.getNode());
3574       return DAG.getSetCC(DL, VT, And, LR, CC1);
3575     }
3576   }
3577
3578   // TODO: What is the 'or' equivalent of this fold?
3579   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
3580   if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
3581       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
3582        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
3583     SDValue One = DAG.getConstant(1, DL, OpVT);
3584     SDValue Two = DAG.getConstant(2, DL, OpVT);
3585     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
3586     AddToWorklist(Add.getNode());
3587     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
3588   }
3589
3590   // Try more general transforms if the predicates match and the only user of
3591   // the compares is the 'and' or 'or'.
3592   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
3593       N0.hasOneUse() && N1.hasOneUse()) {
3594     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
3595     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
3596     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
3597       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
3598       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
3599       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
3600       SDValue Zero = DAG.getConstant(0, DL, OpVT);
3601       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
3602     }
3603   }
3604
3605   // Canonicalize equivalent operands to LL == RL.
3606   if (LL == RR && LR == RL) {
3607     CC1 = ISD::getSetCCSwappedOperands(CC1);
3608     std::swap(RL, RR);
3609   }
3610
3611   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3612   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3613   if (LL == RL && LR == RR) {
3614     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
3615                                 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
3616     if (NewCC != ISD::SETCC_INVALID &&
3617         (!LegalOperations ||
3618          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
3619           TLI.isOperationLegal(ISD::SETCC, OpVT))))
3620       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
3621   }
3622
3623   return SDValue();
3624 }
3625
3626 /// This contains all DAGCombine rules which reduce two values combined by
3627 /// an And operation to a single value. This makes them reusable in the context
3628 /// of visitSELECT(). Rules involving constants are not included as
3629 /// visitSELECT() already handles those cases.
3630 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
3631   EVT VT = N1.getValueType();
3632   SDLoc DL(N);
3633
3634   // fold (and x, undef) -> 0
3635   if (N0.isUndef() || N1.isUndef())
3636     return DAG.getConstant(0, DL, VT);
3637
3638   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
3639     return V;
3640
3641   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3642       VT.getSizeInBits() <= 64) {
3643     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3644       APInt ADDC = ADDI->getAPIntValue();
3645       if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3646         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3647         // immediate for an add, but it is legal if its top c2 bits are set,
3648         // transform the ADD so the immediate doesn't need to be materialized
3649         // in a register.
3650         if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3651           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3652                                              SRLI->getZExtValue());
3653           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3654             ADDC |= Mask;
3655             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3656               SDLoc DL0(N0);
3657               SDValue NewAdd =
3658                 DAG.getNode(ISD::ADD, DL0, VT,
3659                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3660               CombineTo(N0.getNode(), NewAdd);
3661               // Return N so it doesn't get rechecked!
3662               return SDValue(N, 0);
3663             }
3664           }
3665         }
3666       }
3667     }
3668   }
3669
3670   // Reduce bit extract of low half of an integer to the narrower type.
3671   // (and (srl i64:x, K), KMask) ->
3672   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
3673   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3674     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3675       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3676         unsigned Size = VT.getSizeInBits();
3677         const APInt &AndMask = CAnd->getAPIntValue();
3678         unsigned ShiftBits = CShift->getZExtValue();
3679
3680         // Bail out, this node will probably disappear anyway.
3681         if (ShiftBits == 0)
3682           return SDValue();
3683
3684         unsigned MaskBits = AndMask.countTrailingOnes();
3685         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
3686
3687         if (AndMask.isMask() &&
3688             // Required bits must not span the two halves of the integer and
3689             // must fit in the half size type.
3690             (ShiftBits + MaskBits <= Size / 2) &&
3691             TLI.isNarrowingProfitable(VT, HalfVT) &&
3692             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3693             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3694             TLI.isTruncateFree(VT, HalfVT) &&
3695             TLI.isZExtFree(HalfVT, VT)) {
3696           // The isNarrowingProfitable is to avoid regressions on PPC and
3697           // AArch64 which match a few 64-bit bit insert / bit extract patterns
3698           // on downstream users of this. Those patterns could probably be
3699           // extended to handle extensions mixed in.
3700
3701           SDValue SL(N0);
3702           assert(MaskBits <= Size);
3703
3704           // Extracting the highest bit of the low half.
3705           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3706           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3707                                       N0.getOperand(0));
3708
3709           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3710           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3711           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3712           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3713           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3714         }
3715       }
3716     }
3717   }
3718
3719   return SDValue();
3720 }
3721
3722 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3723                                    EVT LoadResultTy, EVT &ExtVT) {
3724   if (!AndC->getAPIntValue().isMask())
3725     return false;
3726
3727   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
3728
3729   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3730   EVT LoadedVT = LoadN->getMemoryVT();
3731
3732   if (ExtVT == LoadedVT &&
3733       (!LegalOperations ||
3734        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3735     // ZEXTLOAD will match without needing to change the size of the value being
3736     // loaded.
3737     return true;
3738   }
3739
3740   // Do not change the width of a volatile load.
3741   if (LoadN->isVolatile())
3742     return false;
3743
3744   // Do not generate loads of non-round integer types since these can
3745   // be expensive (and would be wrong if the type is not byte sized).
3746   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3747     return false;
3748
3749   if (LegalOperations &&
3750       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3751     return false;
3752
3753   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3754     return false;
3755
3756   return true;
3757 }
3758
3759 bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
3760                                     EVT &ExtVT, unsigned ShAmt) {
3761   // Don't transform one with multiple uses, this would require adding a new
3762   // load.
3763   if (!SDValue(LoadN, 0).hasOneUse())
3764     return false;
3765
3766   if (LegalOperations &&
3767       !TLI.isLoadExtLegal(ExtType, LoadN->getValueType(0), ExtVT))
3768     return false;
3769
3770   // Do not generate loads of non-round integer types since these can
3771   // be expensive (and would be wrong if the type is not byte sized).
3772   if (!ExtVT.isRound())
3773     return false;
3774
3775   // Don't change the width of a volatile load.
3776   if (LoadN->isVolatile())
3777     return false;
3778
3779   // Verify that we are actually reducing a load width here.
3780   if (LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits())
3781     return false;
3782
3783   // For the transform to be legal, the load must produce only two values
3784   // (the value loaded and the chain).  Don't transform a pre-increment
3785   // load, for example, which produces an extra value.  Otherwise the
3786   // transformation is not equivalent, and the downstream logic to replace
3787   // uses gets things wrong.
3788   if (LoadN->getNumValues() > 2)
3789     return false;
3790
3791   // If the load that we're shrinking is an extload and we're not just
3792   // discarding the extension we can't simply shrink the load. Bail.
3793   // TODO: It would be possible to merge the extensions in some cases.
3794   if (LoadN->getExtensionType() != ISD::NON_EXTLOAD &&
3795       LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
3796     return false;
3797
3798   if (!TLI.shouldReduceLoadWidth(LoadN, ExtType, ExtVT))
3799     return false;
3800
3801   // It's not possible to generate a constant of extended or untyped type.
3802   EVT PtrType = LoadN->getOperand(1).getValueType();
3803   if (PtrType == MVT::Untyped || PtrType.isExtended())
3804     return false;
3805
3806   return true;
3807 }
3808
3809 bool DAGCombiner::SearchForAndLoads(SDNode *N,
3810                                     SmallPtrSetImpl<LoadSDNode*> &Loads,
3811                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
3812                                     ConstantSDNode *Mask,
3813                                     SDNode *&NodeToMask) {
3814   // Recursively search for the operands, looking for loads which can be
3815   // narrowed.
3816   for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
3817     SDValue Op = N->getOperand(i);
3818
3819     if (Op.getValueType().isVector())
3820       return false;
3821
3822     // Some constants may need fixing up later if they are too large.
3823     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3824       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
3825           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
3826         NodesWithConsts.insert(N);
3827       continue;
3828     }
3829
3830     if (!Op.hasOneUse())
3831       return false;
3832
3833     switch(Op.getOpcode()) {
3834     case ISD::LOAD: {
3835       auto *Load = cast<LoadSDNode>(Op);
3836       EVT ExtVT;
3837       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
3838           isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
3839         // Only add this load if we can make it more narrow.
3840         if (ExtVT.bitsLT(Load->getMemoryVT()))
3841           Loads.insert(Load);
3842         continue;
3843       }
3844       return false;
3845     }
3846     case ISD::ZERO_EXTEND:
3847     case ISD::ANY_EXTEND:
3848     case ISD::AssertZext: {
3849       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
3850       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3851       EVT VT = Op.getOpcode() == ISD::AssertZext ?
3852         cast<VTSDNode>(Op.getOperand(1))->getVT() :
3853         Op.getOperand(0).getValueType();
3854
3855       // We can accept extending nodes if the mask is wider or an equal
3856       // width to the original type.
3857       if (ExtVT.bitsGE(VT))
3858         continue;
3859       break;
3860     }
3861     case ISD::OR:
3862     case ISD::XOR:
3863     case ISD::AND:
3864       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
3865                              NodeToMask))
3866         return false;
3867       continue;
3868     }
3869
3870     // Allow one node which will masked along with any loads found.
3871     if (NodeToMask)
3872       return false;
3873     NodeToMask = Op.getNode();
3874   }
3875   return true;
3876 }
3877
3878 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
3879   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
3880   if (!Mask)
3881     return false;
3882
3883   if (!Mask->getAPIntValue().isMask())
3884     return false;
3885
3886   // No need to do anything if the and directly uses a load.
3887   if (isa<LoadSDNode>(N->getOperand(0)))
3888     return false;
3889
3890   SmallPtrSet<LoadSDNode*, 8> Loads;
3891   SmallPtrSet<SDNode*, 2> NodesWithConsts;
3892   SDNode *FixupNode = nullptr;
3893   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
3894     if (Loads.size() == 0)
3895       return false;
3896
3897     SDValue MaskOp = N->getOperand(1);
3898
3899     // If it exists, fixup the single node we allow in the tree that needs
3900     // masking.
3901     if (FixupNode) {
3902       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
3903                                 FixupNode->getValueType(0),
3904                                 SDValue(FixupNode, 0), MaskOp);
3905       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
3906       DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0),
3907                              MaskOp);
3908     }
3909
3910     // Narrow any constants that need it.
3911     for (auto *LogicN : NodesWithConsts) {
3912       auto *C = cast<ConstantSDNode>(LogicN->getOperand(1));
3913       SDValue And = DAG.getNode(ISD::AND, SDLoc(C), C->getValueType(0),
3914                                 SDValue(C, 0), MaskOp);
3915       DAG.UpdateNodeOperands(LogicN, LogicN->getOperand(0), And);
3916     }
3917
3918     // Create narrow loads.
3919     for (auto *Load : Loads) {
3920       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
3921                                 SDValue(Load, 0), MaskOp);
3922       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
3923       DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp);
3924       SDValue NewLoad = ReduceLoadWidth(And.getNode());
3925       assert(NewLoad &&
3926              "Shouldn't be masking the load if it can't be narrowed");
3927       CombineTo(Load, NewLoad, NewLoad.getValue(1));
3928     }
3929     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
3930     return true;
3931   }
3932   return false;
3933 }
3934
3935 SDValue DAGCombiner::visitAND(SDNode *N) {
3936   SDValue N0 = N->getOperand(0);
3937   SDValue N1 = N->getOperand(1);
3938   EVT VT = N1.getValueType();
3939
3940   // x & x --> x
3941   if (N0 == N1)
3942     return N0;
3943
3944   // fold vector ops
3945   if (VT.isVector()) {
3946     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3947       return FoldedVOp;
3948
3949     // fold (and x, 0) -> 0, vector edition
3950     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3951       // do not return N0, because undef node may exist in N0
3952       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
3953                              SDLoc(N), N0.getValueType());
3954     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3955       // do not return N1, because undef node may exist in N1
3956       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
3957                              SDLoc(N), N1.getValueType());
3958
3959     // fold (and x, -1) -> x, vector edition
3960     if (ISD::isBuildVectorAllOnes(N0.getNode()))
3961       return N1;
3962     if (ISD::isBuildVectorAllOnes(N1.getNode()))
3963       return N0;
3964   }
3965
3966   // fold (and c1, c2) -> c1&c2
3967   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3968   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3969   if (N0C && N1C && !N1C->isOpaque())
3970     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
3971   // canonicalize constant to RHS
3972   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3973      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3974     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
3975   // fold (and x, -1) -> x
3976   if (isAllOnesConstant(N1))
3977     return N0;
3978   // if (and x, c) is known to be zero, return 0
3979   unsigned BitWidth = VT.getScalarSizeInBits();
3980   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
3981                                    APInt::getAllOnesValue(BitWidth)))
3982     return DAG.getConstant(0, SDLoc(N), VT);
3983
3984   if (SDValue NewSel = foldBinOpIntoSelect(N))
3985     return NewSel;
3986
3987   // reassociate and
3988   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
3989     return RAND;
3990   // fold (and (or x, C), D) -> D if (C & D) == D
3991   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
3992     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
3993   };
3994   if (N0.getOpcode() == ISD::OR &&
3995       matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
3996     return N1;
3997   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
3998   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
3999     SDValue N0Op0 = N0.getOperand(0);
4000     APInt Mask = ~N1C->getAPIntValue();
4001     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4002     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4003       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4004                                  N0.getValueType(), N0Op0);
4005
4006       // Replace uses of the AND with uses of the Zero extend node.
4007       CombineTo(N, Zext);
4008
4009       // We actually want to replace all uses of the any_extend with the
4010       // zero_extend, to avoid duplicating things.  This will later cause this
4011       // AND to be folded.
4012       CombineTo(N0.getNode(), Zext);
4013       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4014     }
4015   }
4016   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4017   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4018   // already be zero by virtue of the width of the base type of the load.
4019   //
4020   // the 'X' node here can either be nothing or an extract_vector_elt to catch
4021   // more cases.
4022   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4023        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
4024        N0.getOperand(0).getOpcode() == ISD::LOAD &&
4025        N0.getOperand(0).getResNo() == 0) ||
4026       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4027     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4028                                          N0 : N0.getOperand(0) );
4029
4030     // Get the constant (if applicable) the zero'th operand is being ANDed with.
4031     // This can be a pure constant or a vector splat, in which case we treat the
4032     // vector as a scalar and use the splat value.
4033     APInt Constant = APInt::getNullValue(1);
4034     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4035       Constant = C->getAPIntValue();
4036     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4037       APInt SplatValue, SplatUndef;
4038       unsigned SplatBitSize;
4039       bool HasAnyUndefs;
4040       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4041                                              SplatBitSize, HasAnyUndefs);
4042       if (IsSplat) {
4043         // Undef bits can contribute to a possible optimisation if set, so
4044         // set them.
4045         SplatValue |= SplatUndef;
4046
4047         // The splat value may be something like "0x00FFFFFF", which means 0 for
4048         // the first vector value and FF for the rest, repeating. We need a mask
4049         // that will apply equally to all members of the vector, so AND all the
4050         // lanes of the constant together.
4051         EVT VT = Vector->getValueType(0);
4052         unsigned BitWidth = VT.getScalarSizeInBits();
4053
4054         // If the splat value has been compressed to a bitlength lower
4055         // than the size of the vector lane, we need to re-expand it to
4056         // the lane size.
4057         if (BitWidth > SplatBitSize)
4058           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4059                SplatBitSize < BitWidth;
4060                SplatBitSize = SplatBitSize * 2)
4061             SplatValue |= SplatValue.shl(SplatBitSize);
4062
4063         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4064         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4065         if (SplatBitSize % BitWidth == 0) {
4066           Constant = APInt::getAllOnesValue(BitWidth);
4067           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4068             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4069         }
4070       }
4071     }
4072
4073     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4074     // actually legal and isn't going to get expanded, else this is a false
4075     // optimisation.
4076     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
4077                                                     Load->getValueType(0),
4078                                                     Load->getMemoryVT());
4079
4080     // Resize the constant to the same size as the original memory access before
4081     // extension. If it is still the AllOnesValue then this AND is completely
4082     // unneeded.
4083     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
4084
4085     bool B;
4086     switch (Load->getExtensionType()) {
4087     default: B = false; break;
4088     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
4089     case ISD::ZEXTLOAD:
4090     case ISD::NON_EXTLOAD: B = true; break;
4091     }
4092
4093     if (B && Constant.isAllOnesValue()) {
4094       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
4095       // preserve semantics once we get rid of the AND.
4096       SDValue NewLoad(Load, 0);
4097
4098       // Fold the AND away. NewLoad may get replaced immediately.
4099       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
4100
4101       if (Load->getExtensionType() == ISD::EXTLOAD) {
4102         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
4103                               Load->getValueType(0), SDLoc(Load),
4104                               Load->getChain(), Load->getBasePtr(),
4105                               Load->getOffset(), Load->getMemoryVT(),
4106                               Load->getMemOperand());
4107         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
4108         if (Load->getNumValues() == 3) {
4109           // PRE/POST_INC loads have 3 values.
4110           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
4111                            NewLoad.getValue(2) };
4112           CombineTo(Load, To, 3, true);
4113         } else {
4114           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
4115         }
4116       }
4117
4118       return SDValue(N, 0); // Return N so it doesn't get rechecked!
4119     }
4120   }
4121
4122   // fold (and (load x), 255) -> (zextload x, i8)
4123   // fold (and (extload x, i16), 255) -> (zextload x, i8)
4124   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
4125   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
4126                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
4127                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
4128     if (SDValue Res = ReduceLoadWidth(N)) {
4129       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
4130         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
4131
4132       AddToWorklist(N);
4133       CombineTo(LN0, Res, Res.getValue(1));
4134       return SDValue(N, 0);
4135     }
4136   }
4137
4138   if (Level >= AfterLegalizeTypes) {
4139     // Attempt to propagate the AND back up to the leaves which, if they're
4140     // loads, can be combined to narrow loads and the AND node can be removed.
4141     // Perform after legalization so that extend nodes will already be
4142     // combined into the loads.
4143     if (BackwardsPropagateMask(N, DAG)) {
4144       return SDValue(N, 0);
4145     }
4146   }
4147
4148   if (SDValue Combined = visitANDLike(N0, N1, N))
4149     return Combined;
4150
4151   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
4152   if (N0.getOpcode() == N1.getOpcode())
4153     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4154       return Tmp;
4155
4156   // Masking the negated extension of a boolean is just the zero-extended
4157   // boolean:
4158   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
4159   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
4160   //
4161   // Note: the SimplifyDemandedBits fold below can make an information-losing
4162   // transform, and then we have no way to find this better fold.
4163   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
4164     if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
4165       SDValue SubRHS = N0.getOperand(1);
4166       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
4167           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4168         return SubRHS;
4169       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
4170           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4171         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
4172     }
4173   }
4174
4175   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
4176   // fold (and (sra)) -> (and (srl)) when possible.
4177   if (SimplifyDemandedBits(SDValue(N, 0)))
4178     return SDValue(N, 0);
4179
4180   // fold (zext_inreg (extload x)) -> (zextload x)
4181   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
4182     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4183     EVT MemVT = LN0->getMemoryVT();
4184     // If we zero all the possible extended bits, then we can turn this into
4185     // a zextload if we are running before legalize or the operation is legal.
4186     unsigned BitWidth = N1.getScalarValueSizeInBits();
4187     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4188                            BitWidth - MemVT.getScalarSizeInBits())) &&
4189         ((!LegalOperations && !LN0->isVolatile()) ||
4190          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4191       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4192                                        LN0->getChain(), LN0->getBasePtr(),
4193                                        MemVT, LN0->getMemOperand());
4194       AddToWorklist(N);
4195       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4196       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4197     }
4198   }
4199   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
4200   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
4201       N0.hasOneUse()) {
4202     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4203     EVT MemVT = LN0->getMemoryVT();
4204     // If we zero all the possible extended bits, then we can turn this into
4205     // a zextload if we are running before legalize or the operation is legal.
4206     unsigned BitWidth = N1.getScalarValueSizeInBits();
4207     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4208                            BitWidth - MemVT.getScalarSizeInBits())) &&
4209         ((!LegalOperations && !LN0->isVolatile()) ||
4210          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4211       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4212                                        LN0->getChain(), LN0->getBasePtr(),
4213                                        MemVT, LN0->getMemOperand());
4214       AddToWorklist(N);
4215       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4216       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4217     }
4218   }
4219   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
4220   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
4221     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
4222                                            N0.getOperand(1), false))
4223       return BSwap;
4224   }
4225
4226   return SDValue();
4227 }
4228
4229 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
4230 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
4231                                         bool DemandHighBits) {
4232   if (!LegalOperations)
4233     return SDValue();
4234
4235   EVT VT = N->getValueType(0);
4236   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
4237     return SDValue();
4238   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4239     return SDValue();
4240
4241   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
4242   bool LookPassAnd0 = false;
4243   bool LookPassAnd1 = false;
4244   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
4245       std::swap(N0, N1);
4246   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
4247       std::swap(N0, N1);
4248   if (N0.getOpcode() == ISD::AND) {
4249     if (!N0.getNode()->hasOneUse())
4250       return SDValue();
4251     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4252     if (!N01C || N01C->getZExtValue() != 0xFF00)
4253       return SDValue();
4254     N0 = N0.getOperand(0);
4255     LookPassAnd0 = true;
4256   }
4257
4258   if (N1.getOpcode() == ISD::AND) {
4259     if (!N1.getNode()->hasOneUse())
4260       return SDValue();
4261     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4262     if (!N11C || N11C->getZExtValue() != 0xFF)
4263       return SDValue();
4264     N1 = N1.getOperand(0);
4265     LookPassAnd1 = true;
4266   }
4267
4268   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
4269     std::swap(N0, N1);
4270   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
4271     return SDValue();
4272   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
4273     return SDValue();
4274
4275   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4276   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4277   if (!N01C || !N11C)
4278     return SDValue();
4279   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
4280     return SDValue();
4281
4282   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
4283   SDValue N00 = N0->getOperand(0);
4284   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
4285     if (!N00.getNode()->hasOneUse())
4286       return SDValue();
4287     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
4288     if (!N001C || N001C->getZExtValue() != 0xFF)
4289       return SDValue();
4290     N00 = N00.getOperand(0);
4291     LookPassAnd0 = true;
4292   }
4293
4294   SDValue N10 = N1->getOperand(0);
4295   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
4296     if (!N10.getNode()->hasOneUse())
4297       return SDValue();
4298     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
4299     if (!N101C || N101C->getZExtValue() != 0xFF00)
4300       return SDValue();
4301     N10 = N10.getOperand(0);
4302     LookPassAnd1 = true;
4303   }
4304
4305   if (N00 != N10)
4306     return SDValue();
4307
4308   // Make sure everything beyond the low halfword gets set to zero since the SRL
4309   // 16 will clear the top bits.
4310   unsigned OpSizeInBits = VT.getSizeInBits();
4311   if (DemandHighBits && OpSizeInBits > 16) {
4312     // If the left-shift isn't masked out then the only way this is a bswap is
4313     // if all bits beyond the low 8 are 0. In that case the entire pattern
4314     // reduces to a left shift anyway: leave it for other parts of the combiner.
4315     if (!LookPassAnd0)
4316       return SDValue();
4317
4318     // However, if the right shift isn't masked out then it might be because
4319     // it's not needed. See if we can spot that too.
4320     if (!LookPassAnd1 &&
4321         !DAG.MaskedValueIsZero(
4322             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
4323       return SDValue();
4324   }
4325
4326   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
4327   if (OpSizeInBits > 16) {
4328     SDLoc DL(N);
4329     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
4330                       DAG.getConstant(OpSizeInBits - 16, DL,
4331                                       getShiftAmountTy(VT)));
4332   }
4333   return Res;
4334 }
4335
4336 /// Return true if the specified node is an element that makes up a 32-bit
4337 /// packed halfword byteswap.
4338 /// ((x & 0x000000ff) << 8) |
4339 /// ((x & 0x0000ff00) >> 8) |
4340 /// ((x & 0x00ff0000) << 8) |
4341 /// ((x & 0xff000000) >> 8)
4342 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
4343   if (!N.getNode()->hasOneUse())
4344     return false;
4345
4346   unsigned Opc = N.getOpcode();
4347   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4348     return false;
4349
4350   SDValue N0 = N.getOperand(0);
4351   unsigned Opc0 = N0.getOpcode();
4352   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4353     return false;
4354
4355   ConstantSDNode *N1C = nullptr;
4356   // SHL or SRL: look upstream for AND mask operand
4357   if (Opc == ISD::AND)
4358     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4359   else if (Opc0 == ISD::AND)
4360     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4361   if (!N1C)
4362     return false;
4363
4364   unsigned MaskByteOffset;
4365   switch (N1C->getZExtValue()) {
4366   default:
4367     return false;
4368   case 0xFF:       MaskByteOffset = 0; break;
4369   case 0xFF00:     MaskByteOffset = 1; break;
4370   case 0xFF0000:   MaskByteOffset = 2; break;
4371   case 0xFF000000: MaskByteOffset = 3; break;
4372   }
4373
4374   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4375   if (Opc == ISD::AND) {
4376     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
4377       // (x >> 8) & 0xff
4378       // (x >> 8) & 0xff0000
4379       if (Opc0 != ISD::SRL)
4380         return false;
4381       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4382       if (!C || C->getZExtValue() != 8)
4383         return false;
4384     } else {
4385       // (x << 8) & 0xff00
4386       // (x << 8) & 0xff000000
4387       if (Opc0 != ISD::SHL)
4388         return false;
4389       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4390       if (!C || C->getZExtValue() != 8)
4391         return false;
4392     }
4393   } else if (Opc == ISD::SHL) {
4394     // (x & 0xff) << 8
4395     // (x & 0xff0000) << 8
4396     if (MaskByteOffset != 0 && MaskByteOffset != 2)
4397       return false;
4398     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4399     if (!C || C->getZExtValue() != 8)
4400       return false;
4401   } else { // Opc == ISD::SRL
4402     // (x & 0xff00) >> 8
4403     // (x & 0xff000000) >> 8
4404     if (MaskByteOffset != 1 && MaskByteOffset != 3)
4405       return false;
4406     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4407     if (!C || C->getZExtValue() != 8)
4408       return false;
4409   }
4410
4411   if (Parts[MaskByteOffset])
4412     return false;
4413
4414   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
4415   return true;
4416 }
4417
4418 /// Match a 32-bit packed halfword bswap. That is
4419 /// ((x & 0x000000ff) << 8) |
4420 /// ((x & 0x0000ff00) >> 8) |
4421 /// ((x & 0x00ff0000) << 8) |
4422 /// ((x & 0xff000000) >> 8)
4423 /// => (rotl (bswap x), 16)
4424 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
4425   if (!LegalOperations)
4426     return SDValue();
4427
4428   EVT VT = N->getValueType(0);
4429   if (VT != MVT::i32)
4430     return SDValue();
4431   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4432     return SDValue();
4433
4434   // Look for either
4435   // (or (or (and), (and)), (or (and), (and)))
4436   // (or (or (or (and), (and)), (and)), (and))
4437   if (N0.getOpcode() != ISD::OR)
4438     return SDValue();
4439   SDValue N00 = N0.getOperand(0);
4440   SDValue N01 = N0.getOperand(1);
4441   SDNode *Parts[4] = {};
4442
4443   if (N1.getOpcode() == ISD::OR &&
4444       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
4445     // (or (or (and), (and)), (or (and), (and)))
4446     if (!isBSwapHWordElement(N00, Parts))
4447       return SDValue();
4448
4449     if (!isBSwapHWordElement(N01, Parts))
4450       return SDValue();
4451     SDValue N10 = N1.getOperand(0);
4452     if (!isBSwapHWordElement(N10, Parts))
4453       return SDValue();
4454     SDValue N11 = N1.getOperand(1);
4455     if (!isBSwapHWordElement(N11, Parts))
4456       return SDValue();
4457   } else {
4458     // (or (or (or (and), (and)), (and)), (and))
4459     if (!isBSwapHWordElement(N1, Parts))
4460       return SDValue();
4461     if (!isBSwapHWordElement(N01, Parts))
4462       return SDValue();
4463     if (N00.getOpcode() != ISD::OR)
4464       return SDValue();
4465     SDValue N000 = N00.getOperand(0);
4466     if (!isBSwapHWordElement(N000, Parts))
4467       return SDValue();
4468     SDValue N001 = N00.getOperand(1);
4469     if (!isBSwapHWordElement(N001, Parts))
4470       return SDValue();
4471   }
4472
4473   // Make sure the parts are all coming from the same node.
4474   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
4475     return SDValue();
4476
4477   SDLoc DL(N);
4478   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
4479                               SDValue(Parts[0], 0));
4480
4481   // Result of the bswap should be rotated by 16. If it's not legal, then
4482   // do  (x << 16) | (x >> 16).
4483   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
4484   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
4485     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
4486   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
4487     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
4488   return DAG.getNode(ISD::OR, DL, VT,
4489                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
4490                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
4491 }
4492
4493 /// This contains all DAGCombine rules which reduce two values combined by
4494 /// an Or operation to a single value \see visitANDLike().
4495 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
4496   EVT VT = N1.getValueType();
4497   SDLoc DL(N);
4498
4499   // fold (or x, undef) -> -1
4500   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
4501     return DAG.getAllOnesConstant(DL, VT);
4502
4503   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
4504     return V;
4505
4506   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
4507   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4508       // Don't increase # computations.
4509       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4510     // We can only do this xform if we know that bits from X that are set in C2
4511     // but not in C1 are already zero.  Likewise for Y.
4512     if (const ConstantSDNode *N0O1C =
4513         getAsNonOpaqueConstant(N0.getOperand(1))) {
4514       if (const ConstantSDNode *N1O1C =
4515           getAsNonOpaqueConstant(N1.getOperand(1))) {
4516         // We can only do this xform if we know that bits from X that are set in
4517         // C2 but not in C1 are already zero.  Likewise for Y.
4518         const APInt &LHSMask = N0O1C->getAPIntValue();
4519         const APInt &RHSMask = N1O1C->getAPIntValue();
4520
4521         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
4522             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
4523           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4524                                   N0.getOperand(0), N1.getOperand(0));
4525           return DAG.getNode(ISD::AND, DL, VT, X,
4526                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
4527         }
4528       }
4529     }
4530   }
4531
4532   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
4533   if (N0.getOpcode() == ISD::AND &&
4534       N1.getOpcode() == ISD::AND &&
4535       N0.getOperand(0) == N1.getOperand(0) &&
4536       // Don't increase # computations.
4537       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4538     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4539                             N0.getOperand(1), N1.getOperand(1));
4540     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
4541   }
4542
4543   return SDValue();
4544 }
4545
4546 SDValue DAGCombiner::visitOR(SDNode *N) {
4547   SDValue N0 = N->getOperand(0);
4548   SDValue N1 = N->getOperand(1);
4549   EVT VT = N1.getValueType();
4550
4551   // x | x --> x
4552   if (N0 == N1)
4553     return N0;
4554
4555   // fold vector ops
4556   if (VT.isVector()) {
4557     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4558       return FoldedVOp;
4559
4560     // fold (or x, 0) -> x, vector edition
4561     if (ISD::isBuildVectorAllZeros(N0.getNode()))
4562       return N1;
4563     if (ISD::isBuildVectorAllZeros(N1.getNode()))
4564       return N0;
4565
4566     // fold (or x, -1) -> -1, vector edition
4567     if (ISD::isBuildVectorAllOnes(N0.getNode()))
4568       // do not return N0, because undef node may exist in N0
4569       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
4570     if (ISD::isBuildVectorAllOnes(N1.getNode()))
4571       // do not return N1, because undef node may exist in N1
4572       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
4573
4574     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
4575     // Do this only if the resulting shuffle is legal.
4576     if (isa<ShuffleVectorSDNode>(N0) &&
4577         isa<ShuffleVectorSDNode>(N1) &&
4578         // Avoid folding a node with illegal type.
4579         TLI.isTypeLegal(VT)) {
4580       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
4581       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
4582       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
4583       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
4584       // Ensure both shuffles have a zero input.
4585       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
4586         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
4587         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
4588         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
4589         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
4590         bool CanFold = true;
4591         int NumElts = VT.getVectorNumElements();
4592         SmallVector<int, 4> Mask(NumElts);
4593
4594         for (int i = 0; i != NumElts; ++i) {
4595           int M0 = SV0->getMaskElt(i);
4596           int M1 = SV1->getMaskElt(i);
4597
4598           // Determine if either index is pointing to a zero vector.
4599           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
4600           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
4601
4602           // If one element is zero and the otherside is undef, keep undef.
4603           // This also handles the case that both are undef.
4604           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
4605             Mask[i] = -1;
4606             continue;
4607           }
4608
4609           // Make sure only one of the elements is zero.
4610           if (M0Zero == M1Zero) {
4611             CanFold = false;
4612             break;
4613           }
4614
4615           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
4616
4617           // We have a zero and non-zero element. If the non-zero came from
4618           // SV0 make the index a LHS index. If it came from SV1, make it
4619           // a RHS index. We need to mod by NumElts because we don't care
4620           // which operand it came from in the original shuffles.
4621           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
4622         }
4623
4624         if (CanFold) {
4625           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
4626           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
4627
4628           bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4629           if (!LegalMask) {
4630             std::swap(NewLHS, NewRHS);
4631             ShuffleVectorSDNode::commuteMask(Mask);
4632             LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4633           }
4634
4635           if (LegalMask)
4636             return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
4637         }
4638       }
4639     }
4640   }
4641
4642   // fold (or c1, c2) -> c1|c2
4643   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4644   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
4645   if (N0C && N1C && !N1C->isOpaque())
4646     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
4647   // canonicalize constant to RHS
4648   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4649      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4650     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
4651   // fold (or x, 0) -> x
4652   if (isNullConstant(N1))
4653     return N0;
4654   // fold (or x, -1) -> -1
4655   if (isAllOnesConstant(N1))
4656     return N1;
4657
4658   if (SDValue NewSel = foldBinOpIntoSelect(N))
4659     return NewSel;
4660
4661   // fold (or x, c) -> c iff (x & ~c) == 0
4662   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
4663     return N1;
4664
4665   if (SDValue Combined = visitORLike(N0, N1, N))
4666     return Combined;
4667
4668   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
4669   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
4670     return BSwap;
4671   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
4672     return BSwap;
4673
4674   // reassociate or
4675   if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
4676     return ROR;
4677
4678   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
4679   // iff (c1 & c2) != 0.
4680   auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4681     return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
4682   };
4683   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
4684       matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
4685     if (SDValue COR = DAG.FoldConstantArithmetic(
4686             ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
4687       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
4688       AddToWorklist(IOR.getNode());
4689       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
4690     }
4691   }
4692
4693   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
4694   if (N0.getOpcode() == N1.getOpcode())
4695     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4696       return Tmp;
4697
4698   // See if this is some rotate idiom.
4699   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
4700     return SDValue(Rot, 0);
4701
4702   if (SDValue Load = MatchLoadCombine(N))
4703     return Load;
4704
4705   // Simplify the operands using demanded-bits information.
4706   if (SimplifyDemandedBits(SDValue(N, 0)))
4707     return SDValue(N, 0);
4708
4709   return SDValue();
4710 }
4711
4712 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
4713 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
4714   if (Op.getOpcode() == ISD::AND) {
4715     if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
4716       Mask = Op.getOperand(1);
4717       Op = Op.getOperand(0);
4718     } else {
4719       return false;
4720     }
4721   }
4722
4723   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
4724     Shift = Op;
4725     return true;
4726   }
4727
4728   return false;
4729 }
4730
4731 // Return true if we can prove that, whenever Neg and Pos are both in the
4732 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
4733 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
4734 //
4735 //     (or (shift1 X, Neg), (shift2 X, Pos))
4736 //
4737 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
4738 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
4739 // to consider shift amounts with defined behavior.
4740 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
4741   // If EltSize is a power of 2 then:
4742   //
4743   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
4744   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
4745   //
4746   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
4747   // for the stronger condition:
4748   //
4749   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
4750   //
4751   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
4752   // we can just replace Neg with Neg' for the rest of the function.
4753   //
4754   // In other cases we check for the even stronger condition:
4755   //
4756   //     Neg == EltSize - Pos                                    [B]
4757   //
4758   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
4759   // behavior if Pos == 0 (and consequently Neg == EltSize).
4760   //
4761   // We could actually use [A] whenever EltSize is a power of 2, but the
4762   // only extra cases that it would match are those uninteresting ones
4763   // where Neg and Pos are never in range at the same time.  E.g. for
4764   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
4765   // as well as (sub 32, Pos), but:
4766   //
4767   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
4768   //
4769   // always invokes undefined behavior for 32-bit X.
4770   //
4771   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
4772   unsigned MaskLoBits = 0;
4773   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
4774     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
4775       if (NegC->getAPIntValue() == EltSize - 1) {
4776         Neg = Neg.getOperand(0);
4777         MaskLoBits = Log2_64(EltSize);
4778       }
4779     }
4780   }
4781
4782   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
4783   if (Neg.getOpcode() != ISD::SUB)
4784     return false;
4785   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
4786   if (!NegC)
4787     return false;
4788   SDValue NegOp1 = Neg.getOperand(1);
4789
4790   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
4791   // Pos'.  The truncation is redundant for the purpose of the equality.
4792   if (MaskLoBits && Pos.getOpcode() == ISD::AND)
4793     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4794       if (PosC->getAPIntValue() == EltSize - 1)
4795         Pos = Pos.getOperand(0);
4796
4797   // The condition we need is now:
4798   //
4799   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
4800   //
4801   // If NegOp1 == Pos then we need:
4802   //
4803   //              EltSize & Mask == NegC & Mask
4804   //
4805   // (because "x & Mask" is a truncation and distributes through subtraction).
4806   APInt Width;
4807   if (Pos == NegOp1)
4808     Width = NegC->getAPIntValue();
4809
4810   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
4811   // Then the condition we want to prove becomes:
4812   //
4813   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
4814   //
4815   // which, again because "x & Mask" is a truncation, becomes:
4816   //
4817   //                NegC & Mask == (EltSize - PosC) & Mask
4818   //             EltSize & Mask == (NegC + PosC) & Mask
4819   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
4820     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4821       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
4822     else
4823       return false;
4824   } else
4825     return false;
4826
4827   // Now we just need to check that EltSize & Mask == Width & Mask.
4828   if (MaskLoBits)
4829     // EltSize & Mask is 0 since Mask is EltSize - 1.
4830     return Width.getLoBits(MaskLoBits) == 0;
4831   return Width == EltSize;
4832 }
4833
4834 // A subroutine of MatchRotate used once we have found an OR of two opposite
4835 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
4836 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
4837 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
4838 // Neg with outer conversions stripped away.
4839 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
4840                                        SDValue Neg, SDValue InnerPos,
4841                                        SDValue InnerNeg, unsigned PosOpcode,
4842                                        unsigned NegOpcode, const SDLoc &DL) {
4843   // fold (or (shl x, (*ext y)),
4844   //          (srl x, (*ext (sub 32, y)))) ->
4845   //   (rotl x, y) or (rotr x, (sub 32, y))
4846   //
4847   // fold (or (shl x, (*ext (sub 32, y))),
4848   //          (srl x, (*ext y))) ->
4849   //   (rotr x, y) or (rotl x, (sub 32, y))
4850   EVT VT = Shifted.getValueType();
4851   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
4852     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
4853     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
4854                        HasPos ? Pos : Neg).getNode();
4855   }
4856
4857   return nullptr;
4858 }
4859
4860 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
4861 // idioms for rotate, and if the target supports rotation instructions, generate
4862 // a rot[lr].
4863 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
4864   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
4865   EVT VT = LHS.getValueType();
4866   if (!TLI.isTypeLegal(VT)) return nullptr;
4867
4868   // The target must have at least one rotate flavor.
4869   bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
4870   bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
4871   if (!HasROTL && !HasROTR) return nullptr;
4872
4873   // Check for truncated rotate.
4874   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
4875       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
4876     assert(LHS.getValueType() == RHS.getValueType());
4877     if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
4878       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
4879                          SDValue(Rot, 0)).getNode();
4880     }
4881   }
4882
4883   // Match "(X shl/srl V1) & V2" where V2 may not be present.
4884   SDValue LHSShift;   // The shift.
4885   SDValue LHSMask;    // AND value if any.
4886   if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
4887     return nullptr; // Not part of a rotate.
4888
4889   SDValue RHSShift;   // The shift.
4890   SDValue RHSMask;    // AND value if any.
4891   if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
4892     return nullptr; // Not part of a rotate.
4893
4894   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
4895     return nullptr;   // Not shifting the same value.
4896
4897   if (LHSShift.getOpcode() == RHSShift.getOpcode())
4898     return nullptr;   // Shifts must disagree.
4899
4900   // Canonicalize shl to left side in a shl/srl pair.
4901   if (RHSShift.getOpcode() == ISD::SHL) {
4902     std::swap(LHS, RHS);
4903     std::swap(LHSShift, RHSShift);
4904     std::swap(LHSMask, RHSMask);
4905   }
4906
4907   unsigned EltSizeInBits = VT.getScalarSizeInBits();
4908   SDValue LHSShiftArg = LHSShift.getOperand(0);
4909   SDValue LHSShiftAmt = LHSShift.getOperand(1);
4910   SDValue RHSShiftArg = RHSShift.getOperand(0);
4911   SDValue RHSShiftAmt = RHSShift.getOperand(1);
4912
4913   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
4914   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
4915   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
4916                                         ConstantSDNode *RHS) {
4917     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
4918   };
4919   if (matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
4920     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
4921                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
4922
4923     // If there is an AND of either shifted operand, apply it to the result.
4924     if (LHSMask.getNode() || RHSMask.getNode()) {
4925       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4926       SDValue Mask = AllOnes;
4927
4928       if (LHSMask.getNode()) {
4929         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
4930         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4931                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
4932       }
4933       if (RHSMask.getNode()) {
4934         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
4935         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4936                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
4937       }
4938
4939       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
4940     }
4941
4942     return Rot.getNode();
4943   }
4944
4945   // If there is a mask here, and we have a variable shift, we can't be sure
4946   // that we're masking out the right stuff.
4947   if (LHSMask.getNode() || RHSMask.getNode())
4948     return nullptr;
4949
4950   // If the shift amount is sign/zext/any-extended just peel it off.
4951   SDValue LExtOp0 = LHSShiftAmt;
4952   SDValue RExtOp0 = RHSShiftAmt;
4953   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4954        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4955        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4956        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
4957       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4958        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4959        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4960        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
4961     LExtOp0 = LHSShiftAmt.getOperand(0);
4962     RExtOp0 = RHSShiftAmt.getOperand(0);
4963   }
4964
4965   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
4966                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
4967   if (TryL)
4968     return TryL;
4969
4970   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
4971                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
4972   if (TryR)
4973     return TryR;
4974
4975   return nullptr;
4976 }
4977
4978 namespace {
4979
4980 /// Represents known origin of an individual byte in load combine pattern. The
4981 /// value of the byte is either constant zero or comes from memory.
4982 struct ByteProvider {
4983   // For constant zero providers Load is set to nullptr. For memory providers
4984   // Load represents the node which loads the byte from memory.
4985   // ByteOffset is the offset of the byte in the value produced by the load.
4986   LoadSDNode *Load = nullptr;
4987   unsigned ByteOffset = 0;
4988
4989   ByteProvider() = default;
4990
4991   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
4992     return ByteProvider(Load, ByteOffset);
4993   }
4994
4995   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
4996
4997   bool isConstantZero() const { return !Load; }
4998   bool isMemory() const { return Load; }
4999
5000   bool operator==(const ByteProvider &Other) const {
5001     return Other.Load == Load && Other.ByteOffset == ByteOffset;
5002   }
5003
5004 private:
5005   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
5006       : Load(Load), ByteOffset(ByteOffset) {}
5007 };
5008
5009 } // end anonymous namespace
5010
5011 /// Recursively traverses the expression calculating the origin of the requested
5012 /// byte of the given value. Returns None if the provider can't be calculated.
5013 ///
5014 /// For all the values except the root of the expression verifies that the value
5015 /// has exactly one use and if it's not true return None. This way if the origin
5016 /// of the byte is returned it's guaranteed that the values which contribute to
5017 /// the byte are not used outside of this expression.
5018 ///
5019 /// Because the parts of the expression are not allowed to have more than one
5020 /// use this function iterates over trees, not DAGs. So it never visits the same
5021 /// node more than once.
5022 static const Optional<ByteProvider>
5023 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
5024                       bool Root = false) {
5025   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
5026   if (Depth == 10)
5027     return None;
5028
5029   if (!Root && !Op.hasOneUse())
5030     return None;
5031
5032   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
5033   unsigned BitWidth = Op.getValueSizeInBits();
5034   if (BitWidth % 8 != 0)
5035     return None;
5036   unsigned ByteWidth = BitWidth / 8;
5037   assert(Index < ByteWidth && "invalid index requested");
5038   (void) ByteWidth;
5039
5040   switch (Op.getOpcode()) {
5041   case ISD::OR: {
5042     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
5043     if (!LHS)
5044       return None;
5045     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
5046     if (!RHS)
5047       return None;
5048
5049     if (LHS->isConstantZero())
5050       return RHS;
5051     if (RHS->isConstantZero())
5052       return LHS;
5053     return None;
5054   }
5055   case ISD::SHL: {
5056     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
5057     if (!ShiftOp)
5058       return None;
5059
5060     uint64_t BitShift = ShiftOp->getZExtValue();
5061     if (BitShift % 8 != 0)
5062       return None;
5063     uint64_t ByteShift = BitShift / 8;
5064
5065     return Index < ByteShift
5066                ? ByteProvider::getConstantZero()
5067                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
5068                                        Depth + 1);
5069   }
5070   case ISD::ANY_EXTEND:
5071   case ISD::SIGN_EXTEND:
5072   case ISD::ZERO_EXTEND: {
5073     SDValue NarrowOp = Op->getOperand(0);
5074     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
5075     if (NarrowBitWidth % 8 != 0)
5076       return None;
5077     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5078
5079     if (Index >= NarrowByteWidth)
5080       return Op.getOpcode() == ISD::ZERO_EXTEND
5081                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5082                  : None;
5083     return calculateByteProvider(NarrowOp, Index, Depth + 1);
5084   }
5085   case ISD::BSWAP:
5086     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
5087                                  Depth + 1);
5088   case ISD::LOAD: {
5089     auto L = cast<LoadSDNode>(Op.getNode());
5090     if (L->isVolatile() || L->isIndexed())
5091       return None;
5092
5093     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
5094     if (NarrowBitWidth % 8 != 0)
5095       return None;
5096     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5097
5098     if (Index >= NarrowByteWidth)
5099       return L->getExtensionType() == ISD::ZEXTLOAD
5100                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5101                  : None;
5102     return ByteProvider::getMemory(L, Index);
5103   }
5104   }
5105
5106   return None;
5107 }
5108
5109 /// Match a pattern where a wide type scalar value is loaded by several narrow
5110 /// loads and combined by shifts and ors. Fold it into a single load or a load
5111 /// and a BSWAP if the targets supports it.
5112 ///
5113 /// Assuming little endian target:
5114 ///  i8 *a = ...
5115 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
5116 /// =>
5117 ///  i32 val = *((i32)a)
5118 ///
5119 ///  i8 *a = ...
5120 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
5121 /// =>
5122 ///  i32 val = BSWAP(*((i32)a))
5123 ///
5124 /// TODO: This rule matches complex patterns with OR node roots and doesn't
5125 /// interact well with the worklist mechanism. When a part of the pattern is
5126 /// updated (e.g. one of the loads) its direct users are put into the worklist,
5127 /// but the root node of the pattern which triggers the load combine is not
5128 /// necessarily a direct user of the changed node. For example, once the address
5129 /// of t28 load is reassociated load combine won't be triggered:
5130 ///             t25: i32 = add t4, Constant:i32<2>
5131 ///           t26: i64 = sign_extend t25
5132 ///        t27: i64 = add t2, t26
5133 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
5134 ///     t29: i32 = zero_extend t28
5135 ///   t32: i32 = shl t29, Constant:i8<8>
5136 /// t33: i32 = or t23, t32
5137 /// As a possible fix visitLoad can check if the load can be a part of a load
5138 /// combine pattern and add corresponding OR roots to the worklist.
5139 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
5140   assert(N->getOpcode() == ISD::OR &&
5141          "Can only match load combining against OR nodes");
5142
5143   // Handles simple types only
5144   EVT VT = N->getValueType(0);
5145   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
5146     return SDValue();
5147   unsigned ByteWidth = VT.getSizeInBits() / 8;
5148
5149   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5150   // Before legalize we can introduce too wide illegal loads which will be later
5151   // split into legal sized loads. This enables us to combine i64 load by i8
5152   // patterns to a couple of i32 loads on 32 bit targets.
5153   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
5154     return SDValue();
5155
5156   std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
5157     unsigned BW, unsigned i) { return i; };
5158   std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
5159     unsigned BW, unsigned i) { return BW - i - 1; };
5160
5161   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
5162   auto MemoryByteOffset = [&] (ByteProvider P) {
5163     assert(P.isMemory() && "Must be a memory byte provider");
5164     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
5165     assert(LoadBitWidth % 8 == 0 &&
5166            "can only analyze providers for individual bytes not bit");
5167     unsigned LoadByteWidth = LoadBitWidth / 8;
5168     return IsBigEndianTarget
5169             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
5170             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
5171   };
5172
5173   Optional<BaseIndexOffset> Base;
5174   SDValue Chain;
5175
5176   SmallSet<LoadSDNode *, 8> Loads;
5177   Optional<ByteProvider> FirstByteProvider;
5178   int64_t FirstOffset = INT64_MAX;
5179
5180   // Check if all the bytes of the OR we are looking at are loaded from the same
5181   // base address. Collect bytes offsets from Base address in ByteOffsets.
5182   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
5183   for (unsigned i = 0; i < ByteWidth; i++) {
5184     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
5185     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
5186       return SDValue();
5187
5188     LoadSDNode *L = P->Load;
5189     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
5190            "Must be enforced by calculateByteProvider");
5191     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
5192
5193     // All loads must share the same chain
5194     SDValue LChain = L->getChain();
5195     if (!Chain)
5196       Chain = LChain;
5197     else if (Chain != LChain)
5198       return SDValue();
5199
5200     // Loads must share the same base address
5201     BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
5202     int64_t ByteOffsetFromBase = 0;
5203     if (!Base)
5204       Base = Ptr;
5205     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
5206       return SDValue();
5207
5208     // Calculate the offset of the current byte from the base address
5209     ByteOffsetFromBase += MemoryByteOffset(*P);
5210     ByteOffsets[i] = ByteOffsetFromBase;
5211
5212     // Remember the first byte load
5213     if (ByteOffsetFromBase < FirstOffset) {
5214       FirstByteProvider = P;
5215       FirstOffset = ByteOffsetFromBase;
5216     }
5217
5218     Loads.insert(L);
5219   }
5220   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
5221          "memory, so there must be at least one load which produces the value");
5222   assert(Base && "Base address of the accessed memory location must be set");
5223   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
5224
5225   // Check if the bytes of the OR we are looking at match with either big or
5226   // little endian value load
5227   bool BigEndian = true, LittleEndian = true;
5228   for (unsigned i = 0; i < ByteWidth; i++) {
5229     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
5230     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
5231     BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
5232     if (!BigEndian && !LittleEndian)
5233       return SDValue();
5234   }
5235   assert((BigEndian != LittleEndian) && "should be either or");
5236   assert(FirstByteProvider && "must be set");
5237
5238   // Ensure that the first byte is loaded from zero offset of the first load.
5239   // So the combined value can be loaded from the first load address.
5240   if (MemoryByteOffset(*FirstByteProvider) != 0)
5241     return SDValue();
5242   LoadSDNode *FirstLoad = FirstByteProvider->Load;
5243
5244   // The node we are looking at matches with the pattern, check if we can
5245   // replace it with a single load and bswap if needed.
5246
5247   // If the load needs byte swap check if the target supports it
5248   bool NeedsBswap = IsBigEndianTarget != BigEndian;
5249
5250   // Before legalize we can introduce illegal bswaps which will be later
5251   // converted to an explicit bswap sequence. This way we end up with a single
5252   // load and byte shuffling instead of several loads and byte shuffling.
5253   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
5254     return SDValue();
5255
5256   // Check that a load of the wide type is both allowed and fast on the target
5257   bool Fast = false;
5258   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
5259                                         VT, FirstLoad->getAddressSpace(),
5260                                         FirstLoad->getAlignment(), &Fast);
5261   if (!Allowed || !Fast)
5262     return SDValue();
5263
5264   SDValue NewLoad =
5265       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
5266                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
5267
5268   // Transfer chain users from old loads to the new load.
5269   for (LoadSDNode *L : Loads)
5270     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
5271
5272   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
5273 }
5274
5275 SDValue DAGCombiner::visitXOR(SDNode *N) {
5276   SDValue N0 = N->getOperand(0);
5277   SDValue N1 = N->getOperand(1);
5278   EVT VT = N0.getValueType();
5279
5280   // fold vector ops
5281   if (VT.isVector()) {
5282     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5283       return FoldedVOp;
5284
5285     // fold (xor x, 0) -> x, vector edition
5286     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5287       return N1;
5288     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5289       return N0;
5290   }
5291
5292   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
5293   if (N0.isUndef() && N1.isUndef())
5294     return DAG.getConstant(0, SDLoc(N), VT);
5295   // fold (xor x, undef) -> undef
5296   if (N0.isUndef())
5297     return N0;
5298   if (N1.isUndef())
5299     return N1;
5300   // fold (xor c1, c2) -> c1^c2
5301   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5302   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
5303   if (N0C && N1C)
5304     return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
5305   // canonicalize constant to RHS
5306   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5307      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5308     return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
5309   // fold (xor x, 0) -> x
5310   if (isNullConstant(N1))
5311     return N0;
5312
5313   if (SDValue NewSel = foldBinOpIntoSelect(N))
5314     return NewSel;
5315
5316   // reassociate xor
5317   if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
5318     return RXOR;
5319
5320   // fold !(x cc y) -> (x !cc y)
5321   SDValue LHS, RHS, CC;
5322   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
5323     bool isInt = LHS.getValueType().isInteger();
5324     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
5325                                                isInt);
5326
5327     if (!LegalOperations ||
5328         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
5329       switch (N0.getOpcode()) {
5330       default:
5331         llvm_unreachable("Unhandled SetCC Equivalent!");
5332       case ISD::SETCC:
5333         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
5334       case ISD::SELECT_CC:
5335         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
5336                                N0.getOperand(3), NotCC);
5337       }
5338     }
5339   }
5340
5341   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
5342   if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
5343       N0.getNode()->hasOneUse() &&
5344       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
5345     SDValue V = N0.getOperand(0);
5346     SDLoc DL(N0);
5347     V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
5348                     DAG.getConstant(1, DL, V.getValueType()));
5349     AddToWorklist(V.getNode());
5350     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
5351   }
5352
5353   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
5354   if (isOneConstant(N1) && VT == MVT::i1 &&
5355       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
5356     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
5357     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
5358       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
5359       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
5360       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
5361       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
5362       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
5363     }
5364   }
5365   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
5366   if (isAllOnesConstant(N1) &&
5367       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
5368     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
5369     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
5370       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
5371       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
5372       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
5373       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
5374       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
5375     }
5376   }
5377   // fold (xor (and x, y), y) -> (and (not x), y)
5378   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5379       N0->getOperand(1) == N1) {
5380     SDValue X = N0->getOperand(0);
5381     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
5382     AddToWorklist(NotX.getNode());
5383     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
5384   }
5385
5386   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
5387   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5388   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
5389       N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
5390       TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
5391     if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
5392       if (C->getAPIntValue() == (OpSizeInBits - 1))
5393         return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
5394   }
5395
5396   // fold (xor x, x) -> 0
5397   if (N0 == N1)
5398     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
5399
5400   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
5401   // Here is a concrete example of this equivalence:
5402   // i16   x ==  14
5403   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
5404   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
5405   //
5406   // =>
5407   //
5408   // i16     ~1      == 0b1111111111111110
5409   // i16 rol(~1, 14) == 0b1011111111111111
5410   //
5411   // Some additional tips to help conceptualize this transform:
5412   // - Try to see the operation as placing a single zero in a value of all ones.
5413   // - There exists no value for x which would allow the result to contain zero.
5414   // - Values of x larger than the bitwidth are undefined and do not require a
5415   //   consistent result.
5416   // - Pushing the zero left requires shifting one bits in from the right.
5417   // A rotate left of ~1 is a nice way of achieving the desired result.
5418   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
5419       && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
5420     SDLoc DL(N);
5421     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
5422                        N0.getOperand(1));
5423   }
5424
5425   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
5426   if (N0.getOpcode() == N1.getOpcode())
5427     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
5428       return Tmp;
5429
5430   // Simplify the expression using non-local knowledge.
5431   if (SimplifyDemandedBits(SDValue(N, 0)))
5432     return SDValue(N, 0);
5433
5434   return SDValue();
5435 }
5436
5437 /// Handle transforms common to the three shifts, when the shift amount is a
5438 /// constant.
5439 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
5440   SDNode *LHS = N->getOperand(0).getNode();
5441   if (!LHS->hasOneUse()) return SDValue();
5442
5443   // We want to pull some binops through shifts, so that we have (and (shift))
5444   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
5445   // thing happens with address calculations, so it's important to canonicalize
5446   // it.
5447   bool HighBitSet = false;  // Can we transform this if the high bit is set?
5448
5449   switch (LHS->getOpcode()) {
5450   default: return SDValue();
5451   case ISD::OR:
5452   case ISD::XOR:
5453     HighBitSet = false; // We can only transform sra if the high bit is clear.
5454     break;
5455   case ISD::AND:
5456     HighBitSet = true;  // We can only transform sra if the high bit is set.
5457     break;
5458   case ISD::ADD:
5459     if (N->getOpcode() != ISD::SHL)
5460       return SDValue(); // only shl(add) not sr[al](add).
5461     HighBitSet = false; // We can only transform sra if the high bit is clear.
5462     break;
5463   }
5464
5465   // We require the RHS of the binop to be a constant and not opaque as well.
5466   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
5467   if (!BinOpCst) return SDValue();
5468
5469   // FIXME: disable this unless the input to the binop is a shift by a constant
5470   // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
5471   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
5472   bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
5473                  BinOpLHSVal->getOpcode() == ISD::SRA ||
5474                  BinOpLHSVal->getOpcode() == ISD::SRL;
5475   bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
5476                         BinOpLHSVal->getOpcode() == ISD::SELECT;
5477
5478   if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
5479       !isCopyOrSelect)
5480     return SDValue();
5481
5482   if (isCopyOrSelect && N->hasOneUse())
5483     return SDValue();
5484
5485   EVT VT = N->getValueType(0);
5486
5487   // If this is a signed shift right, and the high bit is modified by the
5488   // logical operation, do not perform the transformation. The highBitSet
5489   // boolean indicates the value of the high bit of the constant which would
5490   // cause it to be modified for this operation.
5491   if (N->getOpcode() == ISD::SRA) {
5492     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
5493     if (BinOpRHSSignSet != HighBitSet)
5494       return SDValue();
5495   }
5496
5497   if (!TLI.isDesirableToCommuteWithShift(LHS))
5498     return SDValue();
5499
5500   // Fold the constants, shifting the binop RHS by the shift amount.
5501   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
5502                                N->getValueType(0),
5503                                LHS->getOperand(1), N->getOperand(1));
5504   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
5505
5506   // Create the new shift.
5507   SDValue NewShift = DAG.getNode(N->getOpcode(),
5508                                  SDLoc(LHS->getOperand(0)),
5509                                  VT, LHS->getOperand(0), N->getOperand(1));
5510
5511   // Create the new binop.
5512   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
5513 }
5514
5515 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
5516   assert(N->getOpcode() == ISD::TRUNCATE);
5517   assert(N->getOperand(0).getOpcode() == ISD::AND);
5518
5519   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
5520   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
5521     SDValue N01 = N->getOperand(0).getOperand(1);
5522     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
5523       SDLoc DL(N);
5524       EVT TruncVT = N->getValueType(0);
5525       SDValue N00 = N->getOperand(0).getOperand(0);
5526       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
5527       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
5528       AddToWorklist(Trunc00.getNode());
5529       AddToWorklist(Trunc01.getNode());
5530       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
5531     }
5532   }
5533
5534   return SDValue();
5535 }
5536
5537 SDValue DAGCombiner::visitRotate(SDNode *N) {
5538   SDLoc dl(N);
5539   SDValue N0 = N->getOperand(0);
5540   SDValue N1 = N->getOperand(1);
5541   EVT VT = N->getValueType(0);
5542   unsigned Bitsize = VT.getScalarSizeInBits();
5543
5544   // fold (rot x, 0) -> x
5545   if (isNullConstantOrNullSplatConstant(N1))
5546     return N0;
5547
5548   // fold (rot x, c) -> (rot x, c % BitSize)
5549   if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
5550     if (Cst->getAPIntValue().uge(Bitsize)) {
5551       uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
5552       return DAG.getNode(N->getOpcode(), dl, VT, N0,
5553                          DAG.getConstant(RotAmt, dl, N1.getValueType()));
5554     }
5555   }
5556
5557   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
5558   if (N1.getOpcode() == ISD::TRUNCATE &&
5559       N1.getOperand(0).getOpcode() == ISD::AND) {
5560     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5561       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
5562   }
5563
5564   unsigned NextOp = N0.getOpcode();
5565   // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
5566   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
5567     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
5568     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
5569     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
5570       EVT ShiftVT = C1->getValueType(0);
5571       bool SameSide = (N->getOpcode() == NextOp);
5572       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
5573       if (SDValue CombinedShift =
5574               DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
5575         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
5576         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
5577             ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
5578             BitsizeC.getNode());
5579         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
5580                            CombinedShiftNorm);
5581       }
5582     }
5583   }
5584   return SDValue();
5585 }
5586
5587 SDValue DAGCombiner::visitSHL(SDNode *N) {
5588   SDValue N0 = N->getOperand(0);
5589   SDValue N1 = N->getOperand(1);
5590   EVT VT = N0.getValueType();
5591   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5592
5593   // fold vector ops
5594   if (VT.isVector()) {
5595     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5596       return FoldedVOp;
5597
5598     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
5599     // If setcc produces all-one true value then:
5600     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
5601     if (N1CV && N1CV->isConstant()) {
5602       if (N0.getOpcode() == ISD::AND) {
5603         SDValue N00 = N0->getOperand(0);
5604         SDValue N01 = N0->getOperand(1);
5605         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
5606
5607         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
5608             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
5609                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
5610           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
5611                                                      N01CV, N1CV))
5612             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
5613         }
5614       }
5615     }
5616   }
5617
5618   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5619
5620   // fold (shl c1, c2) -> c1<<c2
5621   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5622   if (N0C && N1C && !N1C->isOpaque())
5623     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
5624   // fold (shl 0, x) -> 0
5625   if (isNullConstantOrNullSplatConstant(N0))
5626     return N0;
5627   // fold (shl x, c >= size(x)) -> undef
5628   // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
5629   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
5630     return Val->getAPIntValue().uge(OpSizeInBits);
5631   };
5632   if (matchUnaryPredicate(N1, MatchShiftTooBig))
5633     return DAG.getUNDEF(VT);
5634   // fold (shl x, 0) -> x
5635   if (N1C && N1C->isNullValue())
5636     return N0;
5637   // fold (shl undef, x) -> 0
5638   if (N0.isUndef())
5639     return DAG.getConstant(0, SDLoc(N), VT);
5640
5641   if (SDValue NewSel = foldBinOpIntoSelect(N))
5642     return NewSel;
5643
5644   // if (shl x, c) is known to be zero, return 0
5645   if (DAG.MaskedValueIsZero(SDValue(N, 0),
5646                             APInt::getAllOnesValue(OpSizeInBits)))
5647     return DAG.getConstant(0, SDLoc(N), VT);
5648   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
5649   if (N1.getOpcode() == ISD::TRUNCATE &&
5650       N1.getOperand(0).getOpcode() == ISD::AND) {
5651     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5652       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
5653   }
5654
5655   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5656     return SDValue(N, 0);
5657
5658   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
5659   if (N0.getOpcode() == ISD::SHL) {
5660     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
5661                                           ConstantSDNode *RHS) {
5662       APInt c1 = LHS->getAPIntValue();
5663       APInt c2 = RHS->getAPIntValue();
5664       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5665       return (c1 + c2).uge(OpSizeInBits);
5666     };
5667     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
5668       return DAG.getConstant(0, SDLoc(N), VT);
5669
5670     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
5671                                        ConstantSDNode *RHS) {
5672       APInt c1 = LHS->getAPIntValue();
5673       APInt c2 = RHS->getAPIntValue();
5674       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5675       return (c1 + c2).ult(OpSizeInBits);
5676     };
5677     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
5678       SDLoc DL(N);
5679       EVT ShiftVT = N1.getValueType();
5680       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
5681       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
5682     }
5683   }
5684
5685   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
5686   // For this to be valid, the second form must not preserve any of the bits
5687   // that are shifted out by the inner shift in the first form.  This means
5688   // the outer shift size must be >= the number of bits added by the ext.
5689   // As a corollary, we don't care what kind of ext it is.
5690   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
5691               N0.getOpcode() == ISD::ANY_EXTEND ||
5692               N0.getOpcode() == ISD::SIGN_EXTEND) &&
5693       N0.getOperand(0).getOpcode() == ISD::SHL) {
5694     SDValue N0Op0 = N0.getOperand(0);
5695     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
5696       APInt c1 = N0Op0C1->getAPIntValue();
5697       APInt c2 = N1C->getAPIntValue();
5698       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5699
5700       EVT InnerShiftVT = N0Op0.getValueType();
5701       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
5702       if (c2.uge(OpSizeInBits - InnerShiftSize)) {
5703         SDLoc DL(N0);
5704         APInt Sum = c1 + c2;
5705         if (Sum.uge(OpSizeInBits))
5706           return DAG.getConstant(0, DL, VT);
5707
5708         return DAG.getNode(
5709             ISD::SHL, DL, VT,
5710             DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
5711             DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5712       }
5713     }
5714   }
5715
5716   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
5717   // Only fold this if the inner zext has no other uses to avoid increasing
5718   // the total number of instructions.
5719   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
5720       N0.getOperand(0).getOpcode() == ISD::SRL) {
5721     SDValue N0Op0 = N0.getOperand(0);
5722     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
5723       if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
5724         uint64_t c1 = N0Op0C1->getZExtValue();
5725         uint64_t c2 = N1C->getZExtValue();
5726         if (c1 == c2) {
5727           SDValue NewOp0 = N0.getOperand(0);
5728           EVT CountVT = NewOp0.getOperand(1).getValueType();
5729           SDLoc DL(N);
5730           SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
5731                                        NewOp0,
5732                                        DAG.getConstant(c2, DL, CountVT));
5733           AddToWorklist(NewSHL.getNode());
5734           return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
5735         }
5736       }
5737     }
5738   }
5739
5740   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
5741   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
5742   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
5743       N0->getFlags().hasExact()) {
5744     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5745       uint64_t C1 = N0C1->getZExtValue();
5746       uint64_t C2 = N1C->getZExtValue();
5747       SDLoc DL(N);
5748       if (C1 <= C2)
5749         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
5750                            DAG.getConstant(C2 - C1, DL, N1.getValueType()));
5751       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
5752                          DAG.getConstant(C1 - C2, DL, N1.getValueType()));
5753     }
5754   }
5755
5756   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
5757   //                               (and (srl x, (sub c1, c2), MASK)
5758   // Only fold this if the inner shift has no other uses -- if it does, folding
5759   // this will increase the total number of instructions.
5760   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5761     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5762       uint64_t c1 = N0C1->getZExtValue();
5763       if (c1 < OpSizeInBits) {
5764         uint64_t c2 = N1C->getZExtValue();
5765         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
5766         SDValue Shift;
5767         if (c2 > c1) {
5768           Mask <<= c2 - c1;
5769           SDLoc DL(N);
5770           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
5771                               DAG.getConstant(c2 - c1, DL, N1.getValueType()));
5772         } else {
5773           Mask.lshrInPlace(c1 - c2);
5774           SDLoc DL(N);
5775           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
5776                               DAG.getConstant(c1 - c2, DL, N1.getValueType()));
5777         }
5778         SDLoc DL(N0);
5779         return DAG.getNode(ISD::AND, DL, VT, Shift,
5780                            DAG.getConstant(Mask, DL, VT));
5781       }
5782     }
5783   }
5784
5785   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
5786   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
5787       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
5788     SDLoc DL(N);
5789     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
5790     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
5791     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
5792   }
5793
5794   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
5795   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
5796   // Variant of version done on multiply, except mul by a power of 2 is turned
5797   // into a shift.
5798   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
5799       N0.getNode()->hasOneUse() &&
5800       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
5801       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
5802     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
5803     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
5804     AddToWorklist(Shl0.getNode());
5805     AddToWorklist(Shl1.getNode());
5806     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
5807   }
5808
5809   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
5810   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
5811       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
5812       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
5813     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
5814     if (isConstantOrConstantVector(Shl))
5815       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
5816   }
5817
5818   if (N1C && !N1C->isOpaque())
5819     if (SDValue NewSHL = visitShiftByConstant(N, N1C))
5820       return NewSHL;
5821
5822   return SDValue();
5823 }
5824
5825 SDValue DAGCombiner::visitSRA(SDNode *N) {
5826   SDValue N0 = N->getOperand(0);
5827   SDValue N1 = N->getOperand(1);
5828   EVT VT = N0.getValueType();
5829   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5830
5831   // Arithmetic shifting an all-sign-bit value is a no-op.
5832   // fold (sra 0, x) -> 0
5833   // fold (sra -1, x) -> -1
5834   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
5835     return N0;
5836
5837   // fold vector ops
5838   if (VT.isVector())
5839     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5840       return FoldedVOp;
5841
5842   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5843
5844   // fold (sra c1, c2) -> (sra c1, c2)
5845   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5846   if (N0C && N1C && !N1C->isOpaque())
5847     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
5848   // fold (sra x, c >= size(x)) -> undef
5849   // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
5850   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
5851     return Val->getAPIntValue().uge(OpSizeInBits);
5852   };
5853   if (matchUnaryPredicate(N1, MatchShiftTooBig))
5854     return DAG.getUNDEF(VT);
5855   // fold (sra x, 0) -> x
5856   if (N1C && N1C->isNullValue())
5857     return N0;
5858
5859   if (SDValue NewSel = foldBinOpIntoSelect(N))
5860     return NewSel;
5861
5862   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
5863   // sext_inreg.
5864   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
5865     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
5866     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
5867     if (VT.isVector())
5868       ExtVT = EVT::getVectorVT(*DAG.getContext(),
5869                                ExtVT, VT.getVectorNumElements());
5870     if ((!LegalOperations ||
5871          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
5872       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
5873                          N0.getOperand(0), DAG.getValueType(ExtVT));
5874   }
5875
5876   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
5877   if (N0.getOpcode() == ISD::SRA) {
5878     SDLoc DL(N);
5879     EVT ShiftVT = N1.getValueType();
5880
5881     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
5882                                           ConstantSDNode *RHS) {
5883       APInt c1 = LHS->getAPIntValue();
5884       APInt c2 = RHS->getAPIntValue();
5885       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5886       return (c1 + c2).uge(OpSizeInBits);
5887     };
5888     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
5889       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
5890                          DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));
5891
5892     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
5893                                        ConstantSDNode *RHS) {
5894       APInt c1 = LHS->getAPIntValue();
5895       APInt c2 = RHS->getAPIntValue();
5896       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5897       return (c1 + c2).ult(OpSizeInBits);
5898     };
5899     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
5900       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
5901       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
5902     }
5903   }
5904
5905   // fold (sra (shl X, m), (sub result_size, n))
5906   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
5907   // result_size - n != m.
5908   // If truncate is free for the target sext(shl) is likely to result in better
5909   // code.
5910   if (N0.getOpcode() == ISD::SHL && N1C) {
5911     // Get the two constanst of the shifts, CN0 = m, CN = n.
5912     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
5913     if (N01C) {
5914       LLVMContext &Ctx = *DAG.getContext();
5915       // Determine what the truncate's result bitsize and type would be.
5916       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
5917
5918       if (VT.isVector())
5919         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
5920
5921       // Determine the residual right-shift amount.
5922       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
5923
5924       // If the shift is not a no-op (in which case this should be just a sign
5925       // extend already), the truncated to type is legal, sign_extend is legal
5926       // on that type, and the truncate to that type is both legal and free,
5927       // perform the transform.
5928       if ((ShiftAmt > 0) &&
5929           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
5930           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
5931           TLI.isTruncateFree(VT, TruncVT)) {
5932         SDLoc DL(N);
5933         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
5934             getShiftAmountTy(N0.getOperand(0).getValueType()));
5935         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
5936                                     N0.getOperand(0), Amt);
5937         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
5938                                     Shift);
5939         return DAG.getNode(ISD::SIGN_EXTEND, DL,
5940                            N->getValueType(0), Trunc);
5941       }
5942     }
5943   }
5944
5945   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
5946   if (N1.getOpcode() == ISD::TRUNCATE &&
5947       N1.getOperand(0).getOpcode() == ISD::AND) {
5948     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5949       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
5950   }
5951
5952   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
5953   //      if c1 is equal to the number of bits the trunc removes
5954   if (N0.getOpcode() == ISD::TRUNCATE &&
5955       (N0.getOperand(0).getOpcode() == ISD::SRL ||
5956        N0.getOperand(0).getOpcode() == ISD::SRA) &&
5957       N0.getOperand(0).hasOneUse() &&
5958       N0.getOperand(0).getOperand(1).hasOneUse() &&
5959       N1C) {
5960     SDValue N0Op0 = N0.getOperand(0);
5961     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
5962       unsigned LargeShiftVal = LargeShift->getZExtValue();
5963       EVT LargeVT = N0Op0.getValueType();
5964
5965       if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
5966         SDLoc DL(N);
5967         SDValue Amt =
5968           DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
5969                           getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
5970         SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
5971                                   N0Op0.getOperand(0), Amt);
5972         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
5973       }
5974     }
5975   }
5976
5977   // Simplify, based on bits shifted out of the LHS.
5978   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5979     return SDValue(N, 0);
5980
5981   // If the sign bit is known to be zero, switch this to a SRL.
5982   if (DAG.SignBitIsZero(N0))
5983     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
5984
5985   if (N1C && !N1C->isOpaque())
5986     if (SDValue NewSRA = visitShiftByConstant(N, N1C))
5987       return NewSRA;
5988
5989   return SDValue();
5990 }
5991
5992 SDValue DAGCombiner::visitSRL(SDNode *N) {
5993   SDValue N0 = N->getOperand(0);
5994   SDValue N1 = N->getOperand(1);
5995   EVT VT = N0.getValueType();
5996   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5997
5998   // fold vector ops
5999   if (VT.isVector())
6000     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6001       return FoldedVOp;
6002
6003   ConstantSDNode *N1C = isConstOrConstSplat(N1);
6004
6005   // fold (srl c1, c2) -> c1 >>u c2
6006   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6007   if (N0C && N1C && !N1C->isOpaque())
6008     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
6009   // fold (srl 0, x) -> 0
6010   if (isNullConstantOrNullSplatConstant(N0))
6011     return N0;
6012   // fold (srl x, c >= size(x)) -> undef
6013   // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
6014   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
6015     return Val->getAPIntValue().uge(OpSizeInBits);
6016   };
6017   if (matchUnaryPredicate(N1, MatchShiftTooBig))
6018     return DAG.getUNDEF(VT);
6019   // fold (srl x, 0) -> x
6020   if (N1C && N1C->isNullValue())
6021     return N0;
6022
6023   if (SDValue NewSel = foldBinOpIntoSelect(N))
6024     return NewSel;
6025
6026   // if (srl x, c) is known to be zero, return 0
6027   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
6028                                    APInt::getAllOnesValue(OpSizeInBits)))
6029     return DAG.getConstant(0, SDLoc(N), VT);
6030
6031   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
6032   if (N0.getOpcode() == ISD::SRL) {
6033     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
6034                                           ConstantSDNode *RHS) {
6035       APInt c1 = LHS->getAPIntValue();
6036       APInt c2 = RHS->getAPIntValue();
6037       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6038       return (c1 + c2).uge(OpSizeInBits);
6039     };
6040     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
6041       return DAG.getConstant(0, SDLoc(N), VT);
6042
6043     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
6044                                        ConstantSDNode *RHS) {
6045       APInt c1 = LHS->getAPIntValue();
6046       APInt c2 = RHS->getAPIntValue();
6047       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6048       return (c1 + c2).ult(OpSizeInBits);
6049     };
6050     if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
6051       SDLoc DL(N);
6052       EVT ShiftVT = N1.getValueType();
6053       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
6054       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
6055     }
6056   }
6057
6058   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
6059   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
6060       N0.getOperand(0).getOpcode() == ISD::SRL) {
6061     if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
6062       uint64_t c1 = N001C->getZExtValue();
6063       uint64_t c2 = N1C->getZExtValue();
6064       EVT InnerShiftVT = N0.getOperand(0).getValueType();
6065       EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
6066       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
6067       // This is only valid if the OpSizeInBits + c1 = size of inner shift.
6068       if (c1 + OpSizeInBits == InnerShiftSize) {
6069         SDLoc DL(N0);
6070         if (c1 + c2 >= InnerShiftSize)
6071           return DAG.getConstant(0, DL, VT);
6072         return DAG.getNode(ISD::TRUNCATE, DL, VT,
6073                            DAG.getNode(ISD::SRL, DL, InnerShiftVT,
6074                                        N0.getOperand(0).getOperand(0),
6075                                        DAG.getConstant(c1 + c2, DL,
6076                                                        ShiftCountVT)));
6077       }
6078     }
6079   }
6080
6081   // fold (srl (shl x, c), c) -> (and x, cst2)
6082   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
6083       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
6084     SDLoc DL(N);
6085     SDValue Mask =
6086         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
6087     AddToWorklist(Mask.getNode());
6088     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
6089   }
6090
6091   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
6092   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
6093     // Shifting in all undef bits?
6094     EVT SmallVT = N0.getOperand(0).getValueType();
6095     unsigned BitSize = SmallVT.getScalarSizeInBits();
6096     if (N1C->getZExtValue() >= BitSize)
6097       return DAG.getUNDEF(VT);
6098
6099     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
6100       uint64_t ShiftAmt = N1C->getZExtValue();
6101       SDLoc DL0(N0);
6102       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
6103                                        N0.getOperand(0),
6104                           DAG.getConstant(ShiftAmt, DL0,
6105                                           getShiftAmountTy(SmallVT)));
6106       AddToWorklist(SmallShift.getNode());
6107       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
6108       SDLoc DL(N);
6109       return DAG.getNode(ISD::AND, DL, VT,
6110                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
6111                          DAG.getConstant(Mask, DL, VT));
6112     }
6113   }
6114
6115   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
6116   // bit, which is unmodified by sra.
6117   if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
6118     if (N0.getOpcode() == ISD::SRA)
6119       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
6120   }
6121
6122   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
6123   if (N1C && N0.getOpcode() == ISD::CTLZ &&
6124       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
6125     KnownBits Known;
6126     DAG.computeKnownBits(N0.getOperand(0), Known);
6127
6128     // If any of the input bits are KnownOne, then the input couldn't be all
6129     // zeros, thus the result of the srl will always be zero.
6130     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
6131
6132     // If all of the bits input the to ctlz node are known to be zero, then
6133     // the result of the ctlz is "32" and the result of the shift is one.
6134     APInt UnknownBits = ~Known.Zero;
6135     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
6136
6137     // Otherwise, check to see if there is exactly one bit input to the ctlz.
6138     if (UnknownBits.isPowerOf2()) {
6139       // Okay, we know that only that the single bit specified by UnknownBits
6140       // could be set on input to the CTLZ node. If this bit is set, the SRL
6141       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
6142       // to an SRL/XOR pair, which is likely to simplify more.
6143       unsigned ShAmt = UnknownBits.countTrailingZeros();
6144       SDValue Op = N0.getOperand(0);
6145
6146       if (ShAmt) {
6147         SDLoc DL(N0);
6148         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
6149                   DAG.getConstant(ShAmt, DL,
6150                                   getShiftAmountTy(Op.getValueType())));
6151         AddToWorklist(Op.getNode());
6152       }
6153
6154       SDLoc DL(N);
6155       return DAG.getNode(ISD::XOR, DL, VT,
6156                          Op, DAG.getConstant(1, DL, VT));
6157     }
6158   }
6159
6160   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
6161   if (N1.getOpcode() == ISD::TRUNCATE &&
6162       N1.getOperand(0).getOpcode() == ISD::AND) {
6163     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6164       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
6165   }
6166
6167   // fold operands of srl based on knowledge that the low bits are not
6168   // demanded.
6169   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6170     return SDValue(N, 0);
6171
6172   if (N1C && !N1C->isOpaque())
6173     if (SDValue NewSRL = visitShiftByConstant(N, N1C))
6174       return NewSRL;
6175
6176   // Attempt to convert a srl of a load into a narrower zero-extending load.
6177   if (SDValue NarrowLoad = ReduceLoadWidth(N))
6178     return NarrowLoad;
6179
6180   // Here is a common situation. We want to optimize:
6181   //
6182   //   %a = ...
6183   //   %b = and i32 %a, 2
6184   //   %c = srl i32 %b, 1
6185   //   brcond i32 %c ...
6186   //
6187   // into
6188   //
6189   //   %a = ...
6190   //   %b = and %a, 2
6191   //   %c = setcc eq %b, 0
6192   //   brcond %c ...
6193   //
6194   // However when after the source operand of SRL is optimized into AND, the SRL
6195   // itself may not be optimized further. Look for it and add the BRCOND into
6196   // the worklist.
6197   if (N->hasOneUse()) {
6198     SDNode *Use = *N->use_begin();
6199     if (Use->getOpcode() == ISD::BRCOND)
6200       AddToWorklist(Use);
6201     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
6202       // Also look pass the truncate.
6203       Use = *Use->use_begin();
6204       if (Use->getOpcode() == ISD::BRCOND)
6205         AddToWorklist(Use);
6206     }
6207   }
6208
6209   return SDValue();
6210 }
6211
6212 SDValue DAGCombiner::visitABS(SDNode *N) {
6213   SDValue N0 = N->getOperand(0);
6214   EVT VT = N->getValueType(0);
6215
6216   // fold (abs c1) -> c2
6217   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6218     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
6219   // fold (abs (abs x)) -> (abs x)
6220   if (N0.getOpcode() == ISD::ABS)
6221     return N0;
6222   // fold (abs x) -> x iff not-negative
6223   if (DAG.SignBitIsZero(N0))
6224     return N0;
6225   return SDValue();
6226 }
6227
6228 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
6229   SDValue N0 = N->getOperand(0);
6230   EVT VT = N->getValueType(0);
6231
6232   // fold (bswap c1) -> c2
6233   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6234     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
6235   // fold (bswap (bswap x)) -> x
6236   if (N0.getOpcode() == ISD::BSWAP)
6237     return N0->getOperand(0);
6238   return SDValue();
6239 }
6240
6241 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
6242   SDValue N0 = N->getOperand(0);
6243   EVT VT = N->getValueType(0);
6244
6245   // fold (bitreverse c1) -> c2
6246   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6247     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
6248   // fold (bitreverse (bitreverse x)) -> x
6249   if (N0.getOpcode() == ISD::BITREVERSE)
6250     return N0.getOperand(0);
6251   return SDValue();
6252 }
6253
6254 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
6255   SDValue N0 = N->getOperand(0);
6256   EVT VT = N->getValueType(0);
6257
6258   // fold (ctlz c1) -> c2
6259   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6260     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
6261   return SDValue();
6262 }
6263
6264 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
6265   SDValue N0 = N->getOperand(0);
6266   EVT VT = N->getValueType(0);
6267
6268   // fold (ctlz_zero_undef c1) -> c2
6269   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6270     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6271   return SDValue();
6272 }
6273
6274 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
6275   SDValue N0 = N->getOperand(0);
6276   EVT VT = N->getValueType(0);
6277
6278   // fold (cttz c1) -> c2
6279   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6280     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
6281   return SDValue();
6282 }
6283
6284 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
6285   SDValue N0 = N->getOperand(0);
6286   EVT VT = N->getValueType(0);
6287
6288   // fold (cttz_zero_undef c1) -> c2
6289   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6290     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6291   return SDValue();
6292 }
6293
6294 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
6295   SDValue N0 = N->getOperand(0);
6296   EVT VT = N->getValueType(0);
6297
6298   // fold (ctpop c1) -> c2
6299   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6300     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
6301   return SDValue();
6302 }
6303
6304 /// \brief Generate Min/Max node
6305 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
6306                                    SDValue RHS, SDValue True, SDValue False,
6307                                    ISD::CondCode CC, const TargetLowering &TLI,
6308                                    SelectionDAG &DAG) {
6309   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
6310     return SDValue();
6311
6312   switch (CC) {
6313   case ISD::SETOLT:
6314   case ISD::SETOLE:
6315   case ISD::SETLT:
6316   case ISD::SETLE:
6317   case ISD::SETULT:
6318   case ISD::SETULE: {
6319     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
6320     if (TLI.isOperationLegal(Opcode, VT))
6321       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
6322     return SDValue();
6323   }
6324   case ISD::SETOGT:
6325   case ISD::SETOGE:
6326   case ISD::SETGT:
6327   case ISD::SETGE:
6328   case ISD::SETUGT:
6329   case ISD::SETUGE: {
6330     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
6331     if (TLI.isOperationLegal(Opcode, VT))
6332       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
6333     return SDValue();
6334   }
6335   default:
6336     return SDValue();
6337   }
6338 }
6339
6340 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
6341   SDValue Cond = N->getOperand(0);
6342   SDValue N1 = N->getOperand(1);
6343   SDValue N2 = N->getOperand(2);
6344   EVT VT = N->getValueType(0);
6345   EVT CondVT = Cond.getValueType();
6346   SDLoc DL(N);
6347
6348   if (!VT.isInteger())
6349     return SDValue();
6350
6351   auto *C1 = dyn_cast<ConstantSDNode>(N1);
6352   auto *C2 = dyn_cast<ConstantSDNode>(N2);
6353   if (!C1 || !C2)
6354     return SDValue();
6355
6356   // Only do this before legalization to avoid conflicting with target-specific
6357   // transforms in the other direction (create a select from a zext/sext). There
6358   // is also a target-independent combine here in DAGCombiner in the other
6359   // direction for (select Cond, -1, 0) when the condition is not i1.
6360   if (CondVT == MVT::i1 && !LegalOperations) {
6361     if (C1->isNullValue() && C2->isOne()) {
6362       // select Cond, 0, 1 --> zext (!Cond)
6363       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
6364       if (VT != MVT::i1)
6365         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
6366       return NotCond;
6367     }
6368     if (C1->isNullValue() && C2->isAllOnesValue()) {
6369       // select Cond, 0, -1 --> sext (!Cond)
6370       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
6371       if (VT != MVT::i1)
6372         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
6373       return NotCond;
6374     }
6375     if (C1->isOne() && C2->isNullValue()) {
6376       // select Cond, 1, 0 --> zext (Cond)
6377       if (VT != MVT::i1)
6378         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
6379       return Cond;
6380     }
6381     if (C1->isAllOnesValue() && C2->isNullValue()) {
6382       // select Cond, -1, 0 --> sext (Cond)
6383       if (VT != MVT::i1)
6384         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
6385       return Cond;
6386     }
6387
6388     // For any constants that differ by 1, we can transform the select into an
6389     // extend and add. Use a target hook because some targets may prefer to
6390     // transform in the other direction.
6391     if (TLI.convertSelectOfConstantsToMath(VT)) {
6392       if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
6393         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
6394         if (VT != MVT::i1)
6395           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
6396         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
6397       }
6398       if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
6399         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
6400         if (VT != MVT::i1)
6401           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
6402         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
6403       }
6404     }
6405
6406     return SDValue();
6407   }
6408
6409   // fold (select Cond, 0, 1) -> (xor Cond, 1)
6410   // We can't do this reliably if integer based booleans have different contents
6411   // to floating point based booleans. This is because we can't tell whether we
6412   // have an integer-based boolean or a floating-point-based boolean unless we
6413   // can find the SETCC that produced it and inspect its operands. This is
6414   // fairly easy if C is the SETCC node, but it can potentially be
6415   // undiscoverable (or not reasonably discoverable). For example, it could be
6416   // in another basic block or it could require searching a complicated
6417   // expression.
6418   if (CondVT.isInteger() &&
6419       TLI.getBooleanContents(false, true) ==
6420           TargetLowering::ZeroOrOneBooleanContent &&
6421       TLI.getBooleanContents(false, false) ==
6422           TargetLowering::ZeroOrOneBooleanContent &&
6423       C1->isNullValue() && C2->isOne()) {
6424     SDValue NotCond =
6425         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
6426     if (VT.bitsEq(CondVT))
6427       return NotCond;
6428     return DAG.getZExtOrTrunc(NotCond, DL, VT);
6429   }
6430
6431   return SDValue();
6432 }
6433
6434 SDValue DAGCombiner::visitSELECT(SDNode *N) {
6435   SDValue N0 = N->getOperand(0);
6436   SDValue N1 = N->getOperand(1);
6437   SDValue N2 = N->getOperand(2);
6438   EVT VT = N->getValueType(0);
6439   EVT VT0 = N0.getValueType();
6440   SDLoc DL(N);
6441
6442   // fold (select C, X, X) -> X
6443   if (N1 == N2)
6444     return N1;
6445
6446   if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
6447     // fold (select true, X, Y) -> X
6448     // fold (select false, X, Y) -> Y
6449     return !N0C->isNullValue() ? N1 : N2;
6450   }
6451
6452   // fold (select X, X, Y) -> (or X, Y)
6453   // fold (select X, 1, Y) -> (or C, Y)
6454   if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
6455     return DAG.getNode(ISD::OR, DL, VT, N0, N2);
6456
6457   if (SDValue V = foldSelectOfConstants(N))
6458     return V;
6459
6460   // fold (select C, 0, X) -> (and (not C), X)
6461   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
6462     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
6463     AddToWorklist(NOTNode.getNode());
6464     return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
6465   }
6466   // fold (select C, X, 1) -> (or (not C), X)
6467   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
6468     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
6469     AddToWorklist(NOTNode.getNode());
6470     return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
6471   }
6472   // fold (select X, Y, X) -> (and X, Y)
6473   // fold (select X, Y, 0) -> (and X, Y)
6474   if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
6475     return DAG.getNode(ISD::AND, DL, VT, N0, N1);
6476
6477   // If we can fold this based on the true/false value, do so.
6478   if (SimplifySelectOps(N, N1, N2))
6479     return SDValue(N, 0); // Don't revisit N.
6480
6481   if (VT0 == MVT::i1) {
6482     // The code in this block deals with the following 2 equivalences:
6483     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
6484     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
6485     // The target can specify its preferred form with the
6486     // shouldNormalizeToSelectSequence() callback. However we always transform
6487     // to the right anyway if we find the inner select exists in the DAG anyway
6488     // and we always transform to the left side if we know that we can further
6489     // optimize the combination of the conditions.
6490     bool normalizeToSequence =
6491         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
6492     // select (and Cond0, Cond1), X, Y
6493     //   -> select Cond0, (select Cond1, X, Y), Y
6494     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
6495       SDValue Cond0 = N0->getOperand(0);
6496       SDValue Cond1 = N0->getOperand(1);
6497       SDValue InnerSelect =
6498           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
6499       if (normalizeToSequence || !InnerSelect.use_empty())
6500         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
6501                            InnerSelect, N2);
6502     }
6503     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
6504     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
6505       SDValue Cond0 = N0->getOperand(0);
6506       SDValue Cond1 = N0->getOperand(1);
6507       SDValue InnerSelect =
6508           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
6509       if (normalizeToSequence || !InnerSelect.use_empty())
6510         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
6511                            InnerSelect);
6512     }
6513
6514     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
6515     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
6516       SDValue N1_0 = N1->getOperand(0);
6517       SDValue N1_1 = N1->getOperand(1);
6518       SDValue N1_2 = N1->getOperand(2);
6519       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
6520         // Create the actual and node if we can generate good code for it.
6521         if (!normalizeToSequence) {
6522           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
6523           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
6524         }
6525         // Otherwise see if we can optimize the "and" to a better pattern.
6526         if (SDValue Combined = visitANDLike(N0, N1_0, N))
6527           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
6528                              N2);
6529       }
6530     }
6531     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
6532     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
6533       SDValue N2_0 = N2->getOperand(0);
6534       SDValue N2_1 = N2->getOperand(1);
6535       SDValue N2_2 = N2->getOperand(2);
6536       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
6537         // Create the actual or node if we can generate good code for it.
6538         if (!normalizeToSequence) {
6539           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
6540           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
6541         }
6542         // Otherwise see if we can optimize to a better pattern.
6543         if (SDValue Combined = visitORLike(N0, N2_0, N))
6544           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
6545                              N2_2);
6546       }
6547     }
6548   }
6549
6550   // select (xor Cond, 1), X, Y -> select Cond, Y, X
6551   if (VT0 == MVT::i1) {
6552     if (N0->getOpcode() == ISD::XOR) {
6553       if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
6554         SDValue Cond0 = N0->getOperand(0);
6555         if (C->isOne())
6556           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1);
6557       }
6558     }
6559   }
6560
6561   // fold selects based on a setcc into other things, such as min/max/abs
6562   if (N0.getOpcode() == ISD::SETCC) {
6563     // select x, y (fcmp lt x, y) -> fminnum x, y
6564     // select x, y (fcmp gt x, y) -> fmaxnum x, y
6565     //
6566     // This is OK if we don't care about what happens if either operand is a
6567     // NaN.
6568     //
6569
6570     // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
6571     // no signed zeros as well as no nans.
6572     const TargetOptions &Options = DAG.getTarget().Options;
6573     if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
6574         DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
6575       ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
6576
6577       if (SDValue FMinMax = combineMinNumMaxNum(
6578               DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
6579         return FMinMax;
6580     }
6581
6582     if ((!LegalOperations &&
6583          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
6584         TLI.isOperationLegal(ISD::SELECT_CC, VT))
6585       return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
6586                          N0.getOperand(1), N1, N2, N0.getOperand(2));
6587     return SimplifySelect(DL, N0, N1, N2);
6588   }
6589
6590   return SDValue();
6591 }
6592
6593 static
6594 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
6595   SDLoc DL(N);
6596   EVT LoVT, HiVT;
6597   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
6598
6599   // Split the inputs.
6600   SDValue Lo, Hi, LL, LH, RL, RH;
6601   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
6602   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
6603
6604   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
6605   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
6606
6607   return std::make_pair(Lo, Hi);
6608 }
6609
6610 // This function assumes all the vselect's arguments are CONCAT_VECTOR
6611 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
6612 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
6613   SDLoc DL(N);
6614   SDValue Cond = N->getOperand(0);
6615   SDValue LHS = N->getOperand(1);
6616   SDValue RHS = N->getOperand(2);
6617   EVT VT = N->getValueType(0);
6618   int NumElems = VT.getVectorNumElements();
6619   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
6620          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
6621          Cond.getOpcode() == ISD::BUILD_VECTOR);
6622
6623   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
6624   // binary ones here.
6625   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
6626     return SDValue();
6627
6628   // We're sure we have an even number of elements due to the
6629   // concat_vectors we have as arguments to vselect.
6630   // Skip BV elements until we find one that's not an UNDEF
6631   // After we find an UNDEF element, keep looping until we get to half the
6632   // length of the BV and see if all the non-undef nodes are the same.
6633   ConstantSDNode *BottomHalf = nullptr;
6634   for (int i = 0; i < NumElems / 2; ++i) {
6635     if (Cond->getOperand(i)->isUndef())
6636       continue;
6637
6638     if (BottomHalf == nullptr)
6639       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6640     else if (Cond->getOperand(i).getNode() != BottomHalf)
6641       return SDValue();
6642   }
6643
6644   // Do the same for the second half of the BuildVector
6645   ConstantSDNode *TopHalf = nullptr;
6646   for (int i = NumElems / 2; i < NumElems; ++i) {
6647     if (Cond->getOperand(i)->isUndef())
6648       continue;
6649
6650     if (TopHalf == nullptr)
6651       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6652     else if (Cond->getOperand(i).getNode() != TopHalf)
6653       return SDValue();
6654   }
6655
6656   assert(TopHalf && BottomHalf &&
6657          "One half of the selector was all UNDEFs and the other was all the "
6658          "same value. This should have been addressed before this function.");
6659   return DAG.getNode(
6660       ISD::CONCAT_VECTORS, DL, VT,
6661       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
6662       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
6663 }
6664
6665 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
6666   if (Level >= AfterLegalizeTypes)
6667     return SDValue();
6668
6669   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
6670   SDValue Mask = MSC->getMask();
6671   SDValue Data  = MSC->getValue();
6672   SDLoc DL(N);
6673
6674   // If the MSCATTER data type requires splitting and the mask is provided by a
6675   // SETCC, then split both nodes and its operands before legalization. This
6676   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6677   // and enables future optimizations (e.g. min/max pattern matching on X86).
6678   if (Mask.getOpcode() != ISD::SETCC)
6679     return SDValue();
6680
6681   // Check if any splitting is required.
6682   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
6683       TargetLowering::TypeSplitVector)
6684     return SDValue();
6685   SDValue MaskLo, MaskHi, Lo, Hi;
6686   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6687
6688   EVT LoVT, HiVT;
6689   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
6690
6691   SDValue Chain = MSC->getChain();
6692
6693   EVT MemoryVT = MSC->getMemoryVT();
6694   unsigned Alignment = MSC->getOriginalAlignment();
6695
6696   EVT LoMemVT, HiMemVT;
6697   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6698
6699   SDValue DataLo, DataHi;
6700   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
6701
6702   SDValue BasePtr = MSC->getBasePtr();
6703   SDValue IndexLo, IndexHi;
6704   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
6705
6706   MachineMemOperand *MMO = DAG.getMachineFunction().
6707     getMachineMemOperand(MSC->getPointerInfo(),
6708                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
6709                           Alignment, MSC->getAAInfo(), MSC->getRanges());
6710
6711   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
6712   Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
6713                             DL, OpsLo, MMO);
6714
6715   SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
6716   Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
6717                             DL, OpsHi, MMO);
6718
6719   AddToWorklist(Lo.getNode());
6720   AddToWorklist(Hi.getNode());
6721
6722   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
6723 }
6724
6725 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
6726   if (Level >= AfterLegalizeTypes)
6727     return SDValue();
6728
6729   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
6730   SDValue Mask = MST->getMask();
6731   SDValue Data  = MST->getValue();
6732   EVT VT = Data.getValueType();
6733   SDLoc DL(N);
6734
6735   // If the MSTORE data type requires splitting and the mask is provided by a
6736   // SETCC, then split both nodes and its operands before legalization. This
6737   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6738   // and enables future optimizations (e.g. min/max pattern matching on X86).
6739   if (Mask.getOpcode() == ISD::SETCC) {
6740     // Check if any splitting is required.
6741     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6742         TargetLowering::TypeSplitVector)
6743       return SDValue();
6744
6745     SDValue MaskLo, MaskHi, Lo, Hi;
6746     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6747
6748     SDValue Chain = MST->getChain();
6749     SDValue Ptr   = MST->getBasePtr();
6750
6751     EVT MemoryVT = MST->getMemoryVT();
6752     unsigned Alignment = MST->getOriginalAlignment();
6753
6754     // if Alignment is equal to the vector size,
6755     // take the half of it for the second part
6756     unsigned SecondHalfAlignment =
6757       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
6758
6759     EVT LoMemVT, HiMemVT;
6760     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6761
6762     SDValue DataLo, DataHi;
6763     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
6764
6765     MachineMemOperand *MMO = DAG.getMachineFunction().
6766       getMachineMemOperand(MST->getPointerInfo(),
6767                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
6768                            Alignment, MST->getAAInfo(), MST->getRanges());
6769
6770     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
6771                             MST->isTruncatingStore(),
6772                             MST->isCompressingStore());
6773
6774     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6775                                      MST->isCompressingStore());
6776
6777     MMO = DAG.getMachineFunction().
6778       getMachineMemOperand(MST->getPointerInfo(),
6779                            MachineMemOperand::MOStore,  HiMemVT.getStoreSize(),
6780                            SecondHalfAlignment, MST->getAAInfo(),
6781                            MST->getRanges());
6782
6783     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
6784                             MST->isTruncatingStore(),
6785                             MST->isCompressingStore());
6786
6787     AddToWorklist(Lo.getNode());
6788     AddToWorklist(Hi.getNode());
6789
6790     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
6791   }
6792   return SDValue();
6793 }
6794
6795 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
6796   if (Level >= AfterLegalizeTypes)
6797     return SDValue();
6798
6799   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
6800   SDValue Mask = MGT->getMask();
6801   SDLoc DL(N);
6802
6803   // If the MGATHER result requires splitting and the mask is provided by a
6804   // SETCC, then split both nodes and its operands before legalization. This
6805   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6806   // and enables future optimizations (e.g. min/max pattern matching on X86).
6807
6808   if (Mask.getOpcode() != ISD::SETCC)
6809     return SDValue();
6810
6811   EVT VT = N->getValueType(0);
6812
6813   // Check if any splitting is required.
6814   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6815       TargetLowering::TypeSplitVector)
6816     return SDValue();
6817
6818   SDValue MaskLo, MaskHi, Lo, Hi;
6819   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6820
6821   SDValue Src0 = MGT->getValue();
6822   SDValue Src0Lo, Src0Hi;
6823   std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6824
6825   EVT LoVT, HiVT;
6826   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
6827
6828   SDValue Chain = MGT->getChain();
6829   EVT MemoryVT = MGT->getMemoryVT();
6830   unsigned Alignment = MGT->getOriginalAlignment();
6831
6832   EVT LoMemVT, HiMemVT;
6833   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6834
6835   SDValue BasePtr = MGT->getBasePtr();
6836   SDValue Index = MGT->getIndex();
6837   SDValue IndexLo, IndexHi;
6838   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
6839
6840   MachineMemOperand *MMO = DAG.getMachineFunction().
6841     getMachineMemOperand(MGT->getPointerInfo(),
6842                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
6843                           Alignment, MGT->getAAInfo(), MGT->getRanges());
6844
6845   SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
6846   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
6847                             MMO);
6848
6849   SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
6850   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
6851                             MMO);
6852
6853   AddToWorklist(Lo.getNode());
6854   AddToWorklist(Hi.getNode());
6855
6856   // Build a factor node to remember that this load is independent of the
6857   // other one.
6858   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6859                       Hi.getValue(1));
6860
6861   // Legalized the chain result - switch anything that used the old chain to
6862   // use the new one.
6863   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
6864
6865   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6866
6867   SDValue RetOps[] = { GatherRes, Chain };
6868   return DAG.getMergeValues(RetOps, DL);
6869 }
6870
6871 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
6872   if (Level >= AfterLegalizeTypes)
6873     return SDValue();
6874
6875   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
6876   SDValue Mask = MLD->getMask();
6877   SDLoc DL(N);
6878
6879   // If the MLOAD result requires splitting and the mask is provided by a
6880   // SETCC, then split both nodes and its operands before legalization. This
6881   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6882   // and enables future optimizations (e.g. min/max pattern matching on X86).
6883   if (Mask.getOpcode() == ISD::SETCC) {
6884     EVT VT = N->getValueType(0);
6885
6886     // Check if any splitting is required.
6887     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6888         TargetLowering::TypeSplitVector)
6889       return SDValue();
6890
6891     SDValue MaskLo, MaskHi, Lo, Hi;
6892     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6893
6894     SDValue Src0 = MLD->getSrc0();
6895     SDValue Src0Lo, Src0Hi;
6896     std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6897
6898     EVT LoVT, HiVT;
6899     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
6900
6901     SDValue Chain = MLD->getChain();
6902     SDValue Ptr   = MLD->getBasePtr();
6903     EVT MemoryVT = MLD->getMemoryVT();
6904     unsigned Alignment = MLD->getOriginalAlignment();
6905
6906     // if Alignment is equal to the vector size,
6907     // take the half of it for the second part
6908     unsigned SecondHalfAlignment =
6909       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
6910          Alignment/2 : Alignment;
6911
6912     EVT LoMemVT, HiMemVT;
6913     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6914
6915     MachineMemOperand *MMO = DAG.getMachineFunction().
6916     getMachineMemOperand(MLD->getPointerInfo(),
6917                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
6918                          Alignment, MLD->getAAInfo(), MLD->getRanges());
6919
6920     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
6921                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6922
6923     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6924                                      MLD->isExpandingLoad());
6925
6926     MMO = DAG.getMachineFunction().
6927     getMachineMemOperand(MLD->getPointerInfo(),
6928                          MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(),
6929                          SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
6930
6931     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
6932                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6933
6934     AddToWorklist(Lo.getNode());
6935     AddToWorklist(Hi.getNode());
6936
6937     // Build a factor node to remember that this load is independent of the
6938     // other one.
6939     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6940                         Hi.getValue(1));
6941
6942     // Legalized the chain result - switch anything that used the old chain to
6943     // use the new one.
6944     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
6945
6946     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6947
6948     SDValue RetOps[] = { LoadRes, Chain };
6949     return DAG.getMergeValues(RetOps, DL);
6950   }
6951   return SDValue();
6952 }
6953
6954 /// A vector select of 2 constant vectors can be simplified to math/logic to
6955 /// avoid a variable select instruction and possibly avoid constant loads.
6956 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
6957   SDValue Cond = N->getOperand(0);
6958   SDValue N1 = N->getOperand(1);
6959   SDValue N2 = N->getOperand(2);
6960   EVT VT = N->getValueType(0);
6961   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
6962       !TLI.convertSelectOfConstantsToMath(VT) ||
6963       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
6964       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
6965     return SDValue();
6966
6967   // Check if we can use the condition value to increment/decrement a single
6968   // constant value. This simplifies a select to an add and removes a constant
6969   // load/materialization from the general case.
6970   bool AllAddOne = true;
6971   bool AllSubOne = true;
6972   unsigned Elts = VT.getVectorNumElements();
6973   for (unsigned i = 0; i != Elts; ++i) {
6974     SDValue N1Elt = N1.getOperand(i);
6975     SDValue N2Elt = N2.getOperand(i);
6976     if (N1Elt.isUndef() || N2Elt.isUndef())
6977       continue;
6978
6979     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
6980     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
6981     if (C1 != C2 + 1)
6982       AllAddOne = false;
6983     if (C1 != C2 - 1)
6984       AllSubOne = false;
6985   }
6986
6987   // Further simplifications for the extra-special cases where the constants are
6988   // all 0 or all -1 should be implemented as folds of these patterns.
6989   SDLoc DL(N);
6990   if (AllAddOne || AllSubOne) {
6991     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
6992     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
6993     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
6994     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
6995     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
6996   }
6997
6998   // The general case for select-of-constants:
6999   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
7000   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
7001   // leave that to a machine-specific pass.
7002   return SDValue();
7003 }
7004
7005 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
7006   SDValue N0 = N->getOperand(0);
7007   SDValue N1 = N->getOperand(1);
7008   SDValue N2 = N->getOperand(2);
7009   SDLoc DL(N);
7010
7011   // fold (vselect C, X, X) -> X
7012   if (N1 == N2)
7013     return N1;
7014
7015   // Canonicalize integer abs.
7016   // vselect (setg[te] X,  0),  X, -X ->
7017   // vselect (setgt    X, -1),  X, -X ->
7018   // vselect (setl[te] X,  0), -X,  X ->
7019   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
7020   if (N0.getOpcode() == ISD::SETCC) {
7021     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
7022     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7023     bool isAbs = false;
7024     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
7025
7026     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
7027          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
7028         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
7029       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
7030     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
7031              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
7032       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7033
7034     if (isAbs) {
7035       EVT VT = LHS.getValueType();
7036       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
7037         return DAG.getNode(ISD::ABS, DL, VT, LHS);
7038
7039       SDValue Shift = DAG.getNode(
7040           ISD::SRA, DL, VT, LHS,
7041           DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
7042       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
7043       AddToWorklist(Shift.getNode());
7044       AddToWorklist(Add.getNode());
7045       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
7046     }
7047   }
7048
7049   if (SimplifySelectOps(N, N1, N2))
7050     return SDValue(N, 0);  // Don't revisit N.
7051
7052   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
7053   if (ISD::isBuildVectorAllOnes(N0.getNode()))
7054     return N1;
7055   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
7056   if (ISD::isBuildVectorAllZeros(N0.getNode()))
7057     return N2;
7058
7059   // The ConvertSelectToConcatVector function is assuming both the above
7060   // checks for (vselect (build_vector all{ones,zeros) ...) have been made
7061   // and addressed.
7062   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
7063       N2.getOpcode() == ISD::CONCAT_VECTORS &&
7064       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
7065     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
7066       return CV;
7067   }
7068
7069   if (SDValue V = foldVSelectOfConstants(N))
7070     return V;
7071
7072   return SDValue();
7073 }
7074
7075 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
7076   SDValue N0 = N->getOperand(0);
7077   SDValue N1 = N->getOperand(1);
7078   SDValue N2 = N->getOperand(2);
7079   SDValue N3 = N->getOperand(3);
7080   SDValue N4 = N->getOperand(4);
7081   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
7082
7083   // fold select_cc lhs, rhs, x, x, cc -> x
7084   if (N2 == N3)
7085     return N2;
7086
7087   // Determine if the condition we're dealing with is constant
7088   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
7089                                   CC, SDLoc(N), false)) {
7090     AddToWorklist(SCC.getNode());
7091
7092     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
7093       if (!SCCC->isNullValue())
7094         return N2;    // cond always true -> true val
7095       else
7096         return N3;    // cond always false -> false val
7097     } else if (SCC->isUndef()) {
7098       // When the condition is UNDEF, just return the first operand. This is
7099       // coherent the DAG creation, no setcc node is created in this case
7100       return N2;
7101     } else if (SCC.getOpcode() == ISD::SETCC) {
7102       // Fold to a simpler select_cc
7103       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
7104                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
7105                          SCC.getOperand(2));
7106     }
7107   }
7108
7109   // If we can fold this based on the true/false value, do so.
7110   if (SimplifySelectOps(N, N2, N3))
7111     return SDValue(N, 0);  // Don't revisit N.
7112
7113   // fold select_cc into other things, such as min/max/abs
7114   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
7115 }
7116
7117 SDValue DAGCombiner::visitSETCC(SDNode *N) {
7118   return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
7119                        cast<CondCodeSDNode>(N->getOperand(2))->get(),
7120                        SDLoc(N));
7121 }
7122
7123 SDValue DAGCombiner::visitSETCCE(SDNode *N) {
7124   SDValue LHS = N->getOperand(0);
7125   SDValue RHS = N->getOperand(1);
7126   SDValue Carry = N->getOperand(2);
7127   SDValue Cond = N->getOperand(3);
7128
7129   // If Carry is false, fold to a regular SETCC.
7130   if (Carry.getOpcode() == ISD::CARRY_FALSE)
7131     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
7132
7133   return SDValue();
7134 }
7135
7136 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
7137   SDValue LHS = N->getOperand(0);
7138   SDValue RHS = N->getOperand(1);
7139   SDValue Carry = N->getOperand(2);
7140   SDValue Cond = N->getOperand(3);
7141
7142   // If Carry is false, fold to a regular SETCC.
7143   if (isNullConstant(Carry))
7144     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
7145
7146   return SDValue();
7147 }
7148
7149 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
7150 /// a build_vector of constants.
7151 /// This function is called by the DAGCombiner when visiting sext/zext/aext
7152 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
7153 /// Vector extends are not folded if operations are legal; this is to
7154 /// avoid introducing illegal build_vector dag nodes.
7155 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
7156                                          SelectionDAG &DAG, bool LegalTypes,
7157                                          bool LegalOperations) {
7158   unsigned Opcode = N->getOpcode();
7159   SDValue N0 = N->getOperand(0);
7160   EVT VT = N->getValueType(0);
7161
7162   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
7163          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
7164          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
7165          && "Expected EXTEND dag node in input!");
7166
7167   // fold (sext c1) -> c1
7168   // fold (zext c1) -> c1
7169   // fold (aext c1) -> c1
7170   if (isa<ConstantSDNode>(N0))
7171     return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
7172
7173   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
7174   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
7175   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
7176   EVT SVT = VT.getScalarType();
7177   if (!(VT.isVector() &&
7178       (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
7179       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
7180     return nullptr;
7181
7182   // We can fold this node into a build_vector.
7183   unsigned VTBits = SVT.getSizeInBits();
7184   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
7185   SmallVector<SDValue, 8> Elts;
7186   unsigned NumElts = VT.getVectorNumElements();
7187   SDLoc DL(N);
7188
7189   for (unsigned i=0; i != NumElts; ++i) {
7190     SDValue Op = N0->getOperand(i);
7191     if (Op->isUndef()) {
7192       Elts.push_back(DAG.getUNDEF(SVT));
7193       continue;
7194     }
7195
7196     SDLoc DL(Op);
7197     // Get the constant value and if needed trunc it to the size of the type.
7198     // Nodes like build_vector might have constants wider than the scalar type.
7199     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
7200     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
7201       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
7202     else
7203       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
7204   }
7205
7206   return DAG.getBuildVector(VT, DL, Elts).getNode();
7207 }
7208
7209 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
7210 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
7211 // transformation. Returns true if extension are possible and the above
7212 // mentioned transformation is profitable.
7213 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
7214                                     unsigned ExtOpc,
7215                                     SmallVectorImpl<SDNode *> &ExtendNodes,
7216                                     const TargetLowering &TLI) {
7217   bool HasCopyToRegUses = false;
7218   bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
7219   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
7220                             UE = N0.getNode()->use_end();
7221        UI != UE; ++UI) {
7222     SDNode *User = *UI;
7223     if (User == N)
7224       continue;
7225     if (UI.getUse().getResNo() != N0.getResNo())
7226       continue;
7227     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
7228     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
7229       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
7230       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
7231         // Sign bits will be lost after a zext.
7232         return false;
7233       bool Add = false;
7234       for (unsigned i = 0; i != 2; ++i) {
7235         SDValue UseOp = User->getOperand(i);
7236         if (UseOp == N0)
7237           continue;
7238         if (!isa<ConstantSDNode>(UseOp))
7239           return false;
7240         Add = true;
7241       }
7242       if (Add)
7243         ExtendNodes.push_back(User);
7244       continue;
7245     }
7246     // If truncates aren't free and there are users we can't
7247     // extend, it isn't worthwhile.
7248     if (!isTruncFree)
7249       return false;
7250     // Remember if this value is live-out.
7251     if (User->getOpcode() == ISD::CopyToReg)
7252       HasCopyToRegUses = true;
7253   }
7254
7255   if (HasCopyToRegUses) {
7256     bool BothLiveOut = false;
7257     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
7258          UI != UE; ++UI) {
7259       SDUse &Use = UI.getUse();
7260       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
7261         BothLiveOut = true;
7262         break;
7263       }
7264     }
7265     if (BothLiveOut)
7266       // Both unextended and extended values are live out. There had better be
7267       // a good reason for the transformation.
7268       return ExtendNodes.size();
7269   }
7270   return true;
7271 }
7272
7273 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
7274                                   SDValue Trunc, SDValue ExtLoad,
7275                                   const SDLoc &DL, ISD::NodeType ExtType) {
7276   // Extend SetCC uses if necessary.
7277   for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
7278     SDNode *SetCC = SetCCs[i];
7279     SmallVector<SDValue, 4> Ops;
7280
7281     for (unsigned j = 0; j != 2; ++j) {
7282       SDValue SOp = SetCC->getOperand(j);
7283       if (SOp == Trunc)
7284         Ops.push_back(ExtLoad);
7285       else
7286         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
7287     }
7288
7289     Ops.push_back(SetCC->getOperand(2));
7290     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
7291   }
7292 }
7293
7294 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
7295 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
7296   SDValue N0 = N->getOperand(0);
7297   EVT DstVT = N->getValueType(0);
7298   EVT SrcVT = N0.getValueType();
7299
7300   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
7301           N->getOpcode() == ISD::ZERO_EXTEND) &&
7302          "Unexpected node type (not an extend)!");
7303
7304   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
7305   // For example, on a target with legal v4i32, but illegal v8i32, turn:
7306   //   (v8i32 (sext (v8i16 (load x))))
7307   // into:
7308   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
7309   //                          (v4i32 (sextload (x + 16)))))
7310   // Where uses of the original load, i.e.:
7311   //   (v8i16 (load x))
7312   // are replaced with:
7313   //   (v8i16 (truncate
7314   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
7315   //                            (v4i32 (sextload (x + 16)))))))
7316   //
7317   // This combine is only applicable to illegal, but splittable, vectors.
7318   // All legal types, and illegal non-vector types, are handled elsewhere.
7319   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
7320   //
7321   if (N0->getOpcode() != ISD::LOAD)
7322     return SDValue();
7323
7324   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7325
7326   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
7327       !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
7328       !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
7329     return SDValue();
7330
7331   SmallVector<SDNode *, 4> SetCCs;
7332   if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
7333     return SDValue();
7334
7335   ISD::LoadExtType ExtType =
7336       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
7337
7338   // Try to split the vector types to get down to legal types.
7339   EVT SplitSrcVT = SrcVT;
7340   EVT SplitDstVT = DstVT;
7341   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
7342          SplitSrcVT.getVectorNumElements() > 1) {
7343     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
7344     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
7345   }
7346
7347   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
7348     return SDValue();
7349
7350   SDLoc DL(N);
7351   const unsigned NumSplits =
7352       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
7353   const unsigned Stride = SplitSrcVT.getStoreSize();
7354   SmallVector<SDValue, 4> Loads;
7355   SmallVector<SDValue, 4> Chains;
7356
7357   SDValue BasePtr = LN0->getBasePtr();
7358   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
7359     const unsigned Offset = Idx * Stride;
7360     const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
7361
7362     SDValue SplitLoad = DAG.getExtLoad(
7363         ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
7364         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
7365         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
7366
7367     BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
7368                           DAG.getConstant(Stride, DL, BasePtr.getValueType()));
7369
7370     Loads.push_back(SplitLoad.getValue(0));
7371     Chains.push_back(SplitLoad.getValue(1));
7372   }
7373
7374   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
7375   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
7376
7377   // Simplify TF.
7378   AddToWorklist(NewChain.getNode());
7379
7380   CombineTo(N, NewValue);
7381
7382   // Replace uses of the original load (before extension)
7383   // with a truncate of the concatenated sextloaded vectors.
7384   SDValue Trunc =
7385       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
7386   CombineTo(N0.getNode(), Trunc, NewChain);
7387   ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
7388                   (ISD::NodeType)N->getOpcode());
7389   return SDValue(N, 0); // Return N so it doesn't get rechecked!
7390 }
7391
7392 /// If we're narrowing or widening the result of a vector select and the final
7393 /// size is the same size as a setcc (compare) feeding the select, then try to
7394 /// apply the cast operation to the select's operands because matching vector
7395 /// sizes for a select condition and other operands should be more efficient.
7396 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
7397   unsigned CastOpcode = Cast->getOpcode();
7398   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
7399           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
7400           CastOpcode == ISD::FP_ROUND) &&
7401          "Unexpected opcode for vector select narrowing/widening");
7402
7403   // We only do this transform before legal ops because the pattern may be
7404   // obfuscated by target-specific operations after legalization. Do not create
7405   // an illegal select op, however, because that may be difficult to lower.
7406   EVT VT = Cast->getValueType(0);
7407   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
7408     return SDValue();
7409
7410   SDValue VSel = Cast->getOperand(0);
7411   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
7412       VSel.getOperand(0).getOpcode() != ISD::SETCC)
7413     return SDValue();
7414
7415   // Does the setcc have the same vector size as the casted select?
7416   SDValue SetCC = VSel.getOperand(0);
7417   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
7418   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
7419     return SDValue();
7420
7421   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
7422   SDValue A = VSel.getOperand(1);
7423   SDValue B = VSel.getOperand(2);
7424   SDValue CastA, CastB;
7425   SDLoc DL(Cast);
7426   if (CastOpcode == ISD::FP_ROUND) {
7427     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
7428     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
7429     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
7430   } else {
7431     CastA = DAG.getNode(CastOpcode, DL, VT, A);
7432     CastB = DAG.getNode(CastOpcode, DL, VT, B);
7433   }
7434   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
7435 }
7436
7437 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
       // Try to simplify the SIGN_EXTEND node N. Each fold below either returns
       // the replacement value, returns SDValue(N, 0) after N has already been
       // replaced through CombineTo, or falls through to the next fold. An empty
       // SDValue is returned at the end when nothing applied.
7438   SDValue N0 = N->getOperand(0);
7439   EVT VT = N->getValueType(0);
7440   SDLoc DL(N);
7441
       // Give tryToFoldExtendOfConstant the first shot; use its node if it
       // produced one.
7442   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7443                                               LegalOperations))
7444     return SDValue(Res, 0);
7445
7446   // fold (sext (sext x)) -> (sext x)
7447   // fold (sext (aext x)) -> (sext x)
7448   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
7449     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
7450
7451   if (N0.getOpcode() == ISD::TRUNCATE) {
7452     // fold (sext (truncate (load x))) -> (sext (smaller load x))
7453     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
7454     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
           // Grab the truncate's operand up front so it can still be queued
           // after the truncate itself has been replaced.
7455       SDNode *oye = N0.getOperand(0).getNode();
7456       if (NarrowLoad.getNode() != N0.getNode()) {
7457         CombineTo(N0.getNode(), NarrowLoad);
7458         // CombineTo deleted the truncate, if needed, but not what's under it.
7459         AddToWorklist(oye);
7460       }
7461       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7462     }
7463
7464     // See if the value being truncated is already sign extended.  If so, just
7465     // eliminate the trunc/sext pair.
         // Op is the pre-truncate value; OpBits/MidBits/DestBits are the scalar
         // widths of Op, of the truncate result and of this node's result.
7466     SDValue Op = N0.getOperand(0);
7467     unsigned OpBits   = Op.getScalarValueSizeInBits();
7468     unsigned MidBits  = N0.getScalarValueSizeInBits();
7469     unsigned DestBits = VT.getScalarSizeInBits();
7470     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
7471
7472     if (OpBits == DestBits) {
7473       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
7474       // bits, it is already ready.
7475       if (NumSignBits > DestBits-MidBits)
7476         return Op;
7477     } else if (OpBits < DestBits) {
7478       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
7479       // bits, just sext from i32.
7480       if (NumSignBits > OpBits-MidBits)
7481         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
7482     } else {
7483       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
7484       // bits, just truncate to i32.
7485       if (NumSignBits > OpBits-MidBits)
7486         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
7487     }
7488
7489     // fold (sext (truncate x)) -> (sextinreg x).
         // Op is first brought to VT (any-extended or truncated as needed) and
         // then sign-extended in register from the truncate's type.
7490     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
7491                                                  N0.getValueType())) {
7492       if (OpBits < DestBits)
7493         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
7494       else if (OpBits > DestBits)
7495         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
7496       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
7497                          DAG.getValueType(N0.getValueType()));
7498     }
7499   }
7500
7501   // fold (sext (load x)) -> (sext (truncate (sextload x)))
7502   // Only generate vector extloads when 1) they're legal, and 2) they are
7503   // deemed desirable by the target.
7504   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7505       ((!LegalOperations && !VT.isVector() &&
7506         !cast<LoadSDNode>(N0)->isVolatile()) ||
7507        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
         // When the load has other users, ExtendUsesToFormExtLoad decides whether
         // the transform goes ahead and fills SetCCs, which is handed to
         // ExtendSetCCUses below (presumably the setcc users that need
         // rewriting -- confirm against the helper).
7508     bool DoXform = true;
7509     SmallVector<SDNode*, 4> SetCCs;
7510     if (!N0.hasOneUse())
7511       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
7512     if (VT.isVector())
7513       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
7514     if (DoXform) {
7515       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7516       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
7517                                        LN0->getBasePtr(), N0.getValueType(),
7518                                        LN0->getMemOperand());
7519       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7520                                   N0.getValueType(), ExtLoad);
7521       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
7522       // If the load value is used only by N, replace it via CombineTo N.
           // The use count is queried before the CombineTo call below. In the
           // single-use case only the chain result (value 1) is redirected;
           // otherwise the old load is replaced by Trunc plus the new chain.
7523       bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
7524       CombineTo(N, ExtLoad);
7525       if (NoReplaceTrunc)
7526         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7527       else
7528         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
7529       return SDValue(N, 0);
7530     }
7531   }
7532
7533   // fold (sext (load x)) to multiple smaller sextloads.
7534   // Only on illegal but splittable vectors.
7535   if (SDValue ExtLoad = CombineExtLoad(N))
7536     return ExtLoad;
7537
7538   // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
7539   // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
7540   if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
7541       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
7542     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7543     EVT MemVT = LN0->getMemoryVT();
7544     if ((!LegalOperations && !LN0->isVolatile()) ||
7545         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
           // Re-issue the load as a SEXTLOAD producing VT directly; the old load
           // is replaced by a truncate of the new one plus the new chain.
7546       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
7547                                        LN0->getBasePtr(), MemVT,
7548                                        LN0->getMemOperand());
7549       CombineTo(N, ExtLoad);
7550       CombineTo(N0.getNode(),
7551                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7552                             N0.getValueType(), ExtLoad),
7553                 ExtLoad.getValue(1));
7554       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7555     }
7556   }
7557
7558   // fold (sext (and/or/xor (load x), cst)) ->
7559   //      (and/or/xor (sextload x), (sext cst))
7560   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
7561        N0.getOpcode() == ISD::XOR) &&
7562       isa<LoadSDNode>(N0.getOperand(0)) &&
7563       N0.getOperand(1).getOpcode() == ISD::Constant &&
7564       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
         // NOTE(review): as written this needs both !LegalOperations and the
         // logic op being legal on VT; confirm that '&&' (not '||') is intended.
7565       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
7566     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
         // A zero-extending load is excluded here; any other extension kind is
         // re-issued as a SEXTLOAD below.
7567     if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
7568       bool DoXform = true;
7569       SmallVector<SDNode*, 4> SetCCs;
7570       if (!N0.hasOneUse())
7571         DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
7572                                           SetCCs, TLI);
7573       if (DoXform) {
7574         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
7575                                          LN0->getChain(), LN0->getBasePtr(),
7576                                          LN0->getMemoryVT(),
7577                                          LN0->getMemOperand());
             // The constant operand is sign-extended to the width of VT.
7578         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7579         Mask = Mask.sext(VT.getSizeInBits());
             // Despite its name, 'And' is built with N0's own opcode (and/or/xor).
7580         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
7581                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
7582         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
7583                                     SDLoc(N0.getOperand(0)),
7584                                     N0.getOperand(0).getValueType(), ExtLoad);
7585         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
             // Both use counts are queried before the CombineTo calls below.
             // Note the naming: NoReplaceTruncAnd is true when N0 has *several*
             // uses, and a true value triggers the replacement of N0.
7586         bool NoReplaceTruncAnd = !N0.hasOneUse();
7587         bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
7588         CombineTo(N, And);
7589         // If N0 has multiple uses, change other uses as well.
7590         if (NoReplaceTruncAnd) {
7591           SDValue TruncAnd =
7592               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
7593           CombineTo(N0.getNode(), TruncAnd);
7594         }
7595         if (NoReplaceTrunc)
7596           DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7597         else
7598           CombineTo(LN0, Trunc, ExtLoad.getValue(1));
7599         return SDValue(N,0); // Return N so it doesn't get rechecked!
7600       }
7601     }
7602   }
7603
7604   if (N0.getOpcode() == ISD::SETCC) {
         // Operands, condition code and operand type of the setcc being extended.
7605     SDValue N00 = N0.getOperand(0);
7606     SDValue N01 = N0.getOperand(1);
7607     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7608     EVT N00VT = N0.getOperand(0).getValueType();
7609
7610     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
7611     // Only do this before legalize for now.
7612     if (VT.isVector() && !LegalOperations &&
7613         TLI.getBooleanContents(N00VT) ==
7614             TargetLowering::ZeroOrNegativeOneBooleanContent) {
7615       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
7616       // of the same size as the compared operands. Only optimize sext(setcc())
7617       // if this is the case.
7618       EVT SVT = getSetCCResultType(N00VT);
7619
7620       // We know that the # elements of the results is the same as the
7621       // # elements of the compare (and the # elements of the compare result
7622       // for that matter).  Check to see that they are the same size.  If so,
7623       // we know that the element size of the sext'd result matches the
7624       // element size of the compare operands.
7625       if (VT.getSizeInBits() == SVT.getSizeInBits())
7626         return DAG.getSetCC(DL, VT, N00, N01, CC);
7627
7628       // If the desired elements are smaller or larger than the source
7629       // elements, we can use a matching integer vector type and then
7630       // truncate/sign extend.
7631       EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
7632       if (SVT == MatchingVecType) {
7633         SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
7634         return DAG.getSExtOrTrunc(VsetCC, DL, VT);
7635       }
7636     }
7637
7638     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
7639     // Here, T can be 1 or -1, depending on the type of the setcc and
7640     // getBooleanContents().
7641     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
7642
7643     // To determine the "true" side of the select, we need to know the high bit
7644     // of the value returned by the setcc if it evaluates to true.
7645     // If the type of the setcc is i1, then the true case of the select is just
7646     // sext(i1 1), that is, -1.
7647     // If the type of the setcc is larger (say, i8) then the value of the high
7648     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
7649     // of the appropriate width.
7650     SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT)
7651                                            : TLI.getConstTrueVal(DAG, VT, DL);
7652     SDValue Zero = DAG.getConstant(0, DL, VT);
         // First see whether SimplifySelectCC can fold the compare-and-select
         // form outright.
7653     if (SDValue SCC =
7654             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
7655       return SCC;
7656
         // Otherwise, for scalars only and only when the target does not ask for
         // selects of constants to be turned into math, emit an explicit select.
7657     if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
7658       EVT SetCCVT = getSetCCResultType(N00VT);
7659       // Don't do this transform for i1 because there's a select transform
7660       // that would reverse it.
7661       // TODO: We should not do this transform at all without a target hook
7662       // because a sext is likely cheaper than a select?
7663       if (SetCCVT.getScalarSizeInBits() != 1 &&
7664           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
7665         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
7666         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
7667       }
7668     }
7669   }
7670
7671   // fold (sext x) -> (zext x) if the sign bit is known zero.
7672   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
7673       DAG.SignBitIsZero(N0))
7674     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
7675
       // Finally, give matchVSelectOpSizesWithSetCC a chance to rewrite N.
7676   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
7677     return NewVSel;
7678
7679   return SDValue();
7680 }
7681
7682 // isTruncateOf - If N is a truncate of some other value, return true, record
7683 // the value being truncated in Op and which of Op's bits are zero/one in Known.
7684 // This function computes KnownBits to avoid a duplicated call to
7685 // computeKnownBits in the caller.
     //
     // Besides a literal ISD::TRUNCATE, an i1-typed (setcc ne X, 0), with the zero
     // on either side, is also accepted when every bit of X other than bit 0 is
     // known to be zero; Op is then X.
     //
     // Note: on the setcc path Op and Known may already have been overwritten when
     // the function returns false, so callers should only rely on them after a
     // true result.
7686 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
7687                          KnownBits &Known) {
       // Case 1: an explicit truncate. Report its operand and that operand's
       // known bits.
7688   if (N->getOpcode() == ISD::TRUNCATE) {
7689     Op = N->getOperand(0);
7690     DAG.computeKnownBits(Op, Known);
7691     return true;
7692   }
7693
       // Case 2: only an i1 SETCC with condition code SETNE is considered.
7694   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
7695       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
7696     return false;
7697
7698   SDValue Op0 = N->getOperand(0);
7699   SDValue Op1 = N->getOperand(1);
7700   assert(Op0.getValueType() == Op1.getValueType());
7701
       // One side must be the constant zero; the other side becomes Op.
7702   if (isNullConstant(Op0))
7703     Op = Op1;
7704   else if (isNullConstant(Op1))
7705     Op = Op0;
7706   else
7707     return false;
7708
7709   DAG.computeKnownBits(Op, Known);
7710
       // Accept only if all bits of Op except possibly bit 0 are known zero, so
       // that "Op != 0" has the same value as Op's low bit.
7711   if (!(Known.Zero | 1).isAllOnesValue())
7712     return false;
7713
7714   return true;
7715 }
7716
7717 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
       // Try to simplify the ZERO_EXTEND node N. Each fold below either returns
       // the replacement value, returns SDValue(N, 0) after N has already been
       // replaced through CombineTo, or falls through to the next fold. An empty
       // SDValue means no change (either at the end or from an early bail-out).
7718   SDValue N0 = N->getOperand(0);
7719   EVT VT = N->getValueType(0);
7720
       // Give tryToFoldExtendOfConstant the first shot; use its node if it
       // produced one.
7721   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7722                                               LegalOperations))
7723     return SDValue(Res, 0);
7724
7725   // fold (zext (zext x)) -> (zext x)
7726   // fold (zext (aext x)) -> (zext x)
7727   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
7728     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
7729                        N0.getOperand(0));
7730
7731   // fold (zext (truncate x)) -> (zext x) or
7732   //      (zext (truncate x)) -> (truncate x)
7733   // This is valid when the truncated bits of x are already zero.
7734   // FIXME: We should extend this to work for vectors too.
7735   SDValue Op;
7736   KnownBits Known;
7737   if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
         // TruncatedBits marks the bits of Op from N0's width up to
         // min(width of Op, width of VT); it is empty when Op and N0 have the
         // same width. These are the bits that must be known zero.
7738     APInt TruncatedBits =
7739       (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
7740       APInt(Op.getValueSizeInBits(), 0) :
7741       APInt::getBitsSet(Op.getValueSizeInBits(),
7742                         N0.getValueSizeInBits(),
7743                         std::min(Op.getValueSizeInBits(),
7744                                  VT.getSizeInBits()));
7745     if (TruncatedBits.isSubsetOf(Known.Zero))
7746       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
7747   }
7748
7749   // fold (zext (truncate x)) -> (and x, mask)
7750   if (N0.getOpcode() == ISD::TRUNCATE) {
7751     // fold (zext (truncate (load x))) -> (zext (smaller load x))
7752     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
7753     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
           // Grab the truncate's operand up front so it can still be queued
           // after the truncate itself has been replaced.
7754       SDNode *oye = N0.getOperand(0).getNode();
7755       if (NarrowLoad.getNode() != N0.getNode()) {
7756         CombineTo(N0.getNode(), NarrowLoad);
7757         // CombineTo deleted the truncate, if needed, but not what's under it.
7758         AddToWorklist(oye);
7759       }
7760       return SDValue(N, 0); // Return N so it doesn't get rechecked!
7761     }
7762
         // SrcVT is the type before the truncate, MinVT the truncated type.
7763     EVT SrcVT = N0.getOperand(0).getValueType();
7764     EVT MinVT = N0.getValueType();
7765
7766     // Try to mask before the extension to avoid having to generate a larger mask,
7767     // possibly over several sub-vectors.
7768     if (SrcVT.bitsLT(VT)) {
7769       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
7770                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
             // This Op shadows the function-level Op declared above.
7771         SDValue Op = N0.getOperand(0);
7772         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
7773         AddToWorklist(Op.getNode());
7774         return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
7775       }
7776     }
7777
         // Otherwise bring the source to VT first and mask afterwards.
7778     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
           // This Op shadows the function-level Op declared above.
7779       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
7780       AddToWorklist(Op.getNode());
7781       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
7782       // We may safely transfer the debug info describing the truncate node over
7783       // to the equivalent and operation.
7784       DAG.transferDbgValues(N0, And);
7785       return And;
7786     }
7787   }
7788
7789   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
7790   // if either of the casts is not free.
7791   if (N0.getOpcode() == ISD::AND &&
7792       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7793       N0.getOperand(1).getOpcode() == ISD::Constant &&
7794       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
7795                            N0.getValueType()) ||
7796        !TLI.isZExtFree(N0.getValueType(), VT))) {
         // X is the pre-truncate value, brought to VT; the mask constant is
         // zero-extended to the width of VT.
7797     SDValue X = N0.getOperand(0).getOperand(0);
7798     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
7799     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7800     Mask = Mask.zext(VT.getSizeInBits());
7801     SDLoc DL(N);
7802     return DAG.getNode(ISD::AND, DL, VT,
7803                        X, DAG.getConstant(Mask, DL, VT));
7804   }
7805
7806   // fold (zext (load x)) -> (zext (truncate (zextload x)))
7807   // Only generate vector extloads when 1) they're legal, and 2) they are
7808   // deemed desirable by the target.
7809   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7810       ((!LegalOperations && !VT.isVector() &&
7811         !cast<LoadSDNode>(N0)->isVolatile()) ||
7812        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
         // When the load has other users, ExtendUsesToFormExtLoad decides whether
         // the transform goes ahead and fills SetCCs, which is handed to
         // ExtendSetCCUses below (presumably the setcc users that need
         // rewriting -- confirm against the helper).
7813     bool DoXform = true;
7814     SmallVector<SDNode*, 4> SetCCs;
7815     if (!N0.hasOneUse())
7816       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
7817     if (VT.isVector())
7818       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
7819     if (DoXform) {
7820       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7821       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
7822                                        LN0->getChain(),
7823                                        LN0->getBasePtr(), N0.getValueType(),
7824                                        LN0->getMemOperand());
7825
7826       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7827                                   N0.getValueType(), ExtLoad);
7828       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND);
7829       // If the load value is used only by N, replace it via CombineTo N.
           // The use count is queried before the CombineTo call below. In the
           // single-use case only the chain result (value 1) is redirected;
           // otherwise the old load is replaced by Trunc plus the new chain.
7830       bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
7831       CombineTo(N, ExtLoad);
7832       if (NoReplaceTrunc)
7833         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7834       else
7835         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
7836       return SDValue(N, 0); // Return N so it doesn't get rechecked!
7837     }
7838   }
7839
7840   // fold (zext (load x)) to multiple smaller zextloads.
7841   // Only on illegal but splittable vectors.
7842   if (SDValue ExtLoad = CombineExtLoad(N))
7843     return ExtLoad;
7844
7845   // fold (zext (and/or/xor (load x), cst)) ->
7846   //      (and/or/xor (zextload x), (zext cst))
7847   // Unless (and (load x) cst) will match as a zextload already and has
7848   // additional users.
7849   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
7850        N0.getOpcode() == ISD::XOR) &&
7851       isa<LoadSDNode>(N0.getOperand(0)) &&
7852       N0.getOperand(1).getOpcode() == ISD::Constant &&
7853       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
         // NOTE(review): as written this needs both !LegalOperations and the
         // logic op being legal on VT; confirm that '&&' (not '||') is intended.
7854       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
7855     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
         // A sign-extending load is excluded here; any other extension kind is
         // re-issued as a ZEXTLOAD below.
7856     if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
7857       bool DoXform = true;
7858       SmallVector<SDNode*, 4> SetCCs;
7859       if (!N0.hasOneUse()) {
             // With several users of an AND, skip the transform when
             // isAndLoadExtLoad reports that (and (load x), cst) already matches
             // as an extending load (see the fold's header comment).
7860         if (N0.getOpcode() == ISD::AND) {
7861           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
7862           EVT LoadResultTy = AndC->getValueType(0);
7863           EVT ExtVT;
7864           if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT))
7865             DoXform = false;
7866         }
7867         if (DoXform)
7868           DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
7869                                             ISD::ZERO_EXTEND, SetCCs, TLI);
7870       }
7871       if (DoXform) {
7872         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
7873                                          LN0->getChain(), LN0->getBasePtr(),
7874                                          LN0->getMemoryVT(),
7875                                          LN0->getMemOperand());
             // The constant operand is zero-extended to the width of VT.
7876         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7877         Mask = Mask.zext(VT.getSizeInBits());
7878         SDLoc DL(N);
             // Despite its name, 'And' is built with N0's own opcode (and/or/xor).
7879         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
7880                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
7881         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
7882                                     SDLoc(N0.getOperand(0)),
7883                                     N0.getOperand(0).getValueType(), ExtLoad);
7884         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND);
             // Both use counts are queried before the CombineTo calls below.
             // Note the naming: NoReplaceTruncAnd is true when N0 has *several*
             // uses, and a true value triggers the replacement of N0.
7885         bool NoReplaceTruncAnd = !N0.hasOneUse();
7886         bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
7887         CombineTo(N, And);
7888         // If N0 has multiple uses, change other uses as well.
7889         if (NoReplaceTruncAnd) {
7890           SDValue TruncAnd =
7891               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
7892           CombineTo(N0.getNode(), TruncAnd);
7893         }
7894         if (NoReplaceTrunc)
7895           DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7896         else
7897           CombineTo(LN0, Trunc, ExtLoad.getValue(1));
7898         return SDValue(N,0); // Return N so it doesn't get rechecked!
7899       }
7900     }
7901   }
7902
7903   // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
7904   // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
7905   if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
7906       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
7907     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7908     EVT MemVT = LN0->getMemoryVT();
7909     if ((!LegalOperations && !LN0->isVolatile()) ||
7910         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
           // Re-issue the load as a ZEXTLOAD producing VT directly; the old load
           // is replaced by a truncate of the new one plus the new chain.
7911       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
7912                                        LN0->getChain(),
7913                                        LN0->getBasePtr(), MemVT,
7914                                        LN0->getMemOperand());
7915       CombineTo(N, ExtLoad);
7916       CombineTo(N0.getNode(),
7917                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
7918                             ExtLoad),
7919                 ExtLoad.getValue(1));
7920       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7921     }
7922   }
7923
7924   if (N0.getOpcode() == ISD::SETCC) {
7925     // Only do this before legalize for now.
7926     if (!LegalOperations && VT.isVector() &&
7927         N0.getValueType().getVectorElementType() == MVT::i1) {
7928       EVT N00VT = N0.getOperand(0).getValueType();
           // Leave the node alone when the setcc already has the result type
           // reported by getSetCCResultType for its operands.
7929       if (getSetCCResultType(N00VT) == N0.getValueType())
7930         return SDValue();
7931
7932       // We know that the # elements of the results is the same as the #
7933       // elements of the compare (and the # elements of the compare result for
7934       // that matter). Check to see that they are the same size. If so, we know
7935       // that the element size of the extended result matches the element size
7936       // of the compare operands.
7937       SDLoc DL(N);
7938       SDValue VecOnes = DAG.getConstant(1, DL, VT);
7939       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
7940         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
7941         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
7942                                      N0.getOperand(1), N0.getOperand(2));
7943         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
7944       }
7945
7946       // If the desired elements are smaller or larger than the source
7947       // elements we can use a matching integer vector type and then
7948       // truncate/sign extend.
           // The sign-extended or truncated compare is then masked with VecOnes.
7949       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
7950       SDValue VsetCC =
7951           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
7952                       N0.getOperand(1), N0.getOperand(2));
7953       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
7954                          VecOnes);
7955     }
7956
7957     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
7958     SDLoc DL(N);
7959     if (SDValue SCC = SimplifySelectCC(
7960             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
7961             DAG.getConstant(0, DL, VT),
7962             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
7963       return SCC;
7964   }
7965
7966   // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
7967   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
7968       isa<ConstantSDNode>(N0.getOperand(1)) &&
7969       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
7970       N0.hasOneUse()) {
7971     SDValue ShAmt = N0.getOperand(1);
7972     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
7973     if (N0.getOpcode() == ISD::SHL) {
7974       SDValue InnerZExt = N0.getOperand(0);
7975       // If the original shl may be shifting out bits, do not perform this
7976       // transformation.
           // KnownZeroBits is the number of high bits added by the inner zext.
7977       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
7978         InnerZExt.getOperand(0).getValueSizeInBits();
7979       if (ShAmtVal > KnownZeroBits)
7980         return SDValue();
7981     }
7982
7983     SDLoc DL(N);
7984
7985     // Ensure that the shift amount is wide enough for the shifted value.
         // NOTE(review): this zero-extends ShAmt to i32 for results of 256 bits
         // or more; presumably ShAmt is narrower than i32 whenever this
         // triggers -- confirm.
7986     if (VT.getSizeInBits() >= 256)
7987       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
7988
7989     return DAG.getNode(N0.getOpcode(), DL, VT,
7990                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
7991                        ShAmt);
7992   }
7993
       // Finally, give matchVSelectOpSizesWithSetCC a chance to rewrite N.
7994   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
7995     return NewVSel;
7996
7997   return SDValue();
7998 }
7999
8000 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
       // Try to simplify the ANY_EXTEND node N. Each fold below either returns
       // the replacement value, returns SDValue(N, 0) after N has already been
       // replaced through CombineTo, or falls through to the next fold. An empty
       // SDValue means no change (either at the end or from an early bail-out).
8001   SDValue N0 = N->getOperand(0);
8002   EVT VT = N->getValueType(0);
8003
       // Give tryToFoldExtendOfConstant the first shot; use its node if it
       // produced one.
8004   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8005                                               LegalOperations))
8006     return SDValue(Res, 0);
8007
8008   // fold (aext (aext x)) -> (aext x)
8009   // fold (aext (zext x)) -> (zext x)
8010   // fold (aext (sext x)) -> (sext x)
       // The inner extension's opcode is reused for the merged node.
8011   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
8012       N0.getOpcode() == ISD::ZERO_EXTEND ||
8013       N0.getOpcode() == ISD::SIGN_EXTEND)
8014     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
8015
8016   // fold (aext (truncate (load x))) -> (aext (smaller load x))
8017   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
8018   if (N0.getOpcode() == ISD::TRUNCATE) {
8019     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
           // Grab the truncate's operand up front so it can still be queued
           // after the truncate itself has been replaced.
8020       SDNode *oye = N0.getOperand(0).getNode();
8021       if (NarrowLoad.getNode() != N0.getNode()) {
8022         CombineTo(N0.getNode(), NarrowLoad);
8023         // CombineTo deleted the truncate, if needed, but not what's under it.
8024         AddToWorklist(oye);
8025       }
8026       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8027     }
8028   }
8029
8030   // fold (aext (truncate x))
       // The pre-truncate value is any-extended or truncated straight to VT.
8031   if (N0.getOpcode() == ISD::TRUNCATE)
8032     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
8033
8034   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
8035   // if the trunc is not free.
8036   if (N0.getOpcode() == ISD::AND &&
8037       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
8038       N0.getOperand(1).getOpcode() == ISD::Constant &&
8039       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
8040                           N0.getValueType())) {
8041     SDLoc DL(N);
         // X is the pre-truncate value, brought to VT; the mask constant is
         // zero-extended to the width of VT.
8042     SDValue X = N0.getOperand(0).getOperand(0);
8043     X = DAG.getAnyExtOrTrunc(X, DL, VT);
8044     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8045     Mask = Mask.zext(VT.getSizeInBits());
8046     return DAG.getNode(ISD::AND, DL, VT,
8047                        X, DAG.getConstant(Mask, DL, VT));
8048   }
8049
8050   // fold (aext (load x)) -> (aext (truncate (extload x)))
8051   // None of the supported targets knows how to perform load and any_ext
8052   // on vectors in one instruction.  We only perform this transformation on
8053   // scalars.
8054   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
8055       ISD::isUNINDEXEDLoad(N0.getNode()) &&
8056       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
         // When the load has other users, ExtendUsesToFormExtLoad decides whether
         // the transform goes ahead and fills SetCCs, which is handed to
         // ExtendSetCCUses below (presumably the setcc users that need
         // rewriting -- confirm against the helper).
8057     bool DoXform = true;
8058     SmallVector<SDNode*, 4> SetCCs;
8059     if (!N0.hasOneUse())
8060       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
8061     if (DoXform) {
8062       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8063       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
8064                                        LN0->getChain(),
8065                                        LN0->getBasePtr(), N0.getValueType(),
8066                                        LN0->getMemOperand());
8067       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
8068                                   N0.getValueType(), ExtLoad);
8069       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
8070                       ISD::ANY_EXTEND);
8071       // If the load value is used only by N, replace it via CombineTo N.
           // The use count is queried before the CombineTo call below. In the
           // single-use case only the chain result (value 1) is redirected;
           // otherwise the old load is replaced by Trunc plus the new chain.
8072       bool NoReplaceTrunc = N0.hasOneUse();
8073       CombineTo(N, ExtLoad);
8074       if (NoReplaceTrunc)
8075         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8076       else
8077         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
8078       return SDValue(N, 0); // Return N so it doesn't get rechecked!
8079     }
8080   }
8081
8082   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
8083   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
8084   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
8085   if (N0.getOpcode() == ISD::LOAD &&
8086       !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
8087       N0.hasOneUse()) {
8088     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
         // The existing load's extension kind is kept for the widened load.
8089     ISD::LoadExtType ExtType = LN0->getExtensionType();
8090     EVT MemVT = LN0->getMemoryVT();
8091     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
8092       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
8093                                        VT, LN0->getChain(), LN0->getBasePtr(),
8094                                        MemVT, LN0->getMemOperand());
8095       CombineTo(N, ExtLoad);
8096       CombineTo(N0.getNode(),
8097                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
8098                             N0.getValueType(), ExtLoad),
8099                 ExtLoad.getValue(1));
8100       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8101     }
8102   }
8103
8104   if (N0.getOpcode() == ISD::SETCC) {
8105     // For vectors:
8106     // aext(setcc) -> vsetcc
8107     // aext(setcc) -> truncate(vsetcc)
8108     // aext(setcc) -> aext(vsetcc)
8109     // Only do this before legalize for now.
8110     if (VT.isVector() && !LegalOperations) {
8111       EVT N00VT = N0.getOperand(0).getValueType();
           // Leave the node alone when the setcc already has the result type
           // reported by getSetCCResultType for its operands.
8112       if (getSetCCResultType(N00VT) == N0.getValueType())
8113         return SDValue();
8114
8115       // We know that the # elements of the results is the same as the
8116       // # elements of the compare (and the # elements of the compare result
8117       // for that matter).  Check to see that they are the same size.  If so,
8118       // we know that the element size of the extended result matches the
8119       // element size of the compare operands.
8120       if (VT.getSizeInBits() == N00VT.getSizeInBits())
8121         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
8122                              N0.getOperand(1),
8123                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
8124       // If the desired elements are smaller or larger than the source
8125       // elements we can use a matching integer vector type and then
8126       // truncate/any extend
8127       else {
8128         EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
8129         SDValue VsetCC =
8130           DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
8131                         N0.getOperand(1),
8132                         cast<CondCodeSDNode>(N0.getOperand(2))->get());
8133         return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
8134       }
8135     }
8136
8137     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
8138     SDLoc DL(N);
8139     if (SDValue SCC = SimplifySelectCC(
8140             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
8141             DAG.getConstant(0, DL, VT),
8142             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
8143       return SCC;
8144   }
8145
8146   return SDValue();
8147 }
8148
8149 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
8150   unsigned Opcode = N->getOpcode();
8151   SDValue N0 = N->getOperand(0);
8152   SDValue N1 = N->getOperand(1);
8153   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
8154
8155   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
8156   if (N0.getOpcode() == Opcode &&
8157       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
8158     return N0;
8159
8160   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
8161       N0.getOperand(0).getOpcode() == Opcode) {
8162     // We have an assert, truncate, assert sandwich. Make one stronger assert
8163     // by asserting on the smallest asserted type to the larger source type.
8164     // This eliminates the later assert:
8165     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
8166     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
8167     SDValue BigA = N0.getOperand(0);
8168     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
8169     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
8170            "Asserting zero/sign-extended bits to a type larger than the "
8171            "truncated destination does not provide information");
8172
8173     SDLoc DL(N);
8174     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
8175     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
8176     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
8177                                     BigA.getOperand(0), MinAssertVTVal);
8178     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
8179   }
8180
8181   return SDValue();
8182 }
8183
8184 /// If the result of a wider load is shifted to right of N  bits and then
8185 /// truncated to a narrower type and where N is a multiple of number of bits of
8186 /// the narrower type, transform it to a narrower load from address + N / num of
8187 /// bits of new type. Also narrow the load if the result is masked with an AND
8188 /// to effectively produce a smaller type. If the result is to be extended, also
8189 /// fold the extension to form a extending load.
8190 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
8191   unsigned Opc = N->getOpcode();
8192
8193   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
8194   SDValue N0 = N->getOperand(0);
8195   EVT VT = N->getValueType(0);
8196   EVT ExtVT = VT;
8197
8198   // This transformation isn't valid for vector loads.
8199   if (VT.isVector())
8200     return SDValue();
8201
8202   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
8203   // extended to VT.
8204   if (Opc == ISD::SIGN_EXTEND_INREG) {
8205     ExtType = ISD::SEXTLOAD;
8206     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
8207   } else if (Opc == ISD::SRL) {
8208     // Another special-case: SRL is basically zero-extending a narrower value,
8209     // or it maybe shifting a higher subword, half or byte into the lowest
8210     // bits.
8211     ExtType = ISD::ZEXTLOAD;
8212     N0 = SDValue(N, 0);
8213
8214     auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
8215     auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8216     if (!N01 || !LN0)
8217       return SDValue();
8218
8219     uint64_t ShiftAmt = N01->getZExtValue();
8220     uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
8221     if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
8222       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
8223     else
8224       ExtVT = EVT::getIntegerVT(*DAG.getContext(),
8225                                 VT.getSizeInBits() - ShiftAmt);
8226   } else if (Opc == ISD::AND) {
8227     // An AND with a constant mask is the same as a truncate + zero-extend.
8228     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
8229     if (!AndC || !AndC->getAPIntValue().isMask())
8230       return SDValue();
8231
8232     unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
8233     ExtType = ISD::ZEXTLOAD;
8234     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
8235   }
8236
8237   unsigned ShAmt = 0;
8238   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
8239     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
8240       ShAmt = N01->getZExtValue();
8241       unsigned EVTBits = ExtVT.getSizeInBits();
8242       // Is the shift amount a multiple of size of VT?
8243       if ((ShAmt & (EVTBits-1)) == 0) {
8244         N0 = N0.getOperand(0);
8245         // Is the load width a multiple of size of VT?
8246         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
8247           return SDValue();
8248       }
8249
8250       // At this point, we must have a load or else we can't do the transform.
8251       if (!isa<LoadSDNode>(N0)) return SDValue();
8252
8253       // Because a SRL must be assumed to *need* to zero-extend the high bits
8254       // (as opposed to anyext the high bits), we can't combine the zextload
8255       // lowering of SRL and an sextload.
8256       if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
8257         return SDValue();
8258
8259       // If the shift amount is larger than the input type then we're not
8260       // accessing any of the loaded bytes.  If the load was a zextload/extload
8261       // then the result of the shift+trunc is zero/undef (handled elsewhere).
8262       if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
8263         return SDValue();
8264     }
8265   }
8266
8267   // If the load is shifted left (and the result isn't shifted back right),
8268   // we can fold the truncate through the shift.
8269   unsigned ShLeftAmt = 0;
8270   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
8271       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
8272     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
8273       ShLeftAmt = N01->getZExtValue();
8274       N0 = N0.getOperand(0);
8275     }
8276   }
8277
8278   // If we haven't found a load, we can't narrow it.
8279   if (!isa<LoadSDNode>(N0))
8280     return SDValue();
8281
8282   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8283   if (!isLegalNarrowLoad(LN0, ExtType, ExtVT, ShAmt))
8284     return SDValue();
8285
8286   // For big endian targets, we need to adjust the offset to the pointer to
8287   // load the correct bytes.
8288   if (DAG.getDataLayout().isBigEndian()) {
8289     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
8290     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
8291     ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
8292   }
8293
8294   EVT PtrType = N0.getOperand(1).getValueType();
8295   uint64_t PtrOff = ShAmt / 8;
8296   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
8297   SDLoc DL(LN0);
8298   // The original load itself didn't wrap, so an offset within it doesn't.
8299   SDNodeFlags Flags;
8300   Flags.setNoUnsignedWrap(true);
8301   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
8302                                PtrType, LN0->getBasePtr(),
8303                                DAG.getConstant(PtrOff, DL, PtrType),
8304                                Flags);
8305   AddToWorklist(NewPtr.getNode());
8306
8307   SDValue Load;
8308   if (ExtType == ISD::NON_EXTLOAD)
8309     Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
8310                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
8311                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8312   else
8313     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
8314                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
8315                           NewAlign, LN0->getMemOperand()->getFlags(),
8316                           LN0->getAAInfo());
8317
8318   // Replace the old load's chain with the new load's chain.
8319   WorklistRemover DeadNodes(*this);
8320   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
8321
8322   // Shift the result left, if we've swallowed a left shift.
8323   SDValue Result = Load;
8324   if (ShLeftAmt != 0) {
8325     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
8326     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
8327       ShImmTy = VT;
8328     // If the shift amount is as large as the result size (but, presumably,
8329     // no larger than the source) then the useful bits of the result are
8330     // zero; we can't simply return the shortened shift, because the result
8331     // of that operation is undefined.
8332     SDLoc DL(N0);
8333     if (ShLeftAmt >= VT.getSizeInBits())
8334       Result = DAG.getConstant(0, DL, VT);
8335     else
8336       Result = DAG.getNode(ISD::SHL, DL, VT,
8337                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
8338   }
8339
8340   // Return the new loaded value.
8341   return Result;
8342 }
8343
8344 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
8345   SDValue N0 = N->getOperand(0);
8346   SDValue N1 = N->getOperand(1);
8347   EVT VT = N->getValueType(0);
8348   EVT EVT = cast<VTSDNode>(N1)->getVT();
8349   unsigned VTBits = VT.getScalarSizeInBits();
8350   unsigned EVTBits = EVT.getScalarSizeInBits();
8351
8352   if (N0.isUndef())
8353     return DAG.getUNDEF(VT);
8354
8355   // fold (sext_in_reg c1) -> c1
8356   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8357     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
8358
8359   // If the input is already sign extended, just drop the extension.
8360   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
8361     return N0;
8362
8363   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
8364   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
8365       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
8366     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8367                        N0.getOperand(0), N1);
8368
8369   // fold (sext_in_reg (sext x)) -> (sext x)
8370   // fold (sext_in_reg (aext x)) -> (sext x)
8371   // if x is small enough.
8372   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
8373     SDValue N00 = N0.getOperand(0);
8374     if (N00.getScalarValueSizeInBits() <= EVTBits &&
8375         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
8376       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
8377   }
8378
8379   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
8380   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
8381        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
8382        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
8383       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
8384     if (!LegalOperations ||
8385         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
8386       return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
8387   }
8388
8389   // fold (sext_in_reg (zext x)) -> (sext x)
8390   // iff we are extending the source sign bit.
8391   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
8392     SDValue N00 = N0.getOperand(0);
8393     if (N00.getScalarValueSizeInBits() == EVTBits &&
8394         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
8395       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
8396   }
8397
8398   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
8399   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
8400     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
8401
8402   // fold operands of sext_in_reg based on knowledge that the top bits are not
8403   // demanded.
8404   if (SimplifyDemandedBits(SDValue(N, 0)))
8405     return SDValue(N, 0);
8406
8407   // fold (sext_in_reg (load x)) -> (smaller sextload x)
8408   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
8409   if (SDValue NarrowLoad = ReduceLoadWidth(N))
8410     return NarrowLoad;
8411
8412   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
8413   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
8414   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
8415   if (N0.getOpcode() == ISD::SRL) {
8416     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
8417       if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
8418         // We can turn this into an SRA iff the input to the SRL is already sign
8419         // extended enough.
8420         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
8421         if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
8422           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
8423                              N0.getOperand(0), N0.getOperand(1));
8424       }
8425   }
8426
8427   // fold (sext_inreg (extload x)) -> (sextload x)
8428   // If sextload is not supported by target, we can only do the combine when
8429   // load has one use. Doing otherwise can block folding the extload with other
8430   // extends that the target does support.
8431   if (ISD::isEXTLoad(N0.getNode()) &&
8432       ISD::isUNINDEXEDLoad(N0.getNode()) &&
8433       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
8434       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
8435         N0.hasOneUse()) ||
8436        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
8437     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8438     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
8439                                      LN0->getChain(),
8440                                      LN0->getBasePtr(), EVT,
8441                                      LN0->getMemOperand());
8442     CombineTo(N, ExtLoad);
8443     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
8444     AddToWorklist(ExtLoad.getNode());
8445     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8446   }
8447   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
8448   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
8449       N0.hasOneUse() &&
8450       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
8451       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
8452        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
8453     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8454     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
8455                                      LN0->getChain(),
8456                                      LN0->getBasePtr(), EVT,
8457                                      LN0->getMemOperand());
8458     CombineTo(N, ExtLoad);
8459     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
8460     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8461   }
8462
8463   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
8464   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
8465     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
8466                                            N0.getOperand(1), false))
8467       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8468                          BSwap, N1);
8469   }
8470
8471   return SDValue();
8472 }
8473
8474 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
8475   SDValue N0 = N->getOperand(0);
8476   EVT VT = N->getValueType(0);
8477
8478   if (N0.isUndef())
8479     return DAG.getUNDEF(VT);
8480
8481   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8482                                               LegalOperations))
8483     return SDValue(Res, 0);
8484
8485   return SDValue();
8486 }
8487
8488 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
8489   SDValue N0 = N->getOperand(0);
8490   EVT VT = N->getValueType(0);
8491
8492   if (N0.isUndef())
8493     return DAG.getUNDEF(VT);
8494
8495   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8496                                               LegalOperations))
8497     return SDValue(Res, 0);
8498
8499   return SDValue();
8500 }
8501
8502 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
8503   SDValue N0 = N->getOperand(0);
8504   EVT VT = N->getValueType(0);
8505   bool isLE = DAG.getDataLayout().isLittleEndian();
8506
8507   // noop truncate
8508   if (N0.getValueType() == N->getValueType(0))
8509     return N0;
8510
8511   // fold (truncate (truncate x)) -> (truncate x)
8512   if (N0.getOpcode() == ISD::TRUNCATE)
8513     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
8514
8515   // fold (truncate c1) -> c1
8516   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
8517     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
8518     if (C.getNode() != N)
8519       return C;
8520   }
8521
8522   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
8523   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
8524       N0.getOpcode() == ISD::SIGN_EXTEND ||
8525       N0.getOpcode() == ISD::ANY_EXTEND) {
8526     // if the source is smaller than the dest, we still need an extend.
8527     if (N0.getOperand(0).getValueType().bitsLT(VT))
8528       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
8529     // if the source is larger than the dest, than we just need the truncate.
8530     if (N0.getOperand(0).getValueType().bitsGT(VT))
8531       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
8532     // if the source and dest are the same type, we can drop both the extend
8533     // and the truncate.
8534     return N0.getOperand(0);
8535   }
8536
8537   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
8538   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
8539     return SDValue();
8540
8541   // Fold extract-and-trunc into a narrow extract. For example:
8542   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
8543   //   i32 y = TRUNCATE(i64 x)
8544   //        -- becomes --
8545   //   v16i8 b = BITCAST (v2i64 val)
8546   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
8547   //
8548   // Note: We only run this optimization after type legalization (which often
8549   // creates this pattern) and before operation legalization after which
8550   // we need to be more careful about the vector instructions that we generate.
8551   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8552       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
8553     EVT VecTy = N0.getOperand(0).getValueType();
8554     EVT ExTy = N0.getValueType();
8555     EVT TrTy = N->getValueType(0);
8556
8557     unsigned NumElem = VecTy.getVectorNumElements();
8558     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
8559
8560     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
8561     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
8562
8563     SDValue EltNo = N0->getOperand(1);
8564     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
8565       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
8566       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
8567       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
8568
8569       SDLoc DL(N);
8570       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
8571                          DAG.getBitcast(NVT, N0.getOperand(0)),
8572                          DAG.getConstant(Index, DL, IndexTy));
8573     }
8574   }
8575
8576   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
8577   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
8578     EVT SrcVT = N0.getValueType();
8579     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
8580         TLI.isTruncateFree(SrcVT, VT)) {
8581       SDLoc SL(N0);
8582       SDValue Cond = N0.getOperand(0);
8583       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
8584       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
8585       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
8586     }
8587   }
8588
8589   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
8590   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
8591       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
8592       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
8593     SDValue Amt = N0.getOperand(1);
8594     KnownBits Known;
8595     DAG.computeKnownBits(Amt, Known);
8596     unsigned Size = VT.getScalarSizeInBits();
8597     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
8598       SDLoc SL(N);
8599       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
8600
8601       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
8602       if (AmtVT != Amt.getValueType()) {
8603         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
8604         AddToWorklist(Amt.getNode());
8605       }
8606       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
8607     }
8608   }
8609
8610   // Fold a series of buildvector, bitcast, and truncate if possible.
8611   // For example fold
8612   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
8613   //   (2xi32 (buildvector x, y)).
8614   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
8615       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
8616       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
8617       N0.getOperand(0).hasOneUse()) {
8618     SDValue BuildVect = N0.getOperand(0);
8619     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
8620     EVT TruncVecEltTy = VT.getVectorElementType();
8621
8622     // Check that the element types match.
8623     if (BuildVectEltTy == TruncVecEltTy) {
8624       // Now we only need to compute the offset of the truncated elements.
8625       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
8626       unsigned TruncVecNumElts = VT.getVectorNumElements();
8627       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
8628
8629       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
8630              "Invalid number of elements");
8631
8632       SmallVector<SDValue, 8> Opnds;
8633       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
8634         Opnds.push_back(BuildVect.getOperand(i));
8635
8636       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
8637     }
8638   }
8639
8640   // See if we can simplify the input to this truncate through knowledge that
8641   // only the low bits are being used.
8642   // For example "trunc (or (shl x, 8), y)" // -> trunc y
8643   // Currently we only perform this optimization on scalars because vectors
8644   // may have different active low bits.
8645   if (!VT.isVector()) {
8646     APInt Mask =
8647         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
8648     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
8649       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
8650   }
8651
8652   // fold (truncate (load x)) -> (smaller load x)
8653   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
8654   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
8655     if (SDValue Reduced = ReduceLoadWidth(N))
8656       return Reduced;
8657
8658     // Handle the case where the load remains an extending load even
8659     // after truncation.
8660     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
8661       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8662       if (!LN0->isVolatile() &&
8663           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
8664         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
8665                                          VT, LN0->getChain(), LN0->getBasePtr(),
8666                                          LN0->getMemoryVT(),
8667                                          LN0->getMemOperand());
8668         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
8669         return NewLoad;
8670       }
8671     }
8672   }
8673
8674   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
8675   // where ... are all 'undef'.
8676   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
8677     SmallVector<EVT, 8> VTs;
8678     SDValue V;
8679     unsigned Idx = 0;
8680     unsigned NumDefs = 0;
8681
8682     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
8683       SDValue X = N0.getOperand(i);
8684       if (!X.isUndef()) {
8685         V = X;
8686         Idx = i;
8687         NumDefs++;
8688       }
8689       // Stop if more than one members are non-undef.
8690       if (NumDefs > 1)
8691         break;
8692       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
8693                                      VT.getVectorElementType(),
8694                                      X.getValueType().getVectorNumElements()));
8695     }
8696
8697     if (NumDefs == 0)
8698       return DAG.getUNDEF(VT);
8699
8700     if (NumDefs == 1) {
8701       assert(V.getNode() && "The single defined operand is empty!");
8702       SmallVector<SDValue, 8> Opnds;
8703       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
8704         if (i != Idx) {
8705           Opnds.push_back(DAG.getUNDEF(VTs[i]));
8706           continue;
8707         }
8708         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
8709         AddToWorklist(NV.getNode());
8710         Opnds.push_back(NV);
8711       }
8712       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
8713     }
8714   }
8715
8716   // Fold truncate of a bitcast of a vector to an extract of the low vector
8717   // element.
8718   //
8719   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
8720   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
8721     SDValue VecSrc = N0.getOperand(0);
8722     EVT SrcVT = VecSrc.getValueType();
8723     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
8724         (!LegalOperations ||
8725          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
8726       SDLoc SL(N);
8727
8728       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
8729       unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
8730       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
8731                          VecSrc, DAG.getConstant(Idx, SL, IdxVT));
8732     }
8733   }
8734
8735   // Simplify the operands using demanded-bits information.
8736   if (!VT.isVector() &&
8737       SimplifyDemandedBits(SDValue(N, 0)))
8738     return SDValue(N, 0);
8739
8740   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
8741   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
8742   // When the adde's carry is not used.
8743   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
8744       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
8745       (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
8746     SDLoc SL(N);
8747     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
8748     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
8749     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
8750     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
8751   }
8752
8753   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
8754     return NewVSel;
8755
8756   return SDValue();
8757 }
8758
8759 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
8760   SDValue Elt = N->getOperand(i);
8761   if (Elt.getOpcode() != ISD::MERGE_VALUES)
8762     return Elt.getNode();
8763   return Elt.getOperand(Elt.getResNo()).getNode();
8764 }
8765
8766 /// build_pair (load, load) -> load
8767 /// if load locations are consecutive.
8768 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
8769   assert(N->getOpcode() == ISD::BUILD_PAIR);
8770
8771   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
8772   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
8773
8774   // A BUILD_PAIR is always having the least significant part in elt 0 and the
8775   // most significant part in elt 1. So when combining into one large load, we
8776   // need to consider the endianness.
8777   if (DAG.getDataLayout().isBigEndian())
8778     std::swap(LD1, LD2);
8779
8780   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
8781       LD1->getAddressSpace() != LD2->getAddressSpace())
8782     return SDValue();
8783   EVT LD1VT = LD1->getValueType(0);
8784   unsigned LD1Bytes = LD1VT.getStoreSize();
8785   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
8786       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
8787     unsigned Align = LD1->getAlignment();
8788     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
8789         VT.getTypeForEVT(*DAG.getContext()));
8790
8791     if (NewAlign <= Align &&
8792         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
8793       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
8794                          LD1->getPointerInfo(), Align);
8795   }
8796
8797   return SDValue();
8798 }
8799
8800 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
8801   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
8802   // and Lo parts; on big-endian machines it doesn't.
8803   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
8804 }
8805
8806 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
8807                                     const TargetLowering &TLI) {
8808   // If this is not a bitcast to an FP type or if the target doesn't have
8809   // IEEE754-compliant FP logic, we're done.
8810   EVT VT = N->getValueType(0);
8811   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
8812     return SDValue();
8813
8814   // TODO: Use splat values for the constant-checking below and remove this
8815   // restriction.
8816   SDValue N0 = N->getOperand(0);
8817   EVT SourceVT = N0.getValueType();
8818   if (SourceVT.isVector())
8819     return SDValue();
8820
8821   unsigned FPOpcode;
8822   APInt SignMask;
8823   switch (N0.getOpcode()) {
8824   case ISD::AND:
8825     FPOpcode = ISD::FABS;
8826     SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
8827     break;
8828   case ISD::XOR:
8829     FPOpcode = ISD::FNEG;
8830     SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
8831     break;
8832   // TODO: ISD::OR --> ISD::FNABS?
8833   default:
8834     return SDValue();
8835   }
8836
8837   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
8838   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
8839   SDValue LogicOp0 = N0.getOperand(0);
8840   ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8841   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
8842       LogicOp0.getOpcode() == ISD::BITCAST &&
8843       LogicOp0->getOperand(0).getValueType() == VT)
8844     return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
8845
8846   return SDValue();
8847 }
8848
8849 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
       // Combine a BITCAST node. The folds below are tried in order and the first
       // one that applies returns its replacement; an empty SDValue is returned
       // when nothing matches:
       //   1. bitcast of undef
       //   2. bitcast of an all-constant BUILD_VECTOR (only before type legalization)
       //   3. bitcast of a scalar integer/FP constant
       //   4. bitcast of a bitcast
       //   5. bitcast of a normal, non-volatile, single-use load
       //   6. integer sign-bit logic between bitcasts -> fabs/fneg
       //   7. bitcast of fneg/fabs -> integer xor/and
       //   8. bitcast of (fcopysign cst, x) -> integer logic
       //   9. bitcast of a BUILD_PAIR of consecutive loads
       //  10. bitcast of a shuffle whose inputs are themselves bitcasts/constants
8850   SDValue N0 = N->getOperand(0);
8851   EVT VT = N->getValueType(0);
8852
8853   if (N0.isUndef())
8854     return DAG.getUNDEF(VT);
8855
8856   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
8857   // Only do this before legalize, since afterward the target may be depending
8858   // on the bitconvert.
8859   // First check to see if this is all constant.
8860   if (!LegalTypes &&
8861       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
8862       VT.isVector()) {
8863     bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
8864
8865     EVT DestEltVT = N->getValueType(0).getVectorElementType();
8866     assert(!DestEltVT.isVector() &&
8867            "Element type of vector ValueType must not be vector!");
8868     if (isSimple)
8869       return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
8870   }
8871
8872   // If the input is a constant, let getNode fold it.
8873   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
8874     // If we can't allow illegal operations, we need to check that this is just
8875     // a fp -> int or int -> fp conversion and that the resulting operation will
8876     // be legal.
8877     if (!LegalOperations ||
8878         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
8879          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
8880         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
8881          TLI.isOperationLegal(ISD::Constant, VT)))
8882       return DAG.getBitcast(VT, N0);
8883   }
8884
8885   // (conv (conv x, t1), t2) -> (conv x, t2)
8886   if (N0.getOpcode() == ISD::BITCAST)
8887     return DAG.getBitcast(VT, N0.getOperand(0));
8888
8889   // fold (conv (load x)) -> (load (conv*)x)
8890   // If the resultant load doesn't need a higher alignment than the original!
8891   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
8892       // Do not change the width of a volatile load.
8893       !cast<LoadSDNode>(N0)->isVolatile() &&
8894       // Do not remove the cast if the types differ in endian layout.
8895       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
8896           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
8897       (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
8898       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
8899     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8900     unsigned OrigAlign = LN0->getAlignment();
8901
         // Only rewrite the load if the target both allows an access of type VT at
         // the original alignment and reports it as fast.
8902     bool Fast = false;
8903     if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
8904                                LN0->getAddressSpace(), OrigAlign, &Fast) &&
8905         Fast) {
           // The new load reuses the old load's chain, base pointer, pointer info,
           // alignment, memory-operand flags and AA info; users of the old load's
           // chain result (value 1) are redirected to the new load's chain.
8906       SDValue Load =
8907           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
8908                       LN0->getPointerInfo(), OrigAlign,
8909                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8910       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
8911       return Load;
8912     }
8913   }
8914
8915   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
8916     return V;
8917
8918   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
8919   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
8920   //
8921   // For ppc_fp128:
8922   // fold (bitcast (fneg x)) ->
8923   //     flipbit = signbit
8924   //     (xor (bitcast x) (build_pair flipbit, flipbit))
8925   //
8926   // fold (bitcast (fabs x)) ->
8927   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
8928   //     (xor (bitcast x) (build_pair flipbit, flipbit))
8929   // This often reduces constant pool loads.
       // Note: the extract_element index written as 0 above is, in the code below,
       // the value returned by getPPCf128HiElementSelector(DAG).
8930   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
8931        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
8932       N0.getNode()->hasOneUse() && VT.isInteger() &&
8933       !VT.isVector() && !N0.getValueType().isVector()) {
         // NewConv is the fneg/fabs operand reinterpreted as the integer type VT.
8934     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
8935     AddToWorklist(NewConv.getNode());
8936
8937     SDLoc DL(N);
8938     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
           // ppcf128 is treated as two i64 halves: the same 64-bit FlipBit is put
           // into both halves with BUILD_PAIR and XORed into the i128 value.
8939       assert(VT.getSizeInBits() == 128);
8940       SDValue SignBit = DAG.getConstant(
8941           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
8942       SDValue FlipBit;
8943       if (N0.getOpcode() == ISD::FNEG) {
8944         FlipBit = SignBit;
8945         AddToWorklist(FlipBit.getNode());
8946       } else {
8947         assert(N0.getOpcode() == ISD::FABS);
             // For fabs, flip the sign bits only when the Hi half's sign bit is
             // currently set: FlipBit = Hi & signbit.
8948         SDValue Hi =
8949             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
8950                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
8951                                               SDLoc(NewConv)));
8952         AddToWorklist(Hi.getNode());
8953         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
8954         AddToWorklist(FlipBit.getNode());
8955       }
8956       SDValue FlipBits =
8957           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
8958       AddToWorklist(FlipBits.getNode());
8959       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
8960     }
         // Generic scalar case: toggle (fneg) or clear (fabs) the top bit of VT.
8961     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
8962     if (N0.getOpcode() == ISD::FNEG)
8963       return DAG.getNode(ISD::XOR, DL, VT,
8964                          NewConv, DAG.getConstant(SignBit, DL, VT));
8965     assert(N0.getOpcode() == ISD::FABS);
8966     return DAG.getNode(ISD::AND, DL, VT,
8967                        NewConv, DAG.getConstant(~SignBit, DL, VT));
8968   }
8969
8970   // fold (bitconvert (fcopysign cst, x)) ->
8971   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
8972   // Note that we don't handle (copysign x, cst) because this can always be
8973   // folded to an fneg or fabs.
8974   //
8975   // For ppc_fp128:
8976   // fold (bitcast (fcopysign cst, x)) ->
8977   //     flipbit = (and (extract_element
8978   //                     (xor (bitcast cst), (bitcast x)), 0),
8979   //                    signbit)
8980   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
8981   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
8982       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
8983       VT.isInteger() && !VT.isVector()) {
         // The sign source (operand 1) may have a different width than the result,
         // so it is first bitcast to an integer of its own width; the fold is
         // skipped if that integer type is not legal.
8984     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
8985     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
8986     if (isTypeLegal(IntXVT)) {
8987       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
8988       AddToWorklist(X.getNode());
8989
8990       // If X has a different width than the result/lhs, sext it or truncate it.
8991       unsigned VTWidth = VT.getSizeInBits();
8992       if (OrigXWidth < VTWidth) {
8993         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
8994         AddToWorklist(X.getNode());
8995       } else if (OrigXWidth > VTWidth) {
8996         // To get the sign bit in the right place, we have to shift it right
8997         // before truncating.
8998         SDLoc DL(X);
8999         X = DAG.getNode(ISD::SRL, DL,
9000                         X.getValueType(), X,
9001                         DAG.getConstant(OrigXWidth-VTWidth, DL,
9002                                         X.getValueType()));
9003         AddToWorklist(X.getNode());
9004         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
9005         AddToWorklist(X.getNode());
9006       }
9007
9008       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
             // ppcf128: the sign bits need flipping exactly when the selected i64
             // halves of cst and x differ in their sign bit, hence the XOR of the
             // two bitcasts. Note this block declares its own X (a direct bitcast
             // of operand 1 to VT) that shadows the width-adjusted X above.
9009         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
9010         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
9011         AddToWorklist(Cst.getNode());
9012         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
9013         AddToWorklist(X.getNode());
9014         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
9015         AddToWorklist(XorResult.getNode());
9016         SDValue XorResult64 = DAG.getNode(
9017             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
9018             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
9019                                   SDLoc(XorResult)));
9020         AddToWorklist(XorResult64.getNode());
9021         SDValue FlipBit =
9022             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
9023                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
9024         AddToWorklist(FlipBit.getNode());
9025         SDValue FlipBits =
9026             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
9027         AddToWorklist(FlipBits.getNode());
9028         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
9029       }
           // Generic case: keep only the sign bit of X, clear the sign bit of the
           // constant, and OR the two together.
9030       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
9031       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
9032                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
9033       AddToWorklist(X.getNode());
9034
9035       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
9036       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
9037                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
9038       AddToWorklist(Cst.getNode());
9039
9040       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
9041     }
9042   }
9043
9044   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
9045   if (N0.getOpcode() == ISD::BUILD_PAIR)
9046     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
9047       return CombineLD;
9048
9049   // Remove double bitcasts from shuffles - this is often a legacy of
9050   // XformToShuffleWithZero being used to combine bitmaskings (of
9051   // float vectors bitcast to integer vectors) into shuffles.
9052   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
       // Only handled when VT has an element count that is a whole multiple of the
       // shuffle's element count, so every old mask entry maps to a fixed-size run
       // of new entries.
9053   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
9054       N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
9055       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
9056       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
9057     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
9058
9059     // If operands are a bitcast, peek through if it casts the original VT.
9060     // If operands are a constant, just bitcast back to original VT.
         // Any other operand yields an empty SDValue, which aborts the fold below.
9061     auto PeekThroughBitcast = [&](SDValue Op) {
9062       if (Op.getOpcode() == ISD::BITCAST &&
9063           Op.getOperand(0).getValueType() == VT)
9064         return SDValue(Op.getOperand(0));
9065       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
9066           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
9067         return DAG.getBitcast(VT, Op);
9068       return SDValue();
9069     };
9070
9071     // FIXME: If either input vector is bitcast, try to convert the shuffle to
9072     // the result type of this bitcast. This would eliminate at least one
9073     // bitcast. See the transform in InstCombine.
9074     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
9075     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
9076     if (!(SV0 && SV1))
9077       return SDValue();
9078
         // Each original mask entry M expands to MaskScale consecutive entries
         // M * MaskScale + i; negative (undef) entries expand to -1.
9079     int MaskScale =
9080         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
9081     SmallVector<int, 8> NewMask;
9082     for (int M : SVN->getMask())
9083       for (int i = 0; i != MaskScale; ++i)
9084         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
9085
         // If the target rejects the scaled mask, retry once with the two inputs
         // (and the mask) commuted.
9086     bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
9087     if (!LegalMask) {
9088       std::swap(SV0, SV1);
9089       ShuffleVectorSDNode::commuteMask(NewMask);
9090       LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
9091     }
9092
9093     if (LegalMask)
9094       return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
9095   }
9096
9097   return SDValue();
9098 }
9099
9100 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
9101   EVT VT = N->getValueType(0);
9102   return CombineConsecutiveLoads(N, VT);
9103 }
9104
9105 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
9106 /// operands. DstEltVT indicates the destination element value type.
9107 SDValue DAGCombiner::
9108 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
9109   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
9110
9111   // If this is already the right type, we're done.
9112   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
9113
9114   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
9115   unsigned DstBitSize = DstEltVT.getSizeInBits();
9116
9117   // If this is a conversion of N elements of one type to N elements of another
9118   // type, convert each element.  This handles FP<->INT cases.
9119   if (SrcBitSize == DstBitSize) {
9120     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
9121                               BV->getValueType(0).getVectorNumElements());
9122
9123     // Due to the FP element handling below calling this routine recursively,
9124     // we can end up with a scalar-to-vector node here.
9125     if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
9126       return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
9127                          DAG.getBitcast(DstEltVT, BV->getOperand(0)));
9128
9129     SmallVector<SDValue, 8> Ops;
9130     for (SDValue Op : BV->op_values()) {
9131       // If the vector element type is not legal, the BUILD_VECTOR operands
9132       // are promoted and implicitly truncated.  Make that explicit here.
9133       if (Op.getValueType() != SrcEltVT)
9134         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
9135       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
9136       AddToWorklist(Ops.back().getNode());
9137     }
9138     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
9139   }
9140
9141   // Otherwise, we're growing or shrinking the elements.  To avoid having to
9142   // handle annoying details of growing/shrinking FP values, we convert them to
9143   // int first.
9144   if (SrcEltVT.isFloatingPoint()) {
9145     // Convert the input float vector to a int vector where the elements are the
9146     // same sizes.
9147     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
9148     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
9149     SrcEltVT = IntVT;
9150   }
9151
9152   // Now we know the input is an integer vector.  If the output is a FP type,
9153   // convert to integer first, then to FP of the right size.
9154   if (DstEltVT.isFloatingPoint()) {
9155     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
9156     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
9157
9158     // Next, convert to FP elements of the same size.
9159     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
9160   }
9161
9162   SDLoc DL(BV);
9163
9164   // Okay, we know the src/dst types are both integers of differing types.
9165   // Handling growing first.
9166   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
9167   if (SrcBitSize < DstBitSize) {
9168     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
9169
9170     SmallVector<SDValue, 8> Ops;
9171     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
9172          i += NumInputsPerOutput) {
9173       bool isLE = DAG.getDataLayout().isLittleEndian();
9174       APInt NewBits = APInt(DstBitSize, 0);
9175       bool EltIsUndef = true;
9176       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
9177         // Shift the previously computed bits over.
9178         NewBits <<= SrcBitSize;
9179         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
9180         if (Op.isUndef()) continue;
9181         EltIsUndef = false;
9182
9183         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
9184                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
9185       }
9186
9187       if (EltIsUndef)
9188         Ops.push_back(DAG.getUNDEF(DstEltVT));
9189       else
9190         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
9191     }
9192
9193     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
9194     return DAG.getBuildVector(VT, DL, Ops);
9195   }
9196
9197   // Finally, this must be the case where we are shrinking elements: each input
9198   // turns into multiple outputs.
9199   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
9200   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
9201                             NumOutputsPerInput*BV->getNumOperands());
9202   SmallVector<SDValue, 8> Ops;
9203
9204   for (const SDValue &Op : BV->op_values()) {
9205     if (Op.isUndef()) {
9206       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
9207       continue;
9208     }
9209
9210     APInt OpVal = cast<ConstantSDNode>(Op)->
9211                   getAPIntValue().zextOrTrunc(SrcBitSize);
9212
9213     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
9214       APInt ThisVal = OpVal.trunc(DstBitSize);
9215       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
9216       OpVal.lshrInPlace(DstBitSize);
9217     }
9218
9219     // For big endian targets, swap the order of the pieces of each element.
9220     if (DAG.getDataLayout().isBigEndian())
9221       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
9222   }
9223
9224   return DAG.getBuildVector(VT, DL, Ops);
9225 }
9226
9227 static bool isContractable(SDNode *N) {
9228   SDNodeFlags F = N->getFlags();
9229   return F.hasAllowContract() || F.hasUnsafeAlgebra();
9230 }
9231
9232 /// Try to perform FMA combining on a given FADD node.
     ///
     /// Looks for an FMUL feeding the FADD \p N (directly, through an FP_EXTEND,
     /// or nested inside an already fused node) and rewrites the pair as an FMAD
     /// or FMA node. Returns the fused node, or an empty SDValue when no fusion is
     /// performed.
9233 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
9234   SDValue N0 = N->getOperand(0);
9235   SDValue N1 = N->getOperand(1);
9236   EVT VT = N->getValueType(0);
9237   SDLoc SL(N);
9238
9239   const TargetOptions &Options = DAG.getTarget().Options;
9240
9241   // Floating-point multiply-add with intermediate rounding.
       // Only considered once operations are being legalized and FMAD is legal.
9242   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9243
9244   // Floating-point multiply-add without intermediate rounding.
9245   bool HasFMA =
9246       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9247       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
9248
9249   // No valid opcode, do not combine.
9250   if (!HasFMAD && !HasFMA)
9251     return SDValue();
9252
       // Fusion is allowed regardless of per-node flags under fast FP-op fusion,
       // unsafe FP math, or whenever FMAD is available.
9253   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
9254                               Options.UnsafeFPMath || HasFMAD);
9255   // If the addition is not contractable, do not combine.
9256   if (!AllowFusionGlobally && !isContractable(N))
9257     return SDValue();
9258
       // Bail out when the subtarget's SelectionDAG info reports (judging by the
       // hook's name) that FMAs are formed in the machine combiner at this
       // optimization level.
9259   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
9260   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
9261     return SDValue();
9262
9263   // Always prefer FMAD to FMA for precision.
9264   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
       // When the target enables aggressive fusion, multiplies with more than one
       // use may be fused too and the nested patterns further down are tried.
9265   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9266
9267   // Is the node an FMUL and contractable either due to global flags or
9268   // SDNodeFlags.
9269   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
9270     if (N.getOpcode() != ISD::FMUL)
9271       return false;
9272     return AllowFusionGlobally || isContractable(N.getNode());
9273   };
9274   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
9275   // prefer to fold the multiply with fewer uses.
9276   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
9277     if (N0.getNode()->use_size() > N1.getNode()->use_size())
9278       std::swap(N0, N1);
9279   }
9280
9281   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
9282   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
9283     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9284                        N0.getOperand(0), N0.getOperand(1), N1);
9285   }
9286
9287   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
9288   // Note: Commutes FADD operands.
9289   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
9290     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9291                        N1.getOperand(0), N1.getOperand(1), N0);
9292   }
9293
9294   // Look through FP_EXTEND nodes to do more combining.
       // These folds are gated on TLI.isFPExtFoldable for the fused opcode, the
       // result type and the type being extended.
9295
9296   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
9297   if (N0.getOpcode() == ISD::FP_EXTEND) {
9298     SDValue N00 = N0.getOperand(0);
9299     if (isContractableFMUL(N00) &&
9300         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
9301       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9302                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9303                                      N00.getOperand(0)),
9304                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9305                                      N00.getOperand(1)), N1);
9306     }
9307   }
9308
9309   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
9310   // Note: Commutes FADD operands.
9311   if (N1.getOpcode() == ISD::FP_EXTEND) {
9312     SDValue N10 = N1.getOperand(0);
9313     if (isContractableFMUL(N10) &&
9314         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
9315       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9316                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9317                                      N10.getOperand(0)),
9318                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9319                                      N10.getOperand(1)), N0);
9320     }
9321   }
9322
9323   // More folding opportunities when target permits.
9324   if (Aggressive) {
9325     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
9326     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9327     // are currently only supported on binary nodes.
         // Requires global unsafe FP math and that both the fused node and its
         // inner multiply have a single use.
9328     if (Options.UnsafeFPMath &&
9329         N0.getOpcode() == PreferredFusedOpcode &&
9330         N0.getOperand(2).getOpcode() == ISD::FMUL &&
9331         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
9332       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9333                          N0.getOperand(0), N0.getOperand(1),
9334                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9335                                      N0.getOperand(2).getOperand(0),
9336                                      N0.getOperand(2).getOperand(1),
9337                                      N1));
9338     }
9339
9340     // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
9341     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9342     // are currently only supported on binary nodes.
9343     if (Options.UnsafeFPMath &&
9344         N1->getOpcode() == PreferredFusedOpcode &&
9345         N1.getOperand(2).getOpcode() == ISD::FMUL &&
9346         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
9347       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9348                          N1.getOperand(0), N1.getOperand(1),
9349                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9350                                      N1.getOperand(2).getOperand(0),
9351                                      N1.getOperand(2).getOperand(1),
9352                                      N0));
9353     }
9354
9355
9356     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
9357     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
         // Helper shared by this fold and its commuted form further down: builds
         // (fma X, Y, (fma (fpext U), (fpext V), Z)).
9358     auto FoldFAddFMAFPExtFMul = [&] (
9359       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
9360       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
9361                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9362                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
9363                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
9364                                      Z));
9365     };
9366     if (N0.getOpcode() == PreferredFusedOpcode) {
9367       SDValue N02 = N0.getOperand(2);
9368       if (N02.getOpcode() == ISD::FP_EXTEND) {
9369         SDValue N020 = N02.getOperand(0);
9370         if (isContractableFMUL(N020) &&
9371             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
9372           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
9373                                       N020.getOperand(0), N020.getOperand(1),
9374                                       N1);
9375         }
9376       }
9377     }
9378
9379     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
9380     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
9381     // FIXME: This turns two single-precision and one double-precision
9382     // operation into two double-precision operations, which might not be
9383     // interesting for all targets, especially GPUs.
         // Helper shared by this fold and its commuted form further down: builds
         // (fma (fpext X), (fpext Y), (fma (fpext U), (fpext V), Z)).
9384     auto FoldFAddFPExtFMAFMul = [&] (
9385       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
9386       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9387                          DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
9388                          DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
9389                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9390                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
9391                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
9392                                      Z));
9393     };
9394     if (N0.getOpcode() == ISD::FP_EXTEND) {
9395       SDValue N00 = N0.getOperand(0);
9396       if (N00.getOpcode() == PreferredFusedOpcode) {
9397         SDValue N002 = N00.getOperand(2);
             // The foldability query uses the type of the inner fused node (N00),
             // i.e. the type being extended.
9398         if (isContractableFMUL(N002) &&
9399             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
9400           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
9401                                       N002.getOperand(0), N002.getOperand(1),
9402                                       N1);
9403         }
9404       }
9405     }
9406
9407     // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
9408     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
9409     if (N1.getOpcode() == PreferredFusedOpcode) {
9410       SDValue N12 = N1.getOperand(2);
9411       if (N12.getOpcode() == ISD::FP_EXTEND) {
9412         SDValue N120 = N12.getOperand(0);
9413         if (isContractableFMUL(N120) &&
9414             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
9415           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
9416                                       N120.getOperand(0), N120.getOperand(1),
9417                                       N0);
9418         }
9419       }
9420     }
9421
9422     // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
9423     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
9424     // FIXME: This turns two single-precision and one double-precision
9425     // operation into two double-precision operations, which might not be
9426     // interesting for all targets, especially GPUs.
9427     if (N1.getOpcode() == ISD::FP_EXTEND) {
9428       SDValue N10 = N1.getOperand(0);
9429       if (N10.getOpcode() == PreferredFusedOpcode) {
9430         SDValue N102 = N10.getOperand(2);
9431         if (isContractableFMUL(N102) &&
9432             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
9433           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
9434                                       N102.getOperand(0), N102.getOperand(1),
9435                                       N0);
9436         }
9437       }
9438     }
9439   }
9440
9441   return SDValue();
9442 }
9443
9444 /// Try to perform FMA combining on a given FSUB node.
9445 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
9446   SDValue N0 = N->getOperand(0);
9447   SDValue N1 = N->getOperand(1);
9448   EVT VT = N->getValueType(0);
9449   SDLoc SL(N);
9450
9451   const TargetOptions &Options = DAG.getTarget().Options;
9452   // Floating-point multiply-add with intermediate rounding.
9453   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9454
9455   // Floating-point multiply-add without intermediate rounding.
9456   bool HasFMA =
9457       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9458       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
9459
9460   // No valid opcode, do not combine.
9461   if (!HasFMAD && !HasFMA)
9462     return SDValue();
9463
9464   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
9465                               Options.UnsafeFPMath || HasFMAD);
9466   // If the subtraction is not contractable, do not combine.
9467   if (!AllowFusionGlobally && !isContractable(N))
9468     return SDValue();
9469
9470   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
9471   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
9472     return SDValue();
9473
9474   // Always prefer FMAD to FMA for precision.
9475   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
9476   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9477
9478   // Is the node an FMUL and contractable either due to global flags or
9479   // SDNodeFlags.
9480   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
9481     if (N.getOpcode() != ISD::FMUL)
9482       return false;
9483     return AllowFusionGlobally || isContractable(N.getNode());
9484   };
9485
9486   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
9487   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
9488     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9489                        N0.getOperand(0), N0.getOperand(1),
9490                        DAG.getNode(ISD::FNEG, SL, VT, N1));
9491   }
9492
9493   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
9494   // Note: Commutes FSUB operands.
9495   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
9496     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9497                        DAG.getNode(ISD::FNEG, SL, VT,
9498                                    N1.getOperand(0)),
9499                        N1.getOperand(1), N0);
9500
9501   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
9502   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
9503       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
9504     SDValue N00 = N0.getOperand(0).getOperand(0);
9505     SDValue N01 = N0.getOperand(0).getOperand(1);
9506     return DAG.getNode(PreferredFusedOpcode, SL, VT,
9507                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
9508                        DAG.getNode(ISD::FNEG, SL, VT, N1));
9509   }
9510
9511   // Look through FP_EXTEND nodes to do more combining.
9512
9513   // fold (fsub (fpext (fmul x, y)), z)
9514   //   -> (fma (fpext x), (fpext y), (fneg z))
9515   if (N0.getOpcode() == ISD::FP_EXTEND) {
9516     SDValue N00 = N0.getOperand(0);
9517     if (isContractableFMUL(N00) &&
9518         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
9519       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9520                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9521                                      N00.getOperand(0)),
9522                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9523                                      N00.getOperand(1)),
9524                          DAG.getNode(ISD::FNEG, SL, VT, N1));
9525     }
9526   }
9527
9528   // fold (fsub x, (fpext (fmul y, z)))
9529   //   -> (fma (fneg (fpext y)), (fpext z), x)
9530   // Note: Commutes FSUB operands.
9531   if (N1.getOpcode() == ISD::FP_EXTEND) {
9532     SDValue N10 = N1.getOperand(0);
9533     if (isContractableFMUL(N10) &&
9534         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
9535       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9536                          DAG.getNode(ISD::FNEG, SL, VT,
9537                                      DAG.getNode(ISD::FP_EXTEND, SL, VT,
9538                                                  N10.getOperand(0))),
9539                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9540                                      N10.getOperand(1)),
9541                          N0);
9542     }
9543   }
9544
9545   // fold (fsub (fpext (fneg (fmul, x, y))), z)
9546   //   -> (fneg (fma (fpext x), (fpext y), z))
9547   // Note: This could be removed with appropriate canonicalization of the
9548   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
9549   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
9550   // from implementing the canonicalization in visitFSUB.
9551   if (N0.getOpcode() == ISD::FP_EXTEND) {
9552     SDValue N00 = N0.getOperand(0);
9553     if (N00.getOpcode() == ISD::FNEG) {
9554       SDValue N000 = N00.getOperand(0);
9555       if (isContractableFMUL(N000) &&
9556           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
9557         return DAG.getNode(ISD::FNEG, SL, VT,
9558                            DAG.getNode(PreferredFusedOpcode, SL, VT,
9559                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
9560                                                    N000.getOperand(0)),
9561                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
9562                                                    N000.getOperand(1)),
9563                                        N1));
9564       }
9565     }
9566   }
9567
9568   // fold (fsub (fneg (fpext (fmul, x, y))), z)
9569   //   -> (fneg (fma (fpext x)), (fpext y), z)
9570   // Note: This could be removed with appropriate canonicalization of the
9571   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
9572   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
9573   // from implementing the canonicalization in visitFSUB.
9574   if (N0.getOpcode() == ISD::FNEG) {
9575     SDValue N00 = N0.getOperand(0);
9576     if (N00.getOpcode() == ISD::FP_EXTEND) {
9577       SDValue N000 = N00.getOperand(0);
9578       if (isContractableFMUL(N000) &&
9579           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
9580         return DAG.getNode(ISD::FNEG, SL, VT,
9581                            DAG.getNode(PreferredFusedOpcode, SL, VT,
9582                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
9583                                                    N000.getOperand(0)),
9584                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
9585                                                    N000.getOperand(1)),
9586                                        N1));
9587       }
9588     }
9589   }
9590
9591   // More folding opportunities when target permits.
9592   if (Aggressive) {
9593     // fold (fsub (fma x, y, (fmul u, v)), z)
9594     //   -> (fma x, y (fma u, v, (fneg z)))
9595     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9596     // are currently only supported on binary nodes.
9597     if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
9598         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
9599         N0.getOperand(2)->hasOneUse()) {
9600       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9601                          N0.getOperand(0), N0.getOperand(1),
9602                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9603                                      N0.getOperand(2).getOperand(0),
9604                                      N0.getOperand(2).getOperand(1),
9605                                      DAG.getNode(ISD::FNEG, SL, VT,
9606                                                  N1)));
9607     }
9608
9609     // fold (fsub x, (fma y, z, (fmul u, v)))
9610     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
9611     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9612     // are currently only supported on binary nodes.
9613     if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
9614         isContractableFMUL(N1.getOperand(2))) {
9615       SDValue N20 = N1.getOperand(2).getOperand(0);
9616       SDValue N21 = N1.getOperand(2).getOperand(1);
9617       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9618                          DAG.getNode(ISD::FNEG, SL, VT,
9619                                      N1.getOperand(0)),
9620                          N1.getOperand(1),
9621                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9622                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
9623
9624                                      N21, N0));
9625     }
9626
9627
9628     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
9629     //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
9630     if (N0.getOpcode() == PreferredFusedOpcode) {
9631       SDValue N02 = N0.getOperand(2);
9632       if (N02.getOpcode() == ISD::FP_EXTEND) {
9633         SDValue N020 = N02.getOperand(0);
9634         if (isContractableFMUL(N020) &&
9635             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
9636           return DAG.getNode(PreferredFusedOpcode, SL, VT,
9637                              N0.getOperand(0), N0.getOperand(1),
9638                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9639                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9640                                                      N020.getOperand(0)),
9641                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9642                                                      N020.getOperand(1)),
9643                                          DAG.getNode(ISD::FNEG, SL, VT,
9644                                                      N1)));
9645         }
9646       }
9647     }
9648
9649     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
9650     //   -> (fma (fpext x), (fpext y),
9651     //           (fma (fpext u), (fpext v), (fneg z)))
9652     // FIXME: This turns two single-precision and one double-precision
9653     // operation into two double-precision operations, which might not be
9654     // interesting for all targets, especially GPUs.
9655     if (N0.getOpcode() == ISD::FP_EXTEND) {
9656       SDValue N00 = N0.getOperand(0);
9657       if (N00.getOpcode() == PreferredFusedOpcode) {
9658         SDValue N002 = N00.getOperand(2);
9659         if (isContractableFMUL(N002) &&
9660             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
9661           return DAG.getNode(PreferredFusedOpcode, SL, VT,
9662                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
9663                                          N00.getOperand(0)),
9664                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
9665                                          N00.getOperand(1)),
9666                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9667                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9668                                                      N002.getOperand(0)),
9669                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9670                                                      N002.getOperand(1)),
9671                                          DAG.getNode(ISD::FNEG, SL, VT,
9672                                                      N1)));
9673         }
9674       }
9675     }
9676
9677     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
9678     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
9679     if (N1.getOpcode() == PreferredFusedOpcode &&
9680         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
9681       SDValue N120 = N1.getOperand(2).getOperand(0);
9682       if (isContractableFMUL(N120) &&
9683           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
9684         SDValue N1200 = N120.getOperand(0);
9685         SDValue N1201 = N120.getOperand(1);
9686         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9687                            DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
9688                            N1.getOperand(1),
9689                            DAG.getNode(PreferredFusedOpcode, SL, VT,
9690                                        DAG.getNode(ISD::FNEG, SL, VT,
9691                                                    DAG.getNode(ISD::FP_EXTEND, SL,
9692                                                                VT, N1200)),
9693                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
9694                                                    N1201),
9695                                        N0));
9696       }
9697     }
9698
9699     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
9700     //   -> (fma (fneg (fpext y)), (fpext z),
9701     //           (fma (fneg (fpext u)), (fpext v), x))
9702     // FIXME: This turns two single-precision and one double-precision
9703     // operation into two double-precision operations, which might not be
9704     // interesting for all targets, especially GPUs.
9705     if (N1.getOpcode() == ISD::FP_EXTEND &&
9706         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
9707       SDValue CvtSrc = N1.getOperand(0);
9708       SDValue N100 = CvtSrc.getOperand(0);
9709       SDValue N101 = CvtSrc.getOperand(1);
9710       SDValue N102 = CvtSrc.getOperand(2);
9711       if (isContractableFMUL(N102) &&
9712           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
9713         SDValue N1020 = N102.getOperand(0);
9714         SDValue N1021 = N102.getOperand(1);
9715         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9716                            DAG.getNode(ISD::FNEG, SL, VT,
9717                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
9718                                                    N100)),
9719                            DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
9720                            DAG.getNode(PreferredFusedOpcode, SL, VT,
9721                                        DAG.getNode(ISD::FNEG, SL, VT,
9722                                                    DAG.getNode(ISD::FP_EXTEND, SL,
9723                                                                VT, N1020)),
9724                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
9725                                                    N1021),
9726                                        N0));
9727       }
9728     }
9729   }
9730
9731   return SDValue();
9732 }
9733
9734 /// Try to perform FMA combining on a given FMUL node based on the distributive
9735 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
9736 /// subtraction instead of addition).
9737 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
9738   SDValue N0 = N->getOperand(0);
9739   SDValue N1 = N->getOperand(1);
9740   EVT VT = N->getValueType(0);
9741   SDLoc SL(N);
9742
9743   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
9744
9745   const TargetOptions &Options = DAG.getTarget().Options;
9746
9747   // The transforms below are incorrect when x == 0 and y == inf, because the
9748   // intermediate multiplication produces a nan.
9749   if (!Options.NoInfsFPMath)
9750     return SDValue();
9751
9752   // Floating-point multiply-add without intermediate rounding.
9753   bool HasFMA =
9754       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
9755       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9756       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
9757
9758   // Floating-point multiply-add with intermediate rounding. This can result
9759   // in a less precise result due to the changed rounding order.
9760   bool HasFMAD = Options.UnsafeFPMath &&
9761                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9762
9763   // No valid opcode, do not combine.
9764   if (!HasFMAD && !HasFMA)
9765     return SDValue();
9766
9767   // Always prefer FMAD to FMA for precision.
9768   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
9769   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9770
9771   // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
9772   // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
9773   auto FuseFADD = [&](SDValue X, SDValue Y) {
9774     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
9775       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
9776       if (XC1 && XC1->isExactlyValue(+1.0))
9777         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
9778       if (XC1 && XC1->isExactlyValue(-1.0))
9779         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
9780                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9781     }
9782     return SDValue();
9783   };
9784
9785   if (SDValue FMA = FuseFADD(N0, N1))
9786     return FMA;
9787   if (SDValue FMA = FuseFADD(N1, N0))
9788     return FMA;
9789
9790   // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
9791   // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
9792   // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
9793   // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
9794   auto FuseFSUB = [&](SDValue X, SDValue Y) {
9795     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
9796       auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
9797       if (XC0 && XC0->isExactlyValue(+1.0))
9798         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9799                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
9800                            Y);
9801       if (XC0 && XC0->isExactlyValue(-1.0))
9802         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9803                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
9804                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9805
9806       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
9807       if (XC1 && XC1->isExactlyValue(+1.0))
9808         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
9809                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9810       if (XC1 && XC1->isExactlyValue(-1.0))
9811         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
9812     }
9813     return SDValue();
9814   };
9815
9816   if (SDValue FMA = FuseFSUB(N0, N1))
9817     return FMA;
9818   if (SDValue FMA = FuseFSUB(N1, N0))
9819     return FMA;
9820
9821   return SDValue();
9822 }
9823
9824 static bool isFMulNegTwo(SDValue &N) {
9825   if (N.getOpcode() != ISD::FMUL)
9826     return false;
9827   if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1)))
9828     return CFP->isExactlyValue(-2.0);
9829   return false;
9830 }
9831
9832 SDValue DAGCombiner::visitFADD(SDNode *N) {
9833   SDValue N0 = N->getOperand(0);
9834   SDValue N1 = N->getOperand(1);
9835   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
9836   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
9837   EVT VT = N->getValueType(0);
9838   SDLoc DL(N);
9839   const TargetOptions &Options = DAG.getTarget().Options;
9840   const SDNodeFlags Flags = N->getFlags();
9841
9842   // fold vector ops
9843   if (VT.isVector())
9844     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9845       return FoldedVOp;
9846
9847   // fold (fadd c1, c2) -> c1 + c2
9848   if (N0CFP && N1CFP)
9849     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
9850
9851   // canonicalize constant to RHS
9852   if (N0CFP && !N1CFP)
9853     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
9854
9855   if (SDValue NewSel = foldBinOpIntoSelect(N))
9856     return NewSel;
9857
9858   // fold (fadd A, (fneg B)) -> (fsub A, B)
9859   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9860       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
9861     return DAG.getNode(ISD::FSUB, DL, VT, N0,
9862                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
9863
9864   // fold (fadd (fneg A), B) -> (fsub B, A)
9865   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9866       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
9867     return DAG.getNode(ISD::FSUB, DL, VT, N1,
9868                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
9869
9870   // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
9871   // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
9872   if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
9873       (isFMulNegTwo(N1) && N1.hasOneUse())) {
9874     bool N1IsFMul = isFMulNegTwo(N1);
9875     SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
9876     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
9877     return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
9878   }
9879
9880   // FIXME: Auto-upgrade the target/function-level option.
9881   if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
9882     // fold (fadd A, 0) -> A
9883     if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
9884       if (N1C->isZero())
9885         return N0;
9886   }
9887
9888   // If 'unsafe math' is enabled, fold lots of things.
9889   if (Options.UnsafeFPMath) {
9890     // No FP constant should be created after legalization as Instruction
9891     // Selection pass has a hard time dealing with FP constants.
9892     bool AllowNewConst = (Level < AfterLegalizeDAG);
9893
9894     // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
9895     if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
9896         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
9897       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
9898                          DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
9899                                      Flags),
9900                          Flags);
9901
9902     // If allowed, fold (fadd (fneg x), x) -> 0.0
9903     if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
9904       return DAG.getConstantFP(0.0, DL, VT);
9905
9906     // If allowed, fold (fadd x, (fneg x)) -> 0.0
9907     if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
9908       return DAG.getConstantFP(0.0, DL, VT);
9909
9910     // We can fold chains of FADD's of the same value into multiplications.
9911     // This transform is not safe in general because we are reducing the number
9912     // of rounding steps.
9913     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
9914       if (N0.getOpcode() == ISD::FMUL) {
9915         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9916         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
9917
9918         // (fadd (fmul x, c), x) -> (fmul x, c+1)
9919         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
9920           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9921                                        DAG.getConstantFP(1.0, DL, VT), Flags);
9922           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
9923         }
9924
9925         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
9926         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
9927             N1.getOperand(0) == N1.getOperand(1) &&
9928             N0.getOperand(0) == N1.getOperand(0)) {
9929           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9930                                        DAG.getConstantFP(2.0, DL, VT), Flags);
9931           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
9932         }
9933       }
9934
9935       if (N1.getOpcode() == ISD::FMUL) {
9936         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9937         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
9938
9939         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
9940         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
9941           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9942                                        DAG.getConstantFP(1.0, DL, VT), Flags);
9943           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
9944         }
9945
9946         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
9947         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
9948             N0.getOperand(0) == N0.getOperand(1) &&
9949             N1.getOperand(0) == N0.getOperand(0)) {
9950           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9951                                        DAG.getConstantFP(2.0, DL, VT), Flags);
9952           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
9953         }
9954       }
9955
9956       if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
9957         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9958         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
9959         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
9960             (N0.getOperand(0) == N1)) {
9961           return DAG.getNode(ISD::FMUL, DL, VT,
9962                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
9963         }
9964       }
9965
9966       if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
9967         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9968         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
9969         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
9970             N1.getOperand(0) == N0) {
9971           return DAG.getNode(ISD::FMUL, DL, VT,
9972                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
9973         }
9974       }
9975
9976       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
9977       if (AllowNewConst &&
9978           N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
9979           N0.getOperand(0) == N0.getOperand(1) &&
9980           N1.getOperand(0) == N1.getOperand(1) &&
9981           N0.getOperand(0) == N1.getOperand(0)) {
9982         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
9983                            DAG.getConstantFP(4.0, DL, VT), Flags);
9984       }
9985     }
9986   } // enable-unsafe-fp-math
9987
9988   // FADD -> FMA combines:
9989   if (SDValue Fused = visitFADDForFMACombine(N)) {
9990     AddToWorklist(Fused.getNode());
9991     return Fused;
9992   }
9993   return SDValue();
9994 }
9995
9996 SDValue DAGCombiner::visitFSUB(SDNode *N) {
9997   SDValue N0 = N->getOperand(0);
9998   SDValue N1 = N->getOperand(1);
9999   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10000   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10001   EVT VT = N->getValueType(0);
10002   SDLoc DL(N);
10003   const TargetOptions &Options = DAG.getTarget().Options;
10004   const SDNodeFlags Flags = N->getFlags();
10005
10006   // fold vector ops
10007   if (VT.isVector())
10008     if (SDValue FoldedVOp = SimplifyVBinOp(N))
10009       return FoldedVOp;
10010
10011   // fold (fsub c1, c2) -> c1-c2
10012   if (N0CFP && N1CFP)
10013     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
10014
10015   if (SDValue NewSel = foldBinOpIntoSelect(N))
10016     return NewSel;
10017
10018   // fold (fsub A, (fneg B)) -> (fadd A, B)
10019   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
10020     return DAG.getNode(ISD::FADD, DL, VT, N0,
10021                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
10022
10023   // FIXME: Auto-upgrade the target/function-level option.
10024   if (Options.NoSignedZerosFPMath  || N->getFlags().hasNoSignedZeros()) {
10025     // (fsub 0, B) -> -B
10026     if (N0CFP && N0CFP->isZero()) {
10027       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
10028         return GetNegatedExpression(N1, DAG, LegalOperations);
10029       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
10030         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
10031     }
10032   }
10033
10034   // If 'unsafe math' is enabled, fold lots of things.
10035   if (Options.UnsafeFPMath) {
10036     // (fsub A, 0) -> A
10037     if (N1CFP && N1CFP->isZero())
10038       return N0;
10039
10040     // (fsub x, x) -> 0.0
10041     if (N0 == N1)
10042       return DAG.getConstantFP(0.0f, DL, VT);
10043
10044     // (fsub x, (fadd x, y)) -> (fneg y)
10045     // (fsub x, (fadd y, x)) -> (fneg y)
10046     if (N1.getOpcode() == ISD::FADD) {
10047       SDValue N10 = N1->getOperand(0);
10048       SDValue N11 = N1->getOperand(1);
10049
10050       if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
10051         return GetNegatedExpression(N11, DAG, LegalOperations);
10052
10053       if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
10054         return GetNegatedExpression(N10, DAG, LegalOperations);
10055     }
10056   }
10057
10058   // FSUB -> FMA combines:
10059   if (SDValue Fused = visitFSUBForFMACombine(N)) {
10060     AddToWorklist(Fused.getNode());
10061     return Fused;
10062   }
10063
10064   return SDValue();
10065 }
10066
10067 SDValue DAGCombiner::visitFMUL(SDNode *N) {
10068   SDValue N0 = N->getOperand(0);
10069   SDValue N1 = N->getOperand(1);
10070   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10071   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10072   EVT VT = N->getValueType(0);
10073   SDLoc DL(N);
10074   const TargetOptions &Options = DAG.getTarget().Options;
10075   const SDNodeFlags Flags = N->getFlags();
10076
10077   // fold vector ops
10078   if (VT.isVector()) {
10079     // This just handles C1 * C2 for vectors. Other vector folds are below.
10080     if (SDValue FoldedVOp = SimplifyVBinOp(N))
10081       return FoldedVOp;
10082   }
10083
10084   // fold (fmul c1, c2) -> c1*c2
10085   if (N0CFP && N1CFP)
10086     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
10087
10088   // canonicalize constant to RHS
10089   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10090      !isConstantFPBuildVectorOrConstantFP(N1))
10091     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
10092
10093   // fold (fmul A, 1.0) -> A
10094   if (N1CFP && N1CFP->isExactlyValue(1.0))
10095     return N0;
10096
10097   if (SDValue NewSel = foldBinOpIntoSelect(N))
10098     return NewSel;
10099
10100   if (Options.UnsafeFPMath) {
10101     // fold (fmul A, 0) -> 0
10102     if (N1CFP && N1CFP->isZero())
10103       return N1;
10104
10105     // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
10106     if (N0.getOpcode() == ISD::FMUL) {
10107       // Fold scalars or any vector constants (not just splats).
10108       // This fold is done in general by InstCombine, but extra fmul insts
10109       // may have been generated during lowering.
10110       SDValue N00 = N0.getOperand(0);
10111       SDValue N01 = N0.getOperand(1);
10112       auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
10113       auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
10114       auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
10115
10116       // Check 1: Make sure that the first operand of the inner multiply is NOT
10117       // a constant. Otherwise, we may induce infinite looping.
10118       if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
10119         // Check 2: Make sure that the second operand of the inner multiply and
10120         // the second operand of the outer multiply are constants.
10121         if ((N1CFP && isConstOrConstSplatFP(N01)) ||
10122             (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
10123           SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
10124           return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
10125         }
10126       }
10127     }
10128
10129     // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
10130     // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
10131     // during an early run of DAGCombiner can prevent folding with fmuls
10132     // inserted during lowering.
10133     if (N0.getOpcode() == ISD::FADD &&
10134         (N0.getOperand(0) == N0.getOperand(1)) &&
10135         N0.hasOneUse()) {
10136       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
10137       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
10138       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
10139     }
10140   }
10141
10142   // fold (fmul X, 2.0) -> (fadd X, X)
10143   if (N1CFP && N1CFP->isExactlyValue(+2.0))
10144     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
10145
10146   // fold (fmul X, -1.0) -> (fneg X)
10147   if (N1CFP && N1CFP->isExactlyValue(-1.0))
10148     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
10149       return DAG.getNode(ISD::FNEG, DL, VT, N0);
10150
10151   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
10152   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
10153     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
10154       // Both can be negated for free, check to see if at least one is cheaper
10155       // negated.
10156       if (LHSNeg == 2 || RHSNeg == 2)
10157         return DAG.getNode(ISD::FMUL, DL, VT,
10158                            GetNegatedExpression(N0, DAG, LegalOperations),
10159                            GetNegatedExpression(N1, DAG, LegalOperations),
10160                            Flags);
10161     }
10162   }
10163
10164   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
10165   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
10166   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
10167       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
10168       TLI.isOperationLegal(ISD::FABS, VT)) {
10169     SDValue Select = N0, X = N1;
10170     if (Select.getOpcode() != ISD::SELECT)
10171       std::swap(Select, X);
10172
10173     SDValue Cond = Select.getOperand(0);
10174     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
10175     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
10176
10177     if (TrueOpnd && FalseOpnd &&
10178         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
10179         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
10180         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
10181       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
10182       switch (CC) {
10183       default: break;
10184       case ISD::SETOLT:
10185       case ISD::SETULT:
10186       case ISD::SETOLE:
10187       case ISD::SETULE:
10188       case ISD::SETLT:
10189       case ISD::SETLE:
10190         std::swap(TrueOpnd, FalseOpnd);
10191         LLVM_FALLTHROUGH;
10192       case ISD::SETOGT:
10193       case ISD::SETUGT:
10194       case ISD::SETOGE:
10195       case ISD::SETUGE:
10196       case ISD::SETGT:
10197       case ISD::SETGE:
10198         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
10199             TLI.isOperationLegal(ISD::FNEG, VT))
10200           return DAG.getNode(ISD::FNEG, DL, VT,
10201                    DAG.getNode(ISD::FABS, DL, VT, X));
10202         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
10203           return DAG.getNode(ISD::FABS, DL, VT, X);
10204
10205         break;
10206       }
10207     }
10208   }
10209
10210   // FMUL -> FMA combines:
10211   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
10212     AddToWorklist(Fused.getNode());
10213     return Fused;
10214   }
10215
10216   return SDValue();
10217 }
10218
10219 SDValue DAGCombiner::visitFMA(SDNode *N) {
10220   SDValue N0 = N->getOperand(0);
10221   SDValue N1 = N->getOperand(1);
10222   SDValue N2 = N->getOperand(2);
10223   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10224   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10225   EVT VT = N->getValueType(0);
10226   SDLoc DL(N);
10227   const TargetOptions &Options = DAG.getTarget().Options;
10228
10229   // Constant fold FMA.
10230   if (isa<ConstantFPSDNode>(N0) &&
10231       isa<ConstantFPSDNode>(N1) &&
10232       isa<ConstantFPSDNode>(N2)) {
10233     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
10234   }
10235
10236   if (Options.UnsafeFPMath) {
10237     if (N0CFP && N0CFP->isZero())
10238       return N2;
10239     if (N1CFP && N1CFP->isZero())
10240       return N2;
10241   }
10242   // TODO: The FMA node should have flags that propagate to these nodes.
10243   if (N0CFP && N0CFP->isExactlyValue(1.0))
10244     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
10245   if (N1CFP && N1CFP->isExactlyValue(1.0))
10246     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
10247
10248   // Canonicalize (fma c, x, y) -> (fma x, c, y)
10249   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10250      !isConstantFPBuildVectorOrConstantFP(N1))
10251     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
10252
10253   // TODO: FMA nodes should have flags that propagate to the created nodes.
10254   // For now, create a Flags object for use with all unsafe math transforms.
10255   SDNodeFlags Flags;
10256   Flags.setUnsafeAlgebra(true);
10257
10258   if (Options.UnsafeFPMath) {
10259     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
10260     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
10261         isConstantFPBuildVectorOrConstantFP(N1) &&
10262         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
10263       return DAG.getNode(ISD::FMUL, DL, VT, N0,
10264                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
10265                                      Flags), Flags);
10266     }
10267
10268     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
10269     if (N0.getOpcode() == ISD::FMUL &&
10270         isConstantFPBuildVectorOrConstantFP(N1) &&
10271         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
10272       return DAG.getNode(ISD::FMA, DL, VT,
10273                          N0.getOperand(0),
10274                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
10275                                      Flags),
10276                          N2);
10277     }
10278   }
10279
10280   // (fma x, 1, y) -> (fadd x, y)
10281   // (fma x, -1, y) -> (fadd (fneg x), y)
10282   if (N1CFP) {
10283     if (N1CFP->isExactlyValue(1.0))
10284       // TODO: The FMA node should have flags that propagate to this node.
10285       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
10286
10287     if (N1CFP->isExactlyValue(-1.0) &&
10288         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
10289       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
10290       AddToWorklist(RHSNeg.getNode());
10291       // TODO: The FMA node should have flags that propagate to this node.
10292       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
10293     }
10294
10295     // fma (fneg x), K, y -> fma x -K, y
10296     if (N0.getOpcode() == ISD::FNEG &&
10297         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
10298          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
10299       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
10300                          DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
10301     }
10302   }
10303
10304   if (Options.UnsafeFPMath) {
10305     // (fma x, c, x) -> (fmul x, (c+1))
10306     if (N1CFP && N0 == N2) {
10307       return DAG.getNode(ISD::FMUL, DL, VT, N0,
10308                          DAG.getNode(ISD::FADD, DL, VT, N1,
10309                                      DAG.getConstantFP(1.0, DL, VT), Flags),
10310                          Flags);
10311     }
10312
10313     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
10314     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
10315       return DAG.getNode(ISD::FMUL, DL, VT, N0,
10316                          DAG.getNode(ISD::FADD, DL, VT, N1,
10317                                      DAG.getConstantFP(-1.0, DL, VT), Flags),
10318                          Flags);
10319     }
10320   }
10321
10322   return SDValue();
10323 }
10324
10325 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
10326 // reciprocal.
10327 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
10328 // Notice that this is not always beneficial. One reason is different targets
10329 // may have different costs for FDIV and FMUL, so sometimes the cost of two
10330 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
10331 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
10332 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
10333   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
10334   const SDNodeFlags Flags = N->getFlags();
10335   if (!UnsafeMath && !Flags.hasAllowReciprocal())
10336     return SDValue();
10337
10338   // Skip if current node is a reciprocal.
10339   SDValue N0 = N->getOperand(0);
10340   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10341   if (N0CFP && N0CFP->isExactlyValue(1.0))
10342     return SDValue();
10343
10344   // Exit early if the target does not want this transform or if there can't
10345   // possibly be enough uses of the divisor to make the transform worthwhile.
10346   SDValue N1 = N->getOperand(1);
10347   unsigned MinUses = TLI.combineRepeatedFPDivisors();
10348   if (!MinUses || N1->use_size() < MinUses)
10349     return SDValue();
10350
10351   // Find all FDIV users of the same divisor.
10352   // Use a set because duplicates may be present in the user list.
10353   SetVector<SDNode *> Users;
10354   for (auto *U : N1->uses()) {
10355     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
10356       // This division is eligible for optimization only if global unsafe math
10357       // is enabled or if this division allows reciprocal formation.
10358       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
10359         Users.insert(U);
10360     }
10361   }
10362
10363   // Now that we have the actual number of divisor uses, make sure it meets
10364   // the minimum threshold specified by the target.
10365   if (Users.size() < MinUses)
10366     return SDValue();
10367
10368   EVT VT = N->getValueType(0);
10369   SDLoc DL(N);
10370   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
10371   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
10372
10373   // Dividend / Divisor -> Dividend * Reciprocal
10374   for (auto *U : Users) {
10375     SDValue Dividend = U->getOperand(0);
10376     if (Dividend != FPOne) {
10377       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
10378                                     Reciprocal, Flags);
10379       CombineTo(U, NewNode);
10380     } else if (U != Reciprocal.getNode()) {
10381       // In the absence of fast-math-flags, this user node is always the
10382       // same node as Reciprocal, but with FMF they may be different nodes.
10383       CombineTo(U, Reciprocal);
10384     }
10385   }
10386   return SDValue(N, 0);  // N was replaced.
10387 }
10388
10389 SDValue DAGCombiner::visitFDIV(SDNode *N) {
10390   SDValue N0 = N->getOperand(0);
10391   SDValue N1 = N->getOperand(1);
10392   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10393   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10394   EVT VT = N->getValueType(0);
10395   SDLoc DL(N);
10396   const TargetOptions &Options = DAG.getTarget().Options;
10397   SDNodeFlags Flags = N->getFlags();
10398
10399   // fold vector ops
10400   if (VT.isVector())
10401     if (SDValue FoldedVOp = SimplifyVBinOp(N))
10402       return FoldedVOp;
10403
10404   // fold (fdiv c1, c2) -> c1/c2
10405   if (N0CFP && N1CFP)
10406     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
10407
10408   if (SDValue NewSel = foldBinOpIntoSelect(N))
10409     return NewSel;
10410
10411   if (Options.UnsafeFPMath) {
10412     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
10413     if (N1CFP) {
10414       // Compute the reciprocal 1.0 / c2.
10415       const APFloat &N1APF = N1CFP->getValueAPF();
10416       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
10417       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
10418       // Only do the transform if the reciprocal is a legal fp immediate that
10419       // isn't too nasty (eg NaN, denormal, ...).
10420       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
10421           (!LegalOperations ||
10422            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
10423            // backend)... we should handle this gracefully after Legalize.
10424            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
10425            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
10426            TLI.isFPImmLegal(Recip, VT)))
10427         return DAG.getNode(ISD::FMUL, DL, VT, N0,
10428                            DAG.getConstantFP(Recip, DL, VT), Flags);
10429     }
10430
10431     // If this FDIV is part of a reciprocal square root, it may be folded
10432     // into a target-specific square root estimate instruction.
10433     if (N1.getOpcode() == ISD::FSQRT) {
10434       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
10435         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10436       }
10437     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
10438                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
10439       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
10440                                           Flags)) {
10441         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
10442         AddToWorklist(RV.getNode());
10443         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10444       }
10445     } else if (N1.getOpcode() == ISD::FP_ROUND &&
10446                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
10447       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
10448                                           Flags)) {
10449         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
10450         AddToWorklist(RV.getNode());
10451         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10452       }
10453     } else if (N1.getOpcode() == ISD::FMUL) {
10454       // Look through an FMUL. Even though this won't remove the FDIV directly,
10455       // it's still worthwhile to get rid of the FSQRT if possible.
10456       SDValue SqrtOp;
10457       SDValue OtherOp;
10458       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
10459         SqrtOp = N1.getOperand(0);
10460         OtherOp = N1.getOperand(1);
10461       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
10462         SqrtOp = N1.getOperand(1);
10463         OtherOp = N1.getOperand(0);
10464       }
10465       if (SqrtOp.getNode()) {
10466         // We found a FSQRT, so try to make this fold:
10467         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
10468         if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
10469           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
10470           AddToWorklist(RV.getNode());
10471           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10472         }
10473       }
10474     }
10475
10476     // Fold into a reciprocal estimate and multiply instead of a real divide.
10477     if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
10478       AddToWorklist(RV.getNode());
10479       return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10480     }
10481   }
10482
10483   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
10484   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
10485     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
10486       // Both can be negated for free, check to see if at least one is cheaper
10487       // negated.
10488       if (LHSNeg == 2 || RHSNeg == 2)
10489         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
10490                            GetNegatedExpression(N0, DAG, LegalOperations),
10491                            GetNegatedExpression(N1, DAG, LegalOperations),
10492                            Flags);
10493     }
10494   }
10495
10496   if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
10497     return CombineRepeatedDivisors;
10498
10499   return SDValue();
10500 }
10501
10502 SDValue DAGCombiner::visitFREM(SDNode *N) {
10503   SDValue N0 = N->getOperand(0);
10504   SDValue N1 = N->getOperand(1);
10505   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10506   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10507   EVT VT = N->getValueType(0);
10508
10509   // fold (frem c1, c2) -> fmod(c1,c2)
10510   if (N0CFP && N1CFP)
10511     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
10512
10513   if (SDValue NewSel = foldBinOpIntoSelect(N))
10514     return NewSel;
10515
10516   return SDValue();
10517 }
10518
10519 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
10520   if (!DAG.getTarget().Options.UnsafeFPMath)
10521     return SDValue();
10522
10523   SDValue N0 = N->getOperand(0);
10524   if (TLI.isFsqrtCheap(N0, DAG))
10525     return SDValue();
10526
10527   // TODO: FSQRT nodes should have flags that propagate to the created nodes.
10528   // For now, create a Flags object for use with all unsafe math transforms.
10529   SDNodeFlags Flags;
10530   Flags.setUnsafeAlgebra(true);
10531   return buildSqrtEstimate(N0, Flags);
10532 }
10533
10534 /// copysign(x, fp_extend(y)) -> copysign(x, y)
10535 /// copysign(x, fp_round(y)) -> copysign(x, y)
10536 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
10537   SDValue N1 = N->getOperand(1);
10538   if ((N1.getOpcode() == ISD::FP_EXTEND ||
10539        N1.getOpcode() == ISD::FP_ROUND)) {
10540     // Do not optimize out type conversion of f128 type yet.
10541     // For some targets like x86_64, configuration is changed to keep one f128
10542     // value in one SSE register, but instruction selection cannot handle
10543     // FCOPYSIGN on SSE registers yet.
10544     EVT N1VT = N1->getValueType(0);
10545     EVT N1Op0VT = N1->getOperand(0).getValueType();
10546     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
10547   }
10548   return false;
10549 }
10550
10551 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
10552   SDValue N0 = N->getOperand(0);
10553   SDValue N1 = N->getOperand(1);
10554   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10555   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10556   EVT VT = N->getValueType(0);
10557
10558   if (N0CFP && N1CFP) // Constant fold
10559     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
10560
10561   if (N1CFP) {
10562     const APFloat &V = N1CFP->getValueAPF();
10563     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
10564     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
10565     if (!V.isNegative()) {
10566       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
10567         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10568     } else {
10569       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
10570         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
10571                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
10572     }
10573   }
10574
10575   // copysign(fabs(x), y) -> copysign(x, y)
10576   // copysign(fneg(x), y) -> copysign(x, y)
10577   // copysign(copysign(x,z), y) -> copysign(x, y)
10578   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
10579       N0.getOpcode() == ISD::FCOPYSIGN)
10580     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
10581
10582   // copysign(x, abs(y)) -> abs(x)
10583   if (N1.getOpcode() == ISD::FABS)
10584     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10585
10586   // copysign(x, copysign(y,z)) -> copysign(x, z)
10587   if (N1.getOpcode() == ISD::FCOPYSIGN)
10588     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
10589
10590   // copysign(x, fp_extend(y)) -> copysign(x, y)
10591   // copysign(x, fp_round(y)) -> copysign(x, y)
10592   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
10593     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
10594
10595   return SDValue();
10596 }
10597
10598 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
10599   SDValue N0 = N->getOperand(0);
10600   EVT VT = N->getValueType(0);
10601   EVT OpVT = N0.getValueType();
10602
10603   // fold (sint_to_fp c1) -> c1fp
10604   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10605       // ...but only if the target supports immediate floating-point values
10606       (!LegalOperations ||
10607        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
10608     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
10609
10610   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
10611   // but UINT_TO_FP is legal on this target, try to convert.
10612   if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
10613       TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
10614     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
10615     if (DAG.SignBitIsZero(N0))
10616       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
10617   }
10618
10619   // The next optimizations are desirable only if SELECT_CC can be lowered.
10620   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
10621     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
10622     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
10623         !VT.isVector() &&
10624         (!LegalOperations ||
10625          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
10626       SDLoc DL(N);
10627       SDValue Ops[] =
10628         { N0.getOperand(0), N0.getOperand(1),
10629           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10630           N0.getOperand(2) };
10631       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10632     }
10633
10634     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
10635     //      (select_cc x, y, 1.0, 0.0,, cc)
10636     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
10637         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
10638         (!LegalOperations ||
10639          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
10640       SDLoc DL(N);
10641       SDValue Ops[] =
10642         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
10643           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10644           N0.getOperand(0).getOperand(2) };
10645       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10646     }
10647   }
10648
10649   return SDValue();
10650 }
10651
10652 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
10653   SDValue N0 = N->getOperand(0);
10654   EVT VT = N->getValueType(0);
10655   EVT OpVT = N0.getValueType();
10656
10657   // fold (uint_to_fp c1) -> c1fp
10658   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10659       // ...but only if the target supports immediate floating-point values
10660       (!LegalOperations ||
10661        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
10662     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
10663
10664   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
10665   // but SINT_TO_FP is legal on this target, try to convert.
10666   if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
10667       TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
10668     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
10669     if (DAG.SignBitIsZero(N0))
10670       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
10671   }
10672
10673   // The next optimizations are desirable only if SELECT_CC can be lowered.
10674   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
10675     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
10676     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
10677         (!LegalOperations ||
10678          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
10679       SDLoc DL(N);
10680       SDValue Ops[] =
10681         { N0.getOperand(0), N0.getOperand(1),
10682           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10683           N0.getOperand(2) };
10684       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10685     }
10686   }
10687
10688   return SDValue();
10689 }
10690
10691 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
10692 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
10693   SDValue N0 = N->getOperand(0);
10694   EVT VT = N->getValueType(0);
10695
10696   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
10697     return SDValue();
10698
10699   SDValue Src = N0.getOperand(0);
10700   EVT SrcVT = Src.getValueType();
10701   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
10702   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
10703
10704   // We can safely assume the conversion won't overflow the output range,
10705   // because (for example) (uint8_t)18293.f is undefined behavior.
10706
10707   // Since we can assume the conversion won't overflow, our decision as to
10708   // whether the input will fit in the float should depend on the minimum
10709   // of the input range and output range.
10710
10711   // This means this is also safe for a signed input and unsigned output, since
10712   // a negative input would lead to undefined behavior.
10713   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
10714   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
10715   unsigned ActualSize = std::min(InputSize, OutputSize);
10716   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
10717
10718   // We can only fold away the float conversion if the input range can be
10719   // represented exactly in the float range.
10720   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
10721     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
10722       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
10723                                                        : ISD::ZERO_EXTEND;
10724       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
10725     }
10726     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
10727       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
10728     return DAG.getBitcast(VT, Src);
10729   }
10730   return SDValue();
10731 }
10732
10733 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
10734   SDValue N0 = N->getOperand(0);
10735   EVT VT = N->getValueType(0);
10736
10737   // fold (fp_to_sint c1fp) -> c1
10738   if (isConstantFPBuildVectorOrConstantFP(N0))
10739     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
10740
10741   return FoldIntToFPToInt(N, DAG);
10742 }
10743
10744 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
10745   SDValue N0 = N->getOperand(0);
10746   EVT VT = N->getValueType(0);
10747
10748   // fold (fp_to_uint c1fp) -> c1
10749   if (isConstantFPBuildVectorOrConstantFP(N0))
10750     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
10751
10752   return FoldIntToFPToInt(N, DAG);
10753 }
10754
10755 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
10756   SDValue N0 = N->getOperand(0);
10757   SDValue N1 = N->getOperand(1);
10758   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10759   EVT VT = N->getValueType(0);
10760
10761   // fold (fp_round c1fp) -> c1fp
10762   if (N0CFP)
10763     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
10764
10765   // fold (fp_round (fp_extend x)) -> x
10766   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
10767     return N0.getOperand(0);
10768
10769   // fold (fp_round (fp_round x)) -> (fp_round x)
10770   if (N0.getOpcode() == ISD::FP_ROUND) {
10771     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
10772     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
10773
10774     // Skip this folding if it results in an fp_round from f80 to f16.
10775     //
10776     // f80 to f16 always generates an expensive (and as yet, unimplemented)
10777     // libcall to __truncxfhf2 instead of selecting native f16 conversion
10778     // instructions from f32 or f64.  Moreover, the first (value-preserving)
10779     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
10780     // x86.
10781     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
10782       return SDValue();
10783
10784     // If the first fp_round isn't a value preserving truncation, it might
10785     // introduce a tie in the second fp_round, that wouldn't occur in the
10786     // single-step fp_round we want to fold to.
10787     // In other words, double rounding isn't the same as rounding.
10788     // Also, this is a value preserving truncation iff both fp_round's are.
10789     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
10790       SDLoc DL(N);
10791       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
10792                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
10793     }
10794   }
10795
10796   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
10797   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
10798     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
10799                               N0.getOperand(0), N1);
10800     AddToWorklist(Tmp.getNode());
10801     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
10802                        Tmp, N0.getOperand(1));
10803   }
10804
10805   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10806     return NewVSel;
10807
10808   return SDValue();
10809 }
10810
10811 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
10812   SDValue N0 = N->getOperand(0);
10813   EVT VT = N->getValueType(0);
10814   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10815   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10816
10817   // fold (fp_round_inreg c1fp) -> c1fp
10818   if (N0CFP && isTypeLegal(EVT)) {
10819     SDLoc DL(N);
10820     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
10821     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
10822   }
10823
10824   return SDValue();
10825 }
10826
10827 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
10828   SDValue N0 = N->getOperand(0);
10829   EVT VT = N->getValueType(0);
10830
10831   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
10832   if (N->hasOneUse() &&
10833       N->use_begin()->getOpcode() == ISD::FP_ROUND)
10834     return SDValue();
10835
10836   // fold (fp_extend c1fp) -> c1fp
10837   if (isConstantFPBuildVectorOrConstantFP(N0))
10838     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
10839
10840   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
10841   if (N0.getOpcode() == ISD::FP16_TO_FP &&
10842       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
10843     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
10844
10845   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
10846   // value of X.
10847   if (N0.getOpcode() == ISD::FP_ROUND
10848       && N0.getConstantOperandVal(1) == 1) {
10849     SDValue In = N0.getOperand(0);
10850     if (In.getValueType() == VT) return In;
10851     if (VT.bitsLT(In.getValueType()))
10852       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
10853                          In, N0.getOperand(1));
10854     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
10855   }
10856
10857   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
10858   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
10859        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
10860     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10861     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
10862                                      LN0->getChain(),
10863                                      LN0->getBasePtr(), N0.getValueType(),
10864                                      LN0->getMemOperand());
10865     CombineTo(N, ExtLoad);
10866     CombineTo(N0.getNode(),
10867               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
10868                           N0.getValueType(), ExtLoad,
10869                           DAG.getIntPtrConstant(1, SDLoc(N0))),
10870               ExtLoad.getValue(1));
10871     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10872   }
10873
10874   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10875     return NewVSel;
10876
10877   return SDValue();
10878 }
10879
10880 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
10881   SDValue N0 = N->getOperand(0);
10882   EVT VT = N->getValueType(0);
10883
10884   // fold (fceil c1) -> fceil(c1)
10885   if (isConstantFPBuildVectorOrConstantFP(N0))
10886     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
10887
10888   return SDValue();
10889 }
10890
10891 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
10892   SDValue N0 = N->getOperand(0);
10893   EVT VT = N->getValueType(0);
10894
10895   // fold (ftrunc c1) -> ftrunc(c1)
10896   if (isConstantFPBuildVectorOrConstantFP(N0))
10897     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
10898
10899   // fold ftrunc (known rounded int x) -> x
10900   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
10901   // likely to be generated to extract integer from a rounded floating value.
10902   switch (N0.getOpcode()) {
10903   default: break;
10904   case ISD::FRINT:
10905   case ISD::FTRUNC:
10906   case ISD::FNEARBYINT:
10907   case ISD::FFLOOR:
10908   case ISD::FCEIL:
10909     return N0;
10910   }
10911
10912   return SDValue();
10913 }
10914
10915 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
10916   SDValue N0 = N->getOperand(0);
10917   EVT VT = N->getValueType(0);
10918
10919   // fold (ffloor c1) -> ffloor(c1)
10920   if (isConstantFPBuildVectorOrConstantFP(N0))
10921     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
10922
10923   return SDValue();
10924 }
10925
10926 // FIXME: FNEG and FABS have a lot in common; refactor.
10927 SDValue DAGCombiner::visitFNEG(SDNode *N) {
10928   SDValue N0 = N->getOperand(0);
10929   EVT VT = N->getValueType(0);
10930
10931   // Constant fold FNEG.
10932   if (isConstantFPBuildVectorOrConstantFP(N0))
10933     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
10934
10935   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
10936                          &DAG.getTarget().Options))
10937     return GetNegatedExpression(N0, DAG, LegalOperations);
10938
10939   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
10940   // constant pool values.
10941   if (!TLI.isFNegFree(VT) &&
10942       N0.getOpcode() == ISD::BITCAST &&
10943       N0.getNode()->hasOneUse()) {
10944     SDValue Int = N0.getOperand(0);
10945     EVT IntVT = Int.getValueType();
10946     if (IntVT.isInteger() && !IntVT.isVector()) {
10947       APInt SignMask;
10948       if (N0.getValueType().isVector()) {
10949         // For a vector, get a mask such as 0x80... per scalar element
10950         // and splat it.
10951         SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
10952         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
10953       } else {
10954         // For a scalar, just generate 0x80...
10955         SignMask = APInt::getSignMask(IntVT.getSizeInBits());
10956       }
10957       SDLoc DL0(N0);
10958       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
10959                         DAG.getConstant(SignMask, DL0, IntVT));
10960       AddToWorklist(Int.getNode());
10961       return DAG.getBitcast(VT, Int);
10962     }
10963   }
10964
10965   // (fneg (fmul c, x)) -> (fmul -c, x)
10966   if (N0.getOpcode() == ISD::FMUL &&
10967       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
10968     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
10969     if (CFP1) {
10970       APFloat CVal = CFP1->getValueAPF();
10971       CVal.changeSign();
10972       if (Level >= AfterLegalizeDAG &&
10973           (TLI.isFPImmLegal(CVal, VT) ||
10974            TLI.isOperationLegal(ISD::ConstantFP, VT)))
10975         return DAG.getNode(
10976             ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
10977             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
10978             N0->getFlags());
10979     }
10980   }
10981
10982   return SDValue();
10983 }
10984
10985 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
10986   SDValue N0 = N->getOperand(0);
10987   SDValue N1 = N->getOperand(1);
10988   EVT VT = N->getValueType(0);
10989   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10990   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10991
10992   if (N0CFP && N1CFP) {
10993     const APFloat &C0 = N0CFP->getValueAPF();
10994     const APFloat &C1 = N1CFP->getValueAPF();
10995     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
10996   }
10997
10998   // Canonicalize to constant on RHS.
10999   if (isConstantFPBuildVectorOrConstantFP(N0) &&
11000      !isConstantFPBuildVectorOrConstantFP(N1))
11001     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
11002
11003   return SDValue();
11004 }
11005
11006 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
11007   SDValue N0 = N->getOperand(0);
11008   SDValue N1 = N->getOperand(1);
11009   EVT VT = N->getValueType(0);
11010   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
11011   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
11012
11013   if (N0CFP && N1CFP) {
11014     const APFloat &C0 = N0CFP->getValueAPF();
11015     const APFloat &C1 = N1CFP->getValueAPF();
11016     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
11017   }
11018
11019   // Canonicalize to constant on RHS.
11020   if (isConstantFPBuildVectorOrConstantFP(N0) &&
11021      !isConstantFPBuildVectorOrConstantFP(N1))
11022     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
11023
11024   return SDValue();
11025 }
11026
11027 SDValue DAGCombiner::visitFABS(SDNode *N) {
11028   SDValue N0 = N->getOperand(0);
11029   EVT VT = N->getValueType(0);
11030
11031   // fold (fabs c1) -> fabs(c1)
11032   if (isConstantFPBuildVectorOrConstantFP(N0))
11033     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11034
11035   // fold (fabs (fabs x)) -> (fabs x)
11036   if (N0.getOpcode() == ISD::FABS)
11037     return N->getOperand(0);
11038
11039   // fold (fabs (fneg x)) -> (fabs x)
11040   // fold (fabs (fcopysign x, y)) -> (fabs x)
11041   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
11042     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
11043
11044   // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
11045   // constant pool values.
11046   if (!TLI.isFAbsFree(VT) &&
11047       N0.getOpcode() == ISD::BITCAST &&
11048       N0.getNode()->hasOneUse()) {
11049     SDValue Int = N0.getOperand(0);
11050     EVT IntVT = Int.getValueType();
11051     if (IntVT.isInteger() && !IntVT.isVector()) {
11052       APInt SignMask;
11053       if (N0.getValueType().isVector()) {
11054         // For a vector, get a mask such as 0x7f... per scalar element
11055         // and splat it.
11056         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
11057         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
11058       } else {
11059         // For a scalar, just generate 0x7f...
11060         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
11061       }
11062       SDLoc DL(N0);
11063       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
11064                         DAG.getConstant(SignMask, DL, IntVT));
11065       AddToWorklist(Int.getNode());
11066       return DAG.getBitcast(N->getValueType(0), Int);
11067     }
11068   }
11069
11070   return SDValue();
11071 }
11072
11073 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
11074   SDValue Chain = N->getOperand(0);
11075   SDValue N1 = N->getOperand(1);
11076   SDValue N2 = N->getOperand(2);
11077
11078   // If N is a constant we could fold this into a fallthrough or unconditional
11079   // branch. However that doesn't happen very often in normal code, because
11080   // Instcombine/SimplifyCFG should have handled the available opportunities.
11081   // If we did this folding here, it would be necessary to update the
11082   // MachineBasicBlock CFG, which is awkward.
11083
11084   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
11085   // on the target.
11086   if (N1.getOpcode() == ISD::SETCC &&
11087       TLI.isOperationLegalOrCustom(ISD::BR_CC,
11088                                    N1.getOperand(0).getValueType())) {
11089     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
11090                        Chain, N1.getOperand(2),
11091                        N1.getOperand(0), N1.getOperand(1), N2);
11092   }
11093
11094   if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
11095       ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
11096        (N1.getOperand(0).hasOneUse() &&
11097         N1.getOperand(0).getOpcode() == ISD::SRL))) {
11098     SDNode *Trunc = nullptr;
11099     if (N1.getOpcode() == ISD::TRUNCATE) {
11100       // Look pass the truncate.
11101       Trunc = N1.getNode();
11102       N1 = N1.getOperand(0);
11103     }
11104
11105     // Match this pattern so that we can generate simpler code:
11106     //
11107     //   %a = ...
11108     //   %b = and i32 %a, 2
11109     //   %c = srl i32 %b, 1
11110     //   brcond i32 %c ...
11111     //
11112     // into
11113     //
11114     //   %a = ...
11115     //   %b = and i32 %a, 2
11116     //   %c = setcc eq %b, 0
11117     //   brcond %c ...
11118     //
11119     // This applies only when the AND constant value has one bit set and the
11120     // SRL constant is equal to the log2 of the AND constant. The back-end is
11121     // smart enough to convert the result into a TEST/JMP sequence.
11122     SDValue Op0 = N1.getOperand(0);
11123     SDValue Op1 = N1.getOperand(1);
11124
11125     if (Op0.getOpcode() == ISD::AND &&
11126         Op1.getOpcode() == ISD::Constant) {
11127       SDValue AndOp1 = Op0.getOperand(1);
11128
11129       if (AndOp1.getOpcode() == ISD::Constant) {
11130         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
11131
11132         if (AndConst.isPowerOf2() &&
11133             cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
11134           SDLoc DL(N);
11135           SDValue SetCC =
11136             DAG.getSetCC(DL,
11137                          getSetCCResultType(Op0.getValueType()),
11138                          Op0, DAG.getConstant(0, DL, Op0.getValueType()),
11139                          ISD::SETNE);
11140
11141           SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
11142                                           MVT::Other, Chain, SetCC, N2);
11143           // Don't add the new BRCond into the worklist or else SimplifySelectCC
11144           // will convert it back to (X & C1) >> C2.
11145           CombineTo(N, NewBRCond, false);
11146           // Truncate is dead.
11147           if (Trunc)
11148             deleteAndRecombine(Trunc);
11149           // Replace the uses of SRL with SETCC
11150           WorklistRemover DeadNodes(*this);
11151           DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
11152           deleteAndRecombine(N1.getNode());
11153           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11154         }
11155       }
11156     }
11157
11158     if (Trunc)
11159       // Restore N1 if the above transformation doesn't match.
11160       N1 = N->getOperand(1);
11161   }
11162
11163   // Transform br(xor(x, y)) -> br(x != y)
11164   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
11165   if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
11166     SDNode *TheXor = N1.getNode();
11167     SDValue Op0 = TheXor->getOperand(0);
11168     SDValue Op1 = TheXor->getOperand(1);
11169     if (Op0.getOpcode() == Op1.getOpcode()) {
11170       // Avoid missing important xor optimizations.
11171       if (SDValue Tmp = visitXOR(TheXor)) {
11172         if (Tmp.getNode() != TheXor) {
11173           DEBUG(dbgs() << "\nReplacing.8 ";
11174                 TheXor->dump(&DAG);
11175                 dbgs() << "\nWith: ";
11176                 Tmp.getNode()->dump(&DAG);
11177                 dbgs() << '\n');
11178           WorklistRemover DeadNodes(*this);
11179           DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
11180           deleteAndRecombine(TheXor);
11181           return DAG.getNode(ISD::BRCOND, SDLoc(N),
11182                              MVT::Other, Chain, Tmp, N2);
11183         }
11184
11185         // visitXOR has changed XOR's operands or replaced the XOR completely,
11186         // bail out.
11187         return SDValue(N, 0);
11188       }
11189     }
11190
11191     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
11192       bool Equal = false;
11193       if (isOneConstant(Op0) && Op0.hasOneUse() &&
11194           Op0.getOpcode() == ISD::XOR) {
11195         TheXor = Op0.getNode();
11196         Equal = true;
11197       }
11198
11199       EVT SetCCVT = N1.getValueType();
11200       if (LegalTypes)
11201         SetCCVT = getSetCCResultType(SetCCVT);
11202       SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
11203                                    SetCCVT,
11204                                    Op0, Op1,
11205                                    Equal ? ISD::SETEQ : ISD::SETNE);
11206       // Replace the uses of XOR with SETCC
11207       WorklistRemover DeadNodes(*this);
11208       DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
11209       deleteAndRecombine(N1.getNode());
11210       return DAG.getNode(ISD::BRCOND, SDLoc(N),
11211                          MVT::Other, Chain, SetCC, N2);
11212     }
11213   }
11214
11215   return SDValue();
11216 }
11217
11218 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
11219 //
11220 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
11221   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
11222   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
11223
11224   // If N is a constant we could fold this into a fallthrough or unconditional
11225   // branch. However that doesn't happen very often in normal code, because
11226   // Instcombine/SimplifyCFG should have handled the available opportunities.
11227   // If we did this folding here, it would be necessary to update the
11228   // MachineBasicBlock CFG, which is awkward.
11229
11230   // Use SimplifySetCC to simplify SETCC's.
11231   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
11232                                CondLHS, CondRHS, CC->get(), SDLoc(N),
11233                                false);
11234   if (Simp.getNode()) AddToWorklist(Simp.getNode());
11235
11236   // fold to a simpler setcc
11237   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
11238     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
11239                        N->getOperand(0), Simp.getOperand(2),
11240                        Simp.getOperand(0), Simp.getOperand(1),
11241                        N->getOperand(4));
11242
11243   return SDValue();
11244 }
11245
11246 /// Return true if 'Use' is a load or a store that uses N as its base pointer
11247 /// and that N may be folded in the load / store addressing mode.
11248 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
11249                                     SelectionDAG &DAG,
11250                                     const TargetLowering &TLI) {
11251   EVT VT;
11252   unsigned AS;
11253
11254   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
11255     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
11256       return false;
11257     VT = LD->getMemoryVT();
11258     AS = LD->getAddressSpace();
11259   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
11260     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
11261       return false;
11262     VT = ST->getMemoryVT();
11263     AS = ST->getAddressSpace();
11264   } else
11265     return false;
11266
11267   TargetLowering::AddrMode AM;
11268   if (N->getOpcode() == ISD::ADD) {
11269     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
11270     if (Offset)
11271       // [reg +/- imm]
11272       AM.BaseOffs = Offset->getSExtValue();
11273     else
11274       // [reg +/- reg]
11275       AM.Scale = 1;
11276   } else if (N->getOpcode() == ISD::SUB) {
11277     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
11278     if (Offset)
11279       // [reg +/- imm]
11280       AM.BaseOffs = -Offset->getSExtValue();
11281     else
11282       // [reg +/- reg]
11283       AM.Scale = 1;
11284   } else
11285     return false;
11286
11287   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
11288                                    VT.getTypeForEVT(*DAG.getContext()), AS);
11289 }
11290
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
///
/// Only runs when Level >= AfterLegalizeDAG. Returns true if N was replaced by
/// an indexed load/store (N and the old pointer node are deleted in that
/// case), and false if the DAG was left unchanged.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  // N must be an unindexed load or store whose memory type supports at least
  // one of the PRE_INC / PRE_DEC indexed forms.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  // While Swapped is set, BasePtr/Offset are held in the opposite order from
  // what the target returned; they are swapped back before the indexed node
  // is created and swapped again afterwards for the OtherUses rewrite.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  // A RegisterSDNode base is rejected here as well.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  // They are shared by the OtherUses scan below and by the check for #3, with
  // N as the starting point of the search.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  // Any use that does not fit (not an add/sub, non-constant other operand, or
  // mismatched constant type) abandons the whole set via OtherUses.clear().
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      // Users for which hasPredecessorHelper reports true are left alone.
      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      // Op1 is the operand of the add/sub that is not this use of BasePtr.
      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  // Restore the operand order the target asked for before building the node.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  // Build the indexed replacement for N.
  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Redirect N's results: a load's value and chain map to results 0 and 2 of
  // the indexed load; a store's chain maps to result 1 of the indexed store.
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Swap again so that Offset is the constant for the rewrite below.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    // Work out which operand of this add/sub is the constant offset.
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    // The sign of (y0 * y1) decides whether t1 is added to or subtracted
    // from the new constant.
    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    // CNV = x0 * offset0 - x1 * y0 * y1 * offset1
    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    // NewOp2 is the updated base pointer produced by the indexed node
    // (result 1 of a load, result 0 of a store).
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());
  AddToWorklist(Result.getNode());

  return true;
}
11519
/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folds the add/subtract into the
/// new indexed load/store and all of its uses are redirected to the
/// new load/store.
///
/// Only runs when Level >= AfterLegalizeDAG. Returns true if N was replaced by
/// an indexed load/store (N and the folded add/sub are deleted in that case),
/// and false if the DAG was left unchanged.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  // N must be an unindexed load or store whose memory type supports at least
  // one of the POST_INC / POST_DEC indexed forms.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If N is the only user of the pointer there is no add/sub to fold.
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Look at every add/sub user of the pointer as a candidate to fold; the
  // first candidate that passes all checks is used.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      // Frame-index and register bases are never turned into indexed forms.
      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          // RealUse becomes true as soon as one user of this add/sub cannot
          // fold it into its addressing mode.
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        // Redirect N's results: a load's value and chain map to results 0 and
        // 2 of the indexed load; a store's chain maps to result 1 of the
        // indexed store.
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Op with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}
11639
11640 /// \brief Return the base-pointer arithmetic from an indexed \p LD.
11641 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
11642   ISD::MemIndexedMode AM = LD->getAddressingMode();
11643   assert(AM != ISD::UNINDEXED);
11644   SDValue BP = LD->getOperand(1);
11645   SDValue Inc = LD->getOperand(2);
11646
11647   // Some backends use TargetConstants for load offsets, but don't expect
11648   // TargetConstants in general ADD nodes. We can convert these constants into
11649   // regular Constants (if the constant is not opaque).
11650   assert((Inc.getOpcode() != ISD::TargetConstant ||
11651           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
11652          "Cannot split out indexing using opaque target constants");
11653   if (Inc.getOpcode() == ISD::TargetConstant) {
11654     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
11655     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
11656                           ConstInc->getValueType(0));
11657   }
11658
11659   unsigned Opc =
11660       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
11661   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
11662 }
11663
11664 SDValue DAGCombiner::visitLOAD(SDNode *N) {
11665   LoadSDNode *LD  = cast<LoadSDNode>(N);
11666   SDValue Chain = LD->getChain();
11667   SDValue Ptr   = LD->getBasePtr();
11668
11669   // If load is not volatile and there are no uses of the loaded value (and
11670   // the updated indexed value in case of indexed loads), change uses of the
11671   // chain value into uses of the chain input (i.e. delete the dead load).
11672   if (!LD->isVolatile()) {
11673     if (N->getValueType(1) == MVT::Other) {
11674       // Unindexed loads.
11675       if (!N->hasAnyUseOfValue(0)) {
11676         // It's not safe to use the two value CombineTo variant here. e.g.
11677         // v1, chain2 = load chain1, loc
11678         // v2, chain3 = load chain2, loc
11679         // v3         = add v2, c
11680         // Now we replace use of chain2 with chain1.  This makes the second load
11681         // isomorphic to the one we are deleting, and thus makes this load live.
11682         DEBUG(dbgs() << "\nReplacing.6 ";
11683               N->dump(&DAG);
11684               dbgs() << "\nWith chain: ";
11685               Chain.getNode()->dump(&DAG);
11686               dbgs() << "\n");
11687         WorklistRemover DeadNodes(*this);
11688         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
11689         AddUsersToWorklist(Chain.getNode());
11690         if (N->use_empty())
11691           deleteAndRecombine(N);
11692
11693         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11694       }
11695     } else {
11696       // Indexed loads.
11697       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
11698
11699       // If this load has an opaque TargetConstant offset, then we cannot split
11700       // the indexing into an add/sub directly (that TargetConstant may not be
11701       // valid for a different type of node, and we cannot convert an opaque
11702       // target constant into a regular constant).
11703       bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
11704                        cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
11705
11706       if (!N->hasAnyUseOfValue(0) &&
11707           ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
11708         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
11709         SDValue Index;
11710         if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
11711           Index = SplitIndexingFromLoad(LD);
11712           // Try to fold the base pointer arithmetic into subsequent loads and
11713           // stores.
11714           AddUsersToWorklist(N);
11715         } else
11716           Index = DAG.getUNDEF(N->getValueType(1));
11717         DEBUG(dbgs() << "\nReplacing.7 ";
11718               N->dump(&DAG);
11719               dbgs() << "\nWith: ";
11720               Undef.getNode()->dump(&DAG);
11721               dbgs() << " and 2 other values\n");
11722         WorklistRemover DeadNodes(*this);
11723         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
11724         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
11725         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
11726         deleteAndRecombine(N);
11727         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11728       }
11729     }
11730   }
11731
11732   // If this load is directly stored, replace the load value with the stored
11733   // value.
11734   // TODO: Handle store large -> read small portion.
11735   // TODO: Handle TRUNCSTORE/LOADEXT
11736   if (OptLevel != CodeGenOpt::None &&
11737       ISD::isNormalLoad(N) && !LD->isVolatile()) {
11738     if (ISD::isNON_TRUNCStore(Chain.getNode())) {
11739       StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
11740       if (PrevST->getBasePtr() == Ptr &&
11741           PrevST->getValue().getValueType() == N->getValueType(0))
11742         return CombineTo(N, PrevST->getOperand(1), Chain);
11743     }
11744   }
11745
11746   // Try to infer better alignment information than the load already has.
11747   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
11748     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
11749       if (Align > LD->getMemOperand()->getBaseAlignment()) {
11750         SDValue NewLoad = DAG.getExtLoad(
11751             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
11752             LD->getPointerInfo(), LD->getMemoryVT(), Align,
11753             LD->getMemOperand()->getFlags(), LD->getAAInfo());
11754         if (NewLoad.getNode() != N)
11755           return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
11756       }
11757     }
11758   }
11759
11760   if (LD->isUnindexed()) {
11761     // Walk up chain skipping non-aliasing memory nodes.
11762     SDValue BetterChain = FindBetterChain(N, Chain);
11763
11764     // If there is a better chain.
11765     if (Chain != BetterChain) {
11766       SDValue ReplLoad;
11767
11768       // Replace the chain to void dependency.
11769       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
11770         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
11771                                BetterChain, Ptr, LD->getMemOperand());
11772       } else {
11773         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
11774                                   LD->getValueType(0),
11775                                   BetterChain, Ptr, LD->getMemoryVT(),
11776                                   LD->getMemOperand());
11777       }
11778
11779       // Create token factor to keep old chain connected.
11780       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
11781                                   MVT::Other, Chain, ReplLoad.getValue(1));
11782
11783       // Replace uses with load result and token factor
11784       return CombineTo(N, ReplLoad.getValue(0), Token);
11785     }
11786   }
11787
11788   // Try transforming N to an indexed load.
11789   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11790     return SDValue(N, 0);
11791
11792   // Try to slice up N to more direct loads if the slices are mapped to
11793   // different register banks or pairing can take place.
11794   if (SliceUpLoad(N))
11795     return SDValue(N, 0);
11796
11797   return SDValue();
11798 }
11799
11800 namespace {
11801
11802 /// \brief Helper structure used to slice a load in smaller loads.
11803 /// Basically a slice is obtained from the following sequence:
11804 /// Origin = load Ty1, Base
11805 /// Shift = srl Ty1 Origin, CstTy Amount
11806 /// Inst = trunc Shift to Ty2
11807 ///
11808 /// Then, it will be rewritten into:
11809 /// Slice = load SliceTy, Base + SliceOffset
11810 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
11811 ///
11812 /// SliceTy is deduced from the number of bits that are actually used to
11813 /// build Inst.
11814 struct LoadedSlice {
11815   /// \brief Helper structure used to compute the cost of a slice.
11816   struct Cost {
11817     /// Are we optimizing for code size.
11818     bool ForCodeSize;
11819
11820     /// Various cost.
11821     unsigned Loads = 0;
11822     unsigned Truncates = 0;
11823     unsigned CrossRegisterBanksCopies = 0;
11824     unsigned ZExts = 0;
11825     unsigned Shift = 0;
11826
11827     Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
11828
11829     /// \brief Get the cost of one isolated slice.
11830     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
11831         : ForCodeSize(ForCodeSize), Loads(1) {
11832       EVT TruncType = LS.Inst->getValueType(0);
11833       EVT LoadedType = LS.getLoadedType();
11834       if (TruncType != LoadedType &&
11835           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
11836         ZExts = 1;
11837     }
11838
11839     /// \brief Account for slicing gain in the current cost.
11840     /// Slicing provide a few gains like removing a shift or a
11841     /// truncate. This method allows to grow the cost of the original
11842     /// load with the gain from this slice.
11843     void addSliceGain(const LoadedSlice &LS) {
11844       // Each slice saves a truncate.
11845       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
11846       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
11847                               LS.Inst->getValueType(0)))
11848         ++Truncates;
11849       // If there is a shift amount, this slice gets rid of it.
11850       if (LS.Shift)
11851         ++Shift;
11852       // If this slice can merge a cross register bank copy, account for it.
11853       if (LS.canMergeExpensiveCrossRegisterBankCopy())
11854         ++CrossRegisterBanksCopies;
11855     }
11856
11857     Cost &operator+=(const Cost &RHS) {
11858       Loads += RHS.Loads;
11859       Truncates += RHS.Truncates;
11860       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
11861       ZExts += RHS.ZExts;
11862       Shift += RHS.Shift;
11863       return *this;
11864     }
11865
11866     bool operator==(const Cost &RHS) const {
11867       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
11868              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
11869              ZExts == RHS.ZExts && Shift == RHS.Shift;
11870     }
11871
11872     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
11873
11874     bool operator<(const Cost &RHS) const {
11875       // Assume cross register banks copies are as expensive as loads.
11876       // FIXME: Do we want some more target hooks?
11877       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
11878       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
11879       // Unless we are optimizing for code size, consider the
11880       // expensive operation first.
11881       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
11882         return ExpensiveOpsLHS < ExpensiveOpsRHS;
11883       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
11884              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
11885     }
11886
11887     bool operator>(const Cost &RHS) const { return RHS < *this; }
11888
11889     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
11890
11891     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
11892   };
11893
11894   // The last instruction that represent the slice. This should be a
11895   // truncate instruction.
11896   SDNode *Inst;
11897
11898   // The original load instruction.
11899   LoadSDNode *Origin;
11900
11901   // The right shift amount in bits from the original load.
11902   unsigned Shift;
11903
11904   // The DAG from which Origin came from.
11905   // This is used to get some contextual information about legal types, etc.
11906   SelectionDAG *DAG;
11907
11908   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
11909               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
11910       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
11911
11912   /// \brief Get the bits used in a chunk of bits \p BitWidth large.
11913   /// \return Result is \p BitWidth and has used bits set to 1 and
11914   ///         not used bits set to 0.
11915   APInt getUsedBits() const {
11916     // Reproduce the trunc(lshr) sequence:
11917     // - Start from the truncated value.
11918     // - Zero extend to the desired bit width.
11919     // - Shift left.
11920     assert(Origin && "No original load to compare against.");
11921     unsigned BitWidth = Origin->getValueSizeInBits(0);
11922     assert(Inst && "This slice is not bound to an instruction");
11923     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
11924            "Extracted slice is bigger than the whole type!");
11925     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
11926     UsedBits.setAllBits();
11927     UsedBits = UsedBits.zext(BitWidth);
11928     UsedBits <<= Shift;
11929     return UsedBits;
11930   }
11931
11932   /// \brief Get the size of the slice to be loaded in bytes.
11933   unsigned getLoadedSize() const {
11934     unsigned SliceSize = getUsedBits().countPopulation();
11935     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
11936     return SliceSize / 8;
11937   }
11938
11939   /// \brief Get the type that will be loaded for this slice.
11940   /// Note: This may not be the final type for the slice.
11941   EVT getLoadedType() const {
11942     assert(DAG && "Missing context");
11943     LLVMContext &Ctxt = *DAG->getContext();
11944     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
11945   }
11946
11947   /// \brief Get the alignment of the load used for this slice.
11948   unsigned getAlignment() const {
11949     unsigned Alignment = Origin->getAlignment();
11950     unsigned Offset = getOffsetFromBase();
11951     if (Offset != 0)
11952       Alignment = MinAlign(Alignment, Alignment + Offset);
11953     return Alignment;
11954   }
11955
11956   /// \brief Check if this slice can be rewritten with legal operations.
11957   bool isLegal() const {
11958     // An invalid slice is not legal.
11959     if (!Origin || !Inst || !DAG)
11960       return false;
11961
11962     // Offsets are for indexed load only, we do not handle that.
11963     if (!Origin->getOffset().isUndef())
11964       return false;
11965
11966     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
11967
11968     // Check that the type is legal.
11969     EVT SliceType = getLoadedType();
11970     if (!TLI.isTypeLegal(SliceType))
11971       return false;
11972
11973     // Check that the load is legal for this type.
11974     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
11975       return false;
11976
11977     // Check that the offset can be computed.
11978     // 1. Check its type.
11979     EVT PtrType = Origin->getBasePtr().getValueType();
11980     if (PtrType == MVT::Untyped || PtrType.isExtended())
11981       return false;
11982
11983     // 2. Check that it fits in the immediate.
11984     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
11985       return false;
11986
11987     // 3. Check that the computation is legal.
11988     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
11989       return false;
11990
11991     // Check that the zext is legal if it needs one.
11992     EVT TruncateType = Inst->getValueType(0);
11993     if (TruncateType != SliceType &&
11994         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
11995       return false;
11996
11997     return true;
11998   }
11999
12000   /// \brief Get the offset in bytes of this slice in the original chunk of
12001   /// bits.
12002   /// \pre DAG != nullptr.
12003   uint64_t getOffsetFromBase() const {
12004     assert(DAG && "Missing context.");
12005     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
12006     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
12007     uint64_t Offset = Shift / 8;
12008     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
12009     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
12010            "The size of the original loaded type is not a multiple of a"
12011            " byte.");
12012     // If Offset is bigger than TySizeInBytes, it means we are loading all
12013     // zeros. This should have been optimized before in the process.
12014     assert(TySizeInBytes > Offset &&
12015            "Invalid shift amount for given loaded size");
12016     if (IsBigEndian)
12017       Offset = TySizeInBytes - Offset - getLoadedSize();
12018     return Offset;
12019   }
12020
12021   /// \brief Generate the sequence of instructions to load the slice
12022   /// represented by this object and redirect the uses of this slice to
12023   /// this new sequence of instructions.
12024   /// \pre this->Inst && this->Origin are valid Instructions and this
12025   /// object passed the legal check: LoadedSlice::isLegal returned true.
12026   /// \return The last instruction of the sequence used to load the slice.
12027   SDValue loadSlice() const {
12028     assert(Inst && Origin && "Unable to replace a non-existing slice.");
12029     const SDValue &OldBaseAddr = Origin->getBasePtr();
12030     SDValue BaseAddr = OldBaseAddr;
12031     // Get the offset in that chunk of bytes w.r.t. the endianness.
12032     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
12033     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
12034     if (Offset) {
12035       // BaseAddr = BaseAddr + Offset.
12036       EVT ArithType = BaseAddr.getValueType();
12037       SDLoc DL(Origin);
12038       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
12039                               DAG->getConstant(Offset, DL, ArithType));
12040     }
12041
12042     // Create the type of the loaded slice according to its size.
12043     EVT SliceType = getLoadedType();
12044
12045     // Create the load for the slice.
12046     SDValue LastInst =
12047         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
12048                      Origin->getPointerInfo().getWithOffset(Offset),
12049                      getAlignment(), Origin->getMemOperand()->getFlags());
12050     // If the final type is not the same as the loaded type, this means that
12051     // we have to pad with zero. Create a zero extend for that.
12052     EVT FinalType = Inst->getValueType(0);
12053     if (SliceType != FinalType)
12054       LastInst =
12055           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
12056     return LastInst;
12057   }
12058
12059   /// \brief Check if this slice can be merged with an expensive cross register
12060   /// bank copy. E.g.,
12061   /// i = load i32
12062   /// f = bitcast i32 i to float
12063   bool canMergeExpensiveCrossRegisterBankCopy() const {
12064     if (!Inst || !Inst->hasOneUse())
12065       return false;
12066     SDNode *Use = *Inst->use_begin();
12067     if (Use->getOpcode() != ISD::BITCAST)
12068       return false;
12069     assert(DAG && "Missing context");
12070     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
12071     EVT ResVT = Use->getValueType(0);
12072     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
12073     const TargetRegisterClass *ArgRC =
12074         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
12075     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
12076       return false;
12077
12078     // At this point, we know that we perform a cross-register-bank copy.
12079     // Check if it is expensive.
12080     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
12081     // Assume bitcasts are cheap, unless both register classes do not
12082     // explicitly share a common sub class.
12083     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
12084       return false;
12085
12086     // Check if it will be merged with the load.
12087     // 1. Check the alignment constraint.
12088     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
12089         ResVT.getTypeForEVT(*DAG->getContext()));
12090
12091     if (RequiredAlignment > getAlignment())
12092       return false;
12093
12094     // 2. Check that the load is a legal operation for that type.
12095     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
12096       return false;
12097
12098     // 3. Check that we do not have a zext in the way.
12099     if (Inst->getValueType(0) != getLoadedType())
12100       return false;
12101
12102     return true;
12103   }
12104 };
12105
12106 } // end anonymous namespace
12107
12108 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
12109 /// \p UsedBits looks like 0..0 1..1 0..0.
12110 static bool areUsedBitsDense(const APInt &UsedBits) {
12111   // If all the bits are one, this is dense!
12112   if (UsedBits.isAllOnesValue())
12113     return true;
12114
12115   // Get rid of the unused bits on the right.
12116   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
12117   // Get rid of the unused bits on the left.
12118   if (NarrowedUsedBits.countLeadingZeros())
12119     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
12120   // Check that the chunk of bits is completely used.
12121   return NarrowedUsedBits.isAllOnesValue();
12122 }
12123
12124 /// \brief Check whether or not \p First and \p Second are next to each other
12125 /// in memory. This means that there is no hole between the bits loaded
12126 /// by \p First and the bits loaded by \p Second.
12127 static bool areSlicesNextToEachOther(const LoadedSlice &First,
12128                                      const LoadedSlice &Second) {
12129   assert(First.Origin == Second.Origin && First.Origin &&
12130          "Unable to match different memory origins.");
12131   APInt UsedBits = First.getUsedBits();
12132   assert((UsedBits & Second.getUsedBits()) == 0 &&
12133          "Slices are not supposed to overlap.");
12134   UsedBits |= Second.getUsedBits();
12135   return areUsedBitsDense(UsedBits);
12136 }
12137
12138 /// \brief Adjust the \p GlobalLSCost according to the target
12139 /// paring capabilities and the layout of the slices.
12140 /// \pre \p GlobalLSCost should account for at least as many loads as
12141 /// there is in the slices in \p LoadedSlices.
12142 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
12143                                  LoadedSlice::Cost &GlobalLSCost) {
12144   unsigned NumberOfSlices = LoadedSlices.size();
12145   // If there is less than 2 elements, no pairing is possible.
12146   if (NumberOfSlices < 2)
12147     return;
12148
12149   // Sort the slices so that elements that are likely to be next to each
12150   // other in memory are next to each other in the list.
12151   std::sort(LoadedSlices.begin(), LoadedSlices.end(),
12152             [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
12153     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
12154     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
12155   });
12156   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
12157   // First (resp. Second) is the first (resp. Second) potentially candidate
12158   // to be placed in a paired load.
12159   const LoadedSlice *First = nullptr;
12160   const LoadedSlice *Second = nullptr;
12161   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
12162                 // Set the beginning of the pair.
12163                                                            First = Second) {
12164     Second = &LoadedSlices[CurrSlice];
12165
12166     // If First is NULL, it means we start a new pair.
12167     // Get to the next slice.
12168     if (!First)
12169       continue;
12170
12171     EVT LoadedType = First->getLoadedType();
12172
12173     // If the types of the slices are different, we cannot pair them.
12174     if (LoadedType != Second->getLoadedType())
12175       continue;
12176
12177     // Check if the target supplies paired loads for this type.
12178     unsigned RequiredAlignment = 0;
12179     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
12180       // move to the next pair, this type is hopeless.
12181       Second = nullptr;
12182       continue;
12183     }
12184     // Check if we meet the alignment requirement.
12185     if (RequiredAlignment > First->getAlignment())
12186       continue;
12187
12188     // Check that both loads are next to each other in memory.
12189     if (!areSlicesNextToEachOther(*First, *Second))
12190       continue;
12191
12192     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
12193     --GlobalLSCost.Loads;
12194     // Move to the next pair.
12195     Second = nullptr;
12196   }
12197 }
12198
12199 /// \brief Check the profitability of all involved LoadedSlice.
12200 /// Currently, it is considered profitable if there is exactly two
12201 /// involved slices (1) which are (2) next to each other in memory, and
12202 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
12203 ///
12204 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
12205 /// the elements themselves.
12206 ///
12207 /// FIXME: When the cost model will be mature enough, we can relax
12208 /// constraints (1) and (2).
12209 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
12210                                 const APInt &UsedBits, bool ForCodeSize) {
12211   unsigned NumberOfSlices = LoadedSlices.size();
12212   if (StressLoadSlicing)
12213     return NumberOfSlices > 1;
12214
12215   // Check (1).
12216   if (NumberOfSlices != 2)
12217     return false;
12218
12219   // Check (2).
12220   if (!areUsedBitsDense(UsedBits))
12221     return false;
12222
12223   // Check (3).
12224   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
12225   // The original code has one big load.
12226   OrigCost.Loads = 1;
12227   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
12228     const LoadedSlice &LS = LoadedSlices[CurrSlice];
12229     // Accumulate the cost of all the slices.
12230     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
12231     GlobalSlicingCost += SliceCost;
12232
12233     // Account as cost in the original configuration the gain obtained
12234     // with the current slices.
12235     OrigCost.addSliceGain(LS);
12236   }
12237
12238   // If the target supports paired load, adjust the cost accordingly.
12239   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
12240   return OrigCost > GlobalSlicingCost;
12241 }
12242
12243 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
12244 /// operations, split it in the various pieces being extracted.
12245 ///
12246 /// This sort of thing is introduced by SROA.
12247 /// This slicing takes care not to insert overlapping loads.
12248 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
12249 bool DAGCombiner::SliceUpLoad(SDNode *N) {
12250   if (Level < AfterLegalizeDAG)
12251     return false;
12252
12253   LoadSDNode *LD = cast<LoadSDNode>(N);
12254   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
12255       !LD->getValueType(0).isInteger())
12256     return false;
12257
12258   // Keep track of already used bits to detect overlapping values.
12259   // In that case, we will just abort the transformation.
12260   APInt UsedBits(LD->getValueSizeInBits(0), 0);
12261
12262   SmallVector<LoadedSlice, 4> LoadedSlices;
12263
12264   // Check if this load is used as several smaller chunks of bits.
12265   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
12266   // of computation for each trunc.
12267   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
12268        UI != UIEnd; ++UI) {
12269     // Skip the uses of the chain.
12270     if (UI.getUse().getResNo() != 0)
12271       continue;
12272
12273     SDNode *User = *UI;
12274     unsigned Shift = 0;
12275
12276     // Check if this is a trunc(lshr).
12277     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
12278         isa<ConstantSDNode>(User->getOperand(1))) {
12279       Shift = User->getConstantOperandVal(1);
12280       User = *User->use_begin();
12281     }
12282
12283     // At this point, User is a Truncate, iff we encountered, trunc or
12284     // trunc(lshr).
12285     if (User->getOpcode() != ISD::TRUNCATE)
12286       return false;
12287
12288     // The width of the type must be a power of 2 and greater than 8-bits.
12289     // Otherwise the load cannot be represented in LLVM IR.
12290     // Moreover, if we shifted with a non-8-bits multiple, the slice
12291     // will be across several bytes. We do not support that.
12292     unsigned Width = User->getValueSizeInBits(0);
12293     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
12294       return false;
12295
12296     // Build the slice for this chain of computations.
12297     LoadedSlice LS(User, LD, Shift, &DAG);
12298     APInt CurrentUsedBits = LS.getUsedBits();
12299
12300     // Check if this slice overlaps with another.
12301     if ((CurrentUsedBits & UsedBits) != 0)
12302       return false;
12303     // Update the bits used globally.
12304     UsedBits |= CurrentUsedBits;
12305
12306     // Check if the new slice would be legal.
12307     if (!LS.isLegal())
12308       return false;
12309
12310     // Record the slice.
12311     LoadedSlices.push_back(LS);
12312   }
12313
12314   // Abort slicing if it does not seem to be profitable.
12315   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
12316     return false;
12317
12318   ++SlicedLoads;
12319
12320   // Rewrite each chain to use an independent load.
12321   // By construction, each chain can be represented by a unique load.
12322
12323   // Prepare the argument for the new token factor for all the slices.
12324   SmallVector<SDValue, 8> ArgChains;
12325   for (SmallVectorImpl<LoadedSlice>::const_iterator
12326            LSIt = LoadedSlices.begin(),
12327            LSItEnd = LoadedSlices.end();
12328        LSIt != LSItEnd; ++LSIt) {
12329     SDValue SliceInst = LSIt->loadSlice();
12330     CombineTo(LSIt->Inst, SliceInst, true);
12331     if (SliceInst.getOpcode() != ISD::LOAD)
12332       SliceInst = SliceInst.getOperand(0);
12333     assert(SliceInst->getOpcode() == ISD::LOAD &&
12334            "It takes more than a zext to get to the loaded slice!!");
12335     ArgChains.push_back(SliceInst.getValue(1));
12336   }
12337
12338   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
12339                               ArgChains);
12340   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
12341   AddToWorklist(Chain.getNode());
12342   return true;
12343 }
12344
12345 /// Check to see if V is (and load (ptr), imm), where the load is having
12346 /// specific bytes cleared out.  If so, return the byte size being masked out
12347 /// and the shift amount.
12348 static std::pair<unsigned, unsigned>
12349 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
12350   std::pair<unsigned, unsigned> Result(0, 0);
12351
12352   // Check for the structure we're looking for.
12353   if (V->getOpcode() != ISD::AND ||
12354       !isa<ConstantSDNode>(V->getOperand(1)) ||
12355       !ISD::isNormalLoad(V->getOperand(0).getNode()))
12356     return Result;
12357
12358   // Check the chain and pointer.
12359   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
12360   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
12361
12362   // The store should be chained directly to the load or be an operand of a
12363   // tokenfactor.
12364   if (LD == Chain.getNode())
12365     ; // ok.
12366   else if (Chain->getOpcode() != ISD::TokenFactor)
12367     return Result; // Fail.
12368   else {
12369     bool isOk = false;
12370     for (const SDValue &ChainOp : Chain->op_values())
12371       if (ChainOp.getNode() == LD) {
12372         isOk = true;
12373         break;
12374       }
12375     if (!isOk) return Result;
12376   }
12377
12378   // This only handles simple types.
12379   if (V.getValueType() != MVT::i16 &&
12380       V.getValueType() != MVT::i32 &&
12381       V.getValueType() != MVT::i64)
12382     return Result;
12383
12384   // Check the constant mask.  Invert it so that the bits being masked out are
12385   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
12386   // follow the sign bit for uniformity.
12387   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
12388   unsigned NotMaskLZ = countLeadingZeros(NotMask);
12389   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
12390   unsigned NotMaskTZ = countTrailingZeros(NotMask);
12391   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
12392   if (NotMaskLZ == 64) return Result;  // All zero mask.
12393
12394   // See if we have a continuous run of bits.  If so, we have 0*1+0*
12395   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
12396     return Result;
12397
12398   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
12399   if (V.getValueType() != MVT::i64 && NotMaskLZ)
12400     NotMaskLZ -= 64-V.getValueSizeInBits();
12401
12402   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
12403   switch (MaskedBytes) {
12404   case 1:
12405   case 2:
12406   case 4: break;
12407   default: return Result; // All one mask, or 5-byte mask.
12408   }
12409
12410   // Verify that the first bit starts at a multiple of mask so that the access
12411   // is aligned the same as the access width.
12412   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
12413
12414   Result.first = MaskedBytes;
12415   Result.second = NotMaskTZ/8;
12416   return Result;
12417 }
12418
12419 /// Check to see if IVal is something that provides a value as specified by
12420 /// MaskInfo. If so, replace the specified store with a narrower store of
12421 /// truncated IVal.
12422 static SDNode *
12423 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
12424                                 SDValue IVal, StoreSDNode *St,
12425                                 DAGCombiner *DC) {
12426   unsigned NumBytes = MaskInfo.first;
12427   unsigned ByteShift = MaskInfo.second;
12428   SelectionDAG &DAG = DC->getDAG();
12429
12430   // Check to see if IVal is all zeros in the part being masked in by the 'or'
12431   // that uses this.  If not, this is not a replacement.
12432   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
12433                                   ByteShift*8, (ByteShift+NumBytes)*8);
12434   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
12435
12436   // Check that it is legal on the target to do this.  It is legal if the new
12437   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
12438   // legalization.
12439   MVT VT = MVT::getIntegerVT(NumBytes*8);
12440   if (!DC->isTypeLegal(VT))
12441     return nullptr;
12442
12443   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
12444   // shifted by ByteShift and truncated down to NumBytes.
12445   if (ByteShift) {
12446     SDLoc DL(IVal);
12447     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
12448                        DAG.getConstant(ByteShift*8, DL,
12449                                     DC->getShiftAmountTy(IVal.getValueType())));
12450   }
12451
12452   // Figure out the offset for the store and the alignment of the access.
12453   unsigned StOffset;
12454   unsigned NewAlign = St->getAlignment();
12455
12456   if (DAG.getDataLayout().isLittleEndian())
12457     StOffset = ByteShift;
12458   else
12459     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
12460
12461   SDValue Ptr = St->getBasePtr();
12462   if (StOffset) {
12463     SDLoc DL(IVal);
12464     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
12465                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
12466     NewAlign = MinAlign(NewAlign, StOffset);
12467   }
12468
12469   // Truncate down to the new size.
12470   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
12471
12472   ++OpsNarrowed;
12473   return DAG
12474       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
12475                 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
12476       .getNode();
12477 }
12478
12479 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
12480 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
12481 /// narrowing the load and store if it would end up being a win for performance
12482 /// or code size.
12483 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
12484   StoreSDNode *ST  = cast<StoreSDNode>(N);
12485   if (ST->isVolatile())
12486     return SDValue();
12487
12488   SDValue Chain = ST->getChain();
12489   SDValue Value = ST->getValue();
12490   SDValue Ptr   = ST->getBasePtr();
12491   EVT VT = Value.getValueType();
12492
12493   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
12494     return SDValue();
12495
12496   unsigned Opc = Value.getOpcode();
12497
12498   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
12499   // is a byte mask indicating a consecutive number of bytes, check to see if
12500   // Y is known to provide just those bytes.  If so, we try to replace the
12501   // load + replace + store sequence with a single (narrower) store, which makes
12502   // the load dead.
12503   if (Opc == ISD::OR) {
12504     std::pair<unsigned, unsigned> MaskedLoad;
12505     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
12506     if (MaskedLoad.first)
12507       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
12508                                                   Value.getOperand(1), ST,this))
12509         return SDValue(NewST, 0);
12510
12511     // Or is commutative, so try swapping X and Y.
12512     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
12513     if (MaskedLoad.first)
12514       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
12515                                                   Value.getOperand(0), ST,this))
12516         return SDValue(NewST, 0);
12517   }
12518
12519   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
12520       Value.getOperand(1).getOpcode() != ISD::Constant)
12521     return SDValue();
12522
12523   SDValue N0 = Value.getOperand(0);
12524   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12525       Chain == SDValue(N0.getNode(), 1)) {
12526     LoadSDNode *LD = cast<LoadSDNode>(N0);
12527     if (LD->getBasePtr() != Ptr ||
12528         LD->getPointerInfo().getAddrSpace() !=
12529         ST->getPointerInfo().getAddrSpace())
12530       return SDValue();
12531
12532     // Find the type to narrow it the load / op / store to.
12533     SDValue N1 = Value.getOperand(1);
12534     unsigned BitWidth = N1.getValueSizeInBits();
12535     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
12536     if (Opc == ISD::AND)
12537       Imm ^= APInt::getAllOnesValue(BitWidth);
12538     if (Imm == 0 || Imm.isAllOnesValue())
12539       return SDValue();
12540     unsigned ShAmt = Imm.countTrailingZeros();
12541     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
12542     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
12543     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
12544     // The narrowing should be profitable, the load/store operation should be
12545     // legal (or custom) and the store size should be equal to the NewVT width.
12546     while (NewBW < BitWidth &&
12547            (NewVT.getStoreSizeInBits() != NewBW ||
12548             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
12549             !TLI.isNarrowingProfitable(VT, NewVT))) {
12550       NewBW = NextPowerOf2(NewBW);
12551       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
12552     }
12553     if (NewBW >= BitWidth)
12554       return SDValue();
12555
12556     // If the lsb changed does not start at the type bitwidth boundary,
12557     // start at the previous one.
12558     if (ShAmt % NewBW)
12559       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
12560     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
12561                                    std::min(BitWidth, ShAmt + NewBW));
12562     if ((Imm & Mask) == Imm) {
12563       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
12564       if (Opc == ISD::AND)
12565         NewImm ^= APInt::getAllOnesValue(NewBW);
12566       uint64_t PtrOff = ShAmt / 8;
12567       // For big endian targets, we need to adjust the offset to the pointer to
12568       // load the correct bytes.
12569       if (DAG.getDataLayout().isBigEndian())
12570         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
12571
12572       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
12573       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
12574       if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
12575         return SDValue();
12576
12577       SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
12578                                    Ptr.getValueType(), Ptr,
12579                                    DAG.getConstant(PtrOff, SDLoc(LD),
12580                                                    Ptr.getValueType()));
12581       SDValue NewLD =
12582           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
12583                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
12584                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
12585       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
12586                                    DAG.getConstant(NewImm, SDLoc(Value),
12587                                                    NewVT));
12588       SDValue NewST =
12589           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
12590                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
12591
12592       AddToWorklist(NewPtr.getNode());
12593       AddToWorklist(NewLD.getNode());
12594       AddToWorklist(NewVal.getNode());
12595       WorklistRemover DeadNodes(*this);
12596       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
12597       ++OpsNarrowed;
12598       return NewST;
12599     }
12600   }
12601
12602   return SDValue();
12603 }
12604
12605 /// For a given floating point load / store pair, if the load value isn't used
12606 /// by any other operations, then consider transforming the pair to integer
12607 /// load / store operations if the target deems the transformation profitable.
12608 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
12609   StoreSDNode *ST  = cast<StoreSDNode>(N);
12610   SDValue Chain = ST->getChain();
12611   SDValue Value = ST->getValue();
12612   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
12613       Value.hasOneUse() &&
12614       Chain == SDValue(Value.getNode(), 1)) {
12615     LoadSDNode *LD = cast<LoadSDNode>(Value);
12616     EVT VT = LD->getMemoryVT();
12617     if (!VT.isFloatingPoint() ||
12618         VT != ST->getMemoryVT() ||
12619         LD->isNonTemporal() ||
12620         ST->isNonTemporal() ||
12621         LD->getPointerInfo().getAddrSpace() != 0 ||
12622         ST->getPointerInfo().getAddrSpace() != 0)
12623       return SDValue();
12624
12625     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
12626     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
12627         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
12628         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
12629         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
12630       return SDValue();
12631
12632     unsigned LDAlign = LD->getAlignment();
12633     unsigned STAlign = ST->getAlignment();
12634     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
12635     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
12636     if (LDAlign < ABIAlign || STAlign < ABIAlign)
12637       return SDValue();
12638
12639     SDValue NewLD =
12640         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
12641                     LD->getPointerInfo(), LDAlign);
12642
12643     SDValue NewST =
12644         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
12645                      ST->getPointerInfo(), STAlign);
12646
12647     AddToWorklist(NewLD.getNode());
12648     AddToWorklist(NewST.getNode());
12649     WorklistRemover DeadNodes(*this);
12650     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
12651     ++LdStFP2Int;
12652     return NewST;
12653   }
12654
12655   return SDValue();
12656 }
12657
12658 // This is a helper function for visitMUL to check the profitability
12659 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
12660 // MulNode is the original multiply, AddNode is (add x, c1),
12661 // and ConstNode is c2.
12662 //
12663 // If the (add x, c1) has multiple uses, we could increase
12664 // the number of adds if we make this transformation.
12665 // It would only be worth doing this if we can remove a
12666 // multiply in the process. Check for that here.
12667 // To illustrate:
12668 //     (A + c1) * c3
12669 //     (A + c2) * c3
12670 // We're checking for cases where we have common "c3 * A" expressions.
12671 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
12672                                               SDValue &AddNode,
12673                                               SDValue &ConstNode) {
12674   APInt Val;
12675
12676   // If the add only has one use, this would be OK to do.
12677   if (AddNode.getNode()->hasOneUse())
12678     return true;
12679
12680   // Walk all the users of the constant with which we're multiplying.
12681   for (SDNode *Use : ConstNode->uses()) {
12682     if (Use == MulNode) // This use is the one we're on right now. Skip it.
12683       continue;
12684
12685     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
12686       SDNode *OtherOp;
12687       SDNode *MulVar = AddNode.getOperand(0).getNode();
12688
12689       // OtherOp is what we're multiplying against the constant.
12690       if (Use->getOperand(0) == ConstNode)
12691         OtherOp = Use->getOperand(1).getNode();
12692       else
12693         OtherOp = Use->getOperand(0).getNode();
12694
12695       // Check to see if multiply is with the same operand of our "add".
12696       //
12697       //     ConstNode  = CONST
12698       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
12699       //     ...
12700       //     AddNode  = (A + c1)  <-- MulVar is A.
12701       //         = AddNode * ConstNode   <-- current visiting instruction.
12702       //
12703       // If we make this transformation, we will have a common
12704       // multiply (ConstNode * A) that we can save.
12705       if (OtherOp == MulVar)
12706         return true;
12707
12708       // Now check to see if a future expansion will give us a common
12709       // multiply.
12710       //
12711       //     ConstNode  = CONST
12712       //     AddNode    = (A + c1)
12713       //     ...   = AddNode * ConstNode <-- current visiting instruction.
12714       //     ...
12715       //     OtherOp = (A + c2)
12716       //     Use     = OtherOp * ConstNode <-- visiting Use.
12717       //
12718       // If we make this transformation, we will have a common
12719       // multiply (CONST * A) after we also do the same transformation
12720       // to the "t2" instruction.
12721       if (OtherOp->getOpcode() == ISD::ADD &&
12722           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
12723           OtherOp->getOperand(0).getNode() == MulVar)
12724         return true;
12725     }
12726   }
12727
12728   // Didn't find a case where this would be profitable.
12729   return false;
12730 }
12731
12732 static SDValue peekThroughBitcast(SDValue V) {
12733   while (V.getOpcode() == ISD::BITCAST)
12734     V = V.getOperand(0);
12735   return V;
12736 }
12737
12738 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
12739                                          unsigned NumStores) {
12740   SmallVector<SDValue, 8> Chains;
12741   SmallPtrSet<const SDNode *, 8> Visited;
12742   SDLoc StoreDL(StoreNodes[0].MemNode);
12743
12744   for (unsigned i = 0; i < NumStores; ++i) {
12745     Visited.insert(StoreNodes[i].MemNode);
12746   }
12747
12748   // don't include nodes that are children
12749   for (unsigned i = 0; i < NumStores; ++i) {
12750     if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
12751       Chains.push_back(StoreNodes[i].MemNode->getChain());
12752   }
12753
12754   assert(Chains.size() > 0 && "Chain should have generated a chain");
12755   return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
12756 }
12757
12758 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
12759     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
12760     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
12761   // Make sure we have something to merge.
12762   if (NumStores < 2)
12763     return false;
12764
12765   // The latest Node in the DAG.
12766   SDLoc DL(StoreNodes[0].MemNode);
12767
12768   int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
12769   unsigned SizeInBits = NumStores * ElementSizeBits;
12770   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
12771
12772   EVT StoreTy;
12773   if (UseVector) {
12774     unsigned Elts = NumStores * NumMemElts;
12775     // Get the type for the merged vector store.
12776     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
12777   } else
12778     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
12779
12780   SDValue StoredVal;
12781   if (UseVector) {
12782     if (IsConstantSrc) {
12783       SmallVector<SDValue, 8> BuildVector;
12784       for (unsigned I = 0; I != NumStores; ++I) {
12785         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
12786         SDValue Val = St->getValue();
12787         // If constant is of the wrong type, convert it now.
12788         if (MemVT != Val.getValueType()) {
12789           Val = peekThroughBitcast(Val);
12790           // Deal with constants of wrong size.
12791           if (ElementSizeBits != Val.getValueSizeInBits()) {
12792             EVT IntMemVT =
12793                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
12794             if (isa<ConstantFPSDNode>(Val)) {
12795               // Not clear how to truncate FP values.
12796               return false;
12797             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
12798               Val = DAG.getConstant(C->getAPIntValue()
12799                                         .zextOrTrunc(Val.getValueSizeInBits())
12800                                         .zextOrTrunc(ElementSizeBits),
12801                                     SDLoc(C), IntMemVT);
12802           }
12803           // Make sure correctly size type is the correct type.
12804           Val = DAG.getBitcast(MemVT, Val);
12805         }
12806         BuildVector.push_back(Val);
12807       }
12808       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
12809                                                : ISD::BUILD_VECTOR,
12810                               DL, StoreTy, BuildVector);
12811     } else {
12812       SmallVector<SDValue, 8> Ops;
12813       for (unsigned i = 0; i < NumStores; ++i) {
12814         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12815         SDValue Val = peekThroughBitcast(St->getValue());
12816         // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
12817         // type MemVT. If the underlying value is not the correct
12818         // type, but it is an extraction of an appropriate vector we
12819         // can recast Val to be of the correct type. This may require
12820         // converting between EXTRACT_VECTOR_ELT and
12821         // EXTRACT_SUBVECTOR.
12822         if ((MemVT != Val.getValueType()) &&
12823             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12824              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
12825           SDValue Vec = Val.getOperand(0);
12826           EVT MemVTScalarTy = MemVT.getScalarType();
12827           // We may need to add a bitcast here to get types to line up.
12828           if (MemVTScalarTy != Vec.getValueType()) {
12829             unsigned Elts = Vec.getValueType().getSizeInBits() /
12830                             MemVTScalarTy.getSizeInBits();
12831             EVT NewVecTy =
12832                 EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts);
12833             Vec = DAG.getBitcast(NewVecTy, Vec);
12834           }
12835           auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR
12836                                         : ISD::EXTRACT_VECTOR_ELT;
12837           Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1));
12838         }
12839         Ops.push_back(Val);
12840       }
12841
12842       // Build the extracted vector elements back into a vector.
12843       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
12844                                                : ISD::BUILD_VECTOR,
12845                               DL, StoreTy, Ops);
12846     }
12847   } else {
12848     // We should always use a vector store when merging extracted vector
12849     // elements, so this path implies a store of constants.
12850     assert(IsConstantSrc && "Merged vector elements should use vector store");
12851
12852     APInt StoreInt(SizeInBits, 0);
12853
12854     // Construct a single integer constant which is made of the smaller
12855     // constant inputs.
12856     bool IsLE = DAG.getDataLayout().isLittleEndian();
12857     for (unsigned i = 0; i < NumStores; ++i) {
12858       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
12859       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
12860
12861       SDValue Val = St->getValue();
12862       StoreInt <<= ElementSizeBits;
12863       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
12864         StoreInt |= C->getAPIntValue()
12865                         .zextOrTrunc(ElementSizeBits)
12866                         .zextOrTrunc(SizeInBits);
12867       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
12868         StoreInt |= C->getValueAPF()
12869                         .bitcastToAPInt()
12870                         .zextOrTrunc(ElementSizeBits)
12871                         .zextOrTrunc(SizeInBits);
12872         // If fp truncation is necessary give up for now.
12873         if (MemVT.getSizeInBits() != ElementSizeBits)
12874           return false;
12875       } else {
12876         llvm_unreachable("Invalid constant element type");
12877       }
12878     }
12879
12880     // Create the new Load and Store operations.
12881     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
12882   }
12883
12884   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12885   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
12886
12887   // make sure we use trunc store if it's necessary to be legal.
12888   SDValue NewStore;
12889   if (!UseTrunc) {
12890     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
12891                             FirstInChain->getPointerInfo(),
12892                             FirstInChain->getAlignment());
12893   } else { // Must be realized as a trunc store
12894     EVT LegalizedStoredValueTy =
12895         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
12896     unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits();
12897     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
12898     SDValue ExtendedStoreVal =
12899         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
12900                         LegalizedStoredValueTy);
12901     NewStore = DAG.getTruncStore(
12902         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
12903         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
12904         FirstInChain->getAlignment(),
12905         FirstInChain->getMemOperand()->getFlags());
12906   }
12907
12908   // Replace all merged stores with the new store.
12909   for (unsigned i = 0; i < NumStores; ++i)
12910     CombineTo(StoreNodes[i].MemNode, NewStore);
12911
12912   AddToWorklist(NewChain.getNode());
12913   return true;
12914 }
12915
12916 void DAGCombiner::getStoreMergeCandidates(
12917     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
12918   // This holds the base pointer, index, and the offset in bytes from the base
12919   // pointer.
12920   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
12921   EVT MemVT = St->getMemoryVT();
12922
12923   SDValue Val = peekThroughBitcast(St->getValue());
12924   // We must have a base and an offset.
12925   if (!BasePtr.getBase().getNode())
12926     return;
12927
12928   // Do not handle stores to undef base pointers.
12929   if (BasePtr.getBase().isUndef())
12930     return;
12931
12932   bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
12933   bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12934                           Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
12935   bool IsLoadSrc = isa<LoadSDNode>(Val);
12936   BaseIndexOffset LBasePtr;
12937   // Match on loadbaseptr if relevant.
12938   EVT LoadVT;
12939   if (IsLoadSrc) {
12940     auto *Ld = cast<LoadSDNode>(Val);
12941     LBasePtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
12942     LoadVT = Ld->getMemoryVT();
12943     // Load and store should be the same type.
12944     if (MemVT != LoadVT)
12945       return;
12946   }
12947   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
12948                             int64_t &Offset) -> bool {
12949     if (Other->isVolatile() || Other->isIndexed())
12950       return false;
12951     SDValue Val = peekThroughBitcast(Other->getValue());
12952     // Allow merging constants of different types as integers.
12953     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
12954                                            : Other->getMemoryVT() != MemVT;
12955     if (IsLoadSrc) {
12956       if (NoTypeMatch)
12957         return false;
12958       // The Load's Base Ptr must also match
12959       if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
12960         auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
12961         if (LoadVT != OtherLd->getMemoryVT())
12962           return false;
12963         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
12964           return false;
12965       } else
12966         return false;
12967     }
12968     if (IsConstantSrc) {
12969       if (NoTypeMatch)
12970         return false;
12971       if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
12972         return false;
12973     }
12974     if (IsExtractVecSrc) {
12975       // Do not merge truncated stores here.
12976       if (Other->isTruncatingStore())
12977         return false;
12978       if (!MemVT.bitsEq(Val.getValueType()))
12979         return false;
12980       if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
12981           Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
12982         return false;
12983     }
12984     Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
12985     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
12986   };
12987
12988   // We looking for a root node which is an ancestor to all mergable
12989   // stores. We search up through a load, to our root and then down
12990   // through all children. For instance we will find Store{1,2,3} if
12991   // St is Store1, Store2. or Store3 where the root is not a load
12992   // which always true for nonvolatile ops. TODO: Expand
12993   // the search to find all valid candidates through multiple layers of loads.
12994   //
12995   // Root
12996   // |-------|-------|
12997   // Load    Load    Store3
12998   // |       |
12999   // Store1   Store2
13000   //
13001   // FIXME: We should be able to climb and
13002   // descend TokenFactors to find candidates as well.
13003
13004   SDNode *RootNode = (St->getChain()).getNode();
13005
13006   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
13007     RootNode = Ldn->getChain().getNode();
13008     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
13009       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
13010         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
13011           if (I2.getOperandNo() == 0)
13012             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
13013               BaseIndexOffset Ptr;
13014               int64_t PtrDiff;
13015               if (CandidateMatch(OtherST, Ptr, PtrDiff))
13016                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
13017             }
13018   } else
13019     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
13020       if (I.getOperandNo() == 0)
13021         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
13022           BaseIndexOffset Ptr;
13023           int64_t PtrDiff;
13024           if (CandidateMatch(OtherST, Ptr, PtrDiff))
13025             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
13026         }
13027 }
13028
13029 // We need to check that merging these stores does not cause a loop in
13030 // the DAG. Any store candidate may depend on another candidate
13031 // indirectly through its operand (we already consider dependencies
13032 // through the chain). Check in parallel by searching up from
13033 // non-chain operands of candidates.
13034 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
13035     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
13036   // FIXME: We should be able to truncate a full search of
13037   // predecessors by doing a BFS and keeping tabs the originating
13038   // stores from which worklist nodes come from in a similar way to
13039   // TokenFactor simplfication.
13040
13041   SmallPtrSet<const SDNode *, 16> Visited;
13042   SmallVector<const SDNode *, 8> Worklist;
13043   unsigned int Max = 8192;
13044   // Search Ops of store candidates.
13045   for (unsigned i = 0; i < NumStores; ++i) {
13046     SDNode *n = StoreNodes[i].MemNode;
13047     // Potential loops may happen only through non-chain operands
13048     for (unsigned j = 1; j < n->getNumOperands(); ++j)
13049       Worklist.push_back(n->getOperand(j).getNode());
13050   }
13051   // Search through DAG. We can stop early if we find a store node.
13052   for (unsigned i = 0; i < NumStores; ++i) {
13053     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
13054                                      Max))
13055       return false;
13056     // Check if we ended early, failing conservatively if so.
13057     if (Visited.size() >= Max)
13058       return false;
13059   }
13060   return true;
13061 }
13062
13063 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
13064   if (OptLevel == CodeGenOpt::None)
13065     return false;
13066
13067   EVT MemVT = St->getMemoryVT();
13068   int64_t ElementSizeBytes = MemVT.getStoreSize();
13069   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
13070
13071   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
13072     return false;
13073
13074   bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
13075       Attribute::NoImplicitFloat);
13076
13077   // This function cannot currently deal with non-byte-sized memory sizes.
13078   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
13079     return false;
13080
13081   if (!MemVT.isSimple())
13082     return false;
13083
13084   // Perform an early exit check. Do not bother looking at stored values that
13085   // are not constants, loads, or extracted vector elements.
13086   SDValue StoredVal = peekThroughBitcast(St->getValue());
13087   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
13088   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
13089                        isa<ConstantFPSDNode>(StoredVal);
13090   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
13091                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
13092
13093   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
13094     return false;
13095
13096   SmallVector<MemOpLink, 8> StoreNodes;
13097   // Find potential store merge candidates by searching through chain sub-DAG
13098   getStoreMergeCandidates(St, StoreNodes);
13099
13100   // Check if there is anything to merge.
13101   if (StoreNodes.size() < 2)
13102     return false;
13103
13104   // Sort the memory operands according to their distance from the
13105   // base pointer.
13106   std::sort(StoreNodes.begin(), StoreNodes.end(),
13107             [](MemOpLink LHS, MemOpLink RHS) {
13108               return LHS.OffsetFromBase < RHS.OffsetFromBase;
13109             });
13110
13111   // Store Merge attempts to merge the lowest stores. This generally
13112   // works out as if successful, as the remaining stores are checked
13113   // after the first collection of stores is merged. However, in the
13114   // case that a non-mergeable store is found first, e.g., {p[-2],
13115   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
13116   // mergeable cases. To prevent this, we prune such stores from the
13117   // front of StoreNodes here.
13118
13119   bool RV = false;
13120   while (StoreNodes.size() > 1) {
13121     unsigned StartIdx = 0;
13122     while ((StartIdx + 1 < StoreNodes.size()) &&
13123            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
13124                StoreNodes[StartIdx + 1].OffsetFromBase)
13125       ++StartIdx;
13126
13127     // Bail if we don't have enough candidates to merge.
13128     if (StartIdx + 1 >= StoreNodes.size())
13129       return RV;
13130
13131     if (StartIdx)
13132       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
13133
13134     // Scan the memory operations on the chain and find the first
13135     // non-consecutive store memory address.
13136     unsigned NumConsecutiveStores = 1;
13137     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
13138     // Check that the addresses are consecutive starting from the second
13139     // element in the list of stores.
13140     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
13141       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
13142       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
13143         break;
13144       NumConsecutiveStores = i + 1;
13145     }
13146
13147     if (NumConsecutiveStores < 2) {
13148       StoreNodes.erase(StoreNodes.begin(),
13149                        StoreNodes.begin() + NumConsecutiveStores);
13150       continue;
13151     }
13152
13153     // Check that we can merge these candidates without causing a cycle
13154     if (!checkMergeStoreCandidatesForDependencies(StoreNodes,
13155                                                   NumConsecutiveStores)) {
13156       StoreNodes.erase(StoreNodes.begin(),
13157                        StoreNodes.begin() + NumConsecutiveStores);
13158       continue;
13159     }
13160
13161     // The node with the lowest store address.
13162     LLVMContext &Context = *DAG.getContext();
13163     const DataLayout &DL = DAG.getDataLayout();
13164
13165     // Store the constants into memory as one consecutive store.
13166     if (IsConstantSrc) {
13167       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
13168       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
13169       unsigned FirstStoreAlign = FirstInChain->getAlignment();
13170       unsigned LastLegalType = 1;
13171       unsigned LastLegalVectorType = 1;
13172       bool LastIntegerTrunc = false;
13173       bool NonZero = false;
13174       unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
13175       for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
13176         StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
13177         SDValue StoredVal = ST->getValue();
13178         bool IsElementZero = false;
13179         if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
13180           IsElementZero = C->isNullValue();
13181         else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
13182           IsElementZero = C->getConstantFPValue()->isNullValue();
13183         if (IsElementZero) {
13184           if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
13185             FirstZeroAfterNonZero = i;
13186         }
13187         NonZero |= !IsElementZero;
13188
13189         // Find a legal type for the constant store.
13190         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
13191         EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
13192         bool IsFast = false;
13193         if (TLI.isTypeLegal(StoreTy) &&
13194             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
13195             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13196                                    FirstStoreAlign, &IsFast) &&
13197             IsFast) {
13198           LastIntegerTrunc = false;
13199           LastLegalType = i + 1;
13200           // Or check whether a truncstore is legal.
13201         } else if (TLI.getTypeAction(Context, StoreTy) ==
13202                    TargetLowering::TypePromoteInteger) {
13203           EVT LegalizedStoredValueTy =
13204               TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
13205           if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
13206               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
13207               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13208                                      FirstStoreAlign, &IsFast) &&
13209               IsFast) {
13210             LastIntegerTrunc = true;
13211             LastLegalType = i + 1;
13212           }
13213         }
13214
13215         // We only use vectors if the constant is known to be zero or the target
13216         // allows it and the function is not marked with the noimplicitfloat
13217         // attribute.
13218         if ((!NonZero ||
13219              TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
13220             !NoVectors) {
13221           // Find a legal type for the vector store.
13222           unsigned Elts = (i + 1) * NumMemElts;
13223           EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
13224           if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
13225               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
13226               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
13227                                      FirstStoreAlign, &IsFast) &&
13228               IsFast)
13229             LastLegalVectorType = i + 1;
13230         }
13231       }
13232
13233       bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
13234       unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
13235
13236       // Check if we found a legal integer type that creates a meaningful merge.
13237       if (NumElem < 2) {
13238         // We know that candidate stores are in order and of correct
13239         // shape. While there is no mergeable sequence from the
13240         // beginning one may start later in the sequence. The only
13241         // reason a merge of size N could have failed where another of
13242         // the same size would not have, is if the alignment has
13243         // improved or we've dropped a non-zero value. Drop as many
13244         // candidates as we can here.
13245         unsigned NumSkip = 1;
13246         while (
13247             (NumSkip < NumConsecutiveStores) &&
13248             (NumSkip < FirstZeroAfterNonZero) &&
13249             (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) {
13250           NumSkip++;
13251         }
13252         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
13253         continue;
13254       }
13255
13256       bool Merged = MergeStoresOfConstantsOrVecElts(
13257           StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
13258       RV |= Merged;
13259
13260       // Remove merged stores for next iteration.
13261       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
13262       continue;
13263     }
13264
13265     // When extracting multiple vector elements, try to store them
13266     // in one vector store rather than a sequence of scalar stores.
13267     if (IsExtractVecSrc) {
13268       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
13269       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
13270       unsigned FirstStoreAlign = FirstInChain->getAlignment();
13271       unsigned NumStoresToMerge = 1;
13272       for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
13273         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
13274         SDValue StVal = peekThroughBitcast(St->getValue());
13275         // This restriction could be loosened.
13276         // Bail out if any stored values are not elements extracted from a
13277         // vector. It should be possible to handle mixed sources, but load
13278         // sources need more careful handling (see the block of code below that
13279         // handles consecutive loads).
13280         if (StVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
13281             StVal.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13282           return RV;
13283
13284         // Find a legal type for the vector store.
13285         unsigned Elts = (i + 1) * NumMemElts;
13286         EVT Ty =
13287             EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
13288         bool IsFast;
13289         if (TLI.isTypeLegal(Ty) &&
13290             TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
13291             TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
13292                                    FirstStoreAlign, &IsFast) &&
13293             IsFast)
13294           NumStoresToMerge = i + 1;
13295       }
13296
13297       // Check if we found a legal integer type that creates a meaningful merge.
13298       if (NumStoresToMerge < 2) {
13299         // We know that candidate stores are in order and of correct
13300         // shape. While there is no mergeable sequence from the
13301         // beginning one may start later in the sequence. The only
13302         // reason a merge of size N could have failed where another of
13303         // the same size would not have, is if the alignment has
13304         // improved. Drop as many candidates as we can here.
13305         unsigned NumSkip = 1;
13306         while ((NumSkip < NumConsecutiveStores) &&
13307                (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
13308           NumSkip++;
13309
13310         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
13311         continue;
13312       }
13313
13314       bool Merged = MergeStoresOfConstantsOrVecElts(
13315           StoreNodes, MemVT, NumStoresToMerge, false, true, false);
13316       if (!Merged) {
13317         StoreNodes.erase(StoreNodes.begin(),
13318                          StoreNodes.begin() + NumStoresToMerge);
13319         continue;
13320       }
13321       // Remove merged stores for next iteration.
13322       StoreNodes.erase(StoreNodes.begin(),
13323                        StoreNodes.begin() + NumStoresToMerge);
13324       RV = true;
13325       continue;
13326     }
13327
13328     // Below we handle the case of multiple consecutive stores that
13329     // come from multiple consecutive loads. We merge them into a single
13330     // wide load and a single wide store.
13331
13332     // Look for load nodes which are used by the stored values.
13333     SmallVector<MemOpLink, 8> LoadNodes;
13334
13335     // Find acceptable loads. Loads need to have the same chain (token factor),
13336     // must not be zext, volatile, indexed, and they must be consecutive.
13337     BaseIndexOffset LdBasePtr;
13338     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
13339       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
13340       SDValue Val = peekThroughBitcast(St->getValue());
13341       LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val);
13342       if (!Ld)
13343         break;
13344
13345       // Loads must only have one use.
13346       if (!Ld->hasNUsesOfValue(1, 0))
13347         break;
13348
13349       // The memory operands must not be volatile.
13350       if (Ld->isVolatile() || Ld->isIndexed())
13351         break;
13352
13353       // The stored memory type must be the same.
13354       if (Ld->getMemoryVT() != MemVT)
13355         break;
13356
13357       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
13358       // If this is not the first ptr that we check.
13359       int64_t LdOffset = 0;
13360       if (LdBasePtr.getBase().getNode()) {
13361         // The base ptr must be the same.
13362         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
13363           break;
13364       } else {
13365         // Check that all other base pointers are the same as this one.
13366         LdBasePtr = LdPtr;
13367       }
13368
13369       // We found a potential memory operand to merge.
13370       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
13371     }
13372
13373     if (LoadNodes.size() < 2) {
13374       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
13375       continue;
13376     }
13377
13378     // If we have load/store pair instructions and we only have two values,
13379     // don't bother merging.
13380     unsigned RequiredAlignment;
13381     if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
13382         StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
13383       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
13384       continue;
13385     }
13386     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
13387     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
13388     unsigned FirstStoreAlign = FirstInChain->getAlignment();
13389     LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
13390     unsigned FirstLoadAS = FirstLoad->getAddressSpace();
13391     unsigned FirstLoadAlign = FirstLoad->getAlignment();
13392
13393     // Scan the memory operations on the chain and find the first
13394     // non-consecutive load memory address. These variables hold the index in
13395     // the store node array.
13396     unsigned LastConsecutiveLoad = 1;
13397     // This variable refers to the size and not index in the array.
13398     unsigned LastLegalVectorType = 1;
13399     unsigned LastLegalIntegerType = 1;
13400     bool isDereferenceable = true;
13401     bool DoIntegerTruncate = false;
13402     StartAddress = LoadNodes[0].OffsetFromBase;
13403     SDValue FirstChain = FirstLoad->getChain();
13404     for (unsigned i = 1; i < LoadNodes.size(); ++i) {
13405       // All loads must share the same chain.
13406       if (LoadNodes[i].MemNode->getChain() != FirstChain)
13407         break;
13408
13409       int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
13410       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
13411         break;
13412       LastConsecutiveLoad = i;
13413
13414       if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
13415         isDereferenceable = false;
13416
13417       // Find a legal type for the vector store.
13418       unsigned Elts = (i + 1) * NumMemElts;
13419       EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
13420
13421       bool IsFastSt, IsFastLd;
13422       if (TLI.isTypeLegal(StoreTy) &&
13423           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
13424           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13425                                  FirstStoreAlign, &IsFastSt) &&
13426           IsFastSt &&
13427           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
13428                                  FirstLoadAlign, &IsFastLd) &&
13429           IsFastLd) {
13430         LastLegalVectorType = i + 1;
13431       }
13432
13433       // Find a legal type for the integer store.
13434       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
13435       StoreTy = EVT::getIntegerVT(Context, SizeInBits);
13436       if (TLI.isTypeLegal(StoreTy) &&
13437           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
13438           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13439                                  FirstStoreAlign, &IsFastSt) &&
13440           IsFastSt &&
13441           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
13442                                  FirstLoadAlign, &IsFastLd) &&
13443           IsFastLd) {
13444         LastLegalIntegerType = i + 1;
13445         DoIntegerTruncate = false;
13446         // Or check whether a truncstore and extload is legal.
13447       } else if (TLI.getTypeAction(Context, StoreTy) ==
13448                  TargetLowering::TypePromoteInteger) {
13449         EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
13450         if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
13451             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
13452             TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
13453                                StoreTy) &&
13454             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
13455                                StoreTy) &&
13456             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
13457             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13458                                    FirstStoreAlign, &IsFastSt) &&
13459             IsFastSt &&
13460             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
13461                                    FirstLoadAlign, &IsFastLd) &&
13462             IsFastLd) {
13463           LastLegalIntegerType = i + 1;
13464           DoIntegerTruncate = true;
13465         }
13466       }
13467     }
13468
13469     // Only use vector types if the vector type is larger than the integer type.
13470     // If they are the same, use integers.
13471     bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
13472     unsigned LastLegalType =
13473         std::max(LastLegalVectorType, LastLegalIntegerType);
13474
13475     // We add +1 here because the LastXXX variables refer to location while
13476     // the NumElem refers to array/index size.
13477     unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
13478     NumElem = std::min(LastLegalType, NumElem);
13479
13480     if (NumElem < 2) {
13481       // We know that candidate stores are in order and of correct
13482       // shape. While there is no mergeable sequence from the
13483       // beginning one may start later in the sequence. The only
13484       // reason a merge of size N could have failed where another of
13485       // the same size would not have is if the alignment or either
13486       // the load or store has improved. Drop as many candidates as we
13487       // can here.
13488       unsigned NumSkip = 1;
13489       while ((NumSkip < LoadNodes.size()) &&
13490              (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
13491              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
13492         NumSkip++;
13493       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
13494       continue;
13495     }
13496
13497     // Find if it is better to use vectors or integers to load and store
13498     // to memory.
13499     EVT JointMemOpVT;
13500     if (UseVectorTy) {
13501       // Find a legal type for the vector store.
13502       unsigned Elts = NumElem * NumMemElts;
13503       JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
13504     } else {
13505       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
13506       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
13507     }
13508
13509     SDLoc LoadDL(LoadNodes[0].MemNode);
13510     SDLoc StoreDL(StoreNodes[0].MemNode);
13511
13512     // The merged loads are required to have the same incoming chain, so
13513     // using the first's chain is acceptable.
13514
13515     SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
13516     AddToWorklist(NewStoreChain.getNode());
13517
13518     MachineMemOperand::Flags MMOFlags = isDereferenceable ?
13519                                           MachineMemOperand::MODereferenceable:
13520                                           MachineMemOperand::MONone;
13521
13522     SDValue NewLoad, NewStore;
13523     if (UseVectorTy || !DoIntegerTruncate) {
13524       NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
13525                             FirstLoad->getBasePtr(),
13526                             FirstLoad->getPointerInfo(), FirstLoadAlign,
13527                             MMOFlags);
13528       NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
13529                               FirstInChain->getBasePtr(),
13530                               FirstInChain->getPointerInfo(), FirstStoreAlign);
13531     } else { // This must be the truncstore/extload case
13532       EVT ExtendedTy =
13533           TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
13534       NewLoad =
13535           DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
13536                          FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
13537                          JointMemOpVT, FirstLoadAlign, MMOFlags);
13538       NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
13539                                    FirstInChain->getBasePtr(),
13540                                    FirstInChain->getPointerInfo(), JointMemOpVT,
13541                                    FirstInChain->getAlignment(),
13542                                    FirstInChain->getMemOperand()->getFlags());
13543     }
13544
13545     // Transfer chain users from old loads to the new load.
13546     for (unsigned i = 0; i < NumElem; ++i) {
13547       LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
13548       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
13549                                     SDValue(NewLoad.getNode(), 1));
13550     }
13551
13552     // Replace the all stores with the new store. Recursively remove
13553     // corresponding value if its no longer used.
13554     for (unsigned i = 0; i < NumElem; ++i) {
13555       SDValue Val = StoreNodes[i].MemNode->getOperand(1);
13556       CombineTo(StoreNodes[i].MemNode, NewStore);
13557       if (Val.getNode()->use_empty())
13558         recursivelyDeleteUnusedNodes(Val.getNode());
13559     }
13560
13561     RV = true;
13562     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
13563   }
13564   return RV;
13565 }
13566
13567 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
13568   SDLoc SL(ST);
13569   SDValue ReplStore;
13570
13571   // Replace the chain to avoid dependency.
13572   if (ST->isTruncatingStore()) {
13573     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
13574                                   ST->getBasePtr(), ST->getMemoryVT(),
13575                                   ST->getMemOperand());
13576   } else {
13577     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
13578                              ST->getMemOperand());
13579   }
13580
13581   // Create token to keep both nodes around.
13582   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
13583                               MVT::Other, ST->getChain(), ReplStore);
13584
13585   // Make sure the new and old chains are cleaned up.
13586   AddToWorklist(Token.getNode());
13587
13588   // Don't add users to work list.
13589   return CombineTo(ST, Token, false);
13590 }
13591
13592 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
13593   SDValue Value = ST->getValue();
13594   if (Value.getOpcode() == ISD::TargetConstantFP)
13595     return SDValue();
13596
13597   SDLoc DL(ST);
13598
13599   SDValue Chain = ST->getChain();
13600   SDValue Ptr = ST->getBasePtr();
13601
13602   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
13603
13604   // NOTE: If the original store is volatile, this transform must not increase
13605   // the number of stores.  For example, on x86-32 an f64 can be stored in one
13606   // processor operation but an i64 (which is not legal) requires two.  So the
13607   // transform should not be done in this case.
13608
13609   SDValue Tmp;
13610   switch (CFP->getSimpleValueType(0).SimpleTy) {
13611   default:
13612     llvm_unreachable("Unknown FP type");
13613   case MVT::f16:    // We don't do this for these yet.
13614   case MVT::f80:
13615   case MVT::f128:
13616   case MVT::ppcf128:
13617     return SDValue();
13618   case MVT::f32:
13619     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
13620         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13621       ;
13622       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
13623                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
13624                             MVT::i32);
13625       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
13626     }
13627
13628     return SDValue();
13629   case MVT::f64:
13630     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
13631          !ST->isVolatile()) ||
13632         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
13633       ;
13634       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
13635                             getZExtValue(), SDLoc(CFP), MVT::i64);
13636       return DAG.getStore(Chain, DL, Tmp,
13637                           Ptr, ST->getMemOperand());
13638     }
13639
13640     if (!ST->isVolatile() &&
13641         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13642       // Many FP stores are not made apparent until after legalize, e.g. for
13643       // argument passing.  Since this is so common, custom legalize the
13644       // 64-bit integer store into two 32-bit stores.
13645       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
13646       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
13647       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
13648       if (DAG.getDataLayout().isBigEndian())
13649         std::swap(Lo, Hi);
13650
13651       unsigned Alignment = ST->getAlignment();
13652       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13653       AAMDNodes AAInfo = ST->getAAInfo();
13654
13655       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13656                                  ST->getAlignment(), MMOFlags, AAInfo);
13657       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13658                         DAG.getConstant(4, DL, Ptr.getValueType()));
13659       Alignment = MinAlign(Alignment, 4U);
13660       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
13661                                  ST->getPointerInfo().getWithOffset(4),
13662                                  Alignment, MMOFlags, AAInfo);
13663       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13664                          St0, St1);
13665     }
13666
13667     return SDValue();
13668   }
13669 }
13670
13671 SDValue DAGCombiner::visitSTORE(SDNode *N) {
13672   StoreSDNode *ST  = cast<StoreSDNode>(N);
13673   SDValue Chain = ST->getChain();
13674   SDValue Value = ST->getValue();
13675   SDValue Ptr   = ST->getBasePtr();
13676
13677   // If this is a store of a bit convert, store the input value if the
13678   // resultant store does not need a higher alignment than the original.
13679   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
13680       ST->isUnindexed()) {
13681     EVT SVT = Value.getOperand(0).getValueType();
13682     if (((!LegalOperations && !ST->isVolatile()) ||
13683          TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
13684         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
13685       unsigned OrigAlign = ST->getAlignment();
13686       bool Fast = false;
13687       if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
13688                                  ST->getAddressSpace(), OrigAlign, &Fast) &&
13689           Fast) {
13690         return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
13691                             ST->getPointerInfo(), OrigAlign,
13692                             ST->getMemOperand()->getFlags(), ST->getAAInfo());
13693       }
13694     }
13695   }
13696
13697   // Turn 'store undef, Ptr' -> nothing.
13698   if (Value.isUndef() && ST->isUnindexed())
13699     return Chain;
13700
13701   // Try to infer better alignment information than the store already has.
13702   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
13703     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
13704       if (Align > ST->getAlignment()) {
13705         SDValue NewStore =
13706             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
13707                               ST->getMemoryVT(), Align,
13708                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
13709         if (NewStore.getNode() != N)
13710           return CombineTo(ST, NewStore, true);
13711       }
13712     }
13713   }
13714
13715   // Try transforming a pair floating point load / store ops to integer
13716   // load / store ops.
13717   if (SDValue NewST = TransformFPLoadStorePair(N))
13718     return NewST;
13719
13720   if (ST->isUnindexed()) {
13721     // Walk up chain skipping non-aliasing memory nodes, on this store and any
13722     // adjacent stores.
13723     if (findBetterNeighborChains(ST)) {
13724       // replaceStoreChain uses CombineTo, which handled all of the worklist
13725       // manipulation. Return the original node to not do anything else.
13726       return SDValue(ST, 0);
13727     }
13728     Chain = ST->getChain();
13729   }
13730
13731   // FIXME: is there such a thing as a truncating indexed store?
13732   if (ST->isTruncatingStore() && ST->isUnindexed() &&
13733       Value.getValueType().isInteger()) {
13734     // See if we can simplify the input to this truncstore with knowledge that
13735     // only the low bits are being used.  For example:
13736     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
13737     SDValue Shorter = DAG.GetDemandedBits(
13738         Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
13739                                     ST->getMemoryVT().getScalarSizeInBits()));
13740     AddToWorklist(Value.getNode());
13741     if (Shorter.getNode())
13742       return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
13743                                Ptr, ST->getMemoryVT(), ST->getMemOperand());
13744
13745     // Otherwise, see if we can simplify the operation with
13746     // SimplifyDemandedBits, which only works if the value has a single use.
13747     if (SimplifyDemandedBits(
13748             Value,
13749             APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
13750                                  ST->getMemoryVT().getScalarSizeInBits()))) {
13751       // Re-visit the store if anything changed and the store hasn't been merged
13752       // with another node (N is deleted) SimplifyDemandedBits will add Value's
13753       // node back to the worklist if necessary, but we also need to re-visit
13754       // the Store node itself.
13755       if (N->getOpcode() != ISD::DELETED_NODE)
13756         AddToWorklist(N);
13757       return SDValue(N, 0);
13758     }
13759   }
13760
13761   // If this is a load followed by a store to the same location, then the store
13762   // is dead/noop.
13763   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
13764     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
13765         ST->isUnindexed() && !ST->isVolatile() &&
13766         // There can't be any side effects between the load and store, such as
13767         // a call or store.
13768         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
13769       // The store is dead, remove it.
13770       return Chain;
13771     }
13772   }
13773
13774   // Deal with elidable overlapping chained stores.
13775   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain))
13776     if (OptLevel != CodeGenOpt::None && ST->isUnindexed() &&
13777         ST1->isUnindexed() && !ST1->isVolatile() && ST1->hasOneUse() &&
13778         !ST1->getBasePtr().isUndef() && !ST->isVolatile()) {
13779       BaseIndexOffset STBasePtr = BaseIndexOffset::match(ST->getBasePtr(), DAG);
13780       BaseIndexOffset ST1BasePtr =
13781           BaseIndexOffset::match(ST1->getBasePtr(), DAG);
13782       unsigned STBytes = ST->getMemoryVT().getStoreSize();
13783       unsigned ST1Bytes = ST1->getMemoryVT().getStoreSize();
13784       int64_t PtrDiff;
13785       // If this is a store who's preceeding store to a subset of the same
13786       // memory and no one other node is chained to that store we can
13787       // effectively drop the store. Do not remove stores to undef as they may
13788       // be used as data sinks.
13789
13790       if (((ST->getBasePtr() == ST1->getBasePtr()) &&
13791            (ST->getValue() == ST1->getValue())) ||
13792           (STBasePtr.equalBaseIndex(ST1BasePtr, DAG, PtrDiff) &&
13793            (0 <= PtrDiff) && (PtrDiff + ST1Bytes <= STBytes))) {
13794         CombineTo(ST1, ST1->getChain());
13795         return SDValue(N, 0);
13796       }
13797     }
13798
13799   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
13800   // truncating store.  We can do this even if this is already a truncstore.
13801   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
13802       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
13803       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
13804                             ST->getMemoryVT())) {
13805     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
13806                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
13807   }
13808
13809   // Always perform this optimization before types are legal. If the target
13810   // prefers, also try this after legalization to catch stores that were created
13811   // by intrinsics or other nodes.
13812   if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
13813     while (true) {
13814       // There can be multiple store sequences on the same chain.
13815       // Keep trying to merge store sequences until we are unable to do so
13816       // or until we merge the last store on the chain.
13817       bool Changed = MergeConsecutiveStores(ST);
13818       if (!Changed) break;
13819       // Return N as merge only uses CombineTo and no worklist clean
13820       // up is necessary.
13821       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
13822         return SDValue(N, 0);
13823     }
13824   }
13825
13826   // Try transforming N to an indexed store.
13827   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13828     return SDValue(N, 0);
13829
13830   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
13831   //
13832   // Make sure to do this only after attempting to merge stores in order to
13833   //  avoid changing the types of some subset of stores due to visit order,
13834   //  preventing their merging.
13835   if (isa<ConstantFPSDNode>(ST->getValue())) {
13836     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
13837       return NewSt;
13838   }
13839
13840   if (SDValue NewSt = splitMergedValStore(ST))
13841     return NewSt;
13842
13843   return ReduceLoadOpStoreWidth(N);
13844 }
13845
13846 /// For the instruction sequence of store below, F and I values
13847 /// are bundled together as an i64 value before being stored into memory.
13848 /// Sometimes it is more efficent to generate separate stores for F and I,
13849 /// which can remove the bitwise instructions or sink them to colder places.
13850 ///
13851 ///   (store (or (zext (bitcast F to i32) to i64),
13852 ///              (shl (zext I to i64), 32)), addr)  -->
13853 ///   (store F, addr) and (store I, addr+4)
13854 ///
13855 /// Similarly, splitting for other merged store can also be beneficial, like:
13856 /// For pair of {i32, i32}, i64 store --> two i32 stores.
13857 /// For pair of {i32, i16}, i64 store --> two i32 stores.
13858 /// For pair of {i16, i16}, i32 store --> two i16 stores.
13859 /// For pair of {i16, i8},  i32 store --> two i16 stores.
13860 /// For pair of {i8, i8},   i16 store --> two i8 stores.
13861 ///
13862 /// We allow each target to determine specifically which kind of splitting is
13863 /// supported.
13864 ///
13865 /// The store patterns are commonly seen from the simple code snippet below
13866 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
13867 ///   void goo(const std::pair<int, float> &);
13868 ///   hoo() {
13869 ///     ...
13870 ///     goo(std::make_pair(tmp, ftmp));
13871 ///     ...
13872 ///   }
13873 ///
13874 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
13875   if (OptLevel == CodeGenOpt::None)
13876     return SDValue();
13877
13878   SDValue Val = ST->getValue();
13879   SDLoc DL(ST);
13880
13881   // Match OR operand.
13882   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
13883     return SDValue();
13884
13885   // Match SHL operand and get Lower and Higher parts of Val.
13886   SDValue Op1 = Val.getOperand(0);
13887   SDValue Op2 = Val.getOperand(1);
13888   SDValue Lo, Hi;
13889   if (Op1.getOpcode() != ISD::SHL) {
13890     std::swap(Op1, Op2);
13891     if (Op1.getOpcode() != ISD::SHL)
13892       return SDValue();
13893   }
13894   Lo = Op2;
13895   Hi = Op1.getOperand(0);
13896   if (!Op1.hasOneUse())
13897     return SDValue();
13898
13899   // Match shift amount to HalfValBitSize.
13900   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
13901   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
13902   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
13903     return SDValue();
13904
13905   // Lo and Hi are zero-extended from int with size less equal than 32
13906   // to i64.
13907   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
13908       !Lo.getOperand(0).getValueType().isScalarInteger() ||
13909       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
13910       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
13911       !Hi.getOperand(0).getValueType().isScalarInteger() ||
13912       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
13913     return SDValue();
13914
13915   // Use the EVT of low and high parts before bitcast as the input
13916   // of target query.
13917   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
13918                   ? Lo.getOperand(0).getValueType()
13919                   : Lo.getValueType();
13920   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
13921                    ? Hi.getOperand(0).getValueType()
13922                    : Hi.getValueType();
13923   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
13924     return SDValue();
13925
13926   // Start to split store.
13927   unsigned Alignment = ST->getAlignment();
13928   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13929   AAMDNodes AAInfo = ST->getAAInfo();
13930
13931   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
13932   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
13933   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
13934   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
13935
13936   SDValue Chain = ST->getChain();
13937   SDValue Ptr = ST->getBasePtr();
13938   // Lower value store.
13939   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13940                              ST->getAlignment(), MMOFlags, AAInfo);
13941   Ptr =
13942       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13943                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
13944   // Higher value store.
13945   SDValue St1 =
13946       DAG.getStore(St0, DL, Hi, Ptr,
13947                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
13948                    Alignment / 2, MMOFlags, AAInfo);
13949   return St1;
13950 }
13951
13952 /// Convert a disguised subvector insertion into a shuffle:
13953 /// insert_vector_elt V, (bitcast X from vector type), IdxC -->
13954 /// bitcast(shuffle (bitcast V), (extended X), Mask)
13955 /// Note: We do not use an insert_subvector node because that requires a legal
13956 /// subvector type.
13957 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
13958   SDValue InsertVal = N->getOperand(1);
13959   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
13960       !InsertVal.getOperand(0).getValueType().isVector())
13961     return SDValue();
13962
13963   SDValue SubVec = InsertVal.getOperand(0);
13964   SDValue DestVec = N->getOperand(0);
13965   EVT SubVecVT = SubVec.getValueType();
13966   EVT VT = DestVec.getValueType();
13967   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
13968   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
13969   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
13970
13971   // Step 1: Create a shuffle mask that implements this insert operation. The
13972   // vector that we are inserting into will be operand 0 of the shuffle, so
13973   // those elements are just 'i'. The inserted subvector is in the first
13974   // positions of operand 1 of the shuffle. Example:
13975   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
13976   SmallVector<int, 16> Mask(NumMaskVals);
13977   for (unsigned i = 0; i != NumMaskVals; ++i) {
13978     if (i / NumSrcElts == InsIndex)
13979       Mask[i] = (i % NumSrcElts) + NumMaskVals;
13980     else
13981       Mask[i] = i;
13982   }
13983
13984   // Bail out if the target can not handle the shuffle we want to create.
13985   EVT SubVecEltVT = SubVecVT.getVectorElementType();
13986   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
13987   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
13988     return SDValue();
13989
13990   // Step 2: Create a wide vector from the inserted source vector by appending
13991   // undefined elements. This is the same size as our destination vector.
13992   SDLoc DL(N);
13993   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
13994   ConcatOps[0] = SubVec;
13995   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
13996
13997   // Step 3: Shuffle in the padded subvector.
13998   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
13999   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
14000   AddToWorklist(PaddedSubV.getNode());
14001   AddToWorklist(DestVecBC.getNode());
14002   AddToWorklist(Shuf.getNode());
14003   return DAG.getBitcast(VT, Shuf);
14004 }
14005
14006 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
14007   SDValue InVec = N->getOperand(0);
14008   SDValue InVal = N->getOperand(1);
14009   SDValue EltNo = N->getOperand(2);
14010   SDLoc DL(N);
14011
14012   // If the inserted element is an UNDEF, just use the input vector.
14013   if (InVal.isUndef())
14014     return InVec;
14015
14016   EVT VT = InVec.getValueType();
14017
14018   // Remove redundant insertions:
14019   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
14020   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14021       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
14022     return InVec;
14023
14024   // We must know which element is being inserted for folds below here.
14025   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
14026   if (!IndexC)
14027     return SDValue();
14028   unsigned Elt = IndexC->getZExtValue();
14029
14030   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
14031     return Shuf;
14032
14033   // Canonicalize insert_vector_elt dag nodes.
14034   // Example:
14035   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
14036   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
14037   //
14038   // Do this only if the child insert_vector node has one use; also
14039   // do this only if indices are both constants and Idx1 < Idx0.
14040   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
14041       && isa<ConstantSDNode>(InVec.getOperand(2))) {
14042     unsigned OtherElt = InVec.getConstantOperandVal(2);
14043     if (Elt < OtherElt) {
14044       // Swap nodes.
14045       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
14046                                   InVec.getOperand(0), InVal, EltNo);
14047       AddToWorklist(NewOp.getNode());
14048       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
14049                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
14050     }
14051   }
14052
14053   // If we can't generate a legal BUILD_VECTOR, exit
14054   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
14055     return SDValue();
14056
14057   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
14058   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
14059   // vector elements.
14060   SmallVector<SDValue, 8> Ops;
14061   // Do not combine these two vectors if the output vector will not replace
14062   // the input vector.
14063   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
14064     Ops.append(InVec.getNode()->op_begin(),
14065                InVec.getNode()->op_end());
14066   } else if (InVec.isUndef()) {
14067     unsigned NElts = VT.getVectorNumElements();
14068     Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
14069   } else {
14070     return SDValue();
14071   }
14072
14073   // Insert the element
14074   if (Elt < Ops.size()) {
14075     // All the operands of BUILD_VECTOR must have the same type;
14076     // we enforce that here.
14077     EVT OpVT = Ops[0].getValueType();
14078     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
14079   }
14080
14081   // Return the new vector
14082   return DAG.getBuildVector(VT, DL, Ops);
14083 }
14084
14085 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
14086     SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
14087   assert(!OriginalLoad->isVolatile());
14088
14089   EVT ResultVT = EVE->getValueType(0);
14090   EVT VecEltVT = InVecVT.getVectorElementType();
14091   unsigned Align = OriginalLoad->getAlignment();
14092   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
14093       VecEltVT.getTypeForEVT(*DAG.getContext()));
14094
14095   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
14096     return SDValue();
14097
14098   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
14099     ISD::NON_EXTLOAD : ISD::EXTLOAD;
14100   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
14101     return SDValue();
14102
14103   Align = NewAlign;
14104
14105   SDValue NewPtr = OriginalLoad->getBasePtr();
14106   SDValue Offset;
14107   EVT PtrType = NewPtr.getValueType();
14108   MachinePointerInfo MPI;
14109   SDLoc DL(EVE);
14110   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
14111     int Elt = ConstEltNo->getZExtValue();
14112     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
14113     Offset = DAG.getConstant(PtrOff, DL, PtrType);
14114     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
14115   } else {
14116     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
14117     Offset = DAG.getNode(
14118         ISD::MUL, DL, PtrType, Offset,
14119         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
14120     MPI = OriginalLoad->getPointerInfo();
14121   }
14122   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
14123
14124   // The replacement we need to do here is a little tricky: we need to
14125   // replace an extractelement of a load with a load.
14126   // Use ReplaceAllUsesOfValuesWith to do the replacement.
14127   // Note that this replacement assumes that the extractvalue is the only
14128   // use of the load; that's okay because we don't want to perform this
14129   // transformation in other cases anyway.
14130   SDValue Load;
14131   SDValue Chain;
14132   if (ResultVT.bitsGT(VecEltVT)) {
14133     // If the result type of vextract is wider than the load, then issue an
14134     // extending load instead.
14135     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
14136                                                   VecEltVT)
14137                                    ? ISD::ZEXTLOAD
14138                                    : ISD::EXTLOAD;
14139     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
14140                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
14141                           Align, OriginalLoad->getMemOperand()->getFlags(),
14142                           OriginalLoad->getAAInfo());
14143     Chain = Load.getValue(1);
14144   } else {
14145     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
14146                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
14147                        OriginalLoad->getAAInfo());
14148     Chain = Load.getValue(1);
14149     if (ResultVT.bitsLT(VecEltVT))
14150       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
14151     else
14152       Load = DAG.getBitcast(ResultVT, Load);
14153   }
14154   WorklistRemover DeadNodes(*this);
14155   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
14156   SDValue To[] = { Load, Chain };
14157   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
14158   // Since we're explicitly calling ReplaceAllUses, add the new node to the
14159   // worklist explicitly as well.
14160   AddToWorklist(Load.getNode());
14161   AddUsersToWorklist(Load.getNode()); // Add users too
14162   // Make sure to revisit this node to clean it up; it will usually be dead.
14163   AddToWorklist(EVE);
14164   ++OpsNarrowed;
14165   return SDValue(EVE, 0);
14166 }
14167
14168 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
14169   // (vextract (scalar_to_vector val, 0) -> val
14170   SDValue InVec = N->getOperand(0);
14171   EVT VT = InVec.getValueType();
14172   EVT NVT = N->getValueType(0);
14173
14174   if (InVec.isUndef())
14175     return DAG.getUNDEF(NVT);
14176
14177   if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
14178     // Check if the result type doesn't match the inserted element type. A
14179     // SCALAR_TO_VECTOR may truncate the inserted element and the
14180     // EXTRACT_VECTOR_ELT may widen the extracted vector.
14181     SDValue InOp = InVec.getOperand(0);
14182     if (InOp.getValueType() != NVT) {
14183       assert(InOp.getValueType().isInteger() && NVT.isInteger());
14184       return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
14185     }
14186     return InOp;
14187   }
14188
14189   SDValue EltNo = N->getOperand(1);
14190   ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
14191
14192   // extract_vector_elt (build_vector x, y), 1 -> y
14193   if (ConstEltNo &&
14194       InVec.getOpcode() == ISD::BUILD_VECTOR &&
14195       TLI.isTypeLegal(VT) &&
14196       (InVec.hasOneUse() ||
14197        TLI.aggressivelyPreferBuildVectorSources(VT))) {
14198     SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
14199     EVT InEltVT = Elt.getValueType();
14200
14201     // Sometimes build_vector's scalar input types do not match result type.
14202     if (NVT == InEltVT)
14203       return Elt;
14204
14205     // TODO: It may be useful to truncate if free if the build_vector implicitly
14206     // converts.
14207   }
14208
14209   // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x)
14210   bool isLE = DAG.getDataLayout().isLittleEndian();
14211   unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1;
14212   if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
14213       ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) {
14214     SDValue BCSrc = InVec.getOperand(0);
14215     if (BCSrc.getValueType().isScalarInteger())
14216       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
14217   }
14218
14219   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
14220   //
14221   // This only really matters if the index is non-constant since other combines
14222   // on the constant elements already work.
14223   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
14224       EltNo == InVec.getOperand(2)) {
14225     SDValue Elt = InVec.getOperand(1);
14226     return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
14227   }
14228
14229   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
14230   // We only perform this optimization before the op legalization phase because
14231   // we may introduce new vector instructions which are not backed by TD
14232   // patterns. For example on AVX, extracting elements from a wide vector
14233   // without using extract_subvector. However, if we can find an underlying
14234   // scalar value, then we can always use that.
14235   if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
14236     int NumElem = VT.getVectorNumElements();
14237     ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
14238     // Find the new index to extract from.
14239     int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
14240
14241     // Extracting an undef index is undef.
14242     if (OrigElt == -1)
14243       return DAG.getUNDEF(NVT);
14244
14245     // Select the right vector half to extract from.
14246     SDValue SVInVec;
14247     if (OrigElt < NumElem) {
14248       SVInVec = InVec->getOperand(0);
14249     } else {
14250       SVInVec = InVec->getOperand(1);
14251       OrigElt -= NumElem;
14252     }
14253
14254     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
14255       SDValue InOp = SVInVec.getOperand(OrigElt);
14256       if (InOp.getValueType() != NVT) {
14257         assert(InOp.getValueType().isInteger() && NVT.isInteger());
14258         InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
14259       }
14260
14261       return InOp;
14262     }
14263
14264     // FIXME: We should handle recursing on other vector shuffles and
14265     // scalar_to_vector here as well.
14266
14267     if (!LegalOperations ||
14268         // FIXME: Should really be just isOperationLegalOrCustom.
14269         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
14270         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
14271       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
14272       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
14273                          DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
14274     }
14275   }
14276
14277   bool BCNumEltsChanged = false;
14278   EVT ExtVT = VT.getVectorElementType();
14279   EVT LVT = ExtVT;
14280
14281   // If the result of load has to be truncated, then it's not necessarily
14282   // profitable.
14283   if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
14284     return SDValue();
14285
14286   if (InVec.getOpcode() == ISD::BITCAST) {
14287     // Don't duplicate a load with other uses.
14288     if (!InVec.hasOneUse())
14289       return SDValue();
14290
14291     EVT BCVT = InVec.getOperand(0).getValueType();
14292     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
14293       return SDValue();
14294     if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
14295       BCNumEltsChanged = true;
14296     InVec = InVec.getOperand(0);
14297     ExtVT = BCVT.getVectorElementType();
14298   }
14299
14300   // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
14301   if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
14302       ISD::isNormalLoad(InVec.getNode()) &&
14303       !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
14304     SDValue Index = N->getOperand(1);
14305     if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
14306       if (!OrigLoad->isVolatile()) {
14307         return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
14308                                                              OrigLoad);
14309       }
14310     }
14311   }
14312
14313   // Perform only after legalization to ensure build_vector / vector_shuffle
14314   // optimizations have already been done.
14315   if (!LegalOperations) return SDValue();
14316
14317   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
14318   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
14319   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
14320
14321   if (ConstEltNo) {
14322     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
14323
14324     LoadSDNode *LN0 = nullptr;
14325     const ShuffleVectorSDNode *SVN = nullptr;
14326     if (ISD::isNormalLoad(InVec.getNode())) {
14327       LN0 = cast<LoadSDNode>(InVec);
14328     } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
14329                InVec.getOperand(0).getValueType() == ExtVT &&
14330                ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
14331       // Don't duplicate a load with other uses.
14332       if (!InVec.hasOneUse())
14333         return SDValue();
14334
14335       LN0 = cast<LoadSDNode>(InVec.getOperand(0));
14336     } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
14337       // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
14338       // =>
14339       // (load $addr+1*size)
14340
14341       // Don't duplicate a load with other uses.
14342       if (!InVec.hasOneUse())
14343         return SDValue();
14344
14345       // If the bit convert changed the number of elements, it is unsafe
14346       // to examine the mask.
14347       if (BCNumEltsChanged)
14348         return SDValue();
14349
14350       // Select the input vector, guarding against out of range extract vector.
14351       unsigned NumElems = VT.getVectorNumElements();
14352       int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
14353       InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
14354
14355       if (InVec.getOpcode() == ISD::BITCAST) {
14356         // Don't duplicate a load with other uses.
14357         if (!InVec.hasOneUse())
14358           return SDValue();
14359
14360         InVec = InVec.getOperand(0);
14361       }
14362       if (ISD::isNormalLoad(InVec.getNode())) {
14363         LN0 = cast<LoadSDNode>(InVec);
14364         Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
14365         EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
14366       }
14367     }
14368
14369     // Make sure we found a non-volatile load and the extractelement is
14370     // the only use.
14371     if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
14372       return SDValue();
14373
14374     // If Idx was -1 above, Elt is going to be -1, so just return undef.
14375     if (Elt == -1)
14376       return DAG.getUNDEF(LVT);
14377
14378     return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
14379   }
14380
14381   return SDValue();
14382 }
14383
14384 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
14385 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
14386   // We perform this optimization post type-legalization because
14387   // the type-legalizer often scalarizes integer-promoted vectors.
14388   // Performing this optimization before may create bit-casts which
14389   // will be type-legalized to complex code sequences.
14390   // We perform this optimization only before the operation legalizer because we
14391   // may introduce illegal operations.
14392   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
14393     return SDValue();
14394
14395   unsigned NumInScalars = N->getNumOperands();
14396   SDLoc DL(N);
14397   EVT VT = N->getValueType(0);
14398
14399   // Check to see if this is a BUILD_VECTOR of a bunch of values
14400   // which come from any_extend or zero_extend nodes. If so, we can create
14401   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
14402   // optimizations. We do not handle sign-extend because we can't fill the sign
14403   // using shuffles.
14404   EVT SourceType = MVT::Other;
14405   bool AllAnyExt = true;
14406
14407   for (unsigned i = 0; i != NumInScalars; ++i) {
14408     SDValue In = N->getOperand(i);
14409     // Ignore undef inputs.
14410     if (In.isUndef()) continue;
14411
14412     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
14413     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
14414
14415     // Abort if the element is not an extension.
14416     if (!ZeroExt && !AnyExt) {
14417       SourceType = MVT::Other;
14418       break;
14419     }
14420
14421     // The input is a ZeroExt or AnyExt. Check the original type.
14422     EVT InTy = In.getOperand(0).getValueType();
14423
14424     // Check that all of the widened source types are the same.
14425     if (SourceType == MVT::Other)
14426       // First time.
14427       SourceType = InTy;
14428     else if (InTy != SourceType) {
14429       // Multiple income types. Abort.
14430       SourceType = MVT::Other;
14431       break;
14432     }
14433
14434     // Check if all of the extends are ANY_EXTENDs.
14435     AllAnyExt &= AnyExt;
14436   }
14437
14438   // In order to have valid types, all of the inputs must be extended from the
14439   // same source type and all of the inputs must be any or zero extend.
14440   // Scalar sizes must be a power of two.
14441   EVT OutScalarTy = VT.getScalarType();
14442   bool ValidTypes = SourceType != MVT::Other &&
14443                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
14444                  isPowerOf2_32(SourceType.getSizeInBits());
14445
14446   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
14447   // turn into a single shuffle instruction.
14448   if (!ValidTypes)
14449     return SDValue();
14450
14451   bool isLE = DAG.getDataLayout().isLittleEndian();
14452   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
14453   assert(ElemRatio > 1 && "Invalid element size ratio");
14454   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
14455                                DAG.getConstant(0, DL, SourceType);
14456
14457   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
14458   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
14459
14460   // Populate the new build_vector
14461   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
14462     SDValue Cast = N->getOperand(i);
14463     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
14464             Cast.getOpcode() == ISD::ZERO_EXTEND ||
14465             Cast.isUndef()) && "Invalid cast opcode");
14466     SDValue In;
14467     if (Cast.isUndef())
14468       In = DAG.getUNDEF(SourceType);
14469     else
14470       In = Cast->getOperand(0);
14471     unsigned Index = isLE ? (i * ElemRatio) :
14472                             (i * ElemRatio + (ElemRatio - 1));
14473
14474     assert(Index < Ops.size() && "Invalid index");
14475     Ops[Index] = In;
14476   }
14477
14478   // The type of the new BUILD_VECTOR node.
14479   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
14480   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
14481          "Invalid vector size");
14482   // Check if the new vector type is legal.
14483   if (!isTypeLegal(VecVT)) return SDValue();
14484
14485   // Make the new BUILD_VECTOR.
14486   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
14487
14488   // The new BUILD_VECTOR node has the potential to be further optimized.
14489   AddToWorklist(BV.getNode());
14490   // Bitcast to the desired type.
14491   return DAG.getBitcast(VT, BV);
14492 }
14493
14494 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
14495   EVT VT = N->getValueType(0);
14496
14497   unsigned NumInScalars = N->getNumOperands();
14498   SDLoc DL(N);
14499
14500   EVT SrcVT = MVT::Other;
14501   unsigned Opcode = ISD::DELETED_NODE;
14502   unsigned NumDefs = 0;
14503
14504   for (unsigned i = 0; i != NumInScalars; ++i) {
14505     SDValue In = N->getOperand(i);
14506     unsigned Opc = In.getOpcode();
14507
14508     if (Opc == ISD::UNDEF)
14509       continue;
14510
14511     // If all scalar values are floats and converted from integers.
14512     if (Opcode == ISD::DELETED_NODE &&
14513         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
14514       Opcode = Opc;
14515     }
14516
14517     if (Opc != Opcode)
14518       return SDValue();
14519
14520     EVT InVT = In.getOperand(0).getValueType();
14521
14522     // If all scalar values are typed differently, bail out. It's chosen to
14523     // simplify BUILD_VECTOR of integer types.
14524     if (SrcVT == MVT::Other)
14525       SrcVT = InVT;
14526     if (SrcVT != InVT)
14527       return SDValue();
14528     NumDefs++;
14529   }
14530
14531   // If the vector has just one element defined, it's not worth to fold it into
14532   // a vectorized one.
14533   if (NumDefs < 2)
14534     return SDValue();
14535
14536   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
14537          && "Should only handle conversion from integer to float.");
14538   assert(SrcVT != MVT::Other && "Cannot determine source type!");
14539
14540   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
14541
14542   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
14543     return SDValue();
14544
14545   // Just because the floating-point vector type is legal does not necessarily
14546   // mean that the corresponding integer vector type is.
14547   if (!isTypeLegal(NVT))
14548     return SDValue();
14549
14550   SmallVector<SDValue, 8> Opnds;
14551   for (unsigned i = 0; i != NumInScalars; ++i) {
14552     SDValue In = N->getOperand(i);
14553
14554     if (In.isUndef())
14555       Opnds.push_back(DAG.getUNDEF(SrcVT));
14556     else
14557       Opnds.push_back(In.getOperand(0));
14558   }
14559   SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
14560   AddToWorklist(BV.getNode());
14561
14562   return DAG.getNode(Opcode, DL, VT, BV);
14563 }
14564
14565 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
14566                                            ArrayRef<int> VectorMask,
14567                                            SDValue VecIn1, SDValue VecIn2,
14568                                            unsigned LeftIdx) {
14569   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
14570   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
14571
14572   EVT VT = N->getValueType(0);
14573   EVT InVT1 = VecIn1.getValueType();
14574   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
14575
14576   unsigned Vec2Offset = 0;
14577   unsigned NumElems = VT.getVectorNumElements();
14578   unsigned ShuffleNumElems = NumElems;
14579
14580   // In case both the input vectors are extracted from same base
14581   // vector we do not need extra addend (Vec2Offset) while
14582   // computing shuffle mask.
14583   if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
14584       !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
14585       !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
14586     Vec2Offset = InVT1.getVectorNumElements();
14587
14588   // We can't generate a shuffle node with mismatched input and output types.
14589   // Try to make the types match the type of the output.
14590   if (InVT1 != VT || InVT2 != VT) {
14591     if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
14592       // If the output vector length is a multiple of both input lengths,
14593       // we can concatenate them and pad the rest with undefs.
14594       unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
14595       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
14596       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
14597       ConcatOps[0] = VecIn1;
14598       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
14599       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
14600       VecIn2 = SDValue();
14601     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
14602       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
14603         return SDValue();
14604
14605       if (!VecIn2.getNode()) {
14606         // If we only have one input vector, and it's twice the size of the
14607         // output, split it in two.
14608         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
14609                              DAG.getConstant(NumElems, DL, IdxTy));
14610         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
14611         // Since we now have shorter input vectors, adjust the offset of the
14612         // second vector's start.
14613         Vec2Offset = NumElems;
14614       } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
14615         // VecIn1 is wider than the output, and we have another, possibly
14616         // smaller input. Pad the smaller input with undefs, shuffle at the
14617         // input vector width, and extract the output.
14618         // The shuffle type is different than VT, so check legality again.
14619         if (LegalOperations &&
14620             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
14621           return SDValue();
14622
14623         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
14624         // lower it back into a BUILD_VECTOR. So if the inserted type is
14625         // illegal, don't even try.
14626         if (InVT1 != InVT2) {
14627           if (!TLI.isTypeLegal(InVT2))
14628             return SDValue();
14629           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
14630                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
14631         }
14632         ShuffleNumElems = NumElems * 2;
14633       } else {
14634         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
14635         // than VecIn1. We can't handle this for now - this case will disappear
14636         // when we start sorting the vectors by type.
14637         return SDValue();
14638       }
14639     } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
14640                InVT1.getSizeInBits() == VT.getSizeInBits()) {
14641       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
14642       ConcatOps[0] = VecIn2;
14643       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
14644     } else {
14645       // TODO: Support cases where the length mismatch isn't exactly by a
14646       // factor of 2.
14647       // TODO: Move this check upwards, so that if we have bad type
14648       // mismatches, we don't create any DAG nodes.
14649       return SDValue();
14650     }
14651   }
14652
14653   // Initialize mask to undef.
14654   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
14655
14656   // Only need to run up to the number of elements actually used, not the
14657   // total number of elements in the shuffle - if we are shuffling a wider
14658   // vector, the high lanes should be set to undef.
14659   for (unsigned i = 0; i != NumElems; ++i) {
14660     if (VectorMask[i] <= 0)
14661       continue;
14662
14663     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
14664     if (VectorMask[i] == (int)LeftIdx) {
14665       Mask[i] = ExtIndex;
14666     } else if (VectorMask[i] == (int)LeftIdx + 1) {
14667       Mask[i] = Vec2Offset + ExtIndex;
14668     }
14669   }
14670
14671   // The type the input vectors may have changed above.
14672   InVT1 = VecIn1.getValueType();
14673
14674   // If we already have a VecIn2, it should have the same type as VecIn1.
14675   // If we don't, get an undef/zero vector of the appropriate type.
14676   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
14677   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
14678
14679   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
14680   if (ShuffleNumElems > NumElems)
14681     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
14682
14683   return Shuffle;
14684 }
14685
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
//
// Every operand of N must be undef, a zero constant (integer or FP), or an
// EXTRACT_VECTOR_ELT with a constant index whose source vector has the same
// element type as the result. An empty SDValue is returned if that does not
// hold, if the result type is not legal, if shuffles of the result type are
// not legal once operations have been legalized, if no operand extracts from
// a vector, or if createBuildVecShuffle fails for any pair of input vectors.
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  bool UsesZeroVector = false;
  unsigned NumElems = N->getNumOperands();

  // Record, for each element of the newly built vector, which input vector
  // that element comes from. -1 stands for undef, 0 for the zero vector,
  // and positive values for the input vectors.
  // VectorMask maps each element to its vector number, and VecIn maps vector
  // numbers to their initial SDValues.

  SmallVector<int, 8> VectorMask(NumElems, -1);
  SmallVector<SDValue, 8> VecIn;
  // Slot 0 is a placeholder so that real input vectors are numbered from 1;
  // number 0 is reserved for the zero vector.
  VecIn.push_back(SDValue());

  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    // Undef lanes keep their initial -1 entry.
    if (Op.isUndef())
      continue;

    // See if we can use a blend with a zero vector.
    // TODO: Should we generalize this to a blend with an arbitrary constant
    // vector?
    if (isNullConstant(Op) || isNullFPConstant(Op)) {
      UsesZeroVector = true;
      VectorMask[i] = 0;
      continue;
    }

    // Not an undef or zero. If the input is something other than an
    // EXTRACT_VECTOR_ELT with a constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    SDValue ExtractedFromVec = Op.getOperand(0);

    // All inputs must have the same element type as the output.
    if (VT.getVectorElementType() !=
        ExtractedFromVec.getValueType().getVectorElementType())
      return SDValue();

    // Have we seen this input vector before?
    // The vectors are expected to be tiny (usually 1 or 2 elements), so using
    // a map back from SDValues to numbers isn't worth it.
    unsigned Idx = std::distance(
        VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
    // Not found: the new vector gets the next free number.
    if (Idx == VecIn.size())
      VecIn.push_back(ExtractedFromVec);

    VectorMask[i] = Idx;
  }

  // If we didn't find at least one input vector, bail out.
  if (VecIn.size() < 2)
    return SDValue();

  // If all the Operands of BUILD_VECTOR extract from same
  // vector, then split the vector efficiently based on the maximum
  // vector access index and adjust the VectorMask and
  // VecIn accordingly.
  if (VecIn.size() == 2) {
    unsigned MaxIndex = 0;
    unsigned NearestPow2 = 0;
    SDValue Vec = VecIn.back();
    EVT InVT = Vec.getValueType();
    MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
    // Extract index of every lane that reads from Vec (0 for other lanes).
    SmallVector<unsigned, 8> IndexVec(NumElems, 0);

    for (unsigned i = 0; i < NumElems; i++) {
      if (VectorMask[i] <= 0)
        continue;
      unsigned Index = N->getOperand(i).getConstantOperandVal(1);
      IndexVec[i] = Index;
      MaxIndex = std::max(MaxIndex, Index);
    }

    NearestPow2 = PowerOf2Ceil(MaxIndex);
    // Only split when the input is a simple type, the largest index is
    // strictly below NearestPow2, and NearestPow2 is more than twice the
    // number of output elements.
    if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
        NumElems * 2 < NearestPow2) {
      unsigned SplitSize = NearestPow2 / 2;
      EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
                                     InVT.getVectorElementType(), SplitSize);
      if (TLI.isTypeLegal(SplitVT)) {
        // Replace Vec by its two SplitSize-element pieces starting at
        // element 0 and element SplitSize.
        SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(SplitSize, DL, IdxTy));
        SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(0, DL, IdxTy));
        VecIn.pop_back();
        VecIn.push_back(VecIn1);
        VecIn.push_back(VecIn2);

        // Renumber the lanes: indices below SplitSize now belong to input
        // vector 1, the rest to input vector 2. The extract indices stored in
        // N's operands are still relative to the unsplit vector.
        for (unsigned i = 0; i < NumElems; i++) {
          if (VectorMask[i] <= 0)
            continue;
          VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
        }
      }
    }
  }

  // TODO: We want to sort the vectors by descending length, so that adjacent
  // pairs have similar length, and the longer vector is always first in the
  // pair.

  // TODO: Should this fire if some of the input vectors has illegal type (like
  // it does now), or should we let legalization run its course first?

  // Shuffle phase:
  // Take pairs of vectors, and shuffle them so that the result has elements
  // from these vectors in the correct places.
  // For example, given:
  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
  // We will generate:
  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
  SmallVector<SDValue, 4> Shuffles;
  // VecIn[0] is the placeholder, so pair number In covers VecIn[2*In+1] and,
  // if it exists, VecIn[2*In+2].
  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
    unsigned LeftIdx = 2 * In + 1;
    SDValue VecLeft = VecIn[LeftIdx];
    SDValue VecRight =
        (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();

    if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
                                                VecRight, LeftIdx))
      Shuffles.push_back(Shuffle);
    else
      return SDValue();
  }

  // If we need the zero vector as an "ingredient" in the blend tree, add it
  // to the list of shuffles.
  if (UsesZeroVector)
    Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
                                      : DAG.getConstantFP(0.0, DL, VT));

  // If we only have one shuffle, we're done.
  if (Shuffles.size() == 1)
    return Shuffles[0];

  // Update the vector mask to point to the post-shuffle vectors.
  // Zero lanes point at the zero vector, which was appended last. Input
  // vector numbers 2k+1 and 2k+2 both map to shuffle k, and -1 (undef) stays
  // -1.
  for (int &Vec : VectorMask)
    if (Vec == 0)
      Vec = Shuffles.size() - 1;
    else
      Vec = (Vec - 1) / 2;

  // More than one shuffle. Generate a binary tree of blends, e.g. if from
  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
  // generate:
  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21

  // Make sure the initial size of the shuffle list is even.
  if (Shuffles.size() % 2)
    Shuffles.push_back(DAG.getUNDEF(VT));

  // Each round blends adjacent pairs in place, writing the blend of entries
  // 2*In and 2*In+1 back to entry In, which halves the number of live entries.
  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
    // An odd number of live entries remains: overwrite the stale entry just
    // past them with undef so that the last live entry has a partner.
    if (CurSize % 2) {
      Shuffles[CurSize] = DAG.getUNDEF(VT);
      CurSize++;
    }
    for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
      int Left = 2 * In;
      int Right = 2 * In + 1;
      SmallVector<int, 8> Mask(NumElems, -1);
      for (unsigned i = 0; i != NumElems; ++i) {
        // Each lane keeps its position; only the source operand differs.
        // Lanes taken from the right-hand shuffle are addressed past the
        // NumElems lanes of the left-hand one.
        if (VectorMask[i] == Left) {
          Mask[i] = i;
          VectorMask[i] = In;
        } else if (VectorMask[i] == Right) {
          Mask[i] = i + NumElems;
          VectorMask[i] = In;
        }
      }

      Shuffles[In] =
          DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
    }
  }
  return Shuffles[0];
}
14890
14891 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
14892   EVT VT = N->getValueType(0);
14893
14894   // A vector built entirely of undefs is undef.
14895   if (ISD::allOperandsUndef(N))
14896     return DAG.getUNDEF(VT);
14897
14898   // Check if we can express BUILD VECTOR via subvector extract.
14899   if (!LegalTypes && (N->getNumOperands() > 1)) {
14900     SDValue Op0 = N->getOperand(0);
14901     auto checkElem = [&](SDValue Op) -> uint64_t {
14902       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
14903           (Op0.getOperand(0) == Op.getOperand(0)))
14904         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
14905           return CNode->getZExtValue();
14906       return -1;
14907     };
14908
14909     int Offset = checkElem(Op0);
14910     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
14911       if (Offset + i != checkElem(N->getOperand(i))) {
14912         Offset = -1;
14913         break;
14914       }
14915     }
14916
14917     if ((Offset == 0) &&
14918         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
14919       return Op0.getOperand(0);
14920     if ((Offset != -1) &&
14921         ((Offset % N->getValueType(0).getVectorNumElements()) ==
14922          0)) // IDX must be multiple of output size.
14923       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
14924                          Op0.getOperand(0), Op0.getOperand(1));
14925   }
14926
14927   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
14928     return V;
14929
14930   if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
14931     return V;
14932
14933   if (SDValue V = reduceBuildVecToShuffle(N))
14934     return V;
14935
14936   return SDValue();
14937 }
14938
14939 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
14940   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14941   EVT OpVT = N->getOperand(0).getValueType();
14942
14943   // If the operands are legal vectors, leave them alone.
14944   if (TLI.isTypeLegal(OpVT))
14945     return SDValue();
14946
14947   SDLoc DL(N);
14948   EVT VT = N->getValueType(0);
14949   SmallVector<SDValue, 8> Ops;
14950
14951   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
14952   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
14953
14954   // Keep track of what we encounter.
14955   bool AnyInteger = false;
14956   bool AnyFP = false;
14957   for (const SDValue &Op : N->ops()) {
14958     if (ISD::BITCAST == Op.getOpcode() &&
14959         !Op.getOperand(0).getValueType().isVector())
14960       Ops.push_back(Op.getOperand(0));
14961     else if (ISD::UNDEF == Op.getOpcode())
14962       Ops.push_back(ScalarUndef);
14963     else
14964       return SDValue();
14965
14966     // Note whether we encounter an integer or floating point scalar.
14967     // If it's neither, bail out, it could be something weird like x86mmx.
14968     EVT LastOpVT = Ops.back().getValueType();
14969     if (LastOpVT.isFloatingPoint())
14970       AnyFP = true;
14971     else if (LastOpVT.isInteger())
14972       AnyInteger = true;
14973     else
14974       return SDValue();
14975   }
14976
14977   // If any of the operands is a floating point scalar bitcast to a vector,
14978   // use floating point types throughout, and bitcast everything.
14979   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
14980   if (AnyFP) {
14981     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
14982     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
14983     if (AnyInteger) {
14984       for (SDValue &Op : Ops) {
14985         if (Op.getValueType() == SVT)
14986           continue;
14987         if (Op.isUndef())
14988           Op = ScalarUndef;
14989         else
14990           Op = DAG.getBitcast(SVT, Op);
14991       }
14992     }
14993   }
14994
14995   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
14996                                VT.getSizeInBits() / SVT.getSizeInBits());
14997   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
14998 }
14999
15000 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
15001 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
15002 // most two distinct vectors the same size as the result, attempt to turn this
15003 // into a legal shuffle.
15004 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
15005   EVT VT = N->getValueType(0);
15006   EVT OpVT = N->getOperand(0).getValueType();
15007   int NumElts = VT.getVectorNumElements();
15008   int NumOpElts = OpVT.getVectorNumElements();
15009
15010   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
15011   SmallVector<int, 8> Mask;
15012
15013   for (SDValue Op : N->ops()) {
15014     // Peek through any bitcast.
15015     Op = peekThroughBitcast(Op);
15016
15017     // UNDEF nodes convert to UNDEF shuffle mask values.
15018     if (Op.isUndef()) {
15019       Mask.append((unsigned)NumOpElts, -1);
15020       continue;
15021     }
15022
15023     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
15024       return SDValue();
15025
15026     // What vector are we extracting the subvector from and at what index?
15027     SDValue ExtVec = Op.getOperand(0);
15028
15029     // We want the EVT of the original extraction to correctly scale the
15030     // extraction index.
15031     EVT ExtVT = ExtVec.getValueType();
15032
15033     // Peek through any bitcast.
15034     ExtVec = peekThroughBitcast(ExtVec);
15035
15036     // UNDEF nodes convert to UNDEF shuffle mask values.
15037     if (ExtVec.isUndef()) {
15038       Mask.append((unsigned)NumOpElts, -1);
15039       continue;
15040     }
15041
15042     if (!isa<ConstantSDNode>(Op.getOperand(1)))
15043       return SDValue();
15044     int ExtIdx = Op.getConstantOperandVal(1);
15045
15046     // Ensure that we are extracting a subvector from a vector the same
15047     // size as the result.
15048     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
15049       return SDValue();
15050
15051     // Scale the subvector index to account for any bitcast.
15052     int NumExtElts = ExtVT.getVectorNumElements();
15053     if (0 == (NumExtElts % NumElts))
15054       ExtIdx /= (NumExtElts / NumElts);
15055     else if (0 == (NumElts % NumExtElts))
15056       ExtIdx *= (NumElts / NumExtElts);
15057     else
15058       return SDValue();
15059
15060     // At most we can reference 2 inputs in the final shuffle.
15061     if (SV0.isUndef() || SV0 == ExtVec) {
15062       SV0 = ExtVec;
15063       for (int i = 0; i != NumOpElts; ++i)
15064         Mask.push_back(i + ExtIdx);
15065     } else if (SV1.isUndef() || SV1 == ExtVec) {
15066       SV1 = ExtVec;
15067       for (int i = 0; i != NumOpElts; ++i)
15068         Mask.push_back(i + ExtIdx + NumElts);
15069     } else {
15070       return SDValue();
15071     }
15072   }
15073
15074   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
15075     return SDValue();
15076
15077   return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
15078                               DAG.getBitcast(VT, SV1), Mask);
15079 }
15080
15081 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
15082   // If we only have one input vector, we don't need to do any concatenation.
15083   if (N->getNumOperands() == 1)
15084     return N->getOperand(0);
15085
15086   // Check if all of the operands are undefs.
15087   EVT VT = N->getValueType(0);
15088   if (ISD::allOperandsUndef(N))
15089     return DAG.getUNDEF(VT);
15090
15091   // Optimize concat_vectors where all but the first of the vectors are undef.
15092   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
15093         return Op.isUndef();
15094       })) {
15095     SDValue In = N->getOperand(0);
15096     assert(In.getValueType().isVector() && "Must concat vectors");
15097
15098     // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
15099     if (In->getOpcode() == ISD::BITCAST &&
15100         !In->getOperand(0).getValueType().isVector()) {
15101       SDValue Scalar = In->getOperand(0);
15102
15103       // If the bitcast type isn't legal, it might be a trunc of a legal type;
15104       // look through the trunc so we can still do the transform:
15105       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
15106       if (Scalar->getOpcode() == ISD::TRUNCATE &&
15107           !TLI.isTypeLegal(Scalar.getValueType()) &&
15108           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
15109         Scalar = Scalar->getOperand(0);
15110
15111       EVT SclTy = Scalar->getValueType(0);
15112
15113       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
15114         return SDValue();
15115
15116       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
15117       if (VNTNumElms < 2)
15118         return SDValue();
15119
15120       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
15121       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
15122         return SDValue();
15123
15124       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
15125       return DAG.getBitcast(VT, Res);
15126     }
15127   }
15128
15129   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
15130   // We have already tested above for an UNDEF only concatenation.
15131   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
15132   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
15133   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
15134     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
15135   };
15136   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
15137     SmallVector<SDValue, 8> Opnds;
15138     EVT SVT = VT.getScalarType();
15139
15140     EVT MinVT = SVT;
15141     if (!SVT.isFloatingPoint()) {
15142       // If BUILD_VECTOR are from built from integer, they may have different
15143       // operand types. Get the smallest type and truncate all operands to it.
15144       bool FoundMinVT = false;
15145       for (const SDValue &Op : N->ops())
15146         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
15147           EVT OpSVT = Op.getOperand(0).getValueType();
15148           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
15149           FoundMinVT = true;
15150         }
15151       assert(FoundMinVT && "Concat vector type mismatch");
15152     }
15153
15154     for (const SDValue &Op : N->ops()) {
15155       EVT OpVT = Op.getValueType();
15156       unsigned NumElts = OpVT.getVectorNumElements();
15157
15158       if (ISD::UNDEF == Op.getOpcode())
15159         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
15160
15161       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
15162         if (SVT.isFloatingPoint()) {
15163           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
15164           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
15165         } else {
15166           for (unsigned i = 0; i != NumElts; ++i)
15167             Opnds.push_back(
15168                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
15169         }
15170       }
15171     }
15172
15173     assert(VT.getVectorNumElements() == Opnds.size() &&
15174            "Concat vector type mismatch");
15175     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
15176   }
15177
15178   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
15179   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
15180     return V;
15181
15182   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
15183   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
15184     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
15185       return V;
15186
15187   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
15188   // nodes often generate nop CONCAT_VECTOR nodes.
15189   // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
15190   // place the incoming vectors at the exact same location.
15191   SDValue SingleSource = SDValue();
15192   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
15193
15194   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
15195     SDValue Op = N->getOperand(i);
15196
15197     if (Op.isUndef())
15198       continue;
15199
15200     // Check if this is the identity extract:
15201     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
15202       return SDValue();
15203
15204     // Find the single incoming vector for the extract_subvector.
15205     if (SingleSource.getNode()) {
15206       if (Op.getOperand(0) != SingleSource)
15207         return SDValue();
15208     } else {
15209       SingleSource = Op.getOperand(0);
15210
15211       // Check the source type is the same as the type of the result.
15212       // If not, this concat may extend the vector, so we can not
15213       // optimize it away.
15214       if (SingleSource.getValueType() != N->getValueType(0))
15215         return SDValue();
15216     }
15217
15218     unsigned IdentityIndex = i * PartNumElem;
15219     ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
15220     // The extract index must be constant.
15221     if (!CS)
15222       return SDValue();
15223
15224     // Check that we are reading from the identity index.
15225     if (CS->getZExtValue() != IdentityIndex)
15226       return SDValue();
15227   }
15228
15229   if (SingleSource.getNode())
15230     return SingleSource;
15231
15232   return SDValue();
15233 }
15234
/// If we are extracting a subvector produced by a wide binary operator with at
/// least one operand that was the result of a vector concatenation, then try
/// to use the narrow vector operands directly to avoid the concatenation and
/// extraction.
15239 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
15240   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
15241   // some of these bailouts with other transforms.
15242
15243   // The extract index must be a constant, so we can map it to a concat operand.
15244   auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
15245   if (!ExtractIndex)
15246     return SDValue();
15247
15248   // Only handle the case where we are doubling and then halving. A larger ratio
15249   // may require more than two narrow binops to replace the wide binop.
15250   EVT VT = Extract->getValueType(0);
15251   unsigned NumElems = VT.getVectorNumElements();
15252   assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
15253          "Extract index is not a multiple of the vector length.");
15254   if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
15255     return SDValue();
15256
15257   // We are looking for an optionally bitcasted wide vector binary operator
15258   // feeding an extract subvector.
15259   SDValue BinOp = peekThroughBitcast(Extract->getOperand(0));
15260
15261   // TODO: The motivating case for this transform is an x86 AVX1 target. That
15262   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
15263   // flavors, but no other 256-bit integer support. This could be extended to
15264   // handle any binop, but that may require fixing/adding other folds to avoid
15265   // codegen regressions.
15266   unsigned BOpcode = BinOp.getOpcode();
15267   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
15268     return SDValue();
15269
15270   // The binop must be a vector type, so we can chop it in half.
15271   EVT WideBVT = BinOp.getValueType();
15272   if (!WideBVT.isVector())
15273     return SDValue();
15274
15275   // Bail out if the target does not support a narrower version of the binop.
15276   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
15277                                    WideBVT.getVectorNumElements() / 2);
15278   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15279   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
15280     return SDValue();
15281
15282   // Peek through bitcasts of the binary operator operands if needed.
15283   SDValue LHS = peekThroughBitcast(BinOp.getOperand(0));
15284   SDValue RHS = peekThroughBitcast(BinOp.getOperand(1));
15285
15286   // We need at least one concatenation operation of a binop operand to make
15287   // this transform worthwhile. The concat must double the input vector sizes.
15288   // TODO: Should we also handle INSERT_SUBVECTOR patterns?
15289   bool ConcatL =
15290       LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
15291   bool ConcatR =
15292       RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
15293   if (!ConcatL && !ConcatR)
15294     return SDValue();
15295
15296   // If one of the binop operands was not the result of a concat, we must
15297   // extract a half-sized operand for our new narrow binop. We can't just reuse
15298   // the original extract index operand because we may have bitcasted.
15299   unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
15300   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
15301   EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
15302   SDLoc DL(Extract);
15303
15304   // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
15305   // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
15306   // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
15307   SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
15308                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
15309                                     BinOp.getOperand(0),
15310                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
15311
15312   SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
15313                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
15314                                     BinOp.getOperand(1),
15315                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
15316
15317   SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
15318   return DAG.getBitcast(VT, NarrowBinOp);
15319 }
15320
15321 /// If we are extracting a subvector from a wide vector load, convert to a
15322 /// narrow load to eliminate the extraction:
15323 /// (extract_subvector (load wide vector)) --> (load narrow vector)
15324 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
15325   // TODO: Add support for big-endian. The offset calculation must be adjusted.
15326   if (DAG.getDataLayout().isBigEndian())
15327     return SDValue();
15328
15329   // TODO: The one-use check is overly conservative. Check the cost of the
15330   // extract instead or remove that condition entirely.
15331   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
15332   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
15333   if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
15334       !ExtIdx)
15335     return SDValue();
15336
15337   // The narrow load will be offset from the base address of the old load if
15338   // we are extracting from something besides index 0 (little-endian).
15339   EVT VT = Extract->getValueType(0);
15340   SDLoc DL(Extract);
15341   SDValue BaseAddr = Ld->getOperand(1);
15342   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
15343
15344   // TODO: Use "BaseIndexOffset" to make this more effective.
15345   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
15346   MachineFunction &MF = DAG.getMachineFunction();
15347   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
15348                                                    VT.getStoreSize());
15349   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
15350   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
15351   return NewLd;
15352 }
15353
15354 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
15355   EVT NVT = N->getValueType(0);
15356   SDValue V = N->getOperand(0);
15357
15358   // Extract from UNDEF is UNDEF.
15359   if (V.isUndef())
15360     return DAG.getUNDEF(NVT);
15361
15362   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
15363     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
15364       return NarrowLoad;
15365
15366   // Combine:
15367   //    (extract_subvec (concat V1, V2, ...), i)
15368   // Into:
15369   //    Vi if possible
15370   // Only operand 0 is checked as 'concat' assumes all inputs of the same
15371   // type.
15372   if (V->getOpcode() == ISD::CONCAT_VECTORS &&
15373       isa<ConstantSDNode>(N->getOperand(1)) &&
15374       V->getOperand(0).getValueType() == NVT) {
15375     unsigned Idx = N->getConstantOperandVal(1);
15376     unsigned NumElems = NVT.getVectorNumElements();
15377     assert((Idx % NumElems) == 0 &&
15378            "IDX in concat is not a multiple of the result vector length.");
15379     return V->getOperand(Idx / NumElems);
15380   }
15381
15382   // Skip bitcasting
15383   V = peekThroughBitcast(V);
15384
15385   // If the input is a build vector. Try to make a smaller build vector.
15386   if (V->getOpcode() == ISD::BUILD_VECTOR) {
15387     if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
15388       EVT InVT = V->getValueType(0);
15389       unsigned ExtractSize = NVT.getSizeInBits();
15390       unsigned EltSize = InVT.getScalarSizeInBits();
15391       // Only do this if we won't split any elements.
15392       if (ExtractSize % EltSize == 0) {
15393         unsigned NumElems = ExtractSize / EltSize;
15394         EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(),
15395                                          InVT.getVectorElementType(), NumElems);
15396         if ((!LegalOperations ||
15397              TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT)) &&
15398             (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
15399           unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
15400                             EltSize;
15401
15402           // Extract the pieces from the original build_vector.
15403           SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
15404                                             makeArrayRef(V->op_begin() + IdxVal,
15405                                                          NumElems));
15406           return DAG.getBitcast(NVT, BuildVec);
15407         }
15408       }
15409     }
15410   }
15411
15412   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
15413     // Handle only simple case where vector being inserted and vector
15414     // being extracted are of same size.
15415     EVT SmallVT = V->getOperand(1).getValueType();
15416     if (!NVT.bitsEq(SmallVT))
15417       return SDValue();
15418
15419     // Only handle cases where both indexes are constants.
15420     ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
15421     ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
15422
15423     if (InsIdx && ExtIdx) {
15424       // Combine:
15425       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
15426       // Into:
15427       //    indices are equal or bit offsets are equal => V1
15428       //    otherwise => (extract_subvec V1, ExtIdx)
15429       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
15430           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
15431         return DAG.getBitcast(NVT, V->getOperand(1));
15432       return DAG.getNode(
15433           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
15434           DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
15435           N->getOperand(1));
15436     }
15437   }
15438
15439   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
15440     return NarrowBOp;
15441
15442   return SDValue();
15443 }
15444
15445 static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
15446                                                  SDValue V, SelectionDAG &DAG) {
15447   SDLoc DL(V);
15448   EVT VT = V.getValueType();
15449
15450   switch (V.getOpcode()) {
15451   default:
15452     return V;
15453
15454   case ISD::CONCAT_VECTORS: {
15455     EVT OpVT = V->getOperand(0).getValueType();
15456     int OpSize = OpVT.getVectorNumElements();
15457     SmallBitVector OpUsedElements(OpSize, false);
15458     bool FoundSimplification = false;
15459     SmallVector<SDValue, 4> NewOps;
15460     NewOps.reserve(V->getNumOperands());
15461     for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
15462       SDValue Op = V->getOperand(i);
15463       bool OpUsed = false;
15464       for (int j = 0; j < OpSize; ++j)
15465         if (UsedElements[i * OpSize + j]) {
15466           OpUsedElements[j] = true;
15467           OpUsed = true;
15468         }
15469       NewOps.push_back(
15470           OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
15471                  : DAG.getUNDEF(OpVT));
15472       FoundSimplification |= Op == NewOps.back();
15473       OpUsedElements.reset();
15474     }
15475     if (FoundSimplification)
15476       V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
15477     return V;
15478   }
15479
15480   case ISD::INSERT_SUBVECTOR: {
15481     SDValue BaseV = V->getOperand(0);
15482     SDValue SubV = V->getOperand(1);
15483     auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
15484     if (!IdxN)
15485       return V;
15486
15487     int SubSize = SubV.getValueType().getVectorNumElements();
15488     int Idx = IdxN->getZExtValue();
15489     bool SubVectorUsed = false;
15490     SmallBitVector SubUsedElements(SubSize, false);
15491     for (int i = 0; i < SubSize; ++i)
15492       if (UsedElements[i + Idx]) {
15493         SubVectorUsed = true;
15494         SubUsedElements[i] = true;
15495         UsedElements[i + Idx] = false;
15496       }
15497
15498     // Now recurse on both the base and sub vectors.
15499     SDValue SimplifiedSubV =
15500         SubVectorUsed
15501             ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
15502             : DAG.getUNDEF(SubV.getValueType());
15503     SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
15504     if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
15505       V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
15506                       SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
15507     return V;
15508   }
15509   }
15510 }
15511
15512 static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
15513                                        SDValue N1, SelectionDAG &DAG) {
15514   EVT VT = SVN->getValueType(0);
15515   int NumElts = VT.getVectorNumElements();
15516   SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
15517   for (int M : SVN->getMask())
15518     if (M >= 0 && M < NumElts)
15519       N0UsedElements[M] = true;
15520     else if (M >= NumElts)
15521       N1UsedElements[M - NumElts] = true;
15522
15523   SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
15524   SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
15525   if (S0 == N0 && S1 == N1)
15526     return SDValue();
15527
15528   return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
15529 }
15530
15531 static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0,
15532                                    SDValue N1, SelectionDAG &DAG) {
15533   auto isUndefElt = [](SDValue V, int Idx) {
15534     // TODO - handle more cases as required.
15535     if (V.getOpcode() == ISD::BUILD_VECTOR)
15536       return V.getOperand(Idx).isUndef();
15537     if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
15538       return (Idx != 0) || V.getOperand(0).isUndef();
15539     return false;
15540   };
15541
15542   EVT VT = SVN->getValueType(0);
15543   unsigned NumElts = VT.getVectorNumElements();
15544
15545   bool Changed = false;
15546   SmallVector<int, 8> NewMask;
15547   for (unsigned i = 0; i != NumElts; ++i) {
15548     int Idx = SVN->getMaskElt(i);
15549     if ((0 <= Idx && Idx < (int)NumElts && isUndefElt(N0, Idx)) ||
15550         ((int)NumElts < Idx && isUndefElt(N1, Idx - NumElts))) {
15551       Changed = true;
15552       Idx = -1;
15553     }
15554     NewMask.push_back(Idx);
15555   }
15556   if (Changed)
15557     return DAG.getVectorShuffle(VT, SDLoc(SVN), N0, N1, NewMask);
15558
15559   return SDValue();
15560 }
15561
15562 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
15563 // or turn a shuffle of a single concat into simpler shuffle then concat.
15564 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
15565   EVT VT = N->getValueType(0);
15566   unsigned NumElts = VT.getVectorNumElements();
15567
15568   SDValue N0 = N->getOperand(0);
15569   SDValue N1 = N->getOperand(1);
15570   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15571
15572   SmallVector<SDValue, 4> Ops;
15573   EVT ConcatVT = N0.getOperand(0).getValueType();
15574   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
15575   unsigned NumConcats = NumElts / NumElemsPerConcat;
15576
15577   // Special case: shuffle(concat(A,B)) can be more efficiently represented
15578   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
15579   // half vector elements.
15580   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
15581       std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
15582                   SVN->getMask().end(), [](int i) { return i == -1; })) {
15583     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
15584                               makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
15585     N1 = DAG.getUNDEF(ConcatVT);
15586     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
15587   }
15588
15589   // Look at every vector that's inserted. We're looking for exact
15590   // subvector-sized copies from a concatenated vector
15591   for (unsigned I = 0; I != NumConcats; ++I) {
15592     // Make sure we're dealing with a copy.
15593     unsigned Begin = I * NumElemsPerConcat;
15594     bool AllUndef = true, NoUndef = true;
15595     for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
15596       if (SVN->getMaskElt(J) >= 0)
15597         AllUndef = false;
15598       else
15599         NoUndef = false;
15600     }
15601
15602     if (NoUndef) {
15603       if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
15604         return SDValue();
15605
15606       for (unsigned J = 1; J != NumElemsPerConcat; ++J)
15607         if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
15608           return SDValue();
15609
15610       unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
15611       if (FirstElt < N0.getNumOperands())
15612         Ops.push_back(N0.getOperand(FirstElt));
15613       else
15614         Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
15615
15616     } else if (AllUndef) {
15617       Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
15618     } else { // Mixed with general masks and undefs, can't do optimization.
15619       return SDValue();
15620     }
15621   }
15622
15623   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
15624 }
15625
15626 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
15627 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
15628 //
15629 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
15630 // a simplification in some sense, but it isn't appropriate in general: some
15631 // BUILD_VECTORs are substantially cheaper than others. The general case
15632 // of a BUILD_VECTOR requires inserting each element individually (or
15633 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
15634 // all constants is a single constant pool load.  A BUILD_VECTOR where each
15635 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
15636 // are undef lowers to a small number of element insertions.
15637 //
15638 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
15639 // We don't fold shuffles where one side is a non-zero constant, and we don't
15640 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
15641 // non-constant operands. This seems to work out reasonably well in practice.
15642 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
15643                                        SelectionDAG &DAG,
15644                                        const TargetLowering &TLI) {
15645   EVT VT = SVN->getValueType(0);
15646   unsigned NumElts = VT.getVectorNumElements();
15647   SDValue N0 = SVN->getOperand(0);
15648   SDValue N1 = SVN->getOperand(1);
15649
15650   if (!N0->hasOneUse() || !N1->hasOneUse())
15651     return SDValue();
15652
15653   // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
15654   // discussed above.
15655   if (!N1.isUndef()) {
15656     bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
15657     bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
15658     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
15659       return SDValue();
15660     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
15661       return SDValue();
15662   }
15663
15664   // If both inputs are splats of the same value then we can safely merge this
15665   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
15666   bool IsSplat = false;
15667   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
15668   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
15669   if (BV0 && BV1)
15670     if (SDValue Splat0 = BV0->getSplatValue())
15671       IsSplat = (Splat0 == BV1->getSplatValue());
15672
15673   SmallVector<SDValue, 8> Ops;
15674   SmallSet<SDValue, 16> DuplicateOps;
15675   for (int M : SVN->getMask()) {
15676     SDValue Op = DAG.getUNDEF(VT.getScalarType());
15677     if (M >= 0) {
15678       int Idx = M < (int)NumElts ? M : M - NumElts;
15679       SDValue &S = (M < (int)NumElts ? N0 : N1);
15680       if (S.getOpcode() == ISD::BUILD_VECTOR) {
15681         Op = S.getOperand(Idx);
15682       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
15683         assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
15684         Op = S.getOperand(0);
15685       } else {
15686         // Operand can't be combined - bail out.
15687         return SDValue();
15688       }
15689     }
15690
15691     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
15692     // generating a splat; semantically, this is fine, but it's likely to
15693     // generate low-quality code if the target can't reconstruct an appropriate
15694     // shuffle.
15695     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
15696       if (!IsSplat && !DuplicateOps.insert(Op).second)
15697         return SDValue();
15698
15699     Ops.push_back(Op);
15700   }
15701
15702   // BUILD_VECTOR requires all inputs to be of the same type, find the
15703   // maximum type and extend them all.
15704   EVT SVT = VT.getScalarType();
15705   if (SVT.isInteger())
15706     for (SDValue &Op : Ops)
15707       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
15708   if (SVT != VT.getScalarType())
15709     for (SDValue &Op : Ops)
15710       Op = TLI.isZExtFree(Op.getValueType(), SVT)
15711                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
15712                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
15713   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
15714 }
15715
15716 // Match shuffles that can be converted to any_vector_extend_in_reg.
15717 // This is often generated during legalization.
15718 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
15719 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
15720 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
15721                                             SelectionDAG &DAG,
15722                                             const TargetLowering &TLI,
15723                                             bool LegalOperations,
15724                                             bool LegalTypes) {
15725   EVT VT = SVN->getValueType(0);
15726   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
15727
15728   // TODO Add support for big-endian when we have a test case.
15729   if (!VT.isInteger() || IsBigEndian)
15730     return SDValue();
15731
15732   unsigned NumElts = VT.getVectorNumElements();
15733   unsigned EltSizeInBits = VT.getScalarSizeInBits();
15734   ArrayRef<int> Mask = SVN->getMask();
15735   SDValue N0 = SVN->getOperand(0);
15736
15737   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
15738   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
15739     for (unsigned i = 0; i != NumElts; ++i) {
15740       if (Mask[i] < 0)
15741         continue;
15742       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
15743         continue;
15744       return false;
15745     }
15746     return true;
15747   };
15748
15749   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
15750   // power-of-2 extensions as they are the most likely.
15751   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
15752     // Check for non power of 2 vector sizes
15753     if (NumElts % Scale != 0)
15754       continue;
15755     if (!isAnyExtend(Scale))
15756       continue;
15757
15758     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
15759     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
15760     if (!LegalTypes || TLI.isTypeLegal(OutVT))
15761       if (!LegalOperations ||
15762           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
15763         return DAG.getBitcast(VT,
15764                             DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
15765   }
15766
15767   return SDValue();
15768 }
15769
15770 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
15771 // each source element of a large type into the lowest elements of a smaller
15772 // destination type. This is often generated during legalization.
15773 // If the source node itself was a '*_extend_vector_inreg' node then we should
15774 // then be able to remove it.
15775 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
15776                                         SelectionDAG &DAG) {
15777   EVT VT = SVN->getValueType(0);
15778   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
15779
15780   // TODO Add support for big-endian when we have a test case.
15781   if (!VT.isInteger() || IsBigEndian)
15782     return SDValue();
15783
15784   SDValue N0 = peekThroughBitcast(SVN->getOperand(0));
15785
15786   unsigned Opcode = N0.getOpcode();
15787   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
15788       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
15789       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
15790     return SDValue();
15791
15792   SDValue N00 = N0.getOperand(0);
15793   ArrayRef<int> Mask = SVN->getMask();
15794   unsigned NumElts = VT.getVectorNumElements();
15795   unsigned EltSizeInBits = VT.getScalarSizeInBits();
15796   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
15797   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
15798
15799   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
15800     return SDValue();
15801   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
15802
15803   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
15804   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
15805   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
15806   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
15807     for (unsigned i = 0; i != NumElts; ++i) {
15808       if (Mask[i] < 0)
15809         continue;
15810       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
15811         continue;
15812       return false;
15813     }
15814     return true;
15815   };
15816
15817   // At the moment we just handle the case where we've truncated back to the
15818   // same size as before the extension.
15819   // TODO: handle more extension/truncation cases as cases arise.
15820   if (EltSizeInBits != ExtSrcSizeInBits)
15821     return SDValue();
15822
15823   // We can remove *extend_vector_inreg only if the truncation happens at
15824   // the same scale as the extension.
15825   if (isTruncate(ExtScale))
15826     return DAG.getBitcast(VT, N00);
15827
15828   return SDValue();
15829 }
15830
15831 // Combine shuffles of splat-shuffles of the form:
15832 // shuffle (shuffle V, undef, splat-mask), undef, M
15833 // If splat-mask contains undef elements, we need to be careful about
15834 // introducing undef's in the folded mask which are not the result of composing
15835 // the masks of the shuffles.
15836 static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
15837                                      ShuffleVectorSDNode *Splat,
15838                                      SelectionDAG &DAG) {
15839   ArrayRef<int> SplatMask = Splat->getMask();
15840   assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
15841
15842   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
15843   // every undef mask element in the splat-shuffle has a corresponding undef
15844   // element in the user-shuffle's mask or if the composition of mask elements
15845   // would result in undef.
15846   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
15847   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
15848   //   In this case it is not legal to simplify to the splat-shuffle because we
15849   //   may be exposing the users of the shuffle an undef element at index 1
15850   //   which was not there before the combine.
15851   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
15852   //   In this case the composition of masks yields SplatMask, so it's ok to
15853   //   simplify to the splat-shuffle.
15854   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
15855   //   In this case the composed mask includes all undef elements of SplatMask
15856   //   and in addition sets element zero to undef. It is safe to simplify to
15857   //   the splat-shuffle.
15858   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
15859                                        ArrayRef<int> SplatMask) {
15860     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
15861       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
15862           SplatMask[UserMask[i]] != -1)
15863         return false;
15864     return true;
15865   };
15866   if (CanSimplifyToExistingSplat(UserMask, SplatMask))
15867     return SDValue(Splat, 0);
15868
15869   // Create a new shuffle with a mask that is composed of the two shuffles'
15870   // masks.
15871   SmallVector<int, 32> NewMask;
15872   for (int Idx : UserMask)
15873     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
15874
15875   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
15876                               Splat->getOperand(0), Splat->getOperand(1),
15877                               NewMask);
15878 }
15879
15880 /// If the shuffle mask is taking exactly one element from the first vector
15881 /// operand and passing through all other elements from the second vector
15882 /// operand, return the index of the mask element that is choosing an element
15883 /// from the first operand. Otherwise, return -1.
15884 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
15885   int MaskSize = Mask.size();
15886   int EltFromOp0 = -1;
15887   // TODO: This does not match if there are undef elements in the shuffle mask.
15888   // Should we ignore undefs in the shuffle mask instead? The trade-off is
15889   // removing an instruction (a shuffle), but losing the knowledge that some
15890   // vector lanes are not needed.
15891   for (int i = 0; i != MaskSize; ++i) {
15892     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
15893       // We're looking for a shuffle of exactly one element from operand 0.
15894       if (EltFromOp0 != -1)
15895         return -1;
15896       EltFromOp0 = i;
15897     } else if (Mask[i] != i + MaskSize) {
15898       // Nothing from operand 1 can change lanes.
15899       return -1;
15900     }
15901   }
15902   return EltFromOp0;
15903 }
15904
15905 /// If a shuffle inserts exactly one element from a source vector operand into
15906 /// another vector operand and we can access the specified element as a scalar,
15907 /// then we can eliminate the shuffle.
15908 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
15909                                       SelectionDAG &DAG) {
15910   // First, check if we are taking one element of a vector and shuffling that
15911   // element into another vector.
15912   ArrayRef<int> Mask = Shuf->getMask();
15913   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
15914   SDValue Op0 = Shuf->getOperand(0);
15915   SDValue Op1 = Shuf->getOperand(1);
15916   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
15917   if (ShufOp0Index == -1) {
15918     // Commute mask and check again.
15919     ShuffleVectorSDNode::commuteMask(CommutedMask);
15920     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
15921     if (ShufOp0Index == -1)
15922       return SDValue();
15923     // Commute operands to match the commuted shuffle mask.
15924     std::swap(Op0, Op1);
15925     Mask = CommutedMask;
15926   }
15927
15928   // The shuffle inserts exactly one element from operand 0 into operand 1.
15929   // Now see if we can access that element as a scalar via a real insert element
15930   // instruction.
15931   // TODO: We can try harder to locate the element as a scalar. Examples: it
15932   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
15933   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
15934          "Shuffle mask value must be from operand 0");
15935   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
15936     return SDValue();
15937
15938   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
15939   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
15940     return SDValue();
15941
15942   // There's an existing insertelement with constant insertion index, so we
15943   // don't need to check the legality/profitability of a replacement operation
15944   // that differs at most in the constant value. The target should be able to
15945   // lower any of those in a similar way. If not, legalization will expand this
15946   // to a scalar-to-vector plus shuffle.
15947   //
15948   // Note that the shuffle may move the scalar from the position that the insert
15949   // element used. Therefore, our new insert element occurs at the shuffle's
15950   // mask index value, not the insert's index value.
15951   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
15952   SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
15953                                         Op0.getOperand(2).getValueType());
15954   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
15955                      Op1, Op0.getOperand(1), NewInsIndex);
15956 }
15957
15958 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
15959   EVT VT = N->getValueType(0);
15960   unsigned NumElts = VT.getVectorNumElements();
15961
15962   SDValue N0 = N->getOperand(0);
15963   SDValue N1 = N->getOperand(1);
15964
15965   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
15966
15967   // Canonicalize shuffle undef, undef -> undef
15968   if (N0.isUndef() && N1.isUndef())
15969     return DAG.getUNDEF(VT);
15970
15971   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15972
15973   // Canonicalize shuffle v, v -> v, undef
15974   if (N0 == N1) {
15975     SmallVector<int, 8> NewMask;
15976     for (unsigned i = 0; i != NumElts; ++i) {
15977       int Idx = SVN->getMaskElt(i);
15978       if (Idx >= (int)NumElts) Idx -= NumElts;
15979       NewMask.push_back(Idx);
15980     }
15981     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
15982   }
15983
15984   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
15985   if (N0.isUndef())
15986     return DAG.getCommutedVectorShuffle(*SVN);
15987
15988   // Remove references to rhs if it is undef
15989   if (N1.isUndef()) {
15990     bool Changed = false;
15991     SmallVector<int, 8> NewMask;
15992     for (unsigned i = 0; i != NumElts; ++i) {
15993       int Idx = SVN->getMaskElt(i);
15994       if (Idx >= (int)NumElts) {
15995         Idx = -1;
15996         Changed = true;
15997       }
15998       NewMask.push_back(Idx);
15999     }
16000     if (Changed)
16001       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
16002   }
16003
16004   // Simplify shuffle mask if a referenced element is UNDEF.
16005   if (SDValue V = simplifyShuffleMask(SVN, N0, N1, DAG))
16006     return V;
16007
16008   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
16009     return InsElt;
16010
16011   // A shuffle of a single vector that is a splat can always be folded.
16012   if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
16013     if (N1->isUndef() && N0Shuf->isSplat())
16014       return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
16015
16016   // If it is a splat, check if the argument vector is another splat or a
16017   // build_vector.
16018   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
16019     SDNode *V = N0.getNode();
16020
16021     // If this is a bit convert that changes the element type of the vector but
16022     // not the number of vector elements, look through it.  Be careful not to
16023     // look though conversions that change things like v4f32 to v2f64.
16024     if (V->getOpcode() == ISD::BITCAST) {
16025       SDValue ConvInput = V->getOperand(0);
16026       if (ConvInput.getValueType().isVector() &&
16027           ConvInput.getValueType().getVectorNumElements() == NumElts)
16028         V = ConvInput.getNode();
16029     }
16030
16031     if (V->getOpcode() == ISD::BUILD_VECTOR) {
16032       assert(V->getNumOperands() == NumElts &&
16033              "BUILD_VECTOR has wrong number of operands");
16034       SDValue Base;
16035       bool AllSame = true;
16036       for (unsigned i = 0; i != NumElts; ++i) {
16037         if (!V->getOperand(i).isUndef()) {
16038           Base = V->getOperand(i);
16039           break;
16040         }
16041       }
16042       // Splat of <u, u, u, u>, return <u, u, u, u>
16043       if (!Base.getNode())
16044         return N0;
16045       for (unsigned i = 0; i != NumElts; ++i) {
16046         if (V->getOperand(i) != Base) {
16047           AllSame = false;
16048           break;
16049         }
16050       }
16051       // Splat of <x, x, x, x>, return <x, x, x, x>
16052       if (AllSame)
16053         return N0;
16054
16055       // Canonicalize any other splat as a build_vector.
16056       const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
16057       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
16058       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
16059
16060       // We may have jumped through bitcasts, so the type of the
16061       // BUILD_VECTOR may not match the type of the shuffle.
16062       if (V->getValueType(0) != VT)
16063         NewBV = DAG.getBitcast(VT, NewBV);
16064       return NewBV;
16065     }
16066   }
16067
16068   // There are various patterns used to build up a vector from smaller vectors,
16069   // subvectors, or elements. Scan chains of these and replace unused insertions
16070   // or components with undef.
16071   if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
16072     return S;
16073
16074   // Match shuffles that can be converted to any_vector_extend_in_reg.
16075   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
16076     return V;
16077
16078   // Combine "truncate_vector_in_reg" style shuffles.
16079   if (SDValue V = combineTruncationShuffle(SVN, DAG))
16080     return V;
16081
16082   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
16083       Level < AfterLegalizeVectorOps &&
16084       (N1.isUndef() ||
16085       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
16086        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
16087     if (SDValue V = partitionShuffleOfConcats(N, DAG))
16088       return V;
16089   }
16090
16091   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
16092   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
16093   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
16094     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
16095       return Res;
16096
16097   // If this shuffle only has a single input that is a bitcasted shuffle,
16098   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
16099   // back to their original types.
16100   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
16101       N1.isUndef() && Level < AfterLegalizeVectorOps &&
16102       TLI.isTypeLegal(VT)) {
16103
16104     // Peek through the bitcast only if there is one user.
16105     SDValue BC0 = N0;
16106     while (BC0.getOpcode() == ISD::BITCAST) {
16107       if (!BC0.hasOneUse())
16108         break;
16109       BC0 = BC0.getOperand(0);
16110     }
16111
16112     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
16113       if (Scale == 1)
16114         return SmallVector<int, 8>(Mask.begin(), Mask.end());
16115
16116       SmallVector<int, 8> NewMask;
16117       for (int M : Mask)
16118         for (int s = 0; s != Scale; ++s)
16119           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
16120       return NewMask;
16121     };
16122
16123     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
16124       EVT SVT = VT.getScalarType();
16125       EVT InnerVT = BC0->getValueType(0);
16126       EVT InnerSVT = InnerVT.getScalarType();
16127
16128       // Determine which shuffle works with the smaller scalar type.
16129       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
16130       EVT ScaleSVT = ScaleVT.getScalarType();
16131
16132       if (TLI.isTypeLegal(ScaleVT) &&
16133           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
16134           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
16135         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
16136         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
16137
16138         // Scale the shuffle masks to the smaller scalar type.
16139         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
16140         SmallVector<int, 8> InnerMask =
16141             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
16142         SmallVector<int, 8> OuterMask =
16143             ScaleShuffleMask(SVN->getMask(), OuterScale);
16144
16145         // Merge the shuffle masks.
16146         SmallVector<int, 8> NewMask;
16147         for (int M : OuterMask)
16148           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
16149
16150         // Test for shuffle mask legality over both commutations.
16151         SDValue SV0 = BC0->getOperand(0);
16152         SDValue SV1 = BC0->getOperand(1);
16153         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
16154         if (!LegalMask) {
16155           std::swap(SV0, SV1);
16156           ShuffleVectorSDNode::commuteMask(NewMask);
16157           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
16158         }
16159
16160         if (LegalMask) {
16161           SV0 = DAG.getBitcast(ScaleVT, SV0);
16162           SV1 = DAG.getBitcast(ScaleVT, SV1);
16163           return DAG.getBitcast(
16164               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
16165         }
16166       }
16167     }
16168   }
16169
16170   // Canonicalize shuffles according to rules:
16171   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
16172   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
16173   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
16174   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
16175       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
16176       TLI.isTypeLegal(VT)) {
16177     // The incoming shuffle must be of the same type as the result of the
16178     // current shuffle.
16179     assert(N1->getOperand(0).getValueType() == VT &&
16180            "Shuffle types don't match");
16181
16182     SDValue SV0 = N1->getOperand(0);
16183     SDValue SV1 = N1->getOperand(1);
16184     bool HasSameOp0 = N0 == SV0;
16185     bool IsSV1Undef = SV1.isUndef();
16186     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
16187       // Commute the operands of this shuffle so that next rule
16188       // will trigger.
16189       return DAG.getCommutedVectorShuffle(*SVN);
16190   }
16191
16192   // Try to fold according to rules:
16193   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
16194   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
16195   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
16196   // Don't try to fold shuffles with illegal type.
16197   // Only fold if this shuffle is the only user of the other shuffle.
16198   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
16199       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
16200     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
16201
16202     // Don't try to fold splats; they're likely to simplify somehow, or they
16203     // might be free.
16204     if (OtherSV->isSplat())
16205       return SDValue();
16206
16207     // The incoming shuffle must be of the same type as the result of the
16208     // current shuffle.
16209     assert(OtherSV->getOperand(0).getValueType() == VT &&
16210            "Shuffle types don't match");
16211
16212     SDValue SV0, SV1;
16213     SmallVector<int, 4> Mask;
16214     // Compute the combined shuffle mask for a shuffle with SV0 as the first
16215     // operand, and SV1 as the second operand.
16216     for (unsigned i = 0; i != NumElts; ++i) {
16217       int Idx = SVN->getMaskElt(i);
16218       if (Idx < 0) {
16219         // Propagate Undef.
16220         Mask.push_back(Idx);
16221         continue;
16222       }
16223
16224       SDValue CurrentVec;
16225       if (Idx < (int)NumElts) {
16226         // This shuffle index refers to the inner shuffle N0. Lookup the inner
16227         // shuffle mask to identify which vector is actually referenced.
16228         Idx = OtherSV->getMaskElt(Idx);
16229         if (Idx < 0) {
16230           // Propagate Undef.
16231           Mask.push_back(Idx);
16232           continue;
16233         }
16234
16235         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
16236                                            : OtherSV->getOperand(1);
16237       } else {
16238         // This shuffle index references an element within N1.
16239         CurrentVec = N1;
16240       }
16241
16242       // Simple case where 'CurrentVec' is UNDEF.
16243       if (CurrentVec.isUndef()) {
16244         Mask.push_back(-1);
16245         continue;
16246       }
16247
16248       // Canonicalize the shuffle index. We don't know yet if CurrentVec
16249       // will be the first or second operand of the combined shuffle.
16250       Idx = Idx % NumElts;
16251       if (!SV0.getNode() || SV0 == CurrentVec) {
16252         // Ok. CurrentVec is the left hand side.
16253         // Update the mask accordingly.
16254         SV0 = CurrentVec;
16255         Mask.push_back(Idx);
16256         continue;
16257       }
16258
16259       // Bail out if we cannot convert the shuffle pair into a single shuffle.
16260       if (SV1.getNode() && SV1 != CurrentVec)
16261         return SDValue();
16262
16263       // Ok. CurrentVec is the right hand side.
16264       // Update the mask accordingly.
16265       SV1 = CurrentVec;
16266       Mask.push_back(Idx + NumElts);
16267     }
16268
16269     // Check if all indices in Mask are Undef. In case, propagate Undef.
16270     bool isUndefMask = true;
16271     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
16272       isUndefMask &= Mask[i] < 0;
16273
16274     if (isUndefMask)
16275       return DAG.getUNDEF(VT);
16276
16277     if (!SV0.getNode())
16278       SV0 = DAG.getUNDEF(VT);
16279     if (!SV1.getNode())
16280       SV1 = DAG.getUNDEF(VT);
16281
16282     // Avoid introducing shuffles with illegal mask.
16283     if (!TLI.isShuffleMaskLegal(Mask, VT)) {
16284       ShuffleVectorSDNode::commuteMask(Mask);
16285
16286       if (!TLI.isShuffleMaskLegal(Mask, VT))
16287         return SDValue();
16288
16289       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
16290       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
16291       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
16292       std::swap(SV0, SV1);
16293     }
16294
16295     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
16296     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
16297     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
16298     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
16299   }
16300
16301   return SDValue();
16302 }
16303
16304 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
16305   SDValue InVal = N->getOperand(0);
16306   EVT VT = N->getValueType(0);
16307
16308   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
16309   // with a VECTOR_SHUFFLE and possible truncate.
16310   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16311     SDValue InVec = InVal->getOperand(0);
16312     SDValue EltNo = InVal->getOperand(1);
16313     auto InVecT = InVec.getValueType();
16314     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
16315       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
16316       int Elt = C0->getZExtValue();
16317       NewMask[0] = Elt;
16318       SDValue Val;
16319       // If we have an implict truncate do truncate here as long as it's legal.
16320       // if it's not legal, this should
16321       if (VT.getScalarType() != InVal.getValueType() &&
16322           InVal.getValueType().isScalarInteger() &&
16323           isTypeLegal(VT.getScalarType())) {
16324         Val =
16325             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
16326         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
16327       }
16328       if (VT.getScalarType() == InVecT.getScalarType() &&
16329           VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
16330           TLI.isShuffleMaskLegal(NewMask, VT)) {
16331         Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
16332                                    DAG.getUNDEF(InVecT), NewMask);
16333         // If the initial vector is the correct size this shuffle is a
16334         // valid result.
16335         if (VT == InVecT)
16336           return Val;
16337         // If not we must truncate the vector.
16338         if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
16339           MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16340           SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
16341           EVT SubVT =
16342               EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
16343                                VT.getVectorNumElements());
16344           Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
16345                             ZeroIdx);
16346           return Val;
16347         }
16348       }
16349     }
16350   }
16351
16352   return SDValue();
16353 }
16354
16355 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
16356   EVT VT = N->getValueType(0);
16357   SDValue N0 = N->getOperand(0);
16358   SDValue N1 = N->getOperand(1);
16359   SDValue N2 = N->getOperand(2);
16360
16361   // If inserting an UNDEF, just return the original vector.
16362   if (N1.isUndef())
16363     return N0;
16364
16365   // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
16366   // us to pull BITCASTs from input to output.
16367   if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
16368     if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
16369       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);
16370
16371   // If this is an insert of an extracted vector into an undef vector, we can
16372   // just use the input to the extract.
16373   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
16374       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
16375     return N1.getOperand(0);
16376
16377   // If we are inserting a bitcast value into an undef, with the same
16378   // number of elements, just use the bitcast input of the extract.
16379   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
16380   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
16381   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
16382       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
16383       N1.getOperand(0).getOperand(1) == N2 &&
16384       N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
16385           VT.getVectorNumElements()) {
16386     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
16387   }
16388
16389   // If both N1 and N2 are bitcast values on which insert_subvector
16390   // would makes sense, pull the bitcast through.
16391   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
16392   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
16393   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
16394     SDValue CN0 = N0.getOperand(0);
16395     SDValue CN1 = N1.getOperand(0);
16396     if (CN0.getValueType().getVectorElementType() ==
16397             CN1.getValueType().getVectorElementType() &&
16398         CN0.getValueType().getVectorNumElements() ==
16399             VT.getVectorNumElements()) {
16400       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
16401                                       CN0.getValueType(), CN0, CN1, N2);
16402       return DAG.getBitcast(VT, NewINSERT);
16403     }
16404   }
16405
16406   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
16407   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
16408   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
16409   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
16410       N0.getOperand(1).getValueType() == N1.getValueType() &&
16411       N0.getOperand(2) == N2)
16412     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
16413                        N1, N2);
16414
16415   if (!isa<ConstantSDNode>(N2))
16416     return SDValue();
16417
16418   unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
16419
16420   // Canonicalize insert_subvector dag nodes.
16421   // Example:
16422   // (insert_subvector (insert_subvector A, Idx0), Idx1)
16423   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
16424   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
16425       N1.getValueType() == N0.getOperand(1).getValueType() &&
16426       isa<ConstantSDNode>(N0.getOperand(2))) {
16427     unsigned OtherIdx = N0.getConstantOperandVal(2);
16428     if (InsIdx < OtherIdx) {
16429       // Swap nodes.
16430       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
16431                                   N0.getOperand(0), N1, N2);
16432       AddToWorklist(NewOp.getNode());
16433       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
16434                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
16435     }
16436   }
16437
16438   // If the input vector is a concatenation, and the insert replaces
16439   // one of the pieces, we can optimize into a single concat_vectors.
16440   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
16441       N0.getOperand(0).getValueType() == N1.getValueType()) {
16442     unsigned Factor = N1.getValueType().getVectorNumElements();
16443
16444     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
16445     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
16446
16447     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
16448   }
16449
16450   return SDValue();
16451 }
16452
16453 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
16454   SDValue N0 = N->getOperand(0);
16455
16456   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
16457   if (N0->getOpcode() == ISD::FP16_TO_FP)
16458     return N0->getOperand(0);
16459
16460   return SDValue();
16461 }
16462
16463 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
16464   SDValue N0 = N->getOperand(0);
16465
16466   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
16467   if (N0->getOpcode() == ISD::AND) {
16468     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
16469     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
16470       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
16471                          N0.getOperand(0));
16472     }
16473   }
16474
16475   return SDValue();
16476 }
16477
16478 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
16479 /// with the destination vector and a zero vector.
16480 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
16481 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
16482 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
16483   EVT VT = N->getValueType(0);
16484   SDValue LHS = N->getOperand(0);
16485   SDValue RHS = peekThroughBitcast(N->getOperand(1));
16486   SDLoc DL(N);
16487
16488   // Make sure we're not running after operation legalization where it
16489   // may have custom lowered the vector shuffles.
16490   if (LegalOperations)
16491     return SDValue();
16492
16493   if (N->getOpcode() != ISD::AND)
16494     return SDValue();
16495
16496   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
16497     return SDValue();
16498
16499   EVT RVT = RHS.getValueType();
16500   unsigned NumElts = RHS.getNumOperands();
16501
16502   // Attempt to create a valid clear mask, splitting the mask into
16503   // sub elements and checking to see if each is
16504   // all zeros or all ones - suitable for shuffle masking.
16505   auto BuildClearMask = [&](int Split) {
16506     int NumSubElts = NumElts * Split;
16507     int NumSubBits = RVT.getScalarSizeInBits() / Split;
16508
16509     SmallVector<int, 8> Indices;
16510     for (int i = 0; i != NumSubElts; ++i) {
16511       int EltIdx = i / Split;
16512       int SubIdx = i % Split;
16513       SDValue Elt = RHS.getOperand(EltIdx);
16514       if (Elt.isUndef()) {
16515         Indices.push_back(-1);
16516         continue;
16517       }
16518
16519       APInt Bits;
16520       if (isa<ConstantSDNode>(Elt))
16521         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
16522       else if (isa<ConstantFPSDNode>(Elt))
16523         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
16524       else
16525         return SDValue();
16526
16527       // Extract the sub element from the constant bit mask.
16528       if (DAG.getDataLayout().isBigEndian()) {
16529         Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
16530       } else {
16531         Bits.lshrInPlace(SubIdx * NumSubBits);
16532       }
16533
16534       if (Split > 1)
16535         Bits = Bits.trunc(NumSubBits);
16536
16537       if (Bits.isAllOnesValue())
16538         Indices.push_back(i);
16539       else if (Bits == 0)
16540         Indices.push_back(i + NumSubElts);
16541       else
16542         return SDValue();
16543     }
16544
16545     // Let's see if the target supports this vector_shuffle.
16546     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
16547     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
16548     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
16549       return SDValue();
16550
16551     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
16552     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
16553                                                    DAG.getBitcast(ClearVT, LHS),
16554                                                    Zero, Indices));
16555   };
16556
16557   // Determine maximum split level (byte level masking).
16558   int MaxSplit = 1;
16559   if (RVT.getScalarSizeInBits() % 8 == 0)
16560     MaxSplit = RVT.getScalarSizeInBits() / 8;
16561
16562   for (int Split = 1; Split <= MaxSplit; ++Split)
16563     if (RVT.getScalarSizeInBits() % Split == 0)
16564       if (SDValue S = BuildClearMask(Split))
16565         return S;
16566
16567   return SDValue();
16568 }
16569
16570 /// Visit a binary vector operation, like ADD.
16571 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
16572   assert(N->getValueType(0).isVector() &&
16573          "SimplifyVBinOp only works on vectors!");
16574
16575   SDValue LHS = N->getOperand(0);
16576   SDValue RHS = N->getOperand(1);
16577   SDValue Ops[] = {LHS, RHS};
16578
16579   // See if we can constant fold the vector operation.
16580   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
16581           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
16582     return Fold;
16583
16584   // Try to convert a constant mask AND into a shuffle clear mask.
16585   if (SDValue Shuffle = XformToShuffleWithZero(N))
16586     return Shuffle;
16587
16588   // Type legalization might introduce new shuffles in the DAG.
16589   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
16590   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
16591   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
16592       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
16593       LHS.getOperand(1).isUndef() &&
16594       RHS.getOperand(1).isUndef()) {
16595     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
16596     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
16597
16598     if (SVN0->getMask().equals(SVN1->getMask())) {
16599       EVT VT = N->getValueType(0);
16600       SDValue UndefVector = LHS.getOperand(1);
16601       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
16602                                      LHS.getOperand(0), RHS.getOperand(0),
16603                                      N->getFlags());
16604       AddUsersToWorklist(N);
16605       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
16606                                   SVN0->getMask());
16607     }
16608   }
16609
16610   return SDValue();
16611 }
16612
16613 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
16614                                     SDValue N2) {
16615   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
16616
16617   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
16618                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
16619
16620   // If we got a simplified select_cc node back from SimplifySelectCC, then
16621   // break it down into a new SETCC node, and a new SELECT node, and then return
16622   // the SELECT node, since we were called with a SELECT node.
16623   if (SCC.getNode()) {
16624     // Check to see if we got a select_cc back (to turn into setcc/select).
16625     // Otherwise, just return whatever node we got back, like fabs.
16626     if (SCC.getOpcode() == ISD::SELECT_CC) {
16627       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
16628                                   N0.getValueType(),
16629                                   SCC.getOperand(0), SCC.getOperand(1),
16630                                   SCC.getOperand(4));
16631       AddToWorklist(SETCC.getNode());
16632       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
16633                            SCC.getOperand(2), SCC.getOperand(3));
16634     }
16635
16636     return SCC;
16637   }
16638   return SDValue();
16639 }
16640
16641 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
16642 /// being selected between, see if we can simplify the select.  Callers of this
16643 /// should assume that TheSelect is deleted if this returns true.  As such, they
16644 /// should return the appropriate thing (e.g. the node) back to the top-level of
16645 /// the DAG combiner loop to avoid it being looked at.
16646 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
16647                                     SDValue RHS) {
16648   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
16649   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
16650   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
16651     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
16652       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
16653       SDValue Sqrt = RHS;
16654       ISD::CondCode CC;
16655       SDValue CmpLHS;
16656       const ConstantFPSDNode *Zero = nullptr;
16657
16658       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
16659         CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
16660         CmpLHS = TheSelect->getOperand(0);
16661         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
16662       } else {
16663         // SELECT or VSELECT
16664         SDValue Cmp = TheSelect->getOperand(0);
16665         if (Cmp.getOpcode() == ISD::SETCC) {
16666           CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
16667           CmpLHS = Cmp.getOperand(0);
16668           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
16669         }
16670       }
16671       if (Zero && Zero->isZero() &&
16672           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
16673           CC == ISD::SETULT || CC == ISD::SETLT)) {
16674         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
16675         CombineTo(TheSelect, Sqrt);
16676         return true;
16677       }
16678     }
16679   }
16680   // Cannot simplify select with vector condition
16681   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
16682
16683   // If this is a select from two identical things, try to pull the operation
16684   // through the select.
16685   if (LHS.getOpcode() != RHS.getOpcode() ||
16686       !LHS.hasOneUse() || !RHS.hasOneUse())
16687     return false;
16688
16689   // If this is a load and the token chain is identical, replace the select
16690   // of two loads with a load through a select of the address to load from.
16691   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
16692   // constants have been dropped into the constant pool.
16693   if (LHS.getOpcode() == ISD::LOAD) {
16694     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
16695     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
16696
16697     // Token chains must be identical.
16698     if (LHS.getOperand(0) != RHS.getOperand(0) ||
16699         // Do not let this transformation reduce the number of volatile loads.
16700         LLD->isVolatile() || RLD->isVolatile() ||
16701         // FIXME: If either is a pre/post inc/dec load,
16702         // we'd need to split out the address adjustment.
16703         LLD->isIndexed() || RLD->isIndexed() ||
16704         // If this is an EXTLOAD, the VT's must match.
16705         LLD->getMemoryVT() != RLD->getMemoryVT() ||
16706         // If this is an EXTLOAD, the kind of extension must match.
16707         (LLD->getExtensionType() != RLD->getExtensionType() &&
16708          // The only exception is if one of the extensions is anyext.
16709          LLD->getExtensionType() != ISD::EXTLOAD &&
16710          RLD->getExtensionType() != ISD::EXTLOAD) ||
16711         // FIXME: this discards src value information.  This is
16712         // over-conservative. It would be beneficial to be able to remember
16713         // both potential memory locations.  Since we are discarding
16714         // src value info, don't do the transformation if the memory
16715         // locations are not in the default address space.
16716         LLD->getPointerInfo().getAddrSpace() != 0 ||
16717         RLD->getPointerInfo().getAddrSpace() != 0 ||
16718         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
16719                                       LLD->getBasePtr().getValueType()))
16720       return false;
16721
16722     // Check that the select condition doesn't reach either load.  If so,
16723     // folding this will induce a cycle into the DAG.  If not, this is safe to
16724     // xform, so create a select of the addresses.
16725     SDValue Addr;
16726     if (TheSelect->getOpcode() == ISD::SELECT) {
16727       SDNode *CondNode = TheSelect->getOperand(0).getNode();
16728       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
16729           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
16730         return false;
16731       // The loads must not depend on one another.
16732       if (LLD->isPredecessorOf(RLD) ||
16733           RLD->isPredecessorOf(LLD))
16734         return false;
16735       Addr = DAG.getSelect(SDLoc(TheSelect),
16736                            LLD->getBasePtr().getValueType(),
16737                            TheSelect->getOperand(0), LLD->getBasePtr(),
16738                            RLD->getBasePtr());
16739     } else {  // Otherwise SELECT_CC
16740       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
16741       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
16742
16743       if ((LLD->hasAnyUseOfValue(1) &&
16744            (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
16745           (RLD->hasAnyUseOfValue(1) &&
16746            (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
16747         return false;
16748
16749       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
16750                          LLD->getBasePtr().getValueType(),
16751                          TheSelect->getOperand(0),
16752                          TheSelect->getOperand(1),
16753                          LLD->getBasePtr(), RLD->getBasePtr(),
16754                          TheSelect->getOperand(4));
16755     }
16756
16757     SDValue Load;
16758     // It is safe to replace the two loads if they have different alignments,
16759     // but the new load must be the minimum (most restrictive) alignment of the
16760     // inputs.
16761     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
16762     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
16763     if (!RLD->isInvariant())
16764       MMOFlags &= ~MachineMemOperand::MOInvariant;
16765     if (!RLD->isDereferenceable())
16766       MMOFlags &= ~MachineMemOperand::MODereferenceable;
16767     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
16768       // FIXME: Discards pointer and AA info.
16769       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
16770                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
16771                          MMOFlags);
16772     } else {
16773       // FIXME: Discards pointer and AA info.
16774       Load = DAG.getExtLoad(
16775           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
16776                                                   : LLD->getExtensionType(),
16777           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
16778           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
16779     }
16780
16781     // Users of the select now use the result of the load.
16782     CombineTo(TheSelect, Load);
16783
16784     // Users of the old loads now use the new load's chain.  We know the
16785     // old-load value is dead now.
16786     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
16787     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
16788     return true;
16789   }
16790
16791   return false;
16792 }
16793
16794 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
16795 /// bitwise 'and'.
16796 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
16797                                             SDValue N1, SDValue N2, SDValue N3,
16798                                             ISD::CondCode CC) {
16799   // If this is a select where the false operand is zero and the compare is a
16800   // check of the sign bit, see if we can perform the "gzip trick":
16801   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
16802   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
16803   EVT XType = N0.getValueType();
16804   EVT AType = N2.getValueType();
16805   if (!isNullConstant(N3) || !XType.bitsGE(AType))
16806     return SDValue();
16807
16808   // If the comparison is testing for a positive value, we have to invert
16809   // the sign bit mask, so only do that transform if the target has a bitwise
16810   // 'and not' instruction (the invert is free).
16811   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
16812     // (X > -1) ? A : 0
16813     // (X >  0) ? X : 0 <-- This is canonical signed max.
16814     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
16815       return SDValue();
16816   } else if (CC == ISD::SETLT) {
16817     // (X <  0) ? A : 0
16818     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
16819     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
16820       return SDValue();
16821   } else {
16822     return SDValue();
16823   }
16824
16825   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
16826   // constant.
16827   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
16828   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
16829   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
16830     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
16831     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
16832     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
16833     AddToWorklist(Shift.getNode());
16834
16835     if (XType.bitsGT(AType)) {
16836       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
16837       AddToWorklist(Shift.getNode());
16838     }
16839
16840     if (CC == ISD::SETGT)
16841       Shift = DAG.getNOT(DL, Shift, AType);
16842
16843     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
16844   }
16845
16846   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
16847   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
16848   AddToWorklist(Shift.getNode());
16849
16850   if (XType.bitsGT(AType)) {
16851     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
16852     AddToWorklist(Shift.getNode());
16853   }
16854
16855   if (CC == ISD::SETGT)
16856     Shift = DAG.getNOT(DL, Shift, AType);
16857
16858   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
16859 }
16860
16861 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
16862 /// where 'cond' is the comparison specified by CC.
16863 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
16864                                       SDValue N2, SDValue N3, ISD::CondCode CC,
16865                                       bool NotExtCompare) {
16866   // (x ? y : y) -> y.
16867   if (N2 == N3) return N2;
16868
16869   EVT VT = N2.getValueType();
16870   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
16871   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
16872
16873   // Determine if the condition we're dealing with is constant
16874   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
16875                               N0, N1, CC, DL, false);
16876   if (SCC.getNode()) AddToWorklist(SCC.getNode());
16877
16878   if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
16879     // fold select_cc true, x, y -> x
16880     // fold select_cc false, x, y -> y
16881     return !SCCC->isNullValue() ? N2 : N3;
16882   }
16883
16884   // Check to see if we can simplify the select into an fabs node
16885   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
16886     // Allow either -0.0 or 0.0
16887     if (CFP->isZero()) {
16888       // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
16889       if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
16890           N0 == N2 && N3.getOpcode() == ISD::FNEG &&
16891           N2 == N3.getOperand(0))
16892         return DAG.getNode(ISD::FABS, DL, VT, N0);
16893
16894       // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
16895       if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
16896           N0 == N3 && N2.getOpcode() == ISD::FNEG &&
16897           N2.getOperand(0) == N3)
16898         return DAG.getNode(ISD::FABS, DL, VT, N3);
16899     }
16900   }
16901
16902   // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
16903   // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
16904   // in it.  This is a win when the constant is not otherwise available because
16905   // it replaces two constant pool loads with one.  We only do this if the FP
16906   // type is known to be legal, because if it isn't, then we are before legalize
16907   // types an we want the other legalization to happen first (e.g. to avoid
16908   // messing with soft float) and if the ConstantFP is not legal, because if
16909   // it is legal, we may not need to store the FP constant in a constant pool.
16910   if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
16911     if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
16912       if (TLI.isTypeLegal(N2.getValueType()) &&
16913           (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
16914                TargetLowering::Legal &&
16915            !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
16916            !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
16917           // If both constants have multiple uses, then we won't need to do an
16918           // extra load, they are likely around in registers for other users.
16919           (TV->hasOneUse() || FV->hasOneUse())) {
16920         Constant *Elts[] = {
16921           const_cast<ConstantFP*>(FV->getConstantFPValue()),
16922           const_cast<ConstantFP*>(TV->getConstantFPValue())
16923         };
16924         Type *FPTy = Elts[0]->getType();
16925         const DataLayout &TD = DAG.getDataLayout();
16926
16927         // Create a ConstantArray of the two constants.
16928         Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
16929         SDValue CPIdx =
16930             DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
16931                                 TD.getPrefTypeAlignment(FPTy));
16932         unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
16933
16934         // Get the offsets to the 0 and 1 element of the array so that we can
16935         // select between them.
16936         SDValue Zero = DAG.getIntPtrConstant(0, DL);
16937         unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
16938         SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
16939
16940         SDValue Cond = DAG.getSetCC(DL,
16941                                     getSetCCResultType(N0.getValueType()),
16942                                     N0, N1, CC);
16943         AddToWorklist(Cond.getNode());
16944         SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
16945                                           Cond, One, Zero);
16946         AddToWorklist(CstOffset.getNode());
16947         CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
16948                             CstOffset);
16949         AddToWorklist(CPIdx.getNode());
16950         return DAG.getLoad(
16951             TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
16952             MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
16953             Alignment);
16954       }
16955     }
16956
16957   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
16958     return V;
16959
16960   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
16961   // where y is has a single bit set.
16962   // A plaintext description would be, we can turn the SELECT_CC into an AND
16963   // when the condition can be materialized as an all-ones register.  Any
16964   // single bit-test can be materialized as an all-ones register with
16965   // shift-left and shift-right-arith.
16966   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
16967       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
16968     SDValue AndLHS = N0->getOperand(0);
16969     ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
16970     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
16971       // Shift the tested bit over the sign bit.
16972       const APInt &AndMask = ConstAndRHS->getAPIntValue();
16973       SDValue ShlAmt =
16974         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
16975                         getShiftAmountTy(AndLHS.getValueType()));
16976       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
16977
16978       // Now arithmetic right shift it all the way over, so the result is either
16979       // all-ones, or zero.
16980       SDValue ShrAmt =
16981         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
16982                         getShiftAmountTy(Shl.getValueType()));
16983       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
16984
16985       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
16986     }
16987   }
16988
16989   // fold select C, 16, 0 -> shl C, 4
16990   if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
16991       TLI.getBooleanContents(N0.getValueType()) ==
16992           TargetLowering::ZeroOrOneBooleanContent) {
16993
16994     // If the caller doesn't want us to simplify this into a zext of a compare,
16995     // don't do it.
16996     if (NotExtCompare && N2C->isOne())
16997       return SDValue();
16998
16999     // Get a SetCC of the condition
17000     // NOTE: Don't create a SETCC if it's not legal on this target.
17001     if (!LegalOperations ||
17002         TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
17003       SDValue Temp, SCC;
17004       // cast from setcc result type to select result type
17005       if (LegalTypes) {
17006         SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
17007                             N0, N1, CC);
17008         if (N2.getValueType().bitsLT(SCC.getValueType()))
17009           Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
17010                                         N2.getValueType());
17011         else
17012           Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
17013                              N2.getValueType(), SCC);
17014       } else {
17015         SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
17016         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
17017                            N2.getValueType(), SCC);
17018       }
17019
17020       AddToWorklist(SCC.getNode());
17021       AddToWorklist(Temp.getNode());
17022
17023       if (N2C->isOne())
17024         return Temp;
17025
17026       // shl setcc result by log2 n2c
17027       return DAG.getNode(
17028           ISD::SHL, DL, N2.getValueType(), Temp,
17029           DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
17030                           getShiftAmountTy(Temp.getValueType())));
17031     }
17032   }
17033
17034   // Check to see if this is an integer abs.
17035   // select_cc setg[te] X,  0,  X, -X ->
17036   // select_cc setgt    X, -1,  X, -X ->
17037   // select_cc setl[te] X,  0, -X,  X ->
17038   // select_cc setlt    X,  1, -X,  X ->
17039   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
17040   if (N1C) {
17041     ConstantSDNode *SubC = nullptr;
17042     if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
17043          (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
17044         N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
17045       SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
17046     else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
17047               (N1C->isOne() && CC == ISD::SETLT)) &&
17048              N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
17049       SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
17050
17051     EVT XType = N0.getValueType();
17052     if (SubC && SubC->isNullValue() && XType.isInteger()) {
17053       SDLoc DL(N0);
17054       SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
17055                                   N0,
17056                                   DAG.getConstant(XType.getSizeInBits() - 1, DL,
17057                                          getShiftAmountTy(N0.getValueType())));
17058       SDValue Add = DAG.getNode(ISD::ADD, DL,
17059                                 XType, N0, Shift);
17060       AddToWorklist(Shift.getNode());
17061       AddToWorklist(Add.getNode());
17062       return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
17063     }
17064   }
17065
17066   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
17067   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
17068   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
17069   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
17070   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
17071   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
17072   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
17073   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
17074   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
17075     SDValue ValueOnZero = N2;
17076     SDValue Count = N3;
17077     // If the condition is NE instead of E, swap the operands.
17078     if (CC == ISD::SETNE)
17079       std::swap(ValueOnZero, Count);
17080     // Check if the value on zero is a constant equal to the bits in the type.
17081     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
17082       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
17083         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
17084         // legal, combine to just cttz.
17085         if ((Count.getOpcode() == ISD::CTTZ ||
17086              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
17087             N0 == Count.getOperand(0) &&
17088             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
17089           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
17090         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
17091         // legal, combine to just ctlz.
17092         if ((Count.getOpcode() == ISD::CTLZ ||
17093              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
17094             N0 == Count.getOperand(0) &&
17095             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
17096           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
17097       }
17098     }
17099   }
17100
17101   return SDValue();
17102 }
17103
17104 /// This is a stub for TargetLowering::SimplifySetCC.
17105 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
17106                                    ISD::CondCode Cond, const SDLoc &DL,
17107                                    bool foldBooleans) {
17108   TargetLowering::DAGCombinerInfo
17109     DagCombineInfo(DAG, Level, false, this);
17110   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
17111 }
17112
17113 /// Given an ISD::SDIV node expressing a divide by constant, return
17114 /// a DAG expression to select that will generate the same value by multiplying
17115 /// by a magic number.
17116 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
17117 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
17118   // when optimising for minimum size, we don't want to expand a div to a mul
17119   // and a shift.
17120   if (DAG.getMachineFunction().getFunction().optForMinSize())
17121     return SDValue();
17122
17123   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
17124   if (!C)
17125     return SDValue();
17126
17127   // Avoid division by zero.
17128   if (C->isNullValue())
17129     return SDValue();
17130
17131   std::vector<SDNode *> Built;
17132   SDValue S =
17133       TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
17134
17135   for (SDNode *N : Built)
17136     AddToWorklist(N);
17137   return S;
17138 }
17139
17140 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
17141 /// DAG expression that will generate the same value by right shifting.
17142 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
17143   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
17144   if (!C)
17145     return SDValue();
17146
17147   // Avoid division by zero.
17148   if (C->isNullValue())
17149     return SDValue();
17150
17151   std::vector<SDNode *> Built;
17152   SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
17153
17154   for (SDNode *N : Built)
17155     AddToWorklist(N);
17156   return S;
17157 }
17158
17159 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
17160 /// expression that will generate the same value by multiplying by a magic
17161 /// number.
17162 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
17163 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
17164   // when optimising for minimum size, we don't want to expand a div to a mul
17165   // and a shift.
17166   if (DAG.getMachineFunction().getFunction().optForMinSize())
17167     return SDValue();
17168
17169   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
17170   if (!C)
17171     return SDValue();
17172
17173   // Avoid division by zero.
17174   if (C->isNullValue())
17175     return SDValue();
17176
17177   std::vector<SDNode *> Built;
17178   SDValue S =
17179       TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
17180
17181   for (SDNode *N : Built)
17182     AddToWorklist(N);
17183   return S;
17184 }
17185
17186 /// Determines the LogBase2 value for a non-null input value using the
17187 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
17188 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
17189   EVT VT = V.getValueType();
17190   unsigned EltBits = VT.getScalarSizeInBits();
17191   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
17192   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
17193   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
17194   return LogBase2;
17195 }
17196
17197 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
17198 /// For the reciprocal, we need to find the zero of the function:
17199 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
17200 ///     =>
17201 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
17202 ///     does not require additional intermediate precision]
17203 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
17204   if (Level >= AfterLegalizeDAG)
17205     return SDValue();
17206
17207   // TODO: Handle half and/or extended types?
17208   EVT VT = Op.getValueType();
17209   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
17210     return SDValue();
17211
17212   // If estimates are explicitly disabled for this function, we're done.
17213   MachineFunction &MF = DAG.getMachineFunction();
17214   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
17215   if (Enabled == TLI.ReciprocalEstimate::Disabled)
17216     return SDValue();
17217
17218   // Estimates may be explicitly enabled for this type with a custom number of
17219   // refinement steps.
17220   int Iterations = TLI.getDivRefinementSteps(VT, MF);
17221   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
17222     AddToWorklist(Est.getNode());
17223
17224     if (Iterations) {
17225       EVT VT = Op.getValueType();
17226       SDLoc DL(Op);
17227       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
17228
17229       // Newton iterations: Est = Est + Est (1 - Arg * Est)
17230       for (int i = 0; i < Iterations; ++i) {
17231         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
17232         AddToWorklist(NewEst.getNode());
17233
17234         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
17235         AddToWorklist(NewEst.getNode());
17236
17237         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
17238         AddToWorklist(NewEst.getNode());
17239
17240         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
17241         AddToWorklist(Est.getNode());
17242       }
17243     }
17244     return Est;
17245   }
17246
17247   return SDValue();
17248 }
17249
17250 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
17251 /// For the reciprocal sqrt, we need to find the zero of the function:
17252 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
17253 ///     =>
17254 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
17255 /// As a result, we precompute A/2 prior to the iteration loop.
17256 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
17257                                          unsigned Iterations,
17258                                          SDNodeFlags Flags, bool Reciprocal) {
17259   EVT VT = Arg.getValueType();
17260   SDLoc DL(Arg);
17261   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
17262
17263   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
17264   // this entire sequence requires only one FP constant.
17265   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
17266   AddToWorklist(HalfArg.getNode());
17267
17268   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
17269   AddToWorklist(HalfArg.getNode());
17270
17271   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
17272   for (unsigned i = 0; i < Iterations; ++i) {
17273     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
17274     AddToWorklist(NewEst.getNode());
17275
17276     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
17277     AddToWorklist(NewEst.getNode());
17278
17279     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
17280     AddToWorklist(NewEst.getNode());
17281
17282     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
17283     AddToWorklist(Est.getNode());
17284   }
17285
17286   // If non-reciprocal square root is requested, multiply the result by Arg.
17287   if (!Reciprocal) {
17288     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
17289     AddToWorklist(Est.getNode());
17290   }
17291
17292   return Est;
17293 }
17294
17295 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
17296 /// For the reciprocal sqrt, we need to find the zero of the function:
17297 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
17298 ///     =>
17299 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
17300 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
17301                                          unsigned Iterations,
17302                                          SDNodeFlags Flags, bool Reciprocal) {
17303   EVT VT = Arg.getValueType();
17304   SDLoc DL(Arg);
17305   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
17306   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
17307
17308   // This routine must enter the loop below to work correctly
17309   // when (Reciprocal == false).
17310   assert(Iterations > 0);
17311
17312   // Newton iterations for reciprocal square root:
17313   // E = (E * -0.5) * ((A * E) * E + -3.0)
17314   for (unsigned i = 0; i < Iterations; ++i) {
17315     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
17316     AddToWorklist(AE.getNode());
17317
17318     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
17319     AddToWorklist(AEE.getNode());
17320
17321     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
17322     AddToWorklist(RHS.getNode());
17323
17324     // When calculating a square root at the last iteration build:
17325     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
17326     // (notice a common subexpression)
17327     SDValue LHS;
17328     if (Reciprocal || (i + 1) < Iterations) {
17329       // RSQRT: LHS = (E * -0.5)
17330       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
17331     } else {
17332       // SQRT: LHS = (A * E) * -0.5
17333       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
17334     }
17335     AddToWorklist(LHS.getNode());
17336
17337     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
17338     AddToWorklist(Est.getNode());
17339   }
17340
17341   return Est;
17342 }
17343
17344 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
17345 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
17346 /// Op can be zero.
17347 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
17348                                            bool Reciprocal) {
17349   if (Level >= AfterLegalizeDAG)
17350     return SDValue();
17351
17352   // TODO: Handle half and/or extended types?
17353   EVT VT = Op.getValueType();
17354   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
17355     return SDValue();
17356
17357   // If estimates are explicitly disabled for this function, we're done.
17358   MachineFunction &MF = DAG.getMachineFunction();
17359   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
17360   if (Enabled == TLI.ReciprocalEstimate::Disabled)
17361     return SDValue();
17362
17363   // Estimates may be explicitly enabled for this type with a custom number of
17364   // refinement steps.
17365   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
17366
17367   bool UseOneConstNR = false;
17368   if (SDValue Est =
17369       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
17370                           Reciprocal)) {
17371     AddToWorklist(Est.getNode());
17372
17373     if (Iterations) {
17374       Est = UseOneConstNR
17375             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
17376             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
17377
17378       if (!Reciprocal) {
17379         // Unfortunately, Est is now NaN if the input was exactly 0.0.
17380         // Select out this case and force the answer to 0.0.
17381         EVT VT = Op.getValueType();
17382         SDLoc DL(Op);
17383
17384         SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
17385         EVT CCVT = getSetCCResultType(VT);
17386         SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
17387         AddToWorklist(ZeroCmp.getNode());
17388
17389         Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
17390                           ZeroCmp, FPZero, Est);
17391         AddToWorklist(Est.getNode());
17392       }
17393     }
17394     return Est;
17395   }
17396
17397   return SDValue();
17398 }
17399
17400 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
17401   return buildSqrtEstimateImpl(Op, Flags, true);
17402 }
17403
17404 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
17405   return buildSqrtEstimateImpl(Op, Flags, false);
17406 }
17407
17408 /// Return true if there is any possibility that the two addresses overlap.
17409 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
17410   // If they are the same then they must be aliases.
17411   if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
17412
17413   // If they are both volatile then they cannot be reordered.
17414   if (Op0->isVolatile() && Op1->isVolatile()) return true;
17415
17416   // If one operation reads from invariant memory, and the other may store, they
17417   // cannot alias. These should really be checking the equivalent of mayWrite,
17418   // but it only matters for memory nodes other than load /store.
17419   if (Op0->isInvariant() && Op1->writeMem())
17420     return false;
17421
17422   if (Op1->isInvariant() && Op0->writeMem())
17423     return false;
17424
17425   unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
17426   unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
17427
17428   // Check for BaseIndexOffset matching.
17429   BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
17430   BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
17431   int64_t PtrDiff;
17432   if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
17433     return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
17434
17435   // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
17436   // able to calculate their relative offset if at least one arises
17437   // from an alloca. However, these allocas cannot overlap and we
17438   // can infer there is no alias.
17439   if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
17440     if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
17441       MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
17442       // If the base are the same frame index but the we couldn't find a
17443       // constant offset, (indices are different) be conservative.
17444       if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
17445                      !MFI.isFixedObjectIndex(B->getIndex())))
17446         return false;
17447     }
17448
17449   bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
17450   bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
17451   bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
17452   bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
17453   bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
17454   bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
17455
17456   // If of mismatched base types or checkable indices we can check
17457   // they do not alias.
17458   if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
17459        (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
17460       (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
17461     return false;
17462
17463   // If we know required SrcValue1 and SrcValue2 have relatively large alignment
17464   // compared to the size and offset of the access, we may be able to prove they
17465   // do not alias. This check is conservative for now to catch cases created by
17466   // splitting vector types.
17467   int64_t SrcValOffset0 = Op0->getSrcValueOffset();
17468   int64_t SrcValOffset1 = Op1->getSrcValueOffset();
17469   unsigned OrigAlignment0 = Op0->getOriginalAlignment();
17470   unsigned OrigAlignment1 = Op1->getOriginalAlignment();
17471   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
17472       NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
17473     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
17474     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
17475
17476     // There is no overlap between these relatively aligned accesses of similar
17477     // size. Return no alias.
17478     if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
17479         (OffAlign1 + NumBytes1) <= OffAlign0)
17480       return false;
17481   }
17482
17483   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
17484                    ? CombinerGlobalAA
17485                    : DAG.getSubtarget().useAA();
17486 #ifndef NDEBUG
17487   if (CombinerAAOnlyFunc.getNumOccurrences() &&
17488       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
17489     UseAA = false;
17490 #endif
17491
17492   if (UseAA && AA &&
17493       Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
17494     // Use alias analysis information.
17495     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
17496     int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
17497     int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
17498     AliasResult AAResult =
17499         AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
17500                                  UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
17501                   MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
17502                                  UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
17503     if (AAResult == NoAlias)
17504       return false;
17505   }
17506
17507   // Otherwise we have to assume they alias.
17508   return true;
17509 }
17510
17511 /// Walk up chain skipping non-aliasing memory nodes,
17512 /// looking for aliasing nodes and adding them to the Aliases vector.
17513 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
17514                                    SmallVectorImpl<SDValue> &Aliases) {
17515   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
17516   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
17517
17518   // Get alias information for node.
17519   bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
17520
17521   // Starting off.
17522   Chains.push_back(OriginalChain);
17523   unsigned Depth = 0;
17524
17525   // Look at each chain and determine if it is an alias.  If so, add it to the
17526   // aliases list.  If not, then continue up the chain looking for the next
17527   // candidate.
17528   while (!Chains.empty()) {
17529     SDValue Chain = Chains.pop_back_val();
17530
17531     // For TokenFactor nodes, look at each operand and only continue up the
17532     // chain until we reach the depth limit.
17533     //
17534     // FIXME: The depth check could be made to return the last non-aliasing
17535     // chain we found before we hit a tokenfactor rather than the original
17536     // chain.
17537     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
17538       Aliases.clear();
17539       Aliases.push_back(OriginalChain);
17540       return;
17541     }
17542
17543     // Don't bother if we've been before.
17544     if (!Visited.insert(Chain.getNode()).second)
17545       continue;
17546
17547     switch (Chain.getOpcode()) {
17548     case ISD::EntryToken:
17549       // Entry token is ideal chain operand, but handled in FindBetterChain.
17550       break;
17551
17552     case ISD::LOAD:
17553     case ISD::STORE: {
17554       // Get alias information for Chain.
17555       bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
17556           !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
17557
17558       // If chain is alias then stop here.
17559       if (!(IsLoad && IsOpLoad) &&
17560           isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
17561         Aliases.push_back(Chain);
17562       } else {
17563         // Look further up the chain.
17564         Chains.push_back(Chain.getOperand(0));
17565         ++Depth;
17566       }
17567       break;
17568     }
17569
17570     case ISD::TokenFactor:
17571       // We have to check each of the operands of the token factor for "small"
17572       // token factors, so we queue them up.  Adding the operands to the queue
17573       // (stack) in reverse order maintains the original order and increases the
17574       // likelihood that getNode will find a matching token factor (CSE.)
17575       if (Chain.getNumOperands() > 16) {
17576         Aliases.push_back(Chain);
17577         break;
17578       }
17579       for (unsigned n = Chain.getNumOperands(); n;)
17580         Chains.push_back(Chain.getOperand(--n));
17581       ++Depth;
17582       break;
17583
17584     case ISD::CopyFromReg:
17585       // Forward past CopyFromReg.
17586       Chains.push_back(Chain.getOperand(0));
17587       ++Depth;
17588       break;
17589
17590     default:
17591       // For all other instructions we will just have to take what we can get.
17592       Aliases.push_back(Chain);
17593       break;
17594     }
17595   }
17596 }
17597
17598 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
17599 /// (aliasing node.)
17600 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
17601   if (OptLevel == CodeGenOpt::None)
17602     return OldChain;
17603
17604   // Ops for replacing token factor.
17605   SmallVector<SDValue, 8> Aliases;
17606
17607   // Accumulate all the aliases to this node.
17608   GatherAllAliases(N, OldChain, Aliases);
17609
17610   // If no operands then chain to entry token.
17611   if (Aliases.size() == 0)
17612     return DAG.getEntryNode();
17613
17614   // If a single operand then chain to it.  We don't need to revisit it.
17615   if (Aliases.size() == 1)
17616     return Aliases[0];
17617
17618   // Construct a custom tailored token factor.
17619   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
17620 }
17621
17622 // This function tries to collect a bunch of potentially interesting
17623 // nodes to improve the chains of, all at once. This might seem
17624 // redundant, as this function gets called when visiting every store
17625 // node, so why not let the work be done on each store as it's visited?
17626 //
17627 // I believe this is mainly important because MergeConsecutiveStores
17628 // is unable to deal with merging stores of different sizes, so unless
17629 // we improve the chains of all the potential candidates up-front
17630 // before running MergeConsecutiveStores, it might only see some of
17631 // the nodes that will eventually be candidates, and then not be able
17632 // to go from a partially-merged state to the desired final
17633 // fully-merged state.
17634 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
17635   if (OptLevel == CodeGenOpt::None)
17636     return false;
17637
17638   // This holds the base pointer, index, and the offset in bytes from the base
17639   // pointer.
17640   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
17641
17642   // We must have a base and an offset.
17643   if (!BasePtr.getBase().getNode())
17644     return false;
17645
17646   // Do not handle stores to undef base pointers.
17647   if (BasePtr.getBase().isUndef())
17648     return false;
17649
17650   SmallVector<StoreSDNode *, 8> ChainedStores;
17651   ChainedStores.push_back(St);
17652
17653   // Walk up the chain and look for nodes with offsets from the same
17654   // base pointer. Stop when reaching an instruction with a different kind
17655   // or instruction which has a different base pointer.
17656   StoreSDNode *Index = St;
17657   while (Index) {
17658     // If the chain has more than one use, then we can't reorder the mem ops.
17659     if (Index != St && !SDValue(Index, 0)->hasOneUse())
17660       break;
17661
17662     if (Index->isVolatile() || Index->isIndexed())
17663       break;
17664
17665     // Find the base pointer and offset for this memory node.
17666     BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
17667
17668     // Check that the base pointer is the same as the original one.
17669     if (!BasePtr.equalBaseIndex(Ptr, DAG))
17670       break;
17671
17672     // Walk up the chain to find the next store node, ignoring any
17673     // intermediate loads. Any other kind of node will halt the loop.
17674     SDNode *NextInChain = Index->getChain().getNode();
17675     while (true) {
17676       if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
17677         // We found a store node. Use it for the next iteration.
17678         if (STn->isVolatile() || STn->isIndexed()) {
17679           Index = nullptr;
17680           break;
17681         }
17682         ChainedStores.push_back(STn);
17683         Index = STn;
17684         break;
17685       } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
17686         NextInChain = Ldn->getChain().getNode();
17687         continue;
17688       } else {
17689         Index = nullptr;
17690         break;
17691       }
17692     } // end while
17693   }
17694
17695   // At this point, ChainedStores lists all of the Store nodes
17696   // reachable by iterating up through chain nodes matching the above
17697   // conditions.  For each such store identified, try to find an
17698   // earlier chain to attach the store to which won't violate the
17699   // required ordering.
17700   bool MadeChangeToSt = false;
17701   SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
17702
17703   for (StoreSDNode *ChainedStore : ChainedStores) {
17704     SDValue Chain = ChainedStore->getChain();
17705     SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
17706
17707     if (Chain != BetterChain) {
17708       if (ChainedStore == St)
17709         MadeChangeToSt = true;
17710       BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
17711     }
17712   }
17713
17714   // Do all replacements after finding the replacements to make to avoid making
17715   // the chains more complicated by introducing new TokenFactors.
17716   for (auto Replacement : BetterChains)
17717     replaceStoreChain(Replacement.first, Replacement.second);
17718
17719   return MadeChangeToSt;
17720 }
17721
17722 /// This is the entry point for the file.
17723 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
17724                            CodeGenOpt::Level OptLevel) {
17725   /// This is the main entry point to this class.
17726   DAGCombiner(*this, AA, OptLevel).Run(Level);
17727 }