lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

   1 //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SelectionDAG::LegalizeVectors method.
  11 //
  12 // The vector legalizer looks for vector operations which might need to be
  13 // scalarized and legalizes them. This is a separate step from Legalize because
  14 // scalarizing can introduce illegal types.  For example, suppose we have an
  15 // ISD::SDIV of type v2i64 on x86-32.  The type is legal (for example, addition
  16 // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
  17 // operation, which introduces nodes with the illegal type i64 which must be
  18 // expanded.  Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
  19 // the operation must be unrolled, which introduces nodes with the illegal
  20 // type i8 which must be promoted.
  21 //
  22 // This does not legalize vector manipulations like ISD::BUILD_VECTOR,
  23 // or operations that happen to take a vector which are custom-lowered;
  24 // the legalization for such operations never produces nodes
  25 // with illegal types, so it's okay to put off legalizing them until
  26 // SelectionDAG::Legalize runs.
  27 //
  28 //===----------------------------------------------------------------------===//
  29
  30 #include "llvm/ADT/APInt.h"
  31 #include "llvm/ADT/DenseMap.h"
  32 #include "llvm/ADT/SmallVector.h"
  33 #include "llvm/CodeGen/ISDOpcodes.h"
  34 #include "llvm/CodeGen/MachineMemOperand.h"
  35 #include "llvm/CodeGen/SelectionDAG.h"
  36 #include "llvm/CodeGen/SelectionDAGNodes.h"
  37 #include "llvm/CodeGen/TargetLowering.h"
  38 #include "llvm/CodeGen/ValueTypes.h"
  39 #include "llvm/IR/DataLayout.h"
  40 #include "llvm/Support/Casting.h"
  41 #include "llvm/Support/Compiler.h"
  42 #include "llvm/Support/ErrorHandling.h"
  43 #include "llvm/Support/MachineValueType.h"
  44 #include "llvm/Support/MathExtras.h"
  45 #include <cassert>
  46 #include <cstdint>
  47 #include <iterator>
  48 #include <utility>
  49
  50 using namespace llvm;
  51
  52 #define DEBUG_TYPE "legalizevectorops"
  53
  54 namespace {
  55
  56 class VectorLegalizer {
       // Working state for one vector-legalization run: the DAG being rewritten,
       // the target lowering information used to query operation actions, a flag
       // recording whether anything was changed, and a cache of legalized values.
  57   SelectionDAG& DAG;
  58   const TargetLowering &TLI;
  59   bool Changed = false; // Keep track of whether anything changed
  60
  61   /// For nodes that are of legal width, and that have more than one use, this
  62   /// map indicates what regularized operand to use.  This allows us to avoid
  63   /// legalizing the same thing more than once.
  64   SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
  65
  66   /// Adds a node to the translation cache.
       ///
       /// Records From -> To and, when the two differ, also To -> To so that a
       /// later request to legalize the replacement resolves to itself.
  67   void AddLegalizedOperand(SDValue From, SDValue To) {
  68     LegalizedNodes.insert(std::make_pair(From, To));
  69     // If someone requests legalization of the new node, return itself.
  70     if (From != To)
  71       LegalizedNodes.insert(std::make_pair(To, To));
  72   }
  73
  74   /// Legalizes the given node.
  75   SDValue LegalizeOp(SDValue Op);
  76
  77   /// Assuming the node is legal, "legalize" the results.
  78   SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
  79
  80   /// Implements unrolling a VSETCC.
  81   SDValue UnrollVSETCC(SDValue Op);
  82
  83   /// Implement expand-based legalization of vector operations.
  84   ///
  85   /// This is just a high-level routine to dispatch to specific code paths for
  86   /// operations to legalize them.
  87   SDValue Expand(SDValue Op);
  88
  89   /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
  90   /// SINT_TO_FLOAT and SHR on vectors isn't legal.
  91   ///
  92   /// Selected by Expand() when the node being expanded is an
  93   /// ISD::UINT_TO_FP.
  94   SDValue ExpandUINT_TO_FLOAT(SDValue Op);
  95
  96   /// Implement expansion for SIGN_EXTEND_INREG using SHL and SRA.
  97   SDValue ExpandSEXTINREG(SDValue Op);
  98
  99   /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
 100   ///
 101   /// Shuffles the low lanes of the operand into place and bitcasts to the proper
 102   /// type. The contents of the bits in the extended part of each element are
 103   /// undef.
 104   SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
 105
 106   /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
 107   ///
 108   /// Shuffles the low lanes of the operand into place, bitcasts to the proper
 109   /// type, then shifts left and arithmetic shifts right to introduce a sign
 110   /// extension.
 111   SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
 112
 113   /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
 114   ///
 115   /// Shuffles the low lanes of the operand into place and blends zeros into
 116   /// the remaining lanes, finally bitcasting to the proper type.
 117   SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
 118
 119   /// Expand bswap of vectors into a shuffle if legal.
 120   SDValue ExpandBSWAP(SDValue Op);
 121
 122   /// Implement vselect in terms of XOR, AND, OR when blend is not
 123   /// supported by the target.
 124   SDValue ExpandVSELECT(SDValue Op);
       /// Implement a scalar-condition select of vectors in terms of XOR, AND, OR
       /// on a splatted mask; unrolls when those operations must be expanded.
 125   SDValue ExpandSELECT(SDValue Op);
       /// Expand an extending vector load and cache both of its results.
 126   SDValue ExpandLoad(SDValue Op);
       /// Expand a truncating vector store via TLI.scalarizeVectorStore.
 127   SDValue ExpandStore(SDValue Op);
       /// Implements expansion for FNEG; falls back to UnrollVectorOp if
       /// FSUB isn't legal.
 128   SDValue ExpandFNEG(SDValue Op);
       /// Opcode-specific expansion helpers; Expand() dispatches FSUB, BITREVERSE,
       /// CTLZ/CTLZ_ZERO_UNDEF, CTTZ/CTTZ_ZERO_UNDEF and the STRICT_* FP opcodes
       /// to these respectively.
 129   SDValue ExpandFSUB(SDValue Op);
 130   SDValue ExpandBITREVERSE(SDValue Op);
 131   SDValue ExpandCTLZ(SDValue Op);
 132   SDValue ExpandCTTZ(SDValue Op);
 133   SDValue ExpandStrictFPOp(SDValue Op);
 134
 135   /// Implements vector promotion.
 136   ///
 137   /// This is essentially just bitcasting the operands to a different type and
 138   /// bitcasting the result back to the original type.
 139   SDValue Promote(SDValue Op);
 140
 141   /// Implements [SU]INT_TO_FP vector promotion.
 142   ///
 143   /// This is a [zs]ext of the input operand to a larger integer type.
 144   SDValue PromoteINT_TO_FP(SDValue Op);
 145
 146   /// Implements FP_TO_[SU]INT vector promotion of the result type.
 147   ///
 148   /// It is promoted to a larger integer type.  The result is then
 149   /// truncated back to the original type.
 150   SDValue PromoteFP_TO_INT(SDValue Op);
 151
 152 public:
       /// Binds the legalizer to \p dag and to that DAG's target lowering info.
 153   VectorLegalizer(SelectionDAG& dag) :
 154       DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
 155
 156   /// Begin legalizing the vector operations in the DAG.
       ///
       /// \returns the value of the Changed flag, or false when the DAG contains
       /// no vector-typed values at all.
 157   bool Run();
 158 };
 159
 160 } // end anonymous namespace
 161
 162 bool VectorLegalizer::Run() {
       // Entry point: legalize every node of the DAG, update the root, and report
       // whether anything changed.  Returns false early if no node produces a
       // vector value.
 163   // Before we start legalizing vector nodes, check if there are any vectors.
 164   bool HasVectors = false;
       // E is the node that is last when the loop starts; the loop runs until the
       // iterator reaches the position following E, so E itself is visited.
 165   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
 166        E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
 167     // Check if the values of the nodes contain vectors. We don't need to check
 168     // the operands because we are going to check their values at some point.
         // Note: this inner E (end of the value list) shadows the outer E.
 169     for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
 170          J != E; ++J)
 171       HasVectors |= J->isVector();
 172
 173     // If we found a vector node we can start the legalization.
 174     if (HasVectors)
 175       break;
 176   }
 177
 178   // If this basic block has no vectors then no need to legalize vectors.
 179   if (!HasVectors)
 180     return false;
 181
 182   // The legalize process is inherently a bottom-up recursive process (users
 183   // legalize their uses before themselves).  Given infinite stack space, we
 184   // could just start legalizing on the root and traverse the whole graph.  In
 185   // practice however, this causes us to run out of stack space on large basic
 186   // blocks.  To avoid this problem, compute an ordering of the nodes where each
 187   // node is only legalized after all of its operands are legalized.
 188   DAG.AssignTopologicalOrder();
       // Same bounded walk as above: stop after the node that was last when the
       // loop began.  Legalization is requested for result 0 of each node.
 189   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
 190        E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
 191     LegalizeOp(SDValue(&*I, 0));
 192
 193   // Finally, it's possible the root changed.  Get the new root.
 194   SDValue OldRoot = DAG.getRoot();
 195   assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
 196   DAG.setRoot(LegalizedNodes[OldRoot]);
 197
       // The cache is only meaningful during this run.
 198   LegalizedNodes.clear();
 199
 200   // Remove dead nodes now.
 201   DAG.RemoveDeadNodes();
 202
 203   return Changed;
 204 }
 205
 206 SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
       // Caches a mapping from every result value of Op's node to the result of
       // the same index on Result's node, then returns the value of Result that
       // corresponds to Op's own result number.
 207   // Generic legalization: just pass the operand through.
 208   for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
 209     AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
 210   return Result.getValue(Op.getResNo());
 211 }
 212
 213 SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
 214   // Note that LegalizeOp may be reentered even from single-use nodes, which
 215   // means that we always must cache transformed nodes.
 216   DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
 217   if (I != LegalizedNodes.end()) return I->second;
 218
 219   SDNode* Node = Op.getNode();
 220
 221   // Legalize the operands
 222   SmallVector<SDValue, 8> Ops;
 223   for (const SDValue &Op : Node->op_values())
 224     Ops.push_back(LegalizeOp(Op));
 225
 226   SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops),
 227                            Op.getResNo());
 228
 229   if (Op.getOpcode() == ISD::LOAD) {
 230     LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
 231     ISD::LoadExtType ExtType = LD->getExtensionType();
 232     if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
 233       LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: ";
 234                  Node->dump(&DAG));
 235       switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
 236                                    LD->getMemoryVT())) {
 237       default: llvm_unreachable("This action is not supported yet!");
 238       case TargetLowering::Legal:
 239         return TranslateLegalizeResults(Op, Result);
 240       case TargetLowering::Custom:
 241         if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
 242           assert(Lowered->getNumValues() == Op->getNumValues() &&
 243                  "Unexpected number of results");
 244           Changed = Lowered != Result;
 245           return TranslateLegalizeResults(Op, Lowered);
 246         }
 247         LLVM_FALLTHROUGH;
 248       case TargetLowering::Expand:
 249         Changed = true;
 250         return LegalizeOp(ExpandLoad(Op));
 251       }
 252     }
 253   } else if (Op.getOpcode() == ISD::STORE) {
 254     StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
 255     EVT StVT = ST->getMemoryVT();
 256     MVT ValVT = ST->getValue().getSimpleValueType();
 257     if (StVT.isVector() && ST->isTruncatingStore()) {
 258       LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
 259                  Node->dump(&DAG));
 260       switch (TLI.getTruncStoreAction(ValVT, StVT)) {
 261       default: llvm_unreachable("This action is not supported yet!");
 262       case TargetLowering::Legal:
 263         return TranslateLegalizeResults(Op, Result);
 264       case TargetLowering::Custom: {
 265         SDValue Lowered = TLI.LowerOperation(Result, DAG);
 266         Changed = Lowered != Result;
 267         return TranslateLegalizeResults(Op, Lowered);
 268       }
 269       case TargetLowering::Expand:
 270         Changed = true;
 271         return LegalizeOp(ExpandStore(Op));
 272       }
 273     }
 274   }
 275
 276   bool HasVectorValue = false;
 277   for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
 278        J != E;
 279        ++J)
 280     HasVectorValue |= J->isVector();
 281   if (!HasVectorValue)
 282     return TranslateLegalizeResults(Op, Result);
 283
 284   TargetLowering::LegalizeAction Action = TargetLowering::Legal;
 285   switch (Op.getOpcode()) {
 286   default:
 287     return TranslateLegalizeResults(Op, Result);
 288   case ISD::STRICT_FADD:
 289   case ISD::STRICT_FSUB:
 290   case ISD::STRICT_FMUL:
 291   case ISD::STRICT_FDIV:
 292   case ISD::STRICT_FREM:
 293   case ISD::STRICT_FSQRT:
 294   case ISD::STRICT_FMA:
 295   case ISD::STRICT_FPOW:
 296   case ISD::STRICT_FPOWI:
 297   case ISD::STRICT_FSIN:
 298   case ISD::STRICT_FCOS:
 299   case ISD::STRICT_FEXP:
 300   case ISD::STRICT_FEXP2:
 301   case ISD::STRICT_FLOG:
 302   case ISD::STRICT_FLOG10:
 303   case ISD::STRICT_FLOG2:
 304   case ISD::STRICT_FRINT:
 305   case ISD::STRICT_FNEARBYINT:
 306     // These pseudo-ops get legalized as if they were their non-strict
 307     // equivalent.  For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
 308     // is also legal, but if ISD::FSQRT requires expansion then so does
 309     // ISD::STRICT_FSQRT.
 310     Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
 311                                             Node->getValueType(0));
 312     break;
 313   case ISD::ADD:
 314   case ISD::SUB:
 315   case ISD::MUL:
 316   case ISD::SDIV:
 317   case ISD::UDIV:
 318   case ISD::SREM:
 319   case ISD::UREM:
 320   case ISD::SDIVREM:
 321   case ISD::UDIVREM:
 322   case ISD::FADD:
 323   case ISD::FSUB:
 324   case ISD::FMUL:
 325   case ISD::FDIV:
 326   case ISD::FREM:
 327   case ISD::AND:
 328   case ISD::OR:
 329   case ISD::XOR:
 330   case ISD::SHL:
 331   case ISD::SRA:
 332   case ISD::SRL:
 333   case ISD::ROTL:
 334   case ISD::ROTR:
 335   case ISD::BSWAP:
 336   case ISD::BITREVERSE:
 337   case ISD::CTLZ:
 338   case ISD::CTTZ:
 339   case ISD::CTLZ_ZERO_UNDEF:
 340   case ISD::CTTZ_ZERO_UNDEF:
 341   case ISD::CTPOP:
 342   case ISD::SELECT:
 343   case ISD::VSELECT:
 344   case ISD::SELECT_CC:
 345   case ISD::SETCC:
 346   case ISD::ZERO_EXTEND:
 347   case ISD::ANY_EXTEND:
 348   case ISD::TRUNCATE:
 349   case ISD::SIGN_EXTEND:
 350   case ISD::FP_TO_SINT:
 351   case ISD::FP_TO_UINT:
 352   case ISD::FNEG:
 353   case ISD::FABS:
 354   case ISD::FMINNUM:
 355   case ISD::FMAXNUM:
 356   case ISD::FMINNAN:
 357   case ISD::FMAXNAN:
 358   case ISD::FCOPYSIGN:
 359   case ISD::FSQRT:
 360   case ISD::FSIN:
 361   case ISD::FCOS:
 362   case ISD::FPOWI:
 363   case ISD::FPOW:
 364   case ISD::FLOG:
 365   case ISD::FLOG2:
 366   case ISD::FLOG10:
 367   case ISD::FEXP:
 368   case ISD::FEXP2:
 369   case ISD::FCEIL:
 370   case ISD::FTRUNC:
 371   case ISD::FRINT:
 372   case ISD::FNEARBYINT:
 373   case ISD::FROUND:
 374   case ISD::FFLOOR:
 375   case ISD::FP_ROUND:
 376   case ISD::FP_EXTEND:
 377   case ISD::FMA:
 378   case ISD::SIGN_EXTEND_INREG:
 379   case ISD::ANY_EXTEND_VECTOR_INREG:
 380   case ISD::SIGN_EXTEND_VECTOR_INREG:
 381   case ISD::ZERO_EXTEND_VECTOR_INREG:
 382   case ISD::SMIN:
 383   case ISD::SMAX:
 384   case ISD::UMIN:
 385   case ISD::UMAX:
 386   case ISD::SMUL_LOHI:
 387   case ISD::UMUL_LOHI:
 388   case ISD::FCANONICALIZE:
 389   case ISD::SADDSAT:
 390     Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
 391     break;
 392   case ISD::FP_ROUND_INREG:
 393     Action = TLI.getOperationAction(Node->getOpcode(),
 394                cast<VTSDNode>(Node->getOperand(1))->getVT());
 395     break;
 396   case ISD::SINT_TO_FP:
 397   case ISD::UINT_TO_FP:
 398     Action = TLI.getOperationAction(Node->getOpcode(),
 399                                     Node->getOperand(0).getValueType());
 400     break;
 401   }
 402
 403   LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
 404
 405   switch (Action) {
 406   default: llvm_unreachable("This action is not supported yet!");
 407   case TargetLowering::Promote:
 408     Result = Promote(Op);
 409     Changed = true;
 410     break;
 411   case TargetLowering::Legal:
 412     LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
 413     break;
 414   case TargetLowering::Custom: {
 415     LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
 416     if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
 417       LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
 418       Result = Tmp1;
 419       break;
 420     }
 421     LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
 422     LLVM_FALLTHROUGH;
 423   }
 424   case TargetLowering::Expand:
 425     Result = Expand(Op);
 426   }
 427
 428   // Make sure that the generated code is itself legal.
 429   if (Result != Op) {
 430     Result = LegalizeOp(Result);
 431     Changed = true;
 432   }
 433
 434   // Note that LegalizeOp may be reentered even from single-use nodes, which
 435   // means that we always must cache transformed nodes.
 436   AddLegalizedOperand(Op, Result);
 437   return Result;
 438 }
 439
 440 SDValue VectorLegalizer::Promote(SDValue Op) {
       // Rebuilds Op in the type the target asks it to be promoted to and
       // converts the result back to Op's original type.  Int<->FP conversions
       // are delegated to dedicated helpers.
 441   // For a few operations there is a specific concept for promotion based on
 442   // the operand's type.
 443   switch (Op.getOpcode()) {
 444   case ISD::SINT_TO_FP:
 445   case ISD::UINT_TO_FP:
 446     // "Promote" the operation by extending the operand.
 447     return PromoteINT_TO_FP(Op);
 448   case ISD::FP_TO_UINT:
 449   case ISD::FP_TO_SINT:
 450     // Promote the operation by widening the result type, then truncating.
 451     return PromoteFP_TO_INT(Op);
 452   }
 453
 454   // There are currently two cases of vector promotion:
 455   // 1) Bitcasting a vector of integers to a different vector type of the
 456   //    same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
 457   // 2) Extending a vector of floats to a vector of the same number of larger
 458   //    floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
 459   MVT VT = Op.getSimpleValueType();
 460   assert(Op.getNode()->getNumValues() == 1 &&
 461          "Can't promote a vector with multiple results!");
 462   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
 463   SDLoc dl(Op);
 464   SmallVector<SDValue, 4> Operands(Op.getNumOperands());
 465
       // Convert each vector operand to NVT: FP_EXTEND when both the operand and
       // NVT have floating-point elements, BITCAST otherwise.  Non-vector
       // operands are forwarded unchanged.
 466   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
 467     if (Op.getOperand(j).getValueType().isVector())
 468       if (Op.getOperand(j)
 469               .getValueType()
 470               .getVectorElementType()
 471               .isFloatingPoint() &&
 472           NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
 473         Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j));
 474       else
 475         Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
 476     else
 477       Operands[j] = Op.getOperand(j);
 478   }
 479
       // Re-create the operation in the promoted type, keeping the node flags,
       // and convert back: FP_ROUND for the floating-point case, BITCAST
       // otherwise.
 480   Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags());
 481   if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
 482       (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
 483        NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
 484     return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl));
 485   else
 486     return DAG.getNode(ISD::BITCAST, dl, VT, Op);
 487 }
 488
 489 SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
       // Promotes [SU]INT_TO_FP by extending the integer source vector to the
       // promoted type (zero-extend for UINT_TO_FP, sign-extend otherwise) and
       // re-issuing the conversion with the original result type.
 490   // INT_TO_FP operations may require the input operand be promoted even
 491   // when the type is otherwise legal.
 492   MVT VT = Op.getOperand(0).getSimpleValueType();
 493   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
 494   assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
 495          "Vectors have different number of elements!");
 496
 497   SDLoc dl(Op);
 498   SmallVector<SDValue, 4> Operands(Op.getNumOperands());
 499
 500   unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
 501     ISD::SIGN_EXTEND;
       // Only vector operands are extended; any other operand is forwarded as is.
 502   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
 503     if (Op.getOperand(j).getValueType().isVector())
 504       Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
 505     else
 506       Operands[j] = Op.getOperand(j);
 507   }
 508
 509   return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands);
 510 }
 511
 512 // For FP_TO_INT we promote the result type to a vector type with wider
 513 // elements and then truncate the result.  This is different from the default
 514 // Promote path, which uses bitcast to promote and thus assumes that the
 515 // promoted vector type has the same overall size.
 516 SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) {
 517   MVT VT = Op.getSimpleValueType();
 518   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
 519   assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
 520          "Vectors have different number of elements!");
 521
 522   unsigned NewOpc = Op->getOpcode();
 523   // Change FP_TO_UINT to FP_TO_SINT if possible.
 524   // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
 525   if (NewOpc == ISD::FP_TO_UINT &&
 526       TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
 527     NewOpc = ISD::FP_TO_SINT;
 528
 529   SDLoc dl(Op);
 530   SDValue Promoted  = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0));
 531
 532   // Assert that the converted value fits in the original type.  If it doesn't
 533   // (eg: because the value being converted is too big), then the result of the
 534   // original operation was undefined anyway, so the assert is still correct.
       // The assertion kind follows the ORIGINAL opcode, not NewOpc.
 535   Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
 536                                                             : ISD::AssertSext,
 537                          dl, NVT, Promoted,
 538                          DAG.getValueType(VT.getScalarType()));
 539   return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
 540 }
 541
 542 SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
 543   LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
 544
 545   EVT SrcVT = LD->getMemoryVT();
 546   EVT SrcEltVT = SrcVT.getScalarType();
 547   unsigned NumElem = SrcVT.getVectorNumElements();
 548
 549   SDValue NewChain;
 550   SDValue Value;
 551   if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
 552     SDLoc dl(Op);
 553
 554     SmallVector<SDValue, 8> Vals;
 555     SmallVector<SDValue, 8> LoadChains;
 556
 557     EVT DstEltVT = LD->getValueType(0).getScalarType();
 558     SDValue Chain = LD->getChain();
 559     SDValue BasePTR = LD->getBasePtr();
 560     ISD::LoadExtType ExtType = LD->getExtensionType();
 561
 562     // When elements in a vector is not byte-addressable, we cannot directly
 563     // load each element by advancing pointer, which could only address bytes.
 564     // Instead, we load all significant words, mask bits off, and concatenate
 565     // them to form each element. Finally, they are extended to destination
 566     // scalar type to build the destination vector.
 567     EVT WideVT = TLI.getPointerTy(DAG.getDataLayout());
 568
 569     assert(WideVT.isRound() &&
 570            "Could not handle the sophisticated case when the widest integer is"
 571            " not power of 2.");
 572     assert(WideVT.bitsGE(SrcEltVT) &&
 573            "Type is not legalized?");
 574
 575     unsigned WideBytes = WideVT.getStoreSize();
 576     unsigned Offset = 0;
 577     unsigned RemainingBytes = SrcVT.getStoreSize();
 578     SmallVector<SDValue, 8> LoadVals;
 579     while (RemainingBytes > 0) {
 580       SDValue ScalarLoad;
 581       unsigned LoadBytes = WideBytes;
 582
 583       if (RemainingBytes >= LoadBytes) {
 584         ScalarLoad =
 585             DAG.getLoad(WideVT, dl, Chain, BasePTR,
 586                         LD->getPointerInfo().getWithOffset(Offset),
 587                         MinAlign(LD->getAlignment(), Offset),
 588                         LD->getMemOperand()->getFlags(), LD->getAAInfo());
 589       } else {
 590         EVT LoadVT = WideVT;
 591         while (RemainingBytes < LoadBytes) {
 592           LoadBytes >>= 1; // Reduce the load size by half.
 593           LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
 594         }
 595         ScalarLoad =
 596             DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
 597                            LD->getPointerInfo().getWithOffset(Offset), LoadVT,
 598                            MinAlign(LD->getAlignment(), Offset),
 599                            LD->getMemOperand()->getFlags(), LD->getAAInfo());
 600       }
 601
 602       RemainingBytes -= LoadBytes;
 603       Offset += LoadBytes;
 604
 605       BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes);
 606
 607       LoadVals.push_back(ScalarLoad.getValue(0));
 608       LoadChains.push_back(ScalarLoad.getValue(1));
 609     }
 610
 611     // Extract bits, pack and extend/trunc them into destination type.
 612     unsigned SrcEltBits = SrcEltVT.getSizeInBits();
 613     SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, dl, WideVT);
 614
 615     unsigned BitOffset = 0;
 616     unsigned WideIdx = 0;
 617     unsigned WideBits = WideVT.getSizeInBits();
 618
 619     for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
 620       SDValue Lo, Hi, ShAmt;
 621
 622       if (BitOffset < WideBits) {
 623         ShAmt = DAG.getConstant(
 624             BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
 625         Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
 626         Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
 627       }
 628
 629       BitOffset += SrcEltBits;
 630       if (BitOffset >= WideBits) {
 631         WideIdx++;
 632         BitOffset -= WideBits;
 633         if (BitOffset > 0) {
 634           ShAmt = DAG.getConstant(
 635               SrcEltBits - BitOffset, dl,
 636               TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
 637           Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
 638           Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
 639         }
 640       }
 641
 642       if (Hi.getNode())
 643         Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
 644
 645       switch (ExtType) {
 646       default: llvm_unreachable("Unknown extended-load op!");
 647       case ISD::EXTLOAD:
 648         Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
 649         break;
 650       case ISD::ZEXTLOAD:
 651         Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
 652         break;
 653       case ISD::SEXTLOAD:
 654         ShAmt =
 655             DAG.getConstant(WideBits - SrcEltBits, dl,
 656                             TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
 657         Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
 658         Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
 659         Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
 660         break;
 661       }
 662       Vals.push_back(Lo);
 663     }
 664
 665     NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
 666     Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals);
 667   } else {
 668     SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
 669     // Skip past MERGE_VALUE node if known.
 670     if (Scalarized->getOpcode() == ISD::MERGE_VALUES) {
 671       NewChain = Scalarized.getOperand(1);
 672       Value = Scalarized.getOperand(0);
 673     } else {
 674       NewChain = Scalarized.getValue(1);
 675       Value = Scalarized.getValue(0);
 676     }
 677   }
 678
 679   AddLegalizedOperand(Op.getValue(0), Value);
 680   AddLegalizedOperand(Op.getValue(1), NewChain);
 681
 682   return (Op.getResNo() ? NewChain : Value);
 683 }
 684
 685 SDValue VectorLegalizer::ExpandStore(SDValue Op) {
       // Expands a vector store by delegating to TLI.scalarizeVectorStore, caches
       // the replacement for Op, and returns it.
 686   StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
 687   SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
 688   AddLegalizedOperand(Op, TF);
 689   return TF;
 690 }
 691
 692 SDValue VectorLegalizer::Expand(SDValue Op) {
       // Dispatches on the opcode to the matching expansion helper.  Opcodes
       // without a dedicated helper are unrolled with DAG.UnrollVectorOp.
 693   switch (Op->getOpcode()) {
 694   case ISD::SIGN_EXTEND_INREG:
 695     return ExpandSEXTINREG(Op);
 696   case ISD::ANY_EXTEND_VECTOR_INREG:
 697     return ExpandANY_EXTEND_VECTOR_INREG(Op);
 698   case ISD::SIGN_EXTEND_VECTOR_INREG:
 699     return ExpandSIGN_EXTEND_VECTOR_INREG(Op);
 700   case ISD::ZERO_EXTEND_VECTOR_INREG:
 701     return ExpandZERO_EXTEND_VECTOR_INREG(Op);
 702   case ISD::BSWAP:
 703     return ExpandBSWAP(Op);
 704   case ISD::VSELECT:
 705     return ExpandVSELECT(Op);
 706   case ISD::SELECT:
 707     return ExpandSELECT(Op);
 708   case ISD::UINT_TO_FP:
 709     return ExpandUINT_TO_FLOAT(Op);
 710   case ISD::FNEG:
 711     return ExpandFNEG(Op);
 712   case ISD::FSUB:
 713     return ExpandFSUB(Op);
 714   case ISD::SETCC:
 715     return UnrollVSETCC(Op);
 716   case ISD::BITREVERSE:
 717     return ExpandBITREVERSE(Op);
       // The *_ZERO_UNDEF variants share the helper of their plain counterpart.
 718   case ISD::CTLZ:
 719   case ISD::CTLZ_ZERO_UNDEF:
 720     return ExpandCTLZ(Op);
 721   case ISD::CTTZ:
 722   case ISD::CTTZ_ZERO_UNDEF:
 723     return ExpandCTTZ(Op);
       // All strict floating-point opcodes go through one common helper.
 724   case ISD::STRICT_FADD:
 725   case ISD::STRICT_FSUB:
 726   case ISD::STRICT_FMUL:
 727   case ISD::STRICT_FDIV:
 728   case ISD::STRICT_FREM:
 729   case ISD::STRICT_FSQRT:
 730   case ISD::STRICT_FMA:
 731   case ISD::STRICT_FPOW:
 732   case ISD::STRICT_FPOWI:
 733   case ISD::STRICT_FSIN:
 734   case ISD::STRICT_FCOS:
 735   case ISD::STRICT_FEXP:
 736   case ISD::STRICT_FEXP2:
 737   case ISD::STRICT_FLOG:
 738   case ISD::STRICT_FLOG10:
 739   case ISD::STRICT_FLOG2:
 740   case ISD::STRICT_FRINT:
 741   case ISD::STRICT_FNEARBYINT:
 742     return ExpandStrictFPOp(Op);
 743   default:
 744     return DAG.UnrollVectorOp(Op.getNode());
 745   }
 746 }
 747
 748 SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
 749   // Lower a select instruction where the condition is a scalar and the
 750   // operands are vectors. Lower this select to VSELECT and implement it
 751   // using XOR AND OR. The selector bit is broadcasted.
 752   EVT VT = Op.getValueType();
 753   SDLoc DL(Op);
 754
       // Operand 0 is the scalar condition; operands 1 and 2 are the two vector
       // values to choose between.
 755   SDValue Mask = Op.getOperand(0);
 756   SDValue Op1 = Op.getOperand(1);
 757   SDValue Op2 = Op.getOperand(2);
 758
 759   assert(VT.isVector() && !Mask.getValueType().isVector()
 760          && Op1.getValueType() == Op2.getValueType() && "Invalid type");
 761
 762   // If we can't even use the basic vector operations of
 763   // AND,OR,XOR, we will have to scalarize the op.
 764   // Notice that the operation may be 'promoted' which means that it is
 765   // 'bitcasted' to another type which is handled.
 766   // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
 767   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
 768       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
 769       TLI.getOperationAction(ISD::OR,  VT) == TargetLowering::Expand ||
 770       TLI.getOperationAction(ISD::BUILD_VECTOR,  VT) == TargetLowering::Expand)
 771     return DAG.UnrollVectorOp(Op.getNode());
 772
 773   // Generate a mask operand.
 774   EVT MaskTy = VT.changeVectorElementTypeToInteger();
 775
 776   // What is the size of each element in the vector mask.
 777   EVT BitTy = MaskTy.getScalarType();
 778
       // Turn the scalar condition into an all-ones or all-zeros value of the
       // mask element type.
 779   Mask = DAG.getSelect(DL, BitTy, Mask,
 780           DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,
 781                           BitTy),
 782           DAG.getConstant(0, DL, BitTy));
 783
 784   // Broadcast the mask so that the entire vector is all-one or all zero.
 785   Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
 786
 787   // Bitcast the operands to be the same type as the mask.
 788   // This is needed when we select between FP types because
 789   // the mask is a vector of integers.
 790   Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
 791   Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
 792
       // NotMask = Mask XOR all-ones.
 793   SDValue AllOnes = DAG.getConstant(
 794             APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);
 795   SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
 796
       // Result = (Op1 & Mask) | (Op2 & ~Mask), bitcast back to the original type.
 797   Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
 798   Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
 799   SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
 800   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
 801 }
 802
 803 SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
 804   EVT VT = Op.getValueType();
 805
 806   // Make sure that the SRA and SHL instructions are available.
 807   if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
 808       TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
 809     return DAG.UnrollVectorOp(Op.getNode());
 810
 811   SDLoc DL(Op);
 812   EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
 813
 814   unsigned BW = VT.getScalarSizeInBits();
 815   unsigned OrigBW = OrigTy.getScalarSizeInBits();
 816   SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
 817
 818   Op = Op.getOperand(0);
 819   Op =   DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
 820   return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
 821 }
 822
 823 // Generically expand a vector anyext in register to a shuffle of the relevant
 824 // lanes into the appropriate locations, with other lanes left undef.
 825 SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
 826   SDLoc DL(Op);
 827   EVT VT = Op.getValueType();
 828   int NumElements = VT.getVectorNumElements();
 829   SDValue Src = Op.getOperand(0);
 830   EVT SrcVT = Src.getValueType();
 831   int NumSrcElements = SrcVT.getVectorNumElements();
 832
 833   // Build a base mask of undef shuffles.
 834   SmallVector<int, 16> ShuffleMask;
 835   ShuffleMask.resize(NumSrcElements, -1);
 836
 837   // Place the extended lanes into the correct locations.
 838   int ExtLaneScale = NumSrcElements / NumElements;
 839   int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
 840   for (int i = 0; i < NumElements; ++i)
 841     ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
 842
 843   return DAG.getNode(
 844       ISD::BITCAST, DL, VT,
 845       DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
 846 }
 847
 848 SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
 849   SDLoc DL(Op);
 850   EVT VT = Op.getValueType();
 851   SDValue Src = Op.getOperand(0);
 852   EVT SrcVT = Src.getValueType();
 853
 854   // First build an any-extend node which can be legalized above when we
 855   // recurse through it.
 856   Op = DAG.getAnyExtendVectorInReg(Src, DL, VT);
 857
 858   // Now we need sign extend. Do this by shifting the elements. Even if these
 859   // aren't legal operations, they have a better chance of being legalized
 860   // without full scalarization than the sign extension does.
 861   unsigned EltWidth = VT.getScalarSizeInBits();
 862   unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
 863   SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
 864   return DAG.getNode(ISD::SRA, DL, VT,
 865                      DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
 866                      ShiftAmount);
 867 }
 868
 869 // Generically expand a vector zext in register to a shuffle of the relevant
 870 // lanes into the appropriate locations, a blend of zero into the high bits,
 871 // and a bitcast to the wider element type.
 872 SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
 873   SDLoc DL(Op);
 874   EVT VT = Op.getValueType();
 875   int NumElements = VT.getVectorNumElements();
 876   SDValue Src = Op.getOperand(0);
 877   EVT SrcVT = Src.getValueType();
 878   int NumSrcElements = SrcVT.getVectorNumElements();
 879
 880   // Build up a zero vector to blend into this one.
 881   SDValue Zero = DAG.getConstant(0, DL, SrcVT);
 882
 883   // Shuffle the incoming lanes into the correct position, and pull all other
 884   // lanes from the zero vector.
 885   SmallVector<int, 16> ShuffleMask;
 886   ShuffleMask.reserve(NumSrcElements);
 887   for (int i = 0; i < NumSrcElements; ++i)
 888     ShuffleMask.push_back(i);
 889
 890   int ExtLaneScale = NumSrcElements / NumElements;
 891   int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
 892   for (int i = 0; i < NumElements; ++i)
 893     ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
 894
 895   return DAG.getNode(ISD::BITCAST, DL, VT,
 896                      DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
 897 }
 898
 899 static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
 900   int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
 901   for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
 902     for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
 903       ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
 904 }
 905
 906 SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
 907   EVT VT = Op.getValueType();
 908
 909   // Generate a byte wise shuffle mask for the BSWAP.
 910   SmallVector<int, 16> ShuffleMask;
 911   createBSWAPShuffleMask(VT, ShuffleMask);
 912   EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
 913
 914   // Only emit a shuffle if the mask is legal.
 915   if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
 916     return DAG.UnrollVectorOp(Op.getNode());
 917
 918   SDLoc DL(Op);
 919   Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
 920   Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
 921   return DAG.getNode(ISD::BITCAST, DL, VT, Op);
 922 }
 923
 924 SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
 925   EVT VT = Op.getValueType();
 926
 927   // If we have the scalar operation, it's probably cheaper to unroll it.
 928   if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
 929     return DAG.UnrollVectorOp(Op.getNode());
 930
 931   // If the vector element width is a whole number of bytes, test if its legal
 932   // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
 933   // vector. This greatly reduces the number of bit shifts necessary.
 934   unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
 935   if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
 936     SmallVector<int, 16> BSWAPMask;
 937     createBSWAPShuffleMask(VT, BSWAPMask);
 938
 939     EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
 940     if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
 941         (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
 942          (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
 943           TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
 944           TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
 945           TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
 946       SDLoc DL(Op);
 947       Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
 948       Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
 949                                 BSWAPMask);
 950       Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
 951       return DAG.getNode(ISD::BITCAST, DL, VT, Op);
 952     }
 953   }
 954
 955   // If we have the appropriate vector bit operations, it is better to use them
 956   // than unrolling and expanding each component.
 957   if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
 958       !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
 959       !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
 960       !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
 961     return DAG.UnrollVectorOp(Op.getNode());
 962
 963   // Let LegalizeDAG handle this later.
 964   return Op;
 965 }
 966
 967 SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
 968   // Implement VSELECT in terms of XOR, AND, OR
 969   // on platforms which do not support blend natively.
 970   SDLoc DL(Op);
 971
 972   SDValue Mask = Op.getOperand(0);
 973   SDValue Op1 = Op.getOperand(1);
 974   SDValue Op2 = Op.getOperand(2);
 975
 976   EVT VT = Mask.getValueType();
 977
 978   // If we can't even use the basic vector operations of
 979   // AND,OR,XOR, we will have to scalarize the op.
 980   // Notice that the operation may be 'promoted' which means that it is
 981   // 'bitcasted' to another type which is handled.
 982   // This operation also isn't safe with AND, OR, XOR when the boolean
 983   // type is 0/1 as we need an all ones vector constant to mask with.
 984   // FIXME: Sign extend 1 to all ones if thats legal on the target.
 985   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
 986       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
 987       TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
 988       TLI.getBooleanContents(Op1.getValueType()) !=
 989           TargetLowering::ZeroOrNegativeOneBooleanContent)
 990     return DAG.UnrollVectorOp(Op.getNode());
 991
 992   // If the mask and the type are different sizes, unroll the vector op. This
 993   // can occur when getSetCCResultType returns something that is different in
 994   // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
 995   if (VT.getSizeInBits() != Op1.getValueSizeInBits())
 996     return DAG.UnrollVectorOp(Op.getNode());
 997
 998   // Bitcast the operands to be the same type as the mask.
 999   // This is needed when we select between FP types because
1000   // the mask is a vector of integers.
1001   Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
1002   Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
1003
1004   SDValue AllOnes = DAG.getConstant(
1005     APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);
1006   SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
1007
1008   Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
1009   Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
1010   SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
1011   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
1012 }
1013
1014 SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
1015   EVT VT = Op.getOperand(0).getValueType();
1016   SDLoc DL(Op);
1017
1018   // Make sure that the SINT_TO_FP and SRL instructions are available.
1019   if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
1020       TLI.getOperationAction(ISD::SRL,        VT) == TargetLowering::Expand)
1021     return DAG.UnrollVectorOp(Op.getNode());
1022
1023   unsigned BW = VT.getScalarSizeInBits();
1024   assert((BW == 64 || BW == 32) &&
1025          "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
1026
1027   SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
1028
1029   // Constants to clear the upper part of the word.
1030   // Notice that we can also use SHL+SHR, but using a constant is slightly
1031   // faster on x86.
1032   uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
1033   SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
1034
1035   // Two to the power of half-word-size.
1036   SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType());
1037
1038   // Clear upper part of LO, lower HI
1039   SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
1040   SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
1041
1042   // Convert hi and lo to floats
1043   // Convert the hi part back to the upper values
1044   // TODO: Can any fast-math-flags be set on these nodes?
1045   SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
1046           fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
1047   SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
1048
1049   // Add the two halves
1050   return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
1051 }
1052
1053 SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
1054   if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
1055     SDLoc DL(Op);
1056     SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
1057     // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
1058     return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
1059                        Zero, Op.getOperand(0));
1060   }
1061   return DAG.UnrollVectorOp(Op.getNode());
1062 }
1063
1064 SDValue VectorLegalizer::ExpandFSUB(SDValue Op) {
1065   // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
1066   // we can defer this to operation legalization where it will be lowered as
1067   // a+(-b).
1068   EVT VT = Op.getValueType();
1069   if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
1070       TLI.isOperationLegalOrCustom(ISD::FADD, VT))
1071     return Op; // Defer to LegalizeDAG
1072
1073   return DAG.UnrollVectorOp(Op.getNode());
1074 }
1075
1076 SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
1077   EVT VT = Op.getValueType();
1078   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
1079
1080   // If the non-ZERO_UNDEF version is supported we can use that instead.
1081   if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
1082       TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) {
1083     SDLoc DL(Op);
1084     return DAG.getNode(ISD::CTLZ, DL, VT, Op.getOperand(0));
1085   }
1086
1087   // If we have the appropriate vector bit operations, it is better to use them
1088   // than unrolling and expanding each component.
1089   if (isPowerOf2_32(NumBitsPerElt) &&
1090       TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
1091       TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
1092       TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
1093     return Op;
1094
1095   // Otherwise go ahead and unroll.
1096   return DAG.UnrollVectorOp(Op.getNode());
1097 }
1098
1099 SDValue VectorLegalizer::ExpandCTTZ(SDValue Op) {
1100   EVT VT = Op.getValueType();
1101   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
1102
1103   // If the non-ZERO_UNDEF version is supported we can use that instead.
1104   if (TLI.isOperationLegalOrCustom(ISD::CTTZ, VT)) {
1105     SDLoc DL(Op);
1106     return DAG.getNode(ISD::CTTZ, DL, VT, Op.getOperand(0));
1107   }
1108
1109   // If we have the appropriate vector bit operations, it is better to use them
1110   // than unrolling and expanding each component.
1111   if (isPowerOf2_32(NumBitsPerElt) &&
1112       (TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) ||
1113        TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) &&
1114       TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
1115       TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
1116       TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT))
1117     return Op;
1118
1119   // Otherwise go ahead and unroll.
1120   return DAG.UnrollVectorOp(Op.getNode());
1121 }
1122
1123 SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
1124   EVT VT = Op.getValueType();
1125   EVT EltVT = VT.getVectorElementType();
1126   unsigned NumElems = VT.getVectorNumElements();
1127   unsigned NumOpers = Op.getNumOperands();
1128   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1129   EVT ValueVTs[] = {EltVT, MVT::Other};
1130   SDValue Chain = Op.getOperand(0);
1131   SDLoc dl(Op);
1132
1133   SmallVector<SDValue, 32> OpValues;
1134   SmallVector<SDValue, 32> OpChains;
1135   for (unsigned i = 0; i < NumElems; ++i) {
1136     SmallVector<SDValue, 4> Opers;
1137     SDValue Idx = DAG.getConstant(i, dl,
1138                                   TLI.getVectorIdxTy(DAG.getDataLayout()));
1139
1140     // The Chain is the first operand.
1141     Opers.push_back(Chain);
1142
1143     // Now process the remaining operands.
1144     for (unsigned j = 1; j < NumOpers; ++j) {
1145       SDValue Oper = Op.getOperand(j);
1146       EVT OperVT = Oper.getValueType();
1147
1148       if (OperVT.isVector())
1149         Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
1150                            EltVT, Oper, Idx);
1151
1152       Opers.push_back(Oper);
1153     }
1154
1155     SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers);
1156
1157     OpValues.push_back(ScalarOp.getValue(0));
1158     OpChains.push_back(ScalarOp.getValue(1));
1159   }
1160
1161   SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
1162   SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
1163
1164   AddLegalizedOperand(Op.getValue(0), Result);
1165   AddLegalizedOperand(Op.getValue(1), NewChain);
1166
1167   return Op.getResNo() ? NewChain : Result;
1168 }
1169
1170 SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
1171   EVT VT = Op.getValueType();
1172   unsigned NumElems = VT.getVectorNumElements();
1173   EVT EltVT = VT.getVectorElementType();
1174   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
1175   EVT TmpEltVT = LHS.getValueType().getVectorElementType();
1176   SDLoc dl(Op);
1177   SmallVector<SDValue, 8> Ops(NumElems);
1178   for (unsigned i = 0; i < NumElems; ++i) {
1179     SDValue LHSElem = DAG.getNode(
1180         ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
1181         DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
1182     SDValue RHSElem = DAG.getNode(
1183         ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
1184         DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
1185     Ops[i] = DAG.getNode(ISD::SETCC, dl,
1186                          TLI.getSetCCResultType(DAG.getDataLayout(),
1187                                                 *DAG.getContext(), TmpEltVT),
1188                          LHSElem, RHSElem, CC);
1189     Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
1190                            DAG.getConstant(APInt::getAllOnesValue
1191                                            (EltVT.getSizeInBits()), dl, EltVT),
1192                            DAG.getConstant(0, dl, EltVT));
1193   }
1194   return DAG.getBuildVector(VT, dl, Ops);
1195 }
1196
1197 bool SelectionDAG::LegalizeVectors() {
1198   return VectorLegalizer(*this).Run();
1199 }