lib/Target/X86/X86ISelLowering.h

   1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the interfaces that X86 uses to lower LLVM code into a
  11 // selection DAG.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  16 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
  17
  18 #include "llvm/CodeGen/CallingConvLower.h"
  19 #include "llvm/CodeGen/SelectionDAG.h"
  20 #include "llvm/CodeGen/TargetLowering.h"
  21 #include "llvm/Target/TargetOptions.h"
  22
  23 namespace llvm {
  24   class X86Subtarget;
  25   class X86TargetMachine;
  26
  27   namespace X86ISD {
  28     // X86 Specific DAG Nodes
  29     enum NodeType : unsigned {
  30       // Start the numbering where the builtin ops leave off.
  31       FIRST_NUMBER = ISD::BUILTIN_OP_END,
  32
  33       /// Bit scan forward.
  34       BSF,
  35       /// Bit scan reverse.
  36       BSR,
  37
  38       /// Double shift instructions. These correspond to
  39       /// X86::SHLDxx and X86::SHRDxx instructions.
  40       SHLD,
  41       SHRD,
  42
  43       /// Bitwise logical AND of floating point values. This corresponds
  44       /// to X86::ANDPS or X86::ANDPD.
  45       FAND,
  46
  47       /// Bitwise logical OR of floating point values. This corresponds
  48       /// to X86::ORPS or X86::ORPD.
  49       FOR,
  50
  51       /// Bitwise logical XOR of floating point values. This corresponds
  52       /// to X86::XORPS or X86::XORPD.
  53       FXOR,
  54
  55       /// Bitwise logical ANDNOT of floating point values. This
  56       /// corresponds to X86::ANDNPS or X86::ANDNPD.
  57       FANDN,
  58
  59       /// These operations represent an abstract X86 call
  60       /// instruction, which includes a bunch of information.  In particular the
  61       /// operands of these nodes are:
  62       ///
  63       ///     #0 - The incoming token chain
  64       ///     #1 - The callee
  65       ///     #2 - The number of arg bytes the caller pushes on the stack.
  66       ///     #3 - The number of arg bytes the callee pops off the stack.
  67       ///     #4 - The value to pass in AL/AX/EAX (optional)
  68       ///     #5 - The value to pass in DL/DX/EDX (optional)
  69       ///
  70       /// The result values of these nodes are:
  71       ///
  72       ///     #0 - The outgoing token chain
  73       ///     #1 - The first register result value (optional)
  74       ///     #2 - The second register result value (optional)
  75       ///
  76       CALL,
  77
  78       /// This operation implements the lowering for readcyclecounter.
  79       RDTSC_DAG,
  80
  81       /// X86 Read Time-Stamp Counter and Processor ID.
  82       RDTSCP_DAG,
  83
  84       /// X86 Read Performance Monitoring Counters.
  85       RDPMC_DAG,
  86
  87       /// X86 compare and logical compare instructions.
  88       CMP, COMI, UCOMI,
  89
  90       /// X86 bit-test instructions.
  91       BT,
  92
  93       /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
  94       /// operand, usually produced by a CMP instruction.
  95       SETCC,
  96
  97       /// X86 Select
  98       SELECT, SELECTS,
  99
 100       // Same as SETCC except it's materialized with a sbb and the value is all
 101       // ones or all zeros.
 102       SETCC_CARRY,  // R = carry_bit ? ~0 : 0
 103
 104       /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
 105       /// Operands are two FP values to compare; result is a mask of
 106       /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
 107       FSETCC,
 108
 109       /// X86 FP SETCC, similar to above, but with output as an i1 mask and
 110       /// with optional rounding mode.
 111       FSETCCM, FSETCCM_RND,
 112
 113       /// X86 conditional moves. Operand 0 and operand 1 are the two values
 114       /// to select from. Operand 2 is the condition code, and operand 3 is the
 115       /// flag operand produced by a CMP or TEST instruction. It also writes a
 116       /// flag result.
 117       CMOV,
 118
 119       /// X86 conditional branches. Operand 0 is the chain operand, operand 1
 120       /// is the block to branch if condition is true, operand 2 is the
 121       /// condition code, and operand 3 is the flag operand produced by a CMP
 122       /// or TEST instruction.
 123       BRCOND,
 124
 125       /// Return with a flag operand. Operand 0 is the chain operand, operand
 126       /// 1 is the number of bytes of stack to pop.
 127       RET_FLAG,
 128
 129       /// Return from interrupt. Operand 0 is the number of bytes to pop.
 130       IRET,
 131
 132       /// Repeat fill, corresponds to X86::REP_STOSx.
 133       REP_STOS,
 134
 135       /// Repeat move, corresponds to X86::REP_MOVSx.
 136       REP_MOVS,
 137
 138       /// On Darwin, this node represents the result of the popl
 139       /// at function entry, used for PIC code.
 140       GlobalBaseReg,
 141
 142       /// A wrapper node for TargetConstantPool, TargetJumpTable,
 143       /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
 144       /// MCSymbol and TargetBlockAddress.
 145       Wrapper,
 146
 147       /// Special wrapper used under X86-64 PIC mode for RIP
 148       /// relative displacements.
 149       WrapperRIP,
 150
 151       /// Copies a 64-bit value from the low word of an XMM vector
 152       /// to an MMX vector.
 153       MOVDQ2Q,
 154
 155       /// Copies a 32-bit value from the low word of a MMX
 156       /// vector to a GPR.
 157       MMX_MOVD2W,
 158
 159       /// Copies a GPR into the low 32-bit word of a MMX vector
 160       /// and zero out the high word.
 161       MMX_MOVW2D,
 162
 163       /// Extract an 8-bit value from a vector and zero extend it to
 164       /// i32, corresponds to X86::PEXTRB.
 165       PEXTRB,
 166
 167       /// Extract a 16-bit value from a vector and zero extend it to
 168       /// i32, corresponds to X86::PEXTRW.
 169       PEXTRW,
 170
 171       /// Insert any element of a 4 x float vector into any element
 172       /// of a destination 4 x float vector.
 173       INSERTPS,
 174
 175       /// Insert the lower 8-bits of a 32-bit value to a vector,
 176       /// corresponds to X86::PINSRB.
 177       PINSRB,
 178
 179       /// Insert the lower 16-bits of a 32-bit value to a vector,
 180       /// corresponds to X86::PINSRW.
 181       PINSRW,
 182
 183       /// Shuffle 16 8-bit values within a vector.
 184       PSHUFB,
 185
 186       /// Compute Sum of Absolute Differences.
 187       PSADBW,
 188       /// Compute Double Block Packed Sum-Absolute-Differences
 189       DBPSADBW,
 190
 191       /// Bitwise Logical AND NOT of Packed FP values.
 192       ANDNP,
 193
 194       /// Blend where the selector is an immediate.
 195       BLENDI,
 196
 197       /// Dynamic (non-constant condition) vector blend where only the sign bits
 198       /// of the condition elements are used. This is used to enforce that the
 199       /// condition mask is not valid for generic VSELECT optimizations.
 200       SHRUNKBLEND,
 201
 202       /// Combined add and sub on an FP vector.
 203       ADDSUB,
 204
 205       //  FP vector ops with rounding mode.
 206       FADD_RND, FADDS_RND,
 207       FSUB_RND, FSUBS_RND,
 208       FMUL_RND, FMULS_RND,
 209       FDIV_RND, FDIVS_RND,
 210       FMAX_RND, FMAXS_RND,
 211       FMIN_RND, FMINS_RND,
 212       FSQRT_RND, FSQRTS_RND,
 213
 214       // FP vector get exponent.
 215       FGETEXP_RND, FGETEXPS_RND,
 216       // Extract Normalized Mantissas.
 217       VGETMANT, VGETMANT_RND, VGETMANTS, VGETMANTS_RND,
 218       // FP Scale.
 219       SCALEF,
 220       SCALEFS,
 221
 222       // Integer add/sub with unsigned saturation.
 223       ADDUS,
 224       SUBUS,
 225
 226       // Integer add/sub with signed saturation.
 227       ADDS,
 228       SUBS,
 229
 230       // Unsigned Integer average.
 231       AVG,
 232
 233       /// Integer horizontal add/sub.
 234       HADD,
 235       HSUB,
 236
 237       /// Floating point horizontal add/sub.
 238       FHADD,
 239       FHSUB,
 240
 241       // Detect Conflicts Within a Vector
 242       CONFLICT,
 243
 244       /// Floating point max and min.
 245       FMAX, FMIN,
 246
 247       /// Commutative FMIN and FMAX.
 248       FMAXC, FMINC,
 249
 250       /// Scalar intrinsic floating point max and min.
 251       FMAXS, FMINS,
 252
 253       /// Floating point reciprocal-sqrt and reciprocal approximation.
 254       /// Note that these typically require refinement
 255       /// in order to obtain suitable precision.
 256       FRSQRT, FRCP,
 257
 258       // AVX-512 reciprocal approximations with a little more precision.
 259       RSQRT14, RSQRT14S, RCP14, RCP14S,
 260
 261       // Thread Local Storage.
 262       TLSADDR,
 263
 264       // Thread Local Storage. A call to get the start address
 265       // of the TLS block for the current module.
 266       TLSBASEADDR,
 267
 268       // Thread Local Storage.  When calling to an OS provided
 269       // thunk at the address from an earlier relocation.
 270       TLSCALL,
 271
 272       // Exception Handling helpers.
 273       EH_RETURN,
 274
 275       // SjLj exception handling setjmp.
 276       EH_SJLJ_SETJMP,
 277
 278       // SjLj exception handling longjmp.
 279       EH_SJLJ_LONGJMP,
 280
 281       // SjLj exception handling dispatch.
 282       EH_SJLJ_SETUP_DISPATCH,
 283
 284       /// Tail call return. See X86TargetLowering::LowerCall for
 285       /// the list of operands.
 286       TC_RETURN,
 287
 288       // Vector move to low scalar and zero higher vector elements.
 289       VZEXT_MOVL,
 290
 291       // Vector integer zero-extend.
 292       VZEXT,
 293       // Vector integer signed-extend.
 294       VSEXT,
 295
 296       // Vector integer truncate.
 297       VTRUNC,
 298       // Vector integer truncate with unsigned/signed saturation.
 299       VTRUNCUS, VTRUNCS,
 300
 301       // Vector FP extend.
 302       VFPEXT, VFPEXT_RND, VFPEXTS_RND,
 303
 304       // Vector FP round.
 305       VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
 306
 307       // Convert a vector to mask, set bits based on MSB.
 308       CVT2MASK,
 309
 310       // 128-bit vector logical left / right shift
 311       VSHLDQ, VSRLDQ,
 312
 313       // Vector shift elements
 314       VSHL, VSRL, VSRA,
 315
 316       // Vector variable shift right arithmetic.
 317       // Unlike ISD::SRA, when the shift count is greater than the element
 318       // size the sign bit is used to fill the destination data element.
 319       VSRAV,
 320
 321       // Vector shift elements by immediate
 322       VSHLI, VSRLI, VSRAI,
 323
 324       // Shifts of mask registers.
 325       KSHIFTL, KSHIFTR,
 326
 327       // Bit rotate by immediate
 328       VROTLI, VROTRI,
 329
 330       // Vector packed double/float comparison.
 331       CMPP,
 332
 333       // Vector integer comparisons.
 334       PCMPEQ, PCMPGT,
 335       // Vector integer comparisons, the result is in a mask vector.
 336       PCMPEQM, PCMPGTM,
 337
 338       // v8i16 Horizontal minimum and position.
 339       PHMINPOS,
 340
 341       MULTISHIFT,
 342
 343       /// Vector comparison generating mask bits for fp and
 344       /// integer signed and unsigned data types.
 345       CMPM,
 346       CMPMU,
 347       // Vector comparison with rounding mode for FP values
 348       CMPM_RND,
 349
 350       // Arithmetic operations with FLAGS results.
 351       ADD, SUB, ADC, SBB, SMUL,
 352       INC, DEC, OR, XOR, AND,
 353
 354       // LOW, HI, FLAGS = umul LHS, RHS.
 355       UMUL,
 356
 357       // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS.
 358       SMUL8, UMUL8,
 359
 360       // 8-bit divrem that zero-extend or sign-extend the high result (AH).
 361       UDIVREM8_ZEXT_HREG,
 362       SDIVREM8_SEXT_HREG,
 363
 364       // X86-specific multiply by immediate.
 365       MUL_IMM,
 366
 367       // Vector sign bit extraction.
 368       MOVMSK,
 369
 370       // Vector bitwise comparisons.
 371       PTEST,
 372
 373       // Vector packed fp sign bitwise comparisons.
 374       TESTP,
 375
 376       // Vector "test" in AVX-512, the result is in a mask vector.
 377       TESTM,
 378       TESTNM,
 379
 380       // OR/AND test for masks.
 381       KORTEST,
 382       KTEST,
 383
 384       // Several flavors of instructions with vector shuffle behaviors.
 385       // Saturated signed/unsigned packing.
 386       PACKSS,
 387       PACKUS,
 388       // Intra-lane alignr.
 389       PALIGNR,
 390       // AVX512 inter-lane alignr.
 391       VALIGN,
 392       PSHUFD,
 393       PSHUFHW,
 394       PSHUFLW,
 395       SHUFP,
 396       // VBMI2 Concat & Shift.
 397       VSHLD,
 398       VSHRD,
 399       VSHLDV,
 400       VSHRDV,
 401       // Shuffle Packed Values at 128-bit granularity.
 402       SHUF128,
 403       MOVDDUP,
 404       MOVSHDUP,
 405       MOVSLDUP,
 406       MOVLHPS,
 407       MOVHLPS,
 408       MOVLPS,
 409       MOVLPD,
 410       MOVSD,
 411       MOVSS,
 412       UNPCKL,
 413       UNPCKH,
 414       VPERMILPV,
 415       VPERMILPI,
 416       VPERMI,
 417       VPERM2X128,
 418
 419       // Variable Permute (VPERM).
 420       // Res = VPERMV MaskV, V0
 421       VPERMV,
 422
 423       // 3-op Variable Permute (VPERMT2).
 424       // Res = VPERMV3 V0, MaskV, V1
 425       VPERMV3,
 426
 427       // 3-op Variable Permute overwriting the index (VPERMI2).
 428       // Res = VPERMIV3 V0, MaskV, V1
 429       VPERMIV3,
 430
 431       // Bitwise ternary logic.
 432       VPTERNLOG,
 433       // Fix Up Special Packed Float32/64 values.
 434       VFIXUPIMM,
 435       VFIXUPIMMS,
 436       // Range Restriction Calculation For Packed Pairs of Float32/64 values.
 437       VRANGE, VRANGE_RND, VRANGES, VRANGES_RND,
 438       // Reduce - Perform Reduction Transformation on scalar/packed FP.
 439       VREDUCE, VREDUCE_RND, VREDUCES, VREDUCES_RND,
 440       // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
 441       // Also used by the legacy (V)ROUND intrinsics where we mask out the
 442       // scaling part of the immediate.
 443       VRNDSCALE, VRNDSCALE_RND, VRNDSCALES, VRNDSCALES_RND,
 444       // Tests Types Of FP Values for packed types.
 445       VFPCLASS,
 446       // Tests Types Of FP Values for scalar types.
 447       VFPCLASSS,
 448
 449       // Broadcast scalar to vector.
 450       VBROADCAST,
 451       // Broadcast mask to vector.
 452       VBROADCASTM,
 453       // Broadcast subvector to vector.
 454       SUBV_BROADCAST,
 455
 456       // Extract vector element.
 457       VEXTRACT,
 458
 459       /// SSE4A Extraction and Insertion.
 460       EXTRQI, INSERTQI,
 461
 462       // XOP arithmetic/logical shifts.
 463       VPSHA, VPSHL,
 464       // XOP signed/unsigned integer comparisons.
 465       VPCOM, VPCOMU,
 466       // XOP packed permute bytes.
 467       VPPERM,
 468       // XOP two source permutation.
 469       VPERMIL2,
 470
 471       // Vector multiply packed unsigned doubleword integers.
 472       PMULUDQ,
 473       // Vector multiply packed signed doubleword integers.
 474       PMULDQ,
 475       // Vector Multiply Packed Unsigned Integers with Round and Scale.
 476       MULHRS,
 477
 478       // Multiply and Add Packed Integers.
 479       VPMADDUBSW, VPMADDWD,
 480
 481       // AVX512IFMA multiply and add.
 482       // NOTE: These are different than the instruction and perform
 483       // op0 x op1 + op2.
 484       VPMADD52L, VPMADD52H,
 485
 486       // VNNI
 487       VPDPBUSD,
 488       VPDPBUSDS,
 489       VPDPWSSD,
 490       VPDPWSSDS,
 491
 492       // FMA nodes.
 493       // We use the target independent ISD::FMA for the non-inverted case.
 494       FNMADD,
 495       FMSUB,
 496       FNMSUB,
 497       FMADDSUB,
 498       FMSUBADD,
 499
 500       // FMA with rounding mode.
 501       FMADD_RND,
 502       FNMADD_RND,
 503       FMSUB_RND,
 504       FNMSUB_RND,
 505       FMADDSUB_RND,
 506       FMSUBADD_RND,
 507
 508       // FMA4 specific scalar intrinsics bits that zero the non-scalar bits.
 509       FMADD4S, FNMADD4S, FMSUB4S, FNMSUB4S,
 510
 511       // Scalar intrinsic FMA.
 512       FMADDS1, FMADDS3,
 513       FNMADDS1, FNMADDS3,
 514       FMSUBS1, FMSUBS3,
 515       FNMSUBS1, FNMSUBS3,
 516
 517       // Scalar intrinsic FMA with rounding mode.
 518       // Two versions, passthru bits on op1 or op3.
 519       FMADDS1_RND, FMADDS3_RND,
 520       FNMADDS1_RND, FNMADDS3_RND,
 521       FMSUBS1_RND, FMSUBS3_RND,
 522       FNMSUBS1_RND, FNMSUBS3_RND,
 523
 524       // Compress and expand.
 525       COMPRESS,
 526       EXPAND,
 527
 528       // Bits shuffle
 529       VPSHUFBITQMB,
 530
 531       // Convert Signed/Unsigned Integer to Floating-Point with rounding mode.
 532       SINT_TO_FP_RND, UINT_TO_FP_RND,
 533       SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
 534
 535       // Vector float/double to signed/unsigned integer.
 536       CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
 537       // Scalar float/double to signed/unsigned integer.
 538       CVTS2SI_RND, CVTS2UI_RND,
 539
 540       // Vector float/double to signed/unsigned integer with truncation.
 541       CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
 542       // Scalar float/double to signed/unsigned integer with truncation.
 543       CVTTS2SI_RND, CVTTS2UI_RND,
 544
 545       // Vector signed/unsigned integer to float/double.
 546       CVTSI2P, CVTUI2P,
 547
 548       // Save xmm argument registers to the stack, according to %al. An operator
 549       // is needed so that this can be expanded with control flow.
 550       VASTART_SAVE_XMM_REGS,
 551
 552       // Windows's _chkstk call to do stack probing.
 553       WIN_ALLOCA,
 554
 555       // For allocating variable amounts of stack space when using
 556       // segmented stacks. Check if the current stacklet has enough space, and
 557       // falls back to heap allocation if not.
 558       SEG_ALLOCA,
 559
 560       // Memory barriers.
 561       MEMBARRIER,
 562       MFENCE,
 563
 564       // Store FP status word into i16 register.
 565       FNSTSW16r,
 566
 567       // Store contents of %ah into %eflags.
 568       SAHF,
 569
 570       // Get a random integer and indicate whether it is valid in CF.
 571       RDRAND,
 572
 573       // Get a NIST SP800-90B & C compliant random integer and
 574       // indicate whether it is valid in CF.
 575       RDSEED,
 576
 577       // SSE42 string comparisons.
 578       PCMPISTRI,
 579       PCMPESTRI,
 580
 581       // Test if in transactional execution.
 582       XTEST,
 583
 584       // ERI instructions.
 585       RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,
 586
 587       // Conversions between float and half-float.
 588       CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
 589
 590       // Galois Field Arithmetic Instructions
 591       GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
 592
 593       // LWP insert record.
 594       LWPINS,
 595
 596       // Compare and swap.
 597       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
 598       LCMPXCHG8_DAG,
 599       LCMPXCHG16_DAG,
 600       LCMPXCHG8_SAVE_EBX_DAG,
 601       LCMPXCHG16_SAVE_RBX_DAG,
 602
 603       /// LOCK-prefixed arithmetic read-modify-write instructions.
 604       /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
 605       LADD, LSUB, LOR, LXOR, LAND, LINC, LDEC,
 606
 607       // Load, scalar_to_vector, and zero extend.
 608       VZEXT_LOAD,
 609
 610       // Store FP control word into i16 memory.
 611       FNSTCW16m,
 612
 613       /// This instruction implements FP_TO_SINT with the
 614       /// integer destination in memory and a FP reg source.  This corresponds
 615       /// to the X86::FIST*m instructions and the rounding mode change stuff. It
 616       /// has two inputs (token chain and address) and two outputs (int value
 617       /// and token chain).
 618       FP_TO_INT16_IN_MEM,
 619       FP_TO_INT32_IN_MEM,
 620       FP_TO_INT64_IN_MEM,
 621
 622       /// This instruction implements SINT_TO_FP with the
 623       /// integer source in memory and FP reg result.  This corresponds to the
 624       /// X86::FILD*m instructions. It has three inputs (token chain, address,
 625       /// and source type) and two outputs (FP value and token chain). FILD_FLAG
 626       /// also produces a flag.
 627       FILD,
 628       FILD_FLAG,
 629
 630       /// This instruction implements an extending load to FP stack slots.
 631       /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
 632       /// operand, ptr to load from, and a ValueType node indicating the type
 633       /// to load to.
 634       FLD,
 635
 636       /// This instruction implements a truncating store to FP stack
 637       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
 638       /// chain operand, value to store, address, and a ValueType to store it
 639       /// as.
 640       FST,
 641
 642       /// This instruction grabs the address of the next argument
 643       /// from a va_list. (reads and modifies the va_list in memory)
 644       VAARG_64,
 645
 646       // Vector truncating store with unsigned/signed saturation
 647       VTRUNCSTOREUS, VTRUNCSTORES,
 648       // Vector truncating masked store with unsigned/signed saturation
 649       VMTRUNCSTOREUS, VMTRUNCSTORES,
 650
 651       // X86 specific gather and scatter
 652       MGATHER, MSCATTER,
 653
 654       // WARNING: Do not add anything at the end unless you want the node to
 655       // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
 656       // opcodes will be treated as target memory ops!
 657     };
 658   } // end namespace X86ISD
 659
 660   /// Define some predicates that are used for node matching.
 661   namespace X86 {
 662     /// Returns true if Elt is a constant zero or floating point constant +0.0.
 663     bool isZeroNode(SDValue Elt);
 664
 665     /// Returns true if the given offset can be
 666     /// fit into the displacement field of the instruction.
 667     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
 668                                       bool hasSymbolicDisplacement = true);
 669
 670     /// Determines whether the callee is required to pop its
 671     /// own arguments. Callee pop is necessary to support tail calls.
 672     bool isCalleePop(CallingConv::ID CallingConv,
 673                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
 674
 675   } // end namespace X86
 676
 677   //===--------------------------------------------------------------------===//
 678   //  X86 Implementation of the TargetLowering interface
 679   class X86TargetLowering final : public TargetLowering {
 680   public:
 681     explicit X86TargetLowering(const X86TargetMachine &TM,
 682                                const X86Subtarget &STI);
 683
 684     unsigned getJumpTableEncoding() const override;
 685     bool useSoftFloat() const override;
 686
 687     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
 688                                ArgListTy &Args) const override;
 689
 690     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
 691       return MVT::i8; // Fixed answer: neither the data layout nor VT is read.
 692     }
 693
 694     const MCExpr *
 695     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
 696                               const MachineBasicBlock *MBB, unsigned uid,
 697                               MCContext &Ctx) const override;
 698
 699     /// Returns relocation base for the given PIC jumptable.
 700     SDValue getPICJumpTableRelocBase(SDValue Table,
 701                                      SelectionDAG &DAG) const override;
 702     const MCExpr *
 703     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
 704                                  unsigned JTI, MCContext &Ctx) const override;
 705
 706     /// Return the desired alignment for ByVal aggregate
 707     /// function arguments in the caller parameter area. For X86, aggregates
 708     /// that contain SSE vectors are placed at 16-byte boundaries while the
 709     /// rest are at 4-byte boundaries.
 710     unsigned getByValTypeAlignment(Type *Ty,
 711                                    const DataLayout &DL) const override;
 712
 713     /// Returns the target specific optimal type for load
 714     /// and store operations as a result of memset, memcpy, and memmove
 715     /// lowering. If DstAlign is zero that means the destination
 716     /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
 717     /// means there isn't a need to check it against alignment requirement,
 718     /// probably because the source does not need to be loaded. If 'IsMemset' is
 719     /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
 720     /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
 721     /// source is constant so it does not need to be loaded.
 722     /// It returns EVT::Other if the type should be determined using generic
 723     /// target-independent logic.
 724     EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
 725                             bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
 726                             MachineFunction &MF) const override;
 727
 728     /// Returns true if it's safe to use load / store of the
 729     /// specified type to expand memcpy / memset inline. This is mostly true
 730     /// for all types except for some special cases. For example, on X86
 731     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
 732     /// also does type conversion. Note the specified type doesn't have to be
 733     /// legal as the hook is used before type legalization.
 734     bool isSafeMemOpType(MVT VT) const override;
 735
 736     /// Returns true if the target allows unaligned memory accesses of the
 737     /// specified type. Returns whether it is "fast" in the last argument.
 738     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
 739                                        bool *Fast) const override;
 740
 741     /// Provide custom lowering hooks for some operations.
 742     ///
 743     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 744
 745     /// Places new result values for the node in Results (their number
 746     /// and types must exactly match those of the original return values of
 747     /// the node), or leaves Results empty, which indicates that the node is not
 748     /// to be custom lowered after all.
 749     void LowerOperationWrapper(SDNode *N,
 750                                SmallVectorImpl<SDValue> &Results,
 751                                SelectionDAG &DAG) const override;
 752
 753     /// Replace the results of node with an illegal result
 754     /// type with new values built out of custom code.
 755     ///
 756     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
 757                             SelectionDAG &DAG) const override;
 758
 759     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 760
 761     // Return true if it is profitable to combine a BUILD_VECTOR with a
 762     // stride-pattern to a shuffle and a truncate.
 763     // Example of such a combine:
 764     // v4i32 build_vector((extract_elt V, 1),
 765     //                    (extract_elt V, 3),
 766     //                    (extract_elt V, 5),
 767     //                    (extract_elt V, 7))
 768     //  -->
 769     // v4i32 truncate (bitcast (shuffle<1,u,3,u,5,u,7,u> V, u) to
 770     // v4i64)
 771     bool isDesirableToCombineBuildVectorToShuffleTruncate(
 772         ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
 773
 774     /// Return true if the target has native support for
 775     /// the specified value type and it is 'desirable' to use the type for the
 776     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
 777     /// instruction encodings are longer and some i16 instructions are slow.
 778     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
 779
 780     /// Return true if the target has native support for the
 781     /// specified value type and it is 'desirable' to use the type. e.g. On x86
 782     /// i16 is legal, but undesirable since i16 instruction encodings are longer
 783     /// and some i16 instructions are slow.
 784     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
 785
 786     MachineBasicBlock *
 787     EmitInstrWithCustomInserter(MachineInstr &MI,
 788                                 MachineBasicBlock *MBB) const override;
 789
 790     /// This method returns the name of a target specific DAG node.
 791     const char *getTargetNodeName(unsigned Opcode) const override;
 792
 793     bool mergeStoresAfterLegalization() const override { return true; } // Always.
 794
 795     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
 796                           const SelectionDAG &DAG) const override;
 797
 798     bool isCheapToSpeculateCttz() const override;
 799
 800     bool isCheapToSpeculateCtlz() const override;
 801
 802     bool isCtlzFast() const override;
 803
 804     bool hasBitPreservingFPLogic(EVT VT) const override { // f32/f64/any vector
 805       return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
 806     }
 807
 808     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
 809       // If the pair to store is a mixture of float and int values, we will
 810       // save two bitwise instructions and one float-to-int instruction and
 811       // increase one store instruction. There is potentially a more
 812       // significant benefit because it avoids the float->int domain switch
 813       // for input value. So It is more likely a win.
 814       if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
 815           (LTy.isInteger() && HTy.isFloatingPoint()))
 816         return true;
 817       // If the pair only contains int values, we will save two bitwise
 818       // instructions and increase one store instruction (costing one more
 819       // store buffer). Since the benefit is more blurred so we leave
 820       // such pair out until we get testcase to prove it is a win.
 821       return false;
 822     }
 823
 824     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
 825
 826     bool hasAndNotCompare(SDValue Y) const override;
 827
 828     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
 829       return VT.isScalarInteger(); // Opt in for scalar integer types only.
 830     }
 831
 832     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
 833     MVT hasFastEqualityCompare(unsigned NumBits) const override;
 834
 835     /// Return the value type to use for ISD::SETCC.
 836     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
 837                            EVT VT) const override;
 838
 839     /// Determine which bits of Op are known to be either zero or one and
 840     /// return them in Known.
 841     void computeKnownBitsForTargetNode(const SDValue Op,
 842                                        KnownBits &Known,
 843                                        const APInt &DemandedElts,
 844                                        const SelectionDAG &DAG,
 845                                        unsigned Depth = 0) const override;
 846
 847     /// Determine the number of bits in the operation that are sign bits.
 848     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
 849                                              const APInt &DemandedElts,
 850                                              const SelectionDAG &DAG,
 851                                              unsigned Depth) const override;
 852
 853     SDValue unwrapAddress(SDValue N) const override;
 854
         /// GA and Offset are non-const references (presumably outputs).
         /// NOTE(review): presumably returns true when N is a global address plus
         /// a constant offset -- confirm against the definition.
 855     bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
 856                         int64_t &Offset) const override;
 857
         /// X86-specific helper (no 'override' specifier); only declared here.
 858     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
 859
 860     bool ExpandInlineAsm(CallInst *CI) const override;
 861
 862     ConstraintType getConstraintType(StringRef Constraint) const override;
 863
 864     /// Examine constraint string and operand type and determine a weight value.
 865     /// The operand object must already have been set up with the operand type.
 866     ConstraintWeight
 867       getSingleConstraintMatchWeight(AsmOperandInfo &info,
 868                                      const char *constraint) const override;
 869
 870     const char *LowerXConstraint(EVT ConstraintVT) const override;
 871
 872     /// Lower the specified operand into the Ops vector. If it is invalid, don't
 873     /// add anything to Ops.
 874     /// NOTE(review): earlier wording described a 'hasMemory' flag ("one of the
         /// asm constraints of the inline asm instruction being processed is 'm'");
         /// this declaration has no such parameter -- confirm and drop if stale.
 875     void LowerAsmOperandForConstraint(SDValue Op,
 876                                       std::string &Constraint,
 877                                       std::vector<SDValue> &Ops,
 878                                       SelectionDAG &DAG) const override;
 879
 880     unsigned
 881     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
 882       if (ConstraintCode == "i")
 883         return InlineAsm::Constraint_i;
 884       else if (ConstraintCode == "o")
 885         return InlineAsm::Constraint_o;
 886       else if (ConstraintCode == "v")
 887         return InlineAsm::Constraint_v;
 888       else if (ConstraintCode == "X")
 889         return InlineAsm::Constraint_X;
 890       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
 891     }
 892
 893     /// Given a physical register constraint
 894     /// (e.g. {edx}), return the register number and the register class for the
 895     /// register.  This should only be used for C_Register constraints.  On
 896     /// error, this returns a register number of 0.
 897     std::pair<unsigned, const TargetRegisterClass *>
 898     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
 899                                  StringRef Constraint, MVT VT) const override;
 900
 901     /// Return true if the addressing mode represented
 902     /// by AM is legal for this target, for a load/store of the specified type.
         /// The instruction argument I is optional and defaults to nullptr.
 903     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
 904                                Type *Ty, unsigned AS,
 905                                Instruction *I = nullptr) const override;
 906
 907     /// Return true if the specified immediate is a legal
 908     /// icmp immediate, that is, the target has icmp instructions which can
 909     /// compare a register against the immediate without having to materialize
 910     /// the immediate into a register.
 911     bool isLegalICmpImmediate(int64_t Imm) const override;
 912
 913     /// Return true if the specified immediate is a legal
 914     /// add immediate, that is, the target has add instructions which can
 915     /// add a register and the immediate without having to materialize
 916     /// the immediate into a register.
 917     bool isLegalAddImmediate(int64_t Imm) const override;
 918
 919     /// \brief Return the cost of the scaling factor used in the addressing
 920     /// mode represented by AM for this target, for a load/store
 921     /// of the specified type.
 922     /// If the AM is supported, the return value must be >= 0.
 923     /// If the AM is not supported, it returns a negative value.
 924     int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
 925                              unsigned AS) const override;
 926
         /// Overrides the base-class hook of the same name; only declared here.
         /// NOTE(review): presumably tells whether shifting a vector of type Ty by
         /// a scalar amount is cheap -- confirm against the definition.
 927     bool isVectorShiftByScalarCheap(Type *Ty) const override;
 928
 929     /// Return true if it's free to truncate a value of
 930     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
 931     /// register EAX to i16 by referencing its sub-register AX.
 932     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
 933     bool isTruncateFree(EVT VT1, EVT VT2) const override;
 934
 935     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
 936
 937     /// Return true if any actual instruction that defines a
 938     /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
 939     /// register. This does not necessarily include registers defined in
 940     /// unknown ways, such as incoming arguments, or copies from unknown
 941     /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
 942     /// does not necessarily apply to truncate instructions. e.g. on x86-64,
 943     /// all instructions that define 32-bit values implicitly zero-extend the
 944     /// result out to 64 bits.
 945     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
 946     bool isZExtFree(EVT VT1, EVT VT2) const override;
 947     bool isZExtFree(SDValue Val, EVT VT2) const override;
 948
 949     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
 950     /// extend node) is profitable. ExtVal is the (unnamed) SDValue parameter.
 951     bool isVectorLoadExtDesirable(SDValue) const override;
 952
 953     /// Return true if an FMA operation is faster than a pair of fmul and fadd
 954     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
 955     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
 956     bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
 957
 958     /// Return true if it's profitable to narrow
 959     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
 960     /// from i32 to i8 but not from i32 to i16.
 961     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
 962
 963     /// Given an intrinsic, checks if on the target the intrinsic will need to map
 964     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
 965     /// true and stores the intrinsic information into the IntrinsicInfo that was
 966     /// passed to the function.
 967     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
 968                             unsigned Intrinsic) const override;
 969
 970     /// Returns true if the target can instruction select the
 971     /// specified FP immediate natively. If false, the legalizer will
 972     /// materialize the FP immediate as a load from a constant pool.
 973     bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
 974
 975     /// Targets can use this to indicate that they only support *some*
 976     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
 977     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
 978     /// be legal.
 979     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
 980
 981     /// Similar to isShuffleMaskLegal. Targets can use this to
 982     /// indicate if there is a suitable VECTOR_SHUFFLE that can be used to
 983     /// replace a VAND with a constant pool entry.
 984     bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
 985                                 EVT VT) const override;
 986
 987     /// Tells instruction selection whether it should seek to shrink an FP
 988     /// constant of type VT to a smaller type, in order to save space and / or
 989     /// reduce runtime.
 990     bool ShouldShrinkFPConstant(EVT VT) const override {
 991       // Long double constants are always worth shrinking: fldt is very slow.
 992       if (VT == MVT::f80)
 993         return true;
           // With SSE2 a straight movsd is cheaper than cvtss2sd, so the constant
           // pool entry is left alone; without it, shrinking is allowed.
 994       return !X86ScalarSSEf64;
 995     }
 996
 997     /// Return true if we believe it is correct and profitable to reduce the
 998     /// load node to a smaller type (NewVT), given the load's extension kind.
 999     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1000                                EVT NewVT) const override;
1001
1002     /// True when scalar FP values of type VT are computed in an SSE register
1003     /// rather than on the X87 floating point stack.
1004     bool isScalarFPTypeInSSEReg(EVT VT) const {
1005       if (VT == MVT::f64)
             return X86ScalarSSEf64;                  // f64 is when SSE2
1006       return VT == MVT::f32 && X86ScalarSSEf32;  // f32 is when SSE1
1007     }
1008
1009     /// \brief Returns true if it is beneficial to convert a load of a constant
1010     /// to just the constant itself.
1011     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1012                                            Type *Ty) const override;
1013
         /// Overrides the base-class hook of the same name; only declared here.
         /// NOTE(review): presumably says whether a select of constants of type VT
         /// should be turned into arithmetic -- confirm against the definition.
1014     bool convertSelectOfConstantsToMath(EVT VT) const override;
1015
1016     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1017     /// with this index.
1018     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1019                                  unsigned Index) const override;
1020
1021     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1022                                       unsigned AddrSpace) const override {
1023       // Replacing three or more scalar stores still reduces the instruction
1024       // count, even after a vector constant load is added. MemVT and
           // AddrSpace do not take part in the decision.
1025       return NumElem >= 3;
1026     }
1027
1028     /// Intel processors have a unified instruction and data cache.
1029     const char *getClearCacheBuiltinName() const override {
           // No builtin is named here: there is nothing to do, move along.
1030       return nullptr;
1031     }
1032
1033     unsigned getRegisterByName(const char* RegName, EVT VT,
1034                                SelectionDAG &DAG) const override;
1035
1036     /// If a physical register, this returns the register that receives the
1037     /// exception address on entry to an EH pad.
         /// NOTE(review): the role of PersonalityFn in the choice is not visible
         /// from this declaration -- confirm against the definition.
1038     unsigned
1039     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1040
1041     /// If a physical register, this returns the register that receives the
1042     /// exception typeid on entry to a landing pad.
1043     unsigned
1044     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1045
         /// Overrides the base-class hook of the same name; only declared here.
         /// 'virtual' is not repeated: 'override' already requires that the
         /// function overrides a virtual member.
         /// NOTE(review): presumably reports whether catch objects need fixed
         /// stack locations on this target -- confirm against the definition.
1046     bool needsFixedCatchObjects() const override;
1047
1048     /// This method returns a target specific FastISel object,
1049     /// or null if the target does not support "fast" ISel.
1050     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1051                              const TargetLibraryInfo *libInfo) const override;
1052
1053     /// If the target has a standard location for the stack protector cookie,
1054     /// returns the address of that location. Otherwise, returns nullptr.
1055     Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1056
         // Stack-protector hooks; all are overrides that are only declared here.
1057     bool useLoadStackGuardNode() const override;
1058     bool useStackGuardXorFP() const override;
1059     void insertSSPDeclarations(Module &M) const override;
1060     Value *getSDagStackGuard(const Module &M) const override;
1061     Value *getSSPStackGuardCheck(const Module &M) const override;
1062     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1063                                 const SDLoc &DL) const override;
1064
1065
1066     /// Location of the SafeStack pointer, for targets that store it at a fixed
1067     /// offset in some non-standard address space. The result is a Value*, not
1068     /// a bool. NOTE(review): what is returned when the target has no such
         /// location is not visible from this declaration -- confirm.
1069     Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1070
1071     SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1072                       SelectionDAG &DAG) const;
1073
         /// NOTE(review): presumably true when a cast from address space SrcAS to
         /// DestAS needs no code -- confirm against the definition.
1074     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1075
1076     /// \brief Customize the preferred legalization strategy for certain types.
1077     LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
1078
1079     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1080
1081     bool supportSwiftError() const override;
1082
         /// Returns the stack probe symbol name for MF as a StringRef.
         /// NOTE(review): the result for functions without a probe is not visible
         /// from this declaration -- confirm.
1083     StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1084
1085     unsigned getMaxSupportedInterleaveFactor() const override {
           // Fixed answer: the largest interleave factor reported is 4.
           return 4;
         }
1086
1087     /// \brief Lower interleaved load(s) into target specific
1088     /// instructions/intrinsics.
         /// NOTE(review): the bool result presumably signals whether the lowering
         /// was performed -- confirm against the definition.
1089     bool lowerInterleavedLoad(LoadInst *LI,
1090                               ArrayRef<ShuffleVectorInst *> Shuffles,
1091                               ArrayRef<unsigned> Indices,
1092                               unsigned Factor) const override;
1093
1094     /// \brief Lower interleaved store(s) into target specific
1095     /// instructions/intrinsics.
1096     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1097                                unsigned Factor) const override;
1098
1099
         /// Overrides the base-class hook of the same name; only declared here.
1100     void finalizeLowering(MachineFunction &MF) const override;
1101
1102   protected:
         /// Overrides the base-class hook of the same name. Returns a pair of a
         /// register class pointer and a uint8_t for the value type VT.
         /// NOTE(review): the uint8_t is presumably a cost -- confirm.
1103     std::pair<const TargetRegisterClass *, uint8_t>
1104     findRepresentativeClass(const TargetRegisterInfo *TRI,
1105                             MVT VT) const override;
1106
1107   private:
1108     /// Keep a reference to the X86Subtarget around so that we can
1109     /// make the right decision when generating code for different targets.
1110     const X86Subtarget &Subtarget;
1111
1112     /// Select between SSE or x87 floating point ops.
1113     /// When SSE is available, use it for f32 operations.
1114     /// When SSE2 is available, use it for f64 operations.
         /// Both flags are read by isScalarFPTypeInSSEReg(); X86ScalarSSEf64 is
         /// also read by ShouldShrinkFPConstant().
1115     bool X86ScalarSSEf32;
1116     bool X86ScalarSSEf64;
1117
1118     /// A list of legal FP immediates; addLegalFPImmediate() appends to it.
1119     std::vector<APFloat> LegalFPImmediates;
1120
1121     /// Record Imm as an FP immediate that this x86 target can instruction
1122     /// select natively.
1123     void addLegalFPImmediate(const APFloat &Imm) {
           // A copy of Imm is appended at the end of the list.
1124       LegalFPImmediates.emplace_back(Imm);
1125     }
1126
1127     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1128                             CallingConv::ID CallConv, bool isVarArg,
1129                             const SmallVectorImpl<ISD::InputArg> &Ins,
1130                             const SDLoc &dl, SelectionDAG &DAG,
1131                             SmallVectorImpl<SDValue> &InVals,
1132                             uint32_t *RegMask) const;
         /// NOTE(review): 'i' is presumably the index into ArgInfo of the argument
         /// being lowered -- confirm against the definition.
1133     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1134                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1135                              const SDLoc &dl, SelectionDAG &DAG,
1136                              const CCValAssign &VA, MachineFrameInfo &MFI,
1137                              unsigned i) const;
1138     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1139                              const SDLoc &dl, SelectionDAG &DAG,
1140                              const CCValAssign &VA,
1141                              ISD::ArgFlagsTy Flags) const;
1142
1143     // Call lowering helpers.
1144
1145     /// Check whether the call is eligible for tail call optimization. Targets
1146     /// that want to do tail call optimization should implement this function.
1147     bool IsEligibleForTailCallOptimization(SDValue Callee,
1148                                            CallingConv::ID CalleeCC,
1149                                            bool isVarArg,
1150                                            bool isCalleeStructRet,
1151                                            bool isCallerStructRet,
1152                                            Type *RetTy,
1153                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
1154                                     const SmallVectorImpl<SDValue> &OutVals,
1155                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1156                                            SelectionDAG& DAG) const;
         /// OutRetAddr is a non-const reference (presumably an output).
         /// NOTE(review): confirm what the returned SDValue represents.
1157     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1158                                     SDValue Chain, bool IsTailCall,
1159                                     bool Is64Bit, int FPDiff,
1160                                     const SDLoc &dl) const;
1161
1162     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1163                                          SelectionDAG &DAG) const;
1164
         /// Returns an address-space number; only declared here. Takes no
         /// arguments (written with an empty list rather than C-style '(void)').
         /// NOTE(review): which address space this denotes is not visible from
         /// this declaration -- confirm against the definition.
1165     unsigned getAddressSpace() const;
1166
         /// Helper returning a pair of SDValues for an FP-to-integer conversion.
         /// NOTE(review): the meaning of the two results and of isReplace is not
         /// visible from this declaration -- confirm against the definition.
1167     std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
1168                                                bool isSigned,
1169                                                bool isReplace) const;
1170
         // Lowering routines, all taking the node as Op and the DAG to build in.
         // NOTE(review): the group labels below are inferred from the names only.
         // Vector build / select / element access.
1171     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1172     SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const;
1173     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1174     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1175     SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const;
1176     SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const;
1177     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1178
         // Addresses and symbols. GV is optional in getGlobalWrapperKind.
1179     unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr) const;
1180     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1181     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1182     SDValue LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl,
1183                                int64_t Offset, SelectionDAG &DAG) const;
1184     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1185     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1186     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1187
         // Conversions, comparisons, control flow, frame and EH operations.
1188     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1189     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1190     SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
1191     SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
1192     SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
1193     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1194     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1195     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1196     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1197     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1198     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1199     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1200     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1201     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1202     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1203     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1204     SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1205     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1206     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1207     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1208     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1209     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1210     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1211     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1212     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1213     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1214     SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
1215     SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
1216     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1217
1218     SDValue
1219     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1220                          const SmallVectorImpl<ISD::InputArg> &Ins,
1221                          const SDLoc &dl, SelectionDAG &DAG,
1222                          SmallVectorImpl<SDValue> &InVals) const override;
         /// Overrides the base-class call lowering hook; InVals is a non-const
         /// reference (presumably filled with the call's results -- confirm).
1223     SDValue LowerCall(CallLoweringInfo &CLI,
1224                       SmallVectorImpl<SDValue> &InVals) const override;
1225
         /// Overrides the base-class return lowering hook; only declared here.
1226     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1227                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1228                         const SmallVectorImpl<SDValue> &OutVals,
1229                         const SDLoc &dl, SelectionDAG &DAG) const override;
1230
1231     bool supportSplitCSR(MachineFunction *MF) const override {
           // Split CSR is supported only for CXX_FAST_TLS functions that also
           // carry the NoUnwind attribute.
1232       if (MF->getFunction()->getCallingConv() != CallingConv::CXX_FAST_TLS)
1233         return false;
           return MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
1234     }
1235     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
         /// Companion of initializeSplitCSR; takes the entry block and the list
         /// of exit blocks. NOTE(review): where the copies are placed is not
         /// visible from this declaration -- confirm against the definition.
1236     void insertCopiesSplitCSR(
1237       MachineBasicBlock *Entry,
1238       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1239
1240     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1241
1242     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1243
1244     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1245                             ISD::NodeType ExtendKind) const override;
1246
1247     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1248                         bool isVarArg,
1249                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1250                         LLVMContext &Context) const override;
1251
1252     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1253
         // Atomic expansion hooks.
         // NOTE(review): the LoadInst parameter below is named 'SI', matching the
         // store variant's name; consider renaming at the definition too.
1254     TargetLoweringBase::AtomicExpansionKind
1255     shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1256     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1257     TargetLoweringBase::AtomicExpansionKind
1258     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1259
1260     LoadInst *
1261     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1262
         /// X86-specific helper (no 'override' specifier); only declared here.
1263     bool needsCmpXchgNb(Type *MemType) const;
1264
1265     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1266                                 MachineBasicBlock *DispatchBB, int FI) const;
1267
1268     // Utility function to emit the low-level va_arg code for X86-64.
1269     MachineBasicBlock *
1270     EmitVAARG64WithCustomInserter(MachineInstr &MI,
1271                                   MachineBasicBlock *MBB) const;
1272
1273     /// Utility function to emit the xmm reg save portion of va_start.
1274     MachineBasicBlock *
1275     EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1276                                              MachineBasicBlock *BB) const;
1277
         // The helpers below each take the instruction being expanded and its
         // block and return a MachineBasicBlock pointer.
         // NOTE(review): presumably the block in which emission continues --
         // confirm against the definitions.
1278     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1279                                                  MachineInstr &MI2,
1280                                                  MachineBasicBlock *BB) const;
1281
1282     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1283                                          MachineBasicBlock *BB) const;
1284
1285     MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1286                                            MachineBasicBlock *BB) const;
1287
1288     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1289                                            MachineBasicBlock *BB) const;
1290
1291     MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1292                                            MachineBasicBlock *BB) const;
1293
1294     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1295                                             MachineBasicBlock *BB) const;
1296
1297     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1298                                           MachineBasicBlock *BB) const;
1299
1300     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1301                                           MachineBasicBlock *BB) const;
1302
1303     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1304                                         MachineBasicBlock *MBB) const;
1305
1306     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1307                                          MachineBasicBlock *MBB) const;
1308
1309     MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
1310                                      MachineBasicBlock *MBB) const;
1311
1312     MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1313                                              MachineBasicBlock *MBB) const;
1314
1315     /// Emit nodes that will be selected as "test Op0,Op0", or something
1316     /// equivalent, for use with the given x86 condition code.
1317     SDValue EmitTest(SDValue Op0, unsigned X86CC, const SDLoc &dl,
1318                      SelectionDAG &DAG) const;
1319
1320     /// Emit nodes that will be selected as "cmp Op0,Op1", or something
1321     /// equivalent, for use with the given x86 condition code.
1322     SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
1323                     SelectionDAG &DAG) const;
1324
1325     /// Convert a comparison if required by the subtarget.
1326     SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1327
1328     /// Check if replacement of SQRT with RSQRT should be disabled.
1329     bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1330
1331     /// Use rsqrt* to speed up sqrt calculations.
         /// RefinementSteps and UseOneConstNR are non-const references
         /// (presumably in/out values -- confirm against the definition).
1332     SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1333                             int &RefinementSteps, bool &UseOneConstNR,
1334                             bool Reciprocal) const override;
1335
1336     /// Use rcp* to speed up fdiv calculations.
1337     SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1338                              int &RefinementSteps) const override;
1339
1340     /// Reassociate floating point divisions into multiply by reciprocal.
         /// NOTE(review): the unsigned result is presumably a threshold on the
         /// number of divisions sharing a divisor -- confirm.
1341     unsigned combineRepeatedFPDivisors() const override;
1342   };
1343
1344   namespace X86 {
         /// Free-function counterpart of the createFastISel member declared in
         /// X86TargetLowering, with the same parameters; only declared here.
1345     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1346                              const TargetLibraryInfo *libInfo);
1347   } // end namespace X86
1348
1349   // Base class for all X86 non-masked store operations.
1350   class X86StoreSDNode : public MemSDNode {
1351   public:
1352     X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1353                    SDVTList VTs, EVT MemVT,
1354                    MachineMemOperand *MMO)
1355       :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1356     const SDValue &getValue() const { return getOperand(1); }
1357     const SDValue &getBasePtr() const { return getOperand(2); }
1358
1359     static bool classof(const SDNode *N) {
1360       return N->getOpcode() == X86ISD::VTRUNCSTORES ||
1361         N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1362     }
1363   };
1364
1365   // Base class for all X86 masked store operations.
1366   // The class has the same order of operands as MaskedStoreSDNode for
1367   // convenience.
1368   class X86MaskedStoreSDNode : public MemSDNode {
1369   public:
1370     X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
1371                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1372                          MachineMemOperand *MMO)
1373       : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1374
1375     const SDValue &getBasePtr() const { return getOperand(1); }
1376     const SDValue &getMask()    const { return getOperand(2); }
1377     const SDValue &getValue()   const { return getOperand(3); }
1378
1379     static bool classof(const SDNode *N) {
1380       return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
1381         N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1382     }
1383   };
1384
1385   // X86 Truncating Store with Signed saturation.
1386   class TruncSStoreSDNode : public X86StoreSDNode {
1387   public:
1388     TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1389                         SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1390       : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1391
1392     static bool classof(const SDNode *N) {
1393       return N->getOpcode() == X86ISD::VTRUNCSTORES;
1394     }
1395   };
1396
1397   // X86 Truncating Store with Unsigned saturation.
1398   class TruncUSStoreSDNode : public X86StoreSDNode {
1399   public:
1400     TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1401                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1402       : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1403
1404     static bool classof(const SDNode *N) {
1405       return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1406     }
1407   };
1408
1409   // X86 Truncating Masked Store with Signed saturation.
1410   class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1411   public:
1412     MaskedTruncSStoreSDNode(unsigned Order,
1413                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1414                          MachineMemOperand *MMO)
1415       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1416
1417     static bool classof(const SDNode *N) {
1418       return N->getOpcode() == X86ISD::VMTRUNCSTORES;
1419     }
1420   };
1421
1422   // X86 Truncating Masked Store with Unsigned saturation.
1423   class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1424   public:
1425     MaskedTruncUSStoreSDNode(unsigned Order,
1426                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1427                             MachineMemOperand *MMO)
1428       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1429
1430     static bool classof(const SDNode *N) {
1431       return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1432     }
1433   };
1434
1435   // X86 specific Gather/Scatter nodes.
1436   // The class has the same order of operands as MaskedGatherScatterSDNode for
1437   // convenience.
1438   class X86MaskedGatherScatterSDNode : public MemSDNode {
1439   public:
1440     X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1441                                  const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1442                                  MachineMemOperand *MMO)
1443         : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1444
1445     const SDValue &getBasePtr() const { return getOperand(3); }
1446     const SDValue &getIndex()   const { return getOperand(4); }
1447     const SDValue &getMask()    const { return getOperand(2); }
1448     const SDValue &getValue()   const { return getOperand(1); }
1449
1450     static bool classof(const SDNode *N) {
1451       return N->getOpcode() == X86ISD::MGATHER ||
1452              N->getOpcode() == X86ISD::MSCATTER;
1453     }
1454   };
1455
1456   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1457   public:
1458     X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1459                           EVT MemVT, MachineMemOperand *MMO)
1460         : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
1461                                        MMO) {}
1462
1463     static bool classof(const SDNode *N) {
1464       return N->getOpcode() == X86ISD::MGATHER;
1465     }
1466   };
1467
1468   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1469   public:
1470     X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1471                            EVT MemVT, MachineMemOperand *MMO)
1472         : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
1473                                        MMO) {}
1474
1475     static bool classof(const SDNode *N) {
1476       return N->getOpcode() == X86ISD::MSCATTER;
1477     }
1478   };
1479
1480   /// Generate unpacklo/unpackhi shuffle mask.
1481   template <typename T = int>
1482   void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1483                                bool Unary) {
1484     assert(Mask.empty() && "Expected an empty shuffle mask vector");
1485     int NumElts = VT.getVectorNumElements();
1486     int NumEltsInLane = 128 / VT.getScalarSizeInBits();
1487     for (int i = 0; i < NumElts; ++i) {
1488       unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
1489       int Pos = (i % NumEltsInLane) / 2 + LaneStart;
1490       Pos += (Unary ? 0 : NumElts * (i % 2));
1491       Pos += (Lo ? 0 : NumEltsInLane / 2);
1492       Mask.push_back(Pos);
1493     }
1494   }
1495
1496   /// Helper function to scale a shuffle or target shuffle mask, replacing each
1497   /// mask index with the scaled sequential indices for an equivalent narrowed
1498   /// mask. This is the reverse process to canWidenShuffleElements, but can
1499   /// always succeed.
1500   template <typename T>
1501   void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
1502                         SmallVectorImpl<T> &ScaledMask) {
1503     assert(0 < Scale && "Unexpected scaling factor");
1504     int NumElts = Mask.size();
1505     ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);
1506
1507     for (int i = 0; i != NumElts; ++i) {
1508       int M = Mask[i];
1509
1510       // Repeat sentinel values in every mask element.
1511       if (M < 0) {
1512         for (int s = 0; s != Scale; ++s)
1513           ScaledMask[(Scale * i) + s] = M;
1514         continue;
1515       }
1516
1517       // Scale mask element and increment across each mask element.
1518       for (int s = 0; s != Scale; ++s)
1519         ScaledMask[(Scale * i) + s] = (Scale * M) + s;
1520     }
1521   }
1522 } // end namespace llvm
1523
1524 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H