//===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTXISelLowering.h"
#include "NVPTXSection.h"
#include "NVPTXSubtarget.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetCallingConv.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#define DEBUG_TYPE "nvptx-lower"

using namespace llvm;

static unsigned int uniqueCallSite = 0;
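// uniqueCallSite provides the numeric suffix that keeps each emitted
// "prototype_N" call prototype distinct; see getPrototype() below.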
static cl::opt<bool> sched4reg(
    "nvptx-sched4reg",
    cl::desc("NVPTX Specific: schedule for register pressure"),
    cl::init(false));

static cl::opt<unsigned>
FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
                    cl::desc("NVPTX Specific: FMA contraction (0: don't do it,"
                             " 1: do it, 2: do it aggressively)"),
                    cl::init(2));

static cl::opt<int> UsePrecDivF32(
    "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
    cl::desc("NVPTX Specific: 0 use div.approx, 1 use div.full, 2 use"
             " IEEE Compliant F32 div.rnd if available."),
    cl::init(2));

static cl::opt<bool> UsePrecSqrtF32(
    "nvptx-prec-sqrtf32", cl::Hidden,
    cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
    cl::init(true));

static cl::opt<bool> FtzEnabled(
    "nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
    cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
    cl::init(false));
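// Returns the f32 division precision level: 0 selects div.approx, 1 selects
// div.full, and 2 requests IEEE-compliant division, mirroring the
// -nvptx-prec-divf32 option above.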
int NVPTXTargetLowering::getDivF32Level() const {
  if (UsePrecDivF32.getNumOccurrences() > 0) {
    // If nvptx-prec-divf32=N is used on the command-line, always honor it.
    return UsePrecDivF32;
  }
  // Otherwise, use div.approx if fast math is enabled.
  if (getTargetMachine().Options.UnsafeFPMath)
    return 0;
  return 2;
}
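// Returns true when f32 square root must be IEEE-compliant (sqrt.rn), and
// false when the faster sqrt.approx may be used instead.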
bool NVPTXTargetLowering::usePrecSqrtF32() const {
  if (UsePrecSqrtF32.getNumOccurrences() > 0) {
    // If nvptx-prec-sqrtf32 is used on the command-line, always honor it.
    return UsePrecSqrtF32;
  }
  // Otherwise, use sqrt.approx if fast math is enabled.
  return !getTargetMachine().Options.UnsafeFPMath;
}
bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const {
  // TODO: Get rid of this flag; there can be only one way to do this.
  if (FtzEnabled.getNumOccurrences() > 0) {
    // If nvptx-f32ftz is used on the command-line, always honor it.
    return FtzEnabled;
  }
  // Otherwise, check for an nvptx-f32ftz attribute on the function.
  const Function *F = MF.getFunction();
  if (F->hasFnAttribute("nvptx-f32ftz"))
    return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
  return false;
}
static bool IsPTXVectorType(MVT VT) {
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::v2i1:
  case MVT::v4i1:
  case MVT::v2i8:
  case MVT::v4i8:
  case MVT::v2i16:
  case MVT::v4i16:
  case MVT::v2i32:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v2f16:
  case MVT::v4f16:
  case MVT::v8f16: // <4 x f16x2>
  case MVT::v2f32:
  case MVT::v4f32:
  case MVT::v2f64:
    return true;
  }
}
/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
/// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors
/// into their primitive components.
/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return
/// the same number of types as the Ins/Outs arrays in LowerFormalArguments,
/// LowerCall, and LowerReturn.
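/// For example (an illustration, not part of the original source): the IR
/// type {i32, <4 x half>} is flattened to the EVTs {i32, v2f16, v2f16} at
/// byte offsets {0, 8, 12}, because even-length f16 vectors travel as
/// v2f16 pairs.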
static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                               Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                               SmallVectorImpl<uint64_t> *Offsets = nullptr,
                               uint64_t StartingOffset = 0) {
  SmallVector<EVT, 16> TempVTs;
  SmallVector<uint64_t, 16> TempOffsets;

  ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
  for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
    EVT VT = TempVTs[i];
    uint64_t Off = TempOffsets[i];
    // Split vectors into individual elements, except for v2f16, which
    // we will pass as a single scalar.
    if (VT.isVector()) {
      unsigned NumElts = VT.getVectorNumElements();
      EVT EltVT = VT.getVectorElementType();
      // Vectors with an even number of f16 elements will be passed to
      // us as an array of v2f16 elements. We must match this so we
      // stay in sync with Ins/Outs.
      if (EltVT == MVT::f16 && NumElts % 2 == 0) {
        EltVT = MVT::v2f16;
        NumElts /= 2;
      }
      for (unsigned j = 0; j != NumElts; ++j) {
        ValueVTs.push_back(EltVT);
        if (Offsets)
          Offsets->push_back(Off + j * EltVT.getStoreSize());
      }
    } else {
      ValueVTs.push_back(VT);
      if (Offsets)
        Offsets->push_back(Off);
    }
  }
}
// Check whether we can merge loads/stores of some of the pieces of a
// flattened function parameter or return value into a single vector
// load/store.
//
// The flattened parameter is represented as a list of EVTs and
// offsets, and the whole structure is aligned to ParamAlignment. This
// function determines whether we can load/store pieces of the
// parameter starting at index Idx using a single vectorized op of
// size AccessSize. If so, it returns the number of param pieces
// covered by the vector op. Otherwise, it returns 1.
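// For instance (illustration only): four f32 pieces at offsets {0, 4, 8, 12}
// inside a 16-byte-aligned parameter can be covered by one 128-bit access,
// so the function returns 4; with only 8-byte parameter alignment, a 16-byte
// access is rejected and the answer for AccessSize 16 would be 1.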
static unsigned CanMergeParamLoadStoresStartingAt(
    unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs,
    const SmallVectorImpl<uint64_t> &Offsets, unsigned ParamAlignment) {
  assert(isPowerOf2_32(AccessSize) && "must be a power of 2!");

  // Can't vectorize if param alignment is not sufficient.
  if (AccessSize > ParamAlignment)
    return 1;
  // Can't vectorize if offset is not aligned.
  if (Offsets[Idx] & (AccessSize - 1))
    return 1;

  EVT EltVT = ValueVTs[Idx];
  unsigned EltSize = EltVT.getStoreSize();

  // Element is too large to vectorize.
  if (EltSize >= AccessSize)
    return 1;

  unsigned NumElts = AccessSize / EltSize;
  // Can't vectorize if AccessSize is not a multiple of EltSize.
  if (AccessSize != EltSize * NumElts)
    return 1;

  // We don't have enough elements to vectorize.
  if (Idx + NumElts > ValueVTs.size())
    return 1;

  // PTX ISA can only deal with 2- and 4-element vector ops.
  if (NumElts != 4 && NumElts != 2)
    return 1;

  for (unsigned j = Idx + 1; j < Idx + NumElts; ++j) {
    // Types do not match.
    if (ValueVTs[j] != EltVT)
      return 1;

    // Elements are not contiguous.
    if (Offsets[j] - Offsets[j - 1] != EltSize)
      return 1;
  }
  // OK. We can vectorize ValueVTs[i..i+NumElts).
  return NumElts;
}
// Flags for tracking per-element vectorization state of loads/stores
// of a flattened function parameter or return value.
enum ParamVectorizationFlags {
  PVF_INNER = 0x0,  // Middle elements of a vector.
  PVF_FIRST = 0x1,  // First element of the vector.
  PVF_LAST = 0x2,   // Last element of the vector.
  // Scalar is effectively a 1-element vector.
  PVF_SCALAR = PVF_FIRST | PVF_LAST
};
// Computes whether and how we can vectorize the loads/stores of a
// flattened function parameter or return value.
//
// The flattened parameter is represented as the list of ValueVTs and
// Offsets, and is aligned to ParamAlignment bytes. We return a vector
// of the same size as ValueVTs indicating how each piece should be
// loaded/stored (i.e. as a scalar, or as part of a vector load/store).
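// For example (illustration only): five i32 pieces at 16-byte alignment
// yield {PVF_FIRST, PVF_INNER, PVF_INNER, PVF_LAST, PVF_SCALAR}: the first
// four merge into one 128-bit access and the trailing piece stays scalar.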
static SmallVector<ParamVectorizationFlags, 16>
VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs,
                     const SmallVectorImpl<uint64_t> &Offsets,
                     unsigned ParamAlignment) {
  // Set vector size to match ValueVTs and mark all elements as
  // scalars by default.
  SmallVector<ParamVectorizationFlags, 16> VectorInfo;
  VectorInfo.assign(ValueVTs.size(), PVF_SCALAR);

  // Check what we can vectorize using 128/64/32-bit accesses.
  for (int I = 0, E = ValueVTs.size(); I != E; ++I) {
    // Skip elements we've already processed.
    assert(VectorInfo[I] == PVF_SCALAR && "Unexpected vector info state.");
    for (unsigned AccessSize : {16, 8, 4, 2}) {
      unsigned NumElts = CanMergeParamLoadStoresStartingAt(
          I, AccessSize, ValueVTs, Offsets, ParamAlignment);
      // Mark vectorized elements.
      switch (NumElts) {
      default:
        llvm_unreachable("Unexpected return value");
      case 1:
        // Can't vectorize using this size, try next smaller size.
        continue;
      case 2:
        assert(I + 1 < E && "Not enough elements.");
        VectorInfo[I] = PVF_FIRST;
        VectorInfo[I + 1] = PVF_LAST;
        I += 1;
        break;
      case 4:
        assert(I + 3 < E && "Not enough elements.");
        VectorInfo[I] = PVF_FIRST;
        VectorInfo[I + 1] = PVF_INNER;
        VectorInfo[I + 2] = PVF_INNER;
        VectorInfo[I + 3] = PVF_LAST;
        I += 3;
        break;
      }
      // Break out of the inner loop because we've already succeeded
      // using the largest possible AccessSize.
      break;
    }
  }
  return VectorInfo;
}
// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
                                         const NVPTXSubtarget &STI)
    : TargetLowering(TM), nvTM(&TM), STI(STI) {
  // Always lower memset, memcpy, and memmove intrinsics to load/store
  // instructions, rather than generating calls to memset, memcpy, or memmove.
  MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;
  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Jump is Expensive. Don't create extra control flow for 'and', 'or'
  // condition branches.
  setJumpIsExpensive(true);

  // Wide divides are _very_ slow. Try to reduce the width of the divide if
  // possible (e.g. 64 to 32).
  addBypassSlowDiv(64, 32);

  // By default, use the Source scheduling.
  if (sched4reg)
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Source);
  auto setFP16OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action,
                                    LegalizeAction NoF16Action) {
    setOperationAction(Op, VT, STI.allowFP16Math() ? Action : NoF16Action);
  };
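  // For example, setFP16OperationAction(ISD::FADD, MVT::f16, Legal, Promote)
  // keeps f16 FADD legal when the subtarget allows fp16 math, and promotes it
  // to f32 otherwise (see the fp16 arithmetic loop further down).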
  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
  addRegisterClass(MVT::f16, &NVPTX::Float16RegsRegClass);
  addRegisterClass(MVT::v2f16, &NVPTX::Float16x2RegsRegClass);
  // Conversion to/from FP16/FP16x2 is always legal.
  setOperationAction(ISD::SINT_TO_FP, MVT::f16, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::f16, Legal);
  setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom);

  setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote);
  setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand);
  // Operations not directly supported by NVPTX.
  setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::v2f16, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  setOperationAction(ISD::BR_CC, MVT::f16, Expand);
  setOperationAction(ISD::BR_CC, MVT::v2f16, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f64, Expand);
  setOperationAction(ISD::BR_CC, MVT::i1, Expand);
  setOperationAction(ISD::BR_CC, MVT::i8, Expand);
  setOperationAction(ISD::BR_CC, MVT::i16, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::i64, Expand);

  // Some SIGN_EXTEND_INREG can be done using the cvt instruction.
  // For others we will expand to a SHL/SRA pair.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);

  if (STI.hasROT64()) {
    setOperationAction(ISD::ROTL, MVT::i64, Legal);
    setOperationAction(ISD::ROTR, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i64, Expand);
    setOperationAction(ISD::ROTR, MVT::i64, Expand);
  }
  if (STI.hasROT32()) {
    setOperationAction(ISD::ROTL, MVT::i32, Legal);
    setOperationAction(ISD::ROTR, MVT::i32, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i32, Expand);
    setOperationAction(ISD::ROTR, MVT::i32, Expand);
  }

  setOperationAction(ISD::ROTL, MVT::i16, Expand);
  setOperationAction(ISD::ROTR, MVT::i16, Expand);
  setOperationAction(ISD::ROTL, MVT::i8, Expand);
  setOperationAction(ISD::ROTR, MVT::i8, Expand);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  // Indirect branch is not supported.
  // This also disables Jump Table creation.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);

  // We want to legalize constant-related memmove and memcpy
  // intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  // Turn FP extload into load/fpextend.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
  // Turn FP truncstore into trunc + store.
  // FIXME: vector types should also be expanded
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PTX does not support load / store of predicate registers.
  setOperationAction(ISD::LOAD, MVT::i1, Custom);
  setOperationAction(ISD::STORE, MVT::i1, Custom);

  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }
  // This is legal in NVPTX.
  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f16, Legal);

  // TRAP can be lowered to PTX trap.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::ADDC, MVT::i64, Expand);
  setOperationAction(ISD::ADDE, MVT::i64, Expand);

  // Register custom handling for vector loads/stores.
  for (MVT VT : MVT::vector_valuetypes()) {
    if (IsPTXVectorType(VT)) {
      setOperationAction(ISD::LOAD, VT, Custom);
      setOperationAction(ISD::STORE, VT, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
    }
  }
  // Custom handling for i8 intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);

  for (const auto &Ty : {MVT::i16, MVT::i32, MVT::i64}) {
    setOperationAction(ISD::ABS, Ty, Legal);
    setOperationAction(ISD::SMIN, Ty, Legal);
    setOperationAction(ISD::SMAX, Ty, Legal);
    setOperationAction(ISD::UMIN, Ty, Legal);
    setOperationAction(ISD::UMAX, Ty, Legal);

    setOperationAction(ISD::CTPOP, Ty, Legal);
    setOperationAction(ISD::CTLZ, Ty, Legal);
  }

  setOperationAction(ISD::CTTZ, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  // PTX does not directly support SELP of i1, so promote to i32 first.
  setOperationAction(ISD::SELECT, MVT::i1, Custom);

  // PTX cannot multiply two i64s in a single instruction.
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);

  // We have some custom DAG combine patterns for these nodes.
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::FADD);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::SHL);
  setTargetDAGCombine(ISD::SREM);
  setTargetDAGCombine(ISD::UREM);

  // setcc for f16x2 needs special handling to prevent the legalizer's
  // attempt to scalarize it due to v2i1 not being legal.
  if (STI.allowFP16Math())
    setTargetDAGCombine(ISD::SETCC);
  // Promote fp16 arithmetic if fp16 hardware isn't available or the
  // user passed --nvptx-no-fp16-math. The flag is useful because,
  // although sm_53+ GPUs have some sort of FP16 support in hardware,
  // only sm_53 and sm_60 have full implementations. Others have only
  // a token amount of hardware and are likely to run faster by using
  // fp32 units instead.
  for (const auto &Op : {ISD::FADD, ISD::FMUL, ISD::FSUB, ISD::FMA}) {
    setFP16OperationAction(Op, MVT::f16, Legal, Promote);
    setFP16OperationAction(Op, MVT::v2f16, Legal, Expand);
  }

  // There's no neg.f16 instruction. Expand to (0-x).
  setOperationAction(ISD::FNEG, MVT::f16, Expand);
  setOperationAction(ISD::FNEG, MVT::v2f16, Expand);
  // (would be) Library functions.

  // These map to conversion instructions for scalar FP types.
  for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT,
                         ISD::FROUND, ISD::FTRUNC}) {
    setOperationAction(Op, MVT::f16, Legal);
    setOperationAction(Op, MVT::f32, Legal);
    setOperationAction(Op, MVT::f64, Legal);
    setOperationAction(Op, MVT::v2f16, Expand);
  }

  // 'Expand' implements FCOPYSIGN without calling an external library.
  setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);

  // These map to corresponding instructions for f32/f64. f16 must be
  // promoted to f32. v2f16 is expanded to f16, which is then promoted
  // to f32.
  for (const auto &Op : {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN,
                         ISD::FCOS, ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM}) {
    setOperationAction(Op, MVT::f16, Promote);
    setOperationAction(Op, MVT::f32, Legal);
    setOperationAction(Op, MVT::f64, Legal);
    setOperationAction(Op, MVT::v2f16, Expand);
  }
  setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
  setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
  setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
  setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);

  // No FEXP2, FLOG2. The PTX ex2 and log2 functions are always approximate.
  // No FPOW or FREM in PTX.

  // Now deduce the information based on the above mentioned
  // actions.
  computeRegisterProperties(STI.getRegisterInfo());
}
const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((NVPTXISD::NodeType)Opcode) {
  case NVPTXISD::FIRST_NUMBER:
    break;
  case NVPTXISD::CALL:
    return "NVPTXISD::CALL";
  case NVPTXISD::RET_FLAG:
    return "NVPTXISD::RET_FLAG";
  case NVPTXISD::LOAD_PARAM:
    return "NVPTXISD::LOAD_PARAM";
  case NVPTXISD::Wrapper:
    return "NVPTXISD::Wrapper";
  case NVPTXISD::DeclareParam:
    return "NVPTXISD::DeclareParam";
  case NVPTXISD::DeclareScalarParam:
    return "NVPTXISD::DeclareScalarParam";
  case NVPTXISD::DeclareRet:
    return "NVPTXISD::DeclareRet";
  case NVPTXISD::DeclareScalarRet:
    return "NVPTXISD::DeclareScalarRet";
  case NVPTXISD::DeclareRetParam:
    return "NVPTXISD::DeclareRetParam";
  case NVPTXISD::PrintCall:
    return "NVPTXISD::PrintCall";
  case NVPTXISD::PrintConvergentCall:
    return "NVPTXISD::PrintConvergentCall";
  case NVPTXISD::PrintCallUni:
    return "NVPTXISD::PrintCallUni";
  case NVPTXISD::PrintConvergentCallUni:
    return "NVPTXISD::PrintConvergentCallUni";
  case NVPTXISD::LoadParam:
    return "NVPTXISD::LoadParam";
  case NVPTXISD::LoadParamV2:
    return "NVPTXISD::LoadParamV2";
  case NVPTXISD::LoadParamV4:
    return "NVPTXISD::LoadParamV4";
  case NVPTXISD::StoreParam:
    return "NVPTXISD::StoreParam";
  case NVPTXISD::StoreParamV2:
    return "NVPTXISD::StoreParamV2";
  case NVPTXISD::StoreParamV4:
    return "NVPTXISD::StoreParamV4";
  case NVPTXISD::StoreParamS32:
    return "NVPTXISD::StoreParamS32";
  case NVPTXISD::StoreParamU32:
    return "NVPTXISD::StoreParamU32";
  case NVPTXISD::CallArgBegin:
    return "NVPTXISD::CallArgBegin";
  case NVPTXISD::CallArg:
    return "NVPTXISD::CallArg";
  case NVPTXISD::LastCallArg:
    return "NVPTXISD::LastCallArg";
  case NVPTXISD::CallArgEnd:
    return "NVPTXISD::CallArgEnd";
  case NVPTXISD::CallVoid:
    return "NVPTXISD::CallVoid";
  case NVPTXISD::CallVal:
    return "NVPTXISD::CallVal";
  case NVPTXISD::CallSymbol:
    return "NVPTXISD::CallSymbol";
  case NVPTXISD::Prototype:
    return "NVPTXISD::Prototype";
  case NVPTXISD::MoveParam:
    return "NVPTXISD::MoveParam";
  case NVPTXISD::StoreRetval:
    return "NVPTXISD::StoreRetval";
  case NVPTXISD::StoreRetvalV2:
    return "NVPTXISD::StoreRetvalV2";
  case NVPTXISD::StoreRetvalV4:
    return "NVPTXISD::StoreRetvalV4";
  case NVPTXISD::PseudoUseParam:
    return "NVPTXISD::PseudoUseParam";
  case NVPTXISD::RETURN:
    return "NVPTXISD::RETURN";
  case NVPTXISD::CallSeqBegin:
    return "NVPTXISD::CallSeqBegin";
  case NVPTXISD::CallSeqEnd:
    return "NVPTXISD::CallSeqEnd";
  case NVPTXISD::CallPrototype:
    return "NVPTXISD::CallPrototype";
  case NVPTXISD::LoadV2:
    return "NVPTXISD::LoadV2";
  case NVPTXISD::LoadV4:
    return "NVPTXISD::LoadV4";
  case NVPTXISD::LDGV2:
    return "NVPTXISD::LDGV2";
  case NVPTXISD::LDGV4:
    return "NVPTXISD::LDGV4";
  case NVPTXISD::LDUV2:
    return "NVPTXISD::LDUV2";
  case NVPTXISD::LDUV4:
    return "NVPTXISD::LDUV4";
  case NVPTXISD::StoreV2:
    return "NVPTXISD::StoreV2";
  case NVPTXISD::StoreV4:
    return "NVPTXISD::StoreV4";
  case NVPTXISD::FUN_SHFL_CLAMP:
    return "NVPTXISD::FUN_SHFL_CLAMP";
  case NVPTXISD::FUN_SHFR_CLAMP:
    return "NVPTXISD::FUN_SHFR_CLAMP";
  case NVPTXISD::IMAD:
    return "NVPTXISD::IMAD";
  case NVPTXISD::SETP_F16X2:
    return "NVPTXISD::SETP_F16X2";
  case NVPTXISD::Dummy:
    return "NVPTXISD::Dummy";
  case NVPTXISD::MUL_WIDE_SIGNED:
    return "NVPTXISD::MUL_WIDE_SIGNED";
  case NVPTXISD::MUL_WIDE_UNSIGNED:
    return "NVPTXISD::MUL_WIDE_UNSIGNED";
  case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32";
  case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
  case NVPTXISD::Tex1DFloatFloatLevel:
    return "NVPTXISD::Tex1DFloatFloatLevel";
  case NVPTXISD::Tex1DFloatFloatGrad:
    return "NVPTXISD::Tex1DFloatFloatGrad";
  case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32";
  case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float";
  case NVPTXISD::Tex1DS32FloatLevel:
    return "NVPTXISD::Tex1DS32FloatLevel";
  case NVPTXISD::Tex1DS32FloatGrad:
    return "NVPTXISD::Tex1DS32FloatGrad";
  case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32";
  case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float";
  case NVPTXISD::Tex1DU32FloatLevel:
    return "NVPTXISD::Tex1DU32FloatLevel";
  case NVPTXISD::Tex1DU32FloatGrad:
    return "NVPTXISD::Tex1DU32FloatGrad";
  case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32";
  case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat";
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
    return "NVPTXISD::Tex1DArrayFloatFloatLevel";
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
    return "NVPTXISD::Tex1DArrayFloatFloatGrad";
  case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32";
  case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float";
  case NVPTXISD::Tex1DArrayS32FloatLevel:
    return "NVPTXISD::Tex1DArrayS32FloatLevel";
  case NVPTXISD::Tex1DArrayS32FloatGrad:
    return "NVPTXISD::Tex1DArrayS32FloatGrad";
  case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32";
  case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float";
  case NVPTXISD::Tex1DArrayU32FloatLevel:
    return "NVPTXISD::Tex1DArrayU32FloatLevel";
  case NVPTXISD::Tex1DArrayU32FloatGrad:
    return "NVPTXISD::Tex1DArrayU32FloatGrad";
  case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32";
  case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat";
  case NVPTXISD::Tex2DFloatFloatLevel:
    return "NVPTXISD::Tex2DFloatFloatLevel";
  case NVPTXISD::Tex2DFloatFloatGrad:
    return "NVPTXISD::Tex2DFloatFloatGrad";
  case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32";
  case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float";
  case NVPTXISD::Tex2DS32FloatLevel:
    return "NVPTXISD::Tex2DS32FloatLevel";
  case NVPTXISD::Tex2DS32FloatGrad:
    return "NVPTXISD::Tex2DS32FloatGrad";
  case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32";
  case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float";
  case NVPTXISD::Tex2DU32FloatLevel:
    return "NVPTXISD::Tex2DU32FloatLevel";
  case NVPTXISD::Tex2DU32FloatGrad:
    return "NVPTXISD::Tex2DU32FloatGrad";
  case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32";
  case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
    return "NVPTXISD::Tex2DArrayFloatFloatLevel";
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
    return "NVPTXISD::Tex2DArrayFloatFloatGrad";
  case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32";
  case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float";
  case NVPTXISD::Tex2DArrayS32FloatLevel:
    return "NVPTXISD::Tex2DArrayS32FloatLevel";
  case NVPTXISD::Tex2DArrayS32FloatGrad:
    return "NVPTXISD::Tex2DArrayS32FloatGrad";
  case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32";
  case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float";
  case NVPTXISD::Tex2DArrayU32FloatLevel:
    return "NVPTXISD::Tex2DArrayU32FloatLevel";
  case NVPTXISD::Tex2DArrayU32FloatGrad:
    return "NVPTXISD::Tex2DArrayU32FloatGrad";
  case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32";
  case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat";
  case NVPTXISD::Tex3DFloatFloatLevel:
    return "NVPTXISD::Tex3DFloatFloatLevel";
  case NVPTXISD::Tex3DFloatFloatGrad:
    return "NVPTXISD::Tex3DFloatFloatGrad";
  case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32";
  case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float";
  case NVPTXISD::Tex3DS32FloatLevel:
    return "NVPTXISD::Tex3DS32FloatLevel";
  case NVPTXISD::Tex3DS32FloatGrad:
    return "NVPTXISD::Tex3DS32FloatGrad";
  case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32";
  case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float";
  case NVPTXISD::Tex3DU32FloatLevel:
    return "NVPTXISD::Tex3DU32FloatLevel";
  case NVPTXISD::Tex3DU32FloatGrad:
    return "NVPTXISD::Tex3DU32FloatGrad";
  case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat";
  case NVPTXISD::TexCubeFloatFloatLevel:
    return "NVPTXISD::TexCubeFloatFloatLevel";
  case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float";
  case NVPTXISD::TexCubeS32FloatLevel:
    return "NVPTXISD::TexCubeS32FloatLevel";
  case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float";
  case NVPTXISD::TexCubeU32FloatLevel:
    return "NVPTXISD::TexCubeU32FloatLevel";
  case NVPTXISD::TexCubeArrayFloatFloat:
    return "NVPTXISD::TexCubeArrayFloatFloat";
  case NVPTXISD::TexCubeArrayFloatFloatLevel:
    return "NVPTXISD::TexCubeArrayFloatFloatLevel";
  case NVPTXISD::TexCubeArrayS32Float:
    return "NVPTXISD::TexCubeArrayS32Float";
  case NVPTXISD::TexCubeArrayS32FloatLevel:
    return "NVPTXISD::TexCubeArrayS32FloatLevel";
  case NVPTXISD::TexCubeArrayU32Float:
    return "NVPTXISD::TexCubeArrayU32Float";
  case NVPTXISD::TexCubeArrayU32FloatLevel:
    return "NVPTXISD::TexCubeArrayU32FloatLevel";
  case NVPTXISD::Tld4R2DFloatFloat:
    return "NVPTXISD::Tld4R2DFloatFloat";
  case NVPTXISD::Tld4G2DFloatFloat:
    return "NVPTXISD::Tld4G2DFloatFloat";
  case NVPTXISD::Tld4B2DFloatFloat:
    return "NVPTXISD::Tld4B2DFloatFloat";
  case NVPTXISD::Tld4A2DFloatFloat:
    return "NVPTXISD::Tld4A2DFloatFloat";
  case NVPTXISD::Tld4R2DS64Float:
    return "NVPTXISD::Tld4R2DS64Float";
  case NVPTXISD::Tld4G2DS64Float:
    return "NVPTXISD::Tld4G2DS64Float";
  case NVPTXISD::Tld4B2DS64Float:
    return "NVPTXISD::Tld4B2DS64Float";
  case NVPTXISD::Tld4A2DS64Float:
    return "NVPTXISD::Tld4A2DS64Float";
  case NVPTXISD::Tld4R2DU64Float:
    return "NVPTXISD::Tld4R2DU64Float";
  case NVPTXISD::Tld4G2DU64Float:
    return "NVPTXISD::Tld4G2DU64Float";
  case NVPTXISD::Tld4B2DU64Float:
    return "NVPTXISD::Tld4B2DU64Float";
  case NVPTXISD::Tld4A2DU64Float:
    return "NVPTXISD::Tld4A2DU64Float";

  case NVPTXISD::TexUnified1DFloatS32:
    return "NVPTXISD::TexUnified1DFloatS32";
  case NVPTXISD::TexUnified1DFloatFloat:
    return "NVPTXISD::TexUnified1DFloatFloat";
  case NVPTXISD::TexUnified1DFloatFloatLevel:
    return "NVPTXISD::TexUnified1DFloatFloatLevel";
  case NVPTXISD::TexUnified1DFloatFloatGrad:
    return "NVPTXISD::TexUnified1DFloatFloatGrad";
  case NVPTXISD::TexUnified1DS32S32:
    return "NVPTXISD::TexUnified1DS32S32";
  case NVPTXISD::TexUnified1DS32Float:
    return "NVPTXISD::TexUnified1DS32Float";
  case NVPTXISD::TexUnified1DS32FloatLevel:
    return "NVPTXISD::TexUnified1DS32FloatLevel";
  case NVPTXISD::TexUnified1DS32FloatGrad:
    return "NVPTXISD::TexUnified1DS32FloatGrad";
  case NVPTXISD::TexUnified1DU32S32:
    return "NVPTXISD::TexUnified1DU32S32";
  case NVPTXISD::TexUnified1DU32Float:
    return "NVPTXISD::TexUnified1DU32Float";
  case NVPTXISD::TexUnified1DU32FloatLevel:
    return "NVPTXISD::TexUnified1DU32FloatLevel";
  case NVPTXISD::TexUnified1DU32FloatGrad:
    return "NVPTXISD::TexUnified1DU32FloatGrad";
  case NVPTXISD::TexUnified1DArrayFloatS32:
    return "NVPTXISD::TexUnified1DArrayFloatS32";
  case NVPTXISD::TexUnified1DArrayFloatFloat:
    return "NVPTXISD::TexUnified1DArrayFloatFloat";
  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
    return "NVPTXISD::TexUnified1DArrayFloatFloatLevel";
  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
    return "NVPTXISD::TexUnified1DArrayFloatFloatGrad";
  case NVPTXISD::TexUnified1DArrayS32S32:
    return "NVPTXISD::TexUnified1DArrayS32S32";
  case NVPTXISD::TexUnified1DArrayS32Float:
    return "NVPTXISD::TexUnified1DArrayS32Float";
  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
    return "NVPTXISD::TexUnified1DArrayS32FloatLevel";
  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
    return "NVPTXISD::TexUnified1DArrayS32FloatGrad";
  case NVPTXISD::TexUnified1DArrayU32S32:
    return "NVPTXISD::TexUnified1DArrayU32S32";
  case NVPTXISD::TexUnified1DArrayU32Float:
    return "NVPTXISD::TexUnified1DArrayU32Float";
  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
    return "NVPTXISD::TexUnified1DArrayU32FloatLevel";
  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
    return "NVPTXISD::TexUnified1DArrayU32FloatGrad";
  case NVPTXISD::TexUnified2DFloatS32:
    return "NVPTXISD::TexUnified2DFloatS32";
  case NVPTXISD::TexUnified2DFloatFloat:
    return "NVPTXISD::TexUnified2DFloatFloat";
  case NVPTXISD::TexUnified2DFloatFloatLevel:
    return "NVPTXISD::TexUnified2DFloatFloatLevel";
  case NVPTXISD::TexUnified2DFloatFloatGrad:
    return "NVPTXISD::TexUnified2DFloatFloatGrad";
  case NVPTXISD::TexUnified2DS32S32:
    return "NVPTXISD::TexUnified2DS32S32";
  case NVPTXISD::TexUnified2DS32Float:
    return "NVPTXISD::TexUnified2DS32Float";
  case NVPTXISD::TexUnified2DS32FloatLevel:
    return "NVPTXISD::TexUnified2DS32FloatLevel";
  case NVPTXISD::TexUnified2DS32FloatGrad:
    return "NVPTXISD::TexUnified2DS32FloatGrad";
  case NVPTXISD::TexUnified2DU32S32:
    return "NVPTXISD::TexUnified2DU32S32";
  case NVPTXISD::TexUnified2DU32Float:
    return "NVPTXISD::TexUnified2DU32Float";
  case NVPTXISD::TexUnified2DU32FloatLevel:
    return "NVPTXISD::TexUnified2DU32FloatLevel";
  case NVPTXISD::TexUnified2DU32FloatGrad:
    return "NVPTXISD::TexUnified2DU32FloatGrad";
  case NVPTXISD::TexUnified2DArrayFloatS32:
    return "NVPTXISD::TexUnified2DArrayFloatS32";
  case NVPTXISD::TexUnified2DArrayFloatFloat:
    return "NVPTXISD::TexUnified2DArrayFloatFloat";
  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
    return "NVPTXISD::TexUnified2DArrayFloatFloatLevel";
  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
    return "NVPTXISD::TexUnified2DArrayFloatFloatGrad";
  case NVPTXISD::TexUnified2DArrayS32S32:
    return "NVPTXISD::TexUnified2DArrayS32S32";
  case NVPTXISD::TexUnified2DArrayS32Float:
    return "NVPTXISD::TexUnified2DArrayS32Float";
  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
    return "NVPTXISD::TexUnified2DArrayS32FloatLevel";
  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
    return "NVPTXISD::TexUnified2DArrayS32FloatGrad";
  case NVPTXISD::TexUnified2DArrayU32S32:
    return "NVPTXISD::TexUnified2DArrayU32S32";
  case NVPTXISD::TexUnified2DArrayU32Float:
    return "NVPTXISD::TexUnified2DArrayU32Float";
  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
    return "NVPTXISD::TexUnified2DArrayU32FloatLevel";
  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
    return "NVPTXISD::TexUnified2DArrayU32FloatGrad";
  case NVPTXISD::TexUnified3DFloatS32:
    return "NVPTXISD::TexUnified3DFloatS32";
  case NVPTXISD::TexUnified3DFloatFloat:
    return "NVPTXISD::TexUnified3DFloatFloat";
  case NVPTXISD::TexUnified3DFloatFloatLevel:
    return "NVPTXISD::TexUnified3DFloatFloatLevel";
  case NVPTXISD::TexUnified3DFloatFloatGrad:
    return "NVPTXISD::TexUnified3DFloatFloatGrad";
  case NVPTXISD::TexUnified3DS32S32:
    return "NVPTXISD::TexUnified3DS32S32";
  case NVPTXISD::TexUnified3DS32Float:
    return "NVPTXISD::TexUnified3DS32Float";
  case NVPTXISD::TexUnified3DS32FloatLevel:
    return "NVPTXISD::TexUnified3DS32FloatLevel";
  case NVPTXISD::TexUnified3DS32FloatGrad:
    return "NVPTXISD::TexUnified3DS32FloatGrad";
  case NVPTXISD::TexUnified3DU32S32:
    return "NVPTXISD::TexUnified3DU32S32";
  case NVPTXISD::TexUnified3DU32Float:
    return "NVPTXISD::TexUnified3DU32Float";
  case NVPTXISD::TexUnified3DU32FloatLevel:
    return "NVPTXISD::TexUnified3DU32FloatLevel";
  case NVPTXISD::TexUnified3DU32FloatGrad:
    return "NVPTXISD::TexUnified3DU32FloatGrad";
  case NVPTXISD::TexUnifiedCubeFloatFloat:
    return "NVPTXISD::TexUnifiedCubeFloatFloat";
  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
    return "NVPTXISD::TexUnifiedCubeFloatFloatLevel";
  case NVPTXISD::TexUnifiedCubeS32Float:
    return "NVPTXISD::TexUnifiedCubeS32Float";
  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeS32FloatLevel";
  case NVPTXISD::TexUnifiedCubeU32Float:
    return "NVPTXISD::TexUnifiedCubeU32Float";
  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeU32FloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
    return "NVPTXISD::TexUnifiedCubeArrayFloatFloat";
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
    return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayS32Float:
    return "NVPTXISD::TexUnifiedCubeArrayS32Float";
  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayU32Float:
    return "NVPTXISD::TexUnifiedCubeArrayU32Float";
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedG2DFloatFloat";
  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedB2DFloatFloat";
  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedA2DFloatFloat";
  case NVPTXISD::Tld4UnifiedR2DS64Float:
    return "NVPTXISD::Tld4UnifiedR2DS64Float";
  case NVPTXISD::Tld4UnifiedG2DS64Float:
    return "NVPTXISD::Tld4UnifiedG2DS64Float";
  case NVPTXISD::Tld4UnifiedB2DS64Float:
    return "NVPTXISD::Tld4UnifiedB2DS64Float";
  case NVPTXISD::Tld4UnifiedA2DS64Float:
    return "NVPTXISD::Tld4UnifiedA2DS64Float";
  case NVPTXISD::Tld4UnifiedR2DU64Float:
    return "NVPTXISD::Tld4UnifiedR2DU64Float";
  case NVPTXISD::Tld4UnifiedG2DU64Float:
    return "NVPTXISD::Tld4UnifiedG2DU64Float";
  case NVPTXISD::Tld4UnifiedB2DU64Float:
    return "NVPTXISD::Tld4UnifiedB2DU64Float";
  case NVPTXISD::Tld4UnifiedA2DU64Float:
    return "NVPTXISD::Tld4UnifiedA2DU64Float";

  case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp";
  case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp";
  case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp";
  case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp";
  case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp";
  case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp";
  case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp";
  case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp";
  case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp";
  case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp";
  case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp";

  case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp";
  case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp";
  case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp";
  case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp";
  case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp";
  case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp";
  case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp";
  case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp";
  case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp";
  case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp";
  case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp";

  case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp";
  case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp";
  case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp";
  case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp";
  case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp";
  case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp";
  case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp";
  case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp";
  case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp";
  case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp";
  case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp";

  case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp";
  case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp";
  case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp";
  case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp";
  case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp";
  case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp";
  case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp";
  case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp";
  case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp";
  case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp";
  case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp";

  case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp";
  case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp";
  case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp";
  case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp";
  case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp";
  case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp";
  case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp";
  case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp";
  case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp";
  case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp";
  case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp";

  case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap";
  case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap";
  case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap";
  case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap";
  case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap";
  case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap";
  case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap";
  case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap";
  case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap";
  case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap";
  case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap";

  case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap";
  case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap";
  case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap";
  case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap";
  case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap";
  case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap";
  case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap";
  case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap";
  case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap";
  case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap";
  case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap";

  case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap";
  case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap";
  case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap";
  case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap";
  case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap";
  case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap";
  case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap";
  case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap";
  case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap";
  case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap";
  case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap";

  case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap";
  case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap";
  case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap";
  case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap";
  case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap";
  case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap";
  case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap";
  case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap";
  case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap";
  case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap";
  case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap";

  case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap";
  case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap";
  case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap";
  case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap";
  case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap";
  case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap";
  case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap";
  case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap";
  case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap";
  case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap";
  case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap";

  case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero";
  case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero";
  case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero";
  case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero";
  case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero";
  case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero";
  case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero";
  case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero";
  case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero";
  case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero";
  case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero";

  case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero";
  case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero";
  case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero";
  case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero";
  case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero";
  case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero";
  case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero";
  case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero";
  case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero";
  case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero";
  case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero";

  case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero";
  case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero";
  case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero";
  case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero";
  case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero";
  case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero";
  case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero";
  case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero";
  case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero";
  case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero";
  case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero";

  case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero";
  case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero";
  case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero";
  case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero";
  case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero";
  case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero";
  case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero";
  case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero";
  case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero";
  case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero";
  case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero";

  case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero";
  case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero";
  case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero";
  case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero";
  case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero";
  case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero";
  case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero";
  case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero";
  case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero";
  case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero";
  case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero";
  }
  return nullptr;
}
TargetLoweringBase::LegalizeTypeAction
NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
  if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
    return TypeSplitVector;
  if (VT == MVT::v2f16)
    return TypeLegal;
  return TargetLoweringBase::getPreferredVectorAction(VT);
}
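// (v2f16 is kept whole rather than split because it has its own
// Float16x2Regs register class, registered in the constructor above.)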
SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
                                             int Enabled, int &ExtraSteps,
                                             bool &UseOneConst,
                                             bool Reciprocal) const {
  if (!(Enabled == ReciprocalEstimate::Enabled ||
        (Enabled == ReciprocalEstimate::Unspecified && !usePrecSqrtF32())))
    return SDValue();

  if (ExtraSteps == ReciprocalEstimate::Unspecified)
    ExtraSteps = 0;

  SDLoc DL(Operand);
  EVT VT = Operand.getValueType();
  bool Ftz = useF32FTZ(DAG.getMachineFunction());

  auto MakeIntrinsicCall = [&](Intrinsic::ID IID) {
    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                       DAG.getConstant(IID, DL, MVT::i32), Operand);
  };

  // The sqrt and rsqrt refinement processes assume we always start out with an
  // approximation of the rsqrt. Therefore, if we're going to do any refinement
  // (i.e. ExtraSteps > 0), we must return an rsqrt. But if we're *not* doing
  // any refinement, we must return a regular sqrt.
  if (Reciprocal || ExtraSteps > 0) {
    if (VT == MVT::f32)
      return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_rsqrt_approx_ftz_f
                                   : Intrinsic::nvvm_rsqrt_approx_f);
    else if (VT == MVT::f64)
      return MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d);
    else
      return SDValue();
  } else {
    if (VT == MVT::f32)
      return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_sqrt_approx_ftz_f
                                   : Intrinsic::nvvm_sqrt_approx_f);
    else {
      // There's no sqrt.approx.f64 instruction, so we emit
      // reciprocal(rsqrt(x)). This is faster than
      // select(x == 0, 0, x * rsqrt(x)). (In fact, it's faster than plain
      // x * rsqrt(x).)
      return DAG.getNode(
          ISD::INTRINSIC_WO_CHAIN, DL, VT,
          DAG.getConstant(Intrinsic::nvvm_rcp_approx_ftz_d, DL, MVT::i32),
          MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d));
    }
  }
}
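// Note: when ExtraSteps > 0, the refinement of the rsqrt estimate returned
// above (Newton-Raphson iterations) is performed by the target-independent
// SelectionDAG machinery, not by code in this file.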
SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  Op = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
  return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
}
std::string NVPTXTargetLowering::getPrototype(
    const DataLayout &DL, Type *retTy, const ArgListTy &Args,
    const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment,
    const ImmutableCallSite *CS) const {
  auto PtrVT = getPointerTy(DL);

  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return "";

  std::stringstream O;
  O << "prototype_" << uniqueCallSite << " : .callprototype ";
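  // For example, assuming a callee of type `float (i32, float)` at call
  // site 0, the finished string would look roughly like:
  //   prototype_0 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .b32 _);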
  if (retTy->getTypeID() == Type::VoidTyID) {
    O << "()";
  } else {
    O << "(";
    if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) {
      unsigned size = 0;
      if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
        size = ITy->getBitWidth();
      } else {
        assert(retTy->isFloatingPointTy() &&
               "Floating point type expected here");
        size = retTy->getPrimitiveSizeInBits();
      }
      // PTX ABI requires all scalar return values to be at least 32
      // bits in size. fp16 normally uses .b16 as its storage type in
      // PTX, so its size must be adjusted here, too.
      if (size < 32)
        size = 32;

      O << ".param .b" << size << " _";
    } else if (isa<PointerType>(retTy)) {
      O << ".param .b" << PtrVT.getSizeInBits() << " _";
    } else if (retTy->isAggregateType() || retTy->isVectorTy()) {
      auto &DL = CS->getCalledFunction()->getParent()->getDataLayout();
      O << ".param .align " << retAlignment << " .b8 _["
        << DL.getTypeAllocSize(retTy) << "]";
    } else {
      llvm_unreachable("Unknown return type");
    }
    O << ") ";
  }
  O << "_ (";

  bool first = true;

  unsigned OIdx = 0;
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    Type *Ty = Args[i].Ty;
    if (!first) {
      O << ", ";
    }
    first = false;

    if (!Outs[OIdx].Flags.isByVal()) {
      if (Ty->isAggregateType() || Ty->isVectorTy()) {
        unsigned align = 0;
        const CallInst *CallI = cast<CallInst>(CS->getInstruction());
        // +1 because index 0 is reserved for return type alignment
        if (!getAlign(*CallI, i + 1, align))
          align = DL.getABITypeAlignment(Ty);
        unsigned sz = DL.getTypeAllocSize(Ty);
        O << ".param .align " << align << " .b8 ";
        O << "_";
        O << "[" << sz << "]";
        // update the index for Outs
        SmallVector<EVT, 16> vtparts;
        ComputeValueVTs(*this, DL, Ty, vtparts);
        if (unsigned len = vtparts.size())
          OIdx += len - 1;
        continue;
      }
      // i8 types in IR will be i16 types in SDAG
      assert((getValueType(DL, Ty) == Outs[OIdx].VT ||
              (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
             "type mismatch between callee prototype and arguments");
      // scalar type
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
        if (sz < 32)
          sz = 32;
      } else if (isa<PointerType>(Ty)) {
        sz = PtrVT.getSizeInBits();
      } else if (Ty->isHalfTy())
        // PTX ABI requires all scalar parameters to be at least 32
        // bits in size. fp16 normally uses .b16 as its storage type
        // in PTX, so its size must be adjusted here, too.
        sz = 32;
      else
        sz = Ty->getPrimitiveSizeInBits();
      O << ".param .b" << sz << " ";
      O << "_";
      continue;
    }
    auto *PTy = dyn_cast<PointerType>(Ty);
    assert(PTy && "Param with byval attribute should be a pointer type");
    Type *ETy = PTy->getElementType();

    unsigned align = Outs[OIdx].Flags.getByValAlign();
    unsigned sz = DL.getTypeAllocSize(ETy);
    O << ".param .align " << align << " .b8 ";
    O << "_";
    O << "[" << sz << "]";
  }
  O << ");";
  return O.str();
}
unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
                                                   const ImmutableCallSite *CS,
                                                   Type *Ty, unsigned Idx,
                                                   const DataLayout &DL) const {
  if (!CS) {
    // CallSite is zero, fallback to ABI type alignment
    return DL.getABITypeAlignment(Ty);
  }

  unsigned Align = 0;
  const Value *DirectCallee = CS->getCalledFunction();

  if (!DirectCallee) {
    // We don't have a direct function symbol, but that may be because of
    // constant cast instructions in the call.
    const Instruction *CalleeI = CS->getInstruction();
    assert(CalleeI && "Call target is not a function or derived value?");

    // With bitcast'd call targets, the instruction will be the call
    if (isa<CallInst>(CalleeI)) {
      // Check if we have call alignment metadata
      if (getAlign(*cast<CallInst>(CalleeI), Idx, Align))
        return Align;

      const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
      // Ignore any bitcast instructions
      while (isa<ConstantExpr>(CalleeV)) {
        const ConstantExpr *CE = cast<ConstantExpr>(CalleeV);
        if (!CE->isCast())
          break;
        // Look through the bitcast
        CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0);
      }

      // We have now looked past all of the bitcasts. Do we finally have a
      // Function?
      if (isa<Function>(CalleeV))
        DirectCallee = CalleeV;
    }
  }

  // Check for function alignment information if we found that the
  // ultimate target is a Function
  if (DirectCallee)
    if (getAlign(*cast<Function>(DirectCallee), Idx, Align))
      return Align;

  // Call is indirect or alignment information is not available, fall back to
  // the ABI type alignment
  return DL.getABITypeAlignment(Ty);
}
SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  ArgListTy &Args = CLI.getArgs();
  Type *RetTy = CLI.RetTy;
  ImmutableCallSite *CS = CLI.CS;
  const DataLayout &DL = DAG.getDataLayout();

  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;

  SDValue tempChain = Chain;
  Chain = DAG.getCALLSEQ_START(Chain, uniqueCallSite, 0, dl);
  SDValue InFlag = Chain.getValue(1);

  unsigned paramCount = 0;
  // Args.size() and Outs.size() need not match.
  // Outs.size() will be larger
  //   * if there is an aggregate argument with multiple fields (each field
  //     showing up separately in Outs)
  //   * if there is a vector argument with more than typical vector-length
  //     elements (generally if more than 4) where each vector element is
  //     individually present in Outs.
  // So a different index should be used for indexing into Outs/OutVals.
  // See similar issue in LowerFormalArguments.
  unsigned OIdx = 0;
  // Declare the .params or .reg needed to pass values
  // to the function
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    EVT VT = Outs[OIdx].VT;
    Type *Ty = Args[i].Ty;

    if (!Outs[OIdx].Flags.isByVal()) {
      SmallVector<EVT, 16> VTs;
      SmallVector<uint64_t, 16> Offsets;
      ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets);
      unsigned ArgAlign =
          getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL);
      unsigned AllocSize = DL.getTypeAllocSize(Ty);
      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      bool NeedAlign; // Does argument declaration specify alignment?
      if (Ty->isAggregateType() || Ty->isVectorTy()) {
        // declare .param .align <align> .b8 .param<n>[<size>];
        SDValue DeclareParamOps[] = {
            Chain, DAG.getConstant(ArgAlign, dl, MVT::i32),
            DAG.getConstant(paramCount, dl, MVT::i32),
            DAG.getConstant(AllocSize, dl, MVT::i32), InFlag};
        Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                            DeclareParamOps);
        NeedAlign = true;
      } else {
        // declare .param .b<size> .param<n>;
        if ((VT.isInteger() || VT.isFloatingPoint()) && AllocSize < 4) {
          // PTX ABI requires integral types to be at least 32 bits in
          // size. FP16 is loaded/stored using i16, so it's handled
          // here as well.
          AllocSize = 4;
        }
        SDValue DeclareScalarParamOps[] = {
            Chain, DAG.getConstant(paramCount, dl, MVT::i32),
            DAG.getConstant(AllocSize * 8, dl, MVT::i32),
            DAG.getConstant(0, dl, MVT::i32), InFlag};
        Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                            DeclareScalarParamOps);
        NeedAlign = false;
      }
      InFlag = Chain.getValue(1);

      // PTX Interoperability Guide 3.3(A): [Integer] Values shorter
      // than 32-bits are sign extended or zero extended, depending on
      // whether they are signed or unsigned types. This case applies
      // only to scalar parameters and not to aggregate values.
      bool ExtendIntegerParam =
          Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32;

      auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign);
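      // VectorInfo flags where each st.param vector begins (PVF_FIRST) and
      // ends (PVF_LAST); e.g. a 16-byte-aligned <4 x float> argument is
      // emitted as one StoreParamV4, while a less-aligned one falls back to
      // scalar StoreParam nodes.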
      SmallVector<SDValue, 6> StoreOperands;
      for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
        // New store.
        if (VectorInfo[j] & PVF_FIRST) {
          assert(StoreOperands.empty() && "Unfinished preceding store.");
          StoreOperands.push_back(Chain);
          StoreOperands.push_back(DAG.getConstant(paramCount, dl, MVT::i32));
          StoreOperands.push_back(DAG.getConstant(Offsets[j], dl, MVT::i32));
        }

        EVT EltVT = VTs[j];
        SDValue StVal = OutVals[OIdx];
        if (ExtendIntegerParam) {
          assert(VTs.size() == 1 && "Scalar can't have multiple parts.");
          // zext/sext to i32
          StVal = DAG.getNode(Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
                                                        : ISD::ZERO_EXTEND,
                              dl, MVT::i32, StVal);
        } else if (EltVT.getSizeInBits() < 16) {
          // Use 16-bit registers for small stores as it's the
          // smallest general purpose register size supported by NVPTX.
          StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
        }

        // Record the value to store.
        StoreOperands.push_back(StVal);

        if (VectorInfo[j] & PVF_LAST) {
          unsigned NumElts = StoreOperands.size() - 3;
          NVPTXISD::NodeType Op;
          switch (NumElts) {
          case 1:
            Op = NVPTXISD::StoreParam;
            break;
          case 2:
            Op = NVPTXISD::StoreParamV2;
            break;
          case 4:
            Op = NVPTXISD::StoreParamV4;
            break;
          default:
            llvm_unreachable("Invalid vector info.");
          }

          StoreOperands.push_back(InFlag);

          // Adjust type of the store op if we've extended the scalar
          // return value.
          EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : VTs[j];
          unsigned EltAlign =
              NeedAlign ? GreatestCommonDivisor64(ArgAlign, Offsets[j]) : 0;
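          // The GCD above is the largest alignment provable for the element:
          // e.g. an element at byte offset 4 of an 8-byte-aligned parameter
          // is gcd(8, 4) = 4-byte aligned.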
          Chain = DAG.getMemIntrinsicNode(
              Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
              TheStoreType, MachinePointerInfo(), EltAlign,
              /* Volatile */ false, /* ReadMem */ false,
              /* WriteMem */ true, /* Size */ 0);
          InFlag = Chain.getValue(1);

          // Cleanup.
          StoreOperands.clear();
        }
        ++OIdx;
      }
      assert(StoreOperands.empty() && "Unfinished parameter store.");
      if (VTs.size() > 0)
        --OIdx;
      ++paramCount;
      continue;
    }

    // ByVal arguments
    SmallVector<EVT, 16> VTs;
    SmallVector<uint64_t, 16> Offsets;
    auto *PTy = dyn_cast<PointerType>(Args[i].Ty);
    assert(PTy && "Type of a byval parameter should be pointer");
    ComputePTXValueVTs(*this, DL, PTy->getElementType(), VTs, &Offsets, 0);

    // declare .param .align <align> .b8 .param<n>[<size>];
    unsigned sz = Outs[OIdx].Flags.getByValSize();
    SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign();
    // The ByValAlign in the Outs[OIdx].Flags is always set at this point,
    // so we don't need to worry about natural alignment or not.
    // See TargetLowering::LowerCallTo().

    // Enforce minimum alignment of 4 to work around ptxas miscompile
    // for sm_50+. See corresponding alignment adjustment in
    // emitFunctionParamList() for details.
    if (ArgAlign < 4)
      ArgAlign = 4;
    SDValue DeclareParamOps[] = {Chain, DAG.getConstant(ArgAlign, dl, MVT::i32),
                                 DAG.getConstant(paramCount, dl, MVT::i32),
                                 DAG.getConstant(sz, dl, MVT::i32), InFlag};
    Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                        DeclareParamOps);
    InFlag = Chain.getValue(1);
    for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
      EVT elemtype = VTs[j];
      int curOffset = Offsets[j];
      unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
      auto PtrVT = getPointerTy(DL);
      SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx],
                                    DAG.getConstant(curOffset, dl, PtrVT));
      SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
                                   MachinePointerInfo(), PartAlign);
      if (elemtype.getSizeInBits() < 16) {
        theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
      }
      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue CopyParamOps[] = { Chain,
                                 DAG.getConstant(paramCount, dl, MVT::i32),
                                 DAG.getConstant(curOffset, dl, MVT::i32),
                                 theVal, InFlag };
      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
                                      CopyParamOps, elemtype,
                                      MachinePointerInfo(), /* Align */ 0,
                                      /* Volatile */ false, /* ReadMem */ false,
                                      /* WriteMem */ true, /* Size */ 0);

      InFlag = Chain.getValue(1);
    }
    ++paramCount;
  }

  GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
  unsigned retAlignment = 0;

  // Handle Result
  if (Ins.size() > 0) {
    SmallVector<EVT, 16> resvtparts;
    ComputeValueVTs(*this, DL, RetTy, resvtparts);

    // Declare
    //  .param .align 16 .b8 retval0[<size-in-bytes>], or
    //  .param .b<size-in-bits> retval0
    unsigned resultsz = DL.getTypeAllocSizeInBits(RetTy);
    // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
    // these three types to match the logic in
    // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
    // Plus, this behavior is consistent with nvcc's.
    if (RetTy->isFloatingPointTy() || RetTy->isIntegerTy() ||
        RetTy->isPointerTy()) {
      // Scalar needs to be at least 32bit wide
      if (resultsz < 32)
        resultsz = 32;
      SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
                                  DAG.getConstant(resultsz, dl, MVT::i32),
                                  DAG.getConstant(0, dl, MVT::i32), InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
                          DeclareRetOps);
      InFlag = Chain.getValue(1);
    } else {
      retAlignment = getArgumentAlignment(Callee, CS, RetTy, 0, DL);
      SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareRetOps[] = { Chain,
                                  DAG.getConstant(retAlignment, dl, MVT::i32),
                                  DAG.getConstant(resultsz / 8, dl, MVT::i32),
                                  DAG.getConstant(0, dl, MVT::i32), InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
                          DeclareRetOps);
      InFlag = Chain.getValue(1);
    }
  }

  if (!Func) {
    // This is the indirect function call case: PTX requires a prototype of the
    // form
    //   proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
    // to be emitted, and the label has to be used as the last arg of the call
    // instruction.
    // The prototype is embedded in a string and put as the operand for a
    // CallPrototype SDNode which will print out to the value of the string.
    SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    std::string Proto = getPrototype(DL, RetTy, Args, Outs, retAlignment, CS);
    const char *ProtoStr =
        nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
    SDValue ProtoOps[] = {
        Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag,
    };
    Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);
    InFlag = Chain.getValue(1);
  }
  // Op to just print "call"
  SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue PrintCallOps[] = {
    Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag
  };
  // We model convergent calls as separate opcodes.
  unsigned Opcode = Func ? NVPTXISD::PrintCallUni : NVPTXISD::PrintCall;
  if (CLI.IsConvergent)
    Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni
                                              : NVPTXISD::PrintConvergentCall;
  Chain = DAG.getNode(Opcode, dl, PrintCallVTs, PrintCallOps);
  InFlag = Chain.getValue(1);

  // Ops to print out the function name
  SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallVoidOps[] = { Chain, Callee, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);
  InFlag = Chain.getValue(1);

  // Ops to print out the param list
  SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgBeginOps[] = { Chain, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
                      CallArgBeginOps);
  InFlag = Chain.getValue(1);

  for (unsigned i = 0, e = paramCount; i != e; ++i) {
    unsigned opcode;
    if (i == (e - 1))
      opcode = NVPTXISD::LastCallArg;
    else
      opcode = NVPTXISD::CallArg;
    SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
                             DAG.getConstant(i, dl, MVT::i32), InFlag };
    Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
    InFlag = Chain.getValue(1);
  }
  SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgEndOps[] = { Chain,
                              DAG.getConstant(Func ? 1 : 0, dl, MVT::i32),
                              InFlag };
  Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
  InFlag = Chain.getValue(1);

  if (!Func) {
    SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue PrototypeOps[] = { Chain,
                               DAG.getConstant(uniqueCallSite, dl, MVT::i32),
                               InFlag };
    Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
    InFlag = Chain.getValue(1);
  }
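  // Taken together, the ops above make the asm printer emit a PTX call of
  // roughly this form (a sketch; the exact text comes from the asm printer):
  //   call.uni (retval0), callee, (param0, ..., paramN);
  // For indirect calls the prototype label is appended as the last operand.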
  // Generate loads from param memory/moves from registers for result
  if (Ins.size() > 0) {
    SmallVector<EVT, 16> VTs;
    SmallVector<uint64_t, 16> Offsets;
    ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0);
    assert(VTs.size() == Ins.size() && "Bad value decomposition");

    unsigned RetAlign = getArgumentAlignment(Callee, CS, RetTy, 0, DL);
    auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign);

    SmallVector<EVT, 6> LoadVTs;
    int VecIdx = -1; // Index of the first element of the vector.

    // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
    // 32-bits are sign extended or zero extended, depending on whether
    // they are signed or unsigned types.
    bool ExtendIntegerRetVal =
        RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;

    for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
      bool needTruncate = false;
      EVT TheLoadType = VTs[i];
      EVT EltType = Ins[i].VT;
      unsigned EltAlign = GreatestCommonDivisor64(RetAlign, Offsets[i]);
      if (ExtendIntegerRetVal) {
        TheLoadType = MVT::i32;
        EltType = MVT::i32;
        needTruncate = true;
      } else if (TheLoadType.getSizeInBits() < 16) {
        if (VTs[i].isInteger())
          needTruncate = true;
        EltType = MVT::i16;
      }

      // Record index of the very first element of the vector.
      if (VectorInfo[i] & PVF_FIRST) {
        assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list.");
        VecIdx = i;
      }

      LoadVTs.push_back(EltType);

      if (VectorInfo[i] & PVF_LAST) {
        unsigned NumElts = LoadVTs.size();
        LoadVTs.push_back(MVT::Other);
        LoadVTs.push_back(MVT::Glue);
        NVPTXISD::NodeType Op;
        switch (NumElts) {
        case 1:
          Op = NVPTXISD::LoadParam;
          break;
        case 2:
          Op = NVPTXISD::LoadParamV2;
          break;
        case 4:
          Op = NVPTXISD::LoadParamV4;
          break;
        default:
          llvm_unreachable("Invalid vector info.");
        }

        SDValue LoadOperands[] = {
            Chain, DAG.getConstant(1, dl, MVT::i32),
            DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InFlag};
        SDValue RetVal = DAG.getMemIntrinsicNode(
            Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType,
            MachinePointerInfo(), EltAlign, /* Volatile */ false,
            /* ReadMem */ true, /* WriteMem */ false, /* Size */ 0);

        for (unsigned j = 0; j < NumElts; ++j) {
          SDValue Ret = RetVal.getValue(j);
          if (needTruncate)
            Ret = DAG.getNode(ISD::TRUNCATE, dl, Ins[VecIdx + j].VT, Ret);
          InVals.push_back(Ret);
        }
        Chain = RetVal.getValue(NumElts);
        InFlag = RetVal.getValue(NumElts + 1);

        // Cleanup.
        VecIdx = -1;
        LoadVTs.clear();
      }
    }
  }

  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getIntPtrConstant(uniqueCallSite, dl, true),
                             DAG.getIntPtrConstant(uniqueCallSite + 1, dl,
                                                   true),
                             InFlag, dl);
  uniqueCallSite++;

  // Set isTailCall to false for now, until we figure out how to express
  // tail call optimization in PTX.
  isTailCall = false;
  return Chain;
}
// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
// (see LegalizeDAG.cpp). This is slow and uses local memory.
// We use extract/insert/build vector just as LegalizeOp() did in LLVM 2.5.
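// For example, (concat_vectors (v2f16 A), (v2f16 B)) becomes
// (build_vector A[0], A[1], B[0], B[1]), which stays in registers.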
SDValue
NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  SDLoc dl(Node);
  SmallVector<SDValue, 8> Ops;
  unsigned NumOperands = Node->getNumOperands();
  for (unsigned i = 0; i < NumOperands; ++i) {
    SDValue SubOp = Node->getOperand(i);
    EVT VVT = SubOp.getNode()->getValueType(0);
    EVT EltVT = VVT.getVectorElementType();
    unsigned NumSubElem = VVT.getVectorNumElements();
    for (unsigned j = 0; j < NumSubElem; ++j) {
      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
                                DAG.getIntPtrConstant(j, dl)));
    }
  }
  return DAG.getBuildVector(Node->getValueType(0), dl, Ops);
}
// We can init constant f16x2 with a single .b32 move. Normally it
// would get lowered as two constant loads and vector-packing move.
//   mov.b16         %h1, 0x4000;
//   mov.b16         %h2, 0x3C00;
//   mov.b32         %hh2, {%h2, %h1};
// Instead we want just a constant move:
//   mov.b32         %hh2, 0x40003C00
//
// This results in better SASS code with CUDA 7.x. Ptxas in CUDA 8.0
// generates good SASS in both cases.
SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                               SelectionDAG &DAG) const {
  if (!(Op->getValueType(0) == MVT::v2f16 &&
        isa<ConstantFPSDNode>(Op->getOperand(0)) &&
        isa<ConstantFPSDNode>(Op->getOperand(1))))
    return Op;

  APInt E0 =
      cast<ConstantFPSDNode>(Op->getOperand(0))->getValueAPF().bitcastToAPInt();
  APInt E1 =
      cast<ConstantFPSDNode>(Op->getOperand(1))->getValueAPF().bitcastToAPInt();
  SDValue Const =
      DAG.getConstant(E1.zext(32).shl(16) | E0.zext(32), SDLoc(Op), MVT::i32);
  return DAG.getNode(ISD::BITCAST, SDLoc(Op), MVT::v2f16, Const);
}
SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDValue Index = Op->getOperand(1);
  // Constant index will be matched by tablegen.
  if (isa<ConstantSDNode>(Index.getNode()))
    return Op;

  // Extract individual elements and select one of them.
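  // In PTX this typically ends up as an unpack plus a predicated select,
  // roughly (a sketch, assuming the usual lowering of the select_cc below):
  //   mov.b32 {%h1, %h2}, %hh1;
  //   selp.b16 %h3, %h1, %h2, %p;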
  SDValue Vector = Op->getOperand(0);
  EVT VectorVT = Vector.getValueType();
  assert(VectorVT == MVT::v2f16 && "Unexpected vector type.");
  EVT EltVT = VectorVT.getVectorElementType();

  SDLoc dl(Op.getNode());
  SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector,
                           DAG.getIntPtrConstant(0, dl));
  SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector,
                           DAG.getIntPtrConstant(1, dl));
  return DAG.getSelectCC(dl, Index, DAG.getIntPtrConstant(0, dl), E0, E1,
                         ISD::CondCode::SETEQ);
}
/// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
/// 1) returns two i32 values and takes a 2 x i32 value to shift plus a shift
///    amount.
/// 2) returns two i64 values and takes a 2 x i64 value to shift plus a shift
///    amount.
SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);

  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getSizeInBits();
  SDLoc dl(Op);
  SDValue ShOpLo = Op.getOperand(0);
  SDValue ShOpHi = Op.getOperand(1);
  SDValue ShAmt = Op.getOperand(2);
  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;

  if (VTBits == 32 && STI.getSmVersion() >= 35) {
    // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
    // {dHi, dLo} = {aHi, aLo} >> Amt
    //   dHi = aHi >> Amt
    //   dLo = shf.r.clamp aLo, aHi, Amt

    SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
    SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi,
                             ShAmt);

    SDValue Ops[2] = { Lo, Hi };
    return DAG.getMergeValues(Ops, dl);
  } else {
    // {dHi, dLo} = {aHi, aLo} >> Amt
    // - if (Amt>=size) then
    //      dLo = aHi >> (Amt-size)
    //      dHi = aHi >> Amt (this is either all 0 or all 1)
    // - else
    //      dLo = (aLo >>logic Amt) | (aHi << (size-Amt))
    //      dHi = aHi >> Amt

    SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                   DAG.getConstant(VTBits, dl, MVT::i32),
                                   ShAmt);
    SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
    SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
                                     DAG.getConstant(VTBits, dl, MVT::i32));
    SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
    SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
    SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);

    SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
                               DAG.getConstant(VTBits, dl, MVT::i32),
                               ISD::SETGE);
    SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
    SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);

    SDValue Ops[2] = { Lo, Hi };
    return DAG.getMergeValues(Ops, dl);
  }
}
/// LowerShiftLeftParts - Lower SHL_PARTS, which
/// 1) returns two i32 values and takes a 2 x i32 value to shift plus a shift
///    amount.
/// 2) returns two i64 values and takes a 2 x i64 value to shift plus a shift
///    amount.
SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
  assert(Op.getOpcode() == ISD::SHL_PARTS);

  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getSizeInBits();
  SDLoc dl(Op);
  SDValue ShOpLo = Op.getOperand(0);
  SDValue ShOpHi = Op.getOperand(1);
  SDValue ShAmt = Op.getOperand(2);

  if (VTBits == 32 && STI.getSmVersion() >= 35) {
    // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
    // {dHi, dLo} = {aHi, aLo} << Amt
    //   dHi = shf.l.clamp aLo, aHi, Amt
    //   dLo = aLo << Amt

    SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi,
                             ShAmt);
    SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);

    SDValue Ops[2] = { Lo, Hi };
    return DAG.getMergeValues(Ops, dl);
  } else {
    // {dHi, dLo} = {aHi, aLo} << Amt
    // - if (Amt>=size) then
    //      dLo = aLo << Amt (all 0)
    //      dHi = aLo << (Amt-size)
    // - else
    //      dLo = aLo << Amt
    //      dHi = (aHi << Amt) | (aLo >> (size-Amt))

    SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                   DAG.getConstant(VTBits, dl, MVT::i32),
                                   ShAmt);
    SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
    SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
                                     DAG.getConstant(VTBits, dl, MVT::i32));
    SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
    SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
    SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);

    SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
                               DAG.getConstant(VTBits, dl, MVT::i32),
                               ISD::SETGE);
    SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
    SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);

    SDValue Ops[2] = { Lo, Hi };
    return DAG.getMergeValues(Ops, dl);
  }
}
SDValue
NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::RETURNADDR:
    return SDValue();
  case ISD::FRAMEADDR:
    return SDValue();
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return Op;
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::EXTRACT_SUBVECTOR:
    return Op;
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::CONCAT_VECTORS:
    return LowerCONCAT_VECTORS(Op, DAG);
  case ISD::STORE:
    return LowerSTORE(Op, DAG);
  case ISD::LOAD:
    return LowerLOAD(Op, DAG);
  case ISD::SHL_PARTS:
    return LowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS:
    return LowerShiftRightParts(Op, DAG);
  case ISD::SELECT:
    return LowerSelect(Op, DAG);
  default:
    llvm_unreachable("Custom lowering not defined for operation");
  }
}
SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
  SDValue Op0 = Op->getOperand(0);
  SDValue Op1 = Op->getOperand(1);
  SDValue Op2 = Op->getOperand(2);
  SDLoc DL(Op.getNode());

  assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");

  Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
  Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
  SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select);

  return Trunc;
}
SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getValueType() == MVT::i1)
    return LowerLOADi1(Op, DAG);

  // v2f16 is legal, so we can't rely on legalizer to handle unaligned
  // loads and have to handle it here.
  if (Op.getValueType() == MVT::v2f16) {
    LoadSDNode *Load = cast<LoadSDNode>(Op);
    EVT MemVT = Load->getMemoryVT();
    if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
                            Load->getAddressSpace(), Load->getAlignment())) {
      SDValue Ops[2];
      std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
      return DAG.getMergeValues(Ops, SDLoc(Op));
    }
  }

  return SDValue();
}
// v = ld i1* addr
//   =>
// v1 = ld i8* addr (-> i16)
// v = trunc i16 to i1
SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  LoadSDNode *LD = cast<LoadSDNode>(Node);
  SDLoc dl(Op);
  assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
  assert(Node->getValueType(0) == MVT::i1 &&
         "Custom lowering for i1 load only");
  SDValue newLD = DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
                              LD->getPointerInfo(), LD->getAlignment(),
                              LD->getMemOperand()->getFlags());
  SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
  // The legalizer (the caller) is expecting two values from the legalized
  // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
  // in LegalizeDAG.cpp which also uses MergeValues.
  SDValue Ops[] = { result, LD->getChain() };
  return DAG.getMergeValues(Ops, dl);
}
SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *Store = cast<StoreSDNode>(Op);
  EVT VT = Store->getMemoryVT();

  if (VT == MVT::i1)
    return LowerSTOREi1(Op, DAG);

  // v2f16 is legal, so we can't rely on legalizer to handle unaligned
  // stores and have to handle it here.
  if (VT == MVT::v2f16 &&
      !allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                          Store->getAddressSpace(), Store->getAlignment()))
    return expandUnalignedStore(Store, DAG);

  if (VT.isVector())
    return LowerSTOREVector(Op, DAG);

  return SDValue();
}
SDValue
NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
  SDNode *N = Op.getNode();
  SDValue Val = N->getOperand(1);
  SDLoc DL(N);
  EVT ValVT = Val.getValueType();

  if (ValVT.isVector()) {
    // We only handle "native" vector sizes for now, e.g. <4 x double> is not
    // legal. We can (and should) split that into 2 stores of <2 x double> here
    // but I'm leaving that as a TODO for now.
    if (!ValVT.isSimple())
      return SDValue();
    switch (ValVT.getSimpleVT().SimpleTy) {
    default:
      return SDValue();
    case MVT::v2i8:
    case MVT::v2i16:
    case MVT::v2i32:
    case MVT::v2i64:
    case MVT::v2f16:
    case MVT::v2f32:
    case MVT::v2f64:
    case MVT::v4i8:
    case MVT::v4i16:
    case MVT::v4i32:
    case MVT::v4f16:
    case MVT::v4f32:
    case MVT::v8f16: // <4 x f16x2>
      // This is a "native" vector type
      break;
    }

    MemSDNode *MemSD = cast<MemSDNode>(N);
    const DataLayout &TD = DAG.getDataLayout();

    unsigned Align = MemSD->getAlignment();
    unsigned PrefAlign =
        TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
    if (Align < PrefAlign) {
      // This store is not sufficiently aligned, so bail out and let this vector
      // store be scalarized. Note that we may still be able to emit smaller
      // vector stores. For example, if we are storing a <4 x float> with an
      // alignment of 8, this check will fail but the legalizer will try again
      // with 2 x <2 x float>, which will succeed with an alignment of 8.
      return SDValue();
    }

    unsigned Opcode = 0;
    EVT EltVT = ValVT.getVectorElementType();
    unsigned NumElts = ValVT.getVectorNumElements();

    // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
    // Therefore, we must ensure the type is legal. For i1 and i8, we set the
    // stored type to i16 and propagate the "real" type as the memory type.
    bool NeedExt = false;
    if (EltVT.getSizeInBits() < 16)
      NeedExt = true;

    bool StoreF16x2 = false;
    switch (NumElts) {
    default:
      return SDValue();
    case 2:
      Opcode = NVPTXISD::StoreV2;
      break;
    case 4:
      Opcode = NVPTXISD::StoreV4;
      break;
    case 8:
      // v8f16 is a special case. PTX doesn't have st.v8.f16
      // instruction. Instead, we split the vector into v2f16 chunks and
      // store them with st.v4.b32.
      assert(EltVT == MVT::f16 && "Wrong type for the vector.");
      Opcode = NVPTXISD::StoreV4;
      StoreF16x2 = true;
      break;
    }

    SmallVector<SDValue, 8> Ops;

    // First is the chain
    Ops.push_back(N->getOperand(0));

    if (StoreF16x2) {
      // Combine f16,f16 -> v2f16
      NumElts /= 2;
      for (unsigned i = 0; i < NumElts; ++i) {
        SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val,
                                 DAG.getIntPtrConstant(i * 2, DL));
        SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val,
                                 DAG.getIntPtrConstant(i * 2 + 1, DL));
        SDValue V2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f16, E0, E1);
        Ops.push_back(V2);
      }
    } else {
      // Then the split values
      for (unsigned i = 0; i < NumElts; ++i) {
        SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
                                     DAG.getIntPtrConstant(i, DL));
        if (NeedExt)
          ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
        Ops.push_back(ExtVal);
      }
    }

    // Then any remaining arguments
    Ops.append(N->op_begin() + 2, N->op_end());

    SDValue NewSt =
        DAG.getMemIntrinsicNode(Opcode, DL, DAG.getVTList(MVT::Other), Ops,
                                MemSD->getMemoryVT(), MemSD->getMemOperand());

    // return DCI.CombineTo(N, NewSt, true);
    return NewSt;
  }

  return SDValue();
}
// st i1 v, addr
//   =>
// v1 = zxt v to i16
// st.u8 i16, addr
SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  SDLoc dl(Node);
  StoreSDNode *ST = cast<StoreSDNode>(Node);
  SDValue Tmp1 = ST->getChain();
  SDValue Tmp2 = ST->getBasePtr();
  SDValue Tmp3 = ST->getValue();
  assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
  Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3);
  SDValue Result =
      DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8,
                        ST->getAlignment(), ST->getMemOperand()->getFlags());
  return Result;
}
SDValue
NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
  std::string ParamSym;
  raw_string_ostream ParamStr(ParamSym);

  ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx;
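  // e.g. the first parameter of function "foo" becomes the symbol
  // "foo_param_0", matching the .param name the asm printer declares for it.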
  ParamStr.flush();

  std::string *SavedStr =
      nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str());
  return DAG.getTargetExternalSymbol(SavedStr->c_str(), v);
}
// Check to see if the kernel argument is image*_t or sampler_t

static bool isImageOrSamplerVal(const Value *arg, const Module *context) {
  static const char *const specialTypes[] = { "struct._image2d_t",
                                              "struct._image3d_t",
                                              "struct._sampler_t" };

  Type *Ty = arg->getType();
  auto *PTy = dyn_cast<PointerType>(Ty);
  if (!PTy)
    return false;

  if (!context)
    return false;

  auto *STy = dyn_cast<StructType>(PTy->getElementType());
  if (!STy || STy->isLiteral())
    return false;

  return std::find(std::begin(specialTypes), std::end(specialTypes),
                   STy->getName()) != std::end(specialTypes);
}
SDValue NVPTXTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const DataLayout &DL = DAG.getDataLayout();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  const Function *F = MF.getFunction();
  const AttributeList &PAL = F->getAttributes();
  const TargetLowering *TLI = STI.getTargetLowering();

  SDValue Root = DAG.getRoot();
  std::vector<SDValue> OutChains;

  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;

  std::vector<Type *> argTypes;
  std::vector<const Argument *> theArgs;
  for (const Argument &I : F->args()) {
    theArgs.push_back(&I);
    argTypes.push_back(I.getType());
  }
  // argTypes.size() (or theArgs.size()) and Ins.size() need not match.
  // Ins.size() will be larger
  //   * if there is an aggregate argument with multiple fields (each field
  //     showing up separately in Ins)
  //   * if there is a vector argument with more than typical vector-length
  //     elements (generally if more than 4) where each vector element is
  //     individually present in Ins.
  // So a different index should be used for indexing into Ins.
  // See similar issue in LowerCall.
  unsigned InsIdx = 0;

  int idx = 0;
  for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
    Type *Ty = argTypes[i];

    // If the kernel argument is image*_t or sampler_t, convert it to
    // an i32 constant holding the parameter position. This can later be
    // matched in the AsmPrinter to output the correct mangled name.
    if (isImageOrSamplerVal(
            theArgs[i],
            (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
                                     : nullptr))) {
      assert(isKernelFunction(*F) &&
             "Only kernels can have image/sampler params");
      InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32));
      continue;
    }

    if (theArgs[i]->use_empty()) {
      // argument is dead
      if (Ty->isAggregateType()) {
        SmallVector<EVT, 16> vtparts;

        ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts);
        assert(vtparts.size() > 0 && "empty aggregate type not expected");
        for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
             ++parti) {
          InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
          ++InsIdx;
        }
        if (vtparts.size() > 0)
          --InsIdx;
        continue;
      }
      if (Ty->isVectorTy()) {
        EVT ObjectVT = getValueType(DL, Ty);
        unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
        for (unsigned parti = 0; parti < NumRegs; ++parti) {
          InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
          ++InsIdx;
        }
        if (NumRegs > 0)
          --InsIdx;
        continue;
      }
      InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
      continue;
    }

    // In the following cases, assign a node order of "idx+1"
    // to newly created nodes. The SDNodes for params have to
    // appear in the same order as their order of appearance
    // in the original function. "idx+1" holds that order.
    if (!PAL.hasParamAttribute(i, Attribute::ByVal)) {
      bool aggregateIsPacked = false;
      if (StructType *STy = dyn_cast<StructType>(Ty))
        aggregateIsPacked = STy->isPacked();

      SmallVector<EVT, 16> VTs;
      SmallVector<uint64_t, 16> Offsets;
      ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0);
      assert(VTs.size() > 0 && "Unexpected empty type.");
      auto VectorInfo =
          VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlignment(Ty));

      SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
      int VecIdx = -1; // Index of the first element of the current vector.
      for (unsigned parti = 0, parte = VTs.size(); parti != parte; ++parti) {
        if (VectorInfo[parti] & PVF_FIRST) {
          assert(VecIdx == -1 && "Orphaned vector.");
          VecIdx = parti;
        }

        // That's the last element of this store op.
        if (VectorInfo[parti] & PVF_LAST) {
          unsigned NumElts = parti - VecIdx + 1;
          EVT EltVT = VTs[parti];
          // i1 is loaded/stored as i8.
          EVT LoadVT = EltVT;
          if (EltVT == MVT::i1)
            LoadVT = MVT::i8;
          else if (EltVT == MVT::v2f16)
            // getLoad needs a vector type, but it can't handle
            // vectors which contain v2f16 elements. So we must load
            // using i32 here and then bitcast back.
            LoadVT = MVT::i32;

          EVT VecVT = EVT::getVectorVT(F->getContext(), LoadVT, NumElts);
          SDValue VecAddr =
              DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
                          DAG.getConstant(Offsets[VecIdx], dl, PtrVT));
          Value *srcValue = Constant::getNullValue(PointerType::get(
              EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
          SDValue P =
              DAG.getLoad(VecVT, dl, Root, VecAddr,
                          MachinePointerInfo(srcValue), aggregateIsPacked,
                          MachineMemOperand::MODereferenceable |
                              MachineMemOperand::MOInvariant);
          if (P.getNode())
            P.getNode()->setIROrder(idx + 1);
          for (unsigned j = 0; j < NumElts; ++j) {
            SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LoadVT, P,
                                      DAG.getIntPtrConstant(j, dl));
            // We've loaded i1 as an i8 and now must truncate it back to i1
            if (EltVT == MVT::i1)
              Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Elt);
            // v2f16 was loaded as an i32. Now we must bitcast it back.
            else if (EltVT == MVT::v2f16)
              Elt = DAG.getNode(ISD::BITCAST, dl, MVT::v2f16, Elt);
            // Extend the element if necessary (e.g. an i8 is loaded
            // into an i16 register)
            if (Ins[InsIdx].VT.isInteger() &&
                Ins[InsIdx].VT.getSizeInBits() > LoadVT.getSizeInBits()) {
              unsigned Extend = Ins[InsIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
                                                           : ISD::ZERO_EXTEND;
              Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt);
            }
            InVals.push_back(Elt);
            ++InsIdx;
          }

          // Reset vector tracking state.
          VecIdx = -1;
        }
      }
      if (VTs.size() > 0)
        --InsIdx;
      continue;
    }

    // Param has ByVal attribute
    // Return MoveParam(param symbol).
    // Ideally, the param symbol can be returned directly,
    // but when SDNode builder decides to use it in a CopyToReg(),
    // machine instruction fails because TargetExternalSymbol
    // (not lowered) is target dependent, and CopyToReg assumes
    // the source is lowered.
    EVT ObjectVT = getValueType(DL, Ty);
    assert(ObjectVT == Ins[InsIdx].VT &&
           "Ins type did not match function type");
    SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
    SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
    if (p.getNode())
      p.getNode()->setIROrder(idx + 1);
    InVals.push_back(p);
  }

  // Clang will check explicit VarArg and issue error if any. However, Clang
  // will let code with
  // implicit var arg like f() pass. See bug 617733.
  // We treat this case as if the arg list is empty.
  // if (F.isVarArg()) {
  //   assert(0 && "VarArg not supported yet!");
  // }

  if (!OutChains.empty())
    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains));

  return Chain;
}
SDValue
NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &dl, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  Type *RetTy = MF.getFunction()->getReturnType();

  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;

  const DataLayout DL = DAG.getDataLayout();
  SmallVector<EVT, 16> VTs;
  SmallVector<uint64_t, 16> Offsets;
  ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets);
  assert(VTs.size() == OutVals.size() && "Bad return value decomposition");

  auto VectorInfo = VectorizePTXValueVTs(
      VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlignment(RetTy) : 1);
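  // As with call arguments, VectorInfo groups elements into vector stores
  // where the return value's alignment allows it; e.g. a v4f32 return is
  // emitted as a single StoreRetvalV4 (roughly st.param.v4.f32
  // [func_retval0+0], {...} in the printed PTX).
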
  // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
  // 32-bits are sign extended or zero extended, depending on whether
  // they are signed or unsigned types.
  bool ExtendIntegerRetVal =
      RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;

  SmallVector<SDValue, 6> StoreOperands;
  for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
    // New load/store. Record chain and offset operands.
    if (VectorInfo[i] & PVF_FIRST) {
      assert(StoreOperands.empty() && "Orphaned operand list.");
      StoreOperands.push_back(Chain);
      StoreOperands.push_back(DAG.getConstant(Offsets[i], dl, MVT::i32));
    }

    SDValue RetVal = OutVals[i];
    if (ExtendIntegerRetVal) {
      RetVal = DAG.getNode(Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND
                                                  : ISD::ZERO_EXTEND,
                           dl, MVT::i32, RetVal);
    } else if (RetVal.getValueSizeInBits() < 16) {
      // Use 16-bit registers for small load-stores as it's the
      // smallest general purpose register size supported by NVPTX.
      RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, RetVal);
    }

    // Record the value to return.
    StoreOperands.push_back(RetVal);

    // That's the last element of this store op.
    if (VectorInfo[i] & PVF_LAST) {
      NVPTXISD::NodeType Op;
      unsigned NumElts = StoreOperands.size() - 2;
      switch (NumElts) {
      case 1:
        Op = NVPTXISD::StoreRetval;
        break;
      case 2:
        Op = NVPTXISD::StoreRetvalV2;
        break;
      case 4:
        Op = NVPTXISD::StoreRetvalV4;
        break;
      default:
        llvm_unreachable("Invalid vector info.");
      }

      // Adjust type of load/store op if we've extended the scalar
      // return value.
      EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i];
      Chain = DAG.getMemIntrinsicNode(Op, dl, DAG.getVTList(MVT::Other),
                                      StoreOperands, TheStoreType,
                                      MachinePointerInfo(), /* Align */ 1,
                                      /* Volatile */ false, /* ReadMem */ false,
                                      /* WriteMem */ true, /* Size */ 0);
      // Cleanup vector state.
      StoreOperands.clear();
    }
  }

  return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
}
void NVPTXTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  if (Constraint.length() > 1)
    return;
  else
    TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
  switch (Intrinsic) {
  default:
    return 0;

  case Intrinsic::nvvm_tex_1d_v4f32_s32:
    return NVPTXISD::Tex1DFloatS32;
  case Intrinsic::nvvm_tex_1d_v4f32_f32:
    return NVPTXISD::Tex1DFloatFloat;
  case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
    return NVPTXISD::Tex1DFloatFloatLevel;
  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
    return NVPTXISD::Tex1DFloatFloatGrad;
  case Intrinsic::nvvm_tex_1d_v4s32_s32:
    return NVPTXISD::Tex1DS32S32;
  case Intrinsic::nvvm_tex_1d_v4s32_f32:
    return NVPTXISD::Tex1DS32Float;
  case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
    return NVPTXISD::Tex1DS32FloatLevel;
  case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
    return NVPTXISD::Tex1DS32FloatGrad;
  case Intrinsic::nvvm_tex_1d_v4u32_s32:
    return NVPTXISD::Tex1DU32S32;
  case Intrinsic::nvvm_tex_1d_v4u32_f32:
    return NVPTXISD::Tex1DU32Float;
  case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
    return NVPTXISD::Tex1DU32FloatLevel;
  case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
    return NVPTXISD::Tex1DU32FloatGrad;

  case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
    return NVPTXISD::Tex1DArrayFloatS32;
  case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
    return NVPTXISD::Tex1DArrayFloatFloat;
  case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
    return NVPTXISD::Tex1DArrayFloatFloatLevel;
  case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
    return NVPTXISD::Tex1DArrayFloatFloatGrad;
  case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
    return NVPTXISD::Tex1DArrayS32S32;
  case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
    return NVPTXISD::Tex1DArrayS32Float;
  case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
    return NVPTXISD::Tex1DArrayS32FloatLevel;
  case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
    return NVPTXISD::Tex1DArrayS32FloatGrad;
  case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
    return NVPTXISD::Tex1DArrayU32S32;
  case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
    return NVPTXISD::Tex1DArrayU32Float;
  case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
    return NVPTXISD::Tex1DArrayU32FloatLevel;
  case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
    return NVPTXISD::Tex1DArrayU32FloatGrad;

  case Intrinsic::nvvm_tex_2d_v4f32_s32:
    return NVPTXISD::Tex2DFloatS32;
  case Intrinsic::nvvm_tex_2d_v4f32_f32:
    return NVPTXISD::Tex2DFloatFloat;
  case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
    return NVPTXISD::Tex2DFloatFloatLevel;
  case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
    return NVPTXISD::Tex2DFloatFloatGrad;
  case Intrinsic::nvvm_tex_2d_v4s32_s32:
    return NVPTXISD::Tex2DS32S32;
  case Intrinsic::nvvm_tex_2d_v4s32_f32:
    return NVPTXISD::Tex2DS32Float;
  case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
    return NVPTXISD::Tex2DS32FloatLevel;
  case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
    return NVPTXISD::Tex2DS32FloatGrad;
  case Intrinsic::nvvm_tex_2d_v4u32_s32:
    return NVPTXISD::Tex2DU32S32;
  case Intrinsic::nvvm_tex_2d_v4u32_f32:
    return NVPTXISD::Tex2DU32Float;
  case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
    return NVPTXISD::Tex2DU32FloatLevel;
  case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
    return NVPTXISD::Tex2DU32FloatGrad;

  case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
    return NVPTXISD::Tex2DArrayFloatS32;
  case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
    return NVPTXISD::Tex2DArrayFloatFloat;
  case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
    return NVPTXISD::Tex2DArrayFloatFloatLevel;
  case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
    return NVPTXISD::Tex2DArrayFloatFloatGrad;
  case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
    return NVPTXISD::Tex2DArrayS32S32;
  case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
    return NVPTXISD::Tex2DArrayS32Float;
  case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
    return NVPTXISD::Tex2DArrayS32FloatLevel;
  case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
    return NVPTXISD::Tex2DArrayS32FloatGrad;
  case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
    return NVPTXISD::Tex2DArrayU32S32;
  case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
    return NVPTXISD::Tex2DArrayU32Float;
  case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
    return NVPTXISD::Tex2DArrayU32FloatLevel;
  case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
    return NVPTXISD::Tex2DArrayU32FloatGrad;

  case Intrinsic::nvvm_tex_3d_v4f32_s32:
    return NVPTXISD::Tex3DFloatS32;
  case Intrinsic::nvvm_tex_3d_v4f32_f32:
    return NVPTXISD::Tex3DFloatFloat;
  case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
    return NVPTXISD::Tex3DFloatFloatLevel;
  case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
    return NVPTXISD::Tex3DFloatFloatGrad;
  case Intrinsic::nvvm_tex_3d_v4s32_s32:
    return NVPTXISD::Tex3DS32S32;
  case Intrinsic::nvvm_tex_3d_v4s32_f32:
    return NVPTXISD::Tex3DS32Float;
  case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
    return NVPTXISD::Tex3DS32FloatLevel;
  case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
    return NVPTXISD::Tex3DS32FloatGrad;
  case Intrinsic::nvvm_tex_3d_v4u32_s32:
    return NVPTXISD::Tex3DU32S32;
  case Intrinsic::nvvm_tex_3d_v4u32_f32:
    return NVPTXISD::Tex3DU32Float;
  case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
    return NVPTXISD::Tex3DU32FloatLevel;
  case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
    return NVPTXISD::Tex3DU32FloatGrad;

  case Intrinsic::nvvm_tex_cube_v4f32_f32:
    return NVPTXISD::TexCubeFloatFloat;
  case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
    return NVPTXISD::TexCubeFloatFloatLevel;
  case Intrinsic::nvvm_tex_cube_v4s32_f32:
    return NVPTXISD::TexCubeS32Float;
  case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
    return NVPTXISD::TexCubeS32FloatLevel;
  case Intrinsic::nvvm_tex_cube_v4u32_f32:
    return NVPTXISD::TexCubeU32Float;
  case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
    return NVPTXISD::TexCubeU32FloatLevel;

  case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
    return NVPTXISD::TexCubeArrayFloatFloat;
  case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
    return NVPTXISD::TexCubeArrayFloatFloatLevel;
  case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
    return NVPTXISD::TexCubeArrayS32Float;
  case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
    return NVPTXISD::TexCubeArrayS32FloatLevel;
  case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
    return NVPTXISD::TexCubeArrayU32Float;
  case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
    return NVPTXISD::TexCubeArrayU32FloatLevel;

  case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
    return NVPTXISD::Tld4R2DFloatFloat;
  case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
    return NVPTXISD::Tld4G2DFloatFloat;
  case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
    return NVPTXISD::Tld4B2DFloatFloat;
  case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
    return NVPTXISD::Tld4A2DFloatFloat;
  case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
    return NVPTXISD::Tld4R2DS64Float;
  case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
    return NVPTXISD::Tld4G2DS64Float;
  case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
    return NVPTXISD::Tld4B2DS64Float;
  case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
    return NVPTXISD::Tld4A2DS64Float;
  case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
    return NVPTXISD::Tld4R2DU64Float;
  case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
    return NVPTXISD::Tld4G2DU64Float;
  case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
    return NVPTXISD::Tld4B2DU64Float;
  case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
    return NVPTXISD::Tld4A2DU64Float;

  case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
    return NVPTXISD::TexUnified1DFloatS32;
  case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
    return NVPTXISD::TexUnified1DFloatFloat;
  case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
    return NVPTXISD::TexUnified1DFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
    return NVPTXISD::TexUnified1DFloatFloatGrad;
  case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
    return NVPTXISD::TexUnified1DS32S32;
  case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
    return NVPTXISD::TexUnified1DS32Float;
  case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
    return NVPTXISD::TexUnified1DS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
    return NVPTXISD::TexUnified1DS32FloatGrad;
  case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
    return NVPTXISD::TexUnified1DU32S32;
  case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
    return NVPTXISD::TexUnified1DU32Float;
  case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
    return NVPTXISD::TexUnified1DU32FloatLevel;
  case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
    return NVPTXISD::TexUnified1DU32FloatGrad;

  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
    return NVPTXISD::TexUnified1DArrayFloatS32;
  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
    return NVPTXISD::TexUnified1DArrayFloatFloat;
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
    return NVPTXISD::TexUnified1DArrayFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
    return NVPTXISD::TexUnified1DArrayFloatFloatGrad;
  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
    return NVPTXISD::TexUnified1DArrayS32S32;
  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
    return NVPTXISD::TexUnified1DArrayS32Float;
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
    return NVPTXISD::TexUnified1DArrayS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
    return NVPTXISD::TexUnified1DArrayS32FloatGrad;
  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
    return NVPTXISD::TexUnified1DArrayU32S32;
  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
    return NVPTXISD::TexUnified1DArrayU32Float;
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
    return NVPTXISD::TexUnified1DArrayU32FloatLevel;
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
    return NVPTXISD::TexUnified1DArrayU32FloatGrad;

  case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
    return NVPTXISD::TexUnified2DFloatS32;
  case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
    return NVPTXISD::TexUnified2DFloatFloat;
  case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
    return NVPTXISD::TexUnified2DFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
    return NVPTXISD::TexUnified2DFloatFloatGrad;
  case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
    return NVPTXISD::TexUnified2DS32S32;
  case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
    return NVPTXISD::TexUnified2DS32Float;
  case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
    return NVPTXISD::TexUnified2DS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
    return NVPTXISD::TexUnified2DS32FloatGrad;
  case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
    return NVPTXISD::TexUnified2DU32S32;
  case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
    return NVPTXISD::TexUnified2DU32Float;
  case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
    return NVPTXISD::TexUnified2DU32FloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
    return NVPTXISD::TexUnified2DU32FloatGrad;

  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
    return NVPTXISD::TexUnified2DArrayFloatS32;
  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
    return NVPTXISD::TexUnified2DArrayFloatFloat;
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
    return NVPTXISD::TexUnified2DArrayFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
    return NVPTXISD::TexUnified2DArrayFloatFloatGrad;
2870 case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
\r
2871 return NVPTXISD::TexUnified2DArrayS32S32;
\r
2872 case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
\r
2873 return NVPTXISD::TexUnified2DArrayS32Float;
\r
2874 case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
\r
2875 return NVPTXISD::TexUnified2DArrayS32FloatLevel;
\r
2876 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
\r
2877 return NVPTXISD::TexUnified2DArrayS32FloatGrad;
\r
2878 case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
\r
2879 return NVPTXISD::TexUnified2DArrayU32S32;
\r
2880 case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
\r
2881 return NVPTXISD::TexUnified2DArrayU32Float;
\r
2882 case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
\r
2883 return NVPTXISD::TexUnified2DArrayU32FloatLevel;
\r
2884 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
\r
2885 return NVPTXISD::TexUnified2DArrayU32FloatGrad;
\r
2887 case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
\r
2888 return NVPTXISD::TexUnified3DFloatS32;
\r
2889 case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
\r
2890 return NVPTXISD::TexUnified3DFloatFloat;
\r
2891 case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
\r
2892 return NVPTXISD::TexUnified3DFloatFloatLevel;
\r
2893 case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
\r
2894 return NVPTXISD::TexUnified3DFloatFloatGrad;
\r
2895 case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
\r
2896 return NVPTXISD::TexUnified3DS32S32;
\r
2897 case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
\r
2898 return NVPTXISD::TexUnified3DS32Float;
\r
2899 case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
\r
2900 return NVPTXISD::TexUnified3DS32FloatLevel;
\r
2901 case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
\r
2902 return NVPTXISD::TexUnified3DS32FloatGrad;
\r
2903 case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
\r
2904 return NVPTXISD::TexUnified3DU32S32;
\r
2905 case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
\r
2906 return NVPTXISD::TexUnified3DU32Float;
\r
2907 case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
\r
2908 return NVPTXISD::TexUnified3DU32FloatLevel;
\r
2909 case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
\r
2910 return NVPTXISD::TexUnified3DU32FloatGrad;
\r
2912 case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
\r
2913 return NVPTXISD::TexUnifiedCubeFloatFloat;
\r
2914 case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
\r
2915 return NVPTXISD::TexUnifiedCubeFloatFloatLevel;
\r
2916 case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
\r
2917 return NVPTXISD::TexUnifiedCubeS32Float;
\r
2918 case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
\r
2919 return NVPTXISD::TexUnifiedCubeS32FloatLevel;
\r
2920 case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
\r
2921 return NVPTXISD::TexUnifiedCubeU32Float;
\r
2922 case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
\r
2923 return NVPTXISD::TexUnifiedCubeU32FloatLevel;
\r
2925 case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
\r
2926 return NVPTXISD::TexUnifiedCubeArrayFloatFloat;
\r
2927 case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
\r
2928 return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel;
\r
2929 case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
\r
2930 return NVPTXISD::TexUnifiedCubeArrayS32Float;
\r
2931 case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
\r
2932 return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel;
\r
2933 case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
\r
2934 return NVPTXISD::TexUnifiedCubeArrayU32Float;
\r
2935 case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
\r
2936 return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;
\r
2938 case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
\r
2939 return NVPTXISD::Tld4UnifiedR2DFloatFloat;
\r
2940 case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
\r
2941 return NVPTXISD::Tld4UnifiedG2DFloatFloat;
\r
2942 case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
\r
2943 return NVPTXISD::Tld4UnifiedB2DFloatFloat;
\r
2944 case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
\r
2945 return NVPTXISD::Tld4UnifiedA2DFloatFloat;
\r
2946 case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
\r
2947 return NVPTXISD::Tld4UnifiedR2DS64Float;
\r
2948 case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
\r
2949 return NVPTXISD::Tld4UnifiedG2DS64Float;
\r
2950 case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
\r
2951 return NVPTXISD::Tld4UnifiedB2DS64Float;
\r
2952 case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
\r
2953 return NVPTXISD::Tld4UnifiedA2DS64Float;
\r
2954 case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
\r
2955 return NVPTXISD::Tld4UnifiedR2DU64Float;
\r
2956 case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
\r
2957 return NVPTXISD::Tld4UnifiedG2DU64Float;
\r
2958 case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
\r
2959 return NVPTXISD::Tld4UnifiedB2DU64Float;
\r
2960 case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
\r
2961 return NVPTXISD::Tld4UnifiedA2DU64Float;
\r
static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
  switch (Intrinsic) {
  default:
    return 0;
  case Intrinsic::nvvm_suld_1d_i8_clamp:
    return NVPTXISD::Suld1DI8Clamp;
  case Intrinsic::nvvm_suld_1d_i16_clamp:
    return NVPTXISD::Suld1DI16Clamp;
  case Intrinsic::nvvm_suld_1d_i32_clamp:
    return NVPTXISD::Suld1DI32Clamp;
  case Intrinsic::nvvm_suld_1d_i64_clamp:
    return NVPTXISD::Suld1DI64Clamp;
  case Intrinsic::nvvm_suld_1d_v2i8_clamp:
    return NVPTXISD::Suld1DV2I8Clamp;
  case Intrinsic::nvvm_suld_1d_v2i16_clamp:
    return NVPTXISD::Suld1DV2I16Clamp;
  case Intrinsic::nvvm_suld_1d_v2i32_clamp:
    return NVPTXISD::Suld1DV2I32Clamp;
  case Intrinsic::nvvm_suld_1d_v2i64_clamp:
    return NVPTXISD::Suld1DV2I64Clamp;
  case Intrinsic::nvvm_suld_1d_v4i8_clamp:
    return NVPTXISD::Suld1DV4I8Clamp;
  case Intrinsic::nvvm_suld_1d_v4i16_clamp:
    return NVPTXISD::Suld1DV4I16Clamp;
  case Intrinsic::nvvm_suld_1d_v4i32_clamp:
    return NVPTXISD::Suld1DV4I32Clamp;
  case Intrinsic::nvvm_suld_1d_array_i8_clamp:
    return NVPTXISD::Suld1DArrayI8Clamp;
  case Intrinsic::nvvm_suld_1d_array_i16_clamp:
    return NVPTXISD::Suld1DArrayI16Clamp;
  case Intrinsic::nvvm_suld_1d_array_i32_clamp:
    return NVPTXISD::Suld1DArrayI32Clamp;
  case Intrinsic::nvvm_suld_1d_array_i64_clamp:
    return NVPTXISD::Suld1DArrayI64Clamp;
  case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
    return NVPTXISD::Suld1DArrayV2I8Clamp;
  case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
    return NVPTXISD::Suld1DArrayV2I16Clamp;
  case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
    return NVPTXISD::Suld1DArrayV2I32Clamp;
  case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
    return NVPTXISD::Suld1DArrayV2I64Clamp;
  case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
    return NVPTXISD::Suld1DArrayV4I8Clamp;
  case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
    return NVPTXISD::Suld1DArrayV4I16Clamp;
  case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
    return NVPTXISD::Suld1DArrayV4I32Clamp;
  case Intrinsic::nvvm_suld_2d_i8_clamp:
    return NVPTXISD::Suld2DI8Clamp;
  case Intrinsic::nvvm_suld_2d_i16_clamp:
    return NVPTXISD::Suld2DI16Clamp;
  case Intrinsic::nvvm_suld_2d_i32_clamp:
    return NVPTXISD::Suld2DI32Clamp;
  case Intrinsic::nvvm_suld_2d_i64_clamp:
    return NVPTXISD::Suld2DI64Clamp;
  case Intrinsic::nvvm_suld_2d_v2i8_clamp:
    return NVPTXISD::Suld2DV2I8Clamp;
  case Intrinsic::nvvm_suld_2d_v2i16_clamp:
    return NVPTXISD::Suld2DV2I16Clamp;
  case Intrinsic::nvvm_suld_2d_v2i32_clamp:
    return NVPTXISD::Suld2DV2I32Clamp;
  case Intrinsic::nvvm_suld_2d_v2i64_clamp:
    return NVPTXISD::Suld2DV2I64Clamp;
  case Intrinsic::nvvm_suld_2d_v4i8_clamp:
    return NVPTXISD::Suld2DV4I8Clamp;
  case Intrinsic::nvvm_suld_2d_v4i16_clamp:
    return NVPTXISD::Suld2DV4I16Clamp;
  case Intrinsic::nvvm_suld_2d_v4i32_clamp:
    return NVPTXISD::Suld2DV4I32Clamp;
  case Intrinsic::nvvm_suld_2d_array_i8_clamp:
    return NVPTXISD::Suld2DArrayI8Clamp;
  case Intrinsic::nvvm_suld_2d_array_i16_clamp:
    return NVPTXISD::Suld2DArrayI16Clamp;
  case Intrinsic::nvvm_suld_2d_array_i32_clamp:
    return NVPTXISD::Suld2DArrayI32Clamp;
  case Intrinsic::nvvm_suld_2d_array_i64_clamp:
    return NVPTXISD::Suld2DArrayI64Clamp;
  case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
    return NVPTXISD::Suld2DArrayV2I8Clamp;
  case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
    return NVPTXISD::Suld2DArrayV2I16Clamp;
  case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
    return NVPTXISD::Suld2DArrayV2I32Clamp;
  case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
    return NVPTXISD::Suld2DArrayV2I64Clamp;
  case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
    return NVPTXISD::Suld2DArrayV4I8Clamp;
  case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
    return NVPTXISD::Suld2DArrayV4I16Clamp;
  case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
    return NVPTXISD::Suld2DArrayV4I32Clamp;
  case Intrinsic::nvvm_suld_3d_i8_clamp:
    return NVPTXISD::Suld3DI8Clamp;
  case Intrinsic::nvvm_suld_3d_i16_clamp:
    return NVPTXISD::Suld3DI16Clamp;
  case Intrinsic::nvvm_suld_3d_i32_clamp:
    return NVPTXISD::Suld3DI32Clamp;
  case Intrinsic::nvvm_suld_3d_i64_clamp:
    return NVPTXISD::Suld3DI64Clamp;
  case Intrinsic::nvvm_suld_3d_v2i8_clamp:
    return NVPTXISD::Suld3DV2I8Clamp;
  case Intrinsic::nvvm_suld_3d_v2i16_clamp:
    return NVPTXISD::Suld3DV2I16Clamp;
  case Intrinsic::nvvm_suld_3d_v2i32_clamp:
    return NVPTXISD::Suld3DV2I32Clamp;
  case Intrinsic::nvvm_suld_3d_v2i64_clamp:
    return NVPTXISD::Suld3DV2I64Clamp;
  case Intrinsic::nvvm_suld_3d_v4i8_clamp:
    return NVPTXISD::Suld3DV4I8Clamp;
  case Intrinsic::nvvm_suld_3d_v4i16_clamp:
    return NVPTXISD::Suld3DV4I16Clamp;
  case Intrinsic::nvvm_suld_3d_v4i32_clamp:
    return NVPTXISD::Suld3DV4I32Clamp;
  case Intrinsic::nvvm_suld_1d_i8_trap:
    return NVPTXISD::Suld1DI8Trap;
  case Intrinsic::nvvm_suld_1d_i16_trap:
    return NVPTXISD::Suld1DI16Trap;
  case Intrinsic::nvvm_suld_1d_i32_trap:
    return NVPTXISD::Suld1DI32Trap;
  case Intrinsic::nvvm_suld_1d_i64_trap:
    return NVPTXISD::Suld1DI64Trap;
  case Intrinsic::nvvm_suld_1d_v2i8_trap:
    return NVPTXISD::Suld1DV2I8Trap;
  case Intrinsic::nvvm_suld_1d_v2i16_trap:
    return NVPTXISD::Suld1DV2I16Trap;
  case Intrinsic::nvvm_suld_1d_v2i32_trap:
    return NVPTXISD::Suld1DV2I32Trap;
  case Intrinsic::nvvm_suld_1d_v2i64_trap:
    return NVPTXISD::Suld1DV2I64Trap;
  case Intrinsic::nvvm_suld_1d_v4i8_trap:
    return NVPTXISD::Suld1DV4I8Trap;
  case Intrinsic::nvvm_suld_1d_v4i16_trap:
    return NVPTXISD::Suld1DV4I16Trap;
  case Intrinsic::nvvm_suld_1d_v4i32_trap:
    return NVPTXISD::Suld1DV4I32Trap;
  case Intrinsic::nvvm_suld_1d_array_i8_trap:
    return NVPTXISD::Suld1DArrayI8Trap;
  case Intrinsic::nvvm_suld_1d_array_i16_trap:
    return NVPTXISD::Suld1DArrayI16Trap;
  case Intrinsic::nvvm_suld_1d_array_i32_trap:
    return NVPTXISD::Suld1DArrayI32Trap;
  case Intrinsic::nvvm_suld_1d_array_i64_trap:
    return NVPTXISD::Suld1DArrayI64Trap;
  case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
    return NVPTXISD::Suld1DArrayV2I8Trap;
  case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
    return NVPTXISD::Suld1DArrayV2I16Trap;
  case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
    return NVPTXISD::Suld1DArrayV2I32Trap;
  case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
    return NVPTXISD::Suld1DArrayV2I64Trap;
  case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
    return NVPTXISD::Suld1DArrayV4I8Trap;
  case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
    return NVPTXISD::Suld1DArrayV4I16Trap;
  case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
    return NVPTXISD::Suld1DArrayV4I32Trap;
  case Intrinsic::nvvm_suld_2d_i8_trap:
    return NVPTXISD::Suld2DI8Trap;
  case Intrinsic::nvvm_suld_2d_i16_trap:
    return NVPTXISD::Suld2DI16Trap;
  case Intrinsic::nvvm_suld_2d_i32_trap:
    return NVPTXISD::Suld2DI32Trap;
  case Intrinsic::nvvm_suld_2d_i64_trap:
    return NVPTXISD::Suld2DI64Trap;
  case Intrinsic::nvvm_suld_2d_v2i8_trap:
    return NVPTXISD::Suld2DV2I8Trap;
  case Intrinsic::nvvm_suld_2d_v2i16_trap:
    return NVPTXISD::Suld2DV2I16Trap;
  case Intrinsic::nvvm_suld_2d_v2i32_trap:
    return NVPTXISD::Suld2DV2I32Trap;
  case Intrinsic::nvvm_suld_2d_v2i64_trap:
    return NVPTXISD::Suld2DV2I64Trap;
  case Intrinsic::nvvm_suld_2d_v4i8_trap:
    return NVPTXISD::Suld2DV4I8Trap;
  case Intrinsic::nvvm_suld_2d_v4i16_trap:
    return NVPTXISD::Suld2DV4I16Trap;
  case Intrinsic::nvvm_suld_2d_v4i32_trap:
    return NVPTXISD::Suld2DV4I32Trap;
  case Intrinsic::nvvm_suld_2d_array_i8_trap:
    return NVPTXISD::Suld2DArrayI8Trap;
  case Intrinsic::nvvm_suld_2d_array_i16_trap:
    return NVPTXISD::Suld2DArrayI16Trap;
  case Intrinsic::nvvm_suld_2d_array_i32_trap:
    return NVPTXISD::Suld2DArrayI32Trap;
  case Intrinsic::nvvm_suld_2d_array_i64_trap:
    return NVPTXISD::Suld2DArrayI64Trap;
  case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
    return NVPTXISD::Suld2DArrayV2I8Trap;
  case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
    return NVPTXISD::Suld2DArrayV2I16Trap;
  case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
    return NVPTXISD::Suld2DArrayV2I32Trap;
  case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
    return NVPTXISD::Suld2DArrayV2I64Trap;
  case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
    return NVPTXISD::Suld2DArrayV4I8Trap;
  case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
    return NVPTXISD::Suld2DArrayV4I16Trap;
  case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
    return NVPTXISD::Suld2DArrayV4I32Trap;
  case Intrinsic::nvvm_suld_3d_i8_trap:
    return NVPTXISD::Suld3DI8Trap;
  case Intrinsic::nvvm_suld_3d_i16_trap:
    return NVPTXISD::Suld3DI16Trap;
  case Intrinsic::nvvm_suld_3d_i32_trap:
    return NVPTXISD::Suld3DI32Trap;
  case Intrinsic::nvvm_suld_3d_i64_trap:
    return NVPTXISD::Suld3DI64Trap;
  case Intrinsic::nvvm_suld_3d_v2i8_trap:
    return NVPTXISD::Suld3DV2I8Trap;
  case Intrinsic::nvvm_suld_3d_v2i16_trap:
    return NVPTXISD::Suld3DV2I16Trap;
  case Intrinsic::nvvm_suld_3d_v2i32_trap:
    return NVPTXISD::Suld3DV2I32Trap;
  case Intrinsic::nvvm_suld_3d_v2i64_trap:
    return NVPTXISD::Suld3DV2I64Trap;
  case Intrinsic::nvvm_suld_3d_v4i8_trap:
    return NVPTXISD::Suld3DV4I8Trap;
  case Intrinsic::nvvm_suld_3d_v4i16_trap:
    return NVPTXISD::Suld3DV4I16Trap;
  case Intrinsic::nvvm_suld_3d_v4i32_trap:
    return NVPTXISD::Suld3DV4I32Trap;
  case Intrinsic::nvvm_suld_1d_i8_zero:
    return NVPTXISD::Suld1DI8Zero;
  case Intrinsic::nvvm_suld_1d_i16_zero:
    return NVPTXISD::Suld1DI16Zero;
  case Intrinsic::nvvm_suld_1d_i32_zero:
    return NVPTXISD::Suld1DI32Zero;
  case Intrinsic::nvvm_suld_1d_i64_zero:
    return NVPTXISD::Suld1DI64Zero;
  case Intrinsic::nvvm_suld_1d_v2i8_zero:
    return NVPTXISD::Suld1DV2I8Zero;
  case Intrinsic::nvvm_suld_1d_v2i16_zero:
    return NVPTXISD::Suld1DV2I16Zero;
  case Intrinsic::nvvm_suld_1d_v2i32_zero:
    return NVPTXISD::Suld1DV2I32Zero;
  case Intrinsic::nvvm_suld_1d_v2i64_zero:
    return NVPTXISD::Suld1DV2I64Zero;
  case Intrinsic::nvvm_suld_1d_v4i8_zero:
    return NVPTXISD::Suld1DV4I8Zero;
  case Intrinsic::nvvm_suld_1d_v4i16_zero:
    return NVPTXISD::Suld1DV4I16Zero;
  case Intrinsic::nvvm_suld_1d_v4i32_zero:
    return NVPTXISD::Suld1DV4I32Zero;
  case Intrinsic::nvvm_suld_1d_array_i8_zero:
    return NVPTXISD::Suld1DArrayI8Zero;
  case Intrinsic::nvvm_suld_1d_array_i16_zero:
    return NVPTXISD::Suld1DArrayI16Zero;
  case Intrinsic::nvvm_suld_1d_array_i32_zero:
    return NVPTXISD::Suld1DArrayI32Zero;
  case Intrinsic::nvvm_suld_1d_array_i64_zero:
    return NVPTXISD::Suld1DArrayI64Zero;
  case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
    return NVPTXISD::Suld1DArrayV2I8Zero;
  case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
    return NVPTXISD::Suld1DArrayV2I16Zero;
  case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
    return NVPTXISD::Suld1DArrayV2I32Zero;
  case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
    return NVPTXISD::Suld1DArrayV2I64Zero;
  case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
    return NVPTXISD::Suld1DArrayV4I8Zero;
  case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
    return NVPTXISD::Suld1DArrayV4I16Zero;
  case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
    return NVPTXISD::Suld1DArrayV4I32Zero;
  case Intrinsic::nvvm_suld_2d_i8_zero:
    return NVPTXISD::Suld2DI8Zero;
  case Intrinsic::nvvm_suld_2d_i16_zero:
    return NVPTXISD::Suld2DI16Zero;
  case Intrinsic::nvvm_suld_2d_i32_zero:
    return NVPTXISD::Suld2DI32Zero;
  case Intrinsic::nvvm_suld_2d_i64_zero:
    return NVPTXISD::Suld2DI64Zero;
  case Intrinsic::nvvm_suld_2d_v2i8_zero:
    return NVPTXISD::Suld2DV2I8Zero;
  case Intrinsic::nvvm_suld_2d_v2i16_zero:
    return NVPTXISD::Suld2DV2I16Zero;
  case Intrinsic::nvvm_suld_2d_v2i32_zero:
    return NVPTXISD::Suld2DV2I32Zero;
  case Intrinsic::nvvm_suld_2d_v2i64_zero:
    return NVPTXISD::Suld2DV2I64Zero;
  case Intrinsic::nvvm_suld_2d_v4i8_zero:
    return NVPTXISD::Suld2DV4I8Zero;
  case Intrinsic::nvvm_suld_2d_v4i16_zero:
    return NVPTXISD::Suld2DV4I16Zero;
  case Intrinsic::nvvm_suld_2d_v4i32_zero:
    return NVPTXISD::Suld2DV4I32Zero;
  case Intrinsic::nvvm_suld_2d_array_i8_zero:
    return NVPTXISD::Suld2DArrayI8Zero;
  case Intrinsic::nvvm_suld_2d_array_i16_zero:
    return NVPTXISD::Suld2DArrayI16Zero;
  case Intrinsic::nvvm_suld_2d_array_i32_zero:
    return NVPTXISD::Suld2DArrayI32Zero;
  case Intrinsic::nvvm_suld_2d_array_i64_zero:
    return NVPTXISD::Suld2DArrayI64Zero;
  case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
    return NVPTXISD::Suld2DArrayV2I8Zero;
  case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
    return NVPTXISD::Suld2DArrayV2I16Zero;
  case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
    return NVPTXISD::Suld2DArrayV2I32Zero;
  case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
    return NVPTXISD::Suld2DArrayV2I64Zero;
  case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
    return NVPTXISD::Suld2DArrayV4I8Zero;
  case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
    return NVPTXISD::Suld2DArrayV4I16Zero;
  case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
    return NVPTXISD::Suld2DArrayV4I32Zero;
  case Intrinsic::nvvm_suld_3d_i8_zero:
    return NVPTXISD::Suld3DI8Zero;
  case Intrinsic::nvvm_suld_3d_i16_zero:
    return NVPTXISD::Suld3DI16Zero;
  case Intrinsic::nvvm_suld_3d_i32_zero:
    return NVPTXISD::Suld3DI32Zero;
  case Intrinsic::nvvm_suld_3d_i64_zero:
    return NVPTXISD::Suld3DI64Zero;
  case Intrinsic::nvvm_suld_3d_v2i8_zero:
    return NVPTXISD::Suld3DV2I8Zero;
  case Intrinsic::nvvm_suld_3d_v2i16_zero:
    return NVPTXISD::Suld3DV2I16Zero;
  case Intrinsic::nvvm_suld_3d_v2i32_zero:
    return NVPTXISD::Suld3DV2I32Zero;
  case Intrinsic::nvvm_suld_3d_v2i64_zero:
    return NVPTXISD::Suld3DV2I64Zero;
  case Intrinsic::nvvm_suld_3d_v4i8_zero:
    return NVPTXISD::Suld3DV4I8Zero;
  case Intrinsic::nvvm_suld_3d_v4i16_zero:
    return NVPTXISD::Suld3DV4I16Zero;
  case Intrinsic::nvvm_suld_3d_v4i32_zero:
    return NVPTXISD::Suld3DV4I32Zero;
  }
}
// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
// TgtMemIntrinsic because we need the information that is only available in
// the "Value" type of the destination pointer. In particular, the address
// space information.
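// For illustration (a hedged sketch, not code from this file): an ldu read
// such as
//
//   %v = call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %p, i32 4)
//
// carries its addrspace(1) qualifier only on the IR pointer operand, so it
// must be captured here (via the IntrinsicInfo below) before lowering
// discards the IR-level type information.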
bool NVPTXTargetLowering::getTgtMemIntrinsic(
    IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::nvvm_atomic_load_add_f32:
  case Intrinsic::nvvm_atomic_load_inc_32:
  case Intrinsic::nvvm_atomic_load_dec_32:

  case Intrinsic::nvvm_atomic_add_gen_f_cta:
  case Intrinsic::nvvm_atomic_add_gen_f_sys:
  case Intrinsic::nvvm_atomic_add_gen_i_cta:
  case Intrinsic::nvvm_atomic_add_gen_i_sys:
  case Intrinsic::nvvm_atomic_and_gen_i_cta:
  case Intrinsic::nvvm_atomic_and_gen_i_sys:
  case Intrinsic::nvvm_atomic_cas_gen_i_cta:
  case Intrinsic::nvvm_atomic_cas_gen_i_sys:
  case Intrinsic::nvvm_atomic_dec_gen_i_cta:
  case Intrinsic::nvvm_atomic_dec_gen_i_sys:
  case Intrinsic::nvvm_atomic_inc_gen_i_cta:
  case Intrinsic::nvvm_atomic_inc_gen_i_sys:
  case Intrinsic::nvvm_atomic_max_gen_i_cta:
  case Intrinsic::nvvm_atomic_max_gen_i_sys:
  case Intrinsic::nvvm_atomic_min_gen_i_cta:
  case Intrinsic::nvvm_atomic_min_gen_i_sys:
  case Intrinsic::nvvm_atomic_or_gen_i_cta:
  case Intrinsic::nvvm_atomic_or_gen_i_sys:
  case Intrinsic::nvvm_atomic_exch_gen_i_cta:
  case Intrinsic::nvvm_atomic_exch_gen_i_sys:
  case Intrinsic::nvvm_atomic_xor_gen_i_cta:
  case Intrinsic::nvvm_atomic_xor_gen_i_sys: {
    auto &DL = I.getModule()->getDataLayout();
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = getValueType(DL, I.getType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = true;
    Info.align = 0;
    return true;
  }

  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_p: {
    auto &DL = I.getModule()->getDataLayout();
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
      Info.memVT = getValueType(DL, I.getType());
    else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
      Info.memVT = getPointerTy(DL);
    else
      Info.memVT = getValueType(DL, I.getType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
    return true;
  }

  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_p: {
    auto &DL = I.getModule()->getDataLayout();
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
      Info.memVT = getValueType(DL, I.getType());
    else if (Intrinsic == Intrinsic::nvvm_ldg_global_p)
      Info.memVT = getPointerTy(DL);
    else
      Info.memVT = getValueType(DL, I.getType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
    return true;
  }

  case Intrinsic::nvvm_tex_1d_v4f32_s32:
  case Intrinsic::nvvm_tex_1d_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_v4f32_s32:
  case Intrinsic::nvvm_tex_2d_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_3d_v4f32_s32:
  case Intrinsic::nvvm_tex_3d_v4f32_f32:
  case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
  case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
    Info.opc = getOpcForTextureInstr(Intrinsic);
    Info.memVT = MVT::v4f32;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;

  case Intrinsic::nvvm_tex_1d_v4s32_s32:
  case Intrinsic::nvvm_tex_1d_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_v4s32_s32:
  case Intrinsic::nvvm_tex_2d_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_3d_v4s32_s32:
  case Intrinsic::nvvm_tex_3d_v4s32_f32:
  case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_v4u32_f32:
  case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
  case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
  case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_v4u32_s32:
  case Intrinsic::nvvm_tex_1d_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_v4u32_s32:
  case Intrinsic::nvvm_tex_2d_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_3d_v4u32_s32:
  case Intrinsic::nvvm_tex_3d_v4u32_f32:
  case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
  case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
    Info.opc = getOpcForTextureInstr(Intrinsic);
    Info.memVT = MVT::v4i32;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;

  case Intrinsic::nvvm_suld_1d_i8_clamp:
  case Intrinsic::nvvm_suld_1d_v2i8_clamp:
  case Intrinsic::nvvm_suld_1d_v4i8_clamp:
  case Intrinsic::nvvm_suld_1d_array_i8_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
  case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
  case Intrinsic::nvvm_suld_2d_i8_clamp:
  case Intrinsic::nvvm_suld_2d_v2i8_clamp:
  case Intrinsic::nvvm_suld_2d_v4i8_clamp:
  case Intrinsic::nvvm_suld_2d_array_i8_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
  case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
  case Intrinsic::nvvm_suld_3d_i8_clamp:
  case Intrinsic::nvvm_suld_3d_v2i8_clamp:
  case Intrinsic::nvvm_suld_3d_v4i8_clamp:
  case Intrinsic::nvvm_suld_1d_i8_trap:
  case Intrinsic::nvvm_suld_1d_v2i8_trap:
  case Intrinsic::nvvm_suld_1d_v4i8_trap:
  case Intrinsic::nvvm_suld_1d_array_i8_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
  case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
  case Intrinsic::nvvm_suld_2d_i8_trap:
  case Intrinsic::nvvm_suld_2d_v2i8_trap:
  case Intrinsic::nvvm_suld_2d_v4i8_trap:
  case Intrinsic::nvvm_suld_2d_array_i8_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
  case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
  case Intrinsic::nvvm_suld_3d_i8_trap:
  case Intrinsic::nvvm_suld_3d_v2i8_trap:
  case Intrinsic::nvvm_suld_3d_v4i8_trap:
  case Intrinsic::nvvm_suld_1d_i8_zero:
  case Intrinsic::nvvm_suld_1d_v2i8_zero:
  case Intrinsic::nvvm_suld_1d_v4i8_zero:
  case Intrinsic::nvvm_suld_1d_array_i8_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
  case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
  case Intrinsic::nvvm_suld_2d_i8_zero:
  case Intrinsic::nvvm_suld_2d_v2i8_zero:
  case Intrinsic::nvvm_suld_2d_v4i8_zero:
  case Intrinsic::nvvm_suld_2d_array_i8_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
  case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
  case Intrinsic::nvvm_suld_3d_i8_zero:
  case Intrinsic::nvvm_suld_3d_v2i8_zero:
  case Intrinsic::nvvm_suld_3d_v4i8_zero:
    Info.opc = getOpcForSurfaceInstr(Intrinsic);
    Info.memVT = MVT::i8;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;

  case Intrinsic::nvvm_suld_1d_i16_clamp:
  case Intrinsic::nvvm_suld_1d_v2i16_clamp:
  case Intrinsic::nvvm_suld_1d_v4i16_clamp:
  case Intrinsic::nvvm_suld_1d_array_i16_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
  case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
  case Intrinsic::nvvm_suld_2d_i16_clamp:
  case Intrinsic::nvvm_suld_2d_v2i16_clamp:
  case Intrinsic::nvvm_suld_2d_v4i16_clamp:
  case Intrinsic::nvvm_suld_2d_array_i16_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
  case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
  case Intrinsic::nvvm_suld_3d_i16_clamp:
  case Intrinsic::nvvm_suld_3d_v2i16_clamp:
  case Intrinsic::nvvm_suld_3d_v4i16_clamp:
  case Intrinsic::nvvm_suld_1d_i16_trap:
  case Intrinsic::nvvm_suld_1d_v2i16_trap:
  case Intrinsic::nvvm_suld_1d_v4i16_trap:
  case Intrinsic::nvvm_suld_1d_array_i16_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
  case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
  case Intrinsic::nvvm_suld_2d_i16_trap:
  case Intrinsic::nvvm_suld_2d_v2i16_trap:
  case Intrinsic::nvvm_suld_2d_v4i16_trap:
  case Intrinsic::nvvm_suld_2d_array_i16_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
  case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
  case Intrinsic::nvvm_suld_3d_i16_trap:
  case Intrinsic::nvvm_suld_3d_v2i16_trap:
  case Intrinsic::nvvm_suld_3d_v4i16_trap:
  case Intrinsic::nvvm_suld_1d_i16_zero:
  case Intrinsic::nvvm_suld_1d_v2i16_zero:
  case Intrinsic::nvvm_suld_1d_v4i16_zero:
  case Intrinsic::nvvm_suld_1d_array_i16_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
  case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
  case Intrinsic::nvvm_suld_2d_i16_zero:
  case Intrinsic::nvvm_suld_2d_v2i16_zero:
  case Intrinsic::nvvm_suld_2d_v4i16_zero:
  case Intrinsic::nvvm_suld_2d_array_i16_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
  case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
  case Intrinsic::nvvm_suld_3d_i16_zero:
  case Intrinsic::nvvm_suld_3d_v2i16_zero:
  case Intrinsic::nvvm_suld_3d_v4i16_zero:
    Info.opc = getOpcForSurfaceInstr(Intrinsic);
    Info.memVT = MVT::i16;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;

  case Intrinsic::nvvm_suld_1d_i32_clamp:
  case Intrinsic::nvvm_suld_1d_v2i32_clamp:
  case Intrinsic::nvvm_suld_1d_v4i32_clamp:
  case Intrinsic::nvvm_suld_1d_array_i32_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
  case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
  case Intrinsic::nvvm_suld_2d_i32_clamp:
  case Intrinsic::nvvm_suld_2d_v2i32_clamp:
  case Intrinsic::nvvm_suld_2d_v4i32_clamp:
  case Intrinsic::nvvm_suld_2d_array_i32_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
  case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
  case Intrinsic::nvvm_suld_3d_i32_clamp:
  case Intrinsic::nvvm_suld_3d_v2i32_clamp:
  case Intrinsic::nvvm_suld_3d_v4i32_clamp:
  case Intrinsic::nvvm_suld_1d_i32_trap:
  case Intrinsic::nvvm_suld_1d_v2i32_trap:
  case Intrinsic::nvvm_suld_1d_v4i32_trap:
  case Intrinsic::nvvm_suld_1d_array_i32_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
  case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
  case Intrinsic::nvvm_suld_2d_i32_trap:
  case Intrinsic::nvvm_suld_2d_v2i32_trap:
  case Intrinsic::nvvm_suld_2d_v4i32_trap:
  case Intrinsic::nvvm_suld_2d_array_i32_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
  case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
  case Intrinsic::nvvm_suld_3d_i32_trap:
  case Intrinsic::nvvm_suld_3d_v2i32_trap:
  case Intrinsic::nvvm_suld_3d_v4i32_trap:
  case Intrinsic::nvvm_suld_1d_i32_zero:
  case Intrinsic::nvvm_suld_1d_v2i32_zero:
  case Intrinsic::nvvm_suld_1d_v4i32_zero:
  case Intrinsic::nvvm_suld_1d_array_i32_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
  case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
  case Intrinsic::nvvm_suld_2d_i32_zero:
  case Intrinsic::nvvm_suld_2d_v2i32_zero:
  case Intrinsic::nvvm_suld_2d_v4i32_zero:
  case Intrinsic::nvvm_suld_2d_array_i32_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
  case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
  case Intrinsic::nvvm_suld_3d_i32_zero:
  case Intrinsic::nvvm_suld_3d_v2i32_zero:
  case Intrinsic::nvvm_suld_3d_v4i32_zero:
    Info.opc = getOpcForSurfaceInstr(Intrinsic);
    Info.memVT = MVT::i32;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;

  case Intrinsic::nvvm_suld_1d_i64_clamp:
  case Intrinsic::nvvm_suld_1d_v2i64_clamp:
  case Intrinsic::nvvm_suld_1d_array_i64_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
  case Intrinsic::nvvm_suld_2d_i64_clamp:
  case Intrinsic::nvvm_suld_2d_v2i64_clamp:
  case Intrinsic::nvvm_suld_2d_array_i64_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
  case Intrinsic::nvvm_suld_3d_i64_clamp:
  case Intrinsic::nvvm_suld_3d_v2i64_clamp:
  case Intrinsic::nvvm_suld_1d_i64_trap:
  case Intrinsic::nvvm_suld_1d_v2i64_trap:
  case Intrinsic::nvvm_suld_1d_array_i64_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
  case Intrinsic::nvvm_suld_2d_i64_trap:
  case Intrinsic::nvvm_suld_2d_v2i64_trap:
  case Intrinsic::nvvm_suld_2d_array_i64_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
  case Intrinsic::nvvm_suld_3d_i64_trap:
  case Intrinsic::nvvm_suld_3d_v2i64_trap:
  case Intrinsic::nvvm_suld_1d_i64_zero:
  case Intrinsic::nvvm_suld_1d_v2i64_zero:
  case Intrinsic::nvvm_suld_1d_array_i64_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
  case Intrinsic::nvvm_suld_2d_i64_zero:
  case Intrinsic::nvvm_suld_2d_v2i64_zero:
  case Intrinsic::nvvm_suld_2d_array_i64_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
  case Intrinsic::nvvm_suld_3d_i64_zero:
  case Intrinsic::nvvm_suld_3d_v2i64_zero:
    Info.opc = getOpcForSurfaceInstr(Intrinsic);
    Info.memVT = MVT::i64;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;
  }
  return false;
}
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
/// Used to guide target specific optimizations, like loop strength reduction
/// (LoopStrengthReduce.cpp) and memory optimization for address mode
/// (CodeGenPrepare.cpp)
bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS) const {
  // AddrMode - This represents an addressing mode of:
  //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
  //
  // The legal address modes are
  // - [avar]
  // - [areg]
  // - [areg+immoff]
  // - [immAddr]

  if (AM.BaseGV) {
    return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale;
  }

  switch (AM.Scale) {
  case 0: // "r", "r+i" or "i" is allowed
    break;
  case 1:
    if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
      return false;
    // Otherwise we have r+i.
    break;
  default:
    // No scale > 1 is allowed
    return false;
  }
  return true;
}
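// Illustrative mapping of the legal modes above onto PTX operands (assumed
// forms, shown only as a sketch):
//   [gvar]       e.g. ld.global.f32 %f0, [gvar];     BaseGV only
//   [%rd1+4]     e.g. ld.global.f32 %f0, [%rd1+4];   base register + imm offset
// A two-register form such as [%rd1+%rd2], or any Scale > 1, has no PTX
// addressing encoding and is rejected above.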
//===----------------------------------------------------------------------===//
// NVPTX Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
NVPTXTargetLowering::ConstraintType
NVPTXTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'b':
    case 'r':
    case 'h':
    case 'c':
    case 'l':
    case 'f':
    case 'd':
    case '0':
    case 'N':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}
std::pair<unsigned, const TargetRegisterClass *>
NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'b':
      return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
    case 'c':
      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
    case 'h':
      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
    case 'r':
      return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
    case 'l':
    case 'N':
      return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
    case 'f':
      return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
    case 'd':
      return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
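// For example, CUDA inline assembly such as
//   asm("add.s32 %0, %1, %2;" : "=r"(c) : "r"(a), "r"(b));
// reaches this point with the 'r' constraint, which the switch above resolves
// to Int32RegsRegClass (a 32-bit PTX virtual register).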
//===----------------------------------------------------------------------===//
// NVPTX DAG Combining
//===----------------------------------------------------------------------===//

bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
                                   CodeGenOpt::Level OptLevel) const {
  // Always honor command-line argument
  if (FMAContractLevelOpt.getNumOccurrences() > 0)
    return FMAContractLevelOpt > 0;

  // Do not contract if we're not optimizing the code.
  if (OptLevel == 0)
    return false;

  // Honor TargetOptions flags that explicitly say fusion is okay.
  if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast)
    return true;

  return allowUnsafeFPMath(MF);
}

bool NVPTXTargetLowering::allowUnsafeFPMath(MachineFunction &MF) const {
  // Honor TargetOptions flags that explicitly say unsafe math is okay.
  if (MF.getTarget().Options.UnsafeFPMath)
    return true;

  // Allow unsafe math if unsafe-fp-math attribute explicitly says so.
  const Function *F = MF.getFunction();
  if (F->hasFnAttribute("unsafe-fp-math")) {
    Attribute Attr = F->getFnAttribute("unsafe-fp-math");
    StringRef Val = Attr.getValueAsString();
    if (Val == "true")
      return true;
  }

  return false;
}
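// For reference (a sketch of the IR shape checked above): the attribute
// appears on the function as, e.g.,
//   define float @f(float %a) #0 { ... }
//   attributes #0 = { "unsafe-fp-math"="true" }
// which frontends typically emit under fast-math style options.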
/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1. This is a helper for PerformADDCombine that is
/// called with the default operands, and if that fails, with commuted
/// operands.
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
                                             TargetLowering::DAGCombinerInfo &DCI,
                                             const NVPTXSubtarget &Subtarget,
                                             CodeGenOpt::Level OptLevel) {
  SelectionDAG &DAG = DCI.DAG;
  // Skip non-integer, non-scalar case
  EVT VT = N0.getValueType();
  if (VT.isVector())
    return SDValue();

  // fold (add (mul a, b), c) -> (mad a, b, c)
  //
  if (N0.getOpcode() == ISD::MUL) {
    assert(VT.isInteger());
    // For integer:
    // Since integer multiply-add costs the same as integer multiply
    // but is more costly than integer add, do the fusion only when
    // the mul is only used in the add.
    if (OptLevel == CodeGenOpt::None || VT != MVT::i32 ||
        !N0.getNode()->hasOneUse())
      return SDValue();

    // Do the folding
    return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1), N1);
  }
  else if (N0.getOpcode() == ISD::FMUL) {
    if (VT == MVT::f32 || VT == MVT::f64) {
      const auto *TLI = static_cast<const NVPTXTargetLowering *>(
          &DAG.getTargetLoweringInfo());
      if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
        return SDValue();

      // For floating point:
      // Do the fusion only when the mul has less than 5 uses and all
      // are add.
      // The heuristic is that if a use is not an add, then that use
      // cannot be fused into fma, therefore mul is still needed anyway.
      // If there are more than 4 uses, even if they are all add, fusing
      // them will increase register pressure.
      //
      int numUses = 0;
      int nonAddCount = 0;
      for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
                                UE = N0.getNode()->use_end();
           UI != UE; ++UI) {
        numUses++;
        SDNode *User = *UI;
        if (User->getOpcode() != ISD::FADD)
          ++nonAddCount;
      }
      if (numUses >= 5)
        return SDValue();
      if (nonAddCount) {
        int orderNo = N->getIROrder();
        int orderNo2 = N0.getNode()->getIROrder();
        // Simple heuristic for estimating potential register pressure: the
        // difference between the two IR order numbers approximates the
        // distance between the def and the use, and the longer that distance,
        // the more likely the fusion is to increase register pressure.
        if (orderNo - orderNo2 < 500)
          return SDValue();

        // Now, check if at least one of the FMUL's operands is live beyond
        // the node N, which guarantees that the FMA will not increase
        // register pressure at node N.
        bool opIsLive = false;
        const SDNode *left = N0.getOperand(0).getNode();
        const SDNode *right = N0.getOperand(1).getNode();

        if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
          opIsLive = true;

        if (!opIsLive)
          for (SDNode::use_iterator UI = left->use_begin(),
                                    UE = left->use_end();
               UI != UE; ++UI) {
            SDNode *User = *UI;
            int orderNo3 = User->getIROrder();
            if (orderNo3 > orderNo) {
              opIsLive = true;
              break;
            }
          }

        if (!opIsLive)
          for (SDNode::use_iterator UI = right->use_begin(),
                                    UE = right->use_end();
               UI != UE; ++UI) {
            SDNode *User = *UI;
            int orderNo3 = User->getIROrder();
            if (orderNo3 > orderNo) {
              opIsLive = true;
              break;
            }
          }

        if (!opIsLive)
          return SDValue();
      }

      return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                         N0.getOperand(0), N0.getOperand(1), N1);
    }
  }

  return SDValue();
}
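// A sketch of the two rewrites performed above:
//   (add  (mul  a, b), c) -> (NVPTXISD::IMAD a, b, c)  for scalar i32
//   (fadd (fmul a, b), c) -> (ISD::FMA a, b, c)        for f32/f64
// with the f32/f64 case applied only when the register-pressure heuristics
// above judge the fusion profitable.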
/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
///
static SDValue PerformADDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const NVPTXSubtarget &Subtarget,
                                 CodeGenOpt::Level OptLevel) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // First try with the default operand order.
  if (SDValue Result =
          PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget, OptLevel))
    return Result;

  // If that didn't work, try again with the operands commuted.
  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel);
}
static SDValue PerformANDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  // The type legalizer turns a vector load of i8 values into a zextload to i16
  // registers, optionally ANY_EXTENDs it (if target type is integer),
  // and ANDs off the high 8 bits. Since we turn this load into a
  // target-specific DAG node, the DAG combiner fails to eliminate these AND
  // nodes. Do that here.
  SDValue Val = N->getOperand(0);
  SDValue Mask = N->getOperand(1);

  if (isa<ConstantSDNode>(Val)) {
    std::swap(Val, Mask);
  }

  SDValue AExt;
  // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and
  if (Val.getOpcode() == ISD::ANY_EXTEND) {
    AExt = Val;
    Val = Val->getOperand(0);
  }

  if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) {
    Val = Val->getOperand(0);
  }

  if (Val->getOpcode() == NVPTXISD::LoadV2 ||
      Val->getOpcode() == NVPTXISD::LoadV4) {
    ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
    if (!MaskCnst) {
      // Not an AND with a constant
      return SDValue();
    }

    uint64_t MaskVal = MaskCnst->getZExtValue();
    if (MaskVal != 0xff) {
      // Not an AND that chops off top 8 bits
      return SDValue();
    }

    MemSDNode *Mem = dyn_cast<MemSDNode>(Val);
    if (!Mem) {
      // Not a MemSDNode?!?
      return SDValue();
    }

    EVT MemVT = Mem->getMemoryVT();
    if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) {
      // We only handle the i8 case
      return SDValue();
    }

    unsigned ExtType =
      cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))->
        getZExtValue();
    if (ExtType == ISD::SEXTLOAD) {
      // If for some reason the load is a sextload, the and is needed to zero
      // out the high 8 bits
      return SDValue();
    }

    bool AddTo = false;
    if (AExt.getNode() != nullptr) {
      // Re-insert the ext as a zext.
      Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                            AExt.getValueType(), Val);
      AddTo = true;
    }

    // If we get here, the AND is unnecessary. Just replace it with the load.
    DCI.CombineTo(N, Val, AddTo);
  }

  return SDValue();
}
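// Sketch of the redundant pattern eliminated above: for a v2i8/v4i8 load the
// legalizer can produce something like
//   (and (any_extend (NVPTXISD::LoadV2 ...)), 0xff)
// and when the underlying load already zero-extends each i8 lane, the final
// 0xff mask contributes nothing, so the load result can be used directly.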
static SDValue PerformREMCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 CodeGenOpt::Level OptLevel) {
  assert(N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM);

  // Don't do anything at less than -O2.
  if (OptLevel < CodeGenOpt::Default)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  bool IsSigned = N->getOpcode() == ISD::SREM;
  unsigned DivOpc = IsSigned ? ISD::SDIV : ISD::UDIV;

  const SDValue &Num = N->getOperand(0);
  const SDValue &Den = N->getOperand(1);

  for (const SDNode *U : Num->uses()) {
    if (U->getOpcode() == DivOpc && U->getOperand(0) == Num &&
        U->getOperand(1) == Den) {
      // Num % Den -> Num - (Num / Den) * Den
      return DAG.getNode(ISD::SUB, DL, VT, Num,
                         DAG.getNode(ISD::MUL, DL, VT,
                                     DAG.getNode(DivOpc, DL, VT, Num, Den),
                                     Den));
    }
  }
  return SDValue();
}
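// Numeric check of the identity used above, with Num = 17 and Den = 5:
//   17 % 5 == 17 - (17 / 5) * 5 == 17 - 3 * 5 == 2
// so when the matching quotient is already computed elsewhere, the remainder
// costs only an extra mul and sub instead of a second full divide.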
enum OperandSignedness {
  Signed = 0,
  Unsigned,
  Unknown
};

/// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
/// that can be demoted to \p OptSize bits without loss of information. The
/// signedness of the operand, if determinable, is placed in \p S.
static bool IsMulWideOperandDemotable(SDValue Op,
                                      unsigned OptSize,
                                      OperandSignedness &S) {
  S = Unknown;

  if (Op.getOpcode() == ISD::SIGN_EXTEND ||
      Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT OrigVT = Op.getOperand(0).getValueType();
    if (OrigVT.getSizeInBits() <= OptSize) {
      S = Signed;
      return true;
    }
  } else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
    EVT OrigVT = Op.getOperand(0).getValueType();
    if (OrigVT.getSizeInBits() <= OptSize) {
      S = Unsigned;
      return true;
    }
  }

  return false;
}
/// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
/// be demoted to \p OptSize bits without loss of information. If the operands
/// contain a constant, it should appear as the RHS operand. The signedness of
/// the operands is placed in \p IsSigned.
static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
                                        unsigned OptSize,
                                        bool &IsSigned) {
  OperandSignedness LHSSign;

  // The LHS operand must be a demotable op
  if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign))
    return false;

  // We should have been able to determine the signedness from the LHS
  if (LHSSign == Unknown)
    return false;

  IsSigned = (LHSSign == Signed);

  // The RHS can be a demotable op or a constant
  if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
    const APInt &Val = CI->getAPIntValue();
    if (LHSSign == Unsigned)
      return Val.isIntN(OptSize);
    return Val.isSignedIntN(OptSize);
  }

  OperandSignedness RHSSign;
  if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
    return false;

  return LHSSign == RHSSign;
}

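// For example, with OptSize == 16, (sext i16 %x to i32) is demotable and
// signed, (zext i16 %x to i32) is demotable and unsigned, and a constant
// RHS is accepted whenever it fits in 16 bits under the LHS's signedness.
// A sign-extended LHS paired with a zero-extended RHS is rejected, since no
// single mul.wide variant covers mixed signedness.
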
/// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
/// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
/// works on both multiply DAG nodes and SHL DAG nodes with a constant shift
/// amount.
static SDValue TryMULWIDECombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  EVT MulType = N->getValueType(0);
  if (MulType != MVT::i32 && MulType != MVT::i64)
    return SDValue();

  SDLoc DL(N);
  unsigned OptSize = MulType.getSizeInBits() >> 1;
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Canonicalize the multiply so the constant (if any) is on the right
  if (N->getOpcode() == ISD::MUL) {
    if (isa<ConstantSDNode>(LHS))
      std::swap(LHS, RHS);
  }

  // If we have a SHL, determine the actual multiply amount
  if (N->getOpcode() == ISD::SHL) {
    ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS);
    if (!ShlRHS)
      return SDValue();

    APInt ShiftAmt = ShlRHS->getAPIntValue();
    unsigned BitWidth = MulType.getSizeInBits();
    if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) {
      APInt MulVal = APInt(BitWidth, 1) << ShiftAmt;
      RHS = DCI.DAG.getConstant(MulVal, DL, MulType);
    } else {
      return SDValue();
    }
  }

  bool Signed;
  // Verify that our operands are demotable
  if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed))
    return SDValue();

  EVT DemotedVT;
  if (MulType == MVT::i32)
    DemotedVT = MVT::i16;
  else
    DemotedVT = MVT::i32;

  // Truncate the operands to the correct size. Note that these are just for
  // type consistency and will (likely) be eliminated in later phases.
  SDValue TruncLHS =
      DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS);
  SDValue TruncRHS =
      DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS);

  unsigned Opc;
  if (Signed)
    Opc = NVPTXISD::MUL_WIDE_SIGNED;
  else
    Opc = NVPTXISD::MUL_WIDE_UNSIGNED;

  return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS);
}

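// The payoff is a half-width multiply with a full-width result, e.g.
//
//   mul.wide.s16 %r1, %rs1, %rs2;   // 16 x 16 -> 32-bit multiply
//
// instead of a 32-bit mul.lo.s32, and mul.wide.[su]32 likewise for the
// 64-bit case. A left shift by a constant k reaches this path as a multiply
// by (1 << k).
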
/// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
static SDValue PerformMULCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 CodeGenOpt::Level OptLevel) {
  if (OptLevel > 0) {
    // Try mul.wide combining at OptLevel > 0
    if (SDValue Ret = TryMULWIDECombine(N, DCI))
      return Ret;
  }

  return SDValue();
}

/// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
static SDValue PerformSHLCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 CodeGenOpt::Level OptLevel) {
  if (OptLevel > 0) {
    // Try mul.wide combining at OptLevel > 0
    if (SDValue Ret = TryMULWIDECombine(N, DCI))
      return Ret;
  }

  return SDValue();
}

static SDValue PerformSETCCCombine(SDNode *N,
                                   TargetLowering::DAGCombinerInfo &DCI) {
  EVT CCType = N->getValueType(0);
  SDValue A = N->getOperand(0);
  SDValue B = N->getOperand(1);

  if (CCType != MVT::v2i1 || A.getValueType() != MVT::v2f16)
    return SDValue();

  SDLoc DL(N);
  // setp.f16x2 returns two scalar predicates, which we need to
  // convert back to v2i1. The returned result will be scalarized by
  // the legalizer, but the comparison will remain a single vector
  // instruction.
  SDValue CCNode = DCI.DAG.getNode(NVPTXISD::SETP_F16X2, DL,
                                   DCI.DAG.getVTList(MVT::i1, MVT::i1),
                                   {A, B, N->getOperand(2)});
  return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, CCType, CCNode.getValue(0),
                         CCNode.getValue(1));
}

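// For example (illustrative; exact syntax per the PTX ISA), a lane-wise
// v2f16 less-than compare is emitted as a single
//
//   setp.lt.f16x2 %p1|%p2, %r1, %r2;
//
// writing one predicate per lane, rather than two scalar setp.lt.f16
// instructions.
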
SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
  switch (N->getOpcode()) {
  default:
    break;
  case ISD::ADD:
  case ISD::FADD:
    return PerformADDCombine(N, DCI, STI, OptLevel);
  case ISD::MUL:
    return PerformMULCombine(N, DCI, OptLevel);
  case ISD::SHL:
    return PerformSHLCombine(N, DCI, OptLevel);
  case ISD::AND:
    return PerformANDCombine(N, DCI);
  case ISD::UREM:
  case ISD::SREM:
    return PerformREMCombine(N, DCI, OptLevel);
  case ISD::SETCC:
    return PerformSETCCCombine(N, DCI);
  }
  return SDValue();
}

/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
                              SmallVectorImpl<SDValue> &Results) {
  EVT ResVT = N->getValueType(0);
  SDLoc DL(N);

  assert(ResVT.isVector() && "Vector load must have vector type");

  // We only handle "native" vector sizes for now, e.g. <4 x double> is not
  // legal. We can (and should) split that into 2 loads of <2 x double> here
  // but I'm leaving that as a TODO for now.
  assert(ResVT.isSimple() && "Can only handle simple types");
  switch (ResVT.getSimpleVT().SimpleTy) {
  default:
    return;
  case MVT::v2i8:
  case MVT::v2i16:
  case MVT::v2i32:
  case MVT::v2i64:
  case MVT::v2f16:
  case MVT::v2f32:
  case MVT::v2f64:
  case MVT::v4i8:
  case MVT::v4i16:
  case MVT::v4i32:
  case MVT::v4f16:
  case MVT::v4f32:
  case MVT::v8f16: // <4 x f16x2>
    // This is a "native" vector type
    break;
  }

  LoadSDNode *LD = cast<LoadSDNode>(N);

  unsigned Align = LD->getAlignment();
  auto &TD = DAG.getDataLayout();
  unsigned PrefAlign =
      TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
  if (Align < PrefAlign) {
    // This load is not sufficiently aligned, so bail out and let this vector
    // load be scalarized. Note that we may still be able to emit smaller
    // vector loads. For example, if we are loading a <4 x float> with an
    // alignment of 8, this check will fail but the legalizer will try again
    // with 2 x <2 x float>, which will succeed with an alignment of 8.
    return;
  }

  EVT EltVT = ResVT.getVectorElementType();
  unsigned NumElts = ResVT.getVectorNumElements();

  // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
  // Therefore, we must ensure the type is legal. For i1 and i8, we set the
  // loaded type to i16 and propagate the "real" type as the memory type.
  bool NeedTrunc = false;
  if (EltVT.getSizeInBits() < 16) {
    EltVT = MVT::i16;
    NeedTrunc = true;
  }

  unsigned Opcode = 0;
  SDVTList LdResVTs;
  bool LoadF16x2 = false;

  switch (NumElts) {
  default:
    return;
  case 2:
    Opcode = NVPTXISD::LoadV2;
    LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
    break;
  case 4: {
    Opcode = NVPTXISD::LoadV4;
    EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
    LdResVTs = DAG.getVTList(ListVTs);
    break;
  }
  case 8: {
    // v8f16 is a special case. PTX doesn't have ld.v8.f16
    // instruction. Instead, we split the vector into v2f16 chunks and
    // load them with ld.v4.b32.
    assert(EltVT == MVT::f16 && "Unsupported v8 vector type.");
    LoadF16x2 = true;
    Opcode = NVPTXISD::LoadV4;
    EVT ListVTs[] = {MVT::v2f16, MVT::v2f16, MVT::v2f16, MVT::v2f16,
                     MVT::Other};
    LdResVTs = DAG.getVTList(ListVTs);
    break;
  }
  }

  // Copy regular operands
  SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end());

  // The select routine does not have access to the LoadSDNode instance, so
  // pass along the extension information
  OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL));

  SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
                                          LD->getMemoryVT(),
                                          LD->getMemOperand());

  SmallVector<SDValue, 8> ScalarRes;
  if (LoadF16x2) {
    // Split v2f16 subvectors back into individual elements.
    NumElts /= 2;
    for (unsigned i = 0; i < NumElts; ++i) {
      SDValue SubVector = NewLD.getValue(i);
      SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector,
                               DAG.getIntPtrConstant(0, DL));
      SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector,
                               DAG.getIntPtrConstant(1, DL));
      ScalarRes.push_back(E0);
      ScalarRes.push_back(E1);
    }
  } else {
    for (unsigned i = 0; i < NumElts; ++i) {
      SDValue Res = NewLD.getValue(i);
      if (NeedTrunc)
        Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
      ScalarRes.push_back(Res);
    }
  }

  SDValue LoadChain = NewLD.getValue(NumElts);

  SDValue BuildVec = DAG.getBuildVector(ResVT, DL, ScalarRes);

  Results.push_back(BuildVec);
  Results.push_back(LoadChain);
}

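// For example (illustrative), a sufficiently aligned load of <2 x float>
// becomes a single NVPTXISD::LoadV2 node with two f32 results plus a chain
// (selected as ld.v2.f32), and the original vector value is rebuilt from
// the scalars with a BUILD_VECTOR.
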
static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &Results) {
  SDValue Chain = N->getOperand(0);
  SDValue Intrin = N->getOperand(1);
  SDLoc DL(N);

  // Get the intrinsic ID
  unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
  switch (IntrinNo) {
  default:
    return;
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_p:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_p: {
    EVT ResVT = N->getValueType(0);

    if (ResVT.isVector()) {
      // Vector LDG/LDU

      unsigned NumElts = ResVT.getVectorNumElements();
      EVT EltVT = ResVT.getVectorElementType();

      // Since LDU/LDG are target nodes, we cannot rely on DAG type
      // legalization. Therefore, we must ensure the type is legal. For i1 and
      // i8, we set the loaded type to i16 and propagate the "real" type as
      // the memory type.
      bool NeedTrunc = false;
      if (EltVT.getSizeInBits() < 16) {
        EltVT = MVT::i16;
        NeedTrunc = true;
      }

      unsigned Opcode = 0;
      SDVTList LdResVTs;

      switch (NumElts) {
      default:
        return;
      case 2:
        switch (IntrinNo) {
        default:
          return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV2;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV2;
          break;
        }
        LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
        break;
      case 4: {
        switch (IntrinNo) {
        default:
          return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV4;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV4;
          break;
        }
        EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
        LdResVTs = DAG.getVTList(ListVTs);
        break;
      }
      }

      SmallVector<SDValue, 8> OtherOps;

      // Copy regular operands
      OtherOps.push_back(Chain); // Chain
      // Skip operand 1 (intrinsic ID)
      OtherOps.append(N->op_begin() + 2, N->op_end());

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
                                              MemSD->getMemoryVT(),
                                              MemSD->getMemOperand());

      SmallVector<SDValue, 4> ScalarRes;

      for (unsigned i = 0; i < NumElts; ++i) {
        SDValue Res = NewLD.getValue(i);
        if (NeedTrunc)
          Res =
              DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
        ScalarRes.push_back(Res);
      }

      SDValue LoadChain = NewLD.getValue(NumElts);

      SDValue BuildVec =
          DAG.getBuildVector(ResVT, DL, ScalarRes);

      Results.push_back(BuildVec);
      Results.push_back(LoadChain);
    } else {
      // i8 LDG/LDU
      assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
             "Custom handling of non-i8 ldu/ldg?");

      // Just copy all operands as-is
      SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());

      // Force output to i16
      SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      // We make sure the memory type is i8, which will be used during isel
      // to select the proper instruction.
      SDValue NewLD =
          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
                                  MVT::i8, MemSD->getMemOperand());

      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
                                    NewLD.getValue(0)));
      Results.push_back(NewLD.getValue(1));
    }
  }
  }
}

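// For example (illustrative), @llvm.nvvm.ldg.global.f returning <4 x float>
// becomes a single NVPTXISD::LDGV4 node (selected as a non-coherent
// ld.global.nc.v4.f32), while a scalar i8 ldg/ldu is widened to an i16
// result and truncated back, since i8 is not a legal NVPTX register type.
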
void NVPTXTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    report_fatal_error("Unhandled custom legalization");
  case ISD::LOAD:
    ReplaceLoadVector(N, DAG, Results);
    return;
  case ISD::INTRINSIC_W_CHAIN:
    ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
    return;
  }
}

// Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file.
void NVPTXSection::anchor() {}

NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
  delete static_cast<NVPTXSection *>(TextSection);
  delete static_cast<NVPTXSection *>(DataSection);
  delete static_cast<NVPTXSection *>(BSSSection);
  delete static_cast<NVPTXSection *>(ReadOnlySection);

  delete static_cast<NVPTXSection *>(StaticCtorSection);
  delete static_cast<NVPTXSection *>(StaticDtorSection);
  delete static_cast<NVPTXSection *>(LSDASection);
  delete static_cast<NVPTXSection *>(EHFrameSection);
  delete static_cast<NVPTXSection *>(DwarfAbbrevSection);
  delete static_cast<NVPTXSection *>(DwarfInfoSection);
  delete static_cast<NVPTXSection *>(DwarfLineSection);
  delete static_cast<NVPTXSection *>(DwarfFrameSection);
  delete static_cast<NVPTXSection *>(DwarfPubTypesSection);
  delete static_cast<const NVPTXSection *>(DwarfDebugInlineSection);
  delete static_cast<NVPTXSection *>(DwarfStrSection);
  delete static_cast<NVPTXSection *>(DwarfLocSection);
  delete static_cast<NVPTXSection *>(DwarfARangesSection);
  delete static_cast<NVPTXSection *>(DwarfRangesSection);
  delete static_cast<NVPTXSection *>(DwarfMacinfoSection);
}

MCSection *NVPTXTargetObjectFile::SelectSectionForGlobal(
    const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
  return getDataSection();
}