From d6663f565ca0b4400e4f07b78405e3ff6eb246f3 Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Thu, 17 Jul 2014 11:59:04 +0000 Subject: [PATCH] [NVPTX] Add more surface/texture intrinsics, including CUDA unified texture fetch This also uses TSFlags to mark machine instructions that are surface/texture accesses, as well as the vector width for surface operations. This is used to simplify some of the switch statements that need to detect surface/texture instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213256 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsNVVM.td | 2021 ++++++++++++- lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h | 11 + lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 232 +- lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 1844 +++++++++-- lib/Target/NVPTX/NVPTXISelLowering.cpp | 1386 ++++++++- lib/Target/NVPTX/NVPTXISelLowering.h | 310 +- lib/Target/NVPTX/NVPTXInstrFormats.td | 24 +- lib/Target/NVPTX/NVPTXIntrinsics.td | 4025 ++++++++++++++++++++++--- lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp | 278 +- lib/Target/NVPTX/NVPTXSubtarget.h | 7 +- test/CodeGen/NVPTX/surf-read-cuda.ll | 53 + test/CodeGen/NVPTX/surf-write-cuda.ll | 42 + test/CodeGen/NVPTX/tex-read-cuda.ll | 46 + test/CodeGen/NVPTX/tex-read.ll | 4 +- 14 files changed, 8927 insertions(+), 1356 deletions(-) create mode 100644 test/CodeGen/NVPTX/surf-read-cuda.ll create mode 100644 test/CodeGen/NVPTX/surf-write-cuda.ll create mode 100644 test/CodeGen/NVPTX/tex-read-cuda.ll diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td index 6baf01875d2..cd512841a1a 100644 --- a/include/llvm/IR/IntrinsicsNVVM.td +++ b/include/llvm/IR/IntrinsicsNVVM.td @@ -1041,10 +1041,11 @@ def int_nvvm_read_ptx_sreg_envreg31 // Texture Fetch -def int_nvvm_tex_1d_v4f32_i32 +// texmode_independent +def int_nvvm_tex_1d_v4f32_s32 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], [], - "llvm.nvvm.tex.1d.v4f32.i32">; + "llvm.nvvm.tex.1d.v4f32.s32">; def int_nvvm_tex_1d_v4f32_f32 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [llvm_i64_ty, llvm_i64_ty, llvm_float_ty], [], @@ -1058,28 +1059,45 @@ def int_nvvm_tex_1d_grad_v4f32_f32 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], "llvm.nvvm.tex.1d.grad.v4f32.f32">; -def int_nvvm_tex_1d_v4i32_i32 +def int_nvvm_tex_1d_v4s32_s32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], [], - "llvm.nvvm.tex.1d.v4i32.i32">; -def int_nvvm_tex_1d_v4i32_f32 + "llvm.nvvm.tex.1d.v4s32.s32">; +def int_nvvm_tex_1d_v4s32_f32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_float_ty], [], - "llvm.nvvm.tex.1d.v4i32.f32">; -def int_nvvm_tex_1d_level_v4i32_f32 + "llvm.nvvm.tex.1d.v4s32.f32">; +def int_nvvm_tex_1d_level_v4s32_f32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], - "llvm.nvvm.tex.1d.level.v4i32.f32.level">; -def int_nvvm_tex_1d_grad_v4i32_f32 + "llvm.nvvm.tex.1d.level.v4s32.f32">; +def int_nvvm_tex_1d_grad_v4s32_f32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], - "llvm.nvvm.tex.1d.grad.v4i32.f32">; + "llvm.nvvm.tex.1d.grad.v4s32.f32">; +def int_nvvm_tex_1d_v4u32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, 
llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.tex.1d.v4u32.s32">; +def int_nvvm_tex_1d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty], [], + "llvm.nvvm.tex.1d.v4u32.f32">; +def int_nvvm_tex_1d_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.1d.level.v4u32.f32">; +def int_nvvm_tex_1d_grad_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.1d.grad.v4u32.f32">; -def int_nvvm_tex_1d_array_v4f32_i32 +def int_nvvm_tex_1d_array_v4f32_s32 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.tex.1d.array.v4f32.i32">; + "llvm.nvvm.tex.1d.array.v4f32.s32">; def int_nvvm_tex_1d_array_v4f32_f32 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [], @@ -1094,29 +1112,47 @@ def int_nvvm_tex_1d_array_grad_v4f32_f32 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], "llvm.nvvm.tex.1d.array.grad.v4f32.f32">; -def int_nvvm_tex_1d_array_v4i32_i32 +def int_nvvm_tex_1d_array_v4s32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.tex.1d.array.v4s32.s32">; +def int_nvvm_tex_1d_array_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [], + "llvm.nvvm.tex.1d.array.v4s32.f32">; +def int_nvvm_tex_1d_array_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.1d.array.level.v4s32.f32">; +def int_nvvm_tex_1d_array_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.1d.array.grad.v4s32.f32">; +def int_nvvm_tex_1d_array_v4u32_s32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.tex.1d.array.v4i32.i32">; -def int_nvvm_tex_1d_array_v4i32_f32 + "llvm.nvvm.tex.1d.array.v4u32.s32">; +def int_nvvm_tex_1d_array_v4u32_f32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [], - "llvm.nvvm.tex.1d.array.v4i32.f32">; -def int_nvvm_tex_1d_array_level_v4i32_f32 + "llvm.nvvm.tex.1d.array.v4u32.f32">; +def int_nvvm_tex_1d_array_level_v4u32_f32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty], [], - "llvm.nvvm.tex.1d.array.level.v4i32.f32">; -def int_nvvm_tex_1d_array_grad_v4i32_f32 + "llvm.nvvm.tex.1d.array.level.v4u32.f32">; +def int_nvvm_tex_1d_array_grad_v4u32_f32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], - "llvm.nvvm.tex.1d.array.grad.v4i32.f32">; + "llvm.nvvm.tex.1d.array.grad.v4u32.f32">; -def int_nvvm_tex_2d_v4f32_i32 +def int_nvvm_tex_2d_v4f32_s32 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], 
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.tex.2d.v4f32.i32">; + "llvm.nvvm.tex.2d.v4f32.s32">; def int_nvvm_tex_2d_v4f32_f32 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], @@ -1131,30 +1167,48 @@ def int_nvvm_tex_2d_grad_v4f32_f32 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], "llvm.nvvm.tex.2d.grad.v4f32.f32">; -def int_nvvm_tex_2d_v4i32_i32 +def int_nvvm_tex_2d_v4s32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.tex.2d.v4s32.s32">; +def int_nvvm_tex_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.2d.v4s32.f32">; +def int_nvvm_tex_2d_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.2d.level.v4s32.f32">; +def int_nvvm_tex_2d_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.2d.grad.v4s32.f32">; +def int_nvvm_tex_2d_v4u32_s32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.tex.2d.v4i32.i32">; -def int_nvvm_tex_2d_v4i32_f32 + "llvm.nvvm.tex.2d.v4u32.s32">; +def int_nvvm_tex_2d_v4u32_f32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], - "llvm.nvvm.tex.2d.v4i32.f32">; -def int_nvvm_tex_2d_level_v4i32_f32 + "llvm.nvvm.tex.2d.v4u32.f32">; +def int_nvvm_tex_2d_level_v4u32_f32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], - "llvm.nvvm.tex.2d.level.v4i32.f32">; -def int_nvvm_tex_2d_grad_v4i32_f32 + "llvm.nvvm.tex.2d.level.v4u32.f32">; +def int_nvvm_tex_2d_grad_v4u32_f32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], - "llvm.nvvm.tex.2d.grad.v4i32.f32">; + "llvm.nvvm.tex.2d.grad.v4u32.f32">; -def int_nvvm_tex_2d_array_v4f32_i32 +def int_nvvm_tex_2d_array_v4f32_s32 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.tex.2d.array.v4f32.i32">; + "llvm.nvvm.tex.2d.array.v4f32.s32">; def int_nvvm_tex_2d_array_v4f32_f32 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, @@ -1171,32 +1225,53 @@ def int_nvvm_tex_2d_array_grad_v4f32_f32 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], "llvm.nvvm.tex.2d.array.grad.v4f32.f32">; -def int_nvvm_tex_2d_array_v4i32_i32 +def int_nvvm_tex_2d_array_v4s32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty], [], + "llvm.nvvm.tex.2d.array.v4s32.s32">; +def int_nvvm_tex_2d_array_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, 
llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.2d.array.v4s32.f32">; +def int_nvvm_tex_2d_array_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.2d.array.level.v4s32.f32">; +def int_nvvm_tex_2d_array_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.2d.array.grad.v4s32.f32">; +def int_nvvm_tex_2d_array_v4u32_s32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.tex.2d.array.v4i32.i32">; -def int_nvvm_tex_2d_array_v4i32_f32 + "llvm.nvvm.tex.2d.array.v4u32.s32">; +def int_nvvm_tex_2d_array_v4u32_f32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty], [], - "llvm.nvvm.tex.2d.array.v4i32.f32">; -def int_nvvm_tex_2d_array_level_v4i32_f32 + "llvm.nvvm.tex.2d.array.v4u32.f32">; +def int_nvvm_tex_2d_array_level_v4u32_f32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], - "llvm.nvvm.tex.2d.array.level.v4i32.f32">; -def int_nvvm_tex_2d_array_grad_v4i32_f32 + "llvm.nvvm.tex.2d.array.level.v4u32.f32">; +def int_nvvm_tex_2d_array_grad_v4u32_f32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], - "llvm.nvvm.tex.2d.array.grad.v4i32.f32">; + "llvm.nvvm.tex.2d.array.grad.v4u32.f32">; -def int_nvvm_tex_3d_v4f32_i32 +def int_nvvm_tex_3d_v4f32_s32 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [], "llvm.nvvm.tex.3d.v4f32.i32">; + [], "llvm.nvvm.tex.3d.v4f32.s32">; def int_nvvm_tex_3d_v4f32_f32 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, @@ -1213,212 +1288,1237 @@ def int_nvvm_tex_3d_grad_v4f32_f32 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], "llvm.nvvm.tex.3d.grad.v4f32.f32">; -def int_nvvm_tex_3d_v4i32_i32 +def int_nvvm_tex_3d_v4s32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [], "llvm.nvvm.tex.3d.v4s32.s32">; +def int_nvvm_tex_3d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.3d.v4s32.f32">; +def int_nvvm_tex_3d_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.3d.level.v4s32.f32">; +def int_nvvm_tex_3d_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.3d.grad.v4s32.f32">; +def 
int_nvvm_tex_3d_v4u32_s32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [], "llvm.nvvm.tex.3d.v4i32.i32">; -def int_nvvm_tex_3d_v4i32_f32 + [], "llvm.nvvm.tex.3d.v4u32.s32">; +def int_nvvm_tex_3d_v4u32_f32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], - "llvm.nvvm.tex.3d.v4i32.f32">; -def int_nvvm_tex_3d_level_v4i32_f32 + "llvm.nvvm.tex.3d.v4u32.f32">; +def int_nvvm_tex_3d_level_v4u32_f32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], - "llvm.nvvm.tex.3d.level.v4i32.f32">; -def int_nvvm_tex_3d_grad_v4i32_f32 + "llvm.nvvm.tex.3d.level.v4u32.f32">; +def int_nvvm_tex_3d_grad_v4u32_f32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], - "llvm.nvvm.tex.3d.grad.v4i32.f32">; + "llvm.nvvm.tex.3d.grad.v4u32.f32">; -// Surface Load -def int_nvvm_suld_1d_i8_trap - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.1d.i8.trap">; -def int_nvvm_suld_1d_i16_trap - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.1d.i16.trap">; -def int_nvvm_suld_1d_i32_trap - : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.1d.i32.trap">; -def int_nvvm_suld_1d_v2i8_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.1d.v2i8.trap">; -def int_nvvm_suld_1d_v2i16_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.1d.v2i16.trap">; -def int_nvvm_suld_1d_v2i32_trap - : Intrinsic<[llvm_i32_ty, llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.1d.v2i32.trap">; -def int_nvvm_suld_1d_v4i8_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], +def int_nvvm_tex_cube_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.cube.v4f32.f32">; +def int_nvvm_tex_cube_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.cube.level.v4f32.f32">; +def int_nvvm_tex_cube_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.cube.v4s32.f32">; +def int_nvvm_tex_cube_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.cube.level.v4s32.f32">; +def int_nvvm_tex_cube_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.cube.v4u32.f32">; +def int_nvvm_tex_cube_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.cube.level.v4u32.f32">; + +def 
int_nvvm_tex_cube_array_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.cube.array.v4f32.f32">; +def int_nvvm_tex_cube_array_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.cube.array.level.v4f32.f32">; +def int_nvvm_tex_cube_array_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.cube.array.v4s32.f32">; +def int_nvvm_tex_cube_array_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.cube.array.level.v4s32.f32">; +def int_nvvm_tex_cube_array_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.cube.array.v4u32.f32">; +def int_nvvm_tex_cube_array_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.cube.array.level.v4u32.f32">; + +def int_nvvm_tld4_r_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.r.2d.v4f32.f32">; +def int_nvvm_tld4_g_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.g.2d.v4f32.f32">; +def int_nvvm_tld4_b_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.b.2d.v4f32.f32">; +def int_nvvm_tld4_a_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.a.2d.v4f32.f32">; +def int_nvvm_tld4_r_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.r.2d.v4s32.f32">; +def int_nvvm_tld4_g_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.g.2d.v4s32.f32">; +def int_nvvm_tld4_b_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.b.2d.v4s32.f32">; +def int_nvvm_tld4_a_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.a.2d.v4s32.f32">; +def int_nvvm_tld4_r_2d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.r.2d.v4u32.f32">; +def int_nvvm_tld4_g_2d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, 
llvm_float_ty], [], + "llvm.nvvm.tld4.g.2d.v4u32.f32">; +def int_nvvm_tld4_b_2d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.b.2d.v4u32.f32">; +def int_nvvm_tld4_a_2d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.a.2d.v4u32.f32">; + + +// texmode_unified +def int_nvvm_tex_unified_1d_v4f32_s32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.1d.v4i8.trap">; -def int_nvvm_suld_1d_v4i16_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + "llvm.nvvm.tex.unified.1d.v4f32.s32">; +def int_nvvm_tex_unified_1d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.1d.v4f32.f32">; +def int_nvvm_tex_unified_1d_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.1d.level.v4f32.f32">; +def int_nvvm_tex_unified_1d_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.unified.1d.grad.v4f32.f32">; +def int_nvvm_tex_unified_1d_v4s32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.1d.v4i16.trap">; -def int_nvvm_suld_1d_v4i32_trap + "llvm.nvvm.tex.unified.1d.v4s32.s32">; +def int_nvvm_tex_unified_1d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.1d.v4s32.f32">; +def int_nvvm_tex_unified_1d_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.1d.level.v4s32.f32">; +def int_nvvm_tex_unified_1d_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.unified.1d.grad.v4s32.f32">; +def int_nvvm_tex_unified_1d_v4u32_s32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.1d.v4i32.trap">; + "llvm.nvvm.tex.unified.1d.v4u32.s32">; +def int_nvvm_tex_unified_1d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.1d.v4u32.f32">; +def int_nvvm_tex_unified_1d_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.1d.level.v4u32.f32">; +def int_nvvm_tex_unified_1d_grad_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.unified.1d.grad.v4u32.f32">; -def int_nvvm_suld_1d_array_i8_trap - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.1d.array.i8.trap">; -def int_nvvm_suld_1d_array_i16_trap - : Intrinsic<[llvm_i16_ty], - [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.1d.array.i16.trap">; -def int_nvvm_suld_1d_array_i32_trap - : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty, 
llvm_i32_ty], [], - "llvm.nvvm.suld.1d.array.i32.trap">; -def int_nvvm_suld_1d_array_v2i8_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], +def int_nvvm_tex_unified_1d_array_v4f32_s32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.1d.array.v2i8.trap">; -def int_nvvm_suld_1d_array_v2i16_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + "llvm.nvvm.tex.unified.1d.array.v4f32.s32">; +def int_nvvm_tex_unified_1d_array_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.1d.array.v4f32.f32">; +def int_nvvm_tex_unified_1d_array_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.unified.1d.array.level.v4f32.f32">; +def int_nvvm_tex_unified_1d_array_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.1d.array.grad.v4f32.f32">; +def int_nvvm_tex_unified_1d_array_v4s32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.1d.array.v2i16.trap">; -def int_nvvm_suld_1d_array_v2i32_trap - : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + "llvm.nvvm.tex.unified.1d.array.v4s32.s32">; +def int_nvvm_tex_unified_1d_array_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.1d.array.v4s32.f32">; +def int_nvvm_tex_unified_1d_array_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.unified.1d.array.level.v4s32.f32">; +def int_nvvm_tex_unified_1d_array_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.1d.array.grad.v4s32.f32">; +def int_nvvm_tex_unified_1d_array_v4u32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.1d.array.v2i32.trap">; -def int_nvvm_suld_1d_array_v4i8_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + "llvm.nvvm.tex.unified.1d.array.v4u32.s32">; +def int_nvvm_tex_unified_1d_array_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.1d.array.v4u32.f32">; +def int_nvvm_tex_unified_1d_array_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.unified.1d.array.level.v4u32.f32">; +def int_nvvm_tex_unified_1d_array_grad_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.1d.array.grad.v4u32.f32">; + +def int_nvvm_tex_unified_2d_v4f32_s32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.1d.array.v4i8.trap">; -def int_nvvm_suld_1d_array_v4i16_trap - : Intrinsic<[llvm_i16_ty, llvm_i16_ty, 
llvm_i16_ty, llvm_i16_ty], + "llvm.nvvm.tex.unified.2d.v4f32.s32">; +def int_nvvm_tex_unified_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.2d.v4f32.f32">; +def int_nvvm_tex_unified_2d_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.unified.2d.level.v4f32.f32">; +def int_nvvm_tex_unified_2d_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.2d.grad.v4f32.f32">; +def int_nvvm_tex_unified_2d_v4s32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.1d.array.v4i16.trap">; -def int_nvvm_suld_1d_array_v4i32_trap + "llvm.nvvm.tex.unified.2d.v4s32.s32">; +def int_nvvm_tex_unified_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.2d.v4s32.f32">; +def int_nvvm_tex_unified_2d_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.unified.2d.level.v4s32.f32">; +def int_nvvm_tex_unified_2d_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.2d.grad.v4s32.f32">; +def int_nvvm_tex_unified_2d_v4u32_s32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.1d.array.v4i32.trap">; + "llvm.nvvm.tex.unified.2d.v4u32.s32">; +def int_nvvm_tex_unified_2d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.2d.v4u32.f32">; +def int_nvvm_tex_unified_2d_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.unified.2d.level.v4u32.f32">; +def int_nvvm_tex_unified_2d_grad_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.2d.grad.v4u32.f32">; + +def int_nvvm_tex_unified_2d_array_v4f32_s32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty], [], + "llvm.nvvm.tex.unified.2d.array.v4f32.s32">; +def int_nvvm_tex_unified_2d_array_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.unified.2d.array.v4f32.f32">; +def int_nvvm_tex_unified_2d_array_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.2d.array.level.v4f32.f32">; +def int_nvvm_tex_unified_2d_array_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + 
[llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.unified.2d.array.grad.v4f32.f32">; +def int_nvvm_tex_unified_2d_array_v4s32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty], [], + "llvm.nvvm.tex.unified.2d.array.v4s32.s32">; +def int_nvvm_tex_unified_2d_array_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.unified.2d.array.v4s32.f32">; +def int_nvvm_tex_unified_2d_array_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.2d.array.level.v4s32.f32">; +def int_nvvm_tex_unified_2d_array_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.unified.2d.array.grad.v4s32.f32">; +def int_nvvm_tex_unified_2d_array_v4u32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty], [], + "llvm.nvvm.tex.unified.2d.array.v4u32.s32">; +def int_nvvm_tex_unified_2d_array_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.unified.2d.array.v4u32.f32">; +def int_nvvm_tex_unified_2d_array_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.2d.array.level.v4u32.f32">; +def int_nvvm_tex_unified_2d_array_grad_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.unified.2d.array.grad.v4u32.f32">; + +def int_nvvm_tex_unified_3d_v4f32_s32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [], "llvm.nvvm.tex.unified.3d.v4f32.s32">; +def int_nvvm_tex_unified_3d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.unified.3d.v4f32.f32">; +def int_nvvm_tex_unified_3d_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.3d.level.v4f32.f32">; +def int_nvvm_tex_unified_3d_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.3d.grad.v4f32.f32">; +def int_nvvm_tex_unified_3d_v4s32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [], "llvm.nvvm.tex.unified.3d.v4s32.s32">; +def int_nvvm_tex_unified_3d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, 
llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.unified.3d.v4s32.f32">; +def int_nvvm_tex_unified_3d_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.3d.level.v4s32.f32">; +def int_nvvm_tex_unified_3d_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.3d.grad.v4s32.f32">; +def int_nvvm_tex_unified_3d_v4u32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [], "llvm.nvvm.tex.unified.3d.v4u32.s32">; +def int_nvvm_tex_unified_3d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.unified.3d.v4u32.f32">; +def int_nvvm_tex_unified_3d_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.3d.level.v4u32.f32">; +def int_nvvm_tex_unified_3d_grad_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.3d.grad.v4u32.f32">; + +def int_nvvm_tex_unified_cube_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.cube.v4f32.f32">; +def int_nvvm_tex_unified_cube_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.cube.level.v4f32.f32">; +def int_nvvm_tex_unified_cube_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.cube.v4s32.f32">; +def int_nvvm_tex_unified_cube_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.cube.level.v4s32.f32">; +def int_nvvm_tex_unified_cube_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.cube.v4u32.f32">; +def int_nvvm_tex_unified_cube_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.cube.level.v4u32.f32">; + +def int_nvvm_tex_unified_cube_array_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.cube.array.v4f32.f32">; +def int_nvvm_tex_unified_cube_array_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + 
"llvm.nvvm.tex.unified.cube.array.level.v4f32.f32">; +def int_nvvm_tex_unified_cube_array_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.cube.array.v4s32.f32">; +def int_nvvm_tex_unified_cube_array_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.cube.array.level.v4s32.f32">; +def int_nvvm_tex_unified_cube_array_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.cube.array.v4u32.f32">; +def int_nvvm_tex_unified_cube_array_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.unified.cube.array.level.v4u32.f32">; + +def int_nvvm_tld4_unified_r_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.unified.r.2d.v4f32.f32">; +def int_nvvm_tld4_unified_g_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.unified.g.2d.v4f32.f32">; +def int_nvvm_tld4_unified_b_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.unified.b.2d.v4f32.f32">; +def int_nvvm_tld4_unified_a_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.unified.a.2d.v4f32.f32">; +def int_nvvm_tld4_unified_r_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.unified.r.2d.v4s32.f32">; +def int_nvvm_tld4_unified_g_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.unified.g.2d.v4s32.f32">; +def int_nvvm_tld4_unified_b_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.unified.b.2d.v4s32.f32">; +def int_nvvm_tld4_unified_a_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.unified.a.2d.v4s32.f32">; +def int_nvvm_tld4_unified_r_2d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.unified.r.2d.v4u32.f32">; +def int_nvvm_tld4_unified_g_2d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.unified.g.2d.v4u32.f32">; +def int_nvvm_tld4_unified_b_2d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tld4.unified.b.2d.v4u32.f32">; +def int_nvvm_tld4_unified_a_2d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + 
"llvm.nvvm.tld4.unified.a.2d.v4u32.f32">; + + +//=== Surface Load +// .clamp variants +def int_nvvm_suld_1d_i8_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.i8.clamp">; +def int_nvvm_suld_1d_i16_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.i16.clamp">; +def int_nvvm_suld_1d_i32_clamp + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.i32.clamp">; +def int_nvvm_suld_1d_i64_clamp + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.i64.clamp">; +def int_nvvm_suld_1d_v2i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v2i8.clamp">; +def int_nvvm_suld_1d_v2i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v2i16.clamp">; +def int_nvvm_suld_1d_v2i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v2i32.clamp">; +def int_nvvm_suld_1d_v2i64_clamp + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v2i64.clamp">; +def int_nvvm_suld_1d_v4i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v4i8.clamp">; +def int_nvvm_suld_1d_v4i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v4i16.clamp">; +def int_nvvm_suld_1d_v4i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v4i32.clamp">; + +def int_nvvm_suld_1d_array_i8_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.i8.clamp">; +def int_nvvm_suld_1d_array_i16_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.i16.clamp">; +def int_nvvm_suld_1d_array_i32_clamp + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.i32.clamp">; +def int_nvvm_suld_1d_array_i64_clamp + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.i64.clamp">; +def int_nvvm_suld_1d_array_v2i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v2i8.clamp">; +def int_nvvm_suld_1d_array_v2i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v2i16.clamp">; +def int_nvvm_suld_1d_array_v2i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v2i32.clamp">; +def int_nvvm_suld_1d_array_v2i64_clamp + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v2i64.clamp">; +def int_nvvm_suld_1d_array_v4i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v4i8.clamp">; +def int_nvvm_suld_1d_array_v4i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v4i16.clamp">; +def int_nvvm_suld_1d_array_v4i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + 
"llvm.nvvm.suld.1d.array.v4i32.clamp">; + +def int_nvvm_suld_2d_i8_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.i8.clamp">; +def int_nvvm_suld_2d_i16_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.i16.clamp">; +def int_nvvm_suld_2d_i32_clamp + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.i32.clamp">; +def int_nvvm_suld_2d_i64_clamp + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.i64.clamp">; +def int_nvvm_suld_2d_v2i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v2i8.clamp">; +def int_nvvm_suld_2d_v2i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v2i16.clamp">; +def int_nvvm_suld_2d_v2i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v2i32.clamp">; +def int_nvvm_suld_2d_v2i64_clamp + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v2i64.clamp">; +def int_nvvm_suld_2d_v4i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v4i8.clamp">; +def int_nvvm_suld_2d_v4i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v4i16.clamp">; +def int_nvvm_suld_2d_v4i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v4i32.clamp">; + +def int_nvvm_suld_2d_array_i8_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.i8.clamp">; +def int_nvvm_suld_2d_array_i16_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.i16.clamp">; +def int_nvvm_suld_2d_array_i32_clamp + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.i32.clamp">; +def int_nvvm_suld_2d_array_i64_clamp + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.i64.clamp">; +def int_nvvm_suld_2d_array_v2i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v2i8.clamp">; +def int_nvvm_suld_2d_array_v2i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v2i16.clamp">; +def int_nvvm_suld_2d_array_v2i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v2i32.clamp">; +def int_nvvm_suld_2d_array_v2i64_clamp + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v2i64.clamp">; +def int_nvvm_suld_2d_array_v4i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v4i8.clamp">; +def int_nvvm_suld_2d_array_v4i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, 
llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v4i16.clamp">; +def int_nvvm_suld_2d_array_v4i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v4i32.clamp">; + +def int_nvvm_suld_3d_i8_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.i8.clamp">; +def int_nvvm_suld_3d_i16_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.i16.clamp">; +def int_nvvm_suld_3d_i32_clamp + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.i32.clamp">; +def int_nvvm_suld_3d_i64_clamp + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.i64.clamp">; +def int_nvvm_suld_3d_v2i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v2i8.clamp">; +def int_nvvm_suld_3d_v2i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v2i16.clamp">; +def int_nvvm_suld_3d_v2i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v2i32.clamp">; +def int_nvvm_suld_3d_v2i64_clamp + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v2i64.clamp">; +def int_nvvm_suld_3d_v4i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v4i8.clamp">; +def int_nvvm_suld_3d_v4i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v4i16.clamp">; +def int_nvvm_suld_3d_v4i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v4i32.clamp">; + +// .trap variants +def int_nvvm_suld_1d_i8_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.i8.trap">; +def int_nvvm_suld_1d_i16_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.i16.trap">; +def int_nvvm_suld_1d_i32_trap + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.i32.trap">; +def int_nvvm_suld_1d_i64_trap + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.i64.trap">; +def int_nvvm_suld_1d_v2i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v2i8.trap">; +def int_nvvm_suld_1d_v2i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v2i16.trap">; +def int_nvvm_suld_1d_v2i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v2i32.trap">; +def int_nvvm_suld_1d_v2i64_trap + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v2i64.trap">; +def int_nvvm_suld_1d_v4i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v4i8.trap">; +def int_nvvm_suld_1d_v4i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, 
llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v4i16.trap">; +def int_nvvm_suld_1d_v4i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v4i32.trap">; + +def int_nvvm_suld_1d_array_i8_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.i8.trap">; +def int_nvvm_suld_1d_array_i16_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.i16.trap">; +def int_nvvm_suld_1d_array_i32_trap + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.i32.trap">; +def int_nvvm_suld_1d_array_i64_trap + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.i64.trap">; +def int_nvvm_suld_1d_array_v2i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v2i8.trap">; +def int_nvvm_suld_1d_array_v2i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v2i16.trap">; +def int_nvvm_suld_1d_array_v2i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v2i32.trap">; +def int_nvvm_suld_1d_array_v2i64_trap + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v2i64.trap">; +def int_nvvm_suld_1d_array_v4i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v4i8.trap">; +def int_nvvm_suld_1d_array_v4i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v4i16.trap">; +def int_nvvm_suld_1d_array_v4i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v4i32.trap">; + +def int_nvvm_suld_2d_i8_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.i8.trap">; +def int_nvvm_suld_2d_i16_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.i16.trap">; +def int_nvvm_suld_2d_i32_trap + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.i32.trap">; +def int_nvvm_suld_2d_i64_trap + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.i64.trap">; +def int_nvvm_suld_2d_v2i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v2i8.trap">; +def int_nvvm_suld_2d_v2i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v2i16.trap">; +def int_nvvm_suld_2d_v2i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v2i32.trap">; +def int_nvvm_suld_2d_v2i64_trap + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v2i64.trap">; +def int_nvvm_suld_2d_v4i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v4i8.trap">; +def int_nvvm_suld_2d_v4i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, 
llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v4i16.trap">; +def int_nvvm_suld_2d_v4i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v4i32.trap">; + +def int_nvvm_suld_2d_array_i8_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.i8.trap">; +def int_nvvm_suld_2d_array_i16_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.i16.trap">; +def int_nvvm_suld_2d_array_i32_trap + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.i32.trap">; +def int_nvvm_suld_2d_array_i64_trap + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.i64.trap">; +def int_nvvm_suld_2d_array_v2i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v2i8.trap">; +def int_nvvm_suld_2d_array_v2i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v2i16.trap">; +def int_nvvm_suld_2d_array_v2i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v2i32.trap">; +def int_nvvm_suld_2d_array_v2i64_trap + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v2i64.trap">; +def int_nvvm_suld_2d_array_v4i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v4i8.trap">; +def int_nvvm_suld_2d_array_v4i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v4i16.trap">; +def int_nvvm_suld_2d_array_v4i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v4i32.trap">; + +def int_nvvm_suld_3d_i8_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.i8.trap">; +def int_nvvm_suld_3d_i16_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.i16.trap">; +def int_nvvm_suld_3d_i32_trap + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.i32.trap">; +def int_nvvm_suld_3d_i64_trap + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.i64.trap">; +def int_nvvm_suld_3d_v2i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v2i8.trap">; +def int_nvvm_suld_3d_v2i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v2i16.trap">; +def int_nvvm_suld_3d_v2i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v2i32.trap">; +def int_nvvm_suld_3d_v2i64_trap + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + 
"llvm.nvvm.suld.3d.v2i64.trap">; +def int_nvvm_suld_3d_v4i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v4i8.trap">; +def int_nvvm_suld_3d_v4i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v4i16.trap">; +def int_nvvm_suld_3d_v4i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v4i32.trap">; + +// .zero variants +def int_nvvm_suld_1d_i8_zero + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.i8.zero">; +def int_nvvm_suld_1d_i16_zero + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.i16.zero">; +def int_nvvm_suld_1d_i32_zero + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.i32.zero">; +def int_nvvm_suld_1d_i64_zero + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.i64.zero">; +def int_nvvm_suld_1d_v2i8_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v2i8.zero">; +def int_nvvm_suld_1d_v2i16_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v2i16.zero">; +def int_nvvm_suld_1d_v2i32_zero + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v2i32.zero">; +def int_nvvm_suld_1d_v2i64_zero + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v2i64.zero">; +def int_nvvm_suld_1d_v4i8_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v4i8.zero">; +def int_nvvm_suld_1d_v4i16_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v4i16.zero">; +def int_nvvm_suld_1d_v4i32_zero + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v4i32.zero">; + +def int_nvvm_suld_1d_array_i8_zero + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.i8.zero">; +def int_nvvm_suld_1d_array_i16_zero + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.i16.zero">; +def int_nvvm_suld_1d_array_i32_zero + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.i32.zero">; +def int_nvvm_suld_1d_array_i64_zero + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.i64.zero">; +def int_nvvm_suld_1d_array_v2i8_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v2i8.zero">; +def int_nvvm_suld_1d_array_v2i16_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v2i16.zero">; +def int_nvvm_suld_1d_array_v2i32_zero + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v2i32.zero">; +def int_nvvm_suld_1d_array_v2i64_zero + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v2i64.zero">; +def int_nvvm_suld_1d_array_v4i8_zero + : 
Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v4i8.zero">; +def int_nvvm_suld_1d_array_v4i16_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v4i16.zero">; +def int_nvvm_suld_1d_array_v4i32_zero + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v4i32.zero">; -def int_nvvm_suld_2d_i8_trap +def int_nvvm_suld_2d_i8_zero : Intrinsic<[llvm_i16_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.2d.i8.trap">; -def int_nvvm_suld_2d_i16_trap + "llvm.nvvm.suld.2d.i8.zero">; +def int_nvvm_suld_2d_i16_zero : Intrinsic<[llvm_i16_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.2d.i16.trap">; -def int_nvvm_suld_2d_i32_trap + "llvm.nvvm.suld.2d.i16.zero">; +def int_nvvm_suld_2d_i32_zero : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.2d.i32.trap">; -def int_nvvm_suld_2d_v2i8_trap + "llvm.nvvm.suld.2d.i32.zero">; +def int_nvvm_suld_2d_i64_zero + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.i64.zero">; +def int_nvvm_suld_2d_v2i8_zero : Intrinsic<[llvm_i16_ty, llvm_i16_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.2d.v2i8.trap">; -def int_nvvm_suld_2d_v2i16_trap + "llvm.nvvm.suld.2d.v2i8.zero">; +def int_nvvm_suld_2d_v2i16_zero : Intrinsic<[llvm_i16_ty, llvm_i16_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.2d.v2i16.trap">; -def int_nvvm_suld_2d_v2i32_trap + "llvm.nvvm.suld.2d.v2i16.zero">; +def int_nvvm_suld_2d_v2i32_zero : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.2d.v2i32.trap">; -def int_nvvm_suld_2d_v4i8_trap + "llvm.nvvm.suld.2d.v2i32.zero">; +def int_nvvm_suld_2d_v2i64_zero + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v2i64.zero">; +def int_nvvm_suld_2d_v4i8_zero : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.2d.v4i8.trap">; -def int_nvvm_suld_2d_v4i16_trap + "llvm.nvvm.suld.2d.v4i8.zero">; +def int_nvvm_suld_2d_v4i16_zero : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.2d.v4i16.trap">; -def int_nvvm_suld_2d_v4i32_trap + "llvm.nvvm.suld.2d.v4i16.zero">; +def int_nvvm_suld_2d_v4i32_zero : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.2d.v4i32.trap">; + "llvm.nvvm.suld.2d.v4i32.zero">; -def int_nvvm_suld_2d_array_i8_trap +def int_nvvm_suld_2d_array_i8_zero : Intrinsic<[llvm_i16_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.2d.array.i8.trap">; -def int_nvvm_suld_2d_array_i16_trap + "llvm.nvvm.suld.2d.array.i8.zero">; +def int_nvvm_suld_2d_array_i16_zero : Intrinsic<[llvm_i16_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.2d.array.i16.trap">; -def int_nvvm_suld_2d_array_i32_trap + "llvm.nvvm.suld.2d.array.i16.zero">; +def int_nvvm_suld_2d_array_i32_zero : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.2d.array.i32.trap">; -def int_nvvm_suld_2d_array_v2i8_trap + 
"llvm.nvvm.suld.2d.array.i32.zero">; +def int_nvvm_suld_2d_array_i64_zero + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.i64.zero">; +def int_nvvm_suld_2d_array_v2i8_zero : Intrinsic<[llvm_i16_ty, llvm_i16_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.2d.array.v2i8.trap">; -def int_nvvm_suld_2d_array_v2i16_trap + "llvm.nvvm.suld.2d.array.v2i8.zero">; +def int_nvvm_suld_2d_array_v2i16_zero : Intrinsic<[llvm_i16_ty, llvm_i16_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.2d.array.v2i16.trap">; -def int_nvvm_suld_2d_array_v2i32_trap + "llvm.nvvm.suld.2d.array.v2i16.zero">; +def int_nvvm_suld_2d_array_v2i32_zero : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.2d.array.v2i32.trap">; -def int_nvvm_suld_2d_array_v4i8_trap + "llvm.nvvm.suld.2d.array.v2i32.zero">; +def int_nvvm_suld_2d_array_v2i64_zero + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v2i64.zero">; +def int_nvvm_suld_2d_array_v4i8_zero : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.2d.array.v4i8.trap">; -def int_nvvm_suld_2d_array_v4i16_trap + "llvm.nvvm.suld.2d.array.v4i8.zero">; +def int_nvvm_suld_2d_array_v4i16_zero : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.2d.array.v4i16.trap">; -def int_nvvm_suld_2d_array_v4i32_trap + "llvm.nvvm.suld.2d.array.v4i16.zero">; +def int_nvvm_suld_2d_array_v4i32_zero : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.2d.array.v4i32.trap">; + "llvm.nvvm.suld.2d.array.v4i32.zero">; -def int_nvvm_suld_3d_i8_trap +def int_nvvm_suld_3d_i8_zero : Intrinsic<[llvm_i16_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.3d.i8.trap">; -def int_nvvm_suld_3d_i16_trap + "llvm.nvvm.suld.3d.i8.zero">; +def int_nvvm_suld_3d_i16_zero : Intrinsic<[llvm_i16_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.3d.i16.trap">; -def int_nvvm_suld_3d_i32_trap + "llvm.nvvm.suld.3d.i16.zero">; +def int_nvvm_suld_3d_i32_zero : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.3d.i32.trap">; -def int_nvvm_suld_3d_v2i8_trap + "llvm.nvvm.suld.3d.i32.zero">; +def int_nvvm_suld_3d_i64_zero + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.i64.zero">; +def int_nvvm_suld_3d_v2i8_zero : Intrinsic<[llvm_i16_ty, llvm_i16_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.3d.v2i8.trap">; -def int_nvvm_suld_3d_v2i16_trap + "llvm.nvvm.suld.3d.v2i8.zero">; +def int_nvvm_suld_3d_v2i16_zero : Intrinsic<[llvm_i16_ty, llvm_i16_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.3d.v2i16.trap">; -def int_nvvm_suld_3d_v2i32_trap + "llvm.nvvm.suld.3d.v2i16.zero">; +def int_nvvm_suld_3d_v2i32_zero : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.3d.v2i32.trap">; -def int_nvvm_suld_3d_v4i8_trap + "llvm.nvvm.suld.3d.v2i32.zero">; +def int_nvvm_suld_3d_v2i64_zero + : Intrinsic<[llvm_i64_ty, 
llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v2i64.zero">; +def int_nvvm_suld_3d_v4i8_zero : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.3d.v4i8.trap">; -def int_nvvm_suld_3d_v4i16_trap + "llvm.nvvm.suld.3d.v4i8.zero">; +def int_nvvm_suld_3d_v4i16_zero : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.3d.v4i16.trap">; -def int_nvvm_suld_3d_v4i32_trap + "llvm.nvvm.suld.3d.v4i16.zero">; +def int_nvvm_suld_3d_v4i32_zero : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], - "llvm.nvvm.suld.3d.v4i32.trap">; + "llvm.nvvm.suld.3d.v4i32.zero">; //===- Texture Query ------------------------------------------------------===// @@ -1503,7 +2603,277 @@ def int_nvvm_istypep_texture //===- Surface Stores -----------------------------------------------------===// // Unformatted +// .clamp variant +def int_nvvm_sust_b_1d_i8_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.i8.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_i8_clamp">; +def int_nvvm_sust_b_1d_i16_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.i16.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_i16_clamp">; +def int_nvvm_sust_b_1d_i32_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.1d.i32.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_i32_clamp">; +def int_nvvm_sust_b_1d_i64_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.1d.i64.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_i64_clamp">; +def int_nvvm_sust_b_1d_v2i8_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.v2i8.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_v2i8_clamp">; +def int_nvvm_sust_b_1d_v2i16_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.v2i16.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_v2i16_clamp">; +def int_nvvm_sust_b_1d_v2i32_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.1d.v2i32.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_v2i32_clamp">; +def int_nvvm_sust_b_1d_v2i64_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.1d.v2i64.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_v2i64_clamp">; +def int_nvvm_sust_b_1d_v4i8_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.v4i8.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_v4i8_clamp">; +def int_nvvm_sust_b_1d_v4i16_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.v4i16.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_v4i16_clamp">; +def int_nvvm_sust_b_1d_v4i32_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.1d.v4i32.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_v4i32_clamp">; + + +def int_nvvm_sust_b_1d_array_i8_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.array.i8.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_array_i8_clamp">; +def int_nvvm_sust_b_1d_array_i16_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, 
llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.array.i16.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_array_i16_clamp">; +def int_nvvm_sust_b_1d_array_i32_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.1d.array.i32.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_array_i32_clamp">; +def int_nvvm_sust_b_1d_array_i64_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.1d.array.i64.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_array_i64_clamp">; +def int_nvvm_sust_b_1d_array_v2i8_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.array.v2i8.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v2i8_clamp">; +def int_nvvm_sust_b_1d_array_v2i16_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.array.v2i16.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v2i16_clamp">; +def int_nvvm_sust_b_1d_array_v2i32_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.1d.array.v2i32.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v2i32_clamp">; +def int_nvvm_sust_b_1d_array_v2i64_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.1d.array.v2i64.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v2i64_clamp">; +def int_nvvm_sust_b_1d_array_v4i8_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.array.v4i8.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v4i8_clamp">; +def int_nvvm_sust_b_1d_array_v4i16_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.array.v4i16.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v4i16_clamp">; +def int_nvvm_sust_b_1d_array_v4i32_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.1d.array.v4i32.clamp">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v4i32_clamp">; + + +def int_nvvm_sust_b_2d_i8_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.i8.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_i8_clamp">; +def int_nvvm_sust_b_2d_i16_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.i16.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_i16_clamp">; +def int_nvvm_sust_b_2d_i32_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.2d.i32.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_i32_clamp">; +def int_nvvm_sust_b_2d_i64_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.2d.i64.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_i64_clamp">; +def int_nvvm_sust_b_2d_v2i8_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.v2i8.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_v2i8_clamp">; +def int_nvvm_sust_b_2d_v2i16_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.v2i16.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_v2i16_clamp">; +def int_nvvm_sust_b_2d_v2i32_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + 
"llvm.nvvm.sust.b.2d.v2i32.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_v2i32_clamp">; +def int_nvvm_sust_b_2d_v2i64_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.2d.v2i64.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_v2i64_clamp">; +def int_nvvm_sust_b_2d_v4i8_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.v4i8.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_v4i8_clamp">; +def int_nvvm_sust_b_2d_v4i16_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.v4i16.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_v4i16_clamp">; +def int_nvvm_sust_b_2d_v4i32_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.2d.v4i32.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_v4i32_clamp">; + + +def int_nvvm_sust_b_2d_array_i8_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.array.i8.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_array_i8_clamp">; +def int_nvvm_sust_b_2d_array_i16_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.array.i16.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_array_i16_clamp">; +def int_nvvm_sust_b_2d_array_i32_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.2d.array.i32.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_array_i32_clamp">; +def int_nvvm_sust_b_2d_array_i64_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.2d.array.i64.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_array_i64_clamp">; +def int_nvvm_sust_b_2d_array_v2i8_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.array.v2i8.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v2i8_clamp">; +def int_nvvm_sust_b_2d_array_v2i16_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.array.v2i16.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v2i16_clamp">; +def int_nvvm_sust_b_2d_array_v2i32_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.2d.array.v2i32.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v2i32_clamp">; +def int_nvvm_sust_b_2d_array_v2i64_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.2d.array.v2i64.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v2i64_clamp">; +def int_nvvm_sust_b_2d_array_v4i8_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.array.v4i8.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v4i8_clamp">; +def int_nvvm_sust_b_2d_array_v4i16_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.array.v4i16.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v4i16_clamp">; +def int_nvvm_sust_b_2d_array_v4i32_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, 
llvm_i32_ty], [], + "llvm.nvvm.sust.b.2d.array.v4i32.clamp">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v4i32_clamp">; + + +def int_nvvm_sust_b_3d_i8_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.3d.i8.clamp">, + GCCBuiltin<"__nvvm_sust_b_3d_i8_clamp">; +def int_nvvm_sust_b_3d_i16_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.3d.i16.clamp">, + GCCBuiltin<"__nvvm_sust_b_3d_i16_clamp">; +def int_nvvm_sust_b_3d_i32_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.3d.i32.clamp">, + GCCBuiltin<"__nvvm_sust_b_3d_i32_clamp">; +def int_nvvm_sust_b_3d_i64_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.3d.i64.clamp">, + GCCBuiltin<"__nvvm_sust_b_3d_i64_clamp">; +def int_nvvm_sust_b_3d_v2i8_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.3d.v2i8.clamp">, + GCCBuiltin<"__nvvm_sust_b_3d_v2i8_clamp">; +def int_nvvm_sust_b_3d_v2i16_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.3d.v2i16.clamp">, + GCCBuiltin<"__nvvm_sust_b_3d_v2i16_clamp">; +def int_nvvm_sust_b_3d_v2i32_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.3d.v2i32.clamp">, + GCCBuiltin<"__nvvm_sust_b_3d_v2i32_clamp">; +def int_nvvm_sust_b_3d_v2i64_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.3d.v2i64.clamp">, + GCCBuiltin<"__nvvm_sust_b_3d_v2i64_clamp">; +def int_nvvm_sust_b_3d_v4i8_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.3d.v4i8.clamp">, + GCCBuiltin<"__nvvm_sust_b_3d_v4i8_clamp">; +def int_nvvm_sust_b_3d_v4i16_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.3d.v4i16.clamp">, + GCCBuiltin<"__nvvm_sust_b_3d_v4i16_clamp">; +def int_nvvm_sust_b_3d_v4i32_clamp + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.3d.v4i32.clamp">, + GCCBuiltin<"__nvvm_sust_b_3d_v4i32_clamp">; + +// .trap variant def int_nvvm_sust_b_1d_i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.i8.trap">, @@ -1516,6 +2886,10 @@ def int_nvvm_sust_b_1d_i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.i32.trap">, GCCBuiltin<"__nvvm_sust_b_1d_i32_trap">; +def int_nvvm_sust_b_1d_i64_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.1d.i64.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_i64_trap">; def int_nvvm_sust_b_1d_v2i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], "llvm.nvvm.sust.b.1d.v2i8.trap">, @@ -1528,6 +2902,10 @@ def int_nvvm_sust_b_1d_v2i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.v2i32.trap">, GCCBuiltin<"__nvvm_sust_b_1d_v2i32_trap">; +def int_nvvm_sust_b_1d_v2i64_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [], + 
"llvm.nvvm.sust.b.1d.v2i64.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_v2i64_trap">; def int_nvvm_sust_b_1d_v4i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], @@ -1557,6 +2935,10 @@ def int_nvvm_sust_b_1d_array_i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.array.i32.trap">, GCCBuiltin<"__nvvm_sust_b_1d_array_i32_trap">; +def int_nvvm_sust_b_1d_array_i64_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.1d.array.i64.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_array_i64_trap">; def int_nvvm_sust_b_1d_array_v2i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], @@ -1572,6 +2954,11 @@ def int_nvvm_sust_b_1d_array_v2i32_trap llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.1d.array.v2i32.trap">, GCCBuiltin<"__nvvm_sust_b_1d_array_v2i32_trap">; +def int_nvvm_sust_b_1d_array_v2i64_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.1d.array.v2i64.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v2i64_trap">; def int_nvvm_sust_b_1d_array_v4i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], @@ -1601,6 +2988,10 @@ def int_nvvm_sust_b_2d_i32_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.i32.trap">, GCCBuiltin<"__nvvm_sust_b_2d_i32_trap">; +def int_nvvm_sust_b_2d_i64_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.2d.i64.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_i64_trap">; def int_nvvm_sust_b_2d_v2i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], @@ -1616,6 +3007,11 @@ def int_nvvm_sust_b_2d_v2i32_trap llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.v2i32.trap">, GCCBuiltin<"__nvvm_sust_b_2d_v2i32_trap">; +def int_nvvm_sust_b_2d_v2i64_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.2d.v2i64.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_v2i64_trap">; def int_nvvm_sust_b_2d_v4i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], @@ -1648,6 +3044,11 @@ def int_nvvm_sust_b_2d_array_i32_trap llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.array.i32.trap">, GCCBuiltin<"__nvvm_sust_b_2d_array_i32_trap">; +def int_nvvm_sust_b_2d_array_i64_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.2d.array.i64.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_array_i64_trap">; def int_nvvm_sust_b_2d_array_v2i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], @@ -1663,6 +3064,11 @@ def int_nvvm_sust_b_2d_array_v2i32_trap llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.2d.array.v2i32.trap">, GCCBuiltin<"__nvvm_sust_b_2d_array_v2i32_trap">; +def int_nvvm_sust_b_2d_array_v2i64_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.2d.array.v2i64.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v2i64_trap">; def int_nvvm_sust_b_2d_array_v4i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], @@ -1695,6 +3101,11 @@ def int_nvvm_sust_b_3d_i32_trap llvm_i32_ty, 
llvm_i32_ty], [], "llvm.nvvm.sust.b.3d.i32.trap">, GCCBuiltin<"__nvvm_sust_b_3d_i32_trap">; +def int_nvvm_sust_b_3d_i64_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.3d.i64.trap">, + GCCBuiltin<"__nvvm_sust_b_3d_i64_trap">; def int_nvvm_sust_b_3d_v2i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], @@ -1710,6 +3121,11 @@ def int_nvvm_sust_b_3d_v2i32_trap llvm_i32_ty, llvm_i32_ty], [], "llvm.nvvm.sust.b.3d.v2i32.trap">, GCCBuiltin<"__nvvm_sust_b_3d_v2i32_trap">; +def int_nvvm_sust_b_3d_v2i64_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.3d.v2i64.trap">, + GCCBuiltin<"__nvvm_sust_b_3d_v2i64_trap">; def int_nvvm_sust_b_3d_v4i8_trap : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], @@ -1726,6 +3142,278 @@ def int_nvvm_sust_b_3d_v4i32_trap "llvm.nvvm.sust.b.3d.v4i32.trap">, GCCBuiltin<"__nvvm_sust_b_3d_v4i32_trap">; + +// .zero variant +def int_nvvm_sust_b_1d_i8_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.i8.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_i8_zero">; +def int_nvvm_sust_b_1d_i16_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.i16.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_i16_zero">; +def int_nvvm_sust_b_1d_i32_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.1d.i32.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_i32_zero">; +def int_nvvm_sust_b_1d_i64_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.1d.i64.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_i64_zero">; +def int_nvvm_sust_b_1d_v2i8_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.v2i8.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_v2i8_zero">; +def int_nvvm_sust_b_1d_v2i16_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.v2i16.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_v2i16_zero">; +def int_nvvm_sust_b_1d_v2i32_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.1d.v2i32.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_v2i32_zero">; +def int_nvvm_sust_b_1d_v2i64_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.1d.v2i64.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_v2i64_zero">; +def int_nvvm_sust_b_1d_v4i8_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.v4i8.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_v4i8_zero">; +def int_nvvm_sust_b_1d_v4i16_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.v4i16.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_v4i16_zero">; +def int_nvvm_sust_b_1d_v4i32_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.1d.v4i32.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_v4i32_zero">; + + +def int_nvvm_sust_b_1d_array_i8_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.array.i8.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_array_i8_zero">; +def int_nvvm_sust_b_1d_array_i16_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, 
llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.array.i16.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_array_i16_zero">; +def int_nvvm_sust_b_1d_array_i32_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.1d.array.i32.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_array_i32_zero">; +def int_nvvm_sust_b_1d_array_i64_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.1d.array.i64.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_array_i64_zero">; +def int_nvvm_sust_b_1d_array_v2i8_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.array.v2i8.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v2i8_zero">; +def int_nvvm_sust_b_1d_array_v2i16_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.array.v2i16.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v2i16_zero">; +def int_nvvm_sust_b_1d_array_v2i32_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.1d.array.v2i32.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v2i32_zero">; +def int_nvvm_sust_b_1d_array_v2i64_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.1d.array.v2i64.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v2i64_zero">; +def int_nvvm_sust_b_1d_array_v4i8_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.array.v4i8.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v4i8_zero">; +def int_nvvm_sust_b_1d_array_v4i16_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.array.v4i16.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v4i16_zero">; +def int_nvvm_sust_b_1d_array_v4i32_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.1d.array.v4i32.zero">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v4i32_zero">; + + +def int_nvvm_sust_b_2d_i8_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.i8.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_i8_zero">; +def int_nvvm_sust_b_2d_i16_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.i16.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_i16_zero">; +def int_nvvm_sust_b_2d_i32_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.2d.i32.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_i32_zero">; +def int_nvvm_sust_b_2d_i64_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.2d.i64.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_i64_zero">; +def int_nvvm_sust_b_2d_v2i8_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.v2i8.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_v2i8_zero">; +def int_nvvm_sust_b_2d_v2i16_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.v2i16.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_v2i16_zero">; +def int_nvvm_sust_b_2d_v2i32_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.2d.v2i32.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_v2i32_zero">; +def 
int_nvvm_sust_b_2d_v2i64_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.2d.v2i64.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_v2i64_zero">; +def int_nvvm_sust_b_2d_v4i8_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.v4i8.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_v4i8_zero">; +def int_nvvm_sust_b_2d_v4i16_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.v4i16.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_v4i16_zero">; +def int_nvvm_sust_b_2d_v4i32_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.2d.v4i32.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_v4i32_zero">; + + +def int_nvvm_sust_b_2d_array_i8_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.array.i8.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_array_i8_zero">; +def int_nvvm_sust_b_2d_array_i16_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.array.i16.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_array_i16_zero">; +def int_nvvm_sust_b_2d_array_i32_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.2d.array.i32.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_array_i32_zero">; +def int_nvvm_sust_b_2d_array_i64_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.2d.array.i64.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_array_i64_zero">; +def int_nvvm_sust_b_2d_array_v2i8_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.array.v2i8.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v2i8_zero">; +def int_nvvm_sust_b_2d_array_v2i16_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.array.v2i16.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v2i16_zero">; +def int_nvvm_sust_b_2d_array_v2i32_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.2d.array.v2i32.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v2i32_zero">; +def int_nvvm_sust_b_2d_array_v2i64_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.2d.array.v2i64.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v2i64_zero">; +def int_nvvm_sust_b_2d_array_v4i8_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.array.v4i8.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v4i8_zero">; +def int_nvvm_sust_b_2d_array_v4i16_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.array.v4i16.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v4i16_zero">; +def int_nvvm_sust_b_2d_array_v4i32_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.2d.array.v4i32.zero">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v4i32_zero">; + + +def 
int_nvvm_sust_b_3d_i8_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.3d.i8.zero">, + GCCBuiltin<"__nvvm_sust_b_3d_i8_zero">; +def int_nvvm_sust_b_3d_i16_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.3d.i16.zero">, + GCCBuiltin<"__nvvm_sust_b_3d_i16_zero">; +def int_nvvm_sust_b_3d_i32_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.3d.i32.zero">, + GCCBuiltin<"__nvvm_sust_b_3d_i32_zero">; +def int_nvvm_sust_b_3d_i64_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.3d.i64.zero">, + GCCBuiltin<"__nvvm_sust_b_3d_i64_zero">; +def int_nvvm_sust_b_3d_v2i8_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.3d.v2i8.zero">, + GCCBuiltin<"__nvvm_sust_b_3d_v2i8_zero">; +def int_nvvm_sust_b_3d_v2i16_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.3d.v2i16.zero">, + GCCBuiltin<"__nvvm_sust_b_3d_v2i16_zero">; +def int_nvvm_sust_b_3d_v2i32_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.3d.v2i32.zero">, + GCCBuiltin<"__nvvm_sust_b_3d_v2i32_zero">; +def int_nvvm_sust_b_3d_v2i64_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], [], + "llvm.nvvm.sust.b.3d.v2i64.zero">, + GCCBuiltin<"__nvvm_sust_b_3d_v2i64_zero">; +def int_nvvm_sust_b_3d_v4i8_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.3d.v4i8.zero">, + GCCBuiltin<"__nvvm_sust_b_3d_v4i8_zero">; +def int_nvvm_sust_b_3d_v4i16_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.3d.v4i16.zero">, + GCCBuiltin<"__nvvm_sust_b_3d_v4i16_zero">; +def int_nvvm_sust_b_3d_v4i32_zero + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.3d.v4i32.zero">, + GCCBuiltin<"__nvvm_sust_b_3d_v4i32_zero">; + + + // Formatted def int_nvvm_sust_p_1d_i8_trap @@ -1950,6 +3638,7 @@ def int_nvvm_sust_p_3d_v4i32_trap "llvm.nvvm.sust.p.3d.v4i32.trap">, GCCBuiltin<"__nvvm_sust_p_3d_v4i32_trap">; + def int_nvvm_rotate_b32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem], "llvm.nvvm.rotate.b32">, diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h index ddb122f65b5..16ec19c25f1 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h @@ -84,6 +84,17 @@ __attribute__((unused)) #endif static const char *NamedMDForAnnotations = "nvvm.annotations"; +namespace NVPTXII { +enum { + // These must be kept in sync with TSFlags in NVPTXInstrFormats.td + IsTexFlag = 0x80, + IsSuldMask = 0x300, + IsSuldShift = 8, + IsSustFlag = 0x400, + IsSurfTexQueryFlag = 0x800, + IsTexModeUnifiedFlag = 0x1000 +}; +} } #endif diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 046a0934c27..187b88c1d54 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ 
b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -330,253 +330,51 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI, unsigned OpNo, MCOperand &MCOp) { const MachineOperand &MO = MI->getOperand(OpNo); + const MCInstrDesc &MCID = MI->getDesc(); - switch (MI->getOpcode()) { - default: return false; - case NVPTX::TEX_1D_F32_I32: - case NVPTX::TEX_1D_F32_F32: - case NVPTX::TEX_1D_F32_F32_LEVEL: - case NVPTX::TEX_1D_F32_F32_GRAD: - case NVPTX::TEX_1D_I32_I32: - case NVPTX::TEX_1D_I32_F32: - case NVPTX::TEX_1D_I32_F32_LEVEL: - case NVPTX::TEX_1D_I32_F32_GRAD: - case NVPTX::TEX_1D_ARRAY_F32_I32: - case NVPTX::TEX_1D_ARRAY_F32_F32: - case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL: - case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD: - case NVPTX::TEX_1D_ARRAY_I32_I32: - case NVPTX::TEX_1D_ARRAY_I32_F32: - case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL: - case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD: - case NVPTX::TEX_2D_F32_I32: - case NVPTX::TEX_2D_F32_F32: - case NVPTX::TEX_2D_F32_F32_LEVEL: - case NVPTX::TEX_2D_F32_F32_GRAD: - case NVPTX::TEX_2D_I32_I32: - case NVPTX::TEX_2D_I32_F32: - case NVPTX::TEX_2D_I32_F32_LEVEL: - case NVPTX::TEX_2D_I32_F32_GRAD: - case NVPTX::TEX_2D_ARRAY_F32_I32: - case NVPTX::TEX_2D_ARRAY_F32_F32: - case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL: - case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD: - case NVPTX::TEX_2D_ARRAY_I32_I32: - case NVPTX::TEX_2D_ARRAY_I32_F32: - case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL: - case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD: - case NVPTX::TEX_3D_F32_I32: - case NVPTX::TEX_3D_F32_F32: - case NVPTX::TEX_3D_F32_F32_LEVEL: - case NVPTX::TEX_3D_F32_F32_GRAD: - case NVPTX::TEX_3D_I32_I32: - case NVPTX::TEX_3D_I32_F32: - case NVPTX::TEX_3D_I32_F32_LEVEL: - case NVPTX::TEX_3D_I32_F32_GRAD: - { + if (MCID.TSFlags & NVPTXII::IsTexFlag) { // This is a texture fetch, so operand 4 is a texref and operand 5 is // a samplerref - if (OpNo == 4) { + if (OpNo == 4 && MO.isImm()) { lowerImageHandleSymbol(MO.getImm(), MCOp); return true; } - if (OpNo == 5) { + if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) { lowerImageHandleSymbol(MO.getImm(), MCOp); return true; } return false; - } - case NVPTX::SULD_1D_I8_TRAP: - case NVPTX::SULD_1D_I16_TRAP: - case NVPTX::SULD_1D_I32_TRAP: - case NVPTX::SULD_1D_ARRAY_I8_TRAP: - case NVPTX::SULD_1D_ARRAY_I16_TRAP: - case NVPTX::SULD_1D_ARRAY_I32_TRAP: - case NVPTX::SULD_2D_I8_TRAP: - case NVPTX::SULD_2D_I16_TRAP: - case NVPTX::SULD_2D_I32_TRAP: - case NVPTX::SULD_2D_ARRAY_I8_TRAP: - case NVPTX::SULD_2D_ARRAY_I16_TRAP: - case NVPTX::SULD_2D_ARRAY_I32_TRAP: - case NVPTX::SULD_3D_I8_TRAP: - case NVPTX::SULD_3D_I16_TRAP: - case NVPTX::SULD_3D_I32_TRAP: { - // This is a V1 surface load, so operand 1 is a surfref - if (OpNo == 1) { - lowerImageHandleSymbol(MO.getImm(), MCOp); - return true; - } + } else if (MCID.TSFlags & NVPTXII::IsSuldMask) { + unsigned VecSize = + 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1); - return false; - } - case NVPTX::SULD_1D_V2I8_TRAP: - case NVPTX::SULD_1D_V2I16_TRAP: - case NVPTX::SULD_1D_V2I32_TRAP: - case NVPTX::SULD_1D_ARRAY_V2I8_TRAP: - case NVPTX::SULD_1D_ARRAY_V2I16_TRAP: - case NVPTX::SULD_1D_ARRAY_V2I32_TRAP: - case NVPTX::SULD_2D_V2I8_TRAP: - case NVPTX::SULD_2D_V2I16_TRAP: - case NVPTX::SULD_2D_V2I32_TRAP: - case NVPTX::SULD_2D_ARRAY_V2I8_TRAP: - case NVPTX::SULD_2D_ARRAY_V2I16_TRAP: - case NVPTX::SULD_2D_ARRAY_V2I32_TRAP: - case NVPTX::SULD_3D_V2I8_TRAP: - case NVPTX::SULD_3D_V2I16_TRAP: - case 
NVPTX::SULD_3D_V2I32_TRAP: { - // This is a V2 surface load, so operand 2 is a surfref - if (OpNo == 2) { + // For a surface load of vector size N, the Nth operand will be the surfref + if (OpNo == VecSize && MO.isImm()) { lowerImageHandleSymbol(MO.getImm(), MCOp); return true; } return false; - } - case NVPTX::SULD_1D_V4I8_TRAP: - case NVPTX::SULD_1D_V4I16_TRAP: - case NVPTX::SULD_1D_V4I32_TRAP: - case NVPTX::SULD_1D_ARRAY_V4I8_TRAP: - case NVPTX::SULD_1D_ARRAY_V4I16_TRAP: - case NVPTX::SULD_1D_ARRAY_V4I32_TRAP: - case NVPTX::SULD_2D_V4I8_TRAP: - case NVPTX::SULD_2D_V4I16_TRAP: - case NVPTX::SULD_2D_V4I32_TRAP: - case NVPTX::SULD_2D_ARRAY_V4I8_TRAP: - case NVPTX::SULD_2D_ARRAY_V4I16_TRAP: - case NVPTX::SULD_2D_ARRAY_V4I32_TRAP: - case NVPTX::SULD_3D_V4I8_TRAP: - case NVPTX::SULD_3D_V4I16_TRAP: - case NVPTX::SULD_3D_V4I32_TRAP: { - // This is a V4 surface load, so operand 4 is a surfref - if (OpNo == 4) { - lowerImageHandleSymbol(MO.getImm(), MCOp); - return true; - } - - return false; - } - case NVPTX::SUST_B_1D_B8_TRAP: - case NVPTX::SUST_B_1D_B16_TRAP: - case NVPTX::SUST_B_1D_B32_TRAP: - case NVPTX::SUST_B_1D_V2B8_TRAP: - case NVPTX::SUST_B_1D_V2B16_TRAP: - case NVPTX::SUST_B_1D_V2B32_TRAP: - case NVPTX::SUST_B_1D_V4B8_TRAP: - case NVPTX::SUST_B_1D_V4B16_TRAP: - case NVPTX::SUST_B_1D_V4B32_TRAP: - case NVPTX::SUST_B_1D_ARRAY_B8_TRAP: - case NVPTX::SUST_B_1D_ARRAY_B16_TRAP: - case NVPTX::SUST_B_1D_ARRAY_B32_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP: - case NVPTX::SUST_B_2D_B8_TRAP: - case NVPTX::SUST_B_2D_B16_TRAP: - case NVPTX::SUST_B_2D_B32_TRAP: - case NVPTX::SUST_B_2D_V2B8_TRAP: - case NVPTX::SUST_B_2D_V2B16_TRAP: - case NVPTX::SUST_B_2D_V2B32_TRAP: - case NVPTX::SUST_B_2D_V4B8_TRAP: - case NVPTX::SUST_B_2D_V4B16_TRAP: - case NVPTX::SUST_B_2D_V4B32_TRAP: - case NVPTX::SUST_B_2D_ARRAY_B8_TRAP: - case NVPTX::SUST_B_2D_ARRAY_B16_TRAP: - case NVPTX::SUST_B_2D_ARRAY_B32_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP: - case NVPTX::SUST_B_3D_B8_TRAP: - case NVPTX::SUST_B_3D_B16_TRAP: - case NVPTX::SUST_B_3D_B32_TRAP: - case NVPTX::SUST_B_3D_V2B8_TRAP: - case NVPTX::SUST_B_3D_V2B16_TRAP: - case NVPTX::SUST_B_3D_V2B32_TRAP: - case NVPTX::SUST_B_3D_V4B8_TRAP: - case NVPTX::SUST_B_3D_V4B16_TRAP: - case NVPTX::SUST_B_3D_V4B32_TRAP: - case NVPTX::SUST_P_1D_B8_TRAP: - case NVPTX::SUST_P_1D_B16_TRAP: - case NVPTX::SUST_P_1D_B32_TRAP: - case NVPTX::SUST_P_1D_V2B8_TRAP: - case NVPTX::SUST_P_1D_V2B16_TRAP: - case NVPTX::SUST_P_1D_V2B32_TRAP: - case NVPTX::SUST_P_1D_V4B8_TRAP: - case NVPTX::SUST_P_1D_V4B16_TRAP: - case NVPTX::SUST_P_1D_V4B32_TRAP: - case NVPTX::SUST_P_1D_ARRAY_B8_TRAP: - case NVPTX::SUST_P_1D_ARRAY_B16_TRAP: - case NVPTX::SUST_P_1D_ARRAY_B32_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP: - case NVPTX::SUST_P_2D_B8_TRAP: - case NVPTX::SUST_P_2D_B16_TRAP: - case NVPTX::SUST_P_2D_B32_TRAP: - case NVPTX::SUST_P_2D_V2B8_TRAP: - case NVPTX::SUST_P_2D_V2B16_TRAP: - case 
NVPTX::SUST_P_2D_V2B32_TRAP: - case NVPTX::SUST_P_2D_V4B8_TRAP: - case NVPTX::SUST_P_2D_V4B16_TRAP: - case NVPTX::SUST_P_2D_V4B32_TRAP: - case NVPTX::SUST_P_2D_ARRAY_B8_TRAP: - case NVPTX::SUST_P_2D_ARRAY_B16_TRAP: - case NVPTX::SUST_P_2D_ARRAY_B32_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP: - case NVPTX::SUST_P_3D_B8_TRAP: - case NVPTX::SUST_P_3D_B16_TRAP: - case NVPTX::SUST_P_3D_B32_TRAP: - case NVPTX::SUST_P_3D_V2B8_TRAP: - case NVPTX::SUST_P_3D_V2B16_TRAP: - case NVPTX::SUST_P_3D_V2B32_TRAP: - case NVPTX::SUST_P_3D_V4B8_TRAP: - case NVPTX::SUST_P_3D_V4B16_TRAP: - case NVPTX::SUST_P_3D_V4B32_TRAP: { + } else if (MCID.TSFlags & NVPTXII::IsSustFlag) { // This is a surface store, so operand 0 is a surfref - if (OpNo == 0) { + if (OpNo == 0 && MO.isImm()) { lowerImageHandleSymbol(MO.getImm(), MCOp); return true; } return false; - } - case NVPTX::TXQ_CHANNEL_ORDER: - case NVPTX::TXQ_CHANNEL_DATA_TYPE: - case NVPTX::TXQ_WIDTH: - case NVPTX::TXQ_HEIGHT: - case NVPTX::TXQ_DEPTH: - case NVPTX::TXQ_ARRAY_SIZE: - case NVPTX::TXQ_NUM_SAMPLES: - case NVPTX::TXQ_NUM_MIPMAP_LEVELS: - case NVPTX::SUQ_CHANNEL_ORDER: - case NVPTX::SUQ_CHANNEL_DATA_TYPE: - case NVPTX::SUQ_WIDTH: - case NVPTX::SUQ_HEIGHT: - case NVPTX::SUQ_DEPTH: - case NVPTX::SUQ_ARRAY_SIZE: { + } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) { // This is a query, so operand 1 is a surfref/texref - if (OpNo == 1) { + if (OpNo == 1 && MO.isImm()) { lowerImageHandleSymbol(MO.getImm(), MCOp); return true; } return false; } - } + + return false; } void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) { diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 0dfbf10cd9d..35d553227e8 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -170,93 +170,341 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { case ISD::INTRINSIC_W_CHAIN: ResNode = SelectIntrinsicChain(N); break; - case NVPTXISD::Tex1DFloatI32: + case NVPTXISD::Tex1DFloatS32: case NVPTXISD::Tex1DFloatFloat: case NVPTXISD::Tex1DFloatFloatLevel: case NVPTXISD::Tex1DFloatFloatGrad: - case NVPTXISD::Tex1DI32I32: - case NVPTXISD::Tex1DI32Float: - case NVPTXISD::Tex1DI32FloatLevel: - case NVPTXISD::Tex1DI32FloatGrad: - case NVPTXISD::Tex1DArrayFloatI32: + case NVPTXISD::Tex1DS32S32: + case NVPTXISD::Tex1DS32Float: + case NVPTXISD::Tex1DS32FloatLevel: + case NVPTXISD::Tex1DS32FloatGrad: + case NVPTXISD::Tex1DU32S32: + case NVPTXISD::Tex1DU32Float: + case NVPTXISD::Tex1DU32FloatLevel: + case NVPTXISD::Tex1DU32FloatGrad: + case NVPTXISD::Tex1DArrayFloatS32: case NVPTXISD::Tex1DArrayFloatFloat: case NVPTXISD::Tex1DArrayFloatFloatLevel: case NVPTXISD::Tex1DArrayFloatFloatGrad: - case NVPTXISD::Tex1DArrayI32I32: - case NVPTXISD::Tex1DArrayI32Float: - case NVPTXISD::Tex1DArrayI32FloatLevel: - case NVPTXISD::Tex1DArrayI32FloatGrad: - case NVPTXISD::Tex2DFloatI32: + case NVPTXISD::Tex1DArrayS32S32: + case NVPTXISD::Tex1DArrayS32Float: + case NVPTXISD::Tex1DArrayS32FloatLevel: + case NVPTXISD::Tex1DArrayS32FloatGrad: + case NVPTXISD::Tex1DArrayU32S32: + case NVPTXISD::Tex1DArrayU32Float: + case NVPTXISD::Tex1DArrayU32FloatLevel: + case NVPTXISD::Tex1DArrayU32FloatGrad: + case NVPTXISD::Tex2DFloatS32: case NVPTXISD::Tex2DFloatFloat: case NVPTXISD::Tex2DFloatFloatLevel: case 
NVPTXISD::Tex2DFloatFloatGrad: - case NVPTXISD::Tex2DI32I32: - case NVPTXISD::Tex2DI32Float: - case NVPTXISD::Tex2DI32FloatLevel: - case NVPTXISD::Tex2DI32FloatGrad: - case NVPTXISD::Tex2DArrayFloatI32: + case NVPTXISD::Tex2DS32S32: + case NVPTXISD::Tex2DS32Float: + case NVPTXISD::Tex2DS32FloatLevel: + case NVPTXISD::Tex2DS32FloatGrad: + case NVPTXISD::Tex2DU32S32: + case NVPTXISD::Tex2DU32Float: + case NVPTXISD::Tex2DU32FloatLevel: + case NVPTXISD::Tex2DU32FloatGrad: + case NVPTXISD::Tex2DArrayFloatS32: case NVPTXISD::Tex2DArrayFloatFloat: case NVPTXISD::Tex2DArrayFloatFloatLevel: case NVPTXISD::Tex2DArrayFloatFloatGrad: - case NVPTXISD::Tex2DArrayI32I32: - case NVPTXISD::Tex2DArrayI32Float: - case NVPTXISD::Tex2DArrayI32FloatLevel: - case NVPTXISD::Tex2DArrayI32FloatGrad: - case NVPTXISD::Tex3DFloatI32: + case NVPTXISD::Tex2DArrayS32S32: + case NVPTXISD::Tex2DArrayS32Float: + case NVPTXISD::Tex2DArrayS32FloatLevel: + case NVPTXISD::Tex2DArrayS32FloatGrad: + case NVPTXISD::Tex2DArrayU32S32: + case NVPTXISD::Tex2DArrayU32Float: + case NVPTXISD::Tex2DArrayU32FloatLevel: + case NVPTXISD::Tex2DArrayU32FloatGrad: + case NVPTXISD::Tex3DFloatS32: case NVPTXISD::Tex3DFloatFloat: case NVPTXISD::Tex3DFloatFloatLevel: case NVPTXISD::Tex3DFloatFloatGrad: - case NVPTXISD::Tex3DI32I32: - case NVPTXISD::Tex3DI32Float: - case NVPTXISD::Tex3DI32FloatLevel: - case NVPTXISD::Tex3DI32FloatGrad: + case NVPTXISD::Tex3DS32S32: + case NVPTXISD::Tex3DS32Float: + case NVPTXISD::Tex3DS32FloatLevel: + case NVPTXISD::Tex3DS32FloatGrad: + case NVPTXISD::Tex3DU32S32: + case NVPTXISD::Tex3DU32Float: + case NVPTXISD::Tex3DU32FloatLevel: + case NVPTXISD::Tex3DU32FloatGrad: + case NVPTXISD::TexCubeFloatFloat: + case NVPTXISD::TexCubeFloatFloatLevel: + case NVPTXISD::TexCubeS32Float: + case NVPTXISD::TexCubeS32FloatLevel: + case NVPTXISD::TexCubeU32Float: + case NVPTXISD::TexCubeU32FloatLevel: + case NVPTXISD::TexCubeArrayFloatFloat: + case NVPTXISD::TexCubeArrayFloatFloatLevel: + case NVPTXISD::TexCubeArrayS32Float: + case NVPTXISD::TexCubeArrayS32FloatLevel: + case NVPTXISD::TexCubeArrayU32Float: + case NVPTXISD::TexCubeArrayU32FloatLevel: + case NVPTXISD::Tld4R2DFloatFloat: + case NVPTXISD::Tld4G2DFloatFloat: + case NVPTXISD::Tld4B2DFloatFloat: + case NVPTXISD::Tld4A2DFloatFloat: + case NVPTXISD::Tld4R2DS64Float: + case NVPTXISD::Tld4G2DS64Float: + case NVPTXISD::Tld4B2DS64Float: + case NVPTXISD::Tld4A2DS64Float: + case NVPTXISD::Tld4R2DU64Float: + case NVPTXISD::Tld4G2DU64Float: + case NVPTXISD::Tld4B2DU64Float: + case NVPTXISD::Tld4A2DU64Float: + case NVPTXISD::TexUnified1DFloatS32: + case NVPTXISD::TexUnified1DFloatFloat: + case NVPTXISD::TexUnified1DFloatFloatLevel: + case NVPTXISD::TexUnified1DFloatFloatGrad: + case NVPTXISD::TexUnified1DS32S32: + case NVPTXISD::TexUnified1DS32Float: + case NVPTXISD::TexUnified1DS32FloatLevel: + case NVPTXISD::TexUnified1DS32FloatGrad: + case NVPTXISD::TexUnified1DU32S32: + case NVPTXISD::TexUnified1DU32Float: + case NVPTXISD::TexUnified1DU32FloatLevel: + case NVPTXISD::TexUnified1DU32FloatGrad: + case NVPTXISD::TexUnified1DArrayFloatS32: + case NVPTXISD::TexUnified1DArrayFloatFloat: + case NVPTXISD::TexUnified1DArrayFloatFloatLevel: + case NVPTXISD::TexUnified1DArrayFloatFloatGrad: + case NVPTXISD::TexUnified1DArrayS32S32: + case NVPTXISD::TexUnified1DArrayS32Float: + case NVPTXISD::TexUnified1DArrayS32FloatLevel: + case NVPTXISD::TexUnified1DArrayS32FloatGrad: + case NVPTXISD::TexUnified1DArrayU32S32: + case NVPTXISD::TexUnified1DArrayU32Float: + case 
NVPTXISD::TexUnified1DArrayU32FloatLevel: + case NVPTXISD::TexUnified1DArrayU32FloatGrad: + case NVPTXISD::TexUnified2DFloatS32: + case NVPTXISD::TexUnified2DFloatFloat: + case NVPTXISD::TexUnified2DFloatFloatLevel: + case NVPTXISD::TexUnified2DFloatFloatGrad: + case NVPTXISD::TexUnified2DS32S32: + case NVPTXISD::TexUnified2DS32Float: + case NVPTXISD::TexUnified2DS32FloatLevel: + case NVPTXISD::TexUnified2DS32FloatGrad: + case NVPTXISD::TexUnified2DU32S32: + case NVPTXISD::TexUnified2DU32Float: + case NVPTXISD::TexUnified2DU32FloatLevel: + case NVPTXISD::TexUnified2DU32FloatGrad: + case NVPTXISD::TexUnified2DArrayFloatS32: + case NVPTXISD::TexUnified2DArrayFloatFloat: + case NVPTXISD::TexUnified2DArrayFloatFloatLevel: + case NVPTXISD::TexUnified2DArrayFloatFloatGrad: + case NVPTXISD::TexUnified2DArrayS32S32: + case NVPTXISD::TexUnified2DArrayS32Float: + case NVPTXISD::TexUnified2DArrayS32FloatLevel: + case NVPTXISD::TexUnified2DArrayS32FloatGrad: + case NVPTXISD::TexUnified2DArrayU32S32: + case NVPTXISD::TexUnified2DArrayU32Float: + case NVPTXISD::TexUnified2DArrayU32FloatLevel: + case NVPTXISD::TexUnified2DArrayU32FloatGrad: + case NVPTXISD::TexUnified3DFloatS32: + case NVPTXISD::TexUnified3DFloatFloat: + case NVPTXISD::TexUnified3DFloatFloatLevel: + case NVPTXISD::TexUnified3DFloatFloatGrad: + case NVPTXISD::TexUnified3DS32S32: + case NVPTXISD::TexUnified3DS32Float: + case NVPTXISD::TexUnified3DS32FloatLevel: + case NVPTXISD::TexUnified3DS32FloatGrad: + case NVPTXISD::TexUnified3DU32S32: + case NVPTXISD::TexUnified3DU32Float: + case NVPTXISD::TexUnified3DU32FloatLevel: + case NVPTXISD::TexUnified3DU32FloatGrad: + case NVPTXISD::TexUnifiedCubeFloatFloat: + case NVPTXISD::TexUnifiedCubeFloatFloatLevel: + case NVPTXISD::TexUnifiedCubeS32Float: + case NVPTXISD::TexUnifiedCubeS32FloatLevel: + case NVPTXISD::TexUnifiedCubeU32Float: + case NVPTXISD::TexUnifiedCubeU32FloatLevel: + case NVPTXISD::TexUnifiedCubeArrayFloatFloat: + case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: + case NVPTXISD::TexUnifiedCubeArrayS32Float: + case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: + case NVPTXISD::TexUnifiedCubeArrayU32Float: + case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: + case NVPTXISD::Tld4UnifiedR2DFloatFloat: + case NVPTXISD::Tld4UnifiedG2DFloatFloat: + case NVPTXISD::Tld4UnifiedB2DFloatFloat: + case NVPTXISD::Tld4UnifiedA2DFloatFloat: + case NVPTXISD::Tld4UnifiedR2DS64Float: + case NVPTXISD::Tld4UnifiedG2DS64Float: + case NVPTXISD::Tld4UnifiedB2DS64Float: + case NVPTXISD::Tld4UnifiedA2DS64Float: + case NVPTXISD::Tld4UnifiedR2DU64Float: + case NVPTXISD::Tld4UnifiedG2DU64Float: + case NVPTXISD::Tld4UnifiedB2DU64Float: + case NVPTXISD::Tld4UnifiedA2DU64Float: ResNode = SelectTextureIntrinsic(N); break; + case NVPTXISD::Suld1DI8Clamp: + case NVPTXISD::Suld1DI16Clamp: + case NVPTXISD::Suld1DI32Clamp: + case NVPTXISD::Suld1DI64Clamp: + case NVPTXISD::Suld1DV2I8Clamp: + case NVPTXISD::Suld1DV2I16Clamp: + case NVPTXISD::Suld1DV2I32Clamp: + case NVPTXISD::Suld1DV2I64Clamp: + case NVPTXISD::Suld1DV4I8Clamp: + case NVPTXISD::Suld1DV4I16Clamp: + case NVPTXISD::Suld1DV4I32Clamp: + case NVPTXISD::Suld1DArrayI8Clamp: + case NVPTXISD::Suld1DArrayI16Clamp: + case NVPTXISD::Suld1DArrayI32Clamp: + case NVPTXISD::Suld1DArrayI64Clamp: + case NVPTXISD::Suld1DArrayV2I8Clamp: + case NVPTXISD::Suld1DArrayV2I16Clamp: + case NVPTXISD::Suld1DArrayV2I32Clamp: + case NVPTXISD::Suld1DArrayV2I64Clamp: + case NVPTXISD::Suld1DArrayV4I8Clamp: + case NVPTXISD::Suld1DArrayV4I16Clamp: + case NVPTXISD::Suld1DArrayV4I32Clamp: + case 
NVPTXISD::Suld2DI8Clamp: + case NVPTXISD::Suld2DI16Clamp: + case NVPTXISD::Suld2DI32Clamp: + case NVPTXISD::Suld2DI64Clamp: + case NVPTXISD::Suld2DV2I8Clamp: + case NVPTXISD::Suld2DV2I16Clamp: + case NVPTXISD::Suld2DV2I32Clamp: + case NVPTXISD::Suld2DV2I64Clamp: + case NVPTXISD::Suld2DV4I8Clamp: + case NVPTXISD::Suld2DV4I16Clamp: + case NVPTXISD::Suld2DV4I32Clamp: + case NVPTXISD::Suld2DArrayI8Clamp: + case NVPTXISD::Suld2DArrayI16Clamp: + case NVPTXISD::Suld2DArrayI32Clamp: + case NVPTXISD::Suld2DArrayI64Clamp: + case NVPTXISD::Suld2DArrayV2I8Clamp: + case NVPTXISD::Suld2DArrayV2I16Clamp: + case NVPTXISD::Suld2DArrayV2I32Clamp: + case NVPTXISD::Suld2DArrayV2I64Clamp: + case NVPTXISD::Suld2DArrayV4I8Clamp: + case NVPTXISD::Suld2DArrayV4I16Clamp: + case NVPTXISD::Suld2DArrayV4I32Clamp: + case NVPTXISD::Suld3DI8Clamp: + case NVPTXISD::Suld3DI16Clamp: + case NVPTXISD::Suld3DI32Clamp: + case NVPTXISD::Suld3DI64Clamp: + case NVPTXISD::Suld3DV2I8Clamp: + case NVPTXISD::Suld3DV2I16Clamp: + case NVPTXISD::Suld3DV2I32Clamp: + case NVPTXISD::Suld3DV2I64Clamp: + case NVPTXISD::Suld3DV4I8Clamp: + case NVPTXISD::Suld3DV4I16Clamp: + case NVPTXISD::Suld3DV4I32Clamp: case NVPTXISD::Suld1DI8Trap: case NVPTXISD::Suld1DI16Trap: case NVPTXISD::Suld1DI32Trap: + case NVPTXISD::Suld1DI64Trap: case NVPTXISD::Suld1DV2I8Trap: case NVPTXISD::Suld1DV2I16Trap: case NVPTXISD::Suld1DV2I32Trap: + case NVPTXISD::Suld1DV2I64Trap: case NVPTXISD::Suld1DV4I8Trap: case NVPTXISD::Suld1DV4I16Trap: case NVPTXISD::Suld1DV4I32Trap: case NVPTXISD::Suld1DArrayI8Trap: case NVPTXISD::Suld1DArrayI16Trap: case NVPTXISD::Suld1DArrayI32Trap: + case NVPTXISD::Suld1DArrayI64Trap: case NVPTXISD::Suld1DArrayV2I8Trap: case NVPTXISD::Suld1DArrayV2I16Trap: case NVPTXISD::Suld1DArrayV2I32Trap: + case NVPTXISD::Suld1DArrayV2I64Trap: case NVPTXISD::Suld1DArrayV4I8Trap: case NVPTXISD::Suld1DArrayV4I16Trap: case NVPTXISD::Suld1DArrayV4I32Trap: case NVPTXISD::Suld2DI8Trap: case NVPTXISD::Suld2DI16Trap: case NVPTXISD::Suld2DI32Trap: + case NVPTXISD::Suld2DI64Trap: case NVPTXISD::Suld2DV2I8Trap: case NVPTXISD::Suld2DV2I16Trap: case NVPTXISD::Suld2DV2I32Trap: + case NVPTXISD::Suld2DV2I64Trap: case NVPTXISD::Suld2DV4I8Trap: case NVPTXISD::Suld2DV4I16Trap: case NVPTXISD::Suld2DV4I32Trap: case NVPTXISD::Suld2DArrayI8Trap: case NVPTXISD::Suld2DArrayI16Trap: case NVPTXISD::Suld2DArrayI32Trap: + case NVPTXISD::Suld2DArrayI64Trap: case NVPTXISD::Suld2DArrayV2I8Trap: case NVPTXISD::Suld2DArrayV2I16Trap: case NVPTXISD::Suld2DArrayV2I32Trap: + case NVPTXISD::Suld2DArrayV2I64Trap: case NVPTXISD::Suld2DArrayV4I8Trap: case NVPTXISD::Suld2DArrayV4I16Trap: case NVPTXISD::Suld2DArrayV4I32Trap: case NVPTXISD::Suld3DI8Trap: case NVPTXISD::Suld3DI16Trap: case NVPTXISD::Suld3DI32Trap: + case NVPTXISD::Suld3DI64Trap: case NVPTXISD::Suld3DV2I8Trap: case NVPTXISD::Suld3DV2I16Trap: case NVPTXISD::Suld3DV2I32Trap: + case NVPTXISD::Suld3DV2I64Trap: case NVPTXISD::Suld3DV4I8Trap: case NVPTXISD::Suld3DV4I16Trap: case NVPTXISD::Suld3DV4I32Trap: + case NVPTXISD::Suld1DI8Zero: + case NVPTXISD::Suld1DI16Zero: + case NVPTXISD::Suld1DI32Zero: + case NVPTXISD::Suld1DI64Zero: + case NVPTXISD::Suld1DV2I8Zero: + case NVPTXISD::Suld1DV2I16Zero: + case NVPTXISD::Suld1DV2I32Zero: + case NVPTXISD::Suld1DV2I64Zero: + case NVPTXISD::Suld1DV4I8Zero: + case NVPTXISD::Suld1DV4I16Zero: + case NVPTXISD::Suld1DV4I32Zero: + case NVPTXISD::Suld1DArrayI8Zero: + case NVPTXISD::Suld1DArrayI16Zero: + case NVPTXISD::Suld1DArrayI32Zero: + case NVPTXISD::Suld1DArrayI64Zero: + case NVPTXISD::Suld1DArrayV2I8Zero: + 
case NVPTXISD::Suld1DArrayV2I16Zero: + case NVPTXISD::Suld1DArrayV2I32Zero: + case NVPTXISD::Suld1DArrayV2I64Zero: + case NVPTXISD::Suld1DArrayV4I8Zero: + case NVPTXISD::Suld1DArrayV4I16Zero: + case NVPTXISD::Suld1DArrayV4I32Zero: + case NVPTXISD::Suld2DI8Zero: + case NVPTXISD::Suld2DI16Zero: + case NVPTXISD::Suld2DI32Zero: + case NVPTXISD::Suld2DI64Zero: + case NVPTXISD::Suld2DV2I8Zero: + case NVPTXISD::Suld2DV2I16Zero: + case NVPTXISD::Suld2DV2I32Zero: + case NVPTXISD::Suld2DV2I64Zero: + case NVPTXISD::Suld2DV4I8Zero: + case NVPTXISD::Suld2DV4I16Zero: + case NVPTXISD::Suld2DV4I32Zero: + case NVPTXISD::Suld2DArrayI8Zero: + case NVPTXISD::Suld2DArrayI16Zero: + case NVPTXISD::Suld2DArrayI32Zero: + case NVPTXISD::Suld2DArrayI64Zero: + case NVPTXISD::Suld2DArrayV2I8Zero: + case NVPTXISD::Suld2DArrayV2I16Zero: + case NVPTXISD::Suld2DArrayV2I32Zero: + case NVPTXISD::Suld2DArrayV2I64Zero: + case NVPTXISD::Suld2DArrayV4I8Zero: + case NVPTXISD::Suld2DArrayV4I16Zero: + case NVPTXISD::Suld2DArrayV4I32Zero: + case NVPTXISD::Suld3DI8Zero: + case NVPTXISD::Suld3DI16Zero: + case NVPTXISD::Suld3DI32Zero: + case NVPTXISD::Suld3DI64Zero: + case NVPTXISD::Suld3DV2I8Zero: + case NVPTXISD::Suld3DV2I16Zero: + case NVPTXISD::Suld3DV2I32Zero: + case NVPTXISD::Suld3DV2I64Zero: + case NVPTXISD::Suld3DV4I8Zero: + case NVPTXISD::Suld3DV4I16Zero: + case NVPTXISD::Suld3DV4I32Zero: ResNode = SelectSurfaceIntrinsic(N); break; case ISD::AND: @@ -2781,16 +3029,14 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) { SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) { SDValue Chain = N->getOperand(0); - SDValue TexRef = N->getOperand(1); - SDValue SampRef = N->getOperand(2); SDNode *Ret = nullptr; unsigned Opc = 0; SmallVector Ops; switch (N->getOpcode()) { default: return nullptr; - case NVPTXISD::Tex1DFloatI32: - Opc = NVPTX::TEX_1D_F32_I32; + case NVPTXISD::Tex1DFloatS32: + Opc = NVPTX::TEX_1D_F32_S32; break; case NVPTXISD::Tex1DFloatFloat: Opc = NVPTX::TEX_1D_F32_F32; @@ -2801,20 +3047,32 @@ SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) { case NVPTXISD::Tex1DFloatFloatGrad: Opc = NVPTX::TEX_1D_F32_F32_GRAD; break; - case NVPTXISD::Tex1DI32I32: - Opc = NVPTX::TEX_1D_I32_I32; + case NVPTXISD::Tex1DS32S32: + Opc = NVPTX::TEX_1D_S32_S32; break; - case NVPTXISD::Tex1DI32Float: - Opc = NVPTX::TEX_1D_I32_F32; + case NVPTXISD::Tex1DS32Float: + Opc = NVPTX::TEX_1D_S32_F32; break; - case NVPTXISD::Tex1DI32FloatLevel: - Opc = NVPTX::TEX_1D_I32_F32_LEVEL; + case NVPTXISD::Tex1DS32FloatLevel: + Opc = NVPTX::TEX_1D_S32_F32_LEVEL; break; - case NVPTXISD::Tex1DI32FloatGrad: - Opc = NVPTX::TEX_1D_I32_F32_GRAD; + case NVPTXISD::Tex1DS32FloatGrad: + Opc = NVPTX::TEX_1D_S32_F32_GRAD; break; - case NVPTXISD::Tex1DArrayFloatI32: - Opc = NVPTX::TEX_1D_ARRAY_F32_I32; + case NVPTXISD::Tex1DU32S32: + Opc = NVPTX::TEX_1D_U32_S32; + break; + case NVPTXISD::Tex1DU32Float: + Opc = NVPTX::TEX_1D_U32_F32; + break; + case NVPTXISD::Tex1DU32FloatLevel: + Opc = NVPTX::TEX_1D_U32_F32_LEVEL; + break; + case NVPTXISD::Tex1DU32FloatGrad: + Opc = NVPTX::TEX_1D_U32_F32_GRAD; + break; + case NVPTXISD::Tex1DArrayFloatS32: + Opc = NVPTX::TEX_1D_ARRAY_F32_S32; break; case NVPTXISD::Tex1DArrayFloatFloat: Opc = NVPTX::TEX_1D_ARRAY_F32_F32; @@ -2825,20 +3083,32 @@ SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) { case NVPTXISD::Tex1DArrayFloatFloatGrad: Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD; break; - case NVPTXISD::Tex1DArrayI32I32: - Opc = NVPTX::TEX_1D_ARRAY_I32_I32; + case NVPTXISD::Tex1DArrayS32S32: + Opc = 
NVPTX::TEX_1D_ARRAY_S32_S32; + break; + case NVPTXISD::Tex1DArrayS32Float: + Opc = NVPTX::TEX_1D_ARRAY_S32_F32; + break; + case NVPTXISD::Tex1DArrayS32FloatLevel: + Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL; + break; + case NVPTXISD::Tex1DArrayS32FloatGrad: + Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD; + break; + case NVPTXISD::Tex1DArrayU32S32: + Opc = NVPTX::TEX_1D_ARRAY_U32_S32; break; - case NVPTXISD::Tex1DArrayI32Float: - Opc = NVPTX::TEX_1D_ARRAY_I32_F32; + case NVPTXISD::Tex1DArrayU32Float: + Opc = NVPTX::TEX_1D_ARRAY_U32_F32; break; - case NVPTXISD::Tex1DArrayI32FloatLevel: - Opc = NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL; + case NVPTXISD::Tex1DArrayU32FloatLevel: + Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL; break; - case NVPTXISD::Tex1DArrayI32FloatGrad: - Opc = NVPTX::TEX_1D_ARRAY_I32_F32_GRAD; + case NVPTXISD::Tex1DArrayU32FloatGrad: + Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD; break; - case NVPTXISD::Tex2DFloatI32: - Opc = NVPTX::TEX_2D_F32_I32; + case NVPTXISD::Tex2DFloatS32: + Opc = NVPTX::TEX_2D_F32_S32; break; case NVPTXISD::Tex2DFloatFloat: Opc = NVPTX::TEX_2D_F32_F32; @@ -2849,20 +3119,32 @@ SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) { case NVPTXISD::Tex2DFloatFloatGrad: Opc = NVPTX::TEX_2D_F32_F32_GRAD; break; - case NVPTXISD::Tex2DI32I32: - Opc = NVPTX::TEX_2D_I32_I32; + case NVPTXISD::Tex2DS32S32: + Opc = NVPTX::TEX_2D_S32_S32; break; - case NVPTXISD::Tex2DI32Float: - Opc = NVPTX::TEX_2D_I32_F32; + case NVPTXISD::Tex2DS32Float: + Opc = NVPTX::TEX_2D_S32_F32; break; - case NVPTXISD::Tex2DI32FloatLevel: - Opc = NVPTX::TEX_2D_I32_F32_LEVEL; + case NVPTXISD::Tex2DS32FloatLevel: + Opc = NVPTX::TEX_2D_S32_F32_LEVEL; break; - case NVPTXISD::Tex2DI32FloatGrad: - Opc = NVPTX::TEX_2D_I32_F32_GRAD; + case NVPTXISD::Tex2DS32FloatGrad: + Opc = NVPTX::TEX_2D_S32_F32_GRAD; break; - case NVPTXISD::Tex2DArrayFloatI32: - Opc = NVPTX::TEX_2D_ARRAY_F32_I32; + case NVPTXISD::Tex2DU32S32: + Opc = NVPTX::TEX_2D_U32_S32; + break; + case NVPTXISD::Tex2DU32Float: + Opc = NVPTX::TEX_2D_U32_F32; + break; + case NVPTXISD::Tex2DU32FloatLevel: + Opc = NVPTX::TEX_2D_U32_F32_LEVEL; + break; + case NVPTXISD::Tex2DU32FloatGrad: + Opc = NVPTX::TEX_2D_U32_F32_GRAD; + break; + case NVPTXISD::Tex2DArrayFloatS32: + Opc = NVPTX::TEX_2D_ARRAY_F32_S32; break; case NVPTXISD::Tex2DArrayFloatFloat: Opc = NVPTX::TEX_2D_ARRAY_F32_F32; @@ -2873,20 +3155,32 @@ SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) { case NVPTXISD::Tex2DArrayFloatFloatGrad: Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD; break; - case NVPTXISD::Tex2DArrayI32I32: - Opc = NVPTX::TEX_2D_ARRAY_I32_I32; + case NVPTXISD::Tex2DArrayS32S32: + Opc = NVPTX::TEX_2D_ARRAY_S32_S32; + break; + case NVPTXISD::Tex2DArrayS32Float: + Opc = NVPTX::TEX_2D_ARRAY_S32_F32; + break; + case NVPTXISD::Tex2DArrayS32FloatLevel: + Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL; + break; + case NVPTXISD::Tex2DArrayS32FloatGrad: + Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD; + break; + case NVPTXISD::Tex2DArrayU32S32: + Opc = NVPTX::TEX_2D_ARRAY_U32_S32; break; - case NVPTXISD::Tex2DArrayI32Float: - Opc = NVPTX::TEX_2D_ARRAY_I32_F32; + case NVPTXISD::Tex2DArrayU32Float: + Opc = NVPTX::TEX_2D_ARRAY_U32_F32; break; - case NVPTXISD::Tex2DArrayI32FloatLevel: - Opc = NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL; + case NVPTXISD::Tex2DArrayU32FloatLevel: + Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL; break; - case NVPTXISD::Tex2DArrayI32FloatGrad: - Opc = NVPTX::TEX_2D_ARRAY_I32_F32_GRAD; + case NVPTXISD::Tex2DArrayU32FloatGrad: + Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD; break; - case 
NVPTXISD::Tex3DFloatI32: - Opc = NVPTX::TEX_3D_F32_I32; + case NVPTXISD::Tex3DFloatS32: + Opc = NVPTX::TEX_3D_F32_S32; break; case NVPTXISD::Tex3DFloatFloat: Opc = NVPTX::TEX_3D_F32_F32; @@ -2897,25 +3191,358 @@ SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) { case NVPTXISD::Tex3DFloatFloatGrad: Opc = NVPTX::TEX_3D_F32_F32_GRAD; break; - case NVPTXISD::Tex3DI32I32: - Opc = NVPTX::TEX_3D_I32_I32; + case NVPTXISD::Tex3DS32S32: + Opc = NVPTX::TEX_3D_S32_S32; + break; + case NVPTXISD::Tex3DS32Float: + Opc = NVPTX::TEX_3D_S32_F32; + break; + case NVPTXISD::Tex3DS32FloatLevel: + Opc = NVPTX::TEX_3D_S32_F32_LEVEL; + break; + case NVPTXISD::Tex3DS32FloatGrad: + Opc = NVPTX::TEX_3D_S32_F32_GRAD; + break; + case NVPTXISD::Tex3DU32S32: + Opc = NVPTX::TEX_3D_U32_S32; + break; + case NVPTXISD::Tex3DU32Float: + Opc = NVPTX::TEX_3D_U32_F32; + break; + case NVPTXISD::Tex3DU32FloatLevel: + Opc = NVPTX::TEX_3D_U32_F32_LEVEL; + break; + case NVPTXISD::Tex3DU32FloatGrad: + Opc = NVPTX::TEX_3D_U32_F32_GRAD; + break; + case NVPTXISD::TexCubeFloatFloat: + Opc = NVPTX::TEX_CUBE_F32_F32; + break; + case NVPTXISD::TexCubeFloatFloatLevel: + Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL; + break; + case NVPTXISD::TexCubeS32Float: + Opc = NVPTX::TEX_CUBE_S32_F32; + break; + case NVPTXISD::TexCubeS32FloatLevel: + Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL; + break; + case NVPTXISD::TexCubeU32Float: + Opc = NVPTX::TEX_CUBE_U32_F32; + break; + case NVPTXISD::TexCubeU32FloatLevel: + Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL; + break; + case NVPTXISD::TexCubeArrayFloatFloat: + Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32; + break; + case NVPTXISD::TexCubeArrayFloatFloatLevel: + Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL; + break; + case NVPTXISD::TexCubeArrayS32Float: + Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32; + break; + case NVPTXISD::TexCubeArrayS32FloatLevel: + Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL; + break; + case NVPTXISD::TexCubeArrayU32Float: + Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32; + break; + case NVPTXISD::TexCubeArrayU32FloatLevel: + Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL; + break; + case NVPTXISD::Tld4R2DFloatFloat: + Opc = NVPTX::TLD4_R_2D_F32_F32; + break; + case NVPTXISD::Tld4G2DFloatFloat: + Opc = NVPTX::TLD4_G_2D_F32_F32; + break; + case NVPTXISD::Tld4B2DFloatFloat: + Opc = NVPTX::TLD4_B_2D_F32_F32; + break; + case NVPTXISD::Tld4A2DFloatFloat: + Opc = NVPTX::TLD4_A_2D_F32_F32; + break; + case NVPTXISD::Tld4R2DS64Float: + Opc = NVPTX::TLD4_R_2D_S32_F32; + break; + case NVPTXISD::Tld4G2DS64Float: + Opc = NVPTX::TLD4_G_2D_S32_F32; + break; + case NVPTXISD::Tld4B2DS64Float: + Opc = NVPTX::TLD4_B_2D_S32_F32; + break; + case NVPTXISD::Tld4A2DS64Float: + Opc = NVPTX::TLD4_A_2D_S32_F32; + break; + case NVPTXISD::Tld4R2DU64Float: + Opc = NVPTX::TLD4_R_2D_U32_F32; + break; + case NVPTXISD::Tld4G2DU64Float: + Opc = NVPTX::TLD4_G_2D_U32_F32; + break; + case NVPTXISD::Tld4B2DU64Float: + Opc = NVPTX::TLD4_B_2D_U32_F32; + break; + case NVPTXISD::Tld4A2DU64Float: + Opc = NVPTX::TLD4_A_2D_U32_F32; + break; + case NVPTXISD::TexUnified1DFloatS32: + Opc = NVPTX::TEX_UNIFIED_1D_F32_S32; + break; + case NVPTXISD::TexUnified1DFloatFloat: + Opc = NVPTX::TEX_UNIFIED_1D_F32_F32; + break; + case NVPTXISD::TexUnified1DFloatFloatLevel: + Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL; + break; + case NVPTXISD::TexUnified1DFloatFloatGrad: + Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD; + break; + case NVPTXISD::TexUnified1DS32S32: + Opc = NVPTX::TEX_UNIFIED_1D_S32_S32; + break; + case NVPTXISD::TexUnified1DS32Float: + Opc = NVPTX::TEX_UNIFIED_1D_S32_F32; + break; 
+ case NVPTXISD::TexUnified1DS32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL; + break; + case NVPTXISD::TexUnified1DS32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD; + break; + case NVPTXISD::TexUnified1DU32S32: + Opc = NVPTX::TEX_UNIFIED_1D_U32_S32; + break; + case NVPTXISD::TexUnified1DU32Float: + Opc = NVPTX::TEX_UNIFIED_1D_U32_F32; + break; + case NVPTXISD::TexUnified1DU32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL; + break; + case NVPTXISD::TexUnified1DU32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD; + break; + case NVPTXISD::TexUnified1DArrayFloatS32: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32; + break; + case NVPTXISD::TexUnified1DArrayFloatFloat: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32; + break; + case NVPTXISD::TexUnified1DArrayFloatFloatLevel: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL; + break; + case NVPTXISD::TexUnified1DArrayFloatFloatGrad: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD; + break; + case NVPTXISD::TexUnified1DArrayS32S32: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32; + break; + case NVPTXISD::TexUnified1DArrayS32Float: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32; + break; + case NVPTXISD::TexUnified1DArrayS32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL; + break; + case NVPTXISD::TexUnified1DArrayS32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD; + break; + case NVPTXISD::TexUnified1DArrayU32S32: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32; + break; + case NVPTXISD::TexUnified1DArrayU32Float: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32; + break; + case NVPTXISD::TexUnified1DArrayU32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL; + break; + case NVPTXISD::TexUnified1DArrayU32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD; + break; + case NVPTXISD::TexUnified2DFloatS32: + Opc = NVPTX::TEX_UNIFIED_2D_F32_S32; + break; + case NVPTXISD::TexUnified2DFloatFloat: + Opc = NVPTX::TEX_UNIFIED_2D_F32_F32; + break; + case NVPTXISD::TexUnified2DFloatFloatLevel: + Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL; + break; + case NVPTXISD::TexUnified2DFloatFloatGrad: + Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD; + break; + case NVPTXISD::TexUnified2DS32S32: + Opc = NVPTX::TEX_UNIFIED_2D_S32_S32; + break; + case NVPTXISD::TexUnified2DS32Float: + Opc = NVPTX::TEX_UNIFIED_2D_S32_F32; + break; + case NVPTXISD::TexUnified2DS32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL; + break; + case NVPTXISD::TexUnified2DS32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD; + break; + case NVPTXISD::TexUnified2DU32S32: + Opc = NVPTX::TEX_UNIFIED_2D_U32_S32; + break; + case NVPTXISD::TexUnified2DU32Float: + Opc = NVPTX::TEX_UNIFIED_2D_U32_F32; + break; + case NVPTXISD::TexUnified2DU32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL; + break; + case NVPTXISD::TexUnified2DU32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD; + break; + case NVPTXISD::TexUnified2DArrayFloatS32: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32; + break; + case NVPTXISD::TexUnified2DArrayFloatFloat: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32; + break; + case NVPTXISD::TexUnified2DArrayFloatFloatLevel: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL; + break; + case NVPTXISD::TexUnified2DArrayFloatFloatGrad: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD; + break; + case NVPTXISD::TexUnified2DArrayS32S32: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32; + break; + case NVPTXISD::TexUnified2DArrayS32Float: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32; + break; + case 
NVPTXISD::TexUnified2DArrayS32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL; + break; + case NVPTXISD::TexUnified2DArrayS32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD; + break; + case NVPTXISD::TexUnified2DArrayU32S32: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32; + break; + case NVPTXISD::TexUnified2DArrayU32Float: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32; + break; + case NVPTXISD::TexUnified2DArrayU32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL; + break; + case NVPTXISD::TexUnified2DArrayU32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD; + break; + case NVPTXISD::TexUnified3DFloatS32: + Opc = NVPTX::TEX_UNIFIED_3D_F32_S32; + break; + case NVPTXISD::TexUnified3DFloatFloat: + Opc = NVPTX::TEX_UNIFIED_3D_F32_F32; + break; + case NVPTXISD::TexUnified3DFloatFloatLevel: + Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL; + break; + case NVPTXISD::TexUnified3DFloatFloatGrad: + Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD; + break; + case NVPTXISD::TexUnified3DS32S32: + Opc = NVPTX::TEX_UNIFIED_3D_S32_S32; + break; + case NVPTXISD::TexUnified3DS32Float: + Opc = NVPTX::TEX_UNIFIED_3D_S32_F32; + break; + case NVPTXISD::TexUnified3DS32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL; + break; + case NVPTXISD::TexUnified3DS32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD; + break; + case NVPTXISD::TexUnified3DU32S32: + Opc = NVPTX::TEX_UNIFIED_3D_U32_S32; + break; + case NVPTXISD::TexUnified3DU32Float: + Opc = NVPTX::TEX_UNIFIED_3D_U32_F32; + break; + case NVPTXISD::TexUnified3DU32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL; + break; + case NVPTXISD::TexUnified3DU32FloatGrad: + Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD; + break; + case NVPTXISD::TexUnifiedCubeFloatFloat: + Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32; + break; + case NVPTXISD::TexUnifiedCubeFloatFloatLevel: + Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL; + break; + case NVPTXISD::TexUnifiedCubeS32Float: + Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32; + break; + case NVPTXISD::TexUnifiedCubeS32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL; + break; + case NVPTXISD::TexUnifiedCubeU32Float: + Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32; + break; + case NVPTXISD::TexUnifiedCubeU32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL; + break; + case NVPTXISD::TexUnifiedCubeArrayFloatFloat: + Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32; + break; + case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: + Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL; + break; + case NVPTXISD::TexUnifiedCubeArrayS32Float: + Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32; + break; + case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL; + break; + case NVPTXISD::TexUnifiedCubeArrayU32Float: + Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32; + break; + case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: + Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL; + break; + case NVPTXISD::Tld4UnifiedR2DFloatFloat: + Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32; + break; + case NVPTXISD::Tld4UnifiedG2DFloatFloat: + Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32; + break; + case NVPTXISD::Tld4UnifiedB2DFloatFloat: + Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32; + break; + case NVPTXISD::Tld4UnifiedA2DFloatFloat: + Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32; + break; + case NVPTXISD::Tld4UnifiedR2DS64Float: + Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32; + break; + case NVPTXISD::Tld4UnifiedG2DS64Float: + Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32; + break; + case 
NVPTXISD::Tld4UnifiedB2DS64Float: + Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32; + break; + case NVPTXISD::Tld4UnifiedA2DS64Float: + Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32; + break; + case NVPTXISD::Tld4UnifiedR2DU64Float: + Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32; break; - case NVPTXISD::Tex3DI32Float: - Opc = NVPTX::TEX_3D_I32_F32; + case NVPTXISD::Tld4UnifiedG2DU64Float: + Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32; break; - case NVPTXISD::Tex3DI32FloatLevel: - Opc = NVPTX::TEX_3D_I32_F32_LEVEL; + case NVPTXISD::Tld4UnifiedB2DU64Float: + Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32; break; - case NVPTXISD::Tex3DI32FloatGrad: - Opc = NVPTX::TEX_3D_I32_F32_GRAD; + case NVPTXISD::Tld4UnifiedA2DU64Float: + Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32; break; } - Ops.push_back(TexRef); - Ops.push_back(SampRef); - - // Copy over indices - for (unsigned i = 3; i < N->getNumOperands(); ++i) { + // Copy over operands + for (unsigned i = 1; i < N->getNumOperands(); ++i) { Ops.push_back(N->getOperand(i)); } @@ -2932,334 +3559,1199 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { SmallVector Ops; switch (N->getOpcode()) { default: return nullptr; - case NVPTXISD::Suld1DI8Trap: - Opc = NVPTX::SULD_1D_I8_TRAP; + case NVPTXISD::Suld1DI8Clamp: + Opc = NVPTX::SULD_1D_I8_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(Chain); break; - case NVPTXISD::Suld1DI16Trap: - Opc = NVPTX::SULD_1D_I16_TRAP; + case NVPTXISD::Suld1DI16Clamp: + Opc = NVPTX::SULD_1D_I16_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(Chain); break; - case NVPTXISD::Suld1DI32Trap: - Opc = NVPTX::SULD_1D_I32_TRAP; + case NVPTXISD::Suld1DI32Clamp: + Opc = NVPTX::SULD_1D_I32_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(Chain); break; - case NVPTXISD::Suld1DV2I8Trap: - Opc = NVPTX::SULD_1D_V2I8_TRAP; + case NVPTXISD::Suld1DI64Clamp: + Opc = NVPTX::SULD_1D_I64_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(Chain); break; - case NVPTXISD::Suld1DV2I16Trap: - Opc = NVPTX::SULD_1D_V2I16_TRAP; + case NVPTXISD::Suld1DV2I8Clamp: + Opc = NVPTX::SULD_1D_V2I8_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(Chain); break; - case NVPTXISD::Suld1DV2I32Trap: - Opc = NVPTX::SULD_1D_V2I32_TRAP; + case NVPTXISD::Suld1DV2I16Clamp: + Opc = NVPTX::SULD_1D_V2I16_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(Chain); break; - case NVPTXISD::Suld1DV4I8Trap: - Opc = NVPTX::SULD_1D_V4I8_TRAP; + case NVPTXISD::Suld1DV2I32Clamp: + Opc = NVPTX::SULD_1D_V2I32_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(Chain); break; - case NVPTXISD::Suld1DV4I16Trap: - Opc = NVPTX::SULD_1D_V4I16_TRAP; + case NVPTXISD::Suld1DV2I64Clamp: + Opc = NVPTX::SULD_1D_V2I64_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(Chain); break; - case NVPTXISD::Suld1DV4I32Trap: - Opc = NVPTX::SULD_1D_V4I32_TRAP; + case NVPTXISD::Suld1DV4I8Clamp: + Opc = NVPTX::SULD_1D_V4I8_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(Chain); break; - case NVPTXISD::Suld1DArrayI8Trap: - Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP; + case NVPTXISD::Suld1DV4I16Clamp: + Opc = NVPTX::SULD_1D_V4I16_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV4I32Clamp: + Opc = NVPTX::SULD_1D_V4I32_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + 
Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI8Clamp: + Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; - case NVPTXISD::Suld1DArrayI16Trap: - Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP; + case NVPTXISD::Suld1DArrayI16Clamp: + Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; - case NVPTXISD::Suld1DArrayI32Trap: - Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP; + case NVPTXISD::Suld1DArrayI32Clamp: + Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; - case NVPTXISD::Suld1DArrayV2I8Trap: - Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP; + case NVPTXISD::Suld1DArrayI64Clamp: + Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; - case NVPTXISD::Suld1DArrayV2I16Trap: - Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP; + case NVPTXISD::Suld1DArrayV2I8Clamp: + Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; - case NVPTXISD::Suld1DArrayV2I32Trap: - Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP; + case NVPTXISD::Suld1DArrayV2I16Clamp: + Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; - case NVPTXISD::Suld1DArrayV4I8Trap: - Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP; + case NVPTXISD::Suld1DArrayV2I32Clamp: + Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; - case NVPTXISD::Suld1DArrayV4I16Trap: - Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP; + case NVPTXISD::Suld1DArrayV2I64Clamp: + Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; - case NVPTXISD::Suld1DArrayV4I32Trap: - Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP; + case NVPTXISD::Suld1DArrayV4I8Clamp: + Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; - case NVPTXISD::Suld2DI8Trap: - Opc = NVPTX::SULD_2D_I8_TRAP; + case NVPTXISD::Suld1DArrayV4I16Clamp: + Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; - case NVPTXISD::Suld2DI16Trap: - Opc = NVPTX::SULD_2D_I16_TRAP; + case NVPTXISD::Suld1DArrayV4I32Clamp: + Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; - case NVPTXISD::Suld2DI32Trap: - Opc = NVPTX::SULD_2D_I32_TRAP; + case NVPTXISD::Suld2DI8Clamp: + Opc = NVPTX::SULD_2D_I8_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; - case NVPTXISD::Suld2DV2I8Trap: - Opc = NVPTX::SULD_2D_V2I8_TRAP; + case NVPTXISD::Suld2DI16Clamp: + Opc = NVPTX::SULD_2D_I16_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; - case NVPTXISD::Suld2DV2I16Trap: - Opc = 
NVPTX::SULD_2D_V2I16_TRAP; + case NVPTXISD::Suld2DI32Clamp: + Opc = NVPTX::SULD_2D_I32_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; - case NVPTXISD::Suld2DV2I32Trap: - Opc = NVPTX::SULD_2D_V2I32_TRAP; + case NVPTXISD::Suld2DI64Clamp: + Opc = NVPTX::SULD_2D_I64_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; - case NVPTXISD::Suld2DV4I8Trap: - Opc = NVPTX::SULD_2D_V4I8_TRAP; + case NVPTXISD::Suld2DV2I8Clamp: + Opc = NVPTX::SULD_2D_V2I8_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; - case NVPTXISD::Suld2DV4I16Trap: - Opc = NVPTX::SULD_2D_V4I16_TRAP; + case NVPTXISD::Suld2DV2I16Clamp: + Opc = NVPTX::SULD_2D_V2I16_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; - case NVPTXISD::Suld2DV4I32Trap: - Opc = NVPTX::SULD_2D_V4I32_TRAP; + case NVPTXISD::Suld2DV2I32Clamp: + Opc = NVPTX::SULD_2D_V2I32_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(Chain); break; - case NVPTXISD::Suld2DArrayI8Trap: - Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP; + case NVPTXISD::Suld2DV2I64Clamp: + Opc = NVPTX::SULD_2D_V2I64_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I8Clamp: + Opc = NVPTX::SULD_2D_V4I8_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I16Clamp: + Opc = NVPTX::SULD_2D_V4I16_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I32Clamp: + Opc = NVPTX::SULD_2D_V4I32_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI8Clamp: + Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; - case NVPTXISD::Suld2DArrayI16Trap: - Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP; + case NVPTXISD::Suld2DArrayI16Clamp: + Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; - case NVPTXISD::Suld2DArrayI32Trap: - Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP; + case NVPTXISD::Suld2DArrayI32Clamp: + Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; - case NVPTXISD::Suld2DArrayV2I8Trap: - Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP; + case NVPTXISD::Suld2DArrayI64Clamp: + Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; - case NVPTXISD::Suld2DArrayV2I16Trap: - Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP; + case NVPTXISD::Suld2DArrayV2I8Clamp: + Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); 
Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; - case NVPTXISD::Suld2DArrayV2I32Trap: - Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP; + case NVPTXISD::Suld2DArrayV2I16Clamp: + Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; - case NVPTXISD::Suld2DArrayV4I8Trap: - Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP; + case NVPTXISD::Suld2DArrayV2I32Clamp: + Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; - case NVPTXISD::Suld2DArrayV4I16Trap: - Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP; + case NVPTXISD::Suld2DArrayV2I64Clamp: + Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; - case NVPTXISD::Suld2DArrayV4I32Trap: - Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP; + case NVPTXISD::Suld2DArrayV4I8Clamp: + Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; - case NVPTXISD::Suld3DI8Trap: - Opc = NVPTX::SULD_3D_I8_TRAP; + case NVPTXISD::Suld2DArrayV4I16Clamp: + Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; - case NVPTXISD::Suld3DI16Trap: - Opc = NVPTX::SULD_3D_I16_TRAP; + case NVPTXISD::Suld2DArrayV4I32Clamp: + Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; - case NVPTXISD::Suld3DI32Trap: - Opc = NVPTX::SULD_3D_I32_TRAP; + case NVPTXISD::Suld3DI8Clamp: + Opc = NVPTX::SULD_3D_I8_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; - case NVPTXISD::Suld3DV2I8Trap: - Opc = NVPTX::SULD_3D_V2I8_TRAP; + case NVPTXISD::Suld3DI16Clamp: + Opc = NVPTX::SULD_3D_I16_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; - case NVPTXISD::Suld3DV2I16Trap: - Opc = NVPTX::SULD_3D_V2I16_TRAP; + case NVPTXISD::Suld3DI32Clamp: + Opc = NVPTX::SULD_3D_I32_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; - case NVPTXISD::Suld3DV2I32Trap: - Opc = NVPTX::SULD_3D_V2I32_TRAP; + case NVPTXISD::Suld3DI64Clamp: + Opc = NVPTX::SULD_3D_I64_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; - case NVPTXISD::Suld3DV4I8Trap: - Opc = NVPTX::SULD_3D_V4I8_TRAP; + case NVPTXISD::Suld3DV2I8Clamp: + Opc = NVPTX::SULD_3D_V2I8_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; - case NVPTXISD::Suld3DV4I16Trap: - Opc = NVPTX::SULD_3D_V4I16_TRAP; + case NVPTXISD::Suld3DV2I16Clamp: + Opc = NVPTX::SULD_3D_V2I16_CLAMP; Ops.push_back(TexHandle); 
Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; - case NVPTXISD::Suld3DV4I32Trap: - Opc = NVPTX::SULD_3D_V4I32_TRAP; + case NVPTXISD::Suld3DV2I32Clamp: + Opc = NVPTX::SULD_3D_V2I32_CLAMP; Ops.push_back(TexHandle); Ops.push_back(N->getOperand(2)); Ops.push_back(N->getOperand(3)); Ops.push_back(N->getOperand(4)); Ops.push_back(Chain); break; - } - Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); - return Ret; -} + case NVPTXISD::Suld3DV2I64Clamp: + Opc = NVPTX::SULD_3D_V2I64_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I8Clamp: + Opc = NVPTX::SULD_3D_V4I8_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I16Clamp: + Opc = NVPTX::SULD_3D_V4I16_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I32Clamp: + Opc = NVPTX::SULD_3D_V4I32_CLAMP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DI8Trap: + Opc = NVPTX::SULD_1D_I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DI16Trap: + Opc = NVPTX::SULD_1D_I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DI32Trap: + Opc = NVPTX::SULD_1D_I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DI64Trap: + Opc = NVPTX::SULD_1D_I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I8Trap: + Opc = NVPTX::SULD_1D_V2I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I16Trap: + Opc = NVPTX::SULD_1D_V2I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I32Trap: + Opc = NVPTX::SULD_1D_V2I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I64Trap: + Opc = NVPTX::SULD_1D_V2I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV4I8Trap: + Opc = NVPTX::SULD_1D_V4I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV4I16Trap: + Opc = NVPTX::SULD_1D_V4I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV4I32Trap: + Opc = NVPTX::SULD_1D_V4I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI8Trap: + Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI16Trap: + Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP; + 
Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI32Trap: + Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI64Trap: + Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I8Trap: + Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I16Trap: + Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I32Trap: + Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I64Trap: + Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV4I8Trap: + Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV4I16Trap: + Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV4I32Trap: + Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI8Trap: + Opc = NVPTX::SULD_2D_I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI16Trap: + Opc = NVPTX::SULD_2D_I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI32Trap: + Opc = NVPTX::SULD_2D_I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI64Trap: + Opc = NVPTX::SULD_2D_I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I8Trap: + Opc = NVPTX::SULD_2D_V2I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I16Trap: + Opc = NVPTX::SULD_2D_V2I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I32Trap: + Opc = NVPTX::SULD_2D_V2I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I64Trap: + Opc = NVPTX::SULD_2D_V2I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + 
Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I8Trap: + Opc = NVPTX::SULD_2D_V4I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I16Trap: + Opc = NVPTX::SULD_2D_V4I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I32Trap: + Opc = NVPTX::SULD_2D_V4I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI8Trap: + Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI16Trap: + Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI32Trap: + Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI64Trap: + Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I8Trap: + Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I16Trap: + Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I32Trap: + Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I64Trap: + Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV4I8Trap: + Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV4I16Trap: + Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV4I32Trap: + Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI8Trap: + Opc = NVPTX::SULD_3D_I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + 
Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI16Trap: + Opc = NVPTX::SULD_3D_I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI32Trap: + Opc = NVPTX::SULD_3D_I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI64Trap: + Opc = NVPTX::SULD_3D_I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I8Trap: + Opc = NVPTX::SULD_3D_V2I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I16Trap: + Opc = NVPTX::SULD_3D_V2I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I32Trap: + Opc = NVPTX::SULD_3D_V2I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I64Trap: + Opc = NVPTX::SULD_3D_V2I64_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I8Trap: + Opc = NVPTX::SULD_3D_V4I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I16Trap: + Opc = NVPTX::SULD_3D_V4I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I32Trap: + Opc = NVPTX::SULD_3D_V4I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DI8Zero: + Opc = NVPTX::SULD_1D_I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DI16Zero: + Opc = NVPTX::SULD_1D_I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DI32Zero: + Opc = NVPTX::SULD_1D_I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DI64Zero: + Opc = NVPTX::SULD_1D_I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I8Zero: + Opc = NVPTX::SULD_1D_V2I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I16Zero: + Opc = NVPTX::SULD_1D_V2I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I32Zero: + Opc = NVPTX::SULD_1D_V2I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I64Zero: + Opc = 
NVPTX::SULD_1D_V2I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV4I8Zero: + Opc = NVPTX::SULD_1D_V4I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV4I16Zero: + Opc = NVPTX::SULD_1D_V4I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV4I32Zero: + Opc = NVPTX::SULD_1D_V4I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI8Zero: + Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI16Zero: + Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI32Zero: + Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI64Zero: + Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I8Zero: + Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I16Zero: + Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I32Zero: + Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I64Zero: + Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV4I8Zero: + Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV4I16Zero: + Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV4I32Zero: + Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI8Zero: + Opc = NVPTX::SULD_2D_I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI16Zero: + Opc = NVPTX::SULD_2D_I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI32Zero: + Opc = NVPTX::SULD_2D_I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI64Zero: + Opc = 
NVPTX::SULD_2D_I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I8Zero: + Opc = NVPTX::SULD_2D_V2I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I16Zero: + Opc = NVPTX::SULD_2D_V2I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I32Zero: + Opc = NVPTX::SULD_2D_V2I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I64Zero: + Opc = NVPTX::SULD_2D_V2I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I8Zero: + Opc = NVPTX::SULD_2D_V4I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I16Zero: + Opc = NVPTX::SULD_2D_V4I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I32Zero: + Opc = NVPTX::SULD_2D_V4I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI8Zero: + Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI16Zero: + Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI32Zero: + Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI64Zero: + Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I8Zero: + Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I16Zero: + Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I32Zero: + Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I64Zero: + Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV4I8Zero: + Opc = 
NVPTX::SULD_2D_ARRAY_V4I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV4I16Zero: + Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV4I32Zero: + Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI8Zero: + Opc = NVPTX::SULD_3D_I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI16Zero: + Opc = NVPTX::SULD_3D_I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI32Zero: + Opc = NVPTX::SULD_3D_I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI64Zero: + Opc = NVPTX::SULD_3D_I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I8Zero: + Opc = NVPTX::SULD_3D_V2I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I16Zero: + Opc = NVPTX::SULD_3D_V2I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I32Zero: + Opc = NVPTX::SULD_3D_V2I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I64Zero: + Opc = NVPTX::SULD_3D_V2I64_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I8Zero: + Opc = NVPTX::SULD_3D_V4I8_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I16Zero: + Opc = NVPTX::SULD_3D_V4I16_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I32Zero: + Opc = NVPTX::SULD_3D_V4I32_ZERO; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + } + Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); + return Ret; +} + /// SelectBFE - Look for instruction sequences that can be made more efficient /// by using the 'bfe' (bit-field extract) PTX instruction diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp 
b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 91e24bb617d..258c57b17c5 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -360,73 +360,379 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { return "NVPTXISD::MUL_WIDE_SIGNED"; case NVPTXISD::MUL_WIDE_UNSIGNED: return "NVPTXISD::MUL_WIDE_UNSIGNED"; - case NVPTXISD::Tex1DFloatI32: return "NVPTXISD::Tex1DFloatI32"; + case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32"; case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat"; case NVPTXISD::Tex1DFloatFloatLevel: return "NVPTXISD::Tex1DFloatFloatLevel"; case NVPTXISD::Tex1DFloatFloatGrad: return "NVPTXISD::Tex1DFloatFloatGrad"; - case NVPTXISD::Tex1DI32I32: return "NVPTXISD::Tex1DI32I32"; - case NVPTXISD::Tex1DI32Float: return "NVPTXISD::Tex1DI32Float"; - case NVPTXISD::Tex1DI32FloatLevel: - return "NVPTXISD::Tex1DI32FloatLevel"; - case NVPTXISD::Tex1DI32FloatGrad: - return "NVPTXISD::Tex1DI32FloatGrad"; - case NVPTXISD::Tex1DArrayFloatI32: return "NVPTXISD::Tex2DArrayFloatI32"; - case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat"; + case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32"; + case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float"; + case NVPTXISD::Tex1DS32FloatLevel: + return "NVPTXISD::Tex1DS32FloatLevel"; + case NVPTXISD::Tex1DS32FloatGrad: + return "NVPTXISD::Tex1DS32FloatGrad"; + case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32"; + case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float"; + case NVPTXISD::Tex1DU32FloatLevel: + return "NVPTXISD::Tex1DU32FloatLevel"; + case NVPTXISD::Tex1DU32FloatGrad: + return "NVPTXISD::Tex1DU32FloatGrad"; + case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32"; + case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat"; case NVPTXISD::Tex1DArrayFloatFloatLevel: - return "NVPTXISD::Tex2DArrayFloatFloatLevel"; + return "NVPTXISD::Tex1DArrayFloatFloatLevel"; case NVPTXISD::Tex1DArrayFloatFloatGrad: - return "NVPTXISD::Tex2DArrayFloatFloatGrad"; - case NVPTXISD::Tex1DArrayI32I32: return "NVPTXISD::Tex2DArrayI32I32"; - case NVPTXISD::Tex1DArrayI32Float: return "NVPTXISD::Tex2DArrayI32Float"; - case NVPTXISD::Tex1DArrayI32FloatLevel: - return "NVPTXISD::Tex2DArrayI32FloatLevel"; - case NVPTXISD::Tex1DArrayI32FloatGrad: - return "NVPTXISD::Tex2DArrayI32FloatGrad"; - case NVPTXISD::Tex2DFloatI32: return "NVPTXISD::Tex2DFloatI32"; + return "NVPTXISD::Tex1DArrayFloatFloatGrad"; + case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32"; + case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float"; + case NVPTXISD::Tex1DArrayS32FloatLevel: + return "NVPTXISD::Tex1DArrayS32FloatLevel"; + case NVPTXISD::Tex1DArrayS32FloatGrad: + return "NVPTXISD::Tex1DArrayS32FloatGrad"; + case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32"; + case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float"; + case NVPTXISD::Tex1DArrayU32FloatLevel: + return "NVPTXISD::Tex1DArrayU32FloatLevel"; + case NVPTXISD::Tex1DArrayU32FloatGrad: + return "NVPTXISD::Tex1DArrayU32FloatGrad"; + case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32"; case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat"; case NVPTXISD::Tex2DFloatFloatLevel: return "NVPTXISD::Tex2DFloatFloatLevel"; case NVPTXISD::Tex2DFloatFloatGrad: return "NVPTXISD::Tex2DFloatFloatGrad"; - case NVPTXISD::Tex2DI32I32: return "NVPTXISD::Tex2DI32I32"; - case 
NVPTXISD::Tex2DI32Float: return "NVPTXISD::Tex2DI32Float"; - case NVPTXISD::Tex2DI32FloatLevel: - return "NVPTXISD::Tex2DI32FloatLevel"; - case NVPTXISD::Tex2DI32FloatGrad: - return "NVPTXISD::Tex2DI32FloatGrad"; - case NVPTXISD::Tex2DArrayFloatI32: return "NVPTXISD::Tex2DArrayFloatI32"; + case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32"; + case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float"; + case NVPTXISD::Tex2DS32FloatLevel: + return "NVPTXISD::Tex2DS32FloatLevel"; + case NVPTXISD::Tex2DS32FloatGrad: + return "NVPTXISD::Tex2DS32FloatGrad"; + case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32"; + case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float"; + case NVPTXISD::Tex2DU32FloatLevel: + return "NVPTXISD::Tex2DU32FloatLevel"; + case NVPTXISD::Tex2DU32FloatGrad: + return "NVPTXISD::Tex2DU32FloatGrad"; + case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32"; case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat"; case NVPTXISD::Tex2DArrayFloatFloatLevel: return "NVPTXISD::Tex2DArrayFloatFloatLevel"; case NVPTXISD::Tex2DArrayFloatFloatGrad: return "NVPTXISD::Tex2DArrayFloatFloatGrad"; - case NVPTXISD::Tex2DArrayI32I32: return "NVPTXISD::Tex2DArrayI32I32"; - case NVPTXISD::Tex2DArrayI32Float: return "NVPTXISD::Tex2DArrayI32Float"; - case NVPTXISD::Tex2DArrayI32FloatLevel: - return "NVPTXISD::Tex2DArrayI32FloatLevel"; - case NVPTXISD::Tex2DArrayI32FloatGrad: - return "NVPTXISD::Tex2DArrayI32FloatGrad"; - case NVPTXISD::Tex3DFloatI32: return "NVPTXISD::Tex3DFloatI32"; + case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32"; + case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float"; + case NVPTXISD::Tex2DArrayS32FloatLevel: + return "NVPTXISD::Tex2DArrayS32FloatLevel"; + case NVPTXISD::Tex2DArrayS32FloatGrad: + return "NVPTXISD::Tex2DArrayS32FloatGrad"; + case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32"; + case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float"; + case NVPTXISD::Tex2DArrayU32FloatLevel: + return "NVPTXISD::Tex2DArrayU32FloatLevel"; + case NVPTXISD::Tex2DArrayU32FloatGrad: + return "NVPTXISD::Tex2DArrayU32FloatGrad"; + case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32"; case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat"; case NVPTXISD::Tex3DFloatFloatLevel: return "NVPTXISD::Tex3DFloatFloatLevel"; case NVPTXISD::Tex3DFloatFloatGrad: return "NVPTXISD::Tex3DFloatFloatGrad"; - case NVPTXISD::Tex3DI32I32: return "NVPTXISD::Tex3DI32I32"; - case NVPTXISD::Tex3DI32Float: return "NVPTXISD::Tex3DI32Float"; - case NVPTXISD::Tex3DI32FloatLevel: - return "NVPTXISD::Tex3DI32FloatLevel"; - case NVPTXISD::Tex3DI32FloatGrad: - return "NVPTXISD::Tex3DI32FloatGrad"; + case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32"; + case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float"; + case NVPTXISD::Tex3DS32FloatLevel: + return "NVPTXISD::Tex3DS32FloatLevel"; + case NVPTXISD::Tex3DS32FloatGrad: + return "NVPTXISD::Tex3DS32FloatGrad"; + case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32"; + case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float"; + case NVPTXISD::Tex3DU32FloatLevel: + return "NVPTXISD::Tex3DU32FloatLevel"; + case NVPTXISD::Tex3DU32FloatGrad: + return "NVPTXISD::Tex3DU32FloatGrad"; + case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat"; + case NVPTXISD::TexCubeFloatFloatLevel: + return "NVPTXISD::TexCubeFloatFloatLevel"; + case NVPTXISD::TexCubeS32Float: return 
"NVPTXISD::TexCubeS32Float"; + case NVPTXISD::TexCubeS32FloatLevel: + return "NVPTXISD::TexCubeS32FloatLevel"; + case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float"; + case NVPTXISD::TexCubeU32FloatLevel: + return "NVPTXISD::TexCubeU32FloatLevel"; + case NVPTXISD::TexCubeArrayFloatFloat: + return "NVPTXISD::TexCubeArrayFloatFloat"; + case NVPTXISD::TexCubeArrayFloatFloatLevel: + return "NVPTXISD::TexCubeArrayFloatFloatLevel"; + case NVPTXISD::TexCubeArrayS32Float: + return "NVPTXISD::TexCubeArrayS32Float"; + case NVPTXISD::TexCubeArrayS32FloatLevel: + return "NVPTXISD::TexCubeArrayS32FloatLevel"; + case NVPTXISD::TexCubeArrayU32Float: + return "NVPTXISD::TexCubeArrayU32Float"; + case NVPTXISD::TexCubeArrayU32FloatLevel: + return "NVPTXISD::TexCubeArrayU32FloatLevel"; + case NVPTXISD::Tld4R2DFloatFloat: + return "NVPTXISD::Tld4R2DFloatFloat"; + case NVPTXISD::Tld4G2DFloatFloat: + return "NVPTXISD::Tld4G2DFloatFloat"; + case NVPTXISD::Tld4B2DFloatFloat: + return "NVPTXISD::Tld4B2DFloatFloat"; + case NVPTXISD::Tld4A2DFloatFloat: + return "NVPTXISD::Tld4A2DFloatFloat"; + case NVPTXISD::Tld4R2DS64Float: + return "NVPTXISD::Tld4R2DS64Float"; + case NVPTXISD::Tld4G2DS64Float: + return "NVPTXISD::Tld4G2DS64Float"; + case NVPTXISD::Tld4B2DS64Float: + return "NVPTXISD::Tld4B2DS64Float"; + case NVPTXISD::Tld4A2DS64Float: + return "NVPTXISD::Tld4A2DS64Float"; + case NVPTXISD::Tld4R2DU64Float: + return "NVPTXISD::Tld4R2DU64Float"; + case NVPTXISD::Tld4G2DU64Float: + return "NVPTXISD::Tld4G2DU64Float"; + case NVPTXISD::Tld4B2DU64Float: + return "NVPTXISD::Tld4B2DU64Float"; + case NVPTXISD::Tld4A2DU64Float: + return "NVPTXISD::Tld4A2DU64Float"; + + case NVPTXISD::TexUnified1DFloatS32: + return "NVPTXISD::TexUnified1DFloatS32"; + case NVPTXISD::TexUnified1DFloatFloat: + return "NVPTXISD::TexUnified1DFloatFloat"; + case NVPTXISD::TexUnified1DFloatFloatLevel: + return "NVPTXISD::TexUnified1DFloatFloatLevel"; + case NVPTXISD::TexUnified1DFloatFloatGrad: + return "NVPTXISD::TexUnified1DFloatFloatGrad"; + case NVPTXISD::TexUnified1DS32S32: + return "NVPTXISD::TexUnified1DS32S32"; + case NVPTXISD::TexUnified1DS32Float: + return "NVPTXISD::TexUnified1DS32Float"; + case NVPTXISD::TexUnified1DS32FloatLevel: + return "NVPTXISD::TexUnified1DS32FloatLevel"; + case NVPTXISD::TexUnified1DS32FloatGrad: + return "NVPTXISD::TexUnified1DS32FloatGrad"; + case NVPTXISD::TexUnified1DU32S32: + return "NVPTXISD::TexUnified1DU32S32"; + case NVPTXISD::TexUnified1DU32Float: + return "NVPTXISD::TexUnified1DU32Float"; + case NVPTXISD::TexUnified1DU32FloatLevel: + return "NVPTXISD::TexUnified1DU32FloatLevel"; + case NVPTXISD::TexUnified1DU32FloatGrad: + return "NVPTXISD::TexUnified1DU32FloatGrad"; + case NVPTXISD::TexUnified1DArrayFloatS32: + return "NVPTXISD::TexUnified1DArrayFloatS32"; + case NVPTXISD::TexUnified1DArrayFloatFloat: + return "NVPTXISD::TexUnified1DArrayFloatFloat"; + case NVPTXISD::TexUnified1DArrayFloatFloatLevel: + return "NVPTXISD::TexUnified1DArrayFloatFloatLevel"; + case NVPTXISD::TexUnified1DArrayFloatFloatGrad: + return "NVPTXISD::TexUnified1DArrayFloatFloatGrad"; + case NVPTXISD::TexUnified1DArrayS32S32: + return "NVPTXISD::TexUnified1DArrayS32S32"; + case NVPTXISD::TexUnified1DArrayS32Float: + return "NVPTXISD::TexUnified1DArrayS32Float"; + case NVPTXISD::TexUnified1DArrayS32FloatLevel: + return "NVPTXISD::TexUnified1DArrayS32FloatLevel"; + case NVPTXISD::TexUnified1DArrayS32FloatGrad: + return "NVPTXISD::TexUnified1DArrayS32FloatGrad"; + case NVPTXISD::TexUnified1DArrayU32S32: + return 
"NVPTXISD::TexUnified1DArrayU32S32"; + case NVPTXISD::TexUnified1DArrayU32Float: + return "NVPTXISD::TexUnified1DArrayU32Float"; + case NVPTXISD::TexUnified1DArrayU32FloatLevel: + return "NVPTXISD::TexUnified1DArrayU32FloatLevel"; + case NVPTXISD::TexUnified1DArrayU32FloatGrad: + return "NVPTXISD::TexUnified1DArrayU32FloatGrad"; + case NVPTXISD::TexUnified2DFloatS32: + return "NVPTXISD::TexUnified2DFloatS32"; + case NVPTXISD::TexUnified2DFloatFloat: + return "NVPTXISD::TexUnified2DFloatFloat"; + case NVPTXISD::TexUnified2DFloatFloatLevel: + return "NVPTXISD::TexUnified2DFloatFloatLevel"; + case NVPTXISD::TexUnified2DFloatFloatGrad: + return "NVPTXISD::TexUnified2DFloatFloatGrad"; + case NVPTXISD::TexUnified2DS32S32: + return "NVPTXISD::TexUnified2DS32S32"; + case NVPTXISD::TexUnified2DS32Float: + return "NVPTXISD::TexUnified2DS32Float"; + case NVPTXISD::TexUnified2DS32FloatLevel: + return "NVPTXISD::TexUnified2DS32FloatLevel"; + case NVPTXISD::TexUnified2DS32FloatGrad: + return "NVPTXISD::TexUnified2DS32FloatGrad"; + case NVPTXISD::TexUnified2DU32S32: + return "NVPTXISD::TexUnified2DU32S32"; + case NVPTXISD::TexUnified2DU32Float: + return "NVPTXISD::TexUnified2DU32Float"; + case NVPTXISD::TexUnified2DU32FloatLevel: + return "NVPTXISD::TexUnified2DU32FloatLevel"; + case NVPTXISD::TexUnified2DU32FloatGrad: + return "NVPTXISD::TexUnified2DU32FloatGrad"; + case NVPTXISD::TexUnified2DArrayFloatS32: + return "NVPTXISD::TexUnified2DArrayFloatS32"; + case NVPTXISD::TexUnified2DArrayFloatFloat: + return "NVPTXISD::TexUnified2DArrayFloatFloat"; + case NVPTXISD::TexUnified2DArrayFloatFloatLevel: + return "NVPTXISD::TexUnified2DArrayFloatFloatLevel"; + case NVPTXISD::TexUnified2DArrayFloatFloatGrad: + return "NVPTXISD::TexUnified2DArrayFloatFloatGrad"; + case NVPTXISD::TexUnified2DArrayS32S32: + return "NVPTXISD::TexUnified2DArrayS32S32"; + case NVPTXISD::TexUnified2DArrayS32Float: + return "NVPTXISD::TexUnified2DArrayS32Float"; + case NVPTXISD::TexUnified2DArrayS32FloatLevel: + return "NVPTXISD::TexUnified2DArrayS32FloatLevel"; + case NVPTXISD::TexUnified2DArrayS32FloatGrad: + return "NVPTXISD::TexUnified2DArrayS32FloatGrad"; + case NVPTXISD::TexUnified2DArrayU32S32: + return "NVPTXISD::TexUnified2DArrayU32S32"; + case NVPTXISD::TexUnified2DArrayU32Float: + return "NVPTXISD::TexUnified2DArrayU32Float"; + case NVPTXISD::TexUnified2DArrayU32FloatLevel: + return "NVPTXISD::TexUnified2DArrayU32FloatLevel"; + case NVPTXISD::TexUnified2DArrayU32FloatGrad: + return "NVPTXISD::TexUnified2DArrayU32FloatGrad"; + case NVPTXISD::TexUnified3DFloatS32: + return "NVPTXISD::TexUnified3DFloatS32"; + case NVPTXISD::TexUnified3DFloatFloat: + return "NVPTXISD::TexUnified3DFloatFloat"; + case NVPTXISD::TexUnified3DFloatFloatLevel: + return "NVPTXISD::TexUnified3DFloatFloatLevel"; + case NVPTXISD::TexUnified3DFloatFloatGrad: + return "NVPTXISD::TexUnified3DFloatFloatGrad"; + case NVPTXISD::TexUnified3DS32S32: + return "NVPTXISD::TexUnified3DS32S32"; + case NVPTXISD::TexUnified3DS32Float: + return "NVPTXISD::TexUnified3DS32Float"; + case NVPTXISD::TexUnified3DS32FloatLevel: + return "NVPTXISD::TexUnified3DS32FloatLevel"; + case NVPTXISD::TexUnified3DS32FloatGrad: + return "NVPTXISD::TexUnified3DS32FloatGrad"; + case NVPTXISD::TexUnified3DU32S32: + return "NVPTXISD::TexUnified3DU32S32"; + case NVPTXISD::TexUnified3DU32Float: + return "NVPTXISD::TexUnified3DU32Float"; + case NVPTXISD::TexUnified3DU32FloatLevel: + return "NVPTXISD::TexUnified3DU32FloatLevel"; + case NVPTXISD::TexUnified3DU32FloatGrad: + return 
"NVPTXISD::TexUnified3DU32FloatGrad"; + case NVPTXISD::TexUnifiedCubeFloatFloat: + return "NVPTXISD::TexUnifiedCubeFloatFloat"; + case NVPTXISD::TexUnifiedCubeFloatFloatLevel: + return "NVPTXISD::TexUnifiedCubeFloatFloatLevel"; + case NVPTXISD::TexUnifiedCubeS32Float: + return "NVPTXISD::TexUnifiedCubeS32Float"; + case NVPTXISD::TexUnifiedCubeS32FloatLevel: + return "NVPTXISD::TexUnifiedCubeS32FloatLevel"; + case NVPTXISD::TexUnifiedCubeU32Float: + return "NVPTXISD::TexUnifiedCubeU32Float"; + case NVPTXISD::TexUnifiedCubeU32FloatLevel: + return "NVPTXISD::TexUnifiedCubeU32FloatLevel"; + case NVPTXISD::TexUnifiedCubeArrayFloatFloat: + return "NVPTXISD::TexUnifiedCubeArrayFloatFloat"; + case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: + return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel"; + case NVPTXISD::TexUnifiedCubeArrayS32Float: + return "NVPTXISD::TexUnifiedCubeArrayS32Float"; + case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: + return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel"; + case NVPTXISD::TexUnifiedCubeArrayU32Float: + return "NVPTXISD::TexUnifiedCubeArrayU32Float"; + case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: + return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel"; + case NVPTXISD::Tld4UnifiedR2DFloatFloat: + return "NVPTXISD::Tld4UnifiedR2DFloatFloat"; + case NVPTXISD::Tld4UnifiedG2DFloatFloat: + return "NVPTXISD::Tld4UnifiedG2DFloatFloat"; + case NVPTXISD::Tld4UnifiedB2DFloatFloat: + return "NVPTXISD::Tld4UnifiedB2DFloatFloat"; + case NVPTXISD::Tld4UnifiedA2DFloatFloat: + return "NVPTXISD::Tld4UnifiedA2DFloatFloat"; + case NVPTXISD::Tld4UnifiedR2DS64Float: + return "NVPTXISD::Tld4UnifiedR2DS64Float"; + case NVPTXISD::Tld4UnifiedG2DS64Float: + return "NVPTXISD::Tld4UnifiedG2DS64Float"; + case NVPTXISD::Tld4UnifiedB2DS64Float: + return "NVPTXISD::Tld4UnifiedB2DS64Float"; + case NVPTXISD::Tld4UnifiedA2DS64Float: + return "NVPTXISD::Tld4UnifiedA2DS64Float"; + case NVPTXISD::Tld4UnifiedR2DU64Float: + return "NVPTXISD::Tld4UnifiedR2DU64Float"; + case NVPTXISD::Tld4UnifiedG2DU64Float: + return "NVPTXISD::Tld4UnifiedG2DU64Float"; + case NVPTXISD::Tld4UnifiedB2DU64Float: + return "NVPTXISD::Tld4UnifiedB2DU64Float"; + case NVPTXISD::Tld4UnifiedA2DU64Float: + return "NVPTXISD::Tld4UnifiedA2DU64Float"; + + case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp"; + case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp"; + case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp"; + case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp"; + case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp"; + case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp"; + case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp"; + case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp"; + case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp"; + case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp"; + case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp"; + + case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp"; + case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp"; + case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp"; + case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp"; + case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp"; + case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp"; + case NVPTXISD::Suld1DArrayV2I32Clamp:return 
"NVPTXISD::Suld1DArrayV2I32Clamp"; + case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp"; + case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp"; + case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp"; + case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp"; + + case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp"; + case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp"; + case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp"; + case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp"; + case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp"; + case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp"; + case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp"; + case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp"; + case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp"; + case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp"; + case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp"; + + case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp"; + case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp"; + case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp"; + case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp"; + case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp"; + case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp"; + case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp"; + case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp"; + case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp"; + case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp"; + case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp"; + + case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp"; + case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp"; + case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp"; + case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp"; + case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp"; + case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp"; + case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp"; + case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp"; + case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp"; + case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp"; + case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp"; case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap"; case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap"; case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap"; + case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap"; case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap"; case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap"; case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap"; + case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap"; case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap"; case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap"; case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap"; @@ -434,9 +740,11 @@ const char 
*NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap"; case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap"; case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap"; + case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap"; case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap"; case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap"; case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap"; + case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap"; case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap"; case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap"; case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap"; @@ -444,9 +752,11 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap"; case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap"; case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap"; + case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap"; case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap"; case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap"; case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap"; + case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap"; case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap"; case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap"; case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap"; @@ -454,9 +764,11 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap"; case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap"; case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap"; + case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap"; case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap"; case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap"; case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap"; + case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap"; case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap"; case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap"; case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap"; @@ -464,12 +776,74 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap"; case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap"; case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap"; + case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap"; case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap"; case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap"; case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap"; + case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap"; case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap"; case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap"; case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap"; + + case NVPTXISD::Suld1DI8Zero: return 
"NVPTXISD::Suld1DI8Zero"; + case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero"; + case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero"; + case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero"; + case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero"; + case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero"; + case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero"; + case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero"; + case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero"; + case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero"; + case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero"; + + case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero"; + case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero"; + case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero"; + case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero"; + case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero"; + case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero"; + case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero"; + case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero"; + case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero"; + case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero"; + case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero"; + + case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero"; + case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero"; + case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero"; + case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero"; + case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero"; + case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero"; + case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero"; + case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero"; + case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero"; + case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero"; + case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero"; + + case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero"; + case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero"; + case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero"; + case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero"; + case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero"; + case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero"; + case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero"; + case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero"; + case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero"; + case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero"; + case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero"; + + case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero"; + case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero"; + case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero"; + case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero"; + case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero"; + case NVPTXISD::Suld3DV2I16Zero: return 
"NVPTXISD::Suld3DV2I16Zero"; + case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero"; + case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero"; + case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero"; + case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero"; + case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero"; } } @@ -2145,90 +2519,357 @@ static unsigned getOpcForTextureInstr(unsigned Intrinsic) { default: return 0; - case Intrinsic::nvvm_tex_1d_v4f32_i32: - return NVPTXISD::Tex1DFloatI32; + case Intrinsic::nvvm_tex_1d_v4f32_s32: + return NVPTXISD::Tex1DFloatS32; case Intrinsic::nvvm_tex_1d_v4f32_f32: return NVPTXISD::Tex1DFloatFloat; case Intrinsic::nvvm_tex_1d_level_v4f32_f32: return NVPTXISD::Tex1DFloatFloatLevel; case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: return NVPTXISD::Tex1DFloatFloatGrad; - case Intrinsic::nvvm_tex_1d_v4i32_i32: - return NVPTXISD::Tex1DI32I32; - case Intrinsic::nvvm_tex_1d_v4i32_f32: - return NVPTXISD::Tex1DI32Float; - case Intrinsic::nvvm_tex_1d_level_v4i32_f32: - return NVPTXISD::Tex1DI32FloatLevel; - case Intrinsic::nvvm_tex_1d_grad_v4i32_f32: - return NVPTXISD::Tex1DI32FloatGrad; - - case Intrinsic::nvvm_tex_1d_array_v4f32_i32: - return NVPTXISD::Tex1DArrayFloatI32; + case Intrinsic::nvvm_tex_1d_v4s32_s32: + return NVPTXISD::Tex1DS32S32; + case Intrinsic::nvvm_tex_1d_v4s32_f32: + return NVPTXISD::Tex1DS32Float; + case Intrinsic::nvvm_tex_1d_level_v4s32_f32: + return NVPTXISD::Tex1DS32FloatLevel; + case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: + return NVPTXISD::Tex1DS32FloatGrad; + case Intrinsic::nvvm_tex_1d_v4u32_s32: + return NVPTXISD::Tex1DU32S32; + case Intrinsic::nvvm_tex_1d_v4u32_f32: + return NVPTXISD::Tex1DU32Float; + case Intrinsic::nvvm_tex_1d_level_v4u32_f32: + return NVPTXISD::Tex1DU32FloatLevel; + case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: + return NVPTXISD::Tex1DU32FloatGrad; + + case Intrinsic::nvvm_tex_1d_array_v4f32_s32: + return NVPTXISD::Tex1DArrayFloatS32; case Intrinsic::nvvm_tex_1d_array_v4f32_f32: return NVPTXISD::Tex1DArrayFloatFloat; case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: return NVPTXISD::Tex1DArrayFloatFloatLevel; case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: return NVPTXISD::Tex1DArrayFloatFloatGrad; - case Intrinsic::nvvm_tex_1d_array_v4i32_i32: - return NVPTXISD::Tex1DArrayI32I32; - case Intrinsic::nvvm_tex_1d_array_v4i32_f32: - return NVPTXISD::Tex1DArrayI32Float; - case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32: - return NVPTXISD::Tex1DArrayI32FloatLevel; - case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32: - return NVPTXISD::Tex1DArrayI32FloatGrad; - - case Intrinsic::nvvm_tex_2d_v4f32_i32: - return NVPTXISD::Tex2DFloatI32; + case Intrinsic::nvvm_tex_1d_array_v4s32_s32: + return NVPTXISD::Tex1DArrayS32S32; + case Intrinsic::nvvm_tex_1d_array_v4s32_f32: + return NVPTXISD::Tex1DArrayS32Float; + case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: + return NVPTXISD::Tex1DArrayS32FloatLevel; + case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: + return NVPTXISD::Tex1DArrayS32FloatGrad; + case Intrinsic::nvvm_tex_1d_array_v4u32_s32: + return NVPTXISD::Tex1DArrayU32S32; + case Intrinsic::nvvm_tex_1d_array_v4u32_f32: + return NVPTXISD::Tex1DArrayU32Float; + case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: + return NVPTXISD::Tex1DArrayU32FloatLevel; + case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: + return NVPTXISD::Tex1DArrayU32FloatGrad; + + case Intrinsic::nvvm_tex_2d_v4f32_s32: + return NVPTXISD::Tex2DFloatS32; case Intrinsic::nvvm_tex_2d_v4f32_f32: 
return NVPTXISD::Tex2DFloatFloat; case Intrinsic::nvvm_tex_2d_level_v4f32_f32: return NVPTXISD::Tex2DFloatFloatLevel; case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: return NVPTXISD::Tex2DFloatFloatGrad; - case Intrinsic::nvvm_tex_2d_v4i32_i32: - return NVPTXISD::Tex2DI32I32; - case Intrinsic::nvvm_tex_2d_v4i32_f32: - return NVPTXISD::Tex2DI32Float; - case Intrinsic::nvvm_tex_2d_level_v4i32_f32: - return NVPTXISD::Tex2DI32FloatLevel; - case Intrinsic::nvvm_tex_2d_grad_v4i32_f32: - return NVPTXISD::Tex2DI32FloatGrad; - - case Intrinsic::nvvm_tex_2d_array_v4f32_i32: - return NVPTXISD::Tex2DArrayFloatI32; + case Intrinsic::nvvm_tex_2d_v4s32_s32: + return NVPTXISD::Tex2DS32S32; + case Intrinsic::nvvm_tex_2d_v4s32_f32: + return NVPTXISD::Tex2DS32Float; + case Intrinsic::nvvm_tex_2d_level_v4s32_f32: + return NVPTXISD::Tex2DS32FloatLevel; + case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: + return NVPTXISD::Tex2DS32FloatGrad; + case Intrinsic::nvvm_tex_2d_v4u32_s32: + return NVPTXISD::Tex2DU32S32; + case Intrinsic::nvvm_tex_2d_v4u32_f32: + return NVPTXISD::Tex2DU32Float; + case Intrinsic::nvvm_tex_2d_level_v4u32_f32: + return NVPTXISD::Tex2DU32FloatLevel; + case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: + return NVPTXISD::Tex2DU32FloatGrad; + + case Intrinsic::nvvm_tex_2d_array_v4f32_s32: + return NVPTXISD::Tex2DArrayFloatS32; case Intrinsic::nvvm_tex_2d_array_v4f32_f32: return NVPTXISD::Tex2DArrayFloatFloat; case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: return NVPTXISD::Tex2DArrayFloatFloatLevel; case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: return NVPTXISD::Tex2DArrayFloatFloatGrad; - case Intrinsic::nvvm_tex_2d_array_v4i32_i32: - return NVPTXISD::Tex2DArrayI32I32; - case Intrinsic::nvvm_tex_2d_array_v4i32_f32: - return NVPTXISD::Tex2DArrayI32Float; - case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32: - return NVPTXISD::Tex2DArrayI32FloatLevel; - case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32: - return NVPTXISD::Tex2DArrayI32FloatGrad; - - case Intrinsic::nvvm_tex_3d_v4f32_i32: - return NVPTXISD::Tex3DFloatI32; + case Intrinsic::nvvm_tex_2d_array_v4s32_s32: + return NVPTXISD::Tex2DArrayS32S32; + case Intrinsic::nvvm_tex_2d_array_v4s32_f32: + return NVPTXISD::Tex2DArrayS32Float; + case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: + return NVPTXISD::Tex2DArrayS32FloatLevel; + case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: + return NVPTXISD::Tex2DArrayS32FloatGrad; + case Intrinsic::nvvm_tex_2d_array_v4u32_s32: + return NVPTXISD::Tex2DArrayU32S32; + case Intrinsic::nvvm_tex_2d_array_v4u32_f32: + return NVPTXISD::Tex2DArrayU32Float; + case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: + return NVPTXISD::Tex2DArrayU32FloatLevel; + case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: + return NVPTXISD::Tex2DArrayU32FloatGrad; + + case Intrinsic::nvvm_tex_3d_v4f32_s32: + return NVPTXISD::Tex3DFloatS32; case Intrinsic::nvvm_tex_3d_v4f32_f32: return NVPTXISD::Tex3DFloatFloat; case Intrinsic::nvvm_tex_3d_level_v4f32_f32: return NVPTXISD::Tex3DFloatFloatLevel; case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: return NVPTXISD::Tex3DFloatFloatGrad; - case Intrinsic::nvvm_tex_3d_v4i32_i32: - return NVPTXISD::Tex3DI32I32; - case Intrinsic::nvvm_tex_3d_v4i32_f32: - return NVPTXISD::Tex3DI32Float; - case Intrinsic::nvvm_tex_3d_level_v4i32_f32: - return NVPTXISD::Tex3DI32FloatLevel; - case Intrinsic::nvvm_tex_3d_grad_v4i32_f32: - return NVPTXISD::Tex3DI32FloatGrad; + case Intrinsic::nvvm_tex_3d_v4s32_s32: + return NVPTXISD::Tex3DS32S32; + case Intrinsic::nvvm_tex_3d_v4s32_f32: + return NVPTXISD::Tex3DS32Float; + case 
Intrinsic::nvvm_tex_3d_level_v4s32_f32: + return NVPTXISD::Tex3DS32FloatLevel; + case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: + return NVPTXISD::Tex3DS32FloatGrad; + case Intrinsic::nvvm_tex_3d_v4u32_s32: + return NVPTXISD::Tex3DU32S32; + case Intrinsic::nvvm_tex_3d_v4u32_f32: + return NVPTXISD::Tex3DU32Float; + case Intrinsic::nvvm_tex_3d_level_v4u32_f32: + return NVPTXISD::Tex3DU32FloatLevel; + case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: + return NVPTXISD::Tex3DU32FloatGrad; + + case Intrinsic::nvvm_tex_cube_v4f32_f32: + return NVPTXISD::TexCubeFloatFloat; + case Intrinsic::nvvm_tex_cube_level_v4f32_f32: + return NVPTXISD::TexCubeFloatFloatLevel; + case Intrinsic::nvvm_tex_cube_v4s32_f32: + return NVPTXISD::TexCubeS32Float; + case Intrinsic::nvvm_tex_cube_level_v4s32_f32: + return NVPTXISD::TexCubeS32FloatLevel; + case Intrinsic::nvvm_tex_cube_v4u32_f32: + return NVPTXISD::TexCubeU32Float; + case Intrinsic::nvvm_tex_cube_level_v4u32_f32: + return NVPTXISD::TexCubeU32FloatLevel; + + case Intrinsic::nvvm_tex_cube_array_v4f32_f32: + return NVPTXISD::TexCubeArrayFloatFloat; + case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: + return NVPTXISD::TexCubeArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_cube_array_v4s32_f32: + return NVPTXISD::TexCubeArrayS32Float; + case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: + return NVPTXISD::TexCubeArrayS32FloatLevel; + case Intrinsic::nvvm_tex_cube_array_v4u32_f32: + return NVPTXISD::TexCubeArrayU32Float; + case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: + return NVPTXISD::TexCubeArrayU32FloatLevel; + + case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: + return NVPTXISD::Tld4R2DFloatFloat; + case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: + return NVPTXISD::Tld4G2DFloatFloat; + case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: + return NVPTXISD::Tld4B2DFloatFloat; + case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: + return NVPTXISD::Tld4A2DFloatFloat; + case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: + return NVPTXISD::Tld4R2DS64Float; + case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: + return NVPTXISD::Tld4G2DS64Float; + case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: + return NVPTXISD::Tld4B2DS64Float; + case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: + return NVPTXISD::Tld4A2DS64Float; + case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: + return NVPTXISD::Tld4R2DU64Float; + case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: + return NVPTXISD::Tld4G2DU64Float; + case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: + return NVPTXISD::Tld4B2DU64Float; + case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: + return NVPTXISD::Tld4A2DU64Float; + + case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: + return NVPTXISD::TexUnified1DFloatS32; + case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: + return NVPTXISD::TexUnified1DFloatFloat; + case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: + return NVPTXISD::TexUnified1DFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: + return NVPTXISD::TexUnified1DFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: + return NVPTXISD::TexUnified1DS32S32; + case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: + return NVPTXISD::TexUnified1DS32Float; + case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: + return NVPTXISD::TexUnified1DS32FloatLevel; + case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: + return NVPTXISD::TexUnified1DS32FloatGrad; + case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: + return NVPTXISD::TexUnified1DU32S32; + case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: + return NVPTXISD::TexUnified1DU32Float; + case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: + return 
NVPTXISD::TexUnified1DU32FloatLevel; + case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: + return NVPTXISD::TexUnified1DU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: + return NVPTXISD::TexUnified1DArrayFloatS32; + case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: + return NVPTXISD::TexUnified1DArrayFloatFloat; + case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: + return NVPTXISD::TexUnified1DArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: + return NVPTXISD::TexUnified1DArrayFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: + return NVPTXISD::TexUnified1DArrayS32S32; + case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: + return NVPTXISD::TexUnified1DArrayS32Float; + case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: + return NVPTXISD::TexUnified1DArrayS32FloatLevel; + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: + return NVPTXISD::TexUnified1DArrayS32FloatGrad; + case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: + return NVPTXISD::TexUnified1DArrayU32S32; + case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: + return NVPTXISD::TexUnified1DArrayU32Float; + case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: + return NVPTXISD::TexUnified1DArrayU32FloatLevel; + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: + return NVPTXISD::TexUnified1DArrayU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: + return NVPTXISD::TexUnified2DFloatS32; + case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: + return NVPTXISD::TexUnified2DFloatFloat; + case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: + return NVPTXISD::TexUnified2DFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: + return NVPTXISD::TexUnified2DFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: + return NVPTXISD::TexUnified2DS32S32; + case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: + return NVPTXISD::TexUnified2DS32Float; + case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: + return NVPTXISD::TexUnified2DS32FloatLevel; + case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: + return NVPTXISD::TexUnified2DS32FloatGrad; + case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: + return NVPTXISD::TexUnified2DU32S32; + case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: + return NVPTXISD::TexUnified2DU32Float; + case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: + return NVPTXISD::TexUnified2DU32FloatLevel; + case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: + return NVPTXISD::TexUnified2DU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: + return NVPTXISD::TexUnified2DArrayFloatS32; + case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: + return NVPTXISD::TexUnified2DArrayFloatFloat; + case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: + return NVPTXISD::TexUnified2DArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: + return NVPTXISD::TexUnified2DArrayFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: + return NVPTXISD::TexUnified2DArrayS32S32; + case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: + return NVPTXISD::TexUnified2DArrayS32Float; + case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: + return NVPTXISD::TexUnified2DArrayS32FloatLevel; + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: + return NVPTXISD::TexUnified2DArrayS32FloatGrad; + case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: + return NVPTXISD::TexUnified2DArrayU32S32; + case 
Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: + return NVPTXISD::TexUnified2DArrayU32Float; + case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: + return NVPTXISD::TexUnified2DArrayU32FloatLevel; + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: + return NVPTXISD::TexUnified2DArrayU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: + return NVPTXISD::TexUnified3DFloatS32; + case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: + return NVPTXISD::TexUnified3DFloatFloat; + case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: + return NVPTXISD::TexUnified3DFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: + return NVPTXISD::TexUnified3DFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: + return NVPTXISD::TexUnified3DS32S32; + case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: + return NVPTXISD::TexUnified3DS32Float; + case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: + return NVPTXISD::TexUnified3DS32FloatLevel; + case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: + return NVPTXISD::TexUnified3DS32FloatGrad; + case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: + return NVPTXISD::TexUnified3DU32S32; + case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: + return NVPTXISD::TexUnified3DU32Float; + case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: + return NVPTXISD::TexUnified3DU32FloatLevel; + case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: + return NVPTXISD::TexUnified3DU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: + return NVPTXISD::TexUnifiedCubeFloatFloat; + case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: + return NVPTXISD::TexUnifiedCubeFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: + return NVPTXISD::TexUnifiedCubeS32Float; + case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: + return NVPTXISD::TexUnifiedCubeS32FloatLevel; + case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: + return NVPTXISD::TexUnifiedCubeU32Float; + case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: + return NVPTXISD::TexUnifiedCubeU32FloatLevel; + + case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: + return NVPTXISD::TexUnifiedCubeArrayFloatFloat; + case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: + return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: + return NVPTXISD::TexUnifiedCubeArrayS32Float; + case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: + return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel; + case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: + return NVPTXISD::TexUnifiedCubeArrayU32Float; + case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: + return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel; + + case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: + return NVPTXISD::Tld4UnifiedR2DFloatFloat; + case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: + return NVPTXISD::Tld4UnifiedG2DFloatFloat; + case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: + return NVPTXISD::Tld4UnifiedB2DFloatFloat; + case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: + return NVPTXISD::Tld4UnifiedA2DFloatFloat; + case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: + return NVPTXISD::Tld4UnifiedR2DS64Float; + case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: + return NVPTXISD::Tld4UnifiedG2DS64Float; + case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: + return NVPTXISD::Tld4UnifiedB2DS64Float; + case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: + return NVPTXISD::Tld4UnifiedA2DS64Float; + case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: 
+ return NVPTXISD::Tld4UnifiedR2DU64Float; + case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: + return NVPTXISD::Tld4UnifiedG2DU64Float; + case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: + return NVPTXISD::Tld4UnifiedB2DU64Float; + case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: + return NVPTXISD::Tld4UnifiedA2DU64Float; } } @@ -2236,18 +2877,132 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { switch (Intrinsic) { default: return 0; + case Intrinsic::nvvm_suld_1d_i8_clamp: + return NVPTXISD::Suld1DI8Clamp; + case Intrinsic::nvvm_suld_1d_i16_clamp: + return NVPTXISD::Suld1DI16Clamp; + case Intrinsic::nvvm_suld_1d_i32_clamp: + return NVPTXISD::Suld1DI32Clamp; + case Intrinsic::nvvm_suld_1d_i64_clamp: + return NVPTXISD::Suld1DI64Clamp; + case Intrinsic::nvvm_suld_1d_v2i8_clamp: + return NVPTXISD::Suld1DV2I8Clamp; + case Intrinsic::nvvm_suld_1d_v2i16_clamp: + return NVPTXISD::Suld1DV2I16Clamp; + case Intrinsic::nvvm_suld_1d_v2i32_clamp: + return NVPTXISD::Suld1DV2I32Clamp; + case Intrinsic::nvvm_suld_1d_v2i64_clamp: + return NVPTXISD::Suld1DV2I64Clamp; + case Intrinsic::nvvm_suld_1d_v4i8_clamp: + return NVPTXISD::Suld1DV4I8Clamp; + case Intrinsic::nvvm_suld_1d_v4i16_clamp: + return NVPTXISD::Suld1DV4I16Clamp; + case Intrinsic::nvvm_suld_1d_v4i32_clamp: + return NVPTXISD::Suld1DV4I32Clamp; + case Intrinsic::nvvm_suld_1d_array_i8_clamp: + return NVPTXISD::Suld1DArrayI8Clamp; + case Intrinsic::nvvm_suld_1d_array_i16_clamp: + return NVPTXISD::Suld1DArrayI16Clamp; + case Intrinsic::nvvm_suld_1d_array_i32_clamp: + return NVPTXISD::Suld1DArrayI32Clamp; + case Intrinsic::nvvm_suld_1d_array_i64_clamp: + return NVPTXISD::Suld1DArrayI64Clamp; + case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: + return NVPTXISD::Suld1DArrayV2I8Clamp; + case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: + return NVPTXISD::Suld1DArrayV2I16Clamp; + case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: + return NVPTXISD::Suld1DArrayV2I32Clamp; + case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: + return NVPTXISD::Suld1DArrayV2I64Clamp; + case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: + return NVPTXISD::Suld1DArrayV4I8Clamp; + case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: + return NVPTXISD::Suld1DArrayV4I16Clamp; + case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: + return NVPTXISD::Suld1DArrayV4I32Clamp; + case Intrinsic::nvvm_suld_2d_i8_clamp: + return NVPTXISD::Suld2DI8Clamp; + case Intrinsic::nvvm_suld_2d_i16_clamp: + return NVPTXISD::Suld2DI16Clamp; + case Intrinsic::nvvm_suld_2d_i32_clamp: + return NVPTXISD::Suld2DI32Clamp; + case Intrinsic::nvvm_suld_2d_i64_clamp: + return NVPTXISD::Suld2DI64Clamp; + case Intrinsic::nvvm_suld_2d_v2i8_clamp: + return NVPTXISD::Suld2DV2I8Clamp; + case Intrinsic::nvvm_suld_2d_v2i16_clamp: + return NVPTXISD::Suld2DV2I16Clamp; + case Intrinsic::nvvm_suld_2d_v2i32_clamp: + return NVPTXISD::Suld2DV2I32Clamp; + case Intrinsic::nvvm_suld_2d_v2i64_clamp: + return NVPTXISD::Suld2DV2I64Clamp; + case Intrinsic::nvvm_suld_2d_v4i8_clamp: + return NVPTXISD::Suld2DV4I8Clamp; + case Intrinsic::nvvm_suld_2d_v4i16_clamp: + return NVPTXISD::Suld2DV4I16Clamp; + case Intrinsic::nvvm_suld_2d_v4i32_clamp: + return NVPTXISD::Suld2DV4I32Clamp; + case Intrinsic::nvvm_suld_2d_array_i8_clamp: + return NVPTXISD::Suld2DArrayI8Clamp; + case Intrinsic::nvvm_suld_2d_array_i16_clamp: + return NVPTXISD::Suld2DArrayI16Clamp; + case Intrinsic::nvvm_suld_2d_array_i32_clamp: + return NVPTXISD::Suld2DArrayI32Clamp; + case Intrinsic::nvvm_suld_2d_array_i64_clamp: + return NVPTXISD::Suld2DArrayI64Clamp; + case 
Intrinsic::nvvm_suld_2d_array_v2i8_clamp: + return NVPTXISD::Suld2DArrayV2I8Clamp; + case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: + return NVPTXISD::Suld2DArrayV2I16Clamp; + case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: + return NVPTXISD::Suld2DArrayV2I32Clamp; + case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: + return NVPTXISD::Suld2DArrayV2I64Clamp; + case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: + return NVPTXISD::Suld2DArrayV4I8Clamp; + case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: + return NVPTXISD::Suld2DArrayV4I16Clamp; + case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: + return NVPTXISD::Suld2DArrayV4I32Clamp; + case Intrinsic::nvvm_suld_3d_i8_clamp: + return NVPTXISD::Suld3DI8Clamp; + case Intrinsic::nvvm_suld_3d_i16_clamp: + return NVPTXISD::Suld3DI16Clamp; + case Intrinsic::nvvm_suld_3d_i32_clamp: + return NVPTXISD::Suld3DI32Clamp; + case Intrinsic::nvvm_suld_3d_i64_clamp: + return NVPTXISD::Suld3DI64Clamp; + case Intrinsic::nvvm_suld_3d_v2i8_clamp: + return NVPTXISD::Suld3DV2I8Clamp; + case Intrinsic::nvvm_suld_3d_v2i16_clamp: + return NVPTXISD::Suld3DV2I16Clamp; + case Intrinsic::nvvm_suld_3d_v2i32_clamp: + return NVPTXISD::Suld3DV2I32Clamp; + case Intrinsic::nvvm_suld_3d_v2i64_clamp: + return NVPTXISD::Suld3DV2I64Clamp; + case Intrinsic::nvvm_suld_3d_v4i8_clamp: + return NVPTXISD::Suld3DV4I8Clamp; + case Intrinsic::nvvm_suld_3d_v4i16_clamp: + return NVPTXISD::Suld3DV4I16Clamp; + case Intrinsic::nvvm_suld_3d_v4i32_clamp: + return NVPTXISD::Suld3DV4I32Clamp; case Intrinsic::nvvm_suld_1d_i8_trap: return NVPTXISD::Suld1DI8Trap; case Intrinsic::nvvm_suld_1d_i16_trap: return NVPTXISD::Suld1DI16Trap; case Intrinsic::nvvm_suld_1d_i32_trap: return NVPTXISD::Suld1DI32Trap; + case Intrinsic::nvvm_suld_1d_i64_trap: + return NVPTXISD::Suld1DI64Trap; case Intrinsic::nvvm_suld_1d_v2i8_trap: return NVPTXISD::Suld1DV2I8Trap; case Intrinsic::nvvm_suld_1d_v2i16_trap: return NVPTXISD::Suld1DV2I16Trap; case Intrinsic::nvvm_suld_1d_v2i32_trap: return NVPTXISD::Suld1DV2I32Trap; + case Intrinsic::nvvm_suld_1d_v2i64_trap: + return NVPTXISD::Suld1DV2I64Trap; case Intrinsic::nvvm_suld_1d_v4i8_trap: return NVPTXISD::Suld1DV4I8Trap; case Intrinsic::nvvm_suld_1d_v4i16_trap: @@ -2260,12 +3015,16 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { return NVPTXISD::Suld1DArrayI16Trap; case Intrinsic::nvvm_suld_1d_array_i32_trap: return NVPTXISD::Suld1DArrayI32Trap; + case Intrinsic::nvvm_suld_1d_array_i64_trap: + return NVPTXISD::Suld1DArrayI64Trap; case Intrinsic::nvvm_suld_1d_array_v2i8_trap: return NVPTXISD::Suld1DArrayV2I8Trap; case Intrinsic::nvvm_suld_1d_array_v2i16_trap: return NVPTXISD::Suld1DArrayV2I16Trap; case Intrinsic::nvvm_suld_1d_array_v2i32_trap: return NVPTXISD::Suld1DArrayV2I32Trap; + case Intrinsic::nvvm_suld_1d_array_v2i64_trap: + return NVPTXISD::Suld1DArrayV2I64Trap; case Intrinsic::nvvm_suld_1d_array_v4i8_trap: return NVPTXISD::Suld1DArrayV4I8Trap; case Intrinsic::nvvm_suld_1d_array_v4i16_trap: @@ -2278,12 +3037,16 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { return NVPTXISD::Suld2DI16Trap; case Intrinsic::nvvm_suld_2d_i32_trap: return NVPTXISD::Suld2DI32Trap; + case Intrinsic::nvvm_suld_2d_i64_trap: + return NVPTXISD::Suld2DI64Trap; case Intrinsic::nvvm_suld_2d_v2i8_trap: return NVPTXISD::Suld2DV2I8Trap; case Intrinsic::nvvm_suld_2d_v2i16_trap: return NVPTXISD::Suld2DV2I16Trap; case Intrinsic::nvvm_suld_2d_v2i32_trap: return NVPTXISD::Suld2DV2I32Trap; + case Intrinsic::nvvm_suld_2d_v2i64_trap: + return NVPTXISD::Suld2DV2I64Trap; case 
Intrinsic::nvvm_suld_2d_v4i8_trap: return NVPTXISD::Suld2DV4I8Trap; case Intrinsic::nvvm_suld_2d_v4i16_trap: @@ -2296,12 +3059,16 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { return NVPTXISD::Suld2DArrayI16Trap; case Intrinsic::nvvm_suld_2d_array_i32_trap: return NVPTXISD::Suld2DArrayI32Trap; + case Intrinsic::nvvm_suld_2d_array_i64_trap: + return NVPTXISD::Suld2DArrayI64Trap; case Intrinsic::nvvm_suld_2d_array_v2i8_trap: return NVPTXISD::Suld2DArrayV2I8Trap; case Intrinsic::nvvm_suld_2d_array_v2i16_trap: return NVPTXISD::Suld2DArrayV2I16Trap; case Intrinsic::nvvm_suld_2d_array_v2i32_trap: return NVPTXISD::Suld2DArrayV2I32Trap; + case Intrinsic::nvvm_suld_2d_array_v2i64_trap: + return NVPTXISD::Suld2DArrayV2I64Trap; case Intrinsic::nvvm_suld_2d_array_v4i8_trap: return NVPTXISD::Suld2DArrayV4I8Trap; case Intrinsic::nvvm_suld_2d_array_v4i16_trap: @@ -2314,18 +3081,132 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { return NVPTXISD::Suld3DI16Trap; case Intrinsic::nvvm_suld_3d_i32_trap: return NVPTXISD::Suld3DI32Trap; + case Intrinsic::nvvm_suld_3d_i64_trap: + return NVPTXISD::Suld3DI64Trap; case Intrinsic::nvvm_suld_3d_v2i8_trap: return NVPTXISD::Suld3DV2I8Trap; case Intrinsic::nvvm_suld_3d_v2i16_trap: return NVPTXISD::Suld3DV2I16Trap; case Intrinsic::nvvm_suld_3d_v2i32_trap: return NVPTXISD::Suld3DV2I32Trap; + case Intrinsic::nvvm_suld_3d_v2i64_trap: + return NVPTXISD::Suld3DV2I64Trap; case Intrinsic::nvvm_suld_3d_v4i8_trap: return NVPTXISD::Suld3DV4I8Trap; case Intrinsic::nvvm_suld_3d_v4i16_trap: return NVPTXISD::Suld3DV4I16Trap; case Intrinsic::nvvm_suld_3d_v4i32_trap: return NVPTXISD::Suld3DV4I32Trap; + case Intrinsic::nvvm_suld_1d_i8_zero: + return NVPTXISD::Suld1DI8Zero; + case Intrinsic::nvvm_suld_1d_i16_zero: + return NVPTXISD::Suld1DI16Zero; + case Intrinsic::nvvm_suld_1d_i32_zero: + return NVPTXISD::Suld1DI32Zero; + case Intrinsic::nvvm_suld_1d_i64_zero: + return NVPTXISD::Suld1DI64Zero; + case Intrinsic::nvvm_suld_1d_v2i8_zero: + return NVPTXISD::Suld1DV2I8Zero; + case Intrinsic::nvvm_suld_1d_v2i16_zero: + return NVPTXISD::Suld1DV2I16Zero; + case Intrinsic::nvvm_suld_1d_v2i32_zero: + return NVPTXISD::Suld1DV2I32Zero; + case Intrinsic::nvvm_suld_1d_v2i64_zero: + return NVPTXISD::Suld1DV2I64Zero; + case Intrinsic::nvvm_suld_1d_v4i8_zero: + return NVPTXISD::Suld1DV4I8Zero; + case Intrinsic::nvvm_suld_1d_v4i16_zero: + return NVPTXISD::Suld1DV4I16Zero; + case Intrinsic::nvvm_suld_1d_v4i32_zero: + return NVPTXISD::Suld1DV4I32Zero; + case Intrinsic::nvvm_suld_1d_array_i8_zero: + return NVPTXISD::Suld1DArrayI8Zero; + case Intrinsic::nvvm_suld_1d_array_i16_zero: + return NVPTXISD::Suld1DArrayI16Zero; + case Intrinsic::nvvm_suld_1d_array_i32_zero: + return NVPTXISD::Suld1DArrayI32Zero; + case Intrinsic::nvvm_suld_1d_array_i64_zero: + return NVPTXISD::Suld1DArrayI64Zero; + case Intrinsic::nvvm_suld_1d_array_v2i8_zero: + return NVPTXISD::Suld1DArrayV2I8Zero; + case Intrinsic::nvvm_suld_1d_array_v2i16_zero: + return NVPTXISD::Suld1DArrayV2I16Zero; + case Intrinsic::nvvm_suld_1d_array_v2i32_zero: + return NVPTXISD::Suld1DArrayV2I32Zero; + case Intrinsic::nvvm_suld_1d_array_v2i64_zero: + return NVPTXISD::Suld1DArrayV2I64Zero; + case Intrinsic::nvvm_suld_1d_array_v4i8_zero: + return NVPTXISD::Suld1DArrayV4I8Zero; + case Intrinsic::nvvm_suld_1d_array_v4i16_zero: + return NVPTXISD::Suld1DArrayV4I16Zero; + case Intrinsic::nvvm_suld_1d_array_v4i32_zero: + return NVPTXISD::Suld1DArrayV4I32Zero; + case Intrinsic::nvvm_suld_2d_i8_zero: + return NVPTXISD::Suld2DI8Zero; + 
case Intrinsic::nvvm_suld_2d_i16_zero: + return NVPTXISD::Suld2DI16Zero; + case Intrinsic::nvvm_suld_2d_i32_zero: + return NVPTXISD::Suld2DI32Zero; + case Intrinsic::nvvm_suld_2d_i64_zero: + return NVPTXISD::Suld2DI64Zero; + case Intrinsic::nvvm_suld_2d_v2i8_zero: + return NVPTXISD::Suld2DV2I8Zero; + case Intrinsic::nvvm_suld_2d_v2i16_zero: + return NVPTXISD::Suld2DV2I16Zero; + case Intrinsic::nvvm_suld_2d_v2i32_zero: + return NVPTXISD::Suld2DV2I32Zero; + case Intrinsic::nvvm_suld_2d_v2i64_zero: + return NVPTXISD::Suld2DV2I64Zero; + case Intrinsic::nvvm_suld_2d_v4i8_zero: + return NVPTXISD::Suld2DV4I8Zero; + case Intrinsic::nvvm_suld_2d_v4i16_zero: + return NVPTXISD::Suld2DV4I16Zero; + case Intrinsic::nvvm_suld_2d_v4i32_zero: + return NVPTXISD::Suld2DV4I32Zero; + case Intrinsic::nvvm_suld_2d_array_i8_zero: + return NVPTXISD::Suld2DArrayI8Zero; + case Intrinsic::nvvm_suld_2d_array_i16_zero: + return NVPTXISD::Suld2DArrayI16Zero; + case Intrinsic::nvvm_suld_2d_array_i32_zero: + return NVPTXISD::Suld2DArrayI32Zero; + case Intrinsic::nvvm_suld_2d_array_i64_zero: + return NVPTXISD::Suld2DArrayI64Zero; + case Intrinsic::nvvm_suld_2d_array_v2i8_zero: + return NVPTXISD::Suld2DArrayV2I8Zero; + case Intrinsic::nvvm_suld_2d_array_v2i16_zero: + return NVPTXISD::Suld2DArrayV2I16Zero; + case Intrinsic::nvvm_suld_2d_array_v2i32_zero: + return NVPTXISD::Suld2DArrayV2I32Zero; + case Intrinsic::nvvm_suld_2d_array_v2i64_zero: + return NVPTXISD::Suld2DArrayV2I64Zero; + case Intrinsic::nvvm_suld_2d_array_v4i8_zero: + return NVPTXISD::Suld2DArrayV4I8Zero; + case Intrinsic::nvvm_suld_2d_array_v4i16_zero: + return NVPTXISD::Suld2DArrayV4I16Zero; + case Intrinsic::nvvm_suld_2d_array_v4i32_zero: + return NVPTXISD::Suld2DArrayV4I32Zero; + case Intrinsic::nvvm_suld_3d_i8_zero: + return NVPTXISD::Suld3DI8Zero; + case Intrinsic::nvvm_suld_3d_i16_zero: + return NVPTXISD::Suld3DI16Zero; + case Intrinsic::nvvm_suld_3d_i32_zero: + return NVPTXISD::Suld3DI32Zero; + case Intrinsic::nvvm_suld_3d_i64_zero: + return NVPTXISD::Suld3DI64Zero; + case Intrinsic::nvvm_suld_3d_v2i8_zero: + return NVPTXISD::Suld3DV2I8Zero; + case Intrinsic::nvvm_suld_3d_v2i16_zero: + return NVPTXISD::Suld3DV2I16Zero; + case Intrinsic::nvvm_suld_3d_v2i32_zero: + return NVPTXISD::Suld3DV2I32Zero; + case Intrinsic::nvvm_suld_3d_v2i64_zero: + return NVPTXISD::Suld3DV2I64Zero; + case Intrinsic::nvvm_suld_3d_v4i8_zero: + return NVPTXISD::Suld3DV4I8Zero; + case Intrinsic::nvvm_suld_3d_v4i16_zero: + return NVPTXISD::Suld3DV4I16Zero; + case Intrinsic::nvvm_suld_3d_v4i32_zero: + return NVPTXISD::Suld3DV4I32Zero; } } @@ -2422,28 +3303,64 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( return true; } - case Intrinsic::nvvm_tex_1d_v4f32_i32: + case Intrinsic::nvvm_tex_1d_v4f32_s32: case Intrinsic::nvvm_tex_1d_v4f32_f32: case Intrinsic::nvvm_tex_1d_level_v4f32_f32: case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: - case Intrinsic::nvvm_tex_1d_array_v4f32_i32: + case Intrinsic::nvvm_tex_1d_array_v4f32_s32: case Intrinsic::nvvm_tex_1d_array_v4f32_f32: case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: - case Intrinsic::nvvm_tex_2d_v4f32_i32: + case Intrinsic::nvvm_tex_2d_v4f32_s32: case Intrinsic::nvvm_tex_2d_v4f32_f32: case Intrinsic::nvvm_tex_2d_level_v4f32_f32: case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: - case Intrinsic::nvvm_tex_2d_array_v4f32_i32: + case Intrinsic::nvvm_tex_2d_array_v4f32_s32: case Intrinsic::nvvm_tex_2d_array_v4f32_f32: case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: case 
Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: - case Intrinsic::nvvm_tex_3d_v4f32_i32: + case Intrinsic::nvvm_tex_3d_v4f32_s32: case Intrinsic::nvvm_tex_3d_v4f32_f32: case Intrinsic::nvvm_tex_3d_level_v4f32_f32: - case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: { + case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_cube_v4f32_f32: + case Intrinsic::nvvm_tex_cube_level_v4f32_f32: + case Intrinsic::nvvm_tex_cube_array_v4f32_f32: + case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: + case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: + case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: + case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: + case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: + case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: + case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: + case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: + case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: + case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: { Info.opc = getOpcForTextureInstr(Intrinsic); - Info.memVT = MVT::f32; + Info.memVT = MVT::v4f32; Info.ptrVal = nullptr; Info.offset = 0; Info.vol = 0; @@ -2452,28 +3369,120 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.align = 16; return true; } - case Intrinsic::nvvm_tex_1d_v4i32_i32: - case Intrinsic::nvvm_tex_1d_v4i32_f32: - case Intrinsic::nvvm_tex_1d_level_v4i32_f32: - case Intrinsic::nvvm_tex_1d_grad_v4i32_f32: - case Intrinsic::nvvm_tex_1d_array_v4i32_i32: - case Intrinsic::nvvm_tex_1d_array_v4i32_f32: - case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32: - case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32: - case Intrinsic::nvvm_tex_2d_v4i32_i32: - case Intrinsic::nvvm_tex_2d_v4i32_f32: - case Intrinsic::nvvm_tex_2d_level_v4i32_f32: - case Intrinsic::nvvm_tex_2d_grad_v4i32_f32: - case Intrinsic::nvvm_tex_2d_array_v4i32_i32: - case Intrinsic::nvvm_tex_2d_array_v4i32_f32: - case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32: - case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32: - case Intrinsic::nvvm_tex_3d_v4i32_i32: - case Intrinsic::nvvm_tex_3d_v4i32_f32: - case Intrinsic::nvvm_tex_3d_level_v4i32_f32: - case Intrinsic::nvvm_tex_3d_grad_v4i32_f32: { + case Intrinsic::nvvm_tex_1d_v4s32_s32: + case Intrinsic::nvvm_tex_1d_v4s32_f32: + case Intrinsic::nvvm_tex_1d_level_v4s32_f32: + case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: + 
case Intrinsic::nvvm_tex_1d_array_v4s32_s32: + case Intrinsic::nvvm_tex_1d_array_v4s32_f32: + case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: + case Intrinsic::nvvm_tex_2d_v4s32_s32: + case Intrinsic::nvvm_tex_2d_v4s32_f32: + case Intrinsic::nvvm_tex_2d_level_v4s32_f32: + case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_2d_array_v4s32_s32: + case Intrinsic::nvvm_tex_2d_array_v4s32_f32: + case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: + case Intrinsic::nvvm_tex_3d_v4s32_s32: + case Intrinsic::nvvm_tex_3d_v4s32_f32: + case Intrinsic::nvvm_tex_3d_level_v4s32_f32: + case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_cube_v4s32_f32: + case Intrinsic::nvvm_tex_cube_level_v4s32_f32: + case Intrinsic::nvvm_tex_cube_array_v4s32_f32: + case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_cube_v4u32_f32: + case Intrinsic::nvvm_tex_cube_level_v4u32_f32: + case Intrinsic::nvvm_tex_cube_array_v4u32_f32: + case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_1d_v4u32_s32: + case Intrinsic::nvvm_tex_1d_v4u32_f32: + case Intrinsic::nvvm_tex_1d_level_v4u32_f32: + case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_1d_array_v4u32_s32: + case Intrinsic::nvvm_tex_1d_array_v4u32_f32: + case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: + case Intrinsic::nvvm_tex_2d_v4u32_s32: + case Intrinsic::nvvm_tex_2d_v4u32_f32: + case Intrinsic::nvvm_tex_2d_level_v4u32_f32: + case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_2d_array_v4u32_s32: + case Intrinsic::nvvm_tex_2d_array_v4u32_f32: + case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: + case Intrinsic::nvvm_tex_3d_v4u32_s32: + case Intrinsic::nvvm_tex_3d_v4u32_f32: + case Intrinsic::nvvm_tex_3d_level_v4u32_f32: + case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: + case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: + case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: + case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: + case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: + case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: + case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: + case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: + case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: + case 
Intrinsic::nvvm_tex_unified_1d_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: + case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: + case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: + case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: + case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: + case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: + case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: + case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: + case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: { Info.opc = getOpcForTextureInstr(Intrinsic); - Info.memVT = MVT::i32; + Info.memVT = MVT::v4i32; Info.ptrVal = nullptr; Info.offset = 0; Info.vol = 0; @@ -2482,6 +3491,21 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.align = 16; return true; } + case Intrinsic::nvvm_suld_1d_i8_clamp: + case Intrinsic::nvvm_suld_1d_v2i8_clamp: + case Intrinsic::nvvm_suld_1d_v4i8_clamp: + case Intrinsic::nvvm_suld_1d_array_i8_clamp: + case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: + case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: + case Intrinsic::nvvm_suld_2d_i8_clamp: + case Intrinsic::nvvm_suld_2d_v2i8_clamp: + case Intrinsic::nvvm_suld_2d_v4i8_clamp: + case Intrinsic::nvvm_suld_2d_array_i8_clamp: + case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: + case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: + case Intrinsic::nvvm_suld_3d_i8_clamp: + case Intrinsic::nvvm_suld_3d_v2i8_clamp: + case Intrinsic::nvvm_suld_3d_v4i8_clamp: case Intrinsic::nvvm_suld_1d_i8_trap: case Intrinsic::nvvm_suld_1d_v2i8_trap: case Intrinsic::nvvm_suld_1d_v4i8_trap: @@ -2496,7 +3520,22 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_suld_2d_array_v4i8_trap: case Intrinsic::nvvm_suld_3d_i8_trap: case Intrinsic::nvvm_suld_3d_v2i8_trap: - case Intrinsic::nvvm_suld_3d_v4i8_trap: { + case Intrinsic::nvvm_suld_3d_v4i8_trap: + case Intrinsic::nvvm_suld_1d_i8_zero: + case Intrinsic::nvvm_suld_1d_v2i8_zero: + case Intrinsic::nvvm_suld_1d_v4i8_zero: + case Intrinsic::nvvm_suld_1d_array_i8_zero: + case Intrinsic::nvvm_suld_1d_array_v2i8_zero: + case Intrinsic::nvvm_suld_1d_array_v4i8_zero: + case Intrinsic::nvvm_suld_2d_i8_zero: + case 
Intrinsic::nvvm_suld_2d_v2i8_zero: + case Intrinsic::nvvm_suld_2d_v4i8_zero: + case Intrinsic::nvvm_suld_2d_array_i8_zero: + case Intrinsic::nvvm_suld_2d_array_v2i8_zero: + case Intrinsic::nvvm_suld_2d_array_v4i8_zero: + case Intrinsic::nvvm_suld_3d_i8_zero: + case Intrinsic::nvvm_suld_3d_v2i8_zero: + case Intrinsic::nvvm_suld_3d_v4i8_zero: { Info.opc = getOpcForSurfaceInstr(Intrinsic); Info.memVT = MVT::i8; Info.ptrVal = nullptr; @@ -2507,6 +3546,21 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.align = 16; return true; } + case Intrinsic::nvvm_suld_1d_i16_clamp: + case Intrinsic::nvvm_suld_1d_v2i16_clamp: + case Intrinsic::nvvm_suld_1d_v4i16_clamp: + case Intrinsic::nvvm_suld_1d_array_i16_clamp: + case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: + case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: + case Intrinsic::nvvm_suld_2d_i16_clamp: + case Intrinsic::nvvm_suld_2d_v2i16_clamp: + case Intrinsic::nvvm_suld_2d_v4i16_clamp: + case Intrinsic::nvvm_suld_2d_array_i16_clamp: + case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: + case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: + case Intrinsic::nvvm_suld_3d_i16_clamp: + case Intrinsic::nvvm_suld_3d_v2i16_clamp: + case Intrinsic::nvvm_suld_3d_v4i16_clamp: case Intrinsic::nvvm_suld_1d_i16_trap: case Intrinsic::nvvm_suld_1d_v2i16_trap: case Intrinsic::nvvm_suld_1d_v4i16_trap: @@ -2521,7 +3575,22 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_suld_2d_array_v4i16_trap: case Intrinsic::nvvm_suld_3d_i16_trap: case Intrinsic::nvvm_suld_3d_v2i16_trap: - case Intrinsic::nvvm_suld_3d_v4i16_trap: { + case Intrinsic::nvvm_suld_3d_v4i16_trap: + case Intrinsic::nvvm_suld_1d_i16_zero: + case Intrinsic::nvvm_suld_1d_v2i16_zero: + case Intrinsic::nvvm_suld_1d_v4i16_zero: + case Intrinsic::nvvm_suld_1d_array_i16_zero: + case Intrinsic::nvvm_suld_1d_array_v2i16_zero: + case Intrinsic::nvvm_suld_1d_array_v4i16_zero: + case Intrinsic::nvvm_suld_2d_i16_zero: + case Intrinsic::nvvm_suld_2d_v2i16_zero: + case Intrinsic::nvvm_suld_2d_v4i16_zero: + case Intrinsic::nvvm_suld_2d_array_i16_zero: + case Intrinsic::nvvm_suld_2d_array_v2i16_zero: + case Intrinsic::nvvm_suld_2d_array_v4i16_zero: + case Intrinsic::nvvm_suld_3d_i16_zero: + case Intrinsic::nvvm_suld_3d_v2i16_zero: + case Intrinsic::nvvm_suld_3d_v4i16_zero: { Info.opc = getOpcForSurfaceInstr(Intrinsic); Info.memVT = MVT::i16; Info.ptrVal = nullptr; @@ -2532,6 +3601,21 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.align = 16; return true; } + case Intrinsic::nvvm_suld_1d_i32_clamp: + case Intrinsic::nvvm_suld_1d_v2i32_clamp: + case Intrinsic::nvvm_suld_1d_v4i32_clamp: + case Intrinsic::nvvm_suld_1d_array_i32_clamp: + case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: + case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: + case Intrinsic::nvvm_suld_2d_i32_clamp: + case Intrinsic::nvvm_suld_2d_v2i32_clamp: + case Intrinsic::nvvm_suld_2d_v4i32_clamp: + case Intrinsic::nvvm_suld_2d_array_i32_clamp: + case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: + case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: + case Intrinsic::nvvm_suld_3d_i32_clamp: + case Intrinsic::nvvm_suld_3d_v2i32_clamp: + case Intrinsic::nvvm_suld_3d_v4i32_clamp: case Intrinsic::nvvm_suld_1d_i32_trap: case Intrinsic::nvvm_suld_1d_v2i32_trap: case Intrinsic::nvvm_suld_1d_v4i32_trap: @@ -2546,7 +3630,22 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_suld_2d_array_v4i32_trap: case Intrinsic::nvvm_suld_3d_i32_trap: case Intrinsic::nvvm_suld_3d_v2i32_trap: - case Intrinsic::nvvm_suld_3d_v4i32_trap: { + case 
Intrinsic::nvvm_suld_3d_v4i32_trap: + case Intrinsic::nvvm_suld_1d_i32_zero: + case Intrinsic::nvvm_suld_1d_v2i32_zero: + case Intrinsic::nvvm_suld_1d_v4i32_zero: + case Intrinsic::nvvm_suld_1d_array_i32_zero: + case Intrinsic::nvvm_suld_1d_array_v2i32_zero: + case Intrinsic::nvvm_suld_1d_array_v4i32_zero: + case Intrinsic::nvvm_suld_2d_i32_zero: + case Intrinsic::nvvm_suld_2d_v2i32_zero: + case Intrinsic::nvvm_suld_2d_v4i32_zero: + case Intrinsic::nvvm_suld_2d_array_i32_zero: + case Intrinsic::nvvm_suld_2d_array_v2i32_zero: + case Intrinsic::nvvm_suld_2d_array_v4i32_zero: + case Intrinsic::nvvm_suld_3d_i32_zero: + case Intrinsic::nvvm_suld_3d_v2i32_zero: + case Intrinsic::nvvm_suld_3d_v4i32_zero: { Info.opc = getOpcForSurfaceInstr(Intrinsic); Info.memVT = MVT::i32; Info.ptrVal = nullptr; @@ -2557,7 +3656,46 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.align = 16; return true; } - + case Intrinsic::nvvm_suld_1d_i64_clamp: + case Intrinsic::nvvm_suld_1d_v2i64_clamp: + case Intrinsic::nvvm_suld_1d_array_i64_clamp: + case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: + case Intrinsic::nvvm_suld_2d_i64_clamp: + case Intrinsic::nvvm_suld_2d_v2i64_clamp: + case Intrinsic::nvvm_suld_2d_array_i64_clamp: + case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: + case Intrinsic::nvvm_suld_3d_i64_clamp: + case Intrinsic::nvvm_suld_3d_v2i64_clamp: + case Intrinsic::nvvm_suld_1d_i64_trap: + case Intrinsic::nvvm_suld_1d_v2i64_trap: + case Intrinsic::nvvm_suld_1d_array_i64_trap: + case Intrinsic::nvvm_suld_1d_array_v2i64_trap: + case Intrinsic::nvvm_suld_2d_i64_trap: + case Intrinsic::nvvm_suld_2d_v2i64_trap: + case Intrinsic::nvvm_suld_2d_array_i64_trap: + case Intrinsic::nvvm_suld_2d_array_v2i64_trap: + case Intrinsic::nvvm_suld_3d_i64_trap: + case Intrinsic::nvvm_suld_3d_v2i64_trap: + case Intrinsic::nvvm_suld_1d_i64_zero: + case Intrinsic::nvvm_suld_1d_v2i64_zero: + case Intrinsic::nvvm_suld_1d_array_i64_zero: + case Intrinsic::nvvm_suld_1d_array_v2i64_zero: + case Intrinsic::nvvm_suld_2d_i64_zero: + case Intrinsic::nvvm_suld_2d_v2i64_zero: + case Intrinsic::nvvm_suld_2d_array_i64_zero: + case Intrinsic::nvvm_suld_2d_array_v2i64_zero: + case Intrinsic::nvvm_suld_3d_i64_zero: + case Intrinsic::nvvm_suld_3d_v2i64_zero: { + Info.opc = getOpcForSurfaceInstr(Intrinsic); + Info.memVT = MVT::i64; + Info.ptrVal = nullptr; + Info.offset = 0; + Info.vol = 0; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + } } return false; } diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 7b4026d8fba..d25fc7844d3 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -77,54 +77,244 @@ enum NodeType { StoreRetvalV4, // Texture intrinsics - Tex1DFloatI32, + Tex1DFloatS32, Tex1DFloatFloat, Tex1DFloatFloatLevel, Tex1DFloatFloatGrad, - Tex1DI32I32, - Tex1DI32Float, - Tex1DI32FloatLevel, - Tex1DI32FloatGrad, - Tex1DArrayFloatI32, + Tex1DS32S32, + Tex1DS32Float, + Tex1DS32FloatLevel, + Tex1DS32FloatGrad, + Tex1DU32S32, + Tex1DU32Float, + Tex1DU32FloatLevel, + Tex1DU32FloatGrad, + Tex1DArrayFloatS32, Tex1DArrayFloatFloat, Tex1DArrayFloatFloatLevel, Tex1DArrayFloatFloatGrad, - Tex1DArrayI32I32, - Tex1DArrayI32Float, - Tex1DArrayI32FloatLevel, - Tex1DArrayI32FloatGrad, - Tex2DFloatI32, + Tex1DArrayS32S32, + Tex1DArrayS32Float, + Tex1DArrayS32FloatLevel, + Tex1DArrayS32FloatGrad, + Tex1DArrayU32S32, + Tex1DArrayU32Float, + Tex1DArrayU32FloatLevel, + Tex1DArrayU32FloatGrad, + Tex2DFloatS32, Tex2DFloatFloat, 
Tex2DFloatFloatLevel, Tex2DFloatFloatGrad, - Tex2DI32I32, - Tex2DI32Float, - Tex2DI32FloatLevel, - Tex2DI32FloatGrad, - Tex2DArrayFloatI32, + Tex2DS32S32, + Tex2DS32Float, + Tex2DS32FloatLevel, + Tex2DS32FloatGrad, + Tex2DU32S32, + Tex2DU32Float, + Tex2DU32FloatLevel, + Tex2DU32FloatGrad, + Tex2DArrayFloatS32, Tex2DArrayFloatFloat, Tex2DArrayFloatFloatLevel, Tex2DArrayFloatFloatGrad, - Tex2DArrayI32I32, - Tex2DArrayI32Float, - Tex2DArrayI32FloatLevel, - Tex2DArrayI32FloatGrad, - Tex3DFloatI32, + Tex2DArrayS32S32, + Tex2DArrayS32Float, + Tex2DArrayS32FloatLevel, + Tex2DArrayS32FloatGrad, + Tex2DArrayU32S32, + Tex2DArrayU32Float, + Tex2DArrayU32FloatLevel, + Tex2DArrayU32FloatGrad, + Tex3DFloatS32, Tex3DFloatFloat, Tex3DFloatFloatLevel, Tex3DFloatFloatGrad, - Tex3DI32I32, - Tex3DI32Float, - Tex3DI32FloatLevel, - Tex3DI32FloatGrad, + Tex3DS32S32, + Tex3DS32Float, + Tex3DS32FloatLevel, + Tex3DS32FloatGrad, + Tex3DU32S32, + Tex3DU32Float, + Tex3DU32FloatLevel, + Tex3DU32FloatGrad, + TexCubeFloatFloat, + TexCubeFloatFloatLevel, + TexCubeS32Float, + TexCubeS32FloatLevel, + TexCubeU32Float, + TexCubeU32FloatLevel, + TexCubeArrayFloatFloat, + TexCubeArrayFloatFloatLevel, + TexCubeArrayS32Float, + TexCubeArrayS32FloatLevel, + TexCubeArrayU32Float, + TexCubeArrayU32FloatLevel, + Tld4R2DFloatFloat, + Tld4G2DFloatFloat, + Tld4B2DFloatFloat, + Tld4A2DFloatFloat, + Tld4R2DS64Float, + Tld4G2DS64Float, + Tld4B2DS64Float, + Tld4A2DS64Float, + Tld4R2DU64Float, + Tld4G2DU64Float, + Tld4B2DU64Float, + Tld4A2DU64Float, + TexUnified1DFloatS32, + TexUnified1DFloatFloat, + TexUnified1DFloatFloatLevel, + TexUnified1DFloatFloatGrad, + TexUnified1DS32S32, + TexUnified1DS32Float, + TexUnified1DS32FloatLevel, + TexUnified1DS32FloatGrad, + TexUnified1DU32S32, + TexUnified1DU32Float, + TexUnified1DU32FloatLevel, + TexUnified1DU32FloatGrad, + TexUnified1DArrayFloatS32, + TexUnified1DArrayFloatFloat, + TexUnified1DArrayFloatFloatLevel, + TexUnified1DArrayFloatFloatGrad, + TexUnified1DArrayS32S32, + TexUnified1DArrayS32Float, + TexUnified1DArrayS32FloatLevel, + TexUnified1DArrayS32FloatGrad, + TexUnified1DArrayU32S32, + TexUnified1DArrayU32Float, + TexUnified1DArrayU32FloatLevel, + TexUnified1DArrayU32FloatGrad, + TexUnified2DFloatS32, + TexUnified2DFloatFloat, + TexUnified2DFloatFloatLevel, + TexUnified2DFloatFloatGrad, + TexUnified2DS32S32, + TexUnified2DS32Float, + TexUnified2DS32FloatLevel, + TexUnified2DS32FloatGrad, + TexUnified2DU32S32, + TexUnified2DU32Float, + TexUnified2DU32FloatLevel, + TexUnified2DU32FloatGrad, + TexUnified2DArrayFloatS32, + TexUnified2DArrayFloatFloat, + TexUnified2DArrayFloatFloatLevel, + TexUnified2DArrayFloatFloatGrad, + TexUnified2DArrayS32S32, + TexUnified2DArrayS32Float, + TexUnified2DArrayS32FloatLevel, + TexUnified2DArrayS32FloatGrad, + TexUnified2DArrayU32S32, + TexUnified2DArrayU32Float, + TexUnified2DArrayU32FloatLevel, + TexUnified2DArrayU32FloatGrad, + TexUnified3DFloatS32, + TexUnified3DFloatFloat, + TexUnified3DFloatFloatLevel, + TexUnified3DFloatFloatGrad, + TexUnified3DS32S32, + TexUnified3DS32Float, + TexUnified3DS32FloatLevel, + TexUnified3DS32FloatGrad, + TexUnified3DU32S32, + TexUnified3DU32Float, + TexUnified3DU32FloatLevel, + TexUnified3DU32FloatGrad, + TexUnifiedCubeFloatFloat, + TexUnifiedCubeFloatFloatLevel, + TexUnifiedCubeS32Float, + TexUnifiedCubeS32FloatLevel, + TexUnifiedCubeU32Float, + TexUnifiedCubeU32FloatLevel, + TexUnifiedCubeArrayFloatFloat, + TexUnifiedCubeArrayFloatFloatLevel, + TexUnifiedCubeArrayS32Float, + TexUnifiedCubeArrayS32FloatLevel, + 
TexUnifiedCubeArrayU32Float, + TexUnifiedCubeArrayU32FloatLevel, + Tld4UnifiedR2DFloatFloat, + Tld4UnifiedG2DFloatFloat, + Tld4UnifiedB2DFloatFloat, + Tld4UnifiedA2DFloatFloat, + Tld4UnifiedR2DS64Float, + Tld4UnifiedG2DS64Float, + Tld4UnifiedB2DS64Float, + Tld4UnifiedA2DS64Float, + Tld4UnifiedR2DU64Float, + Tld4UnifiedG2DU64Float, + Tld4UnifiedB2DU64Float, + Tld4UnifiedA2DU64Float, // Surface intrinsics + Suld1DI8Clamp, + Suld1DI16Clamp, + Suld1DI32Clamp, + Suld1DI64Clamp, + Suld1DV2I8Clamp, + Suld1DV2I16Clamp, + Suld1DV2I32Clamp, + Suld1DV2I64Clamp, + Suld1DV4I8Clamp, + Suld1DV4I16Clamp, + Suld1DV4I32Clamp, + + Suld1DArrayI8Clamp, + Suld1DArrayI16Clamp, + Suld1DArrayI32Clamp, + Suld1DArrayI64Clamp, + Suld1DArrayV2I8Clamp, + Suld1DArrayV2I16Clamp, + Suld1DArrayV2I32Clamp, + Suld1DArrayV2I64Clamp, + Suld1DArrayV4I8Clamp, + Suld1DArrayV4I16Clamp, + Suld1DArrayV4I32Clamp, + + Suld2DI8Clamp, + Suld2DI16Clamp, + Suld2DI32Clamp, + Suld2DI64Clamp, + Suld2DV2I8Clamp, + Suld2DV2I16Clamp, + Suld2DV2I32Clamp, + Suld2DV2I64Clamp, + Suld2DV4I8Clamp, + Suld2DV4I16Clamp, + Suld2DV4I32Clamp, + + Suld2DArrayI8Clamp, + Suld2DArrayI16Clamp, + Suld2DArrayI32Clamp, + Suld2DArrayI64Clamp, + Suld2DArrayV2I8Clamp, + Suld2DArrayV2I16Clamp, + Suld2DArrayV2I32Clamp, + Suld2DArrayV2I64Clamp, + Suld2DArrayV4I8Clamp, + Suld2DArrayV4I16Clamp, + Suld2DArrayV4I32Clamp, + + Suld3DI8Clamp, + Suld3DI16Clamp, + Suld3DI32Clamp, + Suld3DI64Clamp, + Suld3DV2I8Clamp, + Suld3DV2I16Clamp, + Suld3DV2I32Clamp, + Suld3DV2I64Clamp, + Suld3DV4I8Clamp, + Suld3DV4I16Clamp, + Suld3DV4I32Clamp, + Suld1DI8Trap, Suld1DI16Trap, Suld1DI32Trap, + Suld1DI64Trap, Suld1DV2I8Trap, Suld1DV2I16Trap, Suld1DV2I32Trap, + Suld1DV2I64Trap, Suld1DV4I8Trap, Suld1DV4I16Trap, Suld1DV4I32Trap, @@ -132,9 +322,11 @@ enum NodeType { Suld1DArrayI8Trap, Suld1DArrayI16Trap, Suld1DArrayI32Trap, + Suld1DArrayI64Trap, Suld1DArrayV2I8Trap, Suld1DArrayV2I16Trap, Suld1DArrayV2I32Trap, + Suld1DArrayV2I64Trap, Suld1DArrayV4I8Trap, Suld1DArrayV4I16Trap, Suld1DArrayV4I32Trap, @@ -142,9 +334,11 @@ enum NodeType { Suld2DI8Trap, Suld2DI16Trap, Suld2DI32Trap, + Suld2DI64Trap, Suld2DV2I8Trap, Suld2DV2I16Trap, Suld2DV2I32Trap, + Suld2DV2I64Trap, Suld2DV4I8Trap, Suld2DV4I16Trap, Suld2DV4I32Trap, @@ -152,9 +346,11 @@ enum NodeType { Suld2DArrayI8Trap, Suld2DArrayI16Trap, Suld2DArrayI32Trap, + Suld2DArrayI64Trap, Suld2DArrayV2I8Trap, Suld2DArrayV2I16Trap, Suld2DArrayV2I32Trap, + Suld2DArrayV2I64Trap, Suld2DArrayV4I8Trap, Suld2DArrayV4I16Trap, Suld2DArrayV4I32Trap, @@ -162,12 +358,74 @@ enum NodeType { Suld3DI8Trap, Suld3DI16Trap, Suld3DI32Trap, + Suld3DI64Trap, Suld3DV2I8Trap, Suld3DV2I16Trap, Suld3DV2I32Trap, + Suld3DV2I64Trap, Suld3DV4I8Trap, Suld3DV4I16Trap, - Suld3DV4I32Trap + Suld3DV4I32Trap, + + Suld1DI8Zero, + Suld1DI16Zero, + Suld1DI32Zero, + Suld1DI64Zero, + Suld1DV2I8Zero, + Suld1DV2I16Zero, + Suld1DV2I32Zero, + Suld1DV2I64Zero, + Suld1DV4I8Zero, + Suld1DV4I16Zero, + Suld1DV4I32Zero, + + Suld1DArrayI8Zero, + Suld1DArrayI16Zero, + Suld1DArrayI32Zero, + Suld1DArrayI64Zero, + Suld1DArrayV2I8Zero, + Suld1DArrayV2I16Zero, + Suld1DArrayV2I32Zero, + Suld1DArrayV2I64Zero, + Suld1DArrayV4I8Zero, + Suld1DArrayV4I16Zero, + Suld1DArrayV4I32Zero, + + Suld2DI8Zero, + Suld2DI16Zero, + Suld2DI32Zero, + Suld2DI64Zero, + Suld2DV2I8Zero, + Suld2DV2I16Zero, + Suld2DV2I32Zero, + Suld2DV2I64Zero, + Suld2DV4I8Zero, + Suld2DV4I16Zero, + Suld2DV4I32Zero, + + Suld2DArrayI8Zero, + Suld2DArrayI16Zero, + Suld2DArrayI32Zero, + Suld2DArrayI64Zero, + Suld2DArrayV2I8Zero, + Suld2DArrayV2I16Zero, + 
Suld2DArrayV2I32Zero,
+  Suld2DArrayV2I64Zero,
+  Suld2DArrayV4I8Zero,
+  Suld2DArrayV4I16Zero,
+  Suld2DArrayV4I32Zero,
+
+  Suld3DI8Zero,
+  Suld3DI16Zero,
+  Suld3DI32Zero,
+  Suld3DI64Zero,
+  Suld3DV2I8Zero,
+  Suld3DV2I16Zero,
+  Suld3DV2I32Zero,
+  Suld3DV2I64Zero,
+  Suld3DV4I8Zero,
+  Suld3DV4I16Zero,
+  Suld3DV4I32Zero
 };
 }
diff --git a/lib/Target/NVPTX/NVPTXInstrFormats.td b/lib/Target/NVPTX/NVPTXInstrFormats.td
index f11f1b8f96f..ffcb5d5273a 100644
--- a/lib/Target/NVPTX/NVPTXInstrFormats.td
+++ b/lib/Target/NVPTX/NVPTXInstrFormats.td
@@ -36,8 +36,24 @@ class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>
   bit IsLoad = 0;
   bit IsStore = 0;
 
-  let TSFlags{3-0} = VecInstType;
-  let TSFlags{4-4} = IsSimpleMove;
-  let TSFlags{5-5} = IsLoad;
-  let TSFlags{6-6} = IsStore;
+  bit IsTex = 0;
+  bit IsSust = 0;
+  bit IsSurfTexQuery = 0;
+  bit IsTexModeUnified = 0;
+
+  // The following field is encoded as log2 of the vector size plus one,
+  // with 0 meaning the operation is not a surface instruction. For example,
+  // if IsSuld == 2, then the instruction is a suld instruction with vector size
+  // 2**(2-1) = 2.
+  bits<2> IsSuld = 0;
+
+  let TSFlags{3-0} = VecInstType;
+  let TSFlags{4-4} = IsSimpleMove;
+  let TSFlags{5-5} = IsLoad;
+  let TSFlags{6-6} = IsStore;
+  let TSFlags{7} = IsTex;
+  let TSFlags{9-8} = IsSuld;
+  let TSFlags{10} = IsSust;
+  let TSFlags{11} = IsSurfTexQuery;
+  let TSFlags{12} = IsTexModeUnified;
 }
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 06873b7d1d8..2c1e99c4249 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1936,9 +1936,10 @@ def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
 
 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
 //       also defined in NVPTXReplaceImageHandles.cpp
-
+// texmode_independent
+let IsTex = 1, IsTexModeUnified = 0 in {
 // Texture fetch instructions using handles
-def TEX_1D_F32_I32
+def TEX_1D_F32_S32
   : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                     Float32Regs:$b, Float32Regs:$a),
               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
@@ -1965,19 +1966,19 @@ def TEX_1D_F32_F32_GRAD
               "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
               []>;
-def TEX_1D_I32_I32
+def TEX_1D_S32_S32
   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                     Int32Regs:$b, Int32Regs:$a),
               (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
               "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
               []>;
-def TEX_1D_I32_F32
+def TEX_1D_S32_F32
   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                     Int32Regs:$b, Int32Regs:$a),
               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
               "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
               []>;
-def TEX_1D_I32_F32_LEVEL
+def TEX_1D_S32_F32_LEVEL
   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                     Int32Regs:$b, Int32Regs:$a),
               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
@@ -1985,7 +1986,7 @@ def TEX_1D_I32_F32_LEVEL
               "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
               "[$t, $s, \\{$x\\}], $lod;",
               []>;
-def TEX_1D_I32_F32_GRAD
+def TEX_1D_S32_F32_GRAD
   : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                     Int32Regs:$b, Int32Regs:$a),
               (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
@@ -1993,8 +1994,36 @@ def TEX_1D_I32_F32_GRAD
               "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
               "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
               []>;
+def TEX_1D_U32_S32
+  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+                    Int32Regs:$b, Int32Regs:$a),
+              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
+              "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, 
[$t, $s, \\{$x\\}];", + []>; +def TEX_1D_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), + "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", + []>; +def TEX_1D_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x\\}], $lod;", + []>; +def TEX_1D_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; -def TEX_1D_ARRAY_F32_I32 +def TEX_1D_ARRAY_F32_S32 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), @@ -2024,21 +2053,21 @@ def TEX_1D_ARRAY_F32_F32_GRAD "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", []>; -def TEX_1D_ARRAY_I32_I32 +def TEX_1D_ARRAY_S32_S32 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$l, $x\\}];", []>; -def TEX_1D_ARRAY_I32_F32 +def TEX_1D_ARRAY_S32_F32 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$l, $x\\}];", []>; -def TEX_1D_ARRAY_I32_F32_LEVEL +def TEX_1D_ARRAY_S32_F32_LEVEL : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, @@ -2046,7 +2075,7 @@ def TEX_1D_ARRAY_I32_F32_LEVEL "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$l, $x\\}], $lod;", []>; -def TEX_1D_ARRAY_I32_F32_GRAD +def TEX_1D_ARRAY_S32_F32_GRAD : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, @@ -2054,8 +2083,38 @@ def TEX_1D_ARRAY_I32_F32_GRAD "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", []>; +def TEX_1D_ARRAY_U32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}];", + []>; +def TEX_1D_ARRAY_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), + "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}];", + []>; +def TEX_1D_ARRAY_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}], $lod;", + []>; +def TEX_1D_ARRAY_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, 
\\{$grady\\};", + []>; -def TEX_2D_F32_I32 +def TEX_2D_F32_S32 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), @@ -2087,21 +2146,21 @@ def TEX_2D_F32_F32_GRAD "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " "\\{$grady0, $grady1\\};", []>; -def TEX_2D_I32_I32 +def TEX_2D_S32_S32 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$x, $y\\}];", []>; -def TEX_2D_I32_F32 +def TEX_2D_S32_F32 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$x, $y\\}];", []>; -def TEX_2D_I32_F32_LEVEL +def TEX_2D_S32_F32_LEVEL : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, @@ -2109,7 +2168,7 @@ def TEX_2D_I32_F32_LEVEL "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$x, $y\\}], $lod;", []>; -def TEX_2D_I32_F32_GRAD +def TEX_2D_S32_F32_GRAD : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, @@ -2119,8 +2178,40 @@ def TEX_2D_I32_F32_GRAD "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " "\\{$grady0, $grady1\\};", []>; +def TEX_2D_U32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TEX_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TEX_2D_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$lod), + "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}], $lod;", + []>; +def TEX_2D_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; -def TEX_2D_ARRAY_F32_I32 +def TEX_2D_ARRAY_F32_S32 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, @@ -2154,7 +2245,7 @@ def TEX_2D_ARRAY_F32_F32_GRAD "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " "\\{$grady0, $grady1\\};", []>; -def TEX_2D_ARRAY_I32_I32 +def TEX_2D_ARRAY_S32_S32 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, @@ -2162,7 +2253,7 @@ def TEX_2D_ARRAY_I32_I32 "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$l, $x, $y, $y\\}];", []>; -def TEX_2D_ARRAY_I32_F32 +def TEX_2D_ARRAY_S32_F32 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, @@ -2170,7 +2261,7 @@ def 
TEX_2D_ARRAY_I32_F32 "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$l, $x, $y, $y\\}];", []>; -def TEX_2D_ARRAY_I32_F32_LEVEL +def TEX_2D_ARRAY_S32_F32_LEVEL : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, @@ -2178,7 +2269,7 @@ def TEX_2D_ARRAY_I32_F32_LEVEL "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", []>; -def TEX_2D_ARRAY_I32_F32_GRAD +def TEX_2D_ARRAY_S32_F32_GRAD : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, @@ -2189,8 +2280,43 @@ def TEX_2D_ARRAY_I32_F32_GRAD "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " "\\{$grady0, $grady1\\};", []>; +def TEX_2D_ARRAY_U32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$y), + "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_2D_ARRAY_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y), + "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_2D_ARRAY_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, Float32Regs:$lod), + "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", + []>; +def TEX_2D_ARRAY_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; -def TEX_3D_F32_I32 +def TEX_3D_F32_S32 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, @@ -2227,7 +2353,7 @@ def TEX_3D_F32_F32_GRAD "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " "\\{$grady0, $grady1, $grady2, $grady2\\};", []>; -def TEX_3D_I32_I32 +def TEX_3D_S32_S32 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, @@ -2235,7 +2361,7 @@ def TEX_3D_I32_I32 "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$x, $y, $z, $z\\}];", []>; -def TEX_3D_I32_F32 +def TEX_3D_S32_F32 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, @@ -2243,7 +2369,7 @@ def TEX_3D_I32_F32 "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$x, $y, $z, $z\\}];", []>; -def TEX_3D_I32_F32_LEVEL +def TEX_3D_S32_F32_LEVEL : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, @@ -2251,7 +2377,7 @@ def TEX_3D_I32_F32_LEVEL "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", []>; -def TEX_3D_I32_F32_GRAD +def TEX_3D_S32_F32_GRAD : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 
Float32Regs:$y, @@ -2264,653 +2390,3006 @@ def TEX_3D_I32_F32_GRAD "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " "\\{$grady0, $grady1, $grady2, $grady2\\};", []>; - - -// Surface load instructions -def SULD_1D_I8_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];", +def TEX_3D_U32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$z), + "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}];", []>; -def SULD_1D_I16_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];", +def TEX_3D_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z), + "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}];", []>; -def SULD_1D_I32_TRAP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];", +def TEX_3D_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, Float32Regs:$lod), + "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", []>; -def SULD_1D_V2I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];", +def TEX_3D_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$gradx2, Float32Regs:$grady0, + Float32Regs:$grady1, Float32Regs:$grady2), + "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}], " + "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " + "\\{$grady0, $grady1, $grady2, $grady2\\};", []>; -def SULD_1D_V2I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];", + +def TEX_CUBE_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}];", []>; -def SULD_1D_V2I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];", +def TEX_CUBE_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", []>; -def SULD_1D_V4I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", +def TEX_CUBE_S32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, 
$z\\}];", []>; -def SULD_1D_V4I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", +def TEX_CUBE_S32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", []>; -def SULD_1D_V4I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", +def TEX_CUBE_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_CUBE_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", []>; -def SULD_1D_ARRAY_I8_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];", +def TEX_CUBE_ARRAY_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $z\\}];", []>; -def SULD_1D_ARRAY_I16_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];", +def TEX_CUBE_ARRAY_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;", []>; -def SULD_1D_ARRAY_I32_TRAP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];", +def TEX_CUBE_ARRAY_S32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $z\\}];", []>; -def SULD_1D_ARRAY_V2I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", +def TEX_CUBE_ARRAY_S32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;", []>; -def SULD_1D_ARRAY_V2I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", +def TEX_CUBE_ARRAY_U32_F32 + : NVPTXInst<(outs 
Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $z\\}];", []>; -def SULD_1D_ARRAY_V2I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", +def TEX_CUBE_ARRAY_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;", []>; -def SULD_1D_ARRAY_V4I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", + +def TLD4_R_2D_F32_F32 + : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, + Float32Regs:$v2, Float32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", []>; -def SULD_1D_ARRAY_V4I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", +def TLD4_G_2D_F32_F32 + : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, + Float32Regs:$v2, Float32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", []>; -def SULD_1D_ARRAY_V4I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", +def TLD4_B_2D_F32_F32 + : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, + Float32Regs:$v2, Float32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", []>; - -def SULD_2D_I8_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];", +def TLD4_A_2D_F32_F32 + : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, + Float32Regs:$v2, Float32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", []>; -def SULD_2D_I16_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];", +def TLD4_R_2D_S32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", []>; -def SULD_2D_I32_TRAP - : NVPTXInst<(outs Int32Regs:$r), +def TLD4_G_2D_S32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TLD4_B_2D_S32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, 
Int32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TLD4_A_2D_S32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TLD4_R_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TLD4_G_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TLD4_B_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TLD4_A_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +} + + +// texmode_unified +let IsTex = 1, IsTexModeUnified = 1 in { +// Texture fetch instructions using handles +def TEX_UNIFIED_1D_F32_S32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$x), + "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", + []>; +def TEX_UNIFIED_1D_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x), + "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", + []>; +def TEX_UNIFIED_1D_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod), + "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x\\}], $lod;", + []>; +def TEX_UNIFIED_1D_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; +def TEX_UNIFIED_1D_S32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$x), + "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", + []>; +def TEX_UNIFIED_1D_S32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x), + "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", + []>; +def TEX_UNIFIED_1D_S32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x\\}], $lod;", + []>; +def TEX_UNIFIED_1D_S32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, 
\\{$x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; +def TEX_UNIFIED_1D_U32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$x), + "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", + []>; +def TEX_UNIFIED_1D_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x), + "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", + []>; +def TEX_UNIFIED_1D_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x\\}], $lod;", + []>; +def TEX_UNIFIED_1D_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; + +def TEX_UNIFIED_1D_ARRAY_F32_S32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x), + "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}];", + []>; +def TEX_UNIFIED_1D_ARRAY_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x), + "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}];", + []>; +def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}], $lod;", + []>; +def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; +def TEX_UNIFIED_1D_ARRAY_S32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x), + "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}];", + []>; +def TEX_UNIFIED_1D_ARRAY_S32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x), + "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}];", + []>; +def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}], $lod;", + []>; +def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; +def TEX_UNIFIED_1D_ARRAY_U32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x), + "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}];", + []>; +def TEX_UNIFIED_1D_ARRAY_U32_F32 + : NVPTXInst<(outs 
Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x), + "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}];", + []>; +def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}], $lod;", + []>; +def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; + +def TEX_UNIFIED_2D_F32_S32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), + "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TEX_UNIFIED_2D_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TEX_UNIFIED_2D_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$lod), + "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}], $lod;", + []>; +def TEX_UNIFIED_2D_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; +def TEX_UNIFIED_2D_S32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), + "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TEX_UNIFIED_2D_S32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TEX_UNIFIED_2D_S32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$lod), + "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}], $lod;", + []>; +def TEX_UNIFIED_2D_S32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; +def TEX_UNIFIED_2D_U32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), + "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TEX_UNIFIED_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " 
+ "[$t, \\{$x, $y\\}];", + []>; +def TEX_UNIFIED_2D_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$lod), + "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}], $lod;", + []>; +def TEX_UNIFIED_2D_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; + +def TEX_UNIFIED_2D_ARRAY_F32_S32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$y), + "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_UNIFIED_2D_ARRAY_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y), + "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, Float32Regs:$lod), + "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}], $lod;", + []>; +def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; +def TEX_UNIFIED_2D_ARRAY_S32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$y), + "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_UNIFIED_2D_ARRAY_S32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y), + "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, Float32Regs:$lod), + "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}], $lod;", + []>; +def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; +def TEX_UNIFIED_2D_ARRAY_U32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$y), + "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}];", + []>; +def 
TEX_UNIFIED_2D_ARRAY_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y), + "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, Float32Regs:$lod), + "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}], $lod;", + []>; +def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; + +def TEX_UNIFIED_3D_F32_S32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$z), + "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_UNIFIED_3D_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z), + "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_UNIFIED_3D_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, Float32Regs:$lod), + "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}], $lod;", + []>; +def TEX_UNIFIED_3D_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$gradx2, Float32Regs:$grady0, + Float32Regs:$grady1, Float32Regs:$grady2), + "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}], " + "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " + "\\{$grady0, $grady1, $grady2, $grady2\\};", + []>; +def TEX_UNIFIED_3D_S32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$z), + "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_UNIFIED_3D_S32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z), + "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_UNIFIED_3D_S32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, Float32Regs:$lod), + "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}], $lod;", + []>; +def TEX_UNIFIED_3D_S32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$gradx2, Float32Regs:$grady0, + Float32Regs:$grady1, Float32Regs:$grady2), + "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, 
\\{$x, $y, $z, $z\\}], " + "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " + "\\{$grady0, $grady1, $grady2, $grady2\\};", + []>; +def TEX_UNIFIED_3D_U32_S32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$z), + "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_UNIFIED_3D_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z), + "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_UNIFIED_3D_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, Float32Regs:$lod), + "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}], $lod;", + []>; +def TEX_UNIFIED_3D_U32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$gradx2, Float32Regs:$grady0, + Float32Regs:$grady1, Float32Regs:$grady2), + "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}], " + "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " + "\\{$grady0, $grady1, $grady2, $grady2\\};", + []>; + +def TEX_UNIFIED_CUBE_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_UNIFIED_CUBE_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}], $lod;", + []>; +def TEX_UNIFIED_CUBE_S32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_UNIFIED_CUBE_S32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}], $lod;", + []>; +def TEX_UNIFIED_CUBE_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_UNIFIED_CUBE_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$x, $y, $z, $z\\}], $lod;", + []>; + +def TEX_UNIFIED_CUBE_ARRAY_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $z\\}];", + []>; +def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL + : 
NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $z\\}], $lod;", + []>; +def TEX_UNIFIED_CUBE_ARRAY_S32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $z\\}];", + []>; +def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $z\\}], $lod;", + []>; +def TEX_UNIFIED_CUBE_ARRAY_U32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), + "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $z\\}];", + []>; +def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int32Regs:$l, + Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, + Float32Regs:$lod), + "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, \\{$l, $x, $y, $z\\}], $lod;", + []>; + +def TLD4_UNIFIED_R_2D_F32_F32 + : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, + Float32Regs:$v2, Float32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_G_2D_F32_F32 + : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, + Float32Regs:$v2, Float32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_B_2D_F32_F32 + : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, + Float32Regs:$v2, Float32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_A_2D_F32_F32 + : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, + Float32Regs:$v2, Float32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_R_2D_S32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_G_2D_S32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_B_2D_S32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_A_2D_S32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + 
"[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_R_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_G_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_B_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +def TLD4_UNIFIED_A_2D_U32_F32 + : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, + Int32Regs:$v2, Int32Regs:$v3), + (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), + "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, " + "[$t, \\{$x, $y\\}];", + []>; +} + + + +//=== Surface load instructions +// .clamp variant +let IsSuld = 1 in { +def SULD_1D_I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];", + []>; + +def SULD_1D_ARRAY_I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; + +def SULD_2D_I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; + +def SULD_2D_ARRAY_I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_I32_CLAMP + : 
NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; + +def SULD_3D_I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +} + +let IsSuld = 2 in { +def SULD_1D_V2I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; + +def SULD_1D_ARRAY_V2I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; + +def SULD_2D_V2I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; + +def SULD_2D_ARRAY_V2I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, 
Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; + +def SULD_3D_V2I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I64_CLAMP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +} + +let IsSuld = 3 in { +def SULD_1D_V4I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V4I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V4I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; + +def SULD_1D_ARRAY_V4I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V4I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V4I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; + +def SULD_2D_V4I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V4I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V4I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r, 
Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; + +def SULD_2D_ARRAY_V4I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V4I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V4I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; + + +def SULD_3D_V4I8_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V4I16_CLAMP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V4I32_CLAMP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; +} + + +// .trap variant +let IsSuld = 1 in { +def SULD_1D_I8_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_I16_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_I32_TRAP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_I64_TRAP + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];", + []>; + +def SULD_1D_ARRAY_I8_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I16_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I32_TRAP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I64_TRAP + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; + +def SULD_2D_I8_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_I16_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_I32_TRAP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b32.trap \\{$r\\}, [$s, 
\\{$x, $y\\}];", + []>; +def SULD_2D_I64_TRAP + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; + +def SULD_2D_ARRAY_I8_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_I16_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_I32_TRAP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_I64_TRAP + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; + +def SULD_3D_I8_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_I16_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_I32_TRAP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_I64_TRAP + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +} + +let IsSuld = 2 in { +def SULD_1D_V2I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I64_TRAP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; + +def SULD_1D_ARRAY_V2I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I64_TRAP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; + +def SULD_2D_V2I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, 
Int32Regs:$y), + "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I64_TRAP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; + +def SULD_2D_ARRAY_V2I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I64_TRAP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; + +def SULD_3D_V2I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I64_TRAP + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +} + +let IsSuld = 3 in { +def SULD_1D_V4I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V4I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V4I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; + +def SULD_1D_ARRAY_V4I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V4I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V4I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, " + 
"[$s, \\{$l, $x\\}];", + []>; + +def SULD_2D_V4I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V4I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V4I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; + +def SULD_2D_ARRAY_V4I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V4I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V4I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; + + +def SULD_3D_V4I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V4I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V4I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; +} + +// .zero variant +let IsSuld = 1 in { +def SULD_1D_I8_ZERO + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_I16_ZERO + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_I32_ZERO + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_I64_ZERO + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];", + []>; + +def SULD_1D_ARRAY_I8_ZERO + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I16_ZERO + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I32_ZERO + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I64_ZERO + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + 
"suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; + +def SULD_2D_I8_ZERO + : NVPTXInst<(outs Int16Regs:$r), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];", + "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_I16_ZERO + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_I32_ZERO + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_I64_ZERO + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; + +def SULD_2D_ARRAY_I8_ZERO + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_I16_ZERO + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_I32_ZERO + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_I64_ZERO + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; + +def SULD_3D_I8_ZERO + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_I16_ZERO + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_I32_ZERO + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_I64_ZERO + : NVPTXInst<(outs Int64Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +} + +let IsSuld = 2 in { +def SULD_1D_V2I8_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I16_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I32_ZERO + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I64_ZERO + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; + +def SULD_1D_ARRAY_V2I8_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I16_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I32_ZERO + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, 
Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I64_ZERO + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; + +def SULD_2D_V2I8_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I16_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I32_ZERO + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I64_ZERO + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; + +def SULD_2D_ARRAY_V2I8_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I16_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I32_ZERO + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I64_ZERO + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; + +def SULD_3D_V2I8_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I16_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I32_ZERO + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I64_ZERO + : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +} + +let IsSuld = 3 in { +def SULD_1D_V4I8_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V4I16_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V4I32_ZERO + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; + +def SULD_1D_ARRAY_V4I8_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, 
Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V4I16_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V4I32_ZERO + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; + +def SULD_2D_V4I8_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V4I16_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V4I32_ZERO + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; + +def SULD_2D_ARRAY_V4I8_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V4I16_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V4I32_ZERO + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; + + +def SULD_3D_V4I8_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V4I16_ZERO + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V4I32_ZERO + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; +} + +//----------------------------------- +// Texture Query Intrinsics +//----------------------------------- +def TXQ_CHANNEL_ORDER + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.channel_order.b32 \t$d, [$a];", + []>; +def TXQ_CHANNEL_DATA_TYPE + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.channel_data_type.b32 \t$d, [$a];", + []>; +def TXQ_WIDTH + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.width.b32 \t$d, [$a];", + []>; +def TXQ_HEIGHT + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.height.b32 \t$d, [$a];", + []>; +def TXQ_DEPTH + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.depth.b32 \t$d, [$a];", + 
              []>;
+def TXQ_ARRAY_SIZE
+  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+              "txq.array_size.b32 \t$d, [$a];",
+              []>;
+def TXQ_NUM_SAMPLES
+  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+              "txq.num_samples.b32 \t$d, [$a];",
+              []>;
+def TXQ_NUM_MIPMAP_LEVELS
+  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+              "txq.num_mipmap_levels.b32 \t$d, [$a];",
+              []>;
+
+def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
+          (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
+          (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_width Int64Regs:$a),
+          (TXQ_WIDTH Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_height Int64Regs:$a),
+          (TXQ_HEIGHT Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
+          (TXQ_DEPTH Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
+          (TXQ_ARRAY_SIZE Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
+          (TXQ_NUM_SAMPLES Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
+          (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
+
+
+//-----------------------------------
+// Surface Query Intrinsics
+//-----------------------------------
+def SUQ_CHANNEL_ORDER
+  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+              "suq.channel_order.b32 \t$d, [$a];",
+              []>;
+def SUQ_CHANNEL_DATA_TYPE
+  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+              "suq.channel_data_type.b32 \t$d, [$a];",
+              []>;
+def SUQ_WIDTH
+  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+              "suq.width.b32 \t$d, [$a];",
+              []>;
+def SUQ_HEIGHT
+  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+              "suq.height.b32 \t$d, [$a];",
+              []>;
+def SUQ_DEPTH
+  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+              "suq.depth.b32 \t$d, [$a];",
+              []>;
+def SUQ_ARRAY_SIZE
+  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+              "suq.array_size.b32 \t$d, [$a];",
+              []>;
+
+def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
+          (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
+def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
+          (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
+def : Pat<(int_nvvm_suq_width Int64Regs:$a),
+          (SUQ_WIDTH Int64Regs:$a)>;
+def : Pat<(int_nvvm_suq_height Int64Regs:$a),
+          (SUQ_HEIGHT Int64Regs:$a)>;
+def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
+          (SUQ_DEPTH Int64Regs:$a)>;
+def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
+          (SUQ_ARRAY_SIZE Int64Regs:$a)>;
+
+
+//===- Handle Query -------------------------------------------------------===//
+
+// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
+def ISTYPEP_SAMPLER
+  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+              "istypep.samplerref \t$d, $a;",
+              [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
+def ISTYPEP_SURFACE
+  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+              "istypep.surfref \t$d, $a;",
+              [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
+def ISTYPEP_TEXTURE
+  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+              "istypep.texref \t$d, $a;",
+              [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
+
+//===- Surface Stores -----------------------------------------------------===//
+
+let IsSust = 1 in {
+// Unformatted
+// .clamp variant
+def SUST_B_1D_B8_CLAMP
+  : NVPTXInst<(outs),
+              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+              "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
+              []>;
+def SUST_B_1D_B16_CLAMP
+  : NVPTXInst<(outs),
+              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+              "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
+              []>;
+def SUST_B_1D_B32_CLAMP
+  : 
NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), + "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_B_1D_B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), + "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_B_1D_V2B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V2B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V2B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V2B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), + "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V4B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, + Int16Regs:$b, Int16Regs:$a), + "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_V4B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, + Int16Regs:$b, Int16Regs:$a), + "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_V4B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; + + +def SUST_B_1D_ARRAY_B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), + "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_B_1D_ARRAY_B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), + "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_B_1D_ARRAY_B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), + "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_B_1D_ARRAY_B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), + "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_B_1D_ARRAY_V2B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g), + "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_ARRAY_V2B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g), + "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_ARRAY_V2B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, + Int32Regs:$g), + "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_ARRAY_V2B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, + Int64Regs:$g), + "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_ARRAY_V4B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_ARRAY_V4B16_CLAMP + : NVPTXInst<(outs), + (ins 
Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_ARRAY_V4B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, + Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def SUST_B_2D_B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), + "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_V2B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g), + "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V2B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g), + "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V2B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g), + "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V2B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, + Int64Regs:$g), + "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V4B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_V4B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_V4B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def SUST_B_2D_ARRAY_B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r), + "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r), + "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r), + "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r), + "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_V2B8_CLAMP + : 
NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_2D_ARRAY_V2B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_2D_ARRAY_V2B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g), + "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_2D_ARRAY_V2B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r, Int64Regs:$g), + "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_2D_ARRAY_V4B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_ARRAY_V4B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_ARRAY_V4B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def SUST_B_3D_B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_B_3D_B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_B_3D_B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r), + "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_B_3D_B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r), + "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_B_3D_V2B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_3D_V2B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_3D_V2B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g), + "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_3D_V2B64_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r, Int64Regs:$g), + "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_3D_V4B8_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + 
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", []>; -def SULD_2D_V2I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", +def SUST_B_3D_V4B16_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", []>; -def SULD_2D_V2I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", +def SUST_B_3D_V4B32_CLAMP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", []>; -def SULD_2D_V2I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + + +// .trap variant +def SUST_B_1D_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", []>; -def SULD_2D_V4I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", +def SUST_B_1D_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", []>; -def SULD_2D_V4I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", +def SUST_B_1D_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), + "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", []>; -def SULD_2D_V4I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", +def SUST_B_1D_B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), + "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_B_1D_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V2B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), + "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, + Int16Regs:$b, Int16Regs:$a), + "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, + 
Int16Regs:$b, Int16Regs:$a), + "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", []>; -def SULD_2D_ARRAY_I8_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + +def SUST_B_1D_ARRAY_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), + "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", []>; -def SULD_2D_ARRAY_I16_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", +def SUST_B_1D_ARRAY_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), + "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", []>; -def SULD_2D_ARRAY_I32_TRAP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", +def SUST_B_1D_ARRAY_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), + "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", []>; -def SULD_2D_ARRAY_V2I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", +def SUST_B_1D_ARRAY_B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), + "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", []>; -def SULD_2D_ARRAY_V2I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", +def SUST_B_1D_ARRAY_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g), + "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", []>; -def SULD_2D_ARRAY_V2I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", +def SUST_B_1D_ARRAY_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g), + "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", []>; -def SULD_2D_ARRAY_V4I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", +def SUST_B_1D_ARRAY_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, + Int32Regs:$g), + "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", []>; -def SULD_2D_ARRAY_V4I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", +def SUST_B_1D_ARRAY_V2B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, + Int64Regs:$g), + "sust.b.a1d.v2.b64.trap 
\t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", []>; -def SULD_2D_ARRAY_V4I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", +def SUST_B_1D_ARRAY_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_ARRAY_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_ARRAY_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, + Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", []>; -def SULD_3D_I8_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + +def SUST_B_2D_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), + "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g), + "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g), + "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g), + "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V2B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, + Int64Regs:$g), + "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def SUST_B_2D_ARRAY_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + 
Int16Regs:$r), + "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r), + "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r), + "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", []>; -def SULD_3D_I16_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", +def SUST_B_2D_ARRAY_B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r), + "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", []>; -def SULD_3D_I32_TRAP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", +def SUST_B_2D_ARRAY_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", []>; -def SULD_3D_V2I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", +def SUST_B_2D_ARRAY_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", []>; -def SULD_3D_V2I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", +def SUST_B_2D_ARRAY_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g), + "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", []>; -def SULD_3D_V2I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", +def SUST_B_2D_ARRAY_V2B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r, Int64Regs:$g), + "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", []>; -def SULD_3D_V4I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", +def SUST_B_2D_ARRAY_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", []>; -def SULD_3D_V4I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", +def SUST_B_2D_ARRAY_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, 
Int16Regs:$a), + "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", []>; -def SULD_3D_V4I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", +def SUST_B_2D_ARRAY_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", []>; -//----------------------------------- -// Texture Query Intrinsics -//----------------------------------- -def TXQ_CHANNEL_ORDER - : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "txq.channel_order.b32 \t$d, [$a];", - []>; -def TXQ_CHANNEL_DATA_TYPE - : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "txq.channel_data_type.b32 \t$d, [$a];", - []>; -def TXQ_WIDTH - : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "txq.width.b32 \t$d, [$a];", - []>; -def TXQ_HEIGHT - : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "txq.height.b32 \t$d, [$a];", +def SUST_B_3D_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", []>; -def TXQ_DEPTH - : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "txq.depth.b32 \t$d, [$a];", +def SUST_B_3D_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", []>; -def TXQ_ARRAY_SIZE - : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "txq.array_size.b32 \t$d, [$a];", +def SUST_B_3D_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r), + "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", []>; -def TXQ_NUM_SAMPLES - : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "txq.num_samples.b32 \t$d, [$a];", +def SUST_B_3D_B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r), + "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", []>; -def TXQ_NUM_MIPMAP_LEVELS - : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "txq.num_mipmap_levels.b32 \t$d, [$a];", +def SUST_B_3D_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", []>; - -def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a), - (TXQ_CHANNEL_ORDER Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a), - (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_width Int64Regs:$a), - (TXQ_WIDTH Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_height Int64Regs:$a), - (TXQ_HEIGHT Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_depth Int64Regs:$a), - (TXQ_DEPTH Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_array_size Int64Regs:$a), - (TXQ_ARRAY_SIZE Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a), - (TXQ_NUM_SAMPLES Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a), - (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>; - - -//----------------------------------- -// Surface Query Intrinsics -//----------------------------------- -def SUQ_CHANNEL_ORDER - : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "suq.channel_order.b32 
\t$d, [$a];", +def SUST_B_3D_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", []>; -def SUQ_CHANNEL_DATA_TYPE - : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "suq.channel_data_type.b32 \t$d, [$a];", +def SUST_B_3D_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g), + "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", []>; -def SUQ_WIDTH - : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "suq.width.b32 \t$d, [$a];", +def SUST_B_3D_V2B64_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r, Int64Regs:$g), + "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", []>; -def SUQ_HEIGHT - : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "suq.height.b32 \t$d, [$a];", +def SUST_B_3D_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", []>; -def SUQ_DEPTH - : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "suq.depth.b32 \t$d, [$a];", +def SUST_B_3D_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", []>; -def SUQ_ARRAY_SIZE - : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "suq.array_size.b32 \t$d, [$a];", +def SUST_B_3D_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", []>; -def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a), - (SUQ_CHANNEL_ORDER Int64Regs:$a)>; -def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a), - (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>; -def : Pat<(int_nvvm_suq_width Int64Regs:$a), - (SUQ_WIDTH Int64Regs:$a)>; -def : Pat<(int_nvvm_suq_height Int64Regs:$a), - (SUQ_HEIGHT Int64Regs:$a)>; -def : Pat<(int_nvvm_suq_depth Int64Regs:$a), - (SUQ_DEPTH Int64Regs:$a)>; -def : Pat<(int_nvvm_suq_array_size Int64Regs:$a), - (SUQ_ARRAY_SIZE Int64Regs:$a)>; - - -//===- Handle Query -------------------------------------------------------===// - -// TODO: These intrinsics are not yet finalized, pending PTX ISA design work -def ISTYPEP_SAMPLER - : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), - "istypep.samplerref \t$d, $a;", - [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>; -def ISTYPEP_SURFACE - : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), - "istypep.surfref \t$d, $a;", - [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>; -def ISTYPEP_TEXTURE - : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), - "istypep.texref \t$d, $a;", - [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>; - -//===- Surface Stores -----------------------------------------------------===// - -// Unformatted -def SUST_B_1D_B8_TRAP +// .zero variant +def SUST_B_1D_B8_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", + "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};", []>; -def SUST_B_1D_B16_TRAP +def 
SUST_B_1D_B16_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", + "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};", []>; -def SUST_B_1D_B32_TRAP +def SUST_B_1D_B32_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), - "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", + "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};", []>; -def SUST_B_1D_V2B8_TRAP +def SUST_B_1D_B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), + "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_B_1D_V2B8_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", []>; -def SUST_B_1D_V2B16_TRAP +def SUST_B_1D_V2B16_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", []>; -def SUST_B_1D_V2B32_TRAP +def SUST_B_1D_V2B32_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", []>; -def SUST_B_1D_V4B8_TRAP +def SUST_B_1D_V2B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), + "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V4B8_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", []>; -def SUST_B_1D_V4B16_TRAP +def SUST_B_1D_V4B16_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", []>; -def SUST_B_1D_V4B32_TRAP +def SUST_B_1D_V4B32_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", []>; -def SUST_B_1D_ARRAY_B8_TRAP +def SUST_B_1D_ARRAY_B8_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), - "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", + "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", []>; -def SUST_B_1D_ARRAY_B16_TRAP +def SUST_B_1D_ARRAY_B16_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), - "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", + "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", []>; -def SUST_B_1D_ARRAY_B32_TRAP +def SUST_B_1D_ARRAY_B32_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), - "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", + "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", []>; -def SUST_B_1D_ARRAY_V2B8_TRAP +def SUST_B_1D_ARRAY_B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), + "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_B_1D_ARRAY_V2B8_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 
Int16Regs:$g), - "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", []>; -def SUST_B_1D_ARRAY_V2B16_TRAP +def SUST_B_1D_ARRAY_V2B16_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", []>; -def SUST_B_1D_ARRAY_V2B32_TRAP +def SUST_B_1D_ARRAY_V2B32_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", []>; -def SUST_B_1D_ARRAY_V4B8_TRAP +def SUST_B_1D_ARRAY_V2B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, + Int64Regs:$g), + "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_ARRAY_V4B8_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " + "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], " "\\{$r, $g, $b, $a\\};", []>; -def SUST_B_1D_ARRAY_V4B16_TRAP +def SUST_B_1D_ARRAY_V4B16_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " + "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], " "\\{$r, $g, $b, $a\\};", []>; -def SUST_B_1D_ARRAY_V4B32_TRAP +def SUST_B_1D_ARRAY_V4B32_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " + "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], " "\\{$r, $g, $b, $a\\};", []>; -def SUST_B_2D_B8_TRAP +def SUST_B_2D_B8_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", []>; -def SUST_B_2D_B16_TRAP +def SUST_B_2D_B16_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", []>; -def SUST_B_2D_B32_TRAP +def SUST_B_2D_B32_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", []>; -def SUST_B_2D_V2B8_TRAP +def SUST_B_2D_B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), + "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_V2B8_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", []>; -def SUST_B_2D_V2B16_TRAP +def SUST_B_2D_V2B16_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", []>; -def SUST_B_2D_V2B32_TRAP +def SUST_B_2D_V2B32_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), - 
"sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", []>; -def SUST_B_2D_V4B8_TRAP +def SUST_B_2D_V2B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, + Int64Regs:$g), + "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V4B8_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " + "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], " "\\{$r, $g, $b, $a\\};", []>; -def SUST_B_2D_V4B16_TRAP +def SUST_B_2D_V4B16_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " + "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], " "\\{$r, $g, $b, $a\\};", []>; -def SUST_B_2D_V4B32_TRAP +def SUST_B_2D_V4B32_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " + "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], " "\\{$r, $g, $b, $a\\};", []>; -def SUST_B_2D_ARRAY_B8_TRAP +def SUST_B_2D_ARRAY_B8_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", []>; -def SUST_B_2D_ARRAY_B16_TRAP +def SUST_B_2D_ARRAY_B16_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", []>; -def SUST_B_2D_ARRAY_B32_TRAP +def SUST_B_2D_ARRAY_B32_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", []>; -def SUST_B_2D_ARRAY_V2B8_TRAP +def SUST_B_2D_ARRAY_B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r), + "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_V2B8_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], " "\\{$r, $g\\};", []>; -def SUST_B_2D_ARRAY_V2B16_TRAP +def SUST_B_2D_ARRAY_V2B16_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], " "\\{$r, $g\\};", []>; -def SUST_B_2D_ARRAY_V2B32_TRAP +def SUST_B_2D_ARRAY_V2B32_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), - "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], " "\\{$r, $g\\};", []>; -def SUST_B_2D_ARRAY_V4B8_TRAP +def SUST_B_2D_ARRAY_V2B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r, Int64Regs:$g), + "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def 
SUST_B_2D_ARRAY_V4B8_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], " "\\{$r, $g, $b, $a\\};", []>; -def SUST_B_2D_ARRAY_V4B16_TRAP +def SUST_B_2D_ARRAY_V4B16_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], " "\\{$r, $g, $b, $a\\};", []>; -def SUST_B_2D_ARRAY_V4B32_TRAP +def SUST_B_2D_ARRAY_V4B32_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], " "\\{$r, $g, $b, $a\\};", []>; -def SUST_B_3D_B8_TRAP +def SUST_B_3D_B8_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r), - "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", []>; -def SUST_B_3D_B16_TRAP +def SUST_B_3D_B16_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r), - "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", []>; -def SUST_B_3D_B32_TRAP +def SUST_B_3D_B32_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r), - "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", []>; -def SUST_B_3D_V2B8_TRAP +def SUST_B_3D_B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r), + "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_B_3D_V2B8_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), - "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], " "\\{$r, $g\\};", []>; -def SUST_B_3D_V2B16_TRAP +def SUST_B_3D_V2B16_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), - "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], " "\\{$r, $g\\};", []>; -def SUST_B_3D_V2B32_TRAP +def SUST_B_3D_V2B32_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g), - "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], " "\\{$r, $g\\};", []>; -def SUST_B_3D_V4B8_TRAP +def SUST_B_3D_V2B64_ZERO + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r, Int64Regs:$g), + "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_3D_V4B8_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], " "\\{$r, $g, $b, $a\\};", []>; -def SUST_B_3D_V4B16_TRAP +def SUST_B_3D_V4B16_ZERO : 
NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], " "\\{$r, $g, $b, $a\\};", []>; -def SUST_B_3D_V4B32_TRAP +def SUST_B_3D_V4B32_ZERO : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], " "\\{$r, $g, $b, $a\\};", []>; + + // Formatted def SUST_P_1D_B8_TRAP @@ -3197,12 +5676,341 @@ def SUST_P_3D_V4B32_TRAP "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " "\\{$r, $g, $b, $a\\};", []>; +} + +// Surface store instruction patterns +// I'm not sure why we can't just include these in the instruction definitions, +// but TableGen complains of type errors :( + +// .clamp variant +def : Pat<(int_nvvm_sust_b_1d_i8_clamp + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_i16_clamp + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_i32_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), + (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_i64_clamp + Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), + (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp + Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), + (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, + Int64Regs:$r, Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp + Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp + Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp + Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp + Int64Regs:$s, 
Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), + (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), + (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), + (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int64Regs:$r, Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_2d_i8_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_i16_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_i32_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_i64_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), + (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), + (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp + Int64Regs:$s, 
Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), + (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r, Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), + (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g), + (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, + Int64Regs:$g), + (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, 
Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_3d_i8_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + (SUST_B_3D_B8_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_i16_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + (SUST_B_3D_B16_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r)>; +def : Pat<(int_nvvm_sust_b_3d_i32_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r), + (SUST_B_3D_B32_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_i64_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r), + (SUST_B_3D_B64_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_3D_V2B8_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_3D_V2B16_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g), + (SUST_B_3D_V2B32_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r, Int64Regs:$g), + (SUST_B_3D_V2B64_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r, Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_3D_V4B8_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_3D_V4B16_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_3D_V4B32_CLAMP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; -// Surface store instruction patterns -// I'm not sure why we can't just include these in the instruction definitions, -// but TableGen complains of type errors :( +// .trap variant def : Pat<(int_nvvm_sust_b_1d_i8_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; @@ -3215,6 +6023,10 @@ def : Pat<(int_nvvm_sust_b_1d_i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 
(SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; +def : Pat<(int_nvvm_sust_b_1d_i64_trap + Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), + (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; + def : Pat<(int_nvvm_sust_b_1d_v2i8_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, @@ -3230,6 +6042,11 @@ def : Pat<(int_nvvm_sust_b_1d_v2i32_trap (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; +def : Pat<(int_nvvm_sust_b_1d_v2i64_trap + Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), + (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, + Int64Regs:$r, Int64Regs:$g)>; + def : Pat<(int_nvvm_sust_b_1d_v4i8_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), @@ -3265,6 +6082,11 @@ def : Pat<(int_nvvm_sust_b_1d_array_i32_trap (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>; +def : Pat<(int_nvvm_sust_b_1d_array_i64_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), + (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int64Regs:$r)>; + def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, @@ -3280,6 +6102,11 @@ def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; +def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), + (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int64Regs:$r, Int64Regs:$g)>; + def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), @@ -3315,6 +6142,11 @@ def : Pat<(int_nvvm_sust_b_2d_i32_trap (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; +def : Pat<(int_nvvm_sust_b_2d_i64_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), + (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r)>; + def : Pat<(int_nvvm_sust_b_2d_v2i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, @@ -3330,6 +6162,11 @@ def : Pat<(int_nvvm_sust_b_2d_v2i32_trap (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; +def : Pat<(int_nvvm_sust_b_2d_v2i64_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), + (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r, Int64Regs:$g)>; + def : Pat<(int_nvvm_sust_b_2d_v4i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), @@ -3368,6 +6205,12 @@ def : Pat<(int_nvvm_sust_b_2d_array_i32_trap Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; +def : Pat<(int_nvvm_sust_b_2d_array_i64_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), + (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r)>; + def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), @@ -3388,6 +6231,12 @@ def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 
Int32Regs:$g)>; +def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, + Int64Regs:$g), + (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; + def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), @@ -3432,6 +6281,13 @@ def : Pat<(int_nvvm_sust_b_3d_i32_trap Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r)>; +def : Pat<(int_nvvm_sust_b_3d_i64_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r), + (SUST_B_3D_B64_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r)>; + def : Pat<(int_nvvm_sust_b_3d_v2i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), @@ -3453,6 +6309,13 @@ def : Pat<(int_nvvm_sust_b_3d_v2i32_trap Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g)>; +def : Pat<(int_nvvm_sust_b_3d_v2i64_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r, Int64Regs:$g), + (SUST_B_3D_V2B64_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r, Int64Regs:$g)>; + def : Pat<(int_nvvm_sust_b_3d_v4i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), @@ -3475,6 +6338,334 @@ def : Pat<(int_nvvm_sust_b_3d_v4i32_trap Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; +// .zero variant +def : Pat<(int_nvvm_sust_b_1d_i8_zero + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_i16_zero + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_i32_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), + (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_i64_zero + Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), + (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i8_zero + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i16_zero + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i32_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i64_zero + Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), + (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, + Int64Regs:$r, Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i8_zero + Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i16_zero + Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i32_zero + Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + 
(SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_1d_array_i8_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_i16_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_i32_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), + (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_i64_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), + (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), + (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int64Regs:$r, Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_2d_i8_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_i16_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_i32_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_i64_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), + (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i8_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, 
Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i16_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i32_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), + (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i64_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), + (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r, Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i8_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i16_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i32_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_2d_array_i8_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_i16_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_i32_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_i64_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), + (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g), + (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, + Int64Regs:$g), + (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + 
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_3d_i8_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + (SUST_B_3D_B8_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_i16_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + (SUST_B_3D_B16_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_i32_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r), + (SUST_B_3D_B32_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_i64_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r), + (SUST_B_3D_B64_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i8_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_3D_V2B8_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i16_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_3D_V2B16_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i32_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g), + (SUST_B_3D_V2B32_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i64_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r, Int64Regs:$g), + (SUST_B_3D_V2B64_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int64Regs:$r, Int64Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i8_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_3D_V4B8_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i16_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_3D_V4B16_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i32_zero + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_3D_V4B32_ZERO Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, 
Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + def : Pat<(int_nvvm_sust_p_1d_i8_trap diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp index afd53a6a84c..20d4e272341 100644 --- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp +++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -15,6 +15,7 @@ #include "NVPTX.h" #include "NVPTXMachineFunctionInfo.h" +#include "NVPTXSubtarget.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -32,10 +33,16 @@ private: public: NVPTXReplaceImageHandles(); - bool runOnMachineFunction(MachineFunction &MF) override; + bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "NVPTX Replace Image Handles"; + } private: bool processInstr(MachineInstr &MI); void replaceImageHandle(MachineOperand &Op, MachineFunction &MF); + bool findIndexForHandle(MachineOperand &Op, MachineFunction &MF, + unsigned &Idx); }; } @@ -65,242 +72,43 @@ bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) { E = InstrsToRemove.end(); I != E; ++I) { (*I)->eraseFromParent(); } - return Changed; } bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) { MachineFunction &MF = *MI.getParent()->getParent(); - // Check if we have a surface/texture instruction - switch (MI.getOpcode()) { - default: return false; - case NVPTX::TEX_1D_F32_I32: - case NVPTX::TEX_1D_F32_F32: - case NVPTX::TEX_1D_F32_F32_LEVEL: - case NVPTX::TEX_1D_F32_F32_GRAD: - case NVPTX::TEX_1D_I32_I32: - case NVPTX::TEX_1D_I32_F32: - case NVPTX::TEX_1D_I32_F32_LEVEL: - case NVPTX::TEX_1D_I32_F32_GRAD: - case NVPTX::TEX_1D_ARRAY_F32_I32: - case NVPTX::TEX_1D_ARRAY_F32_F32: - case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL: - case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD: - case NVPTX::TEX_1D_ARRAY_I32_I32: - case NVPTX::TEX_1D_ARRAY_I32_F32: - case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL: - case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD: - case NVPTX::TEX_2D_F32_I32: - case NVPTX::TEX_2D_F32_F32: - case NVPTX::TEX_2D_F32_F32_LEVEL: - case NVPTX::TEX_2D_F32_F32_GRAD: - case NVPTX::TEX_2D_I32_I32: - case NVPTX::TEX_2D_I32_F32: - case NVPTX::TEX_2D_I32_F32_LEVEL: - case NVPTX::TEX_2D_I32_F32_GRAD: - case NVPTX::TEX_2D_ARRAY_F32_I32: - case NVPTX::TEX_2D_ARRAY_F32_F32: - case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL: - case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD: - case NVPTX::TEX_2D_ARRAY_I32_I32: - case NVPTX::TEX_2D_ARRAY_I32_F32: - case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL: - case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD: - case NVPTX::TEX_3D_F32_I32: - case NVPTX::TEX_3D_F32_F32: - case NVPTX::TEX_3D_F32_F32_LEVEL: - case NVPTX::TEX_3D_F32_F32_GRAD: - case NVPTX::TEX_3D_I32_I32: - case NVPTX::TEX_3D_I32_F32: - case NVPTX::TEX_3D_I32_F32_LEVEL: - case NVPTX::TEX_3D_I32_F32_GRAD: { + const MCInstrDesc &MCID = MI.getDesc(); + + if (MCID.TSFlags & NVPTXII::IsTexFlag) { // This is a texture fetch, so operand 4 is a texref and operand 5 is // a samplerref MachineOperand &TexHandle = MI.getOperand(4); - MachineOperand &SampHandle = MI.getOperand(5); - replaceImageHandle(TexHandle, MF); - replaceImageHandle(SampHandle, MF); - - return true; - } - case NVPTX::SULD_1D_I8_TRAP: - case NVPTX::SULD_1D_I16_TRAP: - case NVPTX::SULD_1D_I32_TRAP: - case NVPTX::SULD_1D_ARRAY_I8_TRAP: - case NVPTX::SULD_1D_ARRAY_I16_TRAP: - case NVPTX::SULD_1D_ARRAY_I32_TRAP: - case NVPTX::SULD_2D_I8_TRAP: - case NVPTX::SULD_2D_I16_TRAP: - case NVPTX::SULD_2D_I32_TRAP: - case 
NVPTX::SULD_2D_ARRAY_I8_TRAP: - case NVPTX::SULD_2D_ARRAY_I16_TRAP: - case NVPTX::SULD_2D_ARRAY_I32_TRAP: - case NVPTX::SULD_3D_I8_TRAP: - case NVPTX::SULD_3D_I16_TRAP: - case NVPTX::SULD_3D_I32_TRAP: { - // This is a V1 surface load, so operand 1 is a surfref - MachineOperand &SurfHandle = MI.getOperand(1); - replaceImageHandle(SurfHandle, MF); + if (!(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) { + MachineOperand &SampHandle = MI.getOperand(5); + replaceImageHandle(SampHandle, MF); + } return true; - } - case NVPTX::SULD_1D_V2I8_TRAP: - case NVPTX::SULD_1D_V2I16_TRAP: - case NVPTX::SULD_1D_V2I32_TRAP: - case NVPTX::SULD_1D_ARRAY_V2I8_TRAP: - case NVPTX::SULD_1D_ARRAY_V2I16_TRAP: - case NVPTX::SULD_1D_ARRAY_V2I32_TRAP: - case NVPTX::SULD_2D_V2I8_TRAP: - case NVPTX::SULD_2D_V2I16_TRAP: - case NVPTX::SULD_2D_V2I32_TRAP: - case NVPTX::SULD_2D_ARRAY_V2I8_TRAP: - case NVPTX::SULD_2D_ARRAY_V2I16_TRAP: - case NVPTX::SULD_2D_ARRAY_V2I32_TRAP: - case NVPTX::SULD_3D_V2I8_TRAP: - case NVPTX::SULD_3D_V2I16_TRAP: - case NVPTX::SULD_3D_V2I32_TRAP: { - // This is a V2 surface load, so operand 2 is a surfref - MachineOperand &SurfHandle = MI.getOperand(2); - - replaceImageHandle(SurfHandle, MF); + } else if (MCID.TSFlags & NVPTXII::IsSuldMask) { + unsigned VecSize = + 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1); - return true; - } - case NVPTX::SULD_1D_V4I8_TRAP: - case NVPTX::SULD_1D_V4I16_TRAP: - case NVPTX::SULD_1D_V4I32_TRAP: - case NVPTX::SULD_1D_ARRAY_V4I8_TRAP: - case NVPTX::SULD_1D_ARRAY_V4I16_TRAP: - case NVPTX::SULD_1D_ARRAY_V4I32_TRAP: - case NVPTX::SULD_2D_V4I8_TRAP: - case NVPTX::SULD_2D_V4I16_TRAP: - case NVPTX::SULD_2D_V4I32_TRAP: - case NVPTX::SULD_2D_ARRAY_V4I8_TRAP: - case NVPTX::SULD_2D_ARRAY_V4I16_TRAP: - case NVPTX::SULD_2D_ARRAY_V4I32_TRAP: - case NVPTX::SULD_3D_V4I8_TRAP: - case NVPTX::SULD_3D_V4I16_TRAP: - case NVPTX::SULD_3D_V4I32_TRAP: { - // This is a V4 surface load, so operand 4 is a surfref - MachineOperand &SurfHandle = MI.getOperand(4); + // For a surface load of vector size N, the Nth operand will be the surfref + MachineOperand &SurfHandle = MI.getOperand(VecSize); replaceImageHandle(SurfHandle, MF); return true; - } - case NVPTX::SUST_B_1D_B8_TRAP: - case NVPTX::SUST_B_1D_B16_TRAP: - case NVPTX::SUST_B_1D_B32_TRAP: - case NVPTX::SUST_B_1D_V2B8_TRAP: - case NVPTX::SUST_B_1D_V2B16_TRAP: - case NVPTX::SUST_B_1D_V2B32_TRAP: - case NVPTX::SUST_B_1D_V4B8_TRAP: - case NVPTX::SUST_B_1D_V4B16_TRAP: - case NVPTX::SUST_B_1D_V4B32_TRAP: - case NVPTX::SUST_B_1D_ARRAY_B8_TRAP: - case NVPTX::SUST_B_1D_ARRAY_B16_TRAP: - case NVPTX::SUST_B_1D_ARRAY_B32_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP: - case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP: - case NVPTX::SUST_B_2D_B8_TRAP: - case NVPTX::SUST_B_2D_B16_TRAP: - case NVPTX::SUST_B_2D_B32_TRAP: - case NVPTX::SUST_B_2D_V2B8_TRAP: - case NVPTX::SUST_B_2D_V2B16_TRAP: - case NVPTX::SUST_B_2D_V2B32_TRAP: - case NVPTX::SUST_B_2D_V4B8_TRAP: - case NVPTX::SUST_B_2D_V4B16_TRAP: - case NVPTX::SUST_B_2D_V4B32_TRAP: - case NVPTX::SUST_B_2D_ARRAY_B8_TRAP: - case NVPTX::SUST_B_2D_ARRAY_B16_TRAP: - case NVPTX::SUST_B_2D_ARRAY_B32_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP: - case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP: - case 
NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP: - case NVPTX::SUST_B_3D_B8_TRAP: - case NVPTX::SUST_B_3D_B16_TRAP: - case NVPTX::SUST_B_3D_B32_TRAP: - case NVPTX::SUST_B_3D_V2B8_TRAP: - case NVPTX::SUST_B_3D_V2B16_TRAP: - case NVPTX::SUST_B_3D_V2B32_TRAP: - case NVPTX::SUST_B_3D_V4B8_TRAP: - case NVPTX::SUST_B_3D_V4B16_TRAP: - case NVPTX::SUST_B_3D_V4B32_TRAP: - case NVPTX::SUST_P_1D_B8_TRAP: - case NVPTX::SUST_P_1D_B16_TRAP: - case NVPTX::SUST_P_1D_B32_TRAP: - case NVPTX::SUST_P_1D_V2B8_TRAP: - case NVPTX::SUST_P_1D_V2B16_TRAP: - case NVPTX::SUST_P_1D_V2B32_TRAP: - case NVPTX::SUST_P_1D_V4B8_TRAP: - case NVPTX::SUST_P_1D_V4B16_TRAP: - case NVPTX::SUST_P_1D_V4B32_TRAP: - case NVPTX::SUST_P_1D_ARRAY_B8_TRAP: - case NVPTX::SUST_P_1D_ARRAY_B16_TRAP: - case NVPTX::SUST_P_1D_ARRAY_B32_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP: - case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP: - case NVPTX::SUST_P_2D_B8_TRAP: - case NVPTX::SUST_P_2D_B16_TRAP: - case NVPTX::SUST_P_2D_B32_TRAP: - case NVPTX::SUST_P_2D_V2B8_TRAP: - case NVPTX::SUST_P_2D_V2B16_TRAP: - case NVPTX::SUST_P_2D_V2B32_TRAP: - case NVPTX::SUST_P_2D_V4B8_TRAP: - case NVPTX::SUST_P_2D_V4B16_TRAP: - case NVPTX::SUST_P_2D_V4B32_TRAP: - case NVPTX::SUST_P_2D_ARRAY_B8_TRAP: - case NVPTX::SUST_P_2D_ARRAY_B16_TRAP: - case NVPTX::SUST_P_2D_ARRAY_B32_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP: - case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP: - case NVPTX::SUST_P_3D_B8_TRAP: - case NVPTX::SUST_P_3D_B16_TRAP: - case NVPTX::SUST_P_3D_B32_TRAP: - case NVPTX::SUST_P_3D_V2B8_TRAP: - case NVPTX::SUST_P_3D_V2B16_TRAP: - case NVPTX::SUST_P_3D_V2B32_TRAP: - case NVPTX::SUST_P_3D_V4B8_TRAP: - case NVPTX::SUST_P_3D_V4B16_TRAP: - case NVPTX::SUST_P_3D_V4B32_TRAP: { + } else if (MCID.TSFlags & NVPTXII::IsSustFlag) { // This is a surface store, so operand 0 is a surfref MachineOperand &SurfHandle = MI.getOperand(0); replaceImageHandle(SurfHandle, MF); return true; - } - case NVPTX::TXQ_CHANNEL_ORDER: - case NVPTX::TXQ_CHANNEL_DATA_TYPE: - case NVPTX::TXQ_WIDTH: - case NVPTX::TXQ_HEIGHT: - case NVPTX::TXQ_DEPTH: - case NVPTX::TXQ_ARRAY_SIZE: - case NVPTX::TXQ_NUM_SAMPLES: - case NVPTX::TXQ_NUM_MIPMAP_LEVELS: - case NVPTX::SUQ_CHANNEL_ORDER: - case NVPTX::SUQ_CHANNEL_DATA_TYPE: - case NVPTX::SUQ_WIDTH: - case NVPTX::SUQ_HEIGHT: - case NVPTX::SUQ_DEPTH: - case NVPTX::SUQ_ARRAY_SIZE: { + } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) { // This is a query, so operand 1 is a surfref/texref MachineOperand &Handle = MI.getOperand(1); @@ -308,22 +116,38 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) { return true; } - } + + return false; } void NVPTXReplaceImageHandles:: replaceImageHandle(MachineOperand &Op, MachineFunction &MF) { + unsigned Idx; + if (findIndexForHandle(Op, MF, Idx)) { + Op.ChangeToImmediate(Idx); + } +} + +bool NVPTXReplaceImageHandles:: +findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) { const MachineRegisterInfo &MRI = MF.getRegInfo(); NVPTXMachineFunctionInfo *MFI = MF.getInfo(); + + assert(Op.isReg() && "Handle is not in a reg?"); + // Which instruction defines the handle? 
-  MachineInstr *MI = MRI.getVRegDef(Op.getReg());
-  assert(MI && "No def for image handle vreg?");
-  MachineInstr &TexHandleDef = *MI;
+  MachineInstr &TexHandleDef = *MRI.getVRegDef(Op.getReg());
 
   switch (TexHandleDef.getOpcode()) {
   case NVPTX::LD_i64_avar: {
     // The handle is a parameter value being loaded, replace with the
     // parameter symbol
+    const NVPTXSubtarget &ST = MF.getTarget().getSubtarget<NVPTXSubtarget>();
+    if (ST.getDrvInterface() == NVPTX::CUDA) {
+      // For CUDA, we preserve the param loads coming from function arguments
+      return false;
+    }
+
     assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!");
     StringRef Sym = TexHandleDef.getOperand(6).getSymbolName();
     std::string ParamBaseName = MF.getName();
@@ -333,19 +157,27 @@ replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
     std::string NewSym;
     raw_string_ostream NewSymStr(NewSym);
     NewSymStr << MF.getFunction()->getName() << "_param_" << Param;
-    Op.ChangeToImmediate(
-      MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str()));
+
     InstrsToRemove.insert(&TexHandleDef);
-    break;
+    Idx = MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str());
+    return true;
   }
   case NVPTX::texsurf_handles: {
     // The handle is a global variable, replace with the global variable name
     assert(TexHandleDef.getOperand(1).isGlobal() && "Load is not a global!");
     const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal();
     assert(GV->hasName() && "Global sampler must be named!");
-    Op.ChangeToImmediate(MFI->getImageHandleSymbolIndex(GV->getName().data()));
     InstrsToRemove.insert(&TexHandleDef);
-    break;
+    Idx = MFI->getImageHandleSymbolIndex(GV->getName().data());
+    return true;
+  }
+  case NVPTX::nvvm_move_i64:
+  case TargetOpcode::COPY: {
+    bool Res = findIndexForHandle(TexHandleDef.getOperand(1), MF, Idx);
+    if (Res) {
+      InstrsToRemove.insert(&TexHandleDef);
+    }
+    return Res;
   }
   default:
     llvm_unreachable("Unknown instruction operating on handle");
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index 3ed5747b55f..4c41e4e470d 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -91,7 +91,12 @@ public:
   inline bool hasROT64() const { return SmVersion >= 20; }
 
   bool hasImageHandles() const {
-    // Currently disabled
+    // Enable handles for Kepler+, where CUDA supports indirect surfaces and
+    // textures
+    if (getDrvInterface() == NVPTX::CUDA)
+      return (SmVersion >= 30);
+
+    // Disabled, otherwise
     return false;
   }
   bool is64Bit() const { return Is64Bit; }
diff --git a/test/CodeGen/NVPTX/surf-read-cuda.ll b/test/CodeGen/NVPTX/surf-read-cuda.ll
new file mode 100644
index 00000000000..10a1ecc4c47
--- /dev/null
+++ b/test/CodeGen/NVPTX/surf-read-cuda.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20
+; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30
+
+target triple = "nvptx-unknown-cuda"
+
+declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32)
+declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)
+
+
+; SM20-LABEL: .entry foo
+; SM30-LABEL: .entry foo
+define void @foo(i64 %img, float* %red, i32 %idx) {
+; SM20: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
+; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}]
+; SM30: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
+; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}]
+  %val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx)
+; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
+; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
+  %ret = sitofp i32 %val to float
+; SM20: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
+; SM30: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
+  store float %ret, float* %red
+  ret void
+}
+
+@surf0 = internal addrspace(1) global i64 0, align 8
+
+; SM20-LABEL: .entry bar
+; SM30-LABEL: .entry bar
+define void @bar(float* %red, i32 %idx) {
+; SM30: mov.u64 %rd[[SURFHANDLE:[0-9]+]], surf0
+  %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf0)
+; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [surf0, {%r{{[0-9]+}}}]
+; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFHANDLE]], {%r{{[0-9]+}}}]
+  %val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %surfHandle, i32 %idx)
+; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
+; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
+  %ret = sitofp i32 %val to float
+; SM20: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
+; SM30: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
+  store float %ret, float* %red
+  ret void
+}
+
+
+
+
+!nvvm.annotations = !{!1, !2, !3}
+!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1}
+!2 = metadata !{void (float*, i32)* @bar, metadata !"kernel", i32 1}
+!3 = metadata !{i64 addrspace(1)* @surf0, metadata !"surface", i32 1}
+
diff --git a/test/CodeGen/NVPTX/surf-write-cuda.ll b/test/CodeGen/NVPTX/surf-write-cuda.ll
new file mode 100644
index 00000000000..654c47f4695
--- /dev/null
+++ b/test/CodeGen/NVPTX/surf-write-cuda.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20
+; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30
+
+target triple = "nvptx-unknown-cuda"
+
+declare void @llvm.nvvm.sust.b.1d.i32.trap(i64, i32, i32)
+declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)
+
+
+; SM20-LABEL: .entry foo
+; SM30-LABEL: .entry foo
+define void @foo(i64 %img, i32 %val, i32 %idx) {
+; SM20: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
+; SM20: sust.b.1d.b32.trap [%rd[[SURFREG]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
+; SM30: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
+; SM30: sust.b.1d.b32.trap [%rd[[SURFREG]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
+  tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %img, i32 %idx, i32 %val)
+  ret void
+}
+
+
+@surf0 = internal addrspace(1) global i64 0, align 8
+
+
+
+; SM20-LABEL: .entry bar
+; SM30-LABEL: .entry bar
+define void @bar(i32 %val, i32 %idx) {
+; SM30: mov.u64 %rd[[SURFHANDLE:[0-9]+]], surf0
+  %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf0)
+; SM20: sust.b.1d.b32.trap [surf0, {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
+; SM30: sust.b.1d.b32.trap [%rd[[SURFHANDLE]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
+  tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %surfHandle, i32 %idx, i32 %val)
+  ret void
+}
+
+
+!nvvm.annotations = !{!1, !2, !3}
+!1 = metadata !{void (i64, i32, i32)* @foo, metadata !"kernel", i32 1}
+!2 = metadata !{void (i32, i32)* @bar, metadata !"kernel", i32 1}
+!3 = metadata !{i64 addrspace(1)* @surf0, metadata !"surface", i32 1}
+
diff --git a/test/CodeGen/NVPTX/tex-read-cuda.ll b/test/CodeGen/NVPTX/tex-read-cuda.ll
new file mode 100644
index 00000000000..ee0cefa919b
--- /dev/null
+++ b/test/CodeGen/NVPTX/tex-read-cuda.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20
+; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30
+
+
+target triple = "nvptx-unknown-cuda"
+
+declare { float, float, float, float }
@llvm.nvvm.tex.unified.1d.v4f32.s32(i64, i32) +declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*) + +; SM20-LABEL: .entry foo +; SM30-LABEL: .entry foo +define void @foo(i64 %img, float* %red, i32 %idx) { +; SM20: ld.param.u64 %rd[[TEXREG:[0-9]+]], [foo_param_0]; +; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}] +; SM30: ld.param.u64 %rd[[TEXREG:[0-9]+]], [foo_param_0]; +; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}] + %val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %img, i32 %idx) + %ret = extractvalue { float, float, float, float } %val, 0 +; SM20: st.f32 [%r{{[0-9]+}}], %f[[RED]] +; SM30: st.f32 [%r{{[0-9]+}}], %f[[RED]] + store float %ret, float* %red + ret void +} + + +@tex0 = internal addrspace(1) global i64 0, align 8 + +; SM20-LABEL: .entry bar +; SM30-LABEL: .entry bar +define void @bar(float* %red, i32 %idx) { +; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0 + %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @tex0) +; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}] +; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}] + %val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx) + %ret = extractvalue { float, float, float, float } %val, 0 +; SM20: st.f32 [%r{{[0-9]+}}], %f[[RED]] +; SM30: st.f32 [%r{{[0-9]+}}], %f[[RED]] + store float %ret, float* %red + ret void +} + +!nvvm.annotations = !{!1, !2, !3} +!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1} +!2 = metadata !{void (float*, i32)* @bar, metadata !"kernel", i32 1} +!3 = metadata !{i64 addrspace(1)* @tex0, metadata !"texture", i32 1} diff --git a/test/CodeGen/NVPTX/tex-read.ll b/test/CodeGen/NVPTX/tex-read.ll index 291060b9848..55e4bfc9e45 100644 --- a/test/CodeGen/NVPTX/tex-read.ll +++ b/test/CodeGen/NVPTX/tex-read.ll @@ -2,12 +2,12 @@ target triple = "nvptx-unknown-nvcl" -declare { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.i32(i64, i64, i32) +declare { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.s32(i64, i64, i32) ; CHECK: .entry foo define void @foo(i64 %img, i64 %sampler, float* %red, i32 %idx) { ; CHECK: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [foo_param_0, foo_param_1, {%r{{[0-9]+}}}] - %val = tail call { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.i32(i64 %img, i64 %sampler, i32 %idx) + %val = tail call { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.s32(i64 %img, i64 %sampler, i32 %idx) %ret = extractvalue { float, float, float, float } %val, 0 ; CHECK: st.f32 [%r{{[0-9]+}}], %f[[RED]] store float %ret, float* %red -- 2.11.0
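For illustration only, and not part of the patch or its test suite: a minimal IR sketch, in the same style as the tests above, exercising one of the newly added ".zero" surface-store intrinsics. The intrinsic name and argument order are derived from the int_nvvm_sust_b_1d_v2i64_zero definition and its SUST_B_1D_V2B64_ZERO selection pattern; the kernel name and annotations below are assumed for the example.

target triple = "nvptx-unknown-cuda"

declare void @llvm.nvvm.sust.b.1d.v2i64.zero(i64, i32, i64, i64)

; Same shape as the .trap store tests above, but using the .zero
; out-of-bounds handling mode; this should select SUST_B_1D_V2B64_ZERO.
define void @store_v2i64(i64 %surf, i32 %idx, i64 %r, i64 %g) {
  tail call void @llvm.nvvm.sust.b.1d.v2i64.zero(i64 %surf, i32 %idx, i64 %r, i64 %g)
  ret void
}

!nvvm.annotations = !{!1}
!1 = metadata !{void (i64, i32, i64, i64)* @store_v2i64, metadata !"kernel", i32 1}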