From: Artem Belevich Date: Wed, 6 Dec 2017 17:50:05 +0000 (+0000) Subject: [NVPTX,CUDA] Added llvm.nvvm.fns intrinsic and matching __nvvm_fns builtin in clang. X-Git-Tag: android-x86-7.1-r4~7693 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=47a46192bbbf0b9e1e22b41d71fe0fdecaef501e;p=android-x86%2Fexternal-llvm.git [NVPTX,CUDA] Added llvm.nvvm.fns intrinsic and matching __nvvm_fns builtin in clang. Differential Revision: https://reviews.llvm.org/D40872 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319909 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td index 6f75e78ff61..73622ce9303 100644 --- a/include/llvm/IR/IntrinsicsNVVM.td +++ b/include/llvm/IR/IntrinsicsNVVM.td @@ -682,6 +682,11 @@ let TargetPrefix = "nvvm" in { def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">, Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; +// FNS + + def int_nvvm_fns : GCCBuiltin<"__nvvm_fns">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; // Atomics not available as llvm intrinsics. def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty], diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 478f3e9d057..c932758bd0a 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -979,6 +979,33 @@ def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs, def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>; +// +// FNS +// + +class INT_FNS_MBO + : NVPTXInst<(outs Int32Regs:$dst), ins, + "fns.b32 \t$dst, $mask, $base, $offset;", + [(set Int32Regs:$dst, Operands )]>, + Requires<[hasPTX60, hasSM30]>; + +def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset), + (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>; +def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset), + (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>; +def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset), + (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>; +def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset), + (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>; +def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset), + (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>; +def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset), + (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>; +def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset), + (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>; +def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset), + (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>; + //----------------------------------- // Atomic Functions //----------------------------------- diff --git a/test/CodeGen/NVPTX/fns.ll b/test/CodeGen/NVPTX/fns.ll new file mode 100644 index 00000000000..7673e43449c --- /dev/null +++ b/test/CodeGen/NVPTX/fns.ll @@ -0,0 +1,36 @@ +; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | FileCheck %s + +declare i32 @llvm.nvvm.fns(i32, i32, i32) + +; CHECK-LABEL: .func{{.*}}fns +define i32 @fns(i32 %mask, i32 %base, i32 %offset) { + ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]], [fns_param_0]; + ; CHECK: ld.param.u32 [[BASE:%r[0-9]+]], [fns_param_1]; + ; CHECK: ld.param.u32 [[OFFSET:%r[0-9]+]], [fns_param_2]; + + ; CHECK: fns.b32 {{%r[0-9]+}}, [[MASK]], [[BASE]], [[OFFSET]]; + %r0 = call i32 @llvm.nvvm.fns(i32 %mask, i32 %base, i32 %offset); + ; CHECK: fns.b32 {{%r[0-9]+}}, [[MASK]], [[BASE]], 0; + %r1 = call i32 @llvm.nvvm.fns(i32 %mask, i32 %base, i32 0); + %r01 = add i32 %r0, %r1; + ; CHECK: fns.b32 {{%r[0-9]+}}, [[MASK]], 1, [[OFFSET]]; + %r2 = call i32 @llvm.nvvm.fns(i32 %mask, i32 1, i32 %offset); + ; CHECK: fns.b32 {{%r[0-9]+}}, [[MASK]], 1, 0; + %r3 = call i32 @llvm.nvvm.fns(i32 %mask, i32 1, i32 0); + %r23 = add i32 %r2, %r3; + %r0123 = add i32 %r01, %r23; + ; CHECK: fns.b32 {{%r[0-9]+}}, 2, [[BASE]], [[OFFSET]]; + %r4 = call i32 @llvm.nvvm.fns(i32 2, i32 %base, i32 %offset); + ; CHECK: fns.b32 {{%r[0-9]+}}, 2, [[BASE]], 0; + %r5 = call i32 @llvm.nvvm.fns(i32 2, i32 %base, i32 0); + %r45 = add i32 %r4, %r5; + ; CHECK: fns.b32 {{%r[0-9]+}}, 2, 1, [[OFFSET]]; + %r6 = call i32 @llvm.nvvm.fns(i32 2, i32 1, i32 %offset); + ; CHECK: fns.b32 {{%r[0-9]+}}, 2, 1, 0; + %r7 = call i32 @llvm.nvvm.fns(i32 2, i32 1, i32 0); + %r67 = add i32 %r6, %r7; + %r4567 = add i32 %r45, %r67; + %r = add i32 %r0123, %r4567; + ret i32 %r; +} +