From 7cde679f44f57e258599b75116519a09baf1f789 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Wed, 22 Jun 2016 21:33:49 +0000 Subject: [PATCH] AMDGPU/SI: Define an intrinsic to expose ds_swizzle_b32 Reviewers: tstellarAMD, arsenm Differential Revision: http://reviews.llvm.org/D21533 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273496 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsAMDGPU.td | 5 +++++ lib/Target/AMDGPU/SIInstructions.td | 12 ++++++++++++ test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll | 15 +++++++++++++++ 3 files changed, 32 insertions(+) create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index 48e215e232f..51afb83d7c6 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -373,6 +373,11 @@ def int_amdgcn_mbcnt_hi : GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +// llvm.amdgcn.ds.swizzle src offset +def int_amdgcn_ds_swizzle : + GCCBuiltin<"__builtin_amdgcn_ds_swizzle">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>; + //===----------------------------------------------------------------------===// // CI+ Intrinsics //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 6f7159cf366..814cbd3093d 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -823,7 +823,11 @@ defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmps defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">; defm DS_MIN_RTN_F32 : DS_1A2D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">; defm DS_MAX_RTN_F32 : DS_1A2D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">; + +let Uses = [EXEC], mayLoad =0, mayStore = 0, isConvergent = 1 in { defm DS_SWIZZLE_B32 : DS_1A_RET <0x35, "ds_swizzle_b32", VGPR_32>; +} + let mayStore = 0 in { defm DS_READ_B32 : DS_1A_RET <0x36, "ds_read_b32", VGPR_32>; defm DS_READ2_B32 : DS_1A_Off8_RET <0x37, "ds_read2_b32", VReg_64>; @@ -2339,6 +2343,14 @@ def : Pat < >; //===----------------------------------------------------------------------===// +// DS_SWIZZLE Intrinsic Pattern. +//===----------------------------------------------------------------------===// +def : Pat < + (int_amdgcn_ds_swizzle i32:$src, imm:$offset16), + (DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0)) +>; + +//===----------------------------------------------------------------------===// // SMRD Patterns //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll new file mode 100644 index 00000000000..ef3cb00024b --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll @@ -0,0 +1,15 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s + +declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #0 + +; FUNC-LABEL: {{^}}ds_swizzle: +; CHECK: ds_swizzle_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:100 +; CHECK: s_waitcnt lgkmcnt +define void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) nounwind { + %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0 + store i32 %swizzle, i32 addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind readnone convergent } -- 2.11.0