From 8e92b2817eaf9d068f5ef86584b6043f6a1ea4a3 Mon Sep 17 00:00:00 2001
From: Connor Abbott <cwabbott0@gmail.com>
Date: Tue, 8 Aug 2017 18:52:22 +0000
Subject: [PATCH] [AMDGPU] Add llvm.amdgpu.update.dpp intrinsic

Summary:
Now that we've made all the necessary backend changes, we can add a new
intrinsic which exposes the new capabilities to IR producers. Since
llvm.amdgpu.update.dpp is a strict superset of llvm.amdgpu.mov.dpp, we
should deprecate the former. We also add tests for all the functionality
that was added in previous changes, now that we can access it via an IR
construct.

Reviewers: tstellar, arsenm

Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye

Differential Revision: https://reviews.llvm.org/D34718

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310399 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/IR/IntrinsicsAMDGPU.td           |  9 +++++++++
 lib/Target/AMDGPU/VOP1Instructions.td         |  8 ++++++++
 test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll | 17 +++++++++++++++++
 3 files changed, 34 insertions(+)
 create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
index 40ac52e5d0d..f2203470008 100644
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -788,6 +788,15 @@ def int_amdgcn_mov_dpp :
             [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
              llvm_i1_ty], [IntrNoMem, IntrConvergent]>;
 
+// llvm.amdgcn.update.dpp.i32 <old> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
+// Should be equivalent to:
+// v_mov_b32 <dest> <old>
+// v_mov_b32 <dest> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
+def int_amdgcn_update_dpp :
+  Intrinsic<[llvm_anyint_ty],
+            [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty,
+             llvm_i32_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent]>;
+
 def int_amdgcn_s_dcache_wb :
   GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
   Intrinsic<[], [], []>;
diff --git a/lib/Target/AMDGPU/VOP1Instructions.td b/lib/Target/AMDGPU/VOP1Instructions.td
index f20a20e91cd..b2b9af3dca9 100644
--- a/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/lib/Target/AMDGPU/VOP1Instructions.td
@@ -663,6 +663,14 @@ def : Pat <
                        (as_i1imm $bound_ctrl))
 >;
 
+def : Pat <
+  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
+                      imm:$bank_mask, imm:$bound_ctrl)),
+  (V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl),
+                       (as_i32imm $row_mask), (as_i32imm $bank_mask),
+                       (as_i1imm $bound_ctrl))
+>;
+
 def : Pat<
   (i32 (anyext i16:$src)),
   (COPY $src)
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
new file mode 100644
index 00000000000..4d1dbf514a1
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-OPT %s
+; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOOPT %s
+
+; VI-LABEL: {{^}}dpp_test:
+; VI: v_mov_b32_e32 v0, s{{[0-9]+}}
+; VI: v_mov_b32_e32 v1, s{{[0-9]+}}
+; VI: s_nop 1
+; VI: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x01,0x08,0x11]
+define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
+  %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 1) #0
+  store i32 %tmp0, i32 addrspace(1)* %out
+  ret void
+}
+
+declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0
+
+attributes #0 = { nounwind readnone convergent }
-- 
2.11.0