From c0021e43ea75c54417c83a01da6d9dd20b5e4881 Mon Sep 17 00:00:00 2001 From: Marek Olsak Date: Wed, 11 Feb 2015 14:26:46 +0000 Subject: [PATCH] R600/SI: Enable a lot of existing tests for VI (squashed commits) This is a union of these commits: * R600/SI: Enable more tests for VI which need no changes * R600/SI: Enable V_BCNT tests for VI Differences: - v_bcnt_..._e32 -> _e64 - s_load_dword* inline offset is in bytes instead of dwords * R600/SI: Enable all tests for VI which use S_LOAD_DWORD The inline offset is changed from dwords to bytes. * R600/SI: Enable LDS tests for VI Differences: - the s_load_dword inline offset changed from dwords to bytes - the tests checked very little on CI, so they have been fixed to check all instructions that "SI" checked * R600/SI: Enable lshr tests for VI * R600/SI: Fix divrem64 tests - "v_lshl_64" was missing "b" before "64" - added VI-NOT checks * R600/SI: Enable the SI.tid test for VI * R600/SI: Enable the frem test for VI Also, the frem_f64 checking is added for CI-VI. * R600/SI: Add VI tests for rsq.clamped git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228830 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/R600/64bit-kernel-args.ll | 9 +- test/CodeGen/R600/atomic_cmp_swap_local.ll | 83 ++++---- test/CodeGen/R600/ctpop.ll | 173 +++++++-------- test/CodeGen/R600/ctpop64.ll | 76 +++---- test/CodeGen/R600/fabs.ll | 35 +-- test/CodeGen/R600/fceil64.ll | 3 +- test/CodeGen/R600/fcopysign.f32.ll | 21 +- test/CodeGen/R600/fcopysign.f64.ll | 23 +- test/CodeGen/R600/ffloor.f64.ll | 3 +- test/CodeGen/R600/fneg.f64.ll | 26 +-- test/CodeGen/R600/fneg.ll | 24 ++- test/CodeGen/R600/frem.ll | 63 +++--- test/CodeGen/R600/ftrunc.f64.ll | 3 +- test/CodeGen/R600/gep-address-space.ll | 1 + test/CodeGen/R600/gv-const-addrspace.ll | 8 +- test/CodeGen/R600/imm.ll | 54 +++-- test/CodeGen/R600/kernel-args.ll | 244 ++++++++++----------- test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll | 22 +- test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll | 22 +- test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll | 12 ++ test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll | 9 + test/CodeGen/R600/llvm.SI.tid.ll | 8 +- test/CodeGen/R600/llvm.rint.f64.ll | 1 + test/CodeGen/R600/llvm.round.ll | 1 + test/CodeGen/R600/local-64.ll | 1 + test/CodeGen/R600/local-atomics.ll | 257 ++++++++++++----------- test/CodeGen/R600/local-atomics64.ll | 231 ++++++++++---------- test/CodeGen/R600/operand-spacing.ll | 13 +- test/CodeGen/R600/private-memory.ll | 2 + test/CodeGen/R600/schedule-kernel-arg-loads.ll | 8 + test/CodeGen/R600/scratch-buffer.ll | 1 + test/CodeGen/R600/sdivrem64.ll | 173 +++++++-------- test/CodeGen/R600/setcc-opt.ll | 159 +++++++------- test/CodeGen/R600/smrd.ll | 56 ++--- test/CodeGen/R600/srl.ll | 19 ++ test/CodeGen/R600/udivrem64.ll | 170 +++++++-------- test/CodeGen/R600/use-sgpr-multiple-times.ll | 73 ++++--- test/CodeGen/R600/work-item-intrinsics.ll | 72 ++++--- 38 files changed, 1165 insertions(+), 994 deletions(-) diff --git a/test/CodeGen/R600/64bit-kernel-args.ll b/test/CodeGen/R600/64bit-kernel-args.ll index 9f2738edb6e..2e08901ac1e 100644 --- a/test/CodeGen/R600/64bit-kernel-args.ll +++ b/test/CodeGen/R600/64bit-kernel-args.ll @@ -1,9 +1,12 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI +; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=SI +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI -; SI: 
{{^}}f64_kernel_arg: +; GCN: {{^}}f64_kernel_arg: ; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9 ; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb -; SI: buffer_store_dwordx2 +; VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x24 +; VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x2c +; GCN: buffer_store_dwordx2 define void @f64_kernel_arg(double addrspace(1)* %out, double %in) { entry: store double %in, double addrspace(1)* %out diff --git a/test/CodeGen/R600/atomic_cmp_swap_local.ll b/test/CodeGen/R600/atomic_cmp_swap_local.ll index bbca7583e33..98a9157c6e6 100644 --- a/test/CodeGen/R600/atomic_cmp_swap_local.ll +++ b/test/CodeGen/R600/atomic_cmp_swap_local.ll @@ -1,14 +1,17 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SICI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SICI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset: -; SI: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7 -; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb -; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc -; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]] -; SI: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0] -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7 +; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SICI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc +; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]] +; GCN: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0] +; GCN: s_endpgm define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic @@ -18,16 +21,18 @@ define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrs } ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset: -; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7 -; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0 -; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb -; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd -; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]] -; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]] -; SI: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0] -; SI: buffer_store_dwordx2 [[RESULT]], -; SI: s_endpgm +; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7 +; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0 +; SICI: s_load_dword [[PTR:s[0-9]+]], 
s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SICI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd +; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]] +; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]] +; GCN: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0] +; GCN: buffer_store_dwordx2 [[RESULT]], +; GCN: s_endpgm define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic @@ -38,8 +43,8 @@ define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrs ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset ; SI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; CI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0] -; SI: s_endpgm +; CIVI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0] +; GCN: s_endpgm define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind { %sub = sub i32 %a, %b %add = add i32 %sub, 4 @@ -51,13 +56,15 @@ define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i3 } ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i32_offset: -; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9 -; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa -; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7 -; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]] -; SI: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0] -; SI: s_endpgm +; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9 +; SICI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa +; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24 +; VI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x28 +; GCN-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]] +; GCN: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0] +; GCN: s_endpgm define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %swap) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic @@ -66,15 +73,17 @@ define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %sw } ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i64_offset: -; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9 -; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb -; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7 -; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0 -; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]] -; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]] -; SI: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0] -; SI: s_endpgm +; SICI: s_load_dword 
[[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9 +; SICI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24 +; VI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7 +; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]] +; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]] +; GCN: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0] +; GCN: s_endpgm define void @lds_atomic_cmpxchg_noret_i64_offset(i64 addrspace(3)* %ptr, i64 %swap) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll index b0a999699ec..6f7d92ba839 100644 --- a/test/CodeGen/R600/ctpop.ll +++ b/test/CodeGen/R600/ctpop.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.ctpop.i32(i32) nounwind readnone @@ -8,11 +9,11 @@ declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone ; FUNC-LABEL: {{^}}s_ctpop_i32: -; SI: s_load_dword [[SVAL:s[0-9]+]], -; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]] -; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] -; SI: buffer_store_dword [[VRESULT]], -; SI: s_endpgm +; GCN: s_load_dword [[SVAL:s[0-9]+]], +; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]] +; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; GCN: buffer_store_dword [[VRESULT]], +; GCN: s_endpgm ; EG: BCNT_INT define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { @@ -23,10 +24,10 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { ; XXX - Why 0 in register? 
; FUNC-LABEL: {{^}}v_ctpop_i32: -; SI: buffer_load_dword [[VAL:v[0-9]+]], -; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0 -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN: buffer_load_dword [[VAL:v[0-9]+]], +; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0 +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm ; EG: BCNT_INT define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { @@ -37,12 +38,13 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali } ; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32: -; SI: buffer_load_dword [[VAL1:v[0-9]+]], -; SI: buffer_load_dword [[VAL0:v[0-9]+]], -; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0 +; GCN: buffer_load_dword [[VAL1:v[0-9]+]], +; GCN: buffer_load_dword [[VAL0:v[0-9]+]], +; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0 ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm ; EG: BCNT_INT ; EG: BCNT_INT @@ -57,11 +59,11 @@ define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace } ; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32: -; SI: buffer_load_dword [[VAL0:v[0-9]+]], -; SI-NEXT: s_waitcnt -; SI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}} -; SI-NEXT: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN: buffer_load_dword [[VAL0:v[0-9]+]], +; GCN-NEXT: s_waitcnt +; GCN-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}} +; GCN-NEXT: buffer_store_dword [[RESULT]], +; GCN: s_endpgm define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind { %val0 = load i32 addrspace(1)* %in0, align 4 %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone @@ -71,9 +73,9 @@ define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace( } ; FUNC-LABEL: {{^}}v_ctpop_v2i32: -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: s_endpgm +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: s_endpgm ; EG: BCNT_INT ; EG: BCNT_INT @@ -85,11 +87,11 @@ define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrs } ; FUNC-LABEL: {{^}}v_ctpop_v4i32: -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: s_endpgm +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: s_endpgm ; EG: BCNT_INT ; EG: BCNT_INT @@ -103,15 +105,15 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs } ; FUNC-LABEL: {{^}}v_ctpop_v8i32: -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: s_endpgm +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: s_endpgm ; EG: BCNT_INT ; EG: BCNT_INT @@ -129,23 +131,23 @@ define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrs } ; FUNC-LABEL: {{^}}v_ctpop_v16i32: -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: 
v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: v_bcnt_u32_b32_e64 -; SI: s_endpgm +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32_e64 +; GCN: s_endpgm ; EG: BCNT_INT ; EG: BCNT_INT @@ -171,10 +173,10 @@ define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> ad } ; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant: -; SI: buffer_load_dword [[VAL:v[0-9]+]], -; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4 -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN: buffer_load_dword [[VAL:v[0-9]+]], +; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4 +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm ; EG: BCNT_INT define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { @@ -186,10 +188,10 @@ define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 } ; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv: -; SI: buffer_load_dword [[VAL:v[0-9]+]], -; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4 -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN: buffer_load_dword [[VAL:v[0-9]+]], +; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4 +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm ; EG: BCNT_INT define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { @@ -201,11 +203,12 @@ define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, } ; FUNC-LABEL: {{^}}v_ctpop_i32_add_literal: -; SI: buffer_load_dword [[VAL:v[0-9]+]], -; SI: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f +; GCN: buffer_load_dword [[VAL:v[0-9]+]], +; GCN: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { %val = load i32 addrspace(1)* %in, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone @@ -215,11 +218,11 @@ define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspa } ; FUNC-LABEL: {{^}}v_ctpop_i32_add_var: -; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]], -; SI-DAG: s_load_dword [[VAR:s[0-9]+]], -; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], +; GCN-DAG: s_load_dword [[VAR:s[0-9]+]], +; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm ; EG: BCNT_INT define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind { @@ -231,11 +234,11 @@ define void 
@v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1 } ; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv: -; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]], -; SI-DAG: s_load_dword [[VAR:s[0-9]+]], -; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], +; GCN-DAG: s_load_dword [[VAR:s[0-9]+]], +; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm ; EG: BCNT_INT define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind { @@ -247,11 +250,12 @@ define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspa } ; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv: -; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}} -; SI-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:16 +; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}} +; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:16 ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm ; EG: BCNT_INT define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind { @@ -269,10 +273,11 @@ define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrsp ; FUNC-LABEL: {{^}}ctpop_i32_in_br: ; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd -; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]] -; SI: v_mov_b32_e32 [[RESULT]], [[SRESULT]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x34 +; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]] +; GCN: v_mov_b32_e32 [[RESULT]], [[SRESULT]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm ; EG: BCNT_INT define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) { entry: diff --git a/test/CodeGen/R600/ctpop64.ll b/test/CodeGen/R600/ctpop64.ll index 9758ac96ea9..8bcd81875c0 100644 --- a/test/CodeGen/R600/ctpop64.ll +++ b/test/CodeGen/R600/ctpop64.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s declare i64 @llvm.ctpop.i64(i64) nounwind readnone declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone @@ -8,10 +9,11 @@ declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>) nounwind readnone ; FUNC-LABEL: {{^}}s_ctpop_i64: ; SI: s_load_dwordx2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb -; SI: s_bcnt1_i32_b64 [[SRESULT:s[0-9]+]], [[SVAL]] -; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] -; SI: buffer_store_dword [[VRESULT]], -; SI: s_endpgm +; VI: s_load_dwordx2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; GCN: s_bcnt1_i32_b64 [[SRESULT:s[0-9]+]], [[SVAL]] +; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; GCN: buffer_store_dword [[VRESULT]], +; GCN: s_endpgm define void @s_ctpop_i64(i32 
addrspace(1)* noalias %out, i64 %val) nounwind { %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone %truncctpop = trunc i64 %ctpop to i32 @@ -20,11 +22,12 @@ define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind { } ; FUNC-LABEL: {{^}}v_ctpop_i64: -; SI: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, -; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0 +; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, +; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0 ; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %val = load i64 addrspace(1)* %in, align 8 %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone @@ -34,9 +37,9 @@ define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noali } ; FUNC-LABEL: {{^}}s_ctpop_v2i64: -; SI: s_bcnt1_i32_b64 -; SI: s_bcnt1_i32_b64 -; SI: s_endpgm +; GCN: s_bcnt1_i32_b64 +; GCN: s_bcnt1_i32_b64 +; GCN: s_endpgm define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) nounwind { %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone %truncctpop = trunc <2 x i64> %ctpop to <2 x i32> @@ -45,11 +48,11 @@ define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) } ; FUNC-LABEL: {{^}}s_ctpop_v4i64: -; SI: s_bcnt1_i32_b64 -; SI: s_bcnt1_i32_b64 -; SI: s_bcnt1_i32_b64 -; SI: s_bcnt1_i32_b64 -; SI: s_endpgm +; GCN: s_bcnt1_i32_b64 +; GCN: s_bcnt1_i32_b64 +; GCN: s_bcnt1_i32_b64 +; GCN: s_bcnt1_i32_b64 +; GCN: s_endpgm define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) nounwind { %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone %truncctpop = trunc <4 x i64> %ctpop to <4 x i32> @@ -58,11 +61,11 @@ define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) } ; FUNC-LABEL: {{^}}v_ctpop_v2i64: -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: s_endpgm +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: s_endpgm define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind { %val = load <2 x i64> addrspace(1)* %in, align 16 %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone @@ -72,15 +75,15 @@ define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrs } ; FUNC-LABEL: {{^}}v_ctpop_v4i64: -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: v_bcnt_u32_b32 -; SI: s_endpgm +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: v_bcnt_u32_b32 +; GCN: s_endpgm define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind { %val = load <4 x i64> addrspace(1)* %in, align 32 %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone @@ -94,11 +97,12 @@ define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrs ; FUNC-LABEL: 
{{^}}ctpop_i64_in_br: ; SI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd -; SI: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}} -; SI: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]] -; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]] -; SI: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}} -; SI: s_endpgm +; VI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x34 +; GCN: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}} +; GCN: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]] +; GCN: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]] +; GCN: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}} +; GCN: s_endpgm define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) { entry: %tmp0 = icmp eq i32 %cond, 0 diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll index add6b75d22a..419a73d0266 100644 --- a/test/CodeGen/R600/fabs.ll +++ b/test/CodeGen/R600/fabs.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s @@ -10,7 +11,7 @@ ; R600-NOT: AND ; R600: |PV.{{[XYZW]}}| -; SI: v_and_b32 +; GCN: v_and_b32 define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) { %bc= bitcast i32 %in to float @@ -23,7 +24,7 @@ define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) { ; R600-NOT: AND ; R600: |PV.{{[XYZW]}}| -; SI: v_and_b32 +; GCN: v_and_b32 define void @fabs_free(float addrspace(1)* %out, i32 %in) { %bc= bitcast i32 %in to float @@ -35,7 +36,7 @@ define void @fabs_free(float addrspace(1)* %out, i32 %in) { ; FUNC-LABEL: {{^}}fabs_f32: ; R600: |{{(PV|T[0-9])\.[XYZW]}}| -; SI: v_and_b32 +; GCN: v_and_b32 define void @fabs_f32(float addrspace(1)* %out, float %in) { %fabs = call float @llvm.fabs.f32(float %in) store float %fabs, float addrspace(1)* %out @@ -46,8 +47,8 @@ define void @fabs_f32(float addrspace(1)* %out, float %in) { ; R600: |{{(PV|T[0-9])\.[XYZW]}}| ; R600: |{{(PV|T[0-9])\.[XYZW]}}| -; SI: v_and_b32 -; SI: v_and_b32 +; GCN: v_and_b32 +; GCN: v_and_b32 define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in) store <2 x float> %fabs, <2 x float> addrspace(1)* %out @@ -60,20 +61,21 @@ define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { ; R600: |{{(PV|T[0-9])\.[XYZW]}}| ; R600: |{{(PV|T[0-9])\.[XYZW]}}| -; SI: v_and_b32 -; SI: v_and_b32 -; SI: v_and_b32 -; SI: v_and_b32 +; GCN: v_and_b32 +; GCN: v_and_b32 +; GCN: v_and_b32 +; GCN: v_and_b32 define void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) { %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in) store <4 x float> %fabs, <4 x float> addrspace(1)* %out ret void } -; SI-LABEL: {{^}}fabs_fn_fold: +; GCN-LABEL: {{^}}fabs_fn_fold: ; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb -; SI-NOT: and -; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}} +; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c +; GCN-NOT: and +; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}} define void 
@fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) { %fabs = call float @fabs(float %in0) %fmul = fmul float %fabs, %in1 @@ -81,10 +83,11 @@ define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) { ret void } -; SI-LABEL: {{^}}fabs_fold: +; GCN-LABEL: {{^}}fabs_fold: ; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb -; SI-NOT: and -; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}} +; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c +; GCN-NOT: and +; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}} define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) { %fabs = call float @llvm.fabs.f32(float %in0) %fmul = fmul float %fabs, %in1 diff --git a/test/CodeGen/R600/fceil64.ll b/test/CodeGen/R600/fceil64.ll index 19396aafa97..84898d09dae 100644 --- a/test/CodeGen/R600/fceil64.ll +++ b/test/CodeGen/R600/fceil64.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s declare double @llvm.ceil.f64(double) nounwind readnone declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone diff --git a/test/CodeGen/R600/fcopysign.f32.ll b/test/CodeGen/R600/fcopysign.f32.ll index 4bc5145bd4d..b719d5a3978 100644 --- a/test/CodeGen/R600/fcopysign.f32.ll +++ b/test/CodeGen/R600/fcopysign.f32.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s @@ -10,12 +11,14 @@ declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind read ; FUNC-LABEL: {{^}}test_copysign_f32: ; SI: s_load_dword [[SMAG:s[0-9]+]], {{.*}} 0xb ; SI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0xc -; SI-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]] -; SI-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]] -; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff -; SI: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; VI: s_load_dword [[SMAG:s[0-9]+]], {{.*}} 0x2c +; VI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0x30 +; GCN-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]] +; GCN-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]] +; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff +; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm ; EG: BFI_INT define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign) nounwind { @@ -25,7 +28,7 @@ define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign } ; FUNC-LABEL: {{^}}test_copysign_v2f32: -; SI: s_endpgm +; GCN: s_endpgm ; EG: BFI_INT ; EG: BFI_INT @@ -36,7 +39,7 @@ define void @test_copysign_v2f32(<2 x float> addrspace(1)* %out, <2 x 
float> %ma } ; FUNC-LABEL: {{^}}test_copysign_v4f32: -; SI: s_endpgm +; GCN: s_endpgm ; EG: BFI_INT ; EG: BFI_INT diff --git a/test/CodeGen/R600/fcopysign.f64.ll b/test/CodeGen/R600/fcopysign.f64.ll index a14a493f72c..3d8c5599308 100644 --- a/test/CodeGen/R600/fcopysign.f64.ll +++ b/test/CodeGen/R600/fcopysign.f64.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s declare double @llvm.copysign.f64(double, double) nounwind readnone declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind readnone @@ -7,13 +8,15 @@ declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind r ; FUNC-LABEL: {{^}}test_copysign_f64: ; SI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd -; SI-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]] -; SI-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]] -; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff -; SI: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]] -; SI: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]] -; SI: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}} -; SI: s_endpgm +; VI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34 +; GCN-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]] +; GCN-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]] +; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff +; GCN: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]] +; GCN: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]] +; GCN: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}} +; GCN: s_endpgm define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) nounwind { %result = call double @llvm.copysign.f64(double %mag, double %sign) store double %result, double addrspace(1)* %out, align 8 @@ -21,7 +24,7 @@ define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %s } ; FUNC-LABEL: {{^}}test_copysign_v2f64: -; SI: s_endpgm +; GCN: s_endpgm define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %mag, <2 x double> %sign) nounwind { %result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign) store <2 x double> %result, <2 x double> addrspace(1)* %out, align 8 @@ -29,7 +32,7 @@ define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> % } ; FUNC-LABEL: {{^}}test_copysign_v4f64: -; SI: s_endpgm +; GCN: s_endpgm define void @test_copysign_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %mag, <4 x double> %sign) nounwind { %result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign) store <4 x double> %result, <4 x double> addrspace(1)* %out, align 8 diff --git a/test/CodeGen/R600/ffloor.f64.ll b/test/CodeGen/R600/ffloor.f64.ll index a63568eec87..a74b95c5b35 100644 --- a/test/CodeGen/R600/ffloor.f64.ll +++ b/test/CodeGen/R600/ffloor.f64.ll @@ -1,5 +1,6 @@ -; 
RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s declare double @llvm.floor.f64(double) nounwind readnone declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone diff --git a/test/CodeGen/R600/fneg.f64.ll b/test/CodeGen/R600/fneg.f64.ll index eb2eb08b88b..9ea189c3143 100644 --- a/test/CodeGen/R600/fneg.f64.ll +++ b/test/CodeGen/R600/fneg.f64.ll @@ -1,7 +1,8 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}fneg_f64: -; SI: v_xor_b32 +; GCN: v_xor_b32 define void @fneg_f64(double addrspace(1)* %out, double %in) { %fneg = fsub double -0.000000e+00, %in store double %fneg, double addrspace(1)* %out @@ -9,8 +10,8 @@ define void @fneg_f64(double addrspace(1)* %out, double %in) { } ; FUNC-LABEL: {{^}}fneg_v2f64: -; SI: v_xor_b32 -; SI: v_xor_b32 +; GCN: v_xor_b32 +; GCN: v_xor_b32 define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> %in) { %fneg = fsub <2 x double> , %in store <2 x double> %fneg, <2 x double> addrspace(1)* %out @@ -23,10 +24,10 @@ define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> ; R600: -PV ; R600: -PV -; SI: v_xor_b32 -; SI: v_xor_b32 -; SI: v_xor_b32 -; SI: v_xor_b32 +; GCN: v_xor_b32 +; GCN: v_xor_b32 +; GCN: v_xor_b32 +; GCN: v_xor_b32 define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> %in) { %fneg = fsub <4 x double> , %in store <4 x double> %fneg, <4 x double> addrspace(1)* %out @@ -39,7 +40,7 @@ define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> ; FUNC-LABEL: {{^}}fneg_free_f64: ; FIXME: Unnecessary copy to VGPRs -; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}} +; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}} define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) { %bc = bitcast i64 %in to double %fsub = fsub double 0.0, %bc @@ -47,10 +48,11 @@ define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) { ret void } -; SI-LABEL: {{^}}fneg_fold_f64: +; GCN-LABEL: {{^}}fneg_fold_f64: ; SI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb -; SI-NOT: xor -; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]] +; VI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c +; GCN-NOT: xor +; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]] define void @fneg_fold_f64(double addrspace(1)* %out, double %in) { %fsub = fsub double -0.0, %in %fmul = fmul double %fsub, %in diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll index ca3350dd7f4..a0fd539863c 100644 --- a/test/CodeGen/R600/fneg.ll +++ b/test/CodeGen/R600/fneg.ll @@ -1,10 +1,11 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck 
-check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}fneg_f32: ; R600: -PV -; SI: v_xor_b32 +; GCN: v_xor_b32 define void @fneg_f32(float addrspace(1)* %out, float %in) { %fneg = fsub float -0.000000e+00, %in store float %fneg, float addrspace(1)* %out @@ -15,8 +16,8 @@ define void @fneg_f32(float addrspace(1)* %out, float %in) { ; R600: -PV ; R600: -PV -; SI: v_xor_b32 -; SI: v_xor_b32 +; GCN: v_xor_b32 +; GCN: v_xor_b32 define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) { %fneg = fsub <2 x float> , %in store <2 x float> %fneg, <2 x float> addrspace(1)* %out @@ -29,10 +30,10 @@ define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %i ; R600: -PV ; R600: -PV -; SI: v_xor_b32 -; SI: v_xor_b32 -; SI: v_xor_b32 -; SI: v_xor_b32 +; GCN: v_xor_b32 +; GCN: v_xor_b32 +; GCN: v_xor_b32 +; GCN: v_xor_b32 define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) { %fneg = fsub <4 x float> , %in store <4 x float> %fneg, <4 x float> addrspace(1)* %out @@ -48,7 +49,7 @@ define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %i ; R600: -KC0[2].Z ; XXX: We could use v_add_f32_e64 with the negate bit here instead. -; SI: v_sub_f32_e64 v{{[0-9]}}, 0, s{{[0-9]+$}} +; GCN: v_sub_f32_e64 v{{[0-9]}}, 0, s{{[0-9]+$}} define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) { %bc = bitcast i32 %in to float %fsub = fsub float 0.0, %bc @@ -58,8 +59,9 @@ define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) { ; FUNC-LABEL: {{^}}fneg_fold_f32: ; SI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb -; SI-NOT: xor -; SI: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]] +; VI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c +; GCN-NOT: xor +; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]] define void @fneg_fold_f32(float addrspace(1)* %out, float %in) { %fsub = fsub float -0.0, %in %fmul = fmul float %fsub, %in diff --git a/test/CodeGen/R600/frem.ll b/test/CodeGen/R600/frem.ll index 50d6687abee..b1a51a41532 100644 --- a/test/CodeGen/R600/frem.ll +++ b/test/CodeGen/R600/frem.ll @@ -1,16 +1,18 @@ -; RUN: llc -march=amdgcn -mcpu=SI -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}frem_f32: -; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}} -; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16 -; SI-DAG: v_cmp -; SI-DAG: v_mul_f32 -; SI: v_rcp_f32_e32 -; SI: v_mul_f32_e32 -; SI: v_mul_f32_e32 -; SI: v_trunc_f32_e32 -; SI: v_mad_f32 -; SI: s_endpgm +; GCN-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}} +; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16 +; GCN-DAG: v_cmp +; GCN-DAG: v_mul_f32 +; GCN: v_rcp_f32_e32 +; GCN: v_mul_f32_e32 +; GCN: v_mul_f32_e32 +; GCN: 
v_trunc_f32_e32 +; GCN: v_mad_f32 +; GCN: s_endpgm define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) #0 { %gep2 = getelementptr float addrspace(1)* %in2, i32 4 @@ -22,14 +24,14 @@ define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, } ; FUNC-LABEL: {{^}}unsafe_frem_f32: -; SI: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16 -; SI: buffer_load_dword [[X:v[0-9]+]], {{.*}} -; SI: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]] -; SI: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]] -; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]] -; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]] -; SI: buffer_store_dword [[RESULT]] -; SI: s_endpgm +; GCN: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16 +; GCN: buffer_load_dword [[X:v[0-9]+]], {{.*}} +; GCN: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]] +; GCN: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]] +; GCN: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]] +; GCN: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]] +; GCN: buffer_store_dword [[RESULT]] +; GCN: s_endpgm define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) #1 { %gep2 = getelementptr float addrspace(1)* %in2, i32 4 @@ -40,11 +42,19 @@ define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, ret void } -; TODO: This should check something when f64 fdiv is implemented -; correctly ; FUNC-LABEL: {{^}}frem_f64: -; SI: s_endpgm +; GCN: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0 +; GCN: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0 +; TODO: Check SI. +; CI: v_rcp_f64_e32 [[INVY:v\[[0-9]+:[0-9]+\]]], [[Y]] +; CI: v_mul_f64 [[DIV:v\[[0-9]+:[0-9]+\]]], [[X]], [[INVY]] +; CI: v_trunc_f64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[DIV]] +; CI: v_mul_f64 [[RESULTM:v\[[0-9]+:[0-9]+\]]], [[TRUNC]], [[Y]] +; SI: v_mul_f64 [[RESULTM:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, [[Y]] +; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], -[[RESULTM]] +; GCN: buffer_store_dwordx2 [[RESULT]], {{.*}}, 0 +; GCN: s_endpgm define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) #0 { %r0 = load double addrspace(1)* %in1, align 8 @@ -55,11 +65,12 @@ define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, } ; FUNC-LABEL: {{^}}unsafe_frem_f64: -; SI: v_rcp_f64_e32 -; SI: v_mul_f64 +; GCN: v_rcp_f64_e32 +; GCN: v_mul_f64 ; SI: v_bfe_u32 -; SI: v_fma_f64 -; SI: s_endpgm +; CI: v_trunc_f64_e32 +; GCN: v_fma_f64 +; GCN: s_endpgm define void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) #1 { %r0 = load double addrspace(1)* %in1, align 8 diff --git a/test/CodeGen/R600/ftrunc.f64.ll b/test/CodeGen/R600/ftrunc.f64.ll index faf9b401b32..21399a817a2 100644 --- a/test/CodeGen/R600/ftrunc.f64.ll +++ b/test/CodeGen/R600/ftrunc.f64.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s declare double @llvm.trunc.f64(double) nounwind readnone declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone diff --git a/test/CodeGen/R600/gep-address-space.ll b/test/CodeGen/R600/gep-address-space.ll index 
2d1892534dc..5c6920dc65e 100644 --- a/test/CodeGen/R600/gep-address-space.ll +++ b/test/CodeGen/R600/gep-address-space.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=CHECK %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind { ; CHECK-LABEL: {{^}}use_gep_address_space: diff --git a/test/CodeGen/R600/gv-const-addrspace.ll b/test/CodeGen/R600/gv-const-addrspace.ll index c58e5846d98..45af71dd7a9 100644 --- a/test/CodeGen/R600/gv-const-addrspace.ll +++ b/test/CodeGen/R600/gv-const-addrspace.ll @@ -1,5 +1,6 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s @b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2 @@ -9,6 +10,7 @@ ; FUNC-LABEL: {{^}}float: ; FIXME: We should be using s_load_dword here. ; SI: buffer_load_dword +; VI: s_load_dword ; EG-DAG: MOV {{\** *}}T2.X ; EG-DAG: MOV {{\** *}}T3.X @@ -31,6 +33,7 @@ entry: ; FIXME: We should be using s_load_dword here. ; SI: buffer_load_dword +; VI: s_load_dword ; EG-DAG: MOV {{\** *}}T2.X ; EG-DAG: MOV {{\** *}}T3.X @@ -53,7 +56,7 @@ entry: @struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ] ; FUNC-LABEL: {{^}}struct_foo_gv_load: -; SI: s_load_dword +; GCN: s_load_dword define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) { %gep = getelementptr inbounds [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index @@ -70,6 +73,7 @@ define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) { ; FUNC-LABEL: {{^}}array_v1_gv_load: ; FIXME: We should be using s_load_dword here. 
; SI: buffer_load_dword +; VI: s_load_dword define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) { %gep = getelementptr inbounds [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index %load = load <1 x i32> addrspace(2)* %gep, align 4 diff --git a/test/CodeGen/R600/imm.ll b/test/CodeGen/R600/imm.ll index 8577a7e5799..c8fd8b72091 100644 --- a/test/CodeGen/R600/imm.ll +++ b/test/CodeGen/R600/imm.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CHECK %s ; Use a 64-bit value with lo bits that can be represented as an inline constant ; CHECK-LABEL: {{^}}i64_imm_inline_lo: @@ -303,7 +304,8 @@ define void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) { } ; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64 -; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0, [[VAL]] ; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) { @@ -313,7 +315,8 @@ define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) { } ; CHECK-LABEL: {{^}}add_inline_imm_0.5_f64 -; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0.5, [[VAL]] ; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) { @@ -323,7 +326,8 @@ define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) { } ; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f64 -; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -0.5, [[VAL]] ; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) { @@ -333,7 +337,8 @@ define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) { } ; CHECK-LABEL: {{^}}add_inline_imm_1.0_f64 -; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1.0, [[VAL]] ; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) { @@ -343,7 +348,8 @@ define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) { } ; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f64 -; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1.0, [[VAL]] ; CHECK: 
buffer_store_dwordx2 [[REG]] define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) { @@ -353,7 +359,8 @@ define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) { } ; CHECK-LABEL: {{^}}add_inline_imm_2.0_f64 -; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2.0, [[VAL]] ; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) { @@ -363,7 +370,8 @@ define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) { } ; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f64 -; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2.0, [[VAL]] ; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) { @@ -373,7 +381,8 @@ define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) { } ; CHECK-LABEL: {{^}}add_inline_imm_4.0_f64 -; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 4.0, [[VAL]] ; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) { @@ -383,7 +392,8 @@ define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) { } ; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f64 -; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -4.0, [[VAL]] ; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) { @@ -394,7 +404,8 @@ define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) { ; CHECK-LABEL: {{^}}add_inline_imm_1_f64 -; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1, [[VAL]] ; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) { @@ -404,7 +415,8 @@ define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) { } ; CHECK-LABEL: {{^}}add_inline_imm_2_f64 -; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2, [[VAL]] ; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) { @@ -414,7 +426,8 @@ define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) { } ; CHECK-LABEL: 
{{^}}add_inline_imm_16_f64 -; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 16, [[VAL]] ; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) { @@ -424,7 +437,8 @@ define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) { } ; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f64 -; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1, [[VAL]] ; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) { @@ -434,7 +448,8 @@ define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) { } ; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f64 -; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2, [[VAL]] ; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) { @@ -444,7 +459,8 @@ define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) { } ; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f64 -; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -16, [[VAL]] ; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) { @@ -454,7 +470,8 @@ define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) { } ; CHECK-LABEL: {{^}}add_inline_imm_63_f64 -; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 63, [[VAL]] ; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) { @@ -464,7 +481,8 @@ define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) { } ; CHECK-LABEL: {{^}}add_inline_imm_64_f64 -; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 64, [[VAL]] ; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) { diff --git a/test/CodeGen/R600/kernel-args.ll b/test/CodeGen/R600/kernel-args.ll index 42d289d4ef2..5db45ce7e62 100644 --- a/test/CodeGen/R600/kernel-args.ll +++ b/test/CodeGen/R600/kernel-args.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG -; RUN: llc < %s -march=r600 -mcpu=cayman | 
FileCheck %s --check-prefix=EG -; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC -; EG-LABEL: {{^}}i8_arg: +; FUNC-LABEL: {{^}}i8_arg: ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; SI-LABEL: {{^}}i8_arg: -; SI: buffer_load_ubyte +; GCN: buffer_load_ubyte define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { entry: @@ -14,10 +14,10 @@ entry: ret void } -; EG-LABEL: {{^}}i8_zext_arg: +; FUNC-LABEL: {{^}}i8_zext_arg: ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; SI-LABEL: {{^}}i8_zext_arg: ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb +; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { entry: @@ -26,10 +26,10 @@ entry: ret void } -; EG-LABEL: {{^}}i8_sext_arg: +; FUNC-LABEL: {{^}}i8_sext_arg: ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; SI-LABEL: {{^}}i8_sext_arg: ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb +; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind { entry: @@ -38,10 +38,9 @@ entry: ret void } -; EG-LABEL: {{^}}i16_arg: +; FUNC-LABEL: {{^}}i16_arg: ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; SI-LABEL: {{^}}i16_arg: -; SI: buffer_load_ushort +; GCN: buffer_load_ushort define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { entry: @@ -50,10 +49,10 @@ entry: ret void } -; EG-LABEL: {{^}}i16_zext_arg: +; FUNC-LABEL: {{^}}i16_zext_arg: ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; SI-LABEL: {{^}}i16_zext_arg: ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb +; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { entry: @@ -62,10 +61,10 @@ entry: ret void } -; EG-LABEL: {{^}}i16_sext_arg: +; FUNC-LABEL: {{^}}i16_sext_arg: ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; SI-LABEL: {{^}}i16_sext_arg: ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb +; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind { entry: @@ -74,176 +73,170 @@ entry: ret void } -; EG-LABEL: {{^}}i32_arg: +; FUNC-LABEL: {{^}}i32_arg: ; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z -; SI-LABEL: {{^}}i32_arg: -; s_load_dword s{{[0-9]}}, s[0:1], 0xb +; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb +; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind { entry: store i32 %in, i32 addrspace(1)* %out, align 4 ret void } -; EG-LABEL: {{^}}f32_arg: +; FUNC-LABEL: {{^}}f32_arg: ; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z -; SI-LABEL: {{^}}f32_arg: -; s_load_dword s{{[0-9]}}, s[0:1], 0xb +; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb +; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind { entry: store float %in, float addrspace(1)* %out, align 4 ret void } -; EG-LABEL: {{^}}v2i8_arg: +; FUNC-LABEL: {{^}}v2i8_arg: ; EG: VTX_READ_8 ; EG: VTX_READ_8 -; SI-LABEL: 
{{^}}v2i8_arg: -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) { entry: store <2 x i8> %in, <2 x i8> addrspace(1)* %out ret void } -; EG-LABEL: {{^}}v2i16_arg: +; FUNC-LABEL: {{^}}v2i16_arg: ; EG: VTX_READ_16 ; EG: VTX_READ_16 -; SI-LABEL: {{^}}v2i16_arg: -; SI-DAG: buffer_load_ushort -; SI-DAG: buffer_load_ushort +; GCN-DAG: buffer_load_ushort +; GCN-DAG: buffer_load_ushort define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) { entry: store <2 x i16> %in, <2 x i16> addrspace(1)* %out ret void } -; EG-LABEL: {{^}}v2i32_arg: +; FUNC-LABEL: {{^}}v2i32_arg: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W -; SI-LABEL: {{^}}v2i32_arg: ; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb +; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind { entry: store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4 ret void } -; EG-LABEL: {{^}}v2f32_arg: +; FUNC-LABEL: {{^}}v2f32_arg: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W -; SI-LABEL: {{^}}v2f32_arg: ; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb +; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind { entry: store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4 ret void } -; EG-LABEL: {{^}}v3i8_arg: +; FUNC-LABEL: {{^}}v3i8_arg: ; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40 ; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41 ; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42 -; SI-LABEL: {{^}}v3i8_arg: define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind { entry: store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4 ret void } -; EG-LABEL: {{^}}v3i16_arg: +; FUNC-LABEL: {{^}}v3i16_arg: ; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44 ; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46 ; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48 -; SI-LABEL: {{^}}v3i16_arg: define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind { entry: store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4 ret void } -; EG-LABEL: {{^}}v3i32_arg: +; FUNC-LABEL: {{^}}v3i32_arg: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W -; SI-LABEL: {{^}}v3i32_arg: ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd +; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34 define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind { entry: store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4 ret void } -; EG-LABEL: {{^}}v3f32_arg: +; FUNC-LABEL: {{^}}v3f32_arg: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W -; SI-LABEL: {{^}}v3f32_arg: ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd +; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34 define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind { entry: store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4 ret void } -; EG-LABEL: {{^}}v4i8_arg: +; FUNC-LABEL: {{^}}v4i8_arg: ; EG: VTX_READ_8 ; EG: VTX_READ_8 ; EG: VTX_READ_8 ; EG: VTX_READ_8 -; SI-LABEL: {{^}}v4i8_arg: -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte +; GCN: 
buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) { entry: store <4 x i8> %in, <4 x i8> addrspace(1)* %out ret void } -; EG-LABEL: {{^}}v4i16_arg: +; FUNC-LABEL: {{^}}v4i16_arg: ; EG: VTX_READ_16 ; EG: VTX_READ_16 ; EG: VTX_READ_16 ; EG: VTX_READ_16 -; SI-LABEL: {{^}}v4i16_arg: -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) { entry: store <4 x i16> %in, <4 x i16> addrspace(1)* %out ret void } -; EG-LABEL: {{^}}v4i32_arg: +; FUNC-LABEL: {{^}}v4i32_arg: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X -; SI-LABEL: {{^}}v4i32_arg: ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd +; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34 define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind { entry: store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4 ret void } -; EG-LABEL: {{^}}v4f32_arg: +; FUNC-LABEL: {{^}}v4f32_arg: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X -; SI-LABEL: {{^}}v4f32_arg: ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd +; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34 define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind { entry: store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4 ret void } -; EG-LABEL: {{^}}v8i8_arg: +; FUNC-LABEL: {{^}}v8i8_arg: ; EG: VTX_READ_8 ; EG: VTX_READ_8 ; EG: VTX_READ_8 @@ -252,21 +245,20 @@ entry: ; EG: VTX_READ_8 ; EG: VTX_READ_8 ; EG: VTX_READ_8 -; SI-LABEL: {{^}}v8i8_arg: -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) { entry: store <8 x i8> %in, <8 x i8> addrspace(1)* %out ret void } -; EG-LABEL: {{^}}v8i16_arg: +; FUNC-LABEL: {{^}}v8i16_arg: ; EG: VTX_READ_16 ; EG: VTX_READ_16 ; EG: VTX_READ_16 @@ -275,22 +267,21 @@ entry: ; EG: VTX_READ_16 ; EG: VTX_READ_16 ; EG: VTX_READ_16 -; SI-LABEL: {{^}}v8i16_arg: -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) { entry: store <8 x i16> %in, <8 x i16> addrspace(1)* %out ret void } -; EG-LABEL: {{^}}v8i32_arg: +; FUNC-LABEL: {{^}}v8i32_arg: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W @@ -299,15 +290,15 @@ entry: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z ; EG-DAG: 
T{{[0-9]\.[XYZW]}}, KC0[5].W ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X -; SI-LABEL: {{^}}v8i32_arg: ; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11 +; VI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x44 define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind { entry: store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4 ret void } -; EG-LABEL: {{^}}v8f32_arg: +; FUNC-LABEL: {{^}}v8f32_arg: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W @@ -316,7 +307,6 @@ entry: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X -; SI-LABEL: {{^}}v8f32_arg: ; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11 define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind { entry: @@ -324,7 +314,7 @@ entry: ret void } -; EG-LABEL: {{^}}v16i8_arg: +; FUNC-LABEL: {{^}}v16i8_arg: ; EG: VTX_READ_8 ; EG: VTX_READ_8 ; EG: VTX_READ_8 @@ -341,30 +331,29 @@ entry: ; EG: VTX_READ_8 ; EG: VTX_READ_8 ; EG: VTX_READ_8 -; SI-LABEL: {{^}}v16i8_arg: -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte +; GCN: buffer_load_ubyte define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) { entry: store <16 x i8> %in, <16 x i8> addrspace(1)* %out ret void } -; EG-LABEL: {{^}}v16i16_arg: +; FUNC-LABEL: {{^}}v16i16_arg: ; EG: VTX_READ_16 ; EG: VTX_READ_16 ; EG: VTX_READ_16 @@ -381,30 +370,29 @@ entry: ; EG: VTX_READ_16 ; EG: VTX_READ_16 ; EG: VTX_READ_16 -; SI-LABEL: {{^}}v16i16_arg: -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) { entry: store <16 x i16> %in, <16 x i16> addrspace(1)* %out ret void } -; EG-LABEL: {{^}}v16i32_arg: +; FUNC-LABEL: {{^}}v16i32_arg: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W @@ -421,15 +409,15 @@ entry: ; EG-DAG: T{{[0-9]\.[XYZW]}}, 
KC0[9].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X -; SI-LABEL: {{^}}v16i32_arg: ; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19 +; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64 define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind { entry: store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4 ret void } -; EG-LABEL: {{^}}v16f32_arg: +; FUNC-LABEL: {{^}}v16f32_arg: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W @@ -446,8 +434,8 @@ entry: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X -; SI-LABEL: {{^}}v16f32_arg: ; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19 +; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64 define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind { entry: store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4 @@ -455,18 +443,18 @@ entry: } ; FUNC-LABEL: {{^}}kernel_arg_i64: -; SI: s_load_dwordx2 -; SI: s_load_dwordx2 -; SI: buffer_store_dwordx2 +; GCN: s_load_dwordx2 +; GCN: s_load_dwordx2 +; GCN: buffer_store_dwordx2 define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind { store i64 %a, i64 addrspace(1)* %out, align 8 ret void } ; XFUNC-LABEL: {{^}}kernel_arg_v1i64: -; XSI: s_load_dwordx2 -; XSI: s_load_dwordx2 -; XSI: buffer_store_dwordx2 +; XGCN: s_load_dwordx2 +; XGCN: s_load_dwordx2 +; XGCN: buffer_store_dwordx2 ; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind { ; store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8 ; ret void diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll index 52d0519ef27..55ca9c7536e 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll @@ -1,25 +1,29 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone -; SI-LABEL: {{^}}test_div_fixup_f32: +; GCN-LABEL: {{^}}test_div_fixup_f32: ; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc -; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]] -; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] -; SI: v_div_fixup_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; VI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x34 +; VI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30 +; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]] +; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] +; GCN: v_div_fixup_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind { %result = call float @llvm.AMDGPU.div.fixup.f32(float %a, 
float %b, float %c) nounwind readnone store float %result, float addrspace(1)* %out, align 4 ret void } -; SI-LABEL: {{^}}test_div_fixup_f64: -; SI: v_div_fixup_f64 +; GCN-LABEL: {{^}}test_div_fixup_f64: +; GCN: v_div_fixup_f64 define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind { %result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone store double %result, double addrspace(1)* %out, align 8 diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll index 2c9085e926d..a15c10fab51 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll @@ -1,25 +1,29 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare float @llvm.AMDGPU.div.fmas.f32(float, float, float, i1) nounwind readnone declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone -; SI-LABEL: {{^}}test_div_fmas_f32: +; GCN-LABEL: {{^}}test_div_fmas_f32: ; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc -; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]] -; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] -; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; VI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x34 +; VI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30 +; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]] +; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] +; GCN: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind { %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone store float %result, float addrspace(1)* %out, align 4 ret void } -; SI-LABEL: {{^}}test_div_fmas_f64: -; SI: v_div_fmas_f64 +; GCN-LABEL: {{^}}test_div_fmas_f64: +; GCN: v_div_fmas_f64 define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) nounwind { %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone store double %result, double addrspace(1)* %out, align 8 diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll index 4318aeaac78..67f1d22c717 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll @@ -1,9 +1,21 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone ; FUNC-LABEL: {{^}}rsq_clamped_f64: ; SI: v_rsq_clamp_f64_e32 + +; VI: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[2:3] +; TODO: this constant should be folded: +; 
VI: s_mov_b32 s[[ALLBITS:[0-9+]]], -1 +; VI: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff +; VI: s_mov_b32 s[[LOW1:[0-9+]]], s[[ALLBITS]] +; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]] +; VI: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff +; VI: s_mov_b32 s[[LOW2:[0-9+]]], s[[ALLBITS]] +; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]] + define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind { %rsq_clamped = call double @llvm.AMDGPU.rsq.clamped.f64(double %src) nounwind readnone store double %rsq_clamped, double addrspace(1)* %out, align 8 diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll index 9336baffc97..eeff2536b23 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s @@ -6,7 +7,15 @@ declare float @llvm.AMDGPU.rsq.clamped.f32(float) nounwind readnone ; FUNC-LABEL: {{^}}rsq_clamped_f32: ; SI: v_rsq_clamp_f32_e32 + +; VI: v_rsq_f32_e32 [[RSQ:v[0-9]+]], {{s[0-9]+}} +; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0x7f7fffff, [[RSQ]] +; TODO: this constant should be folded: +; VI: v_mov_b32_e32 [[MINFLT:v[0-9]+]], 0xff7fffff +; VI: v_max_f32_e32 {{v[0-9]+}}, [[MIN]], [[MINFLT]] + ; EG: RECIPSQRT_CLAMPED + define void @rsq_clamped_f32(float addrspace(1)* %out, float %src) nounwind { %rsq_clamped = call float @llvm.AMDGPU.rsq.clamped.f32(float %src) nounwind readnone store float %rsq_clamped, float addrspace(1)* %out, align 4 diff --git a/test/CodeGen/R600/llvm.SI.tid.ll b/test/CodeGen/R600/llvm.SI.tid.ll index 64efd2daf33..f6e6d7050ba 100644 --- a/test/CodeGen/R600/llvm.SI.tid.ll +++ b/test/CodeGen/R600/llvm.SI.tid.ll @@ -1,7 +1,9 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s -;CHECK: v_mbcnt_lo_u32_b32_e64 -;CHECK: v_mbcnt_hi_u32_b32_e32 +;GCN: v_mbcnt_lo_u32_b32_e64 +;SI: v_mbcnt_hi_u32_b32_e32 +;VI: v_mbcnt_hi_u32_b32_e64 define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" { main_body: diff --git a/test/CodeGen/R600/llvm.rint.f64.ll b/test/CodeGen/R600/llvm.rint.f64.ll index 2c926341f78..c63fb172794 100644 --- a/test/CodeGen/R600/llvm.rint.f64.ll +++ b/test/CodeGen/R600/llvm.rint.f64.ll @@ -1,3 +1,4 @@ +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s diff --git a/test/CodeGen/R600/llvm.round.ll b/test/CodeGen/R600/llvm.round.ll index 6c1fea38527..8d1cfb67e40 100644 --- a/test/CodeGen/R600/llvm.round.ll +++ b/test/CodeGen/R600/llvm.round.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc 
-march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}round_f32: diff --git a/test/CodeGen/R600/local-64.ll b/test/CodeGen/R600/local-64.ll index f975bc1f56b..48c847879ea 100644 --- a/test/CodeGen/R600/local-64.ll +++ b/test/CodeGen/R600/local-64.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=BOTH %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s ; BOTH-LABEL: {{^}}local_i32_load ; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28 [M0] diff --git a/test/CodeGen/R600/local-atomics.ll b/test/CodeGen/R600/local-atomics.ll index 78d747d7e72..3ac01b387bd 100644 --- a/test/CodeGen/R600/local-atomics.ll +++ b/test/CodeGen/R600/local-atomics.ll @@ -1,15 +1,16 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32: ; EG: LDS_WRXCHG_RET * -; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 -; SI: s_load_dword [[SPTR:s[0-9]+]], -; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] -; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; GCN: s_load_dword [[SPTR:s[0-9]+]], +; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -18,8 +19,8 @@ define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* % ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset: ; EG: LDS_WRXCHG_RET * -; SI: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst @@ -30,12 +31,12 @@ define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac ; XXX - Is it really necessary to load 4 into VGPR? 
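; (Explanatory note added in editing, not part of the original commit. Two assumptions worth
; stating for the checks in this area: the constant 4 is first moved into a VGPR because the
; data operands of ds_* instructions must be vector registers, so an SGPR or inline constant
; cannot feed ds_add_rtn_u32 directly; and the SI/CI vs. VI s_load_dword offsets seen in these
; tests encode the same kernel-argument address in different units, a dword offset on SI/CI
; versus a byte offset on VI, e.g. 0xb (11 dwords = 44 bytes) pairs with 0x2c (44), and
; likewise 0xc/0x30, 0xd/0x34, 0x11/0x44 and 0x19/0x64.)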
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32: ; EG: LDS_ADD_RET * -; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 -; SI: s_load_dword [[SPTR:s[0-9]+]], -; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] -; SI: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; GCN: s_load_dword [[SPTR:s[0-9]+]], +; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -44,8 +45,8 @@ define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset: ; EG: LDS_ADD_RET * -; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst @@ -56,8 +57,8 @@ define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset: ; EG: LDS_ADD_RET * ; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0] -; CI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind { %sub = sub i32 %a, %b %add = add i32 %sub, 4 @@ -69,9 +70,9 @@ define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 ad ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32: ; EG: LDS_ADD_RET * -; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 -; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0] -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 +; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0] +; GCN: s_endpgm define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -80,9 +81,9 @@ define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_offset: ; EG: LDS_ADD_RET * -; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 -; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16 -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 +; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16 +; GCN: s_endpgm define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst @@ -93,8 +94,8 @@ define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_bad_si_offset: ; EG: LDS_ADD_RET * ; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0] -; CI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; CIVI: ds_inc_rtn_u32 v{{[0-9]+}}, 
v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind { %sub = sub i32 %a, %b %add = add i32 %sub, 4 @@ -106,8 +107,8 @@ define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 ad ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32: ; EG: LDS_SUB_RET * -; SI: ds_sub_rtn_u32 -; SI: s_endpgm +; GCN: ds_sub_rtn_u32 +; GCN: s_endpgm define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -116,8 +117,8 @@ define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset: ; EG: LDS_SUB_RET * -; SI: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst @@ -127,9 +128,9 @@ define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace ; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32: ; EG: LDS_SUB_RET * -; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 -; SI: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0] -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 +; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0] +; GCN: s_endpgm define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -138,9 +139,9 @@ define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p ; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32_offset: ; EG: LDS_SUB_RET * -; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 -; SI: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16 -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 +; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16 +; GCN: s_endpgm define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst @@ -150,8 +151,8 @@ define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32: ; EG: LDS_AND_RET * -; SI: ds_and_rtn_b32 -; SI: s_endpgm +; GCN: ds_and_rtn_b32 +; GCN: s_endpgm define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -160,8 +161,8 @@ define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset: ; EG: LDS_AND_RET * -; SI: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst @@ -171,8 +172,8 @@ define void 
@lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32: ; EG: LDS_OR_RET * -; SI: ds_or_rtn_b32 -; SI: s_endpgm +; GCN: ds_or_rtn_b32 +; GCN: s_endpgm define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -181,8 +182,8 @@ define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %pt ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset: ; EG: LDS_OR_RET * -; SI: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst @@ -192,8 +193,8 @@ define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace( ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32: ; EG: LDS_XOR_RET * -; SI: ds_xor_rtn_b32 -; SI: s_endpgm +; GCN: ds_xor_rtn_b32 +; GCN: s_endpgm define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -202,8 +203,8 @@ define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset: ; EG: LDS_XOR_RET * -; SI: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst @@ -221,8 +222,8 @@ define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32: ; EG: LDS_MIN_INT_RET * -; SI: ds_min_rtn_i32 -; SI: s_endpgm +; GCN: ds_min_rtn_i32 +; GCN: s_endpgm define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -231,8 +232,8 @@ define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset: ; EG: LDS_MIN_INT_RET * -; SI: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst @@ -242,8 +243,8 @@ define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32: ; EG: LDS_MAX_INT_RET * -; SI: ds_max_rtn_i32 -; SI: s_endpgm +; GCN: ds_max_rtn_i32 +; GCN: s_endpgm define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -252,8 +253,8 @@ define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p ; 
FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset: ; EG: LDS_MAX_INT_RET * -; SI: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst @@ -263,8 +264,8 @@ define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32: ; EG: LDS_MIN_UINT_RET * -; SI: ds_min_rtn_u32 -; SI: s_endpgm +; GCN: ds_min_rtn_u32 +; GCN: s_endpgm define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -273,8 +274,8 @@ define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* % ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset: ; EG: LDS_MIN_UINT_RET * -; SI: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst @@ -284,8 +285,8 @@ define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32: ; EG: LDS_MAX_UINT_RET * -; SI: ds_max_rtn_u32 -; SI: s_endpgm +; GCN: ds_max_rtn_u32 +; GCN: s_endpgm define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst store i32 %result, i32 addrspace(1)* %out, align 4 @@ -294,8 +295,8 @@ define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* % ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset: ; EG: LDS_MAX_UINT_RET * -; SI: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst @@ -304,19 +305,19 @@ define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac } ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32: -; SI: s_load_dword [[SPTR:s[0-9]+]], -; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 -; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] -; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0] -; SI: s_endpgm +; GCN: s_load_dword [[SPTR:s[0-9]+]], +; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0] +; GCN: s_endpgm define void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset: -; SI: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { 
%gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst @@ -325,19 +326,19 @@ define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { ; XXX - Is it really necessary to load 4 into VGPR? ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32: -; SI: s_load_dword [[SPTR:s[0-9]+]], -; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 -; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] -; SI: ds_add_u32 [[VPTR]], [[DATA]] [M0] -; SI: s_endpgm +; GCN: s_load_dword [[SPTR:s[0-9]+]], +; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; GCN: ds_add_u32 [[VPTR]], [[DATA]] [M0] +; GCN: s_endpgm define void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset: -; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst @@ -346,8 +347,8 @@ define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset ; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} [M0] -; CI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0] -; SI: s_endpgm +; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0] +; GCN: s_endpgm define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind { %sub = sub i32 %a, %b %add = add i32 %sub, 4 @@ -357,18 +358,18 @@ define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 } ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32: -; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 -; SI: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] [M0] -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 +; GCN: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] [M0] +; GCN: s_endpgm define void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_offset: -; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 -; SI: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] offset:16 -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 +; GCN: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] offset:16 +; GCN: s_endpgm define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst @@ -377,8 +378,8 @@ define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_bad_si_offset: ; SI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} -; CI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; CIVI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind { %sub = sub i32 %a, %b %add = add i32 %sub, 4 @@ -388,16 +389,16 @@ define void @lds_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 } ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32: -; SI: ds_sub_u32 -; SI: s_endpgm +; GCN: ds_sub_u32 +; GCN: s_endpgm define void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst 
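  ; (added comment, not in the original test: the atomicrmw result is unused here, which is
  ;  presumably what allows the non-returning ds_sub_u32 checked above to be selected instead
  ;  of ds_sub_rtn_u32)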
ret void } ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset: -; SI: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst @@ -405,18 +406,18 @@ define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32: -; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 -; SI: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 +; GCN: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] +; GCN: s_endpgm define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32_offset: -; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 -; SI: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] offset:16 -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1 +; GCN: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] offset:16 +; GCN: s_endpgm define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst @@ -424,16 +425,16 @@ define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32: -; SI: ds_and_b32 -; SI: s_endpgm +; GCN: ds_and_b32 +; GCN: s_endpgm define void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset: -; SI: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst @@ -441,16 +442,16 @@ define void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32: -; SI: ds_or_b32 -; SI: s_endpgm +; GCN: ds_or_b32 +; GCN: s_endpgm define void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset: -; SI: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst @@ -458,16 +459,16 @@ define void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32: -; SI: ds_xor_b32 -; SI: s_endpgm +; GCN: ds_xor_b32 +; GCN: s_endpgm define void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset: -; SI: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result 
= atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst @@ -482,16 +483,16 @@ define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { ; } ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32: -; SI: ds_min_i32 -; SI: s_endpgm +; GCN: ds_min_i32 +; GCN: s_endpgm define void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset: -; SI: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst @@ -499,16 +500,16 @@ define void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32: -; SI: ds_max_i32 -; SI: s_endpgm +; GCN: ds_max_i32 +; GCN: s_endpgm define void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset: -; SI: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst @@ -516,16 +517,16 @@ define void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32: -; SI: ds_min_u32 -; SI: s_endpgm +; GCN: ds_min_u32 +; GCN: s_endpgm define void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset: -; SI: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst @@ -533,16 +534,16 @@ define void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32: -; SI: ds_max_u32 -; SI: s_endpgm +; GCN: ds_max_u32 +; GCN: s_endpgm define void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset: -; SI: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 -; SI: s_endpgm +; GCN: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm define void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32 addrspace(3)* %ptr, i32 4 %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst diff --git a/test/CodeGen/R600/local-atomics64.ll b/test/CodeGen/R600/local-atomics64.ll index cc9d3a72972..bb4e9ebc09a 100644 --- a/test/CodeGen/R600/local-atomics64.ll +++ b/test/CodeGen/R600/local-atomics64.ll @@ -1,8 +1,9 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=GCN %s +; RUN: llc 
-march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=VI -check-prefix=GCN %s ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64: -; SI: ds_wrxchg_rtn_b64 -; SI: s_endpgm +; GCN: ds_wrxchg_rtn_b64 +; GCN: s_endpgm define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -10,8 +11,8 @@ define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* % } ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset: -; SI: ds_wrxchg_rtn_b64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst @@ -20,8 +21,8 @@ define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspac } ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64: -; SI: ds_add_rtn_u64 -; SI: s_endpgm +; GCN: ds_add_rtn_u64 +; GCN: s_endpgm define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -29,13 +30,14 @@ define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64_offset: -; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9 -; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0 +; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9 +; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0 ; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb -; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; SI: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0] -; SI: buffer_store_dwordx2 [[RESULT]], -; SI: s_endpgm +; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0] +; GCN: buffer_store_dwordx2 [[RESULT]], +; GCN: s_endpgm define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i64 4 %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst @@ -44,11 +46,11 @@ define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace } ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64: -; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1 -; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1 -; SI: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} -; SI: buffer_store_dwordx2 [[RESULT]], -; SI: s_endpgm +; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1 +; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1 +; GCN: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} +; GCN: buffer_store_dwordx2 [[RESULT]], +; GCN: s_endpgm define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -56,8 +58,8 @@ define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64_offset: -; SI: ds_inc_rtn_u64 {{.*}} offset:32 -; SI: 
s_endpgm +; GCN: ds_inc_rtn_u64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst @@ -66,8 +68,8 @@ define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace } ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64: -; SI: ds_sub_rtn_u64 -; SI: s_endpgm +; GCN: ds_sub_rtn_u64 +; GCN: s_endpgm define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -75,8 +77,8 @@ define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64_offset: -; SI: ds_sub_rtn_u64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_sub_rtn_u64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst @@ -85,11 +87,11 @@ define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace } ; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64: -; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1 -; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1 -; SI: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} -; SI: buffer_store_dwordx2 [[RESULT]], -; SI: s_endpgm +; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1 +; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1 +; GCN: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} +; GCN: buffer_store_dwordx2 [[RESULT]], +; GCN: s_endpgm define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -97,8 +99,8 @@ define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64_offset: -; SI: ds_dec_rtn_u64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_dec_rtn_u64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst @@ -107,8 +109,8 @@ define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace } ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64: -; SI: ds_and_rtn_b64 -; SI: s_endpgm +; GCN: ds_and_rtn_b64 +; GCN: s_endpgm define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -116,8 +118,8 @@ define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64_offset: -; SI: ds_and_rtn_b64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_and_rtn_b64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst @@ -126,8 +128,8 @@ define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace } ; FUNC-LABEL: 
{{^}}lds_atomic_or_ret_i64: -; SI: ds_or_rtn_b64 -; SI: s_endpgm +; GCN: ds_or_rtn_b64 +; GCN: s_endpgm define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -135,8 +137,8 @@ define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %pt } ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64_offset: -; SI: ds_or_rtn_b64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_or_rtn_b64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst @@ -145,8 +147,8 @@ define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace( } ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64: -; SI: ds_xor_rtn_b64 -; SI: s_endpgm +; GCN: ds_xor_rtn_b64 +; GCN: s_endpgm define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -154,8 +156,8 @@ define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64_offset: -; SI: ds_xor_rtn_b64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_xor_rtn_b64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst @@ -172,8 +174,8 @@ define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace ; } ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64: -; SI: ds_min_rtn_i64 -; SI: s_endpgm +; GCN: ds_min_rtn_i64 +; GCN: s_endpgm define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -181,8 +183,8 @@ define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64_offset: -; SI: ds_min_rtn_i64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_min_rtn_i64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst @@ -191,8 +193,8 @@ define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace } ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64: -; SI: ds_max_rtn_i64 -; SI: s_endpgm +; GCN: ds_max_rtn_i64 +; GCN: s_endpgm define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -200,8 +202,8 @@ define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64_offset: -; SI: ds_max_rtn_i64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_max_rtn_i64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst @@ -210,8 +212,8 @@ define void 
@lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace } ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64: -; SI: ds_min_rtn_u64 -; SI: s_endpgm +; GCN: ds_min_rtn_u64 +; GCN: s_endpgm define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -219,8 +221,8 @@ define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* % } ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64_offset: -; SI: ds_min_rtn_u64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_min_rtn_u64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst @@ -229,8 +231,8 @@ define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspac } ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64: -; SI: ds_max_rtn_u64 -; SI: s_endpgm +; GCN: ds_max_rtn_u64 +; GCN: s_endpgm define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst store i64 %result, i64 addrspace(1)* %out, align 8 @@ -238,8 +240,8 @@ define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* % } ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64_offset: -; SI: ds_max_rtn_u64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_max_rtn_u64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst @@ -248,16 +250,16 @@ define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspac } ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64: -; SI: ds_wrxchg_rtn_b64 -; SI: s_endpgm +; GCN: ds_wrxchg_rtn_b64 +; GCN: s_endpgm define void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset: -; SI: ds_wrxchg_rtn_b64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst @@ -265,8 +267,8 @@ define void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64: -; SI: ds_add_u64 -; SI: s_endpgm +; GCN: ds_add_u64 +; GCN: s_endpgm define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst ret void @@ -274,11 +276,12 @@ define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind { ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64_offset: ; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9 -; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9 -; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0 -; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; SI: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0] -; SI: s_endpgm +; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24 +; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9 +; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0 +; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], 
[[PTR]] +; GCN: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0] +; GCN: s_endpgm define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i64 4 %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst @@ -286,18 +289,18 @@ define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64: -; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1 -; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1 -; SI: ds_inc_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} -; SI: s_endpgm +; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1 +; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1 +; GCN: ds_inc_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} +; GCN: s_endpgm define void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64_offset: -; SI: ds_inc_u64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_inc_u64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst @@ -305,16 +308,16 @@ define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64: -; SI: ds_sub_u64 -; SI: s_endpgm +; GCN: ds_sub_u64 +; GCN: s_endpgm define void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64_offset: -; SI: ds_sub_u64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_sub_u64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst @@ -322,18 +325,18 @@ define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64: -; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1 -; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1 -; SI: ds_dec_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} -; SI: s_endpgm +; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1 +; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1 +; GCN: ds_dec_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} +; GCN: s_endpgm define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64_offset: -; SI: ds_dec_u64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_dec_u64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst @@ -341,16 +344,16 @@ define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64: -; SI: ds_and_b64 -; SI: s_endpgm +; GCN: ds_and_b64 +; GCN: s_endpgm define void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64_offset: -; SI: ds_and_b64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_and_b64 {{.*}} offset:32 +; GCN: s_endpgm define void 
@lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst @@ -358,16 +361,16 @@ define void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64: -; SI: ds_or_b64 -; SI: s_endpgm +; GCN: ds_or_b64 +; GCN: s_endpgm define void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64_offset: -; SI: ds_or_b64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_or_b64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst @@ -375,16 +378,16 @@ define void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64: -; SI: ds_xor_b64 -; SI: s_endpgm +; GCN: ds_xor_b64 +; GCN: s_endpgm define void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64_offset: -; SI: ds_xor_b64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_xor_b64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst @@ -399,16 +402,16 @@ define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { ; } ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64: -; SI: ds_min_i64 -; SI: s_endpgm +; GCN: ds_min_i64 +; GCN: s_endpgm define void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64_offset: -; SI: ds_min_i64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_min_i64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst @@ -416,16 +419,16 @@ define void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64: -; SI: ds_max_i64 -; SI: s_endpgm +; GCN: ds_max_i64 +; GCN: s_endpgm define void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64_offset: -; SI: ds_max_i64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_max_i64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst @@ -433,16 +436,16 @@ define void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64: -; SI: ds_min_u64 -; SI: s_endpgm +; GCN: ds_min_u64 +; GCN: s_endpgm define void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64_offset: -; SI: ds_min_u64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_min_u64 {{.*}} offset:32 +; GCN: s_endpgm 
define void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst @@ -450,16 +453,16 @@ define void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { } ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64: -; SI: ds_max_u64 -; SI: s_endpgm +; GCN: ds_max_u64 +; GCN: s_endpgm define void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst ret void } ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64_offset: -; SI: ds_max_u64 {{.*}} offset:32 -; SI: s_endpgm +; GCN: ds_max_u64 {{.*}} offset:32 +; GCN: s_endpgm define void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64 addrspace(3)* %ptr, i32 4 %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst diff --git a/test/CodeGen/R600/operand-spacing.ll b/test/CodeGen/R600/operand-spacing.ll index dd9f25aad7f..20420a84de6 100644 --- a/test/CodeGen/R600/operand-spacing.ll +++ b/test/CodeGen/R600/operand-spacing.ll @@ -1,13 +1,16 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=VI -check-prefix=GCN %s ; Make sure there isn't an extra space between the instruction name and first operands. -; SI-LABEL: {{^}}add_f32: +; GCN-LABEL: {{^}}add_f32: ; SI-DAG: s_load_dword [[SREGA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: s_load_dword [[SREGB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc -; SI: v_mov_b32_e32 [[VREGB:v[0-9]+]], [[SREGB]] -; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SREGA]], [[VREGB]] -; SI: buffer_store_dword [[RESULT]], +; VI-DAG: s_load_dword [[SREGA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI-DAG: s_load_dword [[SREGB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30 +; GCN: v_mov_b32_e32 [[VREGB:v[0-9]+]], [[SREGB]] +; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SREGA]], [[VREGB]] +; GCN: buffer_store_dword [[RESULT]], define void @add_f32(float addrspace(1)* %out, float %a, float %b) { %result = fadd float %a, %b store float %result, float addrspace(1)* %out diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll index 15153c69a48..b03029c9cd2 100644 --- a/test/CodeGen/R600/private-memory.ll +++ b/test/CodeGen/R600/private-memory.ll @@ -1,6 +1,8 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC ; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC ; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC +; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC +; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC declare i32 @llvm.r600.read.tidig.x() nounwind readnone diff --git a/test/CodeGen/R600/schedule-kernel-arg-loads.ll 
b/test/CodeGen/R600/schedule-kernel-arg-loads.ll index 215ebfccf5b..f9641fae68e 100644 --- a/test/CodeGen/R600/schedule-kernel-arg-loads.ll +++ b/test/CodeGen/R600/schedule-kernel-arg-loads.ll @@ -1,10 +1,18 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=VI %s ; FUNC-LABEL: {{^}}cluster_arg_loads: ; SI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x9 ; SI-NEXT: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd ; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xe +; VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x24 +; VI-NEXT: s_nop 0 +; VI-NEXT: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI-NEXT: s_nop 0 +; VI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34 +; VI-NEXT: s_nop 0 +; VI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x38 define void @cluster_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) nounwind { store i32 %x, i32 addrspace(1)* %out0, align 4 store i32 %y, i32 addrspace(1)* %out1, align 4 diff --git a/test/CodeGen/R600/scratch-buffer.ll b/test/CodeGen/R600/scratch-buffer.ll index 740328a495d..8c5a9905bfd 100644 --- a/test/CodeGen/R600/scratch-buffer.ll +++ b/test/CodeGen/R600/scratch-buffer.ll @@ -1,4 +1,5 @@ ; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s ; When a frame index offset is more than 12-bits, make sure we don't store ; it in mubuf's offset field. 
diff --git a/test/CodeGen/R600/sdivrem64.ll b/test/CodeGen/R600/sdivrem64.ll index 8dc4433dff2..a9b2b7f9df5 100644 --- a/test/CodeGen/R600/sdivrem64.ll +++ b/test/CodeGen/R600/sdivrem64.ll @@ -1,4 +1,5 @@ -;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s +;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s +;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s ;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s ;FUNC-LABEL: {{^}}test_sdiv: @@ -35,39 +36,40 @@ ;EG: BFE_UINT ;EG: BFE_UINT -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI: v_bfe_u32 -;SI-NOT: v_mad_f32 -;SI-NOT: v_lshr_64 -;SI: s_endpgm +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN: v_bfe_u32 +;GCN-NOT: v_mad_f32 +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: s_endpgm define void @test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) { %result = sdiv i64 %x, %y store i64 %result, i64 addrspace(1)* %out @@ -108,39 +110,40 @@ define void @test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) { ;EG: BFE_UINT ;EG: AND_INT {{.*}}, 1, -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI-NOT: v_mad_f32 -;SI-NOT: v_lshr_64 -;SI: s_endpgm +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN-NOT: v_mad_f32 +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: s_endpgm define void @test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) { %result = urem i64 %x, %y store i64 %result, i64 addrspace(1)* %out @@ -151,10 +154,11 @@ define void @test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) { ;EG: RECIP_UINT ;EG-NOT: BFE_UINT -;SI-NOT: s_bfe_u32 -;SI-NOT: 
v_mad_f32 -;SI-NOT: v_lshr_64 -;SI: s_endpgm +;GCN-NOT: s_bfe_u32 +;GCN-NOT: v_mad_f32 +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: s_endpgm define void @test_sdiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) { %1 = ashr i64 %x, 33 %2 = ashr i64 %y, 33 @@ -167,10 +171,11 @@ define void @test_sdiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) { ;EG: RECIP_UINT ;EG-NOT: BFE_UINT -;SI-NOT: s_bfe_u32 -;SI-NOT: v_mad_f32 -;SI-NOT: v_lshr_64 -;SI: s_endpgm +;GCN-NOT: s_bfe_u32 +;GCN-NOT: v_mad_f32 +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: s_endpgm define void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) { %1 = ashr i64 %x, 33 %2 = ashr i64 %y, 33 @@ -186,10 +191,11 @@ define void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) { ;EG-NOT: RECIP_UINT ;EG-NOT: BFE_UINT -;SI-NOT: s_bfe_u32 -;SI: v_mad_f32 -;SI-NOT: v_lshr_64 -;SI: s_endpgm +;GCN-NOT: s_bfe_u32 +;GCN: v_mad_f32 +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: s_endpgm define void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) { %1 = ashr i64 %x, 40 %2 = ashr i64 %y, 40 @@ -205,10 +211,11 @@ define void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) { ;EG-NOT: RECIP_UINT ;EG-NOT: BFE_UINT -;SI-NOT: s_bfe_u32 -;SI: v_mad_f32 -;SI-NOT: v_lshr_64 -;SI: s_endpgm +;GCN-NOT: s_bfe_u32 +;GCN: v_mad_f32 +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: s_endpgm define void @test_srem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) { %1 = ashr i64 %x, 40 %2 = ashr i64 %y, 40 diff --git a/test/CodeGen/R600/setcc-opt.ll b/test/CodeGen/R600/setcc-opt.ll index 5ccdd3e2ba7..93860f518fd 100644 --- a/test/CodeGen/R600/setcc-opt.ll +++ b/test/CodeGen/R600/setcc-opt.ll @@ -1,12 +1,13 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0: -; SI-NOT: v_cmp -; SI: v_cmp_ne_i32_e32 vcc, -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc -; SI-NEXT:buffer_store_byte [[RESULT]] -; SI-NEXT: s_endpgm +; GCN-NOT: v_cmp +; GCN: v_cmp_ne_i32_e32 vcc, +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN-NEXT:buffer_store_byte [[RESULT]] +; GCN-NEXT: s_endpgm ; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W ; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1 @@ -19,11 +20,11 @@ define void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind } ; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0: -; SI-NOT: v_cmp -; SI: v_cmp_ne_i32_e32 vcc, -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc -; SI-NEXT: buffer_store_byte [[RESULT]] -; SI-NEXT: s_endpgm +; GCN-NOT: v_cmp +; GCN: v_cmp_ne_i32_e32 vcc, +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN-NEXT: s_endpgm ; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W ; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1 @@ -37,12 +38,12 @@ define void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind ; This really folds away to false ; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1: -; SI: v_cmp_eq_i32_e32 vcc, -; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc -; 
SI-NEXT: v_cmp_eq_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}} -; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1, -; SI-NEXT: buffer_store_byte [[TMP]] -; SI-NEXT: s_endpgm +; GCN: v_cmp_eq_i32_e32 vcc, +; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc +; GCN-NEXT: v_cmp_eq_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}} +; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1, +; GCN-NEXT: buffer_store_byte [[TMP]] +; GCN-NEXT: s_endpgm define void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %icmp0 = icmp eq i32 %a, %b %ext = sext i1 %icmp0 to i32 @@ -53,12 +54,12 @@ define void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind ; This really folds away to true ; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1: -; SI: v_cmp_ne_i32_e32 vcc, -; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc -; SI-NEXT: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}} -; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1, -; SI-NEXT: buffer_store_byte [[TMP]] -; SI-NEXT: s_endpgm +; GCN: v_cmp_ne_i32_e32 vcc, +; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc +; GCN-NEXT: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}} +; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1, +; GCN-NEXT: buffer_store_byte [[TMP]] +; GCN-NEXT: s_endpgm define void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %icmp0 = icmp ne i32 %a, %b %ext = sext i1 %icmp0 to i32 @@ -68,11 +69,11 @@ define void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind } ; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0: -; SI-NOT: v_cmp -; SI: v_cmp_ne_i32_e32 vcc, -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc -; SI-NEXT: buffer_store_byte [[RESULT]] -; SI-NEXT: s_endpgm +; GCN-NOT: v_cmp +; GCN: v_cmp_ne_i32_e32 vcc, +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN-NEXT: s_endpgm define void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %icmp0 = icmp eq i32 %a, %b %ext = zext i1 %icmp0 to i32 @@ -82,11 +83,11 @@ define void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind } ; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0: -; SI-NOT: v_cmp -; SI: v_cmp_ne_i32_e32 vcc, -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc -; SI-NEXT: buffer_store_byte [[RESULT]] -; SI-NEXT: s_endpgm +; GCN-NOT: v_cmp +; GCN: v_cmp_ne_i32_e32 vcc, +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN-NEXT: s_endpgm define void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %icmp0 = icmp ne i32 %a, %b %ext = zext i1 %icmp0 to i32 @@ -96,11 +97,11 @@ define void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind } ; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1: -; SI-NOT: v_cmp -; SI: v_cmp_eq_i32_e32 vcc, -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc -; SI-NEXT: buffer_store_byte [[RESULT]] -; SI-NEXT: s_endpgm +; GCN-NOT: v_cmp +; GCN: v_cmp_eq_i32_e32 vcc, +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN-NEXT: s_endpgm define void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %icmp0 = icmp eq i32 %a, %b %ext = zext i1 %icmp0 to i32 @@ -110,10 +111,10 @@ define void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind } ; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1: -; SI-NOT: v_cmp -; SI: v_cmp_eq_i32_e32 vcc, -; SI-NEXT: v_cndmask_b32_e64 
[[RESULT:v[0-9]+]], 0, 1, vcc -; SI-NEXT: buffer_store_byte [[RESULT]] +; GCN-NOT: v_cmp +; GCN: v_cmp_eq_i32_e32 vcc, +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN-NEXT: buffer_store_byte [[RESULT]] define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %icmp0 = icmp ne i32 %a, %b %ext = zext i1 %icmp0 to i32 @@ -125,11 +126,13 @@ define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind ; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k: ; SI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc -; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]] -; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[VB]], 2{{$}} -; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]] -; SI: buffer_store_byte -; SI: s_endpgm +; VI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30 +; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]] +; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[VB]], 2{{$}} +; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]] +; GCN: buffer_store_byte +; GCN: s_endpgm define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %icmp0 = icmp ne i32 %a, %b %ext = sext i1 %icmp0 to i32 @@ -139,12 +142,12 @@ define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind } ; FUNC-LABEL: {{^}}cmp_zext_k_i8max: -; SI: buffer_load_ubyte [[B:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44 -; SI: v_mov_b32_e32 [[K255:v[0-9]+]], 0xff{{$}} -; SI: v_cmp_ne_i32_e32 vcc, [[B]], [[K255]] -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc -; SI-NEXT: buffer_store_byte [[RESULT]] -; SI: s_endpgm +; GCN: buffer_load_ubyte [[B:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44 +; GCN: v_mov_b32_e32 [[K255:v[0-9]+]], 0xff{{$}} +; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[K255]] +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN: s_endpgm define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind { %b.ext = zext i8 %b to i32 %icmp0 = icmp ne i32 %b.ext, 255 @@ -153,11 +156,11 @@ define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind { } ; FUNC-LABEL: {{^}}cmp_sext_k_neg1: -; SI: buffer_load_sbyte [[B:v[0-9]+]] -; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}} -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]] -; SI-NEXT: buffer_store_byte [[RESULT]] -; SI: s_endpgm +; GCN: buffer_load_sbyte [[B:v[0-9]+]] +; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}} +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]] +; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN: s_endpgm define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind { %b = load i8 addrspace(1)* %b.ptr %b.ext = sext i8 %b to i32 @@ -167,11 +170,11 @@ define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nou } ; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_sext_arg: -; SI: s_load_dword [[B:s[0-9]+]] -; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}} -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]] -; SI-NEXT: buffer_store_byte [[RESULT]] -; SI: s_endpgm +; GCN: s_load_dword [[B:s[0-9]+]] +; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}} +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]] +; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN: s_endpgm define void 
@cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) nounwind { %b.ext = sext i8 %b to i32 %icmp0 = icmp ne i32 %b.ext, -1 @@ -184,12 +187,12 @@ define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) n ; Should do a buffer_load_sbyte and compare with -1 ; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg: -; SI-DAG: buffer_load_ubyte [[B:v[0-9]+]] -; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff{{$}} -; SI: v_cmp_ne_i32_e32 vcc, [[B]], [[K]]{{$}} -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc -; SI-NEXT: buffer_store_byte [[RESULT]] -; SI: s_endpgm +; GCN-DAG: buffer_load_ubyte [[B:v[0-9]+]] +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff{{$}} +; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[K]]{{$}} +; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN: s_endpgm define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind { %b.ext = sext i8 %b to i32 %icmp0 = icmp ne i32 %b.ext, -1 @@ -198,9 +201,9 @@ define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind { } ; FUNC-LABEL: {{^}}cmp_zext_k_neg1: -; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}} -; SI: buffer_store_byte [[RESULT]] -; SI: s_endpgm +; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}} +; GCN: buffer_store_byte [[RESULT]] +; GCN: s_endpgm define void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind { %b.ext = zext i8 %b to i32 %icmp0 = icmp ne i32 %b.ext, -1 @@ -209,9 +212,9 @@ define void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind { } ; FUNC-LABEL: {{^}}zext_bool_icmp_ne_k: -; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}} -; SI: buffer_store_byte [[RESULT]] -; SI-NEXT: s_endpgm +; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}} +; GCN: buffer_store_byte [[RESULT]] +; GCN-NEXT: s_endpgm define void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %icmp0 = icmp ne i32 %a, %b %ext = zext i1 %icmp0 to i32 @@ -221,9 +224,9 @@ define void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind } ; FUNC-LABEL: {{^}}zext_bool_icmp_eq_k: -; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}} -; SI: buffer_store_byte [[RESULT]] -; SI-NEXT: s_endpgm +; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}} +; GCN: buffer_store_byte [[RESULT]] +; GCN-NEXT: s_endpgm define void @zext_bool_icmp_eq_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %icmp0 = icmp ne i32 %a, %b %ext = zext i1 %icmp0 to i32 diff --git a/test/CodeGen/R600/smrd.ll b/test/CodeGen/R600/smrd.ll index de60554a779..bad16688269 100644 --- a/test/CodeGen/R600/smrd.ll +++ b/test/CodeGen/R600/smrd.ll @@ -1,8 +1,10 @@ -; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s ; SMRD load with an immediate offset. -; CHECK-LABEL: {{^}}smrd0: -; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01 +; GCN-LABEL: {{^}}smrd0: +; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01 +; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { entry: %0 = getelementptr i32 addrspace(2)* %ptr, i64 1 @@ -12,8 +14,9 @@ entry: } ; SMRD load with the largest possible immediate offset. 
-; CHECK-LABEL: {{^}}smrd1: -; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff +; GCN-LABEL: {{^}}smrd1: +; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff +; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { entry: %0 = getelementptr i32 addrspace(2)* %ptr, i64 255 @@ -23,10 +26,11 @@ entry: } ; SMRD load with an offset greater than the largest possible immediate. -; CHECK-LABEL: {{^}}smrd2: -; CHECK: s_movk_i32 s[[OFFSET:[0-9]]], 0x400 -; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]] -; CHECK: s_endpgm +; GCN-LABEL: {{^}}smrd2: +; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400 +; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]] +; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400 +; GCN: s_endpgm define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { entry: %0 = getelementptr i32 addrspace(2)* %ptr, i64 256 @@ -36,17 +40,18 @@ entry: } ; SMRD load with a 64-bit offset -; CHECK-LABEL: {{^}}smrd3: +; GCN-LABEL: {{^}}smrd3: ; FIXME: There are too many copies here because we don't fold immediates ; through REG_SEQUENCE -; CHECK: s_mov_b32 s[[SLO:[0-9]+]], 0 ; -; CHECK: s_mov_b32 s[[SHI:[0-9]+]], 4 -; CHECK: s_mov_b32 s[[SSLO:[0-9]+]], s[[SLO]] -; CHECK-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SSLO]] -; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]] +; SI: s_mov_b32 s[[SLO:[0-9]+]], 0 ; +; SI: s_mov_b32 s[[SHI:[0-9]+]], 4 +; SI: s_mov_b32 s[[SSLO:[0-9]+]], s[[SLO]] +; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SSLO]] +; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]] ; FIXME: We should be able to use s_load_dword here -; CHECK: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 -; CHECK: s_endpgm +; SI: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 +; TODO: Add VI checks +; GCN: s_endpgm define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) { entry: %0 = getelementptr i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32 @@ -56,8 +61,9 @@ entry: } ; SMRD load using the load.const intrinsic with an immediate offset -; CHECK-LABEL: {{^}}smrd_load_const0: -; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04 +; GCN-LABEL: {{^}}smrd_load_const0: +; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04 +; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10 define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0 @@ -69,8 +75,9 @@ main_body: ; SMRD load using the load.const intrinsic with the largest possible immediate ; offset. 
-; CHECK-LABEL: {{^}}smrd_load_const1: -; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff +; GCN-LABEL: {{^}}smrd_load_const1: +; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff +; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0 @@ -82,9 +89,10 @@ main_body: ; SMRD load using the load.const intrinsic with an offset greater than the ; largets possible immediate. ; immediate offset. -; CHECK-LABEL: {{^}}smrd_load_const2: -; CHECK: s_movk_i32 s[[OFFSET:[0-9]]], 0x400 -; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]] +; GCN-LABEL: {{^}}smrd_load_const2: +; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400 +; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]] +; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400 define void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0 diff --git a/test/CodeGen/R600/srl.ll b/test/CodeGen/R600/srl.ll index 9e7b35e8338..1f9b620b9e0 100644 --- a/test/CodeGen/R600/srl.ll +++ b/test/CodeGen/R600/srl.ll @@ -1,8 +1,10 @@ ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}lshr_i32: ; SI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1 @@ -17,6 +19,9 @@ define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} + ; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { @@ -34,6 +39,11 @@ define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i ; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} + ; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG: LSHR {{\*? 
*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} @@ -49,6 +59,7 @@ define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i ; FUNC-LABEL: {{^}}lshr_i64: ; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} ; EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] ; EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} @@ -74,6 +85,9 @@ define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} ; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} + ; EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] ; EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] ; EG-DAG: LSHL {{\*? *}}[[COMPSHA]] @@ -111,6 +125,11 @@ define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i ; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} ; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} +; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} + ; EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] ; EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] ; EG-DAG: SUB_INT {{\*? 
*}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]] diff --git a/test/CodeGen/R600/udivrem64.ll b/test/CodeGen/R600/udivrem64.ll index 44ac1217295..9f3069bdf80 100644 --- a/test/CodeGen/R600/udivrem64.ll +++ b/test/CodeGen/R600/udivrem64.ll @@ -1,5 +1,5 @@ -;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s -;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s +;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s +;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s ;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s ;FUNC-LABEL: {{^}}test_udiv: @@ -36,39 +36,40 @@ ;EG: BFE_UINT ;EG: BFE_UINT -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI-NOT: v_mad_f32 -;SI-NOT: v_lshr_64 -;SI: s_endpgm +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN-NOT: v_mad_f32 +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: s_endpgm define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) { %result = udiv i64 %x, %y store i64 %result, i64 addrspace(1)* %out @@ -109,39 +110,40 @@ define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) { ;EG: BFE_UINT ;EG: AND_INT {{.*}}, 1, -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI: s_bfe_u32 -;SI-NOT: v_mad_f32 -;SI-NOT: v_lshr_64 -;SI: s_endpgm +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN: s_bfe_u32 +;GCN-NOT: v_mad_f32 +;SI-NOT: v_lshr_b64 +;VI-NOT: v_lshrrev_b64 +;GCN: s_endpgm define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) { %result = urem i64 %x, %y 
   store i64 %result, i64 addrspace(1)* %out
@@ -152,10 +154,11 @@ define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG: RECIP_UINT
 ;EG-NOT: BFE_UINT
 
-;SI-NOT: s_bfe_u32
-;SI-NOT: v_mad_f32
-;SI-NOT: v_lshr_64
-;SI: s_endpgm
+;GCN-NOT: s_bfe_u32
+;GCN-NOT: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
 define void @test_udiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
   %1 = lshr i64 %x, 33
   %2 = lshr i64 %y, 33
@@ -168,10 +171,11 @@ define void @test_udiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG: RECIP_UINT
 ;EG-NOT: BFE_UINT
 
-;SI-NOT: s_bfe_u32
-;SI-NOT: v_mad_f32
-;SI-NOT: v_lshr_64
-;SI: s_endpgm
+;GCN-NOT: s_bfe_u32
+;GCN-NOT: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
 define void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
   %1 = lshr i64 %x, 33
   %2 = lshr i64 %y, 33
@@ -187,9 +191,10 @@ define void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG-NOT: RECIP_UINT
 ;EG-NOT: BFE_UINT
 
-;SI-NOT: v_lshr_64
-;SI: v_mad_f32
-;SI: s_endpgm
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: v_mad_f32
+;GCN: s_endpgm
 define void @test_udiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
   %1 = lshr i64 %x, 40
   %2 = lshr i64 %y, 40
@@ -205,9 +210,10 @@ define void @test_udiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG-NOT: RECIP_UINT
 ;EG-NOT: BFE_UINT
 
-;SI-NOT: v_lshr_64
-;SI: v_mad_f32
-;SI: s_endpgm
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: v_mad_f32
+;GCN: s_endpgm
 define void @test_urem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
   %1 = lshr i64 %x, 40
   %2 = lshr i64 %y, 40
diff --git a/test/CodeGen/R600/use-sgpr-multiple-times.ll b/test/CodeGen/R600/use-sgpr-multiple-times.ll
index 97d73ba74bc..f26f30022b4 100644
--- a/test/CodeGen/R600/use-sgpr-multiple-times.ll
+++ b/test/CodeGen/R600/use-sgpr-multiple-times.ll
@@ -1,80 +1,87 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
 
 
 declare float @llvm.fma.f32(float, float, float) #1
 declare float @llvm.fmuladd.f32(float, float, float) #1
 declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1
 
-; SI-LABEL: {{^}}test_sgpr_use_twice_binop:
-; SI: s_load_dword [[SGPR:s[0-9]+]],
-; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
-; SI: buffer_store_dword [[RESULT]]
+; GCN-LABEL: {{^}}test_sgpr_use_twice_binop:
+; GCN: s_load_dword [[SGPR:s[0-9]+]],
+; GCN: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
+; GCN: buffer_store_dword [[RESULT]]
 define void @test_sgpr_use_twice_binop(float addrspace(1)* %out, float %a) #0 {
   %dbl = fadd float %a, %a
   store float %dbl, float addrspace(1)* %out, align 4
   ret void
 }
 
-; SI-LABEL: {{^}}test_sgpr_use_three_ternary_op:
-; SI: s_load_dword [[SGPR:s[0-9]+]],
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
-; SI: buffer_store_dword [[RESULT]]
+; GCN-LABEL: {{^}}test_sgpr_use_three_ternary_op:
+; GCN: s_load_dword [[SGPR:s[0-9]+]],
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
+; GCN: buffer_store_dword [[RESULT]]
 define void @test_sgpr_use_three_ternary_op(float addrspace(1)* %out, float %a) #0 {
   %fma = call float @llvm.fma.f32(float %a, float %a, float %a) #1
   store float %fma, float addrspace(1)* %out, align 4
   ret void
 }
 
-; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
 ; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
-; SI: buffer_store_dword [[RESULT]]
+; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
+; GCN: buffer_store_dword [[RESULT]]
 define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, float %a, float %b) #0 {
   %fma = call float @llvm.fma.f32(float %a, float %a, float %b) #1
   store float %fma, float addrspace(1)* %out, align 4
   ret void
 }
 
-; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
 ; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
-; SI: buffer_store_dword [[RESULT]]
+; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
+; GCN: buffer_store_dword [[RESULT]]
 define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
   %fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
   store float %fma, float addrspace(1)* %out, align 4
   ret void
 }
 
-; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
 ; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
-; SI: buffer_store_dword [[RESULT]]
+; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
+; GCN: buffer_store_dword [[RESULT]]
 define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
   %fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
   store float %fma, float addrspace(1)* %out, align 4
   ret void
 }
 
-; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
-; SI: s_load_dword [[SGPR:s[0-9]+]]
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
-; SI: buffer_store_dword [[RESULT]]
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
+; GCN: s_load_dword [[SGPR:s[0-9]+]]
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
+; GCN: buffer_store_dword [[RESULT]]
 define void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, float %a) #0 {
   %fma = call float @llvm.fma.f32(float %a, float %a, float 2.0) #1
   store float %fma, float addrspace(1)* %out, align 4
   ret void
 }
 
-; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
-; SI: s_load_dword [[SGPR:s[0-9]+]]
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
-; SI: buffer_store_dword [[RESULT]]
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
+; GCN: s_load_dword [[SGPR:s[0-9]+]]
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
+; GCN: buffer_store_dword [[RESULT]]
 define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 {
   %fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1
   store float %fma, float addrspace(1)* %out, align 4
@@ -82,10 +89,10 @@ define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, fl
 }
 
 ; Don't use fma since fma c, x, y is canonicalized to fma x, c, y
-; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
-; SI: s_load_dword [[SGPR:s[0-9]+]]
-; SI: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
-; SI: buffer_store_dword [[RESULT]]
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
+; GCN: s_load_dword [[SGPR:s[0-9]+]]
+; GCN: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
+; GCN: buffer_store_dword [[RESULT]]
 define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32 %a) #0 {
   %fma = call i32 @llvm.AMDGPU.imad24(i32 2, i32 %a, i32 %a) #1
   store i32 %fma, i32 addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/work-item-intrinsics.ll b/test/CodeGen/R600/work-item-intrinsics.ll
index 37c0e0f304c..4328e964c1b 100644
--- a/test/CodeGen/R600/work-item-intrinsics.ll
+++ b/test/CodeGen/R600/work-item-intrinsics.ll
@@ -1,14 +1,15 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 
 ; FUNC-LABEL: {{^}}ngroups_x:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 ; EG: MOV [[VAL]], KC0[0].X
 
-; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @ngroups_x (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.ngroups.x() #0
@@ -21,8 +22,9 @@ entry:
 ; EG: MOV [[VAL]], KC0[0].Y
 
 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @ngroups_y (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.ngroups.y() #0
@@ -35,8 +37,9 @@ entry:
 ; EG: MOV [[VAL]], KC0[0].Z
 
 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @ngroups_z (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.ngroups.z() #0
@@ -49,8 +52,9 @@ entry:
 ; EG: MOV [[VAL]], KC0[0].W
 
 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @global_size_x (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.global.size.x() #0
@@ -63,8 +67,9 @@ entry:
 ; EG: MOV [[VAL]], KC0[1].X
 
 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @global_size_y (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.global.size.y() #0
@@ -77,8 +82,9 @@ entry:
 ; EG: MOV [[VAL]], KC0[1].Y
 
 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @global_size_z (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.global.size.z() #0
@@ -91,8 +97,9 @@ entry:
 ; EG: MOV [[VAL]], KC0[1].Z
 
 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @local_size_x (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.local.size.x() #0
@@ -105,8 +112,9 @@ entry:
 ; EG: MOV [[VAL]], KC0[1].W
 
 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @local_size_y (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.local.size.y() #0
@@ -119,8 +127,9 @@ entry:
 ; EG: MOV [[VAL]], KC0[2].X
 
 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @local_size_z (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.local.size.z() #0
@@ -133,8 +142,9 @@ entry:
 ; EG: MOV [[VAL]], KC0[2].Z
 
 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
 define void @get_work_dim (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.AMDGPU.read.workdim() #0
@@ -147,8 +157,8 @@ entry:
 ; kernel arguments, but this may change in the future.
 
 ; FUNC-LABEL: {{^}}tgid_x:
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4
-; SI: buffer_store_dword [[VVAL]]
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4
+; GCN: buffer_store_dword [[VVAL]]
 define void @tgid_x (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.tgid.x() #0
@@ -157,8 +167,8 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}tgid_y:
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5
-; SI: buffer_store_dword [[VVAL]]
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5
+; GCN: buffer_store_dword [[VVAL]]
 define void @tgid_y (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.tgid.y() #0
@@ -167,8 +177,8 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}tgid_z:
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6
-; SI: buffer_store_dword [[VVAL]]
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6
+; GCN: buffer_store_dword [[VVAL]]
 define void @tgid_z (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.tgid.z() #0
@@ -177,7 +187,7 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}tidig_x:
-; SI: buffer_store_dword v0
+; GCN: buffer_store_dword v0
 define void @tidig_x (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.tidig.x() #0
@@ -186,7 +196,7 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}tidig_y:
-; SI: buffer_store_dword v1
+; GCN: buffer_store_dword v1
 define void @tidig_y (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.tidig.y() #0
@@ -195,7 +205,7 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}tidig_z:
-; SI: buffer_store_dword v2
+; GCN: buffer_store_dword v2
 define void @tidig_z (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.tidig.z() #0
-- 
2.11.0