From d0edb1f758970506b44883154088bc83f8d1da35 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 24 Sep 2015 19:52:27 +0000 Subject: [PATCH] AMDGPU: Add s_dcache_* instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@248533 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsAMDGPU.td | 19 ++++++++++ lib/Target/AMDGPU/CIInstructions.td | 9 +++-- lib/Target/AMDGPU/SIInsertWaits.cpp | 20 +++++++---- lib/Target/AMDGPU/SIInstrInfo.td | 40 +++++++++++++++++++--- lib/Target/AMDGPU/SIInstructions.td | 4 ++- lib/Target/AMDGPU/VIInstructions.td | 10 ++++++ test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll | 29 ++++++++++++++++ .../CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll | 29 ++++++++++++++++ test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll | 27 +++++++++++++++ test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll | 27 +++++++++++++++ test/MC/AMDGPU/smem.s | 11 ++++++ test/MC/AMDGPU/smrd.s | 7 ++++ 12 files changed, 218 insertions(+), 14 deletions(-) create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll create mode 100644 test/MC/AMDGPU/smem.s diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index c197a663001..12943a2bde1 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -100,4 +100,23 @@ def int_amdgcn_buffer_wbinvl1 : GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">, Intrinsic<[], [], []>; +def int_amdgcn_s_dcache_inv : + GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">, + Intrinsic<[], [], []>; + +// CI+ +def int_amdgcn_s_dcache_inv_vol : + GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">, + Intrinsic<[], [], []>; + +// VI +def int_amdgcn_s_dcache_wb : + GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">, + Intrinsic<[], [], []>; + +// VI +def 
int_amdgcn_s_dcache_wb_vol : + GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">, + Intrinsic<[], [], []>; + } diff --git a/lib/Target/AMDGPU/CIInstructions.td b/lib/Target/AMDGPU/CIInstructions.td index 2bb740beebb..7b8cb125dad 100644 --- a/lib/Target/AMDGPU/CIInstructions.td +++ b/lib/Target/AMDGPU/CIInstructions.td @@ -9,12 +9,10 @@ // Instruction definitions for CI and newer. //===----------------------------------------------------------------------===// // Remaining instructions: -// FLAT_* // S_CBRANCH_CDBGUSER // S_CBRANCH_CDBGSYS // S_CBRANCH_CDBGSYS_OR_USER // S_CBRANCH_CDBGSYS_AND_USER -// S_DCACHE_INV_VOL // DS_NOP // DS_GWS_SEMA_RELEASE_ALL // DS_WRAP_RTN_B32 @@ -100,6 +98,13 @@ defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f // DS_CONDXCHG32_RTN_B128 //===----------------------------------------------------------------------===// +// SMRD Instructions +//===----------------------------------------------------------------------===// + +defm S_DCACHE_INV_VOL : SMRD_Inval , + "s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>; + +//===----------------------------------------------------------------------===// // MUBUF Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/SIInsertWaits.cpp b/lib/Target/AMDGPU/SIInsertWaits.cpp index 2379b1fcf6a..b47c09bc9b6 100644 --- a/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -140,7 +140,7 @@ FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) { Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags; - Counters Result; + Counters Result = { { 0, 0, 0 } }; Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT); @@ -153,13 +153,21 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { if (TII->isSMRD(MI.getOpcode())) { - MachineOperand &Op = MI.getOperand(0); - assert(Op.isReg() && "First LGKM operand must be a register!"); + 
if (MI.getNumOperands() != 0) { + MachineOperand &Op = MI.getOperand(0); + assert(Op.isReg() && "First LGKM operand must be a register!"); - unsigned Reg = Op.getReg(); - unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); - Result.Named.LGKM = Size > 4 ? 2 : 1; + unsigned Reg = Op.getReg(); + // XXX - What if this is a write into a super register? + unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); + Result.Named.LGKM = Size > 4 ? 2 : 1; + } else { + // s_dcache_inv etc. do not have a destination register. Assume we + // want a wait on these. + // XXX - What is the right value? + Result.Named.LGKM = 1; + } } else { // DS Result.Named.LGKM = 1; diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index 4f478104564..674f5b70836 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -73,9 +73,12 @@ class sopk si, bits<5> vi = si> { } // Specify an SMRD opcode for SI and SMEM opcode for VI -class smrd si, bits<5> vi = si> { - field bits<5> SI = si; - field bits<8> VI = { 0, 0, 0, vi }; + +// FIXME: This should really be bits<5> si, Tablegen crashes if +// parameter default value is other parameter with different bit size +class smrd si, bits<8> vi = si> { + field bits<5> SI = si{4-0}; + field bits<8> VI = vi; } // Execpt for the NONE field, this must be kept in sync with the SISubtarget enum @@ -899,8 +902,8 @@ class SMRD_Real_si op, string opName, bit imm, dag outs, dag ins, } class SMRD_Real_vi op, string opName, bit imm, dag outs, dag ins, - string asm> : - SMRD , + string asm, list pattern = []> : + SMRD , SMEMe_vi , SIMCInstr { let AssemblerPredicates = [isVI]; @@ -920,6 +923,33 @@ multiclass SMRD_m { + let hasSideEffects = 1, mayStore = 1 in { + def "" : SMRD_Pseudo ; + + let sbase = 0, offset = 0 in { + let sdst = 0 in { + def _si : SMRD_Real_si ; + } + + let glc = 0, sdata = 0 in { + def _vi : SMRD_Real_vi ; + } + } + } +} + +class SMEM_Inval op, string opName, SDPatternOperator 
node> : + SMRD_Real_vi { + let hasSideEffects = 1; + let mayStore = 1; + let sbase = 0; + let sdata = 0; + let glc = 0; + let offset = 0; +} + multiclass SMRD_Helper { defm _IMM : SMRD_m < diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 1043890d85f..796e21fdf6d 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -93,7 +93,9 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < } // mayLoad = 1 //def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>; -//def S_DCACHE_INV : SMRD_ <0x0000001f, "s_dcache_inv", []>; + +defm S_DCACHE_INV : SMRD_Inval , "s_dcache_inv", + int_amdgcn_s_dcache_inv>; //===----------------------------------------------------------------------===// // SOP1 Instructions diff --git a/lib/Target/AMDGPU/VIInstructions.td b/lib/Target/AMDGPU/VIInstructions.td index aca46732adb..cd7148161d4 100644 --- a/lib/Target/AMDGPU/VIInstructions.td +++ b/lib/Target/AMDGPU/VIInstructions.td @@ -89,6 +89,16 @@ def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>; def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>; def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>; +//===----------------------------------------------------------------------===// +// SMEM Instructions +//===----------------------------------------------------------------------===// + +def S_DCACHE_WB : SMEM_Inval <0x21, + "s_dcache_wb", int_amdgcn_s_dcache_wb>; + +def S_DCACHE_WB_VOL : SMEM_Inval <0x23, + "s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>; + } // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll new file mode 100644 index 00000000000..f8af67c17ec --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll @@ -0,0 +1,29 @@ +; RUN: llc 
-march=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +declare void @llvm.amdgcn.s.dcache.inv() #0 + +; GCN-LABEL: {{^}}test_s_dcache_inv: +; GCN-NEXT: ; BB#0: +; SI-NEXT: s_dcache_inv ; encoding: [0x00,0x00,0xc0,0xc7] +; VI-NEXT: s_dcache_inv ; encoding: [0x00,0x00,0x80,0xc0,0x00,0x00,0x00,0x00] +; GCN-NEXT: s_endpgm +define void @test_s_dcache_inv() #0 { + call void @llvm.amdgcn.s.dcache.inv() + ret void +} + +; GCN-LABEL: {{^}}test_s_dcache_inv_insert_wait: +; GCN-NEXT: ; BB#0: +; GCN-NEXT: s_dcache_inv +; GCN-NEXT: s_waitcnt lgkmcnt(0) ; encoding +define void @test_s_dcache_inv_insert_wait() #0 { + call void @llvm.amdgcn.s.dcache.inv() + br label %end + +end: + store volatile i32 3, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll new file mode 100644 index 00000000000..a8502a7c503 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +declare void @llvm.amdgcn.s.dcache.inv.vol() #0 + +; GCN-LABEL: {{^}}test_s_dcache_inv_vol: +; GCN-NEXT: ; BB#0: +; CI-NEXT: s_dcache_inv_vol ; encoding: [0x00,0x00,0x40,0xc7] +; VI-NEXT: s_dcache_inv_vol ; encoding: [0x00,0x00,0x88,0xc0,0x00,0x00,0x00,0x00] +; GCN-NEXT: s_endpgm +define void @test_s_dcache_inv_vol() #0 { + call void @llvm.amdgcn.s.dcache.inv.vol() + ret void +} + +; GCN-LABEL: {{^}}test_s_dcache_inv_vol_insert_wait: +; GCN-NEXT: ; BB#0: +; GCN-NEXT: s_dcache_inv_vol +; GCN-NEXT: s_waitcnt lgkmcnt(0) ; encoding +define void @test_s_dcache_inv_vol_insert_wait() #0 { 
+ call void @llvm.amdgcn.s.dcache.inv.vol() + br label %end + +end: + store volatile i32 3, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll new file mode 100644 index 00000000000..f9ae09b391a --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=VI %s + +declare void @llvm.amdgcn.s.dcache.wb() #0 + +; VI-LABEL: {{^}}test_s_dcache_wb: +; VI-NEXT: ; BB#0: +; VI-NEXT: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00] +; VI-NEXT: s_endpgm +define void @test_s_dcache_wb() #0 { + call void @llvm.amdgcn.s.dcache.wb() + ret void +} + +; VI-LABEL: {{^}}test_s_dcache_wb_insert_wait: +; VI-NEXT: ; BB#0: +; VI-NEXT: s_dcache_wb +; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding +define void @test_s_dcache_wb_insert_wait() #0 { + call void @llvm.amdgcn.s.dcache.wb() + br label %end + +end: + store volatile i32 3, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll new file mode 100644 index 00000000000..d9145458a1f --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=VI %s + +declare void @llvm.amdgcn.s.dcache.wb.vol() #0 + +; VI-LABEL: {{^}}test_s_dcache_wb_vol: +; VI-NEXT: ; BB#0: +; VI-NEXT: s_dcache_wb_vol ; encoding: [0x00,0x00,0x8c,0xc0,0x00,0x00,0x00,0x00] +; VI-NEXT: s_endpgm +define void @test_s_dcache_wb_vol() #0 { + call void @llvm.amdgcn.s.dcache.wb.vol() + ret void +} + +; VI-LABEL: {{^}}test_s_dcache_wb_vol_insert_wait: +; VI-NEXT: ; BB#0: +; VI-NEXT: s_dcache_wb_vol +; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding +define void @test_s_dcache_wb_vol_insert_wait() #0 
{ + call void @llvm.amdgcn.s.dcache.wb.vol() + br label %end + +end: + store volatile i32 3, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } diff --git a/test/MC/AMDGPU/smem.s b/test/MC/AMDGPU/smem.s new file mode 100644 index 00000000000..8fa964ca8d1 --- /dev/null +++ b/test/MC/AMDGPU/smem.s @@ -0,0 +1,11 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOSI %s + +s_dcache_wb +; VI: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00] +; NOSI: error: instruction not supported on this GPU + +s_dcache_wb_vol +; VI: s_dcache_wb_vol ; encoding: [0x00,0x00,0x8c,0xc0,0x00,0x00,0x00,0x00] +; NOSI: error: instruction not supported on this GPU diff --git a/test/MC/AMDGPU/smrd.s b/test/MC/AMDGPU/smrd.s index 2ef73a11504..9b8471b19d2 100644 --- a/test/MC/AMDGPU/smrd.s +++ b/test/MC/AMDGPU/smrd.s @@ -51,3 +51,10 @@ s_load_dwordx16 s[16:31], s[2:3], 1 s_load_dwordx16 s[16:31], s[2:3], s4 // GCN: s_load_dwordx16 s[16:31], s[2:3], s4 ; encoding: [0x04,0x02,0x08,0xc1] + +s_dcache_inv +// GCN: s_dcache_inv ; encoding: [0x00,0x00,0xc0,0xc7] + +s_dcache_inv_vol +// CI: s_dcache_inv_vol ; encoding: [0x00,0x00,0x40,0xc7] +// NOSI: error: instruction not supported on this GPU -- 2.11.0