From 1c18e5b909377e768aef8039d47d8a7a92a15323 Mon Sep 17 00:00:00 2001
From: Marek Olsak
Date: Wed, 13 Jan 2016 17:23:20 +0000
Subject: [PATCH] AMDGPU/SI: Fix a GPU hang with POS_W_FLOAT enabled

Reviewers: tstellarAMD, arsenm

Subscribers: arsenm

Differential Revision: http://reviews.llvm.org/D16037

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257625 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AMDGPU/SIISelLowering.cpp |  9 ++++++++-
 test/CodeGen/AMDGPU/ret.ll           | 17 +++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 4206e6fb869..544867513d9 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -646,8 +646,15 @@ SDValue SITargetLowering::LowerFormalArguments(
   // based on run-time states. Since we can't know what the final PSInputEna
   // will look like, so we shouldn't do anything here and the user should take
   // responsibility for the correct programming.
+  //
+  // Otherwise, the following restrictions apply:
+  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
+  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
+  //   enabled too.
   if (Info->getShaderType() == ShaderType::PIXEL &&
-      (Info->getPSInputAddr() & 0x7F) == 0) {
+      ((Info->getPSInputAddr() & 0x7F) == 0 ||
+       ((Info->getPSInputAddr() & 0xF) == 0 &&
+        Info->isPSInputAllocated(11)))) {
     CCInfo.AllocateReg(AMDGPU::VGPR0);
     CCInfo.AllocateReg(AMDGPU::VGPR1);
     Info->markPSInputAllocated(0);
diff --git a/test/CodeGen/AMDGPU/ret.ll b/test/CodeGen/AMDGPU/ret.ll
index 26817729355..2bd9fd6858f 100644
--- a/test/CodeGen/AMDGPU/ret.ll
+++ b/test/CodeGen/AMDGPU/ret.ll
@@ -77,6 +77,23 @@ define float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 in
 
 
 ; GCN: .long 165580
+; GCN-NEXT: .long 2081
+; GCN-NEXT: .long 165584
+; GCN-NEXT: .long 2081
+; GCN-LABEL: {{^}}ps_input_ena_pos_w:
+; GCN-DAG: v_mov_b32_e32 v0, v4
+; GCN-DAG: v_mov_b32_e32 v1, v2
+; GCN: v_mov_b32_e32 v2, v3
+; GCN-NOT: s_endpgm
+define {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
+  %f = bitcast <2 x i32> %8 to <2 x float>
+  %s = insertvalue {float, <2 x float>} undef, float %14, 0
+  %s1 = insertvalue {float, <2 x float>} %s, <2 x float> %f, 1
+  ret {float, <2 x float>} %s1
+}
+
+
+; GCN: .long 165580
 ; GCN-NEXT: .long 562
 ; GCN-NEXT: .long 165584
 ; GCN-NEXT: .long 563
-- 
2.11.0