From 47b72990fedeea6358be841f7b8f679a06c33ed9 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 17 Aug 2016 06:26:01 -0700 Subject: [PATCH 1/1] i965: Fix execution size of scalar TCS barrier setup code. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Previously, the scalar TCS backend was generating: mov(8) g17<1>UD 0x00000000UD { align1 WE_all 1Q compacted }; and(8) g17.2<1>UD g0.2<0,1,0>UD 0x0001e000UD { align1 WE_all 1Q }; shl(8) g17.2<1>UD g17.2<8,8,1>UD 0x0000000bUD { align1 WE_all 1Q }; or(8) g17.2<1>UD g17.2<8,8,1>UD 0x00008200UD { align1 WE_all 1Q }; send(8) null<1>UW g17<8,8,1>UD gateway (barrier msg) mlen 1 rlen 0 { align1 WE_all 1Q }; This is rubbish - g17.2<8,8,1>UD spans two registers, and is an illegal region. Not to mention it clobbers 8 channels of data when we only wanted to touch m0.2. Instead, we want: mov(8) g17<1>UD 0x00000000UD { align1 WE_all 1Q compacted }; and(1) g17.2<1>UD g0.2<0,1,0>UD 0x0001e000UD { align1 WE_all }; shl(1) g17.2<1>UD g17.2<0,1,0>UD 0x0000000bUD { align1 WE_all }; or(1) g17.2<1>UD g17.2<0,1,0>UD 0x00008200UD { align1 WE_all }; send(8) null<1>UW g17<8,8,1>UD gateway (barrier msg) mlen 1 rlen 0 { align1 WE_all 1Q }; Using component() accomplishes this. Fixes GL44-CTS.tessellation_shader.tessellation_shader_tc_barriers. barrier_guarded_read_write_calls on Skylake. Probably fixes other barrier issues on Gen8+. v2: Use a group(1, 0) builder so inst->exec_size is set correctly (thanks to Francisco Jerez for catching that it was incorrect). Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Kenneth Graunke Reviewed-by: Alejandro Piñeiro [v1] Reviewed-by: Francisco Jerez (cherry picked from commit 159f0377556c45630cdc0721b193f34217a329b0) --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 096c9e03318..f5b8078cf7d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -2322,22 +2322,22 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, break; fs_reg m0 = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); - fs_reg m0_2 = byte_offset(m0, 2 * sizeof(uint32_t)); + fs_reg m0_2 = component(m0, 2); - const fs_builder fwa_bld = bld.exec_all(); + const fs_builder chanbld = bld.exec_all().group(1, 0); /* Zero the message header */ - fwa_bld.MOV(m0, brw_imm_ud(0u)); + bld.exec_all().MOV(m0, brw_imm_ud(0u)); /* Copy "Barrier ID" from r0.2, bits 16:13 */ - fwa_bld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), + chanbld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), brw_imm_ud(INTEL_MASK(16, 13))); /* Shift it up to bits 27:24. */ - fwa_bld.SHL(m0_2, m0_2, brw_imm_ud(11)); + chanbld.SHL(m0_2, m0_2, brw_imm_ud(11)); /* Set the Barrier Count and the enable bit */ - fwa_bld.OR(m0_2, m0_2, + chanbld.OR(m0_2, m0_2, brw_imm_ud(tcs_prog_data->instances << 8 | (1 << 15))); bld.emit(SHADER_OPCODE_BARRIER, bld.null_reg_ud(), m0); -- 2.11.0