From 5e621cb9fef7eada5a3c131d27f5b0b142658758 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 11 Sep 2013 14:03:13 -0700 Subject: [PATCH] i965/gen7: Implement code generation for untyped surface read instructions. --- src/mesa/drivers/dri/i965/brw_defines.h | 1 + src/mesa/drivers/dri/i965/brw_eu.h | 8 ++++ src/mesa/drivers/dri/i965/brw_eu_emit.c | 56 ++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs.cpp | 1 + src/mesa/drivers/dri/i965/brw_fs.h | 4 ++ src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 18 ++++++++ src/mesa/drivers/dri/i965/brw_vec4.cpp | 1 + src/mesa/drivers/dri/i965/brw_vec4.h | 4 ++ src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 19 ++++++++ 9 files changed, 112 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 1cadf6c2c3e..fbc787a7be1 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -776,6 +776,7 @@ enum opcode { SHADER_OPCODE_SHADER_TIME_ADD, SHADER_OPCODE_UNTYPED_ATOMIC, + SHADER_OPCODE_UNTYPED_SURFACE_READ, FS_OPCODE_DDX, FS_OPCODE_DDY, diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 66b7ba7720b..1a448d055c3 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -433,6 +433,14 @@ brw_untyped_atomic(struct brw_compile *p, GLuint msg_length, GLuint response_length); +void +brw_untyped_surface_read(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + GLuint bind_table_index, + GLuint msg_length, + GLuint response_length); + /*********************************************************************** * brw_eu_util.c: */ diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index d1d40f7bee3..f6085192c09 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2547,6 +2547,62 @@ brw_untyped_atomic(struct brw_compile *p, insn->header.access_mode == BRW_ALIGN_1); } +static void +brw_set_dp_untyped_surface_read_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint bind_table_index, + GLuint msg_length, + GLuint response_length, + bool header_present) +{ + const unsigned dispatch_width = + (insn->header.execution_size == BRW_EXECUTE_16 ? 16 : 8); + const unsigned num_channels = response_length / (dispatch_width / 8); + + if (p->brw->is_haswell) { + brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1, + msg_length, response_length, + header_present, false); + + insn->bits3.gen7_dp.msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ; + } else { + brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, + msg_length, response_length, + header_present, false); + + insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ; + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (dispatch_width == 16) + insn->bits3.ud |= 1 << 12; /* SIMD16 mode */ + else + insn->bits3.ud |= 2 << 12; /* SIMD8 mode */ + } + + insn->bits3.gen7_dp.binding_table_index = bind_table_index; + + /* Set mask of 32-bit channels to drop. */ + insn->bits3.ud |= (0xf & (0xf << num_channels)) << 8; +} + +void +brw_untyped_surface_read(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + GLuint bind_table_index, + GLuint msg_length, + GLuint response_length) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD)); + brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD)); + brw_set_dp_untyped_surface_read_message( + p, insn, bind_table_index, msg_length, response_length, + insn->header.access_mode == BRW_ALIGN_1); +} + /** * This instruction is generated as a single-channel align1 instruction by * both the VS and FS stages when using INTEL_DEBUG=shader_time. diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index f3ceaad97b7..76d8a2997ab 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -773,6 +773,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) case FS_OPCODE_SPILL: return 2; case SHADER_OPCODE_UNTYPED_ATOMIC: + case SHADER_OPCODE_UNTYPED_SURFACE_READ: return 0; default: assert(!"not reached"); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index bc67637f315..5b783137ddf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -559,6 +559,10 @@ private: struct brw_reg atomic_op, struct brw_reg surf_index); + void generate_untyped_surface_read(fs_inst *inst, + struct brw_reg dst, + struct brw_reg surf_index); + void mark_surface_used(unsigned surf_index); void patch_discard_jumps_to_fb_writes(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index f639d7e185b..ef858370dbb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1207,6 +1207,20 @@ fs_generator::generate_untyped_atomic(fs_inst *inst, struct brw_reg dst, } void +fs_generator::generate_untyped_surface_read(fs_inst *inst, struct brw_reg dst, + struct brw_reg surf_index) +{ + assert(surf_index.file == BRW_IMMEDIATE_VALUE && + surf_index.type == BRW_REGISTER_TYPE_UD); + + brw_untyped_surface_read(p, dst, brw_message_reg(inst->base_mrf), + surf_index.dw1.ud, + inst->mlen, dispatch_width / 8); + + mark_surface_used(surf_index.dw1.ud); +} + +void fs_generator::generate_code(exec_list *instructions) { int last_native_insn_offset = p->next_insn_offset; @@ -1609,6 +1623,10 @@ fs_generator::generate_code(exec_list *instructions) generate_untyped_atomic(inst, dst, src[0], src[1]); break; + case SHADER_OPCODE_UNTYPED_SURFACE_READ: + generate_untyped_surface_read(inst, dst, src[0]); + break; + case FS_OPCODE_SET_SIMD4X2_OFFSET: generate_set_simd4x2_offset(inst, dst, src[0]); break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index dbdf442bbf3..c1cfefa8687 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -277,6 +277,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst) case SHADER_OPCODE_TG4_OFFSET: return inst->header_present ? 1 : 0; case SHADER_OPCODE_UNTYPED_ATOMIC: + case SHADER_OPCODE_UNTYPED_SURFACE_READ: return 0; default: assert(!"not reached"); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 5ba32b64530..a479646c69b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -622,6 +622,10 @@ private: struct brw_reg atomic_op, struct brw_reg surf_index); + void generate_untyped_surface_read(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg surf_index); + void mark_surface_used(unsigned surf_index); struct brw_context *brw; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index e8e9f072d14..426f78c2abe 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -872,6 +872,21 @@ vec4_generator::generate_untyped_atomic(vec4_instruction *inst, mark_surface_used(surf_index.dw1.ud); } +void +vec4_generator::generate_untyped_surface_read(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg surf_index) +{ + assert(surf_index.file == BRW_IMMEDIATE_VALUE && + surf_index.type == BRW_REGISTER_TYPE_UD); + + brw_untyped_surface_read(p, dst, brw_message_reg(inst->base_mrf), + surf_index.dw1.ud, + inst->mlen, 1); + + mark_surface_used(surf_index.dw1.ud); +} + /** * Generate assembly for a Vec4 IR instruction. * @@ -1188,6 +1203,10 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, generate_untyped_atomic(inst, dst, src[0], src[1]); break; + case SHADER_OPCODE_UNTYPED_SURFACE_READ: + generate_untyped_surface_read(inst, dst, src[0]); + break; + case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: generate_unpack_flags(inst, dst); break; -- 2.11.0