From 3cf69b228404791cf15231321b6a18b5701be0a6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 18 Mar 2013 11:30:57 -0700 Subject: [PATCH] i965/fs: Bake regs_written into the IR instead of recomputing it later. For sampler messages, it depends on the target gen, and on gen4 SIMD16-sampler-on-SIMD8-execution we were returning 4 instead of 8 like we should. Reviewed-by: Kenneth Graunke NOTE: This is a candidate for the 9.1 branch. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 29 ++++++++-------------- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 6 ++--- .../drivers/dri/i965/brw_fs_live_variables.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 8 +++--- .../dri/i965/brw_fs_schedule_instructions.cpp | 6 ++--- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 7 ++++-- 7 files changed, 27 insertions(+), 33 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 703c3c5d8b4..84e88b1aa91 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -60,6 +60,9 @@ fs_inst::init() this->src[0] = reg_undef; this->src[1] = reg_undef; this->src[2] = reg_undef; + + /* This will be the case for almost all instructions. 
*/ + this->regs_written = 1; } fs_inst::fs_inst() @@ -254,6 +257,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index, fs_reg vec4_result = fs_reg(GRF, virtual_grf_alloc(4), dst.type); inst = new(mem_ctx) fs_inst(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7, vec4_result, surf_index, vec4_offset); + inst->regs_written = 4; instructions.push_tail(inst); vec4_result.reg_offset += const_offset & 3; @@ -329,26 +333,13 @@ fs_inst::equals(fs_inst *inst) offset == inst->offset); } -int -fs_inst::regs_written() -{ - if (is_tex() || opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7) - return 4; - - /* The SINCOS and INT_DIV_QUOTIENT_AND_REMAINDER math functions return 2, - * but we don't currently use them...nor do we have an opcode for them. - */ - - return 1; -} - bool fs_inst::overwrites_reg(const fs_reg &reg) { return (reg.file == dst.file && reg.reg == dst.reg && reg.reg_offset >= dst.reg_offset && - reg.reg_offset < dst.reg_offset + regs_written()); + reg.reg_offset < dst.reg_offset + regs_written); } bool @@ -1376,7 +1367,7 @@ fs_visitor::split_virtual_grfs() /* If there's a SEND message that requires contiguous destination * registers, no splitting is allowed. */ - if (inst->regs_written() > 1) { + if (inst->regs_written > 1) { split_grf[inst->dst.reg] = false; } @@ -2104,7 +2095,7 @@ fs_visitor::compute_to_mrf() /* Things returning more than one register would need us to * understand coalescing out more than one MOV at a time. */ - if (scan_inst->regs_written() > 1) + if (scan_inst->regs_written > 1) break; /* SEND instructions can't have MRF as a destination. 
*/ @@ -2321,7 +2312,7 @@ void fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst) { int reg_size = dispatch_width / 8; - int write_len = inst->regs_written() * reg_size; + int write_len = inst->regs_written * reg_size; int first_write_grf = inst->dst.reg; bool needs_dep[BRW_MAX_MRF]; assert(write_len < (int)sizeof(needs_dep) - 1); @@ -2361,7 +2352,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst) * dependency has more latency than a MOV. */ if (scan_inst->dst.file == GRF) { - for (int i = 0; i < scan_inst->regs_written(); i++) { + for (int i = 0; i < scan_inst->regs_written; i++) { int reg = scan_inst->dst.reg + i * reg_size; if (reg >= first_write_grf && @@ -2400,7 +2391,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst) void fs_visitor::insert_gen4_post_send_dependency_workarounds(fs_inst *inst) { - int write_len = inst->regs_written() * dispatch_width / 8; + int write_len = inst->regs_written * dispatch_width / 8; int first_write_grf = inst->dst.reg; bool needs_dep[BRW_MAX_MRF]; assert(write_len < (int)sizeof(needs_dep) - 1); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 06106c31bf6..0940489ed49 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -174,7 +174,6 @@ public: fs_reg src0, fs_reg src1,fs_reg src2); bool equals(fs_inst *inst); - int regs_written(); bool overwrites_reg(const fs_reg &reg); bool is_tex(); bool is_math(); @@ -192,6 +191,7 @@ public: uint8_t flag_subreg; int mlen; /**< SEND message length */ + int regs_written; /**< Number of vgrfs written by a SEND message, or 1 */ int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. 
*/ uint32_t texture_offset; /**< Texture offset bitfield */ int sampler; diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 5a50d45ddc9..8a8616d39c7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -130,7 +130,7 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb) */ bool no_existing_temp = entry->tmp.file == BAD_FILE; if (no_existing_temp) { - int written = entry->generator->regs_written(); + int written = entry->generator->regs_written; fs_reg orig_dst = entry->generator->dst; fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written), @@ -150,8 +150,8 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb) } /* dest <- temp */ - int written = inst->regs_written(); - assert(written == entry->generator->regs_written()); + int written = inst->regs_written; + assert(written == entry->generator->regs_written); assert(inst->dst.type == entry->tmp.type); fs_reg dst = inst->dst; fs_reg tmp = entry->tmp; diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp index 63af1480d56..373aa2d5e3f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp @@ -77,7 +77,7 @@ fs_live_variables::setup_def_use() * variable, and thus qualify for being in def[]. 
*/ if (inst->dst.file == GRF && - inst->regs_written() == v->virtual_grf_sizes[inst->dst.reg] && + inst->regs_written == v->virtual_grf_sizes[inst->dst.reg] && !inst->predicate && !inst->force_uncompressed && !inst->force_sechalf) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index b8936dcfe7d..4ee7bbc6659 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -553,7 +553,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) } if (inst->dst.file == GRF) { - spill_costs[inst->dst.reg] += inst->regs_written() * loop_scale; + spill_costs[inst->dst.reg] += inst->regs_written * loop_scale; if (inst->dst.smear >= 0) { no_spill[inst->dst.reg] = true; @@ -622,7 +622,7 @@ fs_visitor::spill_reg(int spill_reg) inst->dst.reg == spill_reg) { int subset_spill_offset = (spill_offset + REG_SIZE * inst->dst.reg_offset); - inst->dst.reg = virtual_grf_alloc(inst->regs_written()); + inst->dst.reg = virtual_grf_alloc(inst->regs_written); inst->dst.reg_offset = 0; /* If our write is going to affect just part of the @@ -631,7 +631,7 @@ fs_visitor::spill_reg(int spill_reg) */ if (inst->predicate || inst->force_uncompressed || inst->force_sechalf) { fs_reg unspill_reg = inst->dst; - for (int chan = 0; chan < inst->regs_written(); chan++) { + for (int chan = 0; chan < inst->regs_written; chan++) { emit_unspill(inst, unspill_reg, subset_spill_offset + REG_SIZE * chan); unspill_reg.reg_offset++; @@ -644,7 +644,7 @@ fs_visitor::spill_reg(int spill_reg) spill_src.negate = false; spill_src.smear = -1; - for (int chan = 0; chan < inst->regs_written(); chan++) { + for (int chan = 0; chan < inst->regs_written; chan++) { fs_inst *spill_inst = new(mem_ctx) fs_inst(FS_OPCODE_SPILL, reg_null_f, spill_src); spill_src.reg_offset++; diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index 
997341b153c..901a3621047 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -556,7 +556,7 @@ instruction_scheduler::calculate_deps() /* write-after-write deps. */ if (inst->dst.file == GRF) { if (post_reg_alloc) { - for (int r = 0; r < inst->regs_written() * reg_width; r++) { + for (int r = 0; r < inst->regs_written * reg_width; r++) { add_dep(last_grf_write[inst->dst.reg + r], n); last_grf_write[inst->dst.reg + r] = n; } @@ -663,7 +663,7 @@ instruction_scheduler::calculate_deps() */ if (inst->dst.file == GRF) { if (post_reg_alloc) { - for (int r = 0; r < inst->regs_written() * reg_width; r++) + for (int r = 0; r < inst->regs_written * reg_width; r++) last_grf_write[inst->dst.reg + r] = n; } else { last_grf_write[inst->dst.reg] = n; @@ -762,7 +762,7 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) schedule_node *n = (schedule_node *)node; chosen = n; - if (chosen->inst->regs_written() <= 1) + if (chosen->inst->regs_written <= 1) break; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 5d11e6719a6..d54d13410ae 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -261,7 +261,7 @@ fs_visitor::try_emit_saturate(ir_expression *ir) * src, generate a saturated MOV */ fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src); - if (!modify || modify->regs_written() != 1) { + if (!modify || modify->regs_written != 1) { this->result = fs_reg(this, ir->type); fs_inst *inst = emit(MOV(this->result, src)); inst->saturate = true; @@ -746,7 +746,7 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, /* If last_rhs_inst wrote a different number of components than our LHS, * we can't safely rewrite it. 
*/ - if (virtual_grf_sizes[dst.reg] != modify->regs_written()) + if (virtual_grf_sizes[dst.reg] != modify->regs_written) return false; /* Success! Rewrite the instruction. */ @@ -948,6 +948,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, inst->base_mrf = base_mrf; inst->mlen = mlen; inst->header_present = true; + inst->regs_written = simd16 ? 8 : 4; if (simd16) { for (int i = 0; i < 4; i++) { @@ -1089,6 +1090,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, inst->base_mrf = base_mrf; inst->mlen = mlen; inst->header_present = header_present; + inst->regs_written = 4; if (mlen > 11) { fail("Message length >11 disallowed by hardware\n"); @@ -1244,6 +1246,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, inst->base_mrf = base_mrf; inst->mlen = mlen; inst->header_present = header_present; + inst->regs_written = 4; if (mlen > 11) { fail("Message length >11 disallowed by hardware\n"); -- 2.11.0