From 4c7206bafdd7bde7617e14840812e43459682718 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sun, 8 Dec 2013 04:57:08 +0100 Subject: [PATCH] i965/fs: Add support for sub-register byte offsets to the FS back-end IR. It would be nice if we could have a single 'reg_offset' field expressed in bytes that would serve the purpose of both, but the semantics of 'reg_offset' are quite complex currently (it's measured in units of one, eight or sixteen dwords depending on the register file and the dispatch width) and changing it to bytes would be a very intrusive change at this stage. Add a separate 'subreg_offset' field for now. Reviewed-by: Matt Turner Reviewed-by: Paul Berry --- src/mesa/drivers/dri/i965/brw_fs.cpp | 25 +++++++++++++++------- src/mesa/drivers/dri/i965/brw_fs.h | 14 ++++++++++++ .../drivers/dri/i965/brw_fs_copy_propagation.cpp | 7 ++++-- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 1 + src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 20 ++++++++--------- 5 files changed, 46 insertions(+), 21 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 773ecac2167..d82e3adfd6a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -432,6 +432,7 @@ fs_reg::equals(const fs_reg &r) const return (file == r.file && reg == r.reg && reg_offset == r.reg_offset && + subreg_offset == r.subreg_offset && type == r.type && negate == r.negate && abs == r.abs && @@ -1407,7 +1408,9 @@ fs_visitor::assign_curb_setup() constant_nr % 8); inst->src[i].file = HW_REG; - inst->src[i].fixed_hw_reg = retype(brw_reg, inst->src[i].type); + inst->src[i].fixed_hw_reg = byte_offset( + retype(brw_reg, inst->src[i].type), + inst->src[i].subreg_offset); } } } @@ -2473,7 +2476,8 @@ fs_visitor::compute_to_mrf() inst->is_partial_write() || inst->dst.file != MRF || inst->src[0].file != GRF || inst->dst.type != inst->src[0].type || - inst->src[0].abs || inst->src[0].negate || inst->src[0].smear != -1) + inst->src[0].abs || inst->src[0].negate || + inst->src[0].smear != -1 || inst->src[0].subreg_offset) continue; /* Work out which hardware MRF registers are written by this @@ -3008,8 +3012,9 @@ fs_visitor::dump_instruction(backend_instruction *be_inst) switch (inst->dst.file) { case GRF: printf("vgrf%d", inst->dst.reg); - if (virtual_grf_sizes[inst->dst.reg] != 1) - printf("+%d", inst->dst.reg_offset); + if (virtual_grf_sizes[inst->dst.reg] != 1 || + inst->dst.subreg_offset) + printf("+%d.%d", inst->dst.reg_offset, inst->dst.subreg_offset); break; case MRF: printf("m%d", inst->dst.reg); @@ -3061,16 +3066,20 @@ fs_visitor::dump_instruction(backend_instruction *be_inst) switch (inst->src[i].file) { case GRF: printf("vgrf%d", inst->src[i].reg); - if (virtual_grf_sizes[inst->src[i].reg] != 1) - printf("+%d", inst->src[i].reg_offset); + if (virtual_grf_sizes[inst->src[i].reg] != 1 || + inst->src[i].subreg_offset) + printf("+%d.%d", inst->src[i].reg_offset, + inst->src[i].subreg_offset); break; case MRF: printf("***m%d***", inst->src[i].reg); break; case UNIFORM: printf("u%d", inst->src[i].reg); - if (virtual_grf_sizes[inst->src[i].reg] != 1) - printf(".%d", inst->src[i].reg_offset); + if (virtual_grf_sizes[inst->src[i].reg] != 1 || + inst->src[i].subreg_offset) + printf("+%d.%d", inst->src[i].reg_offset, + inst->src[i].subreg_offset); break; case BAD_FILE: printf("(null)"); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 3d668b9e471..2ec2ee73f43 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -114,9 +114,23 @@ public: float f; } imm; + /** + * Offset in bytes from the start of the register. Values up to a + * backend_reg::reg_offset unit are valid. + */ + int subreg_offset; + fs_reg *reladdr; }; +static inline fs_reg +byte_offset(fs_reg reg, unsigned delta) +{ + assert(delta == 0 || (reg.file != HW_REG && reg.file != IMM)); + reg.subreg_offset += delta; + return reg; +} + static const fs_reg reg_undef; static const fs_reg reg_null_f(retype(brw_null_reg(), BRW_REGISTER_TYPE_F)); static const fs_reg reg_null_d(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index fc47d22a182..5cf019cd903 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -284,7 +284,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) if (inst->src[arg].file != entry->dst.file || inst->src[arg].reg != entry->dst.reg || - inst->src[arg].reg_offset != entry->dst.reg_offset) { + inst->src[arg].reg_offset != entry->dst.reg_offset || + inst->src[arg].subreg_offset != entry->dst.subreg_offset) { return false; } @@ -308,6 +309,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) inst->src[arg].reg_offset = entry->src.reg_offset; if (entry->src.smear != -1) inst->src[arg].smear = entry->src.smear; + inst->src[arg].subreg_offset = entry->src.subreg_offset; if (!inst->src[arg].abs) { inst->src[arg].abs = entry->src.abs; @@ -329,7 +331,8 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) for (int i = 2; i >= 0; i--) { if (inst->src[i].file != entry->dst.file || inst->src[i].reg != entry->dst.reg || - inst->src[i].reg_offset != entry->dst.reg_offset) + inst->src[i].reg_offset != entry->dst.reg_offset || + inst->src[i].subreg_offset != entry->dst.subreg_offset) continue; /* Don't bother with cases that should have been taken care of by the diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 4820b042876..0d7e8c5d4d0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1038,6 +1038,7 @@ brw_reg_from_fs_reg(fs_reg *reg) brw_reg = retype(brw_reg, reg->type); if (reg->sechalf) brw_reg = sechalf(brw_reg); + brw_reg = byte_offset(brw_reg, reg->subreg_offset); break; case IMM: switch (reg->type) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 2494b117e57..fb05aec43a7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -686,11 +686,12 @@ fs_visitor::spill_reg(int spill_reg) int regs_read = inst->regs_read(this, i); int subset_spill_offset = (spill_offset + reg_size * inst->src[i].reg_offset); + fs_reg unspill_dst(GRF, virtual_grf_alloc(regs_read)); - inst->src[i].reg = virtual_grf_alloc(regs_read); + inst->src[i].reg = unspill_dst.reg; inst->src[i].reg_offset = 0; - emit_unspill(inst, inst->src[i], subset_spill_offset, regs_read); + emit_unspill(inst, unspill_dst, subset_spill_offset, regs_read); } } @@ -698,24 +699,21 @@ fs_visitor::spill_reg(int spill_reg) inst->dst.reg == spill_reg) { int subset_spill_offset = (spill_offset + reg_size * inst->dst.reg_offset); - inst->dst.reg = virtual_grf_alloc(inst->regs_written); + fs_reg spill_src(GRF, virtual_grf_alloc(inst->regs_written)); + + inst->dst.reg = spill_src.reg; inst->dst.reg_offset = 0; /* If our write is going to affect just part of the * inst->regs_written(), then we need to unspill the destination * since we write back out all of the regs_written(). */ - if (inst->predicate || inst->force_uncompressed || inst->force_sechalf) { - emit_unspill(inst, inst->dst, subset_spill_offset, + if (inst->predicate || inst->force_uncompressed || + inst->force_sechalf || inst->dst.subreg_offset) { + emit_unspill(inst, spill_src, subset_spill_offset, inst->regs_written); } - fs_reg spill_src = inst->dst; - spill_src.reg_offset = 0; - spill_src.abs = false; - spill_src.negate = false; - spill_src.smear = -1; - for (int chan = 0; chan < inst->regs_written; chan++) { fs_inst *spill_inst = new(mem_ctx) fs_inst(SHADER_OPCODE_GEN4_SCRATCH_WRITE, -- 2.11.0