From 9237f0ea8d176fb5dcd41868dcc723fe34f6b1f3 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 1 Oct 2012 15:28:56 -0700 Subject: [PATCH] i965/vs: Implement register spilling. To validate this code, I ran piglit -t vs quick.tests with the "go spill everything" debugging code enabled. There was only one regression: glsl-vs-unroll-explosion simply ran out of registers. This should be fine in the real world, since no one actually spills every single register. NOTE: This is a candidate for the 9.0 branch. Even if it proves to have bugs, it's likely better than simply failing to compile. Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_vec4.h | 3 + src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 14 +++ .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 128 ++++++++++++++++++++- 3 files changed, 144 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index deac55d6f01..407e227a51d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -314,6 +314,9 @@ public: void setup_payload(); void reg_allocate_trivial(); void reg_allocate(); + void evaluate_spill_costs(float *spill_costs, bool *no_spill); + int choose_spill_reg(struct ra_graph *g); + void spill_reg(int spill_reg); void move_grf_array_access_to_scratch(); void move_uniform_array_access_to_pull_constants(); void move_push_constants_to_pull_constants(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 27758abeb53..22671f3f6ce 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -809,6 +809,20 @@ vec4_visitor::run() return false; setup_payload(); + + if (false) { + /* Debug of register spilling: Go spill everything. 
This block is compiled but never executed (the condition is the literal false). The loop bound is the grf_count snapshot taken before any spilling, so only registers that existed at that point are visited even though spill_reg() calls virtual_grf_alloc() for new temporaries. */ + const int grf_count = virtual_grf_count; + float spill_costs[virtual_grf_count]; + bool no_spill[virtual_grf_count]; + evaluate_spill_costs(spill_costs, no_spill); + for (int i = 0; i < grf_count; i++) { + if (no_spill[i]) + continue; + spill_reg(i); + } + } + reg_allocate(); if (failed) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 2cda6784d0e..01ca6b5dc89 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -203,8 +203,16 @@ vec4_visitor::reg_allocate() } if (!ra_allocate_no_spills(g)) { + /* Failed to allocate registers. Spill a reg, and the caller will + * loop back into here to try again. + */ + int reg = choose_spill_reg(g); + if (reg == -1) { + fail("no register to spill\n"); + } else { + spill_reg(reg); + } ralloc_free(g); - fail("No register spilling support yet\n"); return; } @@ -233,4 +241,122 @@ vec4_visitor::reg_allocate() ralloc_free(g); } +void +vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) +{ /* Fills two caller-provided arrays, indexed here by virtual GRF number for virtual_grf_count entries: spill_costs[] accumulates loop_scale for every GRF source or destination reference, and no_spill[] flags registers that must not be picked (size other than 1, accessed through reladdr, or used by an existing scratch read/write). */ + float loop_scale = 1.0; + + for (int i = 0; i < this->virtual_grf_count; i++) { + spill_costs[i] = 0.0; + no_spill[i] = virtual_grf_sizes[i] != 1; + } + + /* Calculate costs for spilling nodes. Call it a cost of 1 per + * spill/unspill we'll have to do, and guess that the insides of + * loops run 10 times. 
+ */ + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *) node; + + for (unsigned int i = 0; i < 3; i++) { + if (inst->src[i].file == GRF) { + spill_costs[inst->src[i].reg] += loop_scale; + if (inst->src[i].reladdr) + no_spill[inst->src[i].reg] = true; + } + } + + if (inst->dst.file == GRF) { + spill_costs[inst->dst.reg] += loop_scale; + if (inst->dst.reladdr) + no_spill[inst->dst.reg] = true; + } + + switch (inst->opcode) { + + case BRW_OPCODE_DO: + loop_scale *= 10; /* entering a loop body: references inside weigh 10x more */ + break; + + case BRW_OPCODE_WHILE: + loop_scale /= 10; /* leaving the loop body: undo the scaling applied at DO */ + break; + + case VS_OPCODE_SCRATCH_READ: + case VS_OPCODE_SCRATCH_WRITE: /* GRF operands of scratch messages are excluded from spilling; presumably this keeps the temporaries created by spill_reg() from being spilled again -- TODO confirm. */ + for (int i = 0; i < 3; i++) { + if (inst->src[i].file == GRF) + no_spill[inst->src[i].reg] = true; + } + if (inst->dst.file == GRF) + no_spill[inst->dst.reg] = true; + break; + + default: + break; + } + } +} + +int +vec4_visitor::choose_spill_reg(struct ra_graph *g) +{ /* Computes per-register costs with evaluate_spill_costs(), passes the cost of every register not flagged no_spill to the graph through ra_set_node_spill_cost(), and returns whatever ra_get_best_spill_node(g) reports. The caller in reg_allocate() treats a return value of -1 as nothing being available to spill. */ + float spill_costs[this->virtual_grf_count]; + bool no_spill[this->virtual_grf_count]; + + evaluate_spill_costs(spill_costs, no_spill); + + for (int i = 0; i < this->virtual_grf_count; i++) { + if (!no_spill[i]) + ra_set_node_spill_cost(g, i, spill_costs[i]); + } + + return ra_get_best_spill_node(g); +} + +void +vec4_visitor::spill_reg(int spill_reg_nr) +{ /* Rewrites every reference to virtual GRF spill_reg_nr in the instruction list: a source reference is redirected to a freshly allocated size-1 register that is handed to emit_scratch_read(), and a destination reference is redirected to a freshly allocated size-1 register that is handed to emit_scratch_write(). Only size-1 registers are accepted (asserted below). Marks live intervals invalid on exit. */ + assert(virtual_grf_sizes[spill_reg_nr] == 1); + unsigned int spill_offset = c->last_scratch++; /* use the current c->last_scratch value as the scratch offset for this register, then bump it */ + + /* Generate spill/unspill instructions for the objects being spilled. */ + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *) node; + + for (unsigned int i = 0; i < 3; i++) { + if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) { + src_reg spill_reg = inst->src[i]; + inst->src[i].reg = virtual_grf_alloc(1); + dst_reg temp = dst_reg(inst->src[i]); + + /* Only read the necessary channels, to avoid overwriting the rest + * with data that may not have been written to scratch. 
+ */ + temp.writemask = 0; + for (int c = 0; c < 4; c++) /* NOTE(review): this index shadows the c used for c->last_scratch above; harmless here because only the index is used inside the loop. */ + temp.writemask |= (1 << BRW_GET_SWZ(inst->src[i].swizzle, c)); + assert(temp.writemask != 0); + + emit_scratch_read(inst, temp, spill_reg, spill_offset); + } + } + + if (inst->dst.file == GRF && inst->dst.reg == spill_reg_nr) { + dst_reg spill_reg = inst->dst; + inst->dst.reg = virtual_grf_alloc(1); + + /* We don't want a swizzle when reading from the source; read the + * whole register and use spill_reg's writemask to select which + * channels to write. + */ + src_reg temp = src_reg(inst->dst); + temp.swizzle = BRW_SWIZZLE_XYZW; + emit_scratch_write(inst, temp, spill_reg, spill_offset); + } + } + + this->live_intervals_valid = false; /* the instruction list and register set changed, so cached liveness no longer applies */ +} + } /* namespace brw */ -- 2.11.0