From 973f01d55a2fff699f28816e3946c15f138a4c39 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 1 Jan 2019 15:16:44 -0800 Subject: [PATCH] iris: Move create and bind driver hooks to the end of iris_program.c This just moves the code for dealing with pipe_shader_state / pipe_compute_state / iris_uncompiled_shader to the end of the file. Now that those do precompiles, they want to call the actual compile functions. Putting them at the end eliminates the need for a bunch of prototypes. --- src/gallium/drivers/iris/iris_program.c | 1782 +++++++++++++++---------------- 1 file changed, 882 insertions(+), 900 deletions(-) diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index 8b1fa56659a..eb341e94fdd 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -45,26 +45,6 @@ #define ALL_SAMPLERS_XYZW .tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688 #define KEY_INIT .program_string_id = ish->program_id, ALL_SAMPLERS_XYZW -static struct iris_compiled_shader * -iris_compile_vs(struct iris_context *, struct iris_uncompiled_shader *, - const struct brw_vs_prog_key *); -static struct iris_compiled_shader * -iris_compile_tcs(struct iris_context *, struct iris_uncompiled_shader *, - const struct brw_tcs_prog_key *); -static struct iris_compiled_shader * -iris_compile_tes(struct iris_context *, struct iris_uncompiled_shader *, - const struct brw_tes_prog_key *); -static struct iris_compiled_shader * -iris_compile_gs(struct iris_context *, struct iris_uncompiled_shader *, - const struct brw_gs_prog_key *); -static struct iris_compiled_shader * -iris_compile_fs(struct iris_context *, struct iris_uncompiled_shader *, - const struct brw_wm_prog_key *, struct brw_vue_map *); -static struct iris_compiled_shader * -iris_compile_cs(struct iris_context *, struct iris_uncompiled_shader *, - const struct brw_cs_prog_key *); - - static unsigned get_new_program_id(struct iris_screen *screen) { @@ 
-231,470 +211,160 @@ update_so_info(struct pipe_stream_output_info *so_info, } /** - * The pipe->create_[stage]_state() driver hooks. - * - * Performs basic NIR preprocessing, records any state dependencies, and - * returns an iris_uncompiled_shader as the Gallium CSO. + * Sets up the starting offsets for the groups of binding table entries + * common to all pipeline stages. * - * Actual shader compilation to assembly happens later, at first use. + * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're + * unused but also make sure that addition of small offsets to them will + * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES. */ -static void * -iris_create_uncompiled_shader(struct pipe_context *ctx, - nir_shader *nir, - const struct pipe_stream_output_info *so_info) +static uint32_t +assign_common_binding_table_offsets(const struct gen_device_info *devinfo, + const struct nir_shader *nir, + struct brw_stage_prog_data *prog_data, + uint32_t next_binding_table_offset, + unsigned num_system_values, + unsigned num_cbufs) { - struct iris_screen *screen = (struct iris_screen *)ctx->screen; - const struct gen_device_info *devinfo = &screen->devinfo; - - struct iris_uncompiled_shader *ish = - calloc(1, sizeof(struct iris_uncompiled_shader)); - if (!ish) - return NULL; + const struct shader_info *info = &nir->info; - nir = brw_preprocess_nir(screen->compiler, nir); + unsigned num_textures = util_last_bit(info->textures_used); - NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo); - NIR_PASS_V(nir, iris_lower_storage_image_derefs); + if (num_textures) { + prog_data->binding_table.texture_start = next_binding_table_offset; + prog_data->binding_table.gather_texture_start = next_binding_table_offset; + next_binding_table_offset += num_textures; + } else { + prog_data->binding_table.texture_start = 0xd0d0d0d0; + prog_data->binding_table.gather_texture_start = 0xd0d0d0d0; + } - ish->program_id = get_new_program_id(screen); - 
ish->nir = nir; - if (so_info) { - memcpy(&ish->stream_output, so_info, sizeof(*so_info)); - update_so_info(&ish->stream_output, nir->info.outputs_written); + if (info->num_images) { + prog_data->binding_table.image_start = next_binding_table_offset; + next_binding_table_offset += info->num_images; + } else { + prog_data->binding_table.image_start = 0xd0d0d0d0; } - return ish; -} + if (num_cbufs) { + //assert(info->num_ubos <= BRW_MAX_UBO); + prog_data->binding_table.ubo_start = next_binding_table_offset; + next_binding_table_offset += num_cbufs; + } else { + prog_data->binding_table.ubo_start = 0xd0d0d0d0; + } -static struct iris_uncompiled_shader * -iris_create_shader_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) -{ - assert(state->type == PIPE_SHADER_IR_NIR); + if (info->num_ssbos || info->num_abos) { + prog_data->binding_table.ssbo_start = next_binding_table_offset; + // XXX: see iris_state "wasting 16 binding table slots for ABOs" comment + next_binding_table_offset += IRIS_MAX_ABOS + info->num_ssbos; + } else { + prog_data->binding_table.ssbo_start = 0xd0d0d0d0; + } - return iris_create_uncompiled_shader(ctx, state->ir.nir, - &state->stream_output); -} + prog_data->binding_table.shader_time_start = 0xd0d0d0d0; -static void * -iris_create_vs_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) -{ - struct iris_context *ice = (void *) ctx; - struct iris_screen *screen = (void *) ctx->screen; - struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state); + /* Plane 0 is just the regular texture section */ + prog_data->binding_table.plane_start[0] = prog_data->binding_table.texture_start; - /* User clip planes */ - if (ish->nir->info.clip_distance_array_size == 0) - ish->nos |= (1ull << IRIS_NOS_RASTERIZER); + prog_data->binding_table.plane_start[1] = next_binding_table_offset; + next_binding_table_offset += num_textures; - if (screen->precompile) { - struct brw_vs_prog_key key = { KEY_INIT }; + 
prog_data->binding_table.plane_start[2] = next_binding_table_offset; + next_binding_table_offset += num_textures; - iris_compile_vs(ice, ish, &key); - } + /* Set the binding table size */ + prog_data->binding_table.size_bytes = next_binding_table_offset * 4; - return ish; + return next_binding_table_offset; } -static void * -iris_create_tcs_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) +static void +setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx, + unsigned offset, unsigned n) { - struct iris_context *ice = (void *) ctx; - struct iris_screen *screen = (void *) ctx->screen; - struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state); - struct shader_info *info = &ish->nir->info; - - // XXX: NOS? - - if (screen->precompile) { - const unsigned _GL_TRIANGLES = 0x0004; - struct brw_tcs_prog_key key = { - KEY_INIT, - // XXX: make sure the linker fills this out from the TES... - .tes_primitive_mode = - info->tess.primitive_mode ? info->tess.primitive_mode - : _GL_TRIANGLES, - .outputs_written = info->outputs_written, - .patch_outputs_written = info->patch_outputs_written, - }; + assert(offset % sizeof(uint32_t) == 0); - iris_compile_tcs(ice, ish, &key); - } + for (unsigned i = 0; i < n; ++i) + sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i); - return ish; + for (unsigned i = n; i < 4; ++i) + sysvals[i] = BRW_PARAM_BUILTIN_ZERO; } -static void * -iris_create_tes_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) +/** + * Associate NIR uniform variables with the prog_data->param[] mechanism + * used by the backend. Also, decide which UBOs we'd like to push in an + * ideal situation (though the backend can reduce this). 
+ */ +static void +iris_setup_uniforms(const struct brw_compiler *compiler, + void *mem_ctx, + nir_shader *nir, + struct brw_stage_prog_data *prog_data, + enum brw_param_builtin **out_system_values, + unsigned *out_num_system_values, + unsigned *out_num_cbufs) { - struct iris_context *ice = (void *) ctx; - struct iris_screen *screen = (void *) ctx->screen; - struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state); - struct shader_info *info = &ish->nir->info; - - // XXX: NOS? + const struct gen_device_info *devinfo = compiler->devinfo; - if (screen->precompile) { - struct brw_tes_prog_key key = { - KEY_INIT, - // XXX: not ideal, need TCS output/TES input unification - .inputs_read = info->inputs_read, - .patch_inputs_read = info->patch_inputs_read, - }; + /* The intel compiler assumes that num_uniforms is in bytes. For + * scalar that means 4 bytes per uniform slot. + * + * Ref: brw_nir_lower_uniforms, type_size_scalar_bytes. + */ + nir->num_uniforms *= 4; - iris_compile_tes(ice, ish, &key); - } + const unsigned IRIS_MAX_SYSTEM_VALUES = + PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE; + enum brw_param_builtin *system_values = + rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES); + unsigned num_system_values = 0; - return ish; -} + unsigned patch_vert_idx = -1; + unsigned ucp_idx[IRIS_MAX_CLIP_PLANES]; + unsigned img_idx[PIPE_MAX_SHADER_IMAGES]; + memset(ucp_idx, -1, sizeof(ucp_idx)); + memset(img_idx, -1, sizeof(img_idx)); -static void * -iris_create_gs_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) -{ - struct iris_context *ice = (void *) ctx; - struct iris_screen *screen = (void *) ctx->screen; - struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state); + nir_function_impl *impl = nir_shader_get_entrypoint(nir); - // XXX: NOS? 
+ nir_builder b; + nir_builder_init(&b, impl); - if (screen->precompile) { - struct brw_gs_prog_key key = { KEY_INIT }; + b.cursor = nir_before_block(nir_start_block(impl)); + nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32); - iris_compile_gs(ice, ish, &key); - } + /* Turn system value intrinsics into uniforms */ + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; - return ish; -} + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + nir_ssa_def *offset; -static void * -iris_create_fs_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) -{ - struct iris_context *ice = (void *) ctx; - struct iris_screen *screen = (void *) ctx->screen; - struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state); - struct shader_info *info = &ish->nir->info; + switch (intrin->intrinsic) { + case nir_intrinsic_load_user_clip_plane: { + unsigned ucp = nir_intrinsic_ucp_id(intrin); - ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) | - (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) | - (1ull << IRIS_NOS_RASTERIZER) | - (1ull << IRIS_NOS_BLEND); + if (ucp_idx[ucp] == -1) { + ucp_idx[ucp] = num_system_values; + num_system_values += 4; + } - /* The program key needs the VUE map if there are > 16 inputs */ - if (util_bitcount64(ish->nir->info.inputs_read & - BRW_FS_VARYING_INPUT_MASK) > 16) { - ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP); - } + for (int i = 0; i < 4; i++) { + system_values[ucp_idx[ucp] + i] = + BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i); + } - if (screen->precompile) { - const uint64_t color_outputs = info->outputs_written & - ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) | - BITFIELD64_BIT(FRAG_RESULT_STENCIL) | - BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)); - - bool can_rearrange_varyings = - util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16; - - struct brw_wm_prog_key key = { - KEY_INIT, - .nr_color_regions = util_bitcount(color_outputs), - 
.coherent_fb_fetch = true, - .input_slots_valid = - can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS, - }; - - iris_compile_fs(ice, ish, &key, NULL); - } - - return ish; -} - -static void * -iris_create_compute_state(struct pipe_context *ctx, - const struct pipe_compute_state *state) -{ - assert(state->ir_type == PIPE_SHADER_IR_NIR); - - struct iris_context *ice = (void *) ctx; - struct iris_screen *screen = (void *) ctx->screen; - struct iris_uncompiled_shader *ish = - iris_create_uncompiled_shader(ctx, (void *) state->prog, NULL); - - // XXX: disallow more than 64KB of shared variables - - if (screen->precompile) { - struct brw_cs_prog_key key = { KEY_INIT }; - - iris_compile_cs(ice, ish, &key); - } - - return ish; -} - -/** - * The pipe->delete_[stage]_state() driver hooks. - * - * Frees the iris_uncompiled_shader. - */ -static void -iris_delete_shader_state(struct pipe_context *ctx, void *state) -{ - struct iris_uncompiled_shader *ish = state; - - ralloc_free(ish->nir); - free(ish); -} - -/** - * The pipe->bind_[stage]_state() driver hook. - * - * Binds an uncompiled shader as the current one for a particular stage. - * Updates dirty tracking to account for the shader's NOS. - */ -static void -bind_state(struct iris_context *ice, - struct iris_uncompiled_shader *ish, - gl_shader_stage stage) -{ - uint64_t dirty_bit = IRIS_DIRTY_UNCOMPILED_VS << stage; - const uint64_t nos = ish ? ish->nos : 0; - - ice->shaders.uncompiled[stage] = ish; - ice->state.dirty |= dirty_bit; - - /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change - * (or that they no longer need to do so). 
- */ - for (int i = 0; i < IRIS_NOS_COUNT; i++) { - if (nos & (1 << i)) - ice->state.dirty_for_nos[i] |= dirty_bit; - else - ice->state.dirty_for_nos[i] &= ~dirty_bit; - } -} - -static void -iris_bind_vs_state(struct pipe_context *ctx, void *state) -{ - bind_state((void *) ctx, state, MESA_SHADER_VERTEX); -} - -static void -iris_bind_tcs_state(struct pipe_context *ctx, void *state) -{ - bind_state((void *) ctx, state, MESA_SHADER_TESS_CTRL); -} - -static void -iris_bind_tes_state(struct pipe_context *ctx, void *state) -{ - struct iris_context *ice = (struct iris_context *)ctx; - - /* Enabling/disabling optional stages requires a URB reconfiguration. */ - if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]) - ice->state.dirty |= IRIS_DIRTY_URB; - - bind_state((void *) ctx, state, MESA_SHADER_TESS_EVAL); -} - -static void -iris_bind_gs_state(struct pipe_context *ctx, void *state) -{ - struct iris_context *ice = (struct iris_context *)ctx; - - /* Enabling/disabling optional stages requires a URB reconfiguration. 
*/ - if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]) - ice->state.dirty |= IRIS_DIRTY_URB; - - bind_state((void *) ctx, state, MESA_SHADER_GEOMETRY); -} - -static void -iris_bind_fs_state(struct pipe_context *ctx, void *state) -{ - struct iris_context *ice = (struct iris_context *) ctx; - struct iris_uncompiled_shader *old_ish = - ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]; - struct iris_uncompiled_shader *new_ish = state; - - const unsigned color_bits = - BITFIELD64_BIT(FRAG_RESULT_COLOR) | - BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS); - - /* Fragment shader outputs influence HasWriteableRT */ - if (!old_ish || !new_ish || - (old_ish->nir->info.outputs_written & color_bits) != - (new_ish->nir->info.outputs_written & color_bits)) - ice->state.dirty |= IRIS_DIRTY_PS_BLEND; - - bind_state((void *) ctx, state, MESA_SHADER_FRAGMENT); -} - -static void -iris_bind_cs_state(struct pipe_context *ctx, void *state) -{ - bind_state((void *) ctx, state, MESA_SHADER_COMPUTE); -} - -/** - * Sets up the starting offsets for the groups of binding table entries - * common to all pipeline stages. - * - * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're - * unused but also make sure that addition of small offsets to them will - * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES. 
- */ -static uint32_t -assign_common_binding_table_offsets(const struct gen_device_info *devinfo, - const struct nir_shader *nir, - struct brw_stage_prog_data *prog_data, - uint32_t next_binding_table_offset, - unsigned num_system_values, - unsigned num_cbufs) -{ - const struct shader_info *info = &nir->info; - - unsigned num_textures = util_last_bit(info->textures_used); - - if (num_textures) { - prog_data->binding_table.texture_start = next_binding_table_offset; - prog_data->binding_table.gather_texture_start = next_binding_table_offset; - next_binding_table_offset += num_textures; - } else { - prog_data->binding_table.texture_start = 0xd0d0d0d0; - prog_data->binding_table.gather_texture_start = 0xd0d0d0d0; - } - - if (info->num_images) { - prog_data->binding_table.image_start = next_binding_table_offset; - next_binding_table_offset += info->num_images; - } else { - prog_data->binding_table.image_start = 0xd0d0d0d0; - } - - if (num_cbufs) { - //assert(info->num_ubos <= BRW_MAX_UBO); - prog_data->binding_table.ubo_start = next_binding_table_offset; - next_binding_table_offset += num_cbufs; - } else { - prog_data->binding_table.ubo_start = 0xd0d0d0d0; - } - - if (info->num_ssbos || info->num_abos) { - prog_data->binding_table.ssbo_start = next_binding_table_offset; - // XXX: see iris_state "wasting 16 binding table slots for ABOs" comment - next_binding_table_offset += IRIS_MAX_ABOS + info->num_ssbos; - } else { - prog_data->binding_table.ssbo_start = 0xd0d0d0d0; - } - - prog_data->binding_table.shader_time_start = 0xd0d0d0d0; - - /* Plane 0 is just the regular texture section */ - prog_data->binding_table.plane_start[0] = prog_data->binding_table.texture_start; - - prog_data->binding_table.plane_start[1] = next_binding_table_offset; - next_binding_table_offset += num_textures; - - prog_data->binding_table.plane_start[2] = next_binding_table_offset; - next_binding_table_offset += num_textures; - - /* Set the binding table size */ - 
prog_data->binding_table.size_bytes = next_binding_table_offset * 4; - - return next_binding_table_offset; -} - -static void -setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx, - unsigned offset, unsigned n) -{ - assert(offset % sizeof(uint32_t) == 0); - - for (unsigned i = 0; i < n; ++i) - sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i); - - for (unsigned i = n; i < 4; ++i) - sysvals[i] = BRW_PARAM_BUILTIN_ZERO; -} - -/** - * Associate NIR uniform variables with the prog_data->param[] mechanism - * used by the backend. Also, decide which UBOs we'd like to push in an - * ideal situation (though the backend can reduce this). - */ -static void -iris_setup_uniforms(const struct brw_compiler *compiler, - void *mem_ctx, - nir_shader *nir, - struct brw_stage_prog_data *prog_data, - enum brw_param_builtin **out_system_values, - unsigned *out_num_system_values, - unsigned *out_num_cbufs) -{ - const struct gen_device_info *devinfo = compiler->devinfo; - - /* The intel compiler assumes that num_uniforms is in bytes. For - * scalar that means 4 bytes per uniform slot. - * - * Ref: brw_nir_lower_uniforms, type_size_scalar_bytes. 
- */ - nir->num_uniforms *= 4; - - const unsigned IRIS_MAX_SYSTEM_VALUES = - PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE; - enum brw_param_builtin *system_values = - rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES); - unsigned num_system_values = 0; - - unsigned patch_vert_idx = -1; - unsigned ucp_idx[IRIS_MAX_CLIP_PLANES]; - unsigned img_idx[PIPE_MAX_SHADER_IMAGES]; - memset(ucp_idx, -1, sizeof(ucp_idx)); - memset(img_idx, -1, sizeof(img_idx)); - - nir_function_impl *impl = nir_shader_get_entrypoint(nir); - - nir_builder b; - nir_builder_init(&b, impl); - - b.cursor = nir_before_block(nir_start_block(impl)); - nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32); - - /* Turn system value intrinsics into uniforms */ - nir_foreach_block(block, impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - nir_ssa_def *offset; - - switch (intrin->intrinsic) { - case nir_intrinsic_load_user_clip_plane: { - unsigned ucp = nir_intrinsic_ucp_id(intrin); - - if (ucp_idx[ucp] == -1) { - ucp_idx[ucp] = num_system_values; - num_system_values += 4; - } - - for (int i = 0; i < 4; i++) { - system_values[ucp_idx[ucp] + i] = - BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i); - } - - b.cursor = nir_before_instr(instr); - offset = nir_imm_int(&b, ucp_idx[ucp] * sizeof(uint32_t)); - break; - } - case nir_intrinsic_load_patch_vertices_in: - if (patch_vert_idx == -1) - patch_vert_idx = num_system_values++; + b.cursor = nir_before_instr(instr); + offset = nir_imm_int(&b, ucp_idx[ucp] * sizeof(uint32_t)); + break; + } + case nir_intrinsic_load_patch_vertices_in: + if (patch_vert_idx == -1) + patch_vert_idx = num_system_values++; system_values[patch_vert_idx] = BRW_PARAM_BUILTIN_PATCH_VERTICES_IN; @@ -982,76 +652,267 @@ get_unified_tess_slots(const struct iris_context *ice, const struct shader_info *tes = iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL); - 
*per_vertex_slots = tes->inputs_read; - *per_patch_slots = tes->patch_inputs_read; + *per_vertex_slots = tes->inputs_read; + *per_patch_slots = tes->patch_inputs_read; + + if (tcs) { + *per_vertex_slots |= tcs->outputs_written; + *per_patch_slots |= tcs->patch_outputs_written; + } +} + +/** + * Compile a tessellation control shader, and upload the assembly. + */ +static struct iris_compiled_shader * +iris_compile_tcs(struct iris_context *ice, + struct iris_uncompiled_shader *ish, + const struct brw_tcs_prog_key *key) +{ + struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; + const struct brw_compiler *compiler = screen->compiler; + const struct nir_shader_compiler_options *options = + compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions; + const struct gen_device_info *devinfo = &screen->devinfo; + void *mem_ctx = ralloc_context(NULL); + struct brw_tcs_prog_data *tcs_prog_data = + rzalloc(mem_ctx, struct brw_tcs_prog_data); + struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base; + struct brw_stage_prog_data *prog_data = &vue_prog_data->base; + enum brw_param_builtin *system_values = NULL; + unsigned num_system_values = 0; + unsigned num_cbufs; + + nir_shader *nir; + + if (ish) { + nir = nir_shader_clone(mem_ctx, ish->nir); + + iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, + &num_system_values, &num_cbufs); + assign_common_binding_table_offsets(devinfo, nir, prog_data, 0, + num_system_values, num_cbufs); + } else { + nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, key); + + /* Reserve space for passing the default tess levels as constants. 
*/ + prog_data->param = rzalloc_array(mem_ctx, uint32_t, 8); + prog_data->nr_params = 8; + prog_data->ubo_ranges[0].length = 1; + } + + char *error_str = NULL; + const unsigned *program = + brw_compile_tcs(compiler, &ice->dbg, mem_ctx, key, tcs_prog_data, nir, + -1, &error_str); + if (program == NULL) { + dbg_printf("Failed to compile control shader: %s\n", error_str); + ralloc_free(mem_ctx); + return false; + } + + struct iris_compiled_shader *shader = + iris_upload_shader(ice, IRIS_CACHE_TCS, sizeof(*key), key, program, + prog_data, NULL, system_values, num_system_values, + num_cbufs); + + if (ish) { + if (ish->compiled_once) { + perf_debug(&ice->dbg, "Recompiling tessellation control shader\n"); + } else { + ish->compiled_once = true; + } + } + + ralloc_free(mem_ctx); + return shader; +} + +/** + * Update the current tessellation control shader variant. + * + * Fill out the key, look in the cache, compile and bind if needed. + */ +static void +iris_update_compiled_tcs(struct iris_context *ice) +{ + struct iris_uncompiled_shader *tcs = + ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL]; + + const struct shader_info *tes_info = + iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL); + struct brw_tcs_prog_key key = { + ALL_SAMPLERS_XYZW, + .program_string_id = tcs ? 
tcs->program_id : 0, + .tes_primitive_mode = tes_info->tess.primitive_mode, + .input_vertices = ice->state.vertices_per_patch, + }; + get_unified_tess_slots(ice, &key.outputs_written, + &key.patch_outputs_written); + ice->vtbl.populate_tcs_key(ice, &key); + + struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS]; + struct iris_compiled_shader *shader = + iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key); + + if (!shader) + shader = iris_compile_tcs(ice, tcs, &key); + + if (old != shader) { + ice->shaders.prog[IRIS_CACHE_TCS] = shader; + ice->state.dirty |= IRIS_DIRTY_TCS | + IRIS_DIRTY_BINDINGS_TCS | + IRIS_DIRTY_CONSTANTS_TCS; + } +} + +/** + * Compile a tessellation evaluation shader, and upload the assembly. + */ +static struct iris_compiled_shader * +iris_compile_tes(struct iris_context *ice, + struct iris_uncompiled_shader *ish, + const struct brw_tes_prog_key *key) +{ + struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; + const struct brw_compiler *compiler = screen->compiler; + const struct gen_device_info *devinfo = &screen->devinfo; + void *mem_ctx = ralloc_context(NULL); + struct brw_tes_prog_data *tes_prog_data = + rzalloc(mem_ctx, struct brw_tes_prog_data); + struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base; + struct brw_stage_prog_data *prog_data = &vue_prog_data->base; + enum brw_param_builtin *system_values; + unsigned num_system_values; + unsigned num_cbufs; + + nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir); + + iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, + &num_system_values, &num_cbufs); + + assign_common_binding_table_offsets(devinfo, nir, prog_data, 0, + num_system_values, num_cbufs); + + struct brw_vue_map input_vue_map; + brw_compute_tess_vue_map(&input_vue_map, key->inputs_read, + key->patch_inputs_read); + + char *error_str = NULL; + const unsigned *program = + brw_compile_tes(compiler, &ice->dbg, mem_ctx, key, &input_vue_map, + tes_prog_data, nir, 
NULL, -1, &error_str); + if (program == NULL) { + dbg_printf("Failed to compile evaluation shader: %s\n", error_str); + ralloc_free(mem_ctx); + return false; + } + + uint32_t *so_decls = + ice->vtbl.create_so_decl_list(&ish->stream_output, + &vue_prog_data->vue_map); + + + struct iris_compiled_shader *shader = + iris_upload_shader(ice, IRIS_CACHE_TES, sizeof(*key), key, program, + prog_data, so_decls, system_values, num_system_values, + num_cbufs); + + if (ish->compiled_once) { + perf_debug(&ice->dbg, "Recompiling tessellation evaluation shader\n"); + } else { + ish->compiled_once = true; + } + + ralloc_free(mem_ctx); + return shader; +} + +/** + * Update the current tessellation evaluation shader variant. + * + * Fill out the key, look in the cache, compile and bind if needed. + */ +static void +iris_update_compiled_tes(struct iris_context *ice) +{ + struct iris_uncompiled_shader *ish = + ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]; + + struct brw_tes_prog_key key = { KEY_INIT }; + get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read); + ice->vtbl.populate_tes_key(ice, &key); + + struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES]; + struct iris_compiled_shader *shader = + iris_find_cached_shader(ice, IRIS_CACHE_TES, sizeof(key), &key); - if (tcs) { - *per_vertex_slots |= tcs->outputs_written; - *per_patch_slots |= tcs->patch_outputs_written; + if (!shader) + shader = iris_compile_tes(ice, ish, &key); + + if (old != shader) { + ice->shaders.prog[IRIS_CACHE_TES] = shader; + ice->state.dirty |= IRIS_DIRTY_TES | + IRIS_DIRTY_BINDINGS_TES | + IRIS_DIRTY_CONSTANTS_TES; } } /** - * Compile a tessellation control shader, and upload the assembly. + * Compile a geometry shader, and upload the assembly. 
*/ static struct iris_compiled_shader * -iris_compile_tcs(struct iris_context *ice, - struct iris_uncompiled_shader *ish, - const struct brw_tcs_prog_key *key) +iris_compile_gs(struct iris_context *ice, + struct iris_uncompiled_shader *ish, + const struct brw_gs_prog_key *key) { struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; const struct brw_compiler *compiler = screen->compiler; - const struct nir_shader_compiler_options *options = - compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions; const struct gen_device_info *devinfo = &screen->devinfo; void *mem_ctx = ralloc_context(NULL); - struct brw_tcs_prog_data *tcs_prog_data = - rzalloc(mem_ctx, struct brw_tcs_prog_data); - struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base; + struct brw_gs_prog_data *gs_prog_data = + rzalloc(mem_ctx, struct brw_gs_prog_data); + struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base; struct brw_stage_prog_data *prog_data = &vue_prog_data->base; - enum brw_param_builtin *system_values = NULL; - unsigned num_system_values = 0; + enum brw_param_builtin *system_values; + unsigned num_system_values; unsigned num_cbufs; - nir_shader *nir; + nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir); - if (ish) { - nir = nir_shader_clone(mem_ctx, ish->nir); + iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, + &num_system_values, &num_cbufs); - iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, - &num_system_values, &num_cbufs); - assign_common_binding_table_offsets(devinfo, nir, prog_data, 0, - num_system_values, num_cbufs); - } else { - nir = brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, key); + assign_common_binding_table_offsets(devinfo, nir, prog_data, 0, + num_system_values, num_cbufs); - /* Reserve space for passing the default tess levels as constants. 
*/ - prog_data->param = rzalloc_array(mem_ctx, uint32_t, 8); - prog_data->nr_params = 8; - prog_data->ubo_ranges[0].length = 1; - } + brw_compute_vue_map(devinfo, + &vue_prog_data->vue_map, nir->info.outputs_written, + nir->info.separate_shader); char *error_str = NULL; const unsigned *program = - brw_compile_tcs(compiler, &ice->dbg, mem_ctx, key, tcs_prog_data, nir, - -1, &error_str); + brw_compile_gs(compiler, &ice->dbg, mem_ctx, key, gs_prog_data, nir, + NULL, -1, &error_str); if (program == NULL) { - dbg_printf("Failed to compile control shader: %s\n", error_str); + dbg_printf("Failed to compile geometry shader: %s\n", error_str); ralloc_free(mem_ctx); return false; } + uint32_t *so_decls = + ice->vtbl.create_so_decl_list(&ish->stream_output, + &vue_prog_data->vue_map); + struct iris_compiled_shader *shader = - iris_upload_shader(ice, IRIS_CACHE_TCS, sizeof(*key), key, program, - prog_data, NULL, system_values, num_system_values, + iris_upload_shader(ice, IRIS_CACHE_GS, sizeof(*key), key, program, + prog_data, so_decls, system_values, num_system_values, num_cbufs); - if (ish) { - if (ish->compiled_once) { - perf_debug(&ice->dbg, "Recompiling tessellation control shader\n"); - } else { - ish->compiled_once = true; - } + if (ish->compiled_once) { + perf_debug(&ice->dbg, "Recompiling geometry shader\n"); + } else { + ish->compiled_once = true; } ralloc_free(mem_ctx); @@ -1059,188 +920,317 @@ iris_compile_tcs(struct iris_context *ice, } /** - * Update the current tessellation control shader variant. + * Update the current geometry shader variant. * * Fill out the key, look in the cache, compile and bind if needed. 
*/ static void -iris_update_compiled_tcs(struct iris_context *ice) +iris_update_compiled_gs(struct iris_context *ice) { - struct iris_uncompiled_shader *tcs = - ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL]; + struct iris_uncompiled_shader *ish = + ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]; + struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS]; + struct iris_compiled_shader *shader = NULL; - const struct shader_info *tes_info = - iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL); - struct brw_tcs_prog_key key = { - ALL_SAMPLERS_XYZW, - .program_string_id = tcs ? tcs->program_id : 0, - .tes_primitive_mode = tes_info->tess.primitive_mode, - .input_vertices = ice->state.vertices_per_patch, - }; - get_unified_tess_slots(ice, &key.outputs_written, - &key.patch_outputs_written); - ice->vtbl.populate_tcs_key(ice, &key); + if (ish) { + struct brw_gs_prog_key key = { KEY_INIT }; + ice->vtbl.populate_gs_key(ice, &key); - struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS]; - struct iris_compiled_shader *shader = - iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key); + shader = + iris_find_cached_shader(ice, IRIS_CACHE_GS, sizeof(key), &key); - if (!shader) - shader = iris_compile_tcs(ice, tcs, &key); + if (!shader) + shader = iris_compile_gs(ice, ish, &key); + } if (old != shader) { - ice->shaders.prog[IRIS_CACHE_TCS] = shader; - ice->state.dirty |= IRIS_DIRTY_TCS | - IRIS_DIRTY_BINDINGS_TCS | - IRIS_DIRTY_CONSTANTS_TCS; + ice->shaders.prog[IRIS_CACHE_GS] = shader; + ice->state.dirty |= IRIS_DIRTY_GS | + IRIS_DIRTY_BINDINGS_GS | + IRIS_DIRTY_CONSTANTS_GS; } } /** - * Compile a tessellation evaluation shader, and upload the assembly. + * Compile a fragment (pixel) shader, and upload the assembly. 
*/ static struct iris_compiled_shader * -iris_compile_tes(struct iris_context *ice, - struct iris_uncompiled_shader *ish, - const struct brw_tes_prog_key *key) +iris_compile_fs(struct iris_context *ice, + struct iris_uncompiled_shader *ish, + const struct brw_wm_prog_key *key, + struct brw_vue_map *vue_map) { struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; const struct brw_compiler *compiler = screen->compiler; const struct gen_device_info *devinfo = &screen->devinfo; void *mem_ctx = ralloc_context(NULL); - struct brw_tes_prog_data *tes_prog_data = - rzalloc(mem_ctx, struct brw_tes_prog_data); - struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base; - struct brw_stage_prog_data *prog_data = &vue_prog_data->base; + struct brw_wm_prog_data *fs_prog_data = + rzalloc(mem_ctx, struct brw_wm_prog_data); + struct brw_stage_prog_data *prog_data = &fs_prog_data->base; enum brw_param_builtin *system_values; unsigned num_system_values; unsigned num_cbufs; nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir); + if (nir->info.name && strncmp(nir->info.name, "ARB", 3) == 0) + prog_data->use_alt_mode = true; + iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, &num_system_values, &num_cbufs); - assign_common_binding_table_offsets(devinfo, nir, prog_data, 0, + assign_common_binding_table_offsets(devinfo, nir, prog_data, + MAX2(key->nr_color_regions, 1), num_system_values, num_cbufs); - - struct brw_vue_map input_vue_map; - brw_compute_tess_vue_map(&input_vue_map, key->inputs_read, - key->patch_inputs_read); - char *error_str = NULL; const unsigned *program = - brw_compile_tes(compiler, &ice->dbg, mem_ctx, key, &input_vue_map, - tes_prog_data, nir, NULL, -1, &error_str); + brw_compile_fs(compiler, &ice->dbg, mem_ctx, key, fs_prog_data, + nir, NULL, -1, -1, -1, true, false, vue_map, &error_str); if (program == NULL) { - dbg_printf("Failed to compile evaluation shader: %s\n", error_str); + dbg_printf("Failed to compile fragment 
shader: %s\n", error_str); ralloc_free(mem_ctx); return false; } - uint32_t *so_decls = - ice->vtbl.create_so_decl_list(&ish->stream_output, - &vue_prog_data->vue_map); + struct iris_compiled_shader *shader = + iris_upload_shader(ice, IRIS_CACHE_FS, sizeof(*key), key, program, + prog_data, NULL, system_values, num_system_values, + num_cbufs); + + if (ish->compiled_once) { + perf_debug(&ice->dbg, "Recompiling fragment shader\n"); + } else { + ish->compiled_once = true; + } + + ralloc_free(mem_ctx); + return shader; +} + +/** + * Update the current fragment shader variant. + * + * Fill out the key, look in the cache, compile and bind if needed. + */ +static void +iris_update_compiled_fs(struct iris_context *ice) +{ + struct iris_uncompiled_shader *ish = + ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]; + struct brw_wm_prog_key key = { KEY_INIT }; + ice->vtbl.populate_fs_key(ice, &key); + + if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP)) + key.input_slots_valid = ice->shaders.last_vue_map->slots_valid; + + struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS]; + struct iris_compiled_shader *shader = + iris_find_cached_shader(ice, IRIS_CACHE_FS, sizeof(key), &key); + + if (!shader) + shader = iris_compile_fs(ice, ish, &key, ice->shaders.last_vue_map); + + if (old != shader) { + // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE + // toggles. might be able to avoid flagging SBE too. + ice->shaders.prog[IRIS_CACHE_FS] = shader; + ice->state.dirty |= IRIS_DIRTY_FS | + IRIS_DIRTY_BINDINGS_FS | + IRIS_DIRTY_CONSTANTS_FS | + IRIS_DIRTY_WM | + IRIS_DIRTY_CLIP | + IRIS_DIRTY_SBE; + } +} + +/** + * Get the compiled shader for the last enabled geometry stage. + * + * This stage is the one which will feed stream output and the rasterizer. 
+ */ +static gl_shader_stage +last_vue_stage(struct iris_context *ice) +{ + if (ice->shaders.prog[MESA_SHADER_GEOMETRY]) + return MESA_SHADER_GEOMETRY; + + if (ice->shaders.prog[MESA_SHADER_TESS_EVAL]) + return MESA_SHADER_TESS_EVAL; + + return MESA_SHADER_VERTEX; +} + +/** + * Update the last enabled stage's VUE map. + * + * When the shader feeding the rasterizer's output interface changes, we + * need to re-emit various packets. + */ +static void +update_last_vue_map(struct iris_context *ice, + struct brw_stage_prog_data *prog_data) +{ + struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; + struct brw_vue_map *vue_map = &vue_prog_data->vue_map; + struct brw_vue_map *old_map = ice->shaders.last_vue_map; + const uint64_t changed_slots = + (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid; + + if (changed_slots & VARYING_BIT_VIEWPORT) { + // XXX: could use ctx->Const.MaxViewports for old API efficiency + ice->state.num_viewports = + (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1; + ice->state.dirty |= IRIS_DIRTY_CLIP | + IRIS_DIRTY_SF_CL_VIEWPORT | + IRIS_DIRTY_CC_VIEWPORT | + IRIS_DIRTY_SCISSOR_RECT | + IRIS_DIRTY_UNCOMPILED_FS | + ice->state.dirty_for_nos[IRIS_NOS_LAST_VUE_MAP]; + // XXX: CC_VIEWPORT? + } + if (changed_slots || (old_map && old_map->separate != vue_map->separate)) { + ice->state.dirty |= IRIS_DIRTY_SBE; + } - struct iris_compiled_shader *shader = - iris_upload_shader(ice, IRIS_CACHE_TES, sizeof(*key), key, program, - prog_data, so_decls, system_values, num_system_values, - num_cbufs); + ice->shaders.last_vue_map = &vue_prog_data->vue_map; +} - if (ish->compiled_once) { - perf_debug(&ice->dbg, "Recompiling tessellation evaluation shader\n"); - } else { - ish->compiled_once = true; - } +/** + * Get the prog_data for a given stage, or NULL if the stage is disabled. 
+ */ +static struct brw_vue_prog_data * +get_vue_prog_data(struct iris_context *ice, gl_shader_stage stage) +{ + if (!ice->shaders.prog[stage]) + return NULL; - ralloc_free(mem_ctx); - return shader; + return (void *) ice->shaders.prog[stage]->prog_data; } +// XXX: iris_compiled_shaders are space-leaking :( +// XXX: do remember to unbind them if deleting them. + /** - * Update the current tessellation evaluation shader variant. + * Update the current shader variants for the given state. * - * Fill out the key, look in the cache, compile and bind if needed. + * This should be called on every draw call to ensure that the correct + * shaders are bound. It will also flag any dirty state triggered by + * swapping out those shaders. */ -static void -iris_update_compiled_tes(struct iris_context *ice) +void +iris_update_compiled_shaders(struct iris_context *ice) { - struct iris_uncompiled_shader *ish = - ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]; + const uint64_t dirty = ice->state.dirty; - struct brw_tes_prog_key key = { KEY_INIT }; - get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read); - ice->vtbl.populate_tes_key(ice, &key); + struct brw_vue_prog_data *old_prog_datas[4]; + if (!(dirty & IRIS_DIRTY_URB)) { + for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) + old_prog_datas[i] = get_vue_prog_data(ice, i); + } - struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES]; - struct iris_compiled_shader *shader = - iris_find_cached_shader(ice, IRIS_CACHE_TES, sizeof(key), &key); + if (dirty & (IRIS_DIRTY_UNCOMPILED_TCS | IRIS_DIRTY_UNCOMPILED_TES)) { + struct iris_uncompiled_shader *tes = + ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]; + if (tes) { + iris_update_compiled_tcs(ice); + iris_update_compiled_tes(ice); + } else { + ice->shaders.prog[IRIS_CACHE_TCS] = NULL; + ice->shaders.prog[IRIS_CACHE_TES] = NULL; + ice->state.dirty |= + IRIS_DIRTY_TCS | IRIS_DIRTY_TES | + IRIS_DIRTY_BINDINGS_TCS | IRIS_DIRTY_BINDINGS_TES | + 
IRIS_DIRTY_CONSTANTS_TCS | IRIS_DIRTY_CONSTANTS_TES; + } + } - if (!shader) - shader = iris_compile_tes(ice, ish, &key); + if (dirty & IRIS_DIRTY_UNCOMPILED_VS) + iris_update_compiled_vs(ice); + if (dirty & IRIS_DIRTY_UNCOMPILED_GS) + iris_update_compiled_gs(ice); - if (old != shader) { - ice->shaders.prog[IRIS_CACHE_TES] = shader; - ice->state.dirty |= IRIS_DIRTY_TES | - IRIS_DIRTY_BINDINGS_TES | - IRIS_DIRTY_CONSTANTS_TES; + gl_shader_stage last_stage = last_vue_stage(ice); + struct iris_compiled_shader *shader = ice->shaders.prog[last_stage]; + struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage]; + update_last_vue_map(ice, shader->prog_data); + if (ice->state.streamout != shader->streamout) { + ice->state.streamout = shader->streamout; + ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT; + } + + if (ice->state.streamout_active) { + for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { + struct iris_stream_output_target *so = + (void *) ice->state.so_target[i]; + if (so) + so->stride = ish->stream_output.stride[i]; + } + } + + if (dirty & IRIS_DIRTY_UNCOMPILED_FS) + iris_update_compiled_fs(ice); + // ... + + /* Changing shader interfaces may require a URB configuration. */ + if (!(dirty & IRIS_DIRTY_URB)) { + for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { + struct brw_vue_prog_data *old = old_prog_datas[i]; + struct brw_vue_prog_data *new = get_vue_prog_data(ice, i); + if (!!old != !!new || + (new && new->urb_entry_size != old->urb_entry_size)) { + ice->state.dirty |= IRIS_DIRTY_URB; + break; + } + } } } -/** - * Compile a geometry shader, and upload the assembly. 
- */ static struct iris_compiled_shader * -iris_compile_gs(struct iris_context *ice, +iris_compile_cs(struct iris_context *ice, struct iris_uncompiled_shader *ish, - const struct brw_gs_prog_key *key) + const struct brw_cs_prog_key *key) { struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; const struct brw_compiler *compiler = screen->compiler; const struct gen_device_info *devinfo = &screen->devinfo; void *mem_ctx = ralloc_context(NULL); - struct brw_gs_prog_data *gs_prog_data = - rzalloc(mem_ctx, struct brw_gs_prog_data); - struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base; - struct brw_stage_prog_data *prog_data = &vue_prog_data->base; + struct brw_cs_prog_data *cs_prog_data = + rzalloc(mem_ctx, struct brw_cs_prog_data); + struct brw_stage_prog_data *prog_data = &cs_prog_data->base; enum brw_param_builtin *system_values; unsigned num_system_values; unsigned num_cbufs; nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir); + cs_prog_data->binding_table.work_groups_start = 0; + + prog_data->total_shared = nir->info.cs.shared_size; + iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, &num_system_values, &num_cbufs); - assign_common_binding_table_offsets(devinfo, nir, prog_data, 0, + assign_common_binding_table_offsets(devinfo, nir, prog_data, 1, num_system_values, num_cbufs); - brw_compute_vue_map(devinfo, - &vue_prog_data->vue_map, nir->info.outputs_written, - nir->info.separate_shader); - char *error_str = NULL; const unsigned *program = - brw_compile_gs(compiler, &ice->dbg, mem_ctx, key, gs_prog_data, nir, - NULL, -1, &error_str); + brw_compile_cs(compiler, &ice->dbg, mem_ctx, key, cs_prog_data, + nir, -1, &error_str); if (program == NULL) { - dbg_printf("Failed to compile geometry shader: %s\n", error_str); + dbg_printf("Failed to compile compute shader: %s\n", error_str); ralloc_free(mem_ctx); return false; } - uint32_t *so_decls = - ice->vtbl.create_so_decl_list(&ish->stream_output, - &vue_prog_data->vue_map); 
- struct iris_compiled_shader *shader = - iris_upload_shader(ice, IRIS_CACHE_GS, sizeof(*key), key, program, - prog_data, so_decls, system_values, num_system_values, + iris_upload_shader(ice, IRIS_CACHE_CS, sizeof(*key), key, program, + prog_data, NULL, system_values, num_system_values, num_cbufs); if (ish->compiled_once) { - perf_debug(&ice->dbg, "Recompiling geometry shader\n"); + perf_debug(&ice->dbg, "Recompiling compute shader\n"); } else { ish->compiled_once = true; } @@ -1249,413 +1239,405 @@ iris_compile_gs(struct iris_context *ice, return shader; } -/** - * Update the current geometry shader variant. - * - * Fill out the key, look in the cache, compile and bind if needed. - */ -static void -iris_update_compiled_gs(struct iris_context *ice) +void +iris_update_compiled_compute_shader(struct iris_context *ice) { struct iris_uncompiled_shader *ish = - ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]; - struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS]; - struct iris_compiled_shader *shader = NULL; + ice->shaders.uncompiled[MESA_SHADER_COMPUTE]; - if (ish) { - struct brw_gs_prog_key key = { KEY_INIT }; - ice->vtbl.populate_gs_key(ice, &key); + struct brw_cs_prog_key key = { KEY_INIT }; + ice->vtbl.populate_cs_key(ice, &key); - shader = - iris_find_cached_shader(ice, IRIS_CACHE_GS, sizeof(key), &key); + struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS]; + struct iris_compiled_shader *shader = + iris_find_cached_shader(ice, IRIS_CACHE_CS, sizeof(key), &key); - if (!shader) - shader = iris_compile_gs(ice, ish, &key); - } + if (!shader) + shader = iris_compile_cs(ice, ish, &key); if (old != shader) { - ice->shaders.prog[IRIS_CACHE_GS] = shader; - ice->state.dirty |= IRIS_DIRTY_GS | - IRIS_DIRTY_BINDINGS_GS | - IRIS_DIRTY_CONSTANTS_GS; + ice->shaders.prog[IRIS_CACHE_CS] = shader; + ice->state.dirty |= IRIS_DIRTY_CS | + IRIS_DIRTY_BINDINGS_CS | + IRIS_DIRTY_CONSTANTS_CS; } } +void +iris_fill_cs_push_const_buffer(struct 
brw_cs_prog_data *cs_prog_data, + uint32_t *dst) +{ + struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + assert(cs_prog_data->push.total.size > 0); + assert(cs_prog_data->push.cross_thread.size == 0); + assert(cs_prog_data->push.per_thread.dwords == 1); + assert(prog_data->param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID); + for (unsigned t = 0; t < cs_prog_data->threads; t++) + dst[8 * t] = t; +} + /** - * Compile a fragment (pixel) shader, and upload the assembly. + * Allocate scratch BOs as needed for the given per-thread size and stage. */ -static struct iris_compiled_shader * -iris_compile_fs(struct iris_context *ice, - struct iris_uncompiled_shader *ish, - const struct brw_wm_prog_key *key, - struct brw_vue_map *vue_map) +struct iris_bo * +iris_get_scratch_space(struct iris_context *ice, + unsigned per_thread_scratch, + gl_shader_stage stage) { struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; - const struct brw_compiler *compiler = screen->compiler; + struct iris_bufmgr *bufmgr = screen->bufmgr; const struct gen_device_info *devinfo = &screen->devinfo; - void *mem_ctx = ralloc_context(NULL); - struct brw_wm_prog_data *fs_prog_data = - rzalloc(mem_ctx, struct brw_wm_prog_data); - struct brw_stage_prog_data *prog_data = &fs_prog_data->base; - enum brw_param_builtin *system_values; - unsigned num_system_values; - unsigned num_cbufs; - - nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir); - if (nir->info.name && strncmp(nir->info.name, "ARB", 3) == 0) - prog_data->use_alt_mode = true; + unsigned encoded_size = ffs(per_thread_scratch) - 11; + assert(encoded_size < (1 << 16)); - iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, - &num_system_values, &num_cbufs); + struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage]; - assign_common_binding_table_offsets(devinfo, nir, prog_data, - MAX2(key->nr_color_regions, 1), - num_system_values, num_cbufs); - char *error_str = NULL; - const unsigned *program = - 
brw_compile_fs(compiler, &ice->dbg, mem_ctx, key, fs_prog_data, - nir, NULL, -1, -1, -1, true, false, vue_map, &error_str); - if (program == NULL) { - dbg_printf("Failed to compile fragment shader: %s\n", error_str); - ralloc_free(mem_ctx); - return false; - } + /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says: + * + * "Scratch Space per slice is computed based on 4 sub-slices. SW + * must allocate scratch space enough so that each slice has 4 + * slices allowed." + * + * According to the other driver team, this applies to compute shaders + * as well. This is not currently documented at all. + * + * This hack is no longer necessary on Gen11+. + */ + unsigned subslice_total = screen->subslice_total; + if (devinfo->gen < 11) + subslice_total = 4 * devinfo->num_slices; + assert(subslice_total >= screen->subslice_total); - struct iris_compiled_shader *shader = - iris_upload_shader(ice, IRIS_CACHE_FS, sizeof(*key), key, program, - prog_data, NULL, system_values, num_system_values, - num_cbufs); + if (!*bop) { + unsigned scratch_ids_per_subslice = devinfo->max_cs_threads; + uint32_t max_threads[] = { + [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, + [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads, + [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads, + [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, + [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, + [MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * subslice_total, + }; - if (ish->compiled_once) { - perf_debug(&ice->dbg, "Recompiling fragment shader\n"); - } else { - ish->compiled_once = true; + uint32_t size = per_thread_scratch * max_threads[stage]; + + *bop = iris_bo_alloc(bufmgr, "scratch", size, IRIS_MEMZONE_SHADER); } - ralloc_free(mem_ctx); - return shader; + return *bop; } +/* ------------------------------------------------------------------- */ + /** - * Update the current fragment shader variant. + * The pipe->create_[stage]_state() driver hooks. 
* - * Fill out the key, look in the cache, compile and bind if needed. + * Performs basic NIR preprocessing, records any state dependencies, and + * returns an iris_uncompiled_shader as the Gallium CSO. + * + * Actual shader compilation to assembly happens later, at first use. */ -static void -iris_update_compiled_fs(struct iris_context *ice) +static void * +iris_create_uncompiled_shader(struct pipe_context *ctx, + nir_shader *nir, + const struct pipe_stream_output_info *so_info) { - struct iris_uncompiled_shader *ish = - ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]; - struct brw_wm_prog_key key = { KEY_INIT }; - ice->vtbl.populate_fs_key(ice, &key); + struct iris_screen *screen = (struct iris_screen *)ctx->screen; + const struct gen_device_info *devinfo = &screen->devinfo; - if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP)) - key.input_slots_valid = ice->shaders.last_vue_map->slots_valid; + struct iris_uncompiled_shader *ish = + calloc(1, sizeof(struct iris_uncompiled_shader)); + if (!ish) + return NULL; - struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS]; - struct iris_compiled_shader *shader = - iris_find_cached_shader(ice, IRIS_CACHE_FS, sizeof(key), &key); + nir = brw_preprocess_nir(screen->compiler, nir); - if (!shader) - shader = iris_compile_fs(ice, ish, &key, ice->shaders.last_vue_map); + NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo); + NIR_PASS_V(nir, iris_lower_storage_image_derefs); - if (old != shader) { - // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE - // toggles. might be able to avoid flagging SBE too. 
- ice->shaders.prog[IRIS_CACHE_FS] = shader; - ice->state.dirty |= IRIS_DIRTY_FS | - IRIS_DIRTY_BINDINGS_FS | - IRIS_DIRTY_CONSTANTS_FS | - IRIS_DIRTY_WM | - IRIS_DIRTY_CLIP | - IRIS_DIRTY_SBE; + ish->program_id = get_new_program_id(screen); + ish->nir = nir; + if (so_info) { + memcpy(&ish->stream_output, so_info, sizeof(*so_info)); + update_so_info(&ish->stream_output, nir->info.outputs_written); } + + return ish; } -/** - * Get the compiled shader for the last enabled geometry stage. - * - * This stage is the one which will feed stream output and the rasterizer. - */ -static gl_shader_stage -last_vue_stage(struct iris_context *ice) +static struct iris_uncompiled_shader * +iris_create_shader_state(struct pipe_context *ctx, + const struct pipe_shader_state *state) { - if (ice->shaders.prog[MESA_SHADER_GEOMETRY]) - return MESA_SHADER_GEOMETRY; - - if (ice->shaders.prog[MESA_SHADER_TESS_EVAL]) - return MESA_SHADER_TESS_EVAL; + assert(state->type == PIPE_SHADER_IR_NIR); - return MESA_SHADER_VERTEX; + return iris_create_uncompiled_shader(ctx, state->ir.nir, + &state->stream_output); } -/** - * Update the last enabled stage's VUE map. - * - * When the shader feeding the rasterizer's output interface changes, we - * need to re-emit various packets. - */ -static void -update_last_vue_map(struct iris_context *ice, - struct brw_stage_prog_data *prog_data) +static void * +iris_create_vs_state(struct pipe_context *ctx, + const struct pipe_shader_state *state) { - struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; - struct brw_vue_map *vue_map = &vue_prog_data->vue_map; - struct brw_vue_map *old_map = ice->shaders.last_vue_map; - const uint64_t changed_slots = - (old_map ? 
old_map->slots_valid : 0ull) ^ vue_map->slots_valid; + struct iris_context *ice = (void *) ctx; + struct iris_screen *screen = (void *) ctx->screen; + struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state); - if (changed_slots & VARYING_BIT_VIEWPORT) { - // XXX: could use ctx->Const.MaxViewports for old API efficiency - ice->state.num_viewports = - (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1; - ice->state.dirty |= IRIS_DIRTY_CLIP | - IRIS_DIRTY_SF_CL_VIEWPORT | - IRIS_DIRTY_CC_VIEWPORT | - IRIS_DIRTY_SCISSOR_RECT | - IRIS_DIRTY_UNCOMPILED_FS | - ice->state.dirty_for_nos[IRIS_NOS_LAST_VUE_MAP]; - // XXX: CC_VIEWPORT? - } + /* User clip planes */ + if (ish->nir->info.clip_distance_array_size == 0) + ish->nos |= (1ull << IRIS_NOS_RASTERIZER); - if (changed_slots || (old_map && old_map->separate != vue_map->separate)) { - ice->state.dirty |= IRIS_DIRTY_SBE; + if (screen->precompile) { + struct brw_vs_prog_key key = { KEY_INIT }; + + iris_compile_vs(ice, ish, &key); } - ice->shaders.last_vue_map = &vue_prog_data->vue_map; + return ish; } -/** - * Get the prog_data for a given stage, or NULL if the stage is disabled. - */ -static struct brw_vue_prog_data * -get_vue_prog_data(struct iris_context *ice, gl_shader_stage stage) +static void * +iris_create_tcs_state(struct pipe_context *ctx, + const struct pipe_shader_state *state) { - if (!ice->shaders.prog[stage]) - return NULL; + struct iris_context *ice = (void *) ctx; + struct iris_screen *screen = (void *) ctx->screen; + struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state); + struct shader_info *info = &ish->nir->info; - return (void *) ice->shaders.prog[stage]->prog_data; -} + // XXX: NOS? -// XXX: iris_compiled_shaders are space-leaking :( -// XXX: do remember to unbind them if deleting them. 
+ if (screen->precompile) { + const unsigned _GL_TRIANGLES = 0x0004; + struct brw_tcs_prog_key key = { + KEY_INIT, + // XXX: make sure the linker fills this out from the TES... + .tes_primitive_mode = + info->tess.primitive_mode ? info->tess.primitive_mode + : _GL_TRIANGLES, + .outputs_written = info->outputs_written, + .patch_outputs_written = info->patch_outputs_written, + }; -/** - * Update the current shader variants for the given state. - * - * This should be called on every draw call to ensure that the correct - * shaders are bound. It will also flag any dirty state triggered by - * swapping out those shaders. - */ -void -iris_update_compiled_shaders(struct iris_context *ice) + iris_compile_tcs(ice, ish, &key); + } + + return ish; +} + +static void * +iris_create_tes_state(struct pipe_context *ctx, + const struct pipe_shader_state *state) { - const uint64_t dirty = ice->state.dirty; + struct iris_context *ice = (void *) ctx; + struct iris_screen *screen = (void *) ctx->screen; + struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state); + struct shader_info *info = &ish->nir->info; - struct brw_vue_prog_data *old_prog_datas[4]; - if (!(dirty & IRIS_DIRTY_URB)) { - for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) - old_prog_datas[i] = get_vue_prog_data(ice, i); - } + // XXX: NOS? 
- if (dirty & (IRIS_DIRTY_UNCOMPILED_TCS | IRIS_DIRTY_UNCOMPILED_TES)) { - struct iris_uncompiled_shader *tes = - ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]; - if (tes) { - iris_update_compiled_tcs(ice); - iris_update_compiled_tes(ice); - } else { - ice->shaders.prog[IRIS_CACHE_TCS] = NULL; - ice->shaders.prog[IRIS_CACHE_TES] = NULL; - ice->state.dirty |= - IRIS_DIRTY_TCS | IRIS_DIRTY_TES | - IRIS_DIRTY_BINDINGS_TCS | IRIS_DIRTY_BINDINGS_TES | - IRIS_DIRTY_CONSTANTS_TCS | IRIS_DIRTY_CONSTANTS_TES; - } + if (screen->precompile) { + struct brw_tes_prog_key key = { + KEY_INIT, + // XXX: not ideal, need TCS output/TES input unification + .inputs_read = info->inputs_read, + .patch_inputs_read = info->patch_inputs_read, + }; + + iris_compile_tes(ice, ish, &key); } - if (dirty & IRIS_DIRTY_UNCOMPILED_VS) - iris_update_compiled_vs(ice); - if (dirty & IRIS_DIRTY_UNCOMPILED_GS) - iris_update_compiled_gs(ice); + return ish; +} - gl_shader_stage last_stage = last_vue_stage(ice); - struct iris_compiled_shader *shader = ice->shaders.prog[last_stage]; - struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage]; - update_last_vue_map(ice, shader->prog_data); - if (ice->state.streamout != shader->streamout) { - ice->state.streamout = shader->streamout; - ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT; - } +static void * +iris_create_gs_state(struct pipe_context *ctx, + const struct pipe_shader_state *state) +{ + struct iris_context *ice = (void *) ctx; + struct iris_screen *screen = (void *) ctx->screen; + struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state); - if (ice->state.streamout_active) { - for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { - struct iris_stream_output_target *so = - (void *) ice->state.so_target[i]; - if (so) - so->stride = ish->stream_output.stride[i]; - } - } + // XXX: NOS? - if (dirty & IRIS_DIRTY_UNCOMPILED_FS) - iris_update_compiled_fs(ice); - // ... 
+ if (screen->precompile) { + struct brw_gs_prog_key key = { KEY_INIT }; - /* Changing shader interfaces may require a URB configuration. */ - if (!(dirty & IRIS_DIRTY_URB)) { - for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { - struct brw_vue_prog_data *old = old_prog_datas[i]; - struct brw_vue_prog_data *new = get_vue_prog_data(ice, i); - if (!!old != !!new || - (new && new->urb_entry_size != old->urb_entry_size)) { - ice->state.dirty |= IRIS_DIRTY_URB; - break; - } - } + iris_compile_gs(ice, ish, &key); } + + return ish; } -static struct iris_compiled_shader * -iris_compile_cs(struct iris_context *ice, - struct iris_uncompiled_shader *ish, - const struct brw_cs_prog_key *key) +static void * +iris_create_fs_state(struct pipe_context *ctx, + const struct pipe_shader_state *state) { - struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; - const struct brw_compiler *compiler = screen->compiler; - const struct gen_device_info *devinfo = &screen->devinfo; - void *mem_ctx = ralloc_context(NULL); - struct brw_cs_prog_data *cs_prog_data = - rzalloc(mem_ctx, struct brw_cs_prog_data); - struct brw_stage_prog_data *prog_data = &cs_prog_data->base; - enum brw_param_builtin *system_values; - unsigned num_system_values; - unsigned num_cbufs; + struct iris_context *ice = (void *) ctx; + struct iris_screen *screen = (void *) ctx->screen; + struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state); + struct shader_info *info = &ish->nir->info; - nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir); + ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) | + (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) | + (1ull << IRIS_NOS_RASTERIZER) | + (1ull << IRIS_NOS_BLEND); - cs_prog_data->binding_table.work_groups_start = 0; + /* The program key needs the VUE map if there are > 16 inputs */ + if (util_bitcount64(ish->nir->info.inputs_read & + BRW_FS_VARYING_INPUT_MASK) > 16) { + ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP); + } - prog_data->total_shared = 
nir->info.cs.shared_size; + if (screen->precompile) { + const uint64_t color_outputs = info->outputs_written & + ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) | + BITFIELD64_BIT(FRAG_RESULT_STENCIL) | + BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)); - iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, - &num_system_values, &num_cbufs); + bool can_rearrange_varyings = + util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16; - assign_common_binding_table_offsets(devinfo, nir, prog_data, 1, - num_system_values, num_cbufs); + struct brw_wm_prog_key key = { + KEY_INIT, + .nr_color_regions = util_bitcount(color_outputs), + .coherent_fb_fetch = true, + .input_slots_valid = + can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS, + }; - char *error_str = NULL; - const unsigned *program = - brw_compile_cs(compiler, &ice->dbg, mem_ctx, key, cs_prog_data, - nir, -1, &error_str); - if (program == NULL) { - dbg_printf("Failed to compile compute shader: %s\n", error_str); - ralloc_free(mem_ctx); - return false; + iris_compile_fs(ice, ish, &key, NULL); } - struct iris_compiled_shader *shader = - iris_upload_shader(ice, IRIS_CACHE_CS, sizeof(*key), key, program, - prog_data, NULL, system_values, num_system_values, - num_cbufs); + return ish; +} - if (ish->compiled_once) { - perf_debug(&ice->dbg, "Recompiling compute shader\n"); - } else { - ish->compiled_once = true; +static void * +iris_create_compute_state(struct pipe_context *ctx, + const struct pipe_compute_state *state) +{ + assert(state->ir_type == PIPE_SHADER_IR_NIR); + + struct iris_context *ice = (void *) ctx; + struct iris_screen *screen = (void *) ctx->screen; + struct iris_uncompiled_shader *ish = + iris_create_uncompiled_shader(ctx, (void *) state->prog, NULL); + + // XXX: disallow more than 64KB of shared variables + + if (screen->precompile) { + struct brw_cs_prog_key key = { KEY_INIT }; + + iris_compile_cs(ice, ish, &key); } - ralloc_free(mem_ctx); - return shader; + return ish; } 
-void -iris_update_compiled_compute_shader(struct iris_context *ice) +/** + * The pipe->delete_[stage]_state() driver hooks. + * + * Frees the iris_uncompiled_shader: its NIR via ralloc_free(), then itself. + */ +static void +iris_delete_shader_state(struct pipe_context *ctx, void *state) { - struct iris_uncompiled_shader *ish = - ice->shaders.uncompiled[MESA_SHADER_COMPUTE]; + struct iris_uncompiled_shader *ish = state; - struct brw_cs_prog_key key = { KEY_INIT }; - ice->vtbl.populate_cs_key(ice, &key); + ralloc_free(ish->nir); + free(ish); +} - struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS]; - struct iris_compiled_shader *shader = - iris_find_cached_shader(ice, IRIS_CACHE_CS, sizeof(key), &key); +/** + * Common helper for the pipe->bind_[stage]_state() driver hooks. + * + * Binds an uncompiled shader (or NULL) as the current one for a stage, flags + * IRIS_DIRTY_UNCOMPILED_VS << stage, and updates the per-NOS dirty tracking. + */ +static void +bind_state(struct iris_context *ice, + struct iris_uncompiled_shader *ish, + gl_shader_stage stage) +{ + uint64_t dirty_bit = IRIS_DIRTY_UNCOMPILED_VS << stage; + const uint64_t nos = ish ? ish->nos : 0; - if (!shader) - shader = iris_compile_cs(ice, ish, &key); + ice->shaders.uncompiled[stage] = ish; + ice->state.dirty |= dirty_bit; - if (old != shader) { - ice->shaders.prog[IRIS_CACHE_CS] = shader; - ice->state.dirty |= IRIS_DIRTY_CS | - IRIS_DIRTY_BINDINGS_CS | - IRIS_DIRTY_CONSTANTS_CS; + /* Record that CSOs need to mark IRIS_DIRTY_UNCOMPILED_XS when they change + * (or that they no longer need to do so).
+ */ + for (int i = 0; i < IRIS_NOS_COUNT; i++) { + if (nos & (1 << i)) + ice->state.dirty_for_nos[i] |= dirty_bit; + else + ice->state.dirty_for_nos[i] &= ~dirty_bit; } } -void -iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data, - uint32_t *dst) +static void +iris_bind_vs_state(struct pipe_context *ctx, void *state) { - struct brw_stage_prog_data *prog_data = &cs_prog_data->base; - assert(cs_prog_data->push.total.size > 0); - assert(cs_prog_data->push.cross_thread.size == 0); - assert(cs_prog_data->push.per_thread.dwords == 1); - assert(prog_data->param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID); - for (unsigned t = 0; t < cs_prog_data->threads; t++) - dst[8 * t] = t; + bind_state((void *) ctx, state, MESA_SHADER_VERTEX); } -/** - * Allocate scratch BOs as needed for the given per-thread size and stage. - */ -struct iris_bo * -iris_get_scratch_space(struct iris_context *ice, - unsigned per_thread_scratch, - gl_shader_stage stage) +static void +iris_bind_tcs_state(struct pipe_context *ctx, void *state) { - struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; - struct iris_bufmgr *bufmgr = screen->bufmgr; - const struct gen_device_info *devinfo = &screen->devinfo; + bind_state((void *) ctx, state, MESA_SHADER_TESS_CTRL); +} - unsigned encoded_size = ffs(per_thread_scratch) - 11; - assert(encoded_size < (1 << 16)); +static void +iris_bind_tes_state(struct pipe_context *ctx, void *state) +{ + struct iris_context *ice = (struct iris_context *)ctx; - struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage]; + /* Enabling/disabling optional stages requires a URB reconfiguration. */ + if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]) + ice->state.dirty |= IRIS_DIRTY_URB; - /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says: - * - * "Scratch Space per slice is computed based on 4 sub-slices. SW - * must allocate scratch space enough so that each slice has 4 - * slices allowed."
- * - * According to the other driver team, this applies to compute shaders - * as well. This is not currently documented at all. - * - * This hack is no longer necessary on Gen11+. - */ - unsigned subslice_total = screen->subslice_total; - if (devinfo->gen < 11) - subslice_total = 4 * devinfo->num_slices; - assert(subslice_total >= screen->subslice_total); + bind_state((void *) ctx, state, MESA_SHADER_TESS_EVAL); +} - if (!*bop) { - unsigned scratch_ids_per_subslice = devinfo->max_cs_threads; - uint32_t max_threads[] = { - [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, - [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads, - [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads, - [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, - [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, - [MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * subslice_total, - }; +static void +iris_bind_gs_state(struct pipe_context *ctx, void *state) +{ + struct iris_context *ice = (struct iris_context *)ctx; - uint32_t size = per_thread_scratch * max_threads[stage]; + /* Enabling/disabling optional stages requires a URB reconfiguration.
*/ + if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]) + ice->state.dirty |= IRIS_DIRTY_URB; - *bop = iris_bo_alloc(bufmgr, "scratch", size, IRIS_MEMZONE_SHADER); - } + bind_state((void *) ctx, state, MESA_SHADER_GEOMETRY); +} - return *bop; +static void +iris_bind_fs_state(struct pipe_context *ctx, void *state) +{ + struct iris_context *ice = (struct iris_context *) ctx; + struct iris_uncompiled_shader *old_ish = + ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]; + struct iris_uncompiled_shader *new_ish = state; + + const unsigned color_bits = + BITFIELD64_BIT(FRAG_RESULT_COLOR) | + BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS); + + /* Color outputs influence HasWriteableRT, so flag IRIS_DIRTY_PS_BLEND */ + if (!old_ish || !new_ish || + (old_ish->nir->info.outputs_written & color_bits) != + (new_ish->nir->info.outputs_written & color_bits)) + ice->state.dirty |= IRIS_DIRTY_PS_BLEND; + + bind_state((void *) ctx, state, MESA_SHADER_FRAGMENT); +} + +static void +iris_bind_cs_state(struct pipe_context *ctx, void *state) +{ + bind_state((void *) ctx, state, MESA_SHADER_COMPUTE); } void -- 2.11.0