From 9e62aec9cd4853016b4d03a56b5756111a312d65 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 21 Mar 2018 15:18:34 -0700 Subject: [PATCH] broadcom/vc5: Limit each transform feedback data spec to 16 dwords. The length-minus-1 field only has 4 bits, so a single spec can write at most 16 dwords; we need to generate separate specs when a buffer has more TF output than that. Fixes GTF-GLES3.gtf.GL3Tests.transform_feedback.transform_feedback_builtin_type and transform_feedback_max_interleaved. --- src/gallium/drivers/vc5/vc5_context.h | 2 +- src/gallium/drivers/vc5/vc5_program.c | 43 ++++++++++++++++++++++++----------- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/vc5/vc5_context.h b/src/gallium/drivers/vc5/vc5_context.h index 1ab5a6b1532..976fba90f81 100644 --- a/src/gallium/drivers/vc5/vc5_context.h +++ b/src/gallium/drivers/vc5/vc5_context.h @@ -130,7 +130,7 @@ struct vc5_uncompiled_shader { struct pipe_shader_state base; uint32_t num_tf_outputs; struct v3d_varying_slot *tf_outputs; - uint16_t tf_specs[PIPE_MAX_SO_BUFFERS]; + uint16_t tf_specs[16]; uint32_t num_tf_specs; /** diff --git a/src/gallium/drivers/vc5/vc5_program.c b/src/gallium/drivers/vc5/vc5_program.c index 87c21abe8b1..a7a089510b2 100644 --- a/src/gallium/drivers/vc5/vc5_program.c +++ b/src/gallium/drivers/vc5/vc5_program.c @@ -49,6 +49,14 @@ vc5_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location) return -1; } +/** + * Precomputes the TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC array for the shader. + * + * A shader can have 16 of these specs, and each one of them can write up to + * 16 dwords. Since we allow a total of 64 transform feedback output + * components (not 16 vectors), we have to group the writes of multiple + * varyings together in a single data spec. 
+ */ static void vc5_set_transform_feedback_outputs(struct vc5_uncompiled_shader *so, const struct pipe_stream_output_info *stream_output) @@ -102,19 +110,28 @@ vc5_set_transform_feedback_outputs(struct vc5_uncompiled_shader *so, if (!vpm_size) continue; - struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = { - /* We need the offset from the coordinate shader's VPM - * output block, which has the [X, Y, Z, W, Xs, Ys] - * values at the start. Note that this will need some - * shifting when PSIZ is also present. - */ - .first_shaded_vertex_value_to_output = vpm_start + 6, - .number_of_consecutive_vertex_values_to_output_as_32_bit_values_minus_1 = vpm_size - 1, - .output_buffer_to_write_to = buffer, - }; - V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, - (void *)&so->tf_specs[so->num_tf_specs++], - &unpacked); + uint32_t vpm_start_offset = vpm_start + 6; + + while (vpm_size) { + uint32_t write_size = MIN2(vpm_size, 1 << 4); + + struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = { + /* We need the offset from the coordinate shader's VPM + * output block, which has the [X, Y, Z, W, Xs, Ys] + * values at the start. + */ + .first_shaded_vertex_value_to_output = vpm_start_offset, + .number_of_consecutive_vertex_values_to_output_as_32_bit_values_minus_1 = write_size - 1, + .output_buffer_to_write_to = buffer, + }; + + assert(so->num_tf_specs != ARRAY_SIZE(so->tf_specs)); + V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, + (void *)&so->tf_specs[so->num_tf_specs++], + &unpacked); + vpm_start_offset += write_size; + vpm_size -= write_size; + } } so->num_tf_outputs = slot_count; -- 2.11.0