From 9e62aec9cd4853016b4d03a56b5756111a312d65 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 21 Mar 2018 15:18:34 -0700
Subject: [PATCH] broadcom/vc5: Limit each transform feedback data spec to 16
 dwords.

The length-1 field only has 4 bits, so we need to generate separate specs
when there's too much TF output per buffer.

Fixes
GTF-GLES3.gtf.GL3Tests.transform_feedback.transform_feedback_builtin_type
and transform_feedback_max_interleaved.
---
 src/gallium/drivers/vc5/vc5_context.h |  2 +-
 src/gallium/drivers/vc5/vc5_program.c | 43 ++++++++++++++++++++++++-----------
 2 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/vc5/vc5_context.h b/src/gallium/drivers/vc5/vc5_context.h
index 1ab5a6b1532..976fba90f81 100644
--- a/src/gallium/drivers/vc5/vc5_context.h
+++ b/src/gallium/drivers/vc5/vc5_context.h
@@ -130,7 +130,7 @@ struct vc5_uncompiled_shader {
         struct pipe_shader_state base;
         uint32_t num_tf_outputs;
         struct v3d_varying_slot *tf_outputs;
-        uint16_t tf_specs[PIPE_MAX_SO_BUFFERS];
+        uint16_t tf_specs[16];
         uint32_t num_tf_specs;
 
         /**
diff --git a/src/gallium/drivers/vc5/vc5_program.c b/src/gallium/drivers/vc5/vc5_program.c
index 87c21abe8b1..a7a089510b2 100644
--- a/src/gallium/drivers/vc5/vc5_program.c
+++ b/src/gallium/drivers/vc5/vc5_program.c
@@ -49,6 +49,14 @@ vc5_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location)
         return -1;
 }
 
+/**
+ * Precomputes the TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC array for the shader.
+ *
+ * A shader can have 16 of these specs, and each one of them can write up to
+ * 16 dwords.  Since we allow a total of 64 transform feedback output
+ * components (not 16 vectors), we have to group the writes of multiple
+ * varyings together in a single data spec.
+ */
 static void
 vc5_set_transform_feedback_outputs(struct vc5_uncompiled_shader *so,
                                    const struct pipe_stream_output_info *stream_output)
@@ -102,19 +110,28 @@ vc5_set_transform_feedback_outputs(struct vc5_uncompiled_shader *so,
                 if (!vpm_size)
                         continue;
 
-                struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = {
-                        /* We need the offset from the coordinate shader's VPM
-                         * output block, which has the [X, Y, Z, W, Xs, Ys]
-                         * values at the start.  Note that this will need some
-                         * shifting when PSIZ is also present.
-                         */
-                        .first_shaded_vertex_value_to_output = vpm_start + 6,
-                        .number_of_consecutive_vertex_values_to_output_as_32_bit_values_minus_1 = vpm_size - 1,
-                        .output_buffer_to_write_to = buffer,
-                };
-                V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
-                                                               (void *)&so->tf_specs[so->num_tf_specs++],
-                                                               &unpacked);
+                uint32_t vpm_start_offset = vpm_start + 6;
+
+                while (vpm_size) {
+                        uint32_t write_size = MIN2(vpm_size, 1 << 4);
+
+                        struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = {
+                                /* We need the offset from the coordinate shader's VPM
+                                 * output block, which has the [X, Y, Z, W, Xs, Ys]
+                                 * values at the start.
+                                 */
+                                .first_shaded_vertex_value_to_output = vpm_start_offset,
+                                .number_of_consecutive_vertex_values_to_output_as_32_bit_values_minus_1 = write_size - 1,
+                                .output_buffer_to_write_to = buffer,
+                        };
+
+                        assert(so->num_tf_specs != ARRAY_SIZE(so->tf_specs));
+                        V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
+                                                                       (void *)&so->tf_specs[so->num_tf_specs++],
+                                                                       &unpacked);
+                        vpm_start_offset += write_size;
+                        vpm_size -= write_size;
+                }
         }
 
         so->num_tf_outputs = slot_count;
-- 
2.11.0