OSDN Git Service

0706da613cb0992a536d22450a58f93d59288dde
[android-x86/external-mesa.git] / src / gallium / drivers / vc5 / vc5_rcl.c
1 /*
2  * Copyright © 2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23
24 #include "util/u_format.h"
25 #include "vc5_context.h"
26 #include "vc5_tiling.h"
27 #include "broadcom/cle/v3d_packet_v33_pack.h"
28
29 static void
30 vc5_rcl_emit_generic_per_tile_list(struct vc5_job *job)
31 {
32         /* Emit the generic list in our indirect state -- the rcl will just
33          * have pointers into it.
34          */
35         struct vc5_cl *cl = &job->indirect;
36         vc5_cl_ensure_space(cl, 200, 1);
37         struct vc5_cl_reloc tile_list_start = cl_get_address(cl);
38
39         const uint32_t pipe_clear_color_buffers = (PIPE_CLEAR_COLOR0 |
40                                                    PIPE_CLEAR_COLOR1 |
41                                                    PIPE_CLEAR_COLOR2 |
42                                                    PIPE_CLEAR_COLOR3);
43         const uint32_t first_color_buffer_bit = (ffs(PIPE_CLEAR_COLOR0) - 1);
44
45         uint32_t read_but_not_cleared = job->resolve & ~job->cleared;
46
47         /* The initial reload will be queued until we get the
48          * tile coordinates.
49          */
50         if (read_but_not_cleared) {
51                 cl_emit(cl, RELOAD_TILE_COLOUR_BUFFER, load) {
52                         load.disable_colour_buffer_load =
53                                 (~read_but_not_cleared & pipe_clear_color_buffers) >>
54                                 first_color_buffer_bit;
55                         load.enable_z_load =
56                                 read_but_not_cleared & PIPE_CLEAR_DEPTH;
57                         load.enable_stencil_load =
58                                 read_but_not_cleared & PIPE_CLEAR_STENCIL;
59                 }
60         }
61
62         /* Tile Coordinates triggers the reload and sets where the stores
63          * go. There must be one per store packet.
64          */
65         cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
66
67         cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
68
69         cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) {
70                 uint32_t color_write_enables =
71                         job->resolve >> first_color_buffer_bit;
72
73                 store.disable_color_buffer_write = (~color_write_enables) & 0xf;
74                 store.enable_z_write = job->resolve & PIPE_CLEAR_DEPTH;
75                 store.enable_stencil_write = job->resolve & PIPE_CLEAR_STENCIL;
76
77                 store.disable_colour_buffers_clear_on_write =
78                         (job->cleared & pipe_clear_color_buffers) == 0;
79                 store.disable_z_buffer_clear_on_write =
80                         !(job->cleared & PIPE_CLEAR_DEPTH);
81                 store.disable_stencil_buffer_clear_on_write =
82                         !(job->cleared & PIPE_CLEAR_STENCIL);
83         };
84
85         cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
86
87         cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
88                 branch.start = tile_list_start;
89                 branch.end = cl_get_address(cl);
90         }
91 }
92
/* Integer division of a by b with the quotient rounded up (intended for
 * non-negative a and positive b).  Every use of an argument is
 * parenthesized so that expression arguments such as "x + 1" expand
 * correctly.  Note that b is evaluated twice, so do not pass arguments
 * with side effects.
 */
#define div_round_up(a, b) (((a) + (b) - 1) / (b))
94
95 void
96 vc5_emit_rcl(struct vc5_job *job)
97 {
98         /* The RCL list should be empty. */
99         assert(!job->rcl.bo);
100
101         vc5_cl_ensure_space_with_branch(&job->rcl, 200 + 256 *
102                                         cl_packet_length(SUPERTILE_COORDINATES));
103         job->submit.rcl_start = job->rcl.bo->offset;
104         vc5_job_add_bo(job, job->rcl.bo);
105
106         int nr_cbufs = 0;
107         for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
108                 if (job->cbufs[i])
109                         nr_cbufs = i + 1;
110         }
111
112         /* Comon config must be the first TILE_RENDERING_MODE_CONFIGURATION
113          * and Z_STENCIL_CLEAR_VALUES must be last.  The ones in between are
114          * optional updates to the previous HW state.
115          */
116         cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION,
117                 config) {
118                 config.enable_z_store = job->resolve & PIPE_CLEAR_DEPTH;
119                 config.enable_stencil_store = job->resolve & PIPE_CLEAR_STENCIL;
120
121                 config.early_z_disable = !job->uses_early_z;
122
123                 config.image_width_pixels = job->draw_width;
124                 config.image_height_pixels = job->draw_height;
125
126                 config.number_of_render_targets_minus_1 =
127                         MAX2(nr_cbufs, 1) - 1;
128
129                 config.maximum_bpp_of_all_render_targets = job->internal_bpp;
130         }
131
132         for (int i = 0; i < nr_cbufs; i++) {
133                 struct pipe_surface *psurf = job->cbufs[i];
134                 if (!psurf)
135                         continue;
136
137                 cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG, rt) {
138                         struct vc5_surface *surf = vc5_surface(psurf);
139                         struct vc5_resource *rsc = vc5_resource(psurf->texture);
140                         rt.address = cl_address(rsc->bo, surf->offset);
141                         rt.internal_type = surf->internal_type;
142                         rt.output_image_format = surf->format;
143                         rt.memory_format = surf->tiling;
144                         rt.internal_bpp = surf->internal_bpp;
145                         rt.render_target_number = i;
146
147                         if (job->resolve & PIPE_CLEAR_COLOR0 << i)
148                                 rsc->writes++;
149                 }
150
151                 cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1,
152                         clear) {
153                         clear.clear_color_low_32_bits = job->clear_color[i][0];
154                         clear.clear_color_next_24_bits = job->clear_color[i][1] & 0xffffff;
155                         clear.render_target_number = i;
156                 };
157
158                 if (util_format_get_blocksize(psurf->format) > 7) {
159                         cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART2,
160                                 clear) {
161                                 clear.clear_color_mid_low_32_bits =
162                                         ((job->clear_color[i][1] >> 24) |
163                                          (job->clear_color[i][2] << 8));
164                                 clear.clear_color_mid_high_24_bits =
165                                         ((job->clear_color[i][2] >> 24) |
166                                          ((job->clear_color[i][3] & 0xffff) << 8));
167                                 clear.render_target_number = i;
168                         };
169                 }
170
171                 if (util_format_get_blocksize(psurf->format) > 14) {
172                         cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART3,
173                                 clear) {
174                                 clear.clear_color_high_16_bits = job->clear_color[i][3] >> 16;
175                                 clear.render_target_number = i;
176                         };
177                 }
178         }
179
180         /* TODO: Don't bother emitting if we don't load/clear Z/S. */
181         if (job->zsbuf) {
182                 struct pipe_surface *psurf = job->zsbuf;
183                 struct vc5_surface *surf = vc5_surface(psurf);
184                 struct vc5_resource *rsc = vc5_resource(psurf->texture);
185
186                 cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG, zs) {
187                         zs.address = cl_address(rsc->bo, surf->offset);
188
189                         zs.internal_type = surf->internal_type;
190                         zs.output_image_format = surf->format;
191
192                         struct vc5_resource_slice *slice = &rsc->slices[psurf->u.tex.level];
193                         /* XXX */
194                         zs.padded_height_of_output_image_in_uif_blocks =
195                                 (slice->size / slice->stride) / (2 * vc5_utile_height(rsc->cpp));
196
197                         assert(surf->tiling != VC5_TILING_RASTER);
198                         zs.memory_format = surf->tiling;
199                 }
200
201                 if (job->resolve & PIPE_CLEAR_DEPTHSTENCIL)
202                         rsc->writes++;
203         }
204
205         /* Ends rendering mode config. */
206         cl_emit(&job->rcl, TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES,
207                 clear) {
208                 clear.z_clear_value = job->clear_z;
209                 clear.stencil_vg_mask_clear_value = job->clear_s;
210         };
211
212         /* Always set initial block size before the first branch, which needs
213          * to match the value from binning mode config.
214          */
215         cl_emit(&job->rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
216                 init.use_auto_chained_tile_lists = true;
217                 init.size_of_first_block_in_chained_tile_lists =
218                         TILE_ALLOCATION_BLOCK_SIZE_64B;
219         }
220
221         uint32_t supertile_w = 1, supertile_h = 1;
222
223         /* If doing multicore binning, we would need to initialize each core's
224          * tile list here.
225          */
226         cl_emit(&job->rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
227                 list.address = cl_address(job->tile_alloc, 0);
228         }
229
230         cl_emit(&job->rcl, MULTICORE_RENDERING_SUPERTILE_CONFIGURATION, config) {
231                 uint32_t frame_w_in_supertiles, frame_h_in_supertiles;
232                 const uint32_t max_supertiles = 256;
233
234                 /* Size up our supertiles until we get under the limit. */
235                 for (;;) {
236                         frame_w_in_supertiles = div_round_up(job->draw_tiles_x,
237                                                              supertile_w);
238                         frame_h_in_supertiles = div_round_up(job->draw_tiles_y,
239                                                              supertile_h);
240                         if (frame_w_in_supertiles * frame_h_in_supertiles <
241                             max_supertiles) {
242                                 break;
243                         }
244
245                         if (supertile_w < supertile_h)
246                                 supertile_w++;
247                         else
248                                 supertile_h++;
249                 }
250
251                 config.total_frame_width_in_tiles = job->draw_tiles_x;
252                 config.total_frame_height_in_tiles = job->draw_tiles_y;
253
254                 config.supertile_width_in_tiles_minus_1 = supertile_w - 1;
255                 config.supertile_height_in_tiles_minus_1 = supertile_h - 1;
256
257                 config.total_frame_width_in_supertiles = frame_w_in_supertiles;
258                 config.total_frame_height_in_supertiles = frame_h_in_supertiles;
259         }
260
261         /* Start by clearing the tile buffer. */
262         cl_emit(&job->rcl, TILE_COORDINATES, coords) {
263                 coords.tile_column_number = 0;
264                 coords.tile_row_number = 0;
265         }
266
267         cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) {
268                 store.buffer_to_store = NONE;
269         }
270
271         cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush);
272
273         vc5_rcl_emit_generic_per_tile_list(job);
274
275         cl_emit(&job->rcl, WAIT_ON_SEMAPHORE, sem);
276
277         /* XXX: Use Morton order */
278         uint32_t supertile_w_in_pixels = job->tile_width * supertile_w;
279         uint32_t supertile_h_in_pixels = job->tile_height * supertile_h;
280         uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels;
281         uint32_t min_y_supertile = job->draw_min_y / supertile_h_in_pixels;
282         uint32_t max_x_supertile = (job->draw_max_x - 1) / supertile_w_in_pixels;
283         uint32_t max_y_supertile = (job->draw_max_y - 1) / supertile_h_in_pixels;
284
285         for (int y = min_y_supertile; y <= max_y_supertile; y++) {
286                 for (int x = min_x_supertile; x <= max_x_supertile; x++) {
287                         cl_emit(&job->rcl, SUPERTILE_COORDINATES, coords) {
288                                 coords.column_number_in_supertiles = x;
289                                 coords.row_number_in_supertiles = y;
290                         }
291                 }
292         }
293
294         cl_emit(&job->rcl, END_OF_RENDERING, end);
295 }