OSDN Git Service

broadcom/vc5: Make sure the TMU indirect struct is appropriately aligned.
[android-x86/external-mesa.git] / src / gallium / drivers / vc5 / vc5_emit.c
1 /*
2  * Copyright © 2014-2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23
24 #include "util/u_format.h"
25 #include "util/u_half.h"
26 #include "vc5_context.h"
27 #include "broadcom/cle/v3d_packet_v33_pack.h"
28 #include "broadcom/compiler/v3d_compiler.h"
29
30 static uint8_t
31 vc5_factor(enum pipe_blendfactor factor, bool dst_alpha_one)
32 {
33         /* We may get a bad blendfactor when blending is disabled. */
34         if (factor == 0)
35                 return V3D_BLEND_FACTOR_ZERO;
36
37         switch (factor) {
38         case PIPE_BLENDFACTOR_ZERO:
39                 return V3D_BLEND_FACTOR_ZERO;
40         case PIPE_BLENDFACTOR_ONE:
41                 return V3D_BLEND_FACTOR_ONE;
42         case PIPE_BLENDFACTOR_SRC_COLOR:
43                 return V3D_BLEND_FACTOR_SRC_COLOR;
44         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
45                 return V3D_BLEND_FACTOR_INV_SRC_COLOR;
46         case PIPE_BLENDFACTOR_DST_COLOR:
47                 return V3D_BLEND_FACTOR_DST_COLOR;
48         case PIPE_BLENDFACTOR_INV_DST_COLOR:
49                 return V3D_BLEND_FACTOR_INV_DST_COLOR;
50         case PIPE_BLENDFACTOR_SRC_ALPHA:
51                 return V3D_BLEND_FACTOR_SRC_ALPHA;
52         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
53                 return V3D_BLEND_FACTOR_INV_SRC_ALPHA;
54         case PIPE_BLENDFACTOR_DST_ALPHA:
55                 return (dst_alpha_one ?
56                         V3D_BLEND_FACTOR_ONE :
57                         V3D_BLEND_FACTOR_DST_ALPHA);
58         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
59                 return (dst_alpha_one ?
60                         V3D_BLEND_FACTOR_ZERO :
61                         V3D_BLEND_FACTOR_INV_DST_ALPHA);
62         case PIPE_BLENDFACTOR_CONST_COLOR:
63                 return V3D_BLEND_FACTOR_CONST_COLOR;
64         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
65                 return V3D_BLEND_FACTOR_INV_CONST_COLOR;
66         case PIPE_BLENDFACTOR_CONST_ALPHA:
67                 return V3D_BLEND_FACTOR_CONST_ALPHA;
68         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
69                 return V3D_BLEND_FACTOR_INV_CONST_ALPHA;
70         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
71                 return V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE;
72         default:
73                 unreachable("Bad blend factor");
74         }
75 }
76
77 static inline uint16_t
78 swizzled_border_color(struct pipe_sampler_state *sampler,
79                       struct vc5_sampler_view *sview,
80                       int chan)
81 {
82         const struct util_format_description *desc =
83                 util_format_description(sview->base.format);
84         uint8_t swiz = chan;
85
86         /* If we're doing swizzling in the sampler, then only rearrange the
87          * border color for the mismatch between the VC5 texture format and
88          * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by
89          * the sampler's swizzle.
90          *
91          * For swizzling in the shader, we don't do any pre-swizzling of the
92          * border color.
93          */
94         if (vc5_get_tex_return_size(sview->base.format) != 32)
95                 swiz = desc->swizzle[swiz];
96
97         switch (swiz) {
98         case PIPE_SWIZZLE_0:
99                 return util_float_to_half(0.0);
100         case PIPE_SWIZZLE_1:
101                 return util_float_to_half(1.0);
102         default:
103                 return util_float_to_half(sampler->border_color.f[swiz]);
104         }
105 }
106
107 static void
108 emit_one_texture(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex,
109                  int i)
110 {
111         struct vc5_job *job = vc5->job;
112         struct pipe_sampler_state *psampler = stage_tex->samplers[i];
113         struct vc5_sampler_state *sampler = vc5_sampler_state(psampler);
114         struct pipe_sampler_view *psview = stage_tex->textures[i];
115         struct vc5_sampler_view *sview = vc5_sampler_view(psview);
116         struct pipe_resource *prsc = psview->texture;
117         struct vc5_resource *rsc = vc5_resource(prsc);
118
119         stage_tex->texture_state[i].offset =
120                 vc5_cl_ensure_space(&job->indirect,
121                                     cl_packet_length(TEXTURE_SHADER_STATE),
122                                     32);
123         vc5_bo_set_reference(&stage_tex->texture_state[i].bo,
124                              job->indirect.bo);
125
126         struct V3D33_TEXTURE_SHADER_STATE unpacked = {
127                 /* XXX */
128                 .border_color_red = swizzled_border_color(psampler, sview, 0),
129                 .border_color_green = swizzled_border_color(psampler, sview, 1),
130                 .border_color_blue = swizzled_border_color(psampler, sview, 2),
131                 .border_color_alpha = swizzled_border_color(psampler, sview, 3),
132
133                 /* XXX: Disable min/maxlod for txf */
134                 .max_level_of_detail = MIN2(MIN2(psampler->max_lod,
135                                                  VC5_MAX_MIP_LEVELS),
136                                             psview->u.tex.last_level),
137
138                 .texture_base_pointer = cl_address(rsc->bo,
139                                                    rsc->slices[0].offset),
140         };
141
142         int min_img_filter = psampler->min_img_filter;
143         int min_mip_filter = psampler->min_mip_filter;
144         int mag_img_filter = psampler->mag_img_filter;
145
146         if (vc5_get_tex_return_size(psview->format) == 32) {
147                 min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
148                 mag_img_filter = PIPE_TEX_FILTER_NEAREST;
149                 mag_img_filter = PIPE_TEX_FILTER_NEAREST;
150         }
151
152         bool min_nearest = (min_img_filter == PIPE_TEX_FILTER_NEAREST);
153         switch (min_mip_filter) {
154         case PIPE_TEX_MIPFILTER_NONE:
155                 unpacked.minification_filter = 0 + min_nearest;
156                 break;
157         case PIPE_TEX_MIPFILTER_NEAREST:
158                 unpacked.minification_filter = 2 + !min_nearest;
159                 break;
160         case PIPE_TEX_MIPFILTER_LINEAR:
161                 unpacked.minification_filter = 4 + !min_nearest;
162                 break;
163         }
164         unpacked.magnification_filter = (mag_img_filter ==
165                                          PIPE_TEX_FILTER_NEAREST);
166
167         uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)];
168         cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked);
169
170         for (int i = 0; i < ARRAY_SIZE(packed); i++)
171                 packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i];
172
173         /* TMU indirect structs need to be 32b aligned. */
174         vc5_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32);
175         cl_emit_prepacked(&job->indirect, &packed);
176 }
177
178 static void
179 emit_textures(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex)
180 {
181         for (int i = 0; i < stage_tex->num_textures; i++)
182                 emit_one_texture(vc5, stage_tex, i);
183 }
184
185 void
186 vc5_emit_state(struct pipe_context *pctx)
187 {
188         struct vc5_context *vc5 = vc5_context(pctx);
189         struct vc5_job *job = vc5->job;
190
191         if (vc5->dirty & (VC5_DIRTY_SCISSOR | VC5_DIRTY_VIEWPORT |
192                           VC5_DIRTY_RASTERIZER)) {
193                 float *vpscale = vc5->viewport.scale;
194                 float *vptranslate = vc5->viewport.translate;
195                 float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
196                 float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
197                 float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
198                 float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
199
200                 /* Clip to the scissor if it's enabled, but still clip to the
201                  * drawable regardless since that controls where the binner
202                  * tries to put things.
203                  *
204                  * Additionally, always clip the rendering to the viewport,
205                  * since the hardware does guardband clipping, meaning
206                  * primitives would rasterize outside of the view volume.
207                  */
208                 uint32_t minx, miny, maxx, maxy;
209                 if (!vc5->rasterizer->base.scissor) {
210                         minx = MAX2(vp_minx, 0);
211                         miny = MAX2(vp_miny, 0);
212                         maxx = MIN2(vp_maxx, job->draw_width);
213                         maxy = MIN2(vp_maxy, job->draw_height);
214                 } else {
215                         minx = MAX2(vp_minx, vc5->scissor.minx);
216                         miny = MAX2(vp_miny, vc5->scissor.miny);
217                         maxx = MIN2(vp_maxx, vc5->scissor.maxx);
218                         maxy = MIN2(vp_maxy, vc5->scissor.maxy);
219                 }
220
221                 cl_emit(&job->bcl, CLIP_WINDOW, clip) {
222                         clip.clip_window_left_pixel_coordinate = minx;
223                         clip.clip_window_bottom_pixel_coordinate = miny;
224                         clip.clip_window_width_in_pixels = maxx - minx;
225                         clip.clip_window_height_in_pixels = maxy - miny;
226                 }
227
228                 job->draw_min_x = MIN2(job->draw_min_x, minx);
229                 job->draw_min_y = MIN2(job->draw_min_y, miny);
230                 job->draw_max_x = MAX2(job->draw_max_x, maxx);
231                 job->draw_max_y = MAX2(job->draw_max_y, maxy);
232         }
233
234         if (vc5->dirty & (VC5_DIRTY_RASTERIZER |
235                           VC5_DIRTY_ZSA |
236                           VC5_DIRTY_BLEND |
237                           VC5_DIRTY_COMPILED_FS)) {
238                 cl_emit(&job->bcl, CONFIGURATION_BITS, config) {
239                         config.enable_forward_facing_primitive =
240                                 !(vc5->rasterizer->base.cull_face &
241                                   PIPE_FACE_FRONT);
242                         config.enable_reverse_facing_primitive =
243                                 !(vc5->rasterizer->base.cull_face &
244                                   PIPE_FACE_BACK);
245                         /* This seems backwards, but it's what gets the
246                          * clipflat test to pass.
247                          */
248                         config.clockwise_primitives =
249                                 vc5->rasterizer->base.front_ccw;
250
251                         config.enable_depth_offset =
252                                 vc5->rasterizer->base.offset_tri;
253
254                         config.rasterizer_oversample_mode =
255                                 vc5->rasterizer->base.multisample;
256
257                         config.direct3d_provoking_vertex =
258                                 vc5->rasterizer->base.flatshade_first;
259
260                         config.blend_enable = vc5->blend->rt[0].blend_enable;
261
262                         config.early_z_updates_enable = true;
263                         if (vc5->zsa->base.depth.enabled) {
264                                 config.z_updates_enable =
265                                         vc5->zsa->base.depth.writemask;
266                                 config.early_z_enable =
267                                         vc5->zsa->early_z_enable;
268                                 config.depth_test_function =
269                                         vc5->zsa->base.depth.func;
270                         } else {
271                                 config.depth_test_function = PIPE_FUNC_ALWAYS;
272                         }
273
274                         config.stencil_enable =
275                                 vc5->zsa->base.stencil[0].enabled;
276                 }
277
278         }
279
280         if (vc5->dirty & VC5_DIRTY_RASTERIZER) {
281                 cl_emit(&job->bcl, DEPTH_OFFSET, depth) {
282                         depth.depth_offset_factor =
283                                 vc5->rasterizer->offset_factor;
284                         depth.depth_offset_units =
285                                 vc5->rasterizer->offset_units;
286                 }
287
288                 cl_emit(&job->bcl, POINT_SIZE, point_size) {
289                         point_size.point_size = vc5->rasterizer->point_size;
290                 }
291
292                 cl_emit(&job->bcl, LINE_WIDTH, line_width) {
293                         line_width.line_width = vc5->rasterizer->base.line_width;
294                 }
295         }
296
297         if (vc5->dirty & VC5_DIRTY_VIEWPORT) {
298                 cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
299                         clip.viewport_half_width_in_1_256th_of_pixel =
300                                 vc5->viewport.scale[0] * 256.0f;
301                         clip.viewport_half_height_in_1_256th_of_pixel =
302                                 vc5->viewport.scale[1] * 256.0f;
303                 }
304
305                 cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
306                         clip.viewport_z_offset_zc_to_zs =
307                                 vc5->viewport.translate[2];
308                         clip.viewport_z_scale_zc_to_zs =
309                                 vc5->viewport.scale[2];
310                 }
311                 if (0 /* XXX */) {
312                 cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
313                         clip.minimum_zw = (vc5->viewport.translate[2] -
314                                            vc5->viewport.scale[2]);
315                         clip.maximum_zw = (vc5->viewport.translate[2] +
316                                            vc5->viewport.scale[2]);
317                 }
318                 }
319
320                 cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
321                         vp.viewport_centre_x_coordinate =
322                                 vc5->viewport.translate[0];
323                         vp.viewport_centre_y_coordinate =
324                                 vc5->viewport.translate[1];
325                 }
326         }
327
328         if (vc5->dirty & VC5_DIRTY_BLEND) {
329                 struct pipe_blend_state *blend = vc5->blend;
330
331                 cl_emit(&job->bcl, BLEND_CONFIG, config) {
332                         struct pipe_rt_blend_state *rtblend = &blend->rt[0];
333
334                         config.colour_blend_mode = rtblend->rgb_func;
335                         config.colour_blend_dst_factor =
336                                 vc5_factor(rtblend->rgb_dst_factor,
337                                            vc5->blend_dst_alpha_one);
338                         config.colour_blend_src_factor =
339                                 vc5_factor(rtblend->rgb_src_factor,
340                                            vc5->blend_dst_alpha_one);
341
342                         config.alpha_blend_mode = rtblend->alpha_func;
343                         config.alpha_blend_dst_factor =
344                                 vc5_factor(rtblend->alpha_dst_factor,
345                                            vc5->blend_dst_alpha_one);
346                         config.alpha_blend_src_factor =
347                                 vc5_factor(rtblend->alpha_src_factor,
348                                            vc5->blend_dst_alpha_one);
349                 }
350
351                 cl_emit(&job->bcl, COLOUR_WRITE_MASKS, mask) {
352                         if (blend->independent_blend_enable) {
353                                 mask.render_target_0_per_colour_component_write_masks =
354                                         (~blend->rt[0].colormask) & 0xf;
355                                 mask.render_target_1_per_colour_component_write_masks =
356                                         (~blend->rt[1].colormask) & 0xf;
357                                 mask.render_target_2_per_colour_component_write_masks =
358                                         (~blend->rt[2].colormask) & 0xf;
359                                 mask.render_target_3_per_colour_component_write_masks =
360                                         (~blend->rt[3].colormask) & 0xf;
361                         } else {
362                                 uint8_t colormask = (~blend->rt[0].colormask) & 0xf;
363                                 mask.render_target_0_per_colour_component_write_masks = colormask;
364                                 mask.render_target_1_per_colour_component_write_masks = colormask;
365                                 mask.render_target_2_per_colour_component_write_masks = colormask;
366                                 mask.render_target_3_per_colour_component_write_masks = colormask;
367                         }
368                 }
369         }
370
371         if (vc5->dirty & VC5_DIRTY_BLEND_COLOR) {
372                 cl_emit(&job->bcl, BLEND_CONSTANT_COLOUR, colour) {
373                         colour.red_f16 = (vc5->swap_color_rb ?
374                                           vc5->blend_color.hf[2] :
375                                           vc5->blend_color.hf[0]);
376                         colour.green_f16 = vc5->blend_color.hf[1];
377                         colour.blue_f16 = (vc5->swap_color_rb ?
378                                            vc5->blend_color.hf[0] :
379                                            vc5->blend_color.hf[2]);
380                         colour.alpha_f16 = vc5->blend_color.hf[3];
381                 }
382         }
383
384         if (vc5->dirty & (VC5_DIRTY_ZSA | VC5_DIRTY_STENCIL_REF)) {
385                 struct pipe_stencil_state *front = &vc5->zsa->base.stencil[0];
386                 struct pipe_stencil_state *back = &vc5->zsa->base.stencil[1];
387
388                 cl_emit(&job->bcl, STENCIL_CONFIG, config) {
389                         config.front_config = true;
390                         config.back_config = !back->enabled;
391
392                         config.stencil_write_mask = front->writemask;
393                         config.stencil_test_mask = front->valuemask;
394
395                         config.stencil_test_function = front->func;
396                         config.stencil_pass_op = front->zpass_op;
397                         config.depth_test_fail_op = front->zfail_op;
398                         config.stencil_test_fail_op = front->fail_op;
399
400                         config.stencil_ref_value = vc5->stencil_ref.ref_value[0];
401                 }
402
403                 if (back->enabled) {
404                         cl_emit(&job->bcl, STENCIL_CONFIG, config) {
405                                 config.front_config = false;
406                                 config.back_config = true;
407
408                                 config.stencil_write_mask = back->writemask;
409                                 config.stencil_test_mask = back->valuemask;
410
411                                 config.stencil_test_function = back->func;
412                                 config.stencil_pass_op = back->zpass_op;
413                                 config.depth_test_fail_op = back->zfail_op;
414                                 config.stencil_test_fail_op = back->fail_op;
415
416                                 config.stencil_ref_value =
417                                         vc5->stencil_ref.ref_value[1];
418                         }
419                 }
420         }
421
422         if (vc5->dirty & VC5_DIRTY_FRAGTEX)
423                 emit_textures(vc5, &vc5->fragtex);
424
425         if (vc5->dirty & VC5_DIRTY_VERTTEX)
426                 emit_textures(vc5, &vc5->verttex);
427
428         if (vc5->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) {
429                 /* XXX: Need to handle more than 24 entries. */
430                 cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
431                         flags.varying_offset_v0 = 0;
432
433                         flags.flat_shade_flags_for_varyings_v024 =
434                                 vc5->prog.fs->prog_data.fs->flat_shade_flags[0] & 0xfffff;
435
436                         if (vc5->rasterizer->base.flatshade) {
437                                 flags.flat_shade_flags_for_varyings_v024 |=
438                                         vc5->prog.fs->prog_data.fs->shade_model_flags[0] & 0xfffff;
439                         }
440                 }
441         }
442
443         if (vc5->dirty & VC5_DIRTY_STREAMOUT) {
444                 struct vc5_streamout_stateobj *so = &vc5->streamout;
445
446                 if (so->num_targets) {
447                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) {
448                                 tfe.number_of_32_bit_output_buffer_address_following =
449                                         so->num_targets;
450                                 tfe.number_of_16_bit_output_data_specs_following =
451                                         vc5->prog.bind_vs->num_tf_specs;
452                         };
453
454                         for (int i = 0; i < vc5->prog.bind_vs->num_tf_specs; i++) {
455                                 cl_emit_prepacked(&job->bcl,
456                                                   &vc5->prog.bind_vs->tf_specs[i]);
457                         }
458
459                         for (int i = 0; i < so->num_targets; i++) {
460                                 const struct pipe_stream_output_target *target =
461                                         so->targets[i];
462                                 struct vc5_resource *rsc =
463                                         vc5_resource(target->buffer);
464
465                                 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) {
466                                         output.address =
467                                                 cl_address(rsc->bo,
468                                                            target->buffer_offset);
469                                 };
470
471                                 vc5_job_add_write_resource(vc5->job,
472                                                            target->buffer);
473                                 /* XXX: buffer_size? */
474                         }
475                 } else {
476                         /* XXX? */
477                 }
478         }
479 }