OSDN Git Service

53f860d2d7ddc9b6b38477c0c52d555b69b8c676
[android-x86/external-mesa.git] / src / gallium / drivers / radeonsi / si_state.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Christian König <christian.koenig@amd.com>
25  */
26
27 #include "si_pipe.h"
28 #include "sid.h"
29 #include "radeon/r600_cs.h"
30 #include "radeon/r600_query.h"
31
32 #include "util/u_dual_blend.h"
33 #include "util/u_format.h"
34 #include "util/u_format_s3tc.h"
35 #include "util/u_memory.h"
36 #include "util/u_resource.h"
37
38 /* Initialize an external atom (owned by ../radeon). */
39 static void
40 si_init_external_atom(struct si_context *sctx, struct r600_atom *atom,
41                       struct r600_atom **list_elem)
42 {
43         atom->id = list_elem - sctx->atoms.array + 1;
44         *list_elem = atom;
45 }
46
47 /* Initialize an atom owned by radeonsi.  */
48 void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
49                   struct r600_atom **list_elem,
50                   void (*emit_func)(struct si_context *ctx, struct r600_atom *state))
51 {
52         atom->emit = (void*)emit_func;
53         atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */
54         *list_elem = atom;
55 }
56
57 static unsigned si_map_swizzle(unsigned swizzle)
58 {
59         switch (swizzle) {
60         case PIPE_SWIZZLE_Y:
61                 return V_008F0C_SQ_SEL_Y;
62         case PIPE_SWIZZLE_Z:
63                 return V_008F0C_SQ_SEL_Z;
64         case PIPE_SWIZZLE_W:
65                 return V_008F0C_SQ_SEL_W;
66         case PIPE_SWIZZLE_0:
67                 return V_008F0C_SQ_SEL_0;
68         case PIPE_SWIZZLE_1:
69                 return V_008F0C_SQ_SEL_1;
70         default: /* PIPE_SWIZZLE_X */
71                 return V_008F0C_SQ_SEL_X;
72         }
73 }
74
/**
 * Convert a float to a fixed-point value with frac_bits fractional bits.
 *
 * The scaled value is truncated toward zero.  Negative inputs yield the
 * low 32 bits of the two's complement representation; the conversion goes
 * through int64_t because converting a negative floating-point value
 * directly to an unsigned type is undefined behavior in C.
 *
 * \param value      number to convert
 * \param frac_bits  number of fractional bits; must be less than 32
 * \return the fixed-point encoding of value
 */
static uint32_t S_FIXED(float value, uint32_t frac_bits)
{
        /* Unsigned shift so that frac_bits == 31 does not overflow an int. */
        const float scaled = value * (float)(1u << frac_bits);

        return (uint32_t)(int64_t)scaled;
}
79
/*
 * Pack a float into unsigned 12.4 fixed point.
 *
 * Values <= 0 (and NaN) clamp to 0, values >= 4096 clamp to 0xffff, and
 * everything in between is scaled by 16 and truncated.
 */
static unsigned si_pack_float_12p4(float x)
{
        /* Written as !(x > 0) so that NaN is clamped as well; it would
         * otherwise reach the float-to-unsigned conversion below, which is
         * undefined for NaN.
         */
        if (!(x > 0))
                return 0;

        if (x >= 4096)
                return 0xffff;

        return (unsigned)(x * 16);
}
86
87 /*
88  * Inferred framebuffer and blender state.
89  *
90  * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending
91  * if there is not enough PS outputs.
92  */
93 static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom)
94 {
95         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
96         struct si_state_blend *blend = sctx->queued.named.blend;
97         uint32_t cb_target_mask, i;
98
99         /* CB_COLORn_INFO.FORMAT=INVALID disables empty colorbuffer slots. */
100         if (blend)
101                 cb_target_mask = blend->cb_target_mask;
102         else
103                 cb_target_mask = 0xffffffff;
104
105         /* Avoid a hang that happens when dual source blending is enabled
106          * but there is not enough color outputs. This is undefined behavior,
107          * so disable color writes completely.
108          *
109          * Reproducible with Unigine Heaven 4.0 and drirc missing.
110          */
111         if (blend && blend->dual_src_blend &&
112             sctx->ps_shader.cso &&
113             (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3)
114                 cb_target_mask = 0;
115
116         radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask);
117
118         /* STONEY-specific register settings. */
119         if (sctx->b.family == CHIP_STONEY) {
120                 unsigned spi_shader_col_format =
121                         sctx->ps_shader.cso ?
122                         sctx->ps_shader.current->key.ps.epilog.spi_shader_col_format : 0;
123                 unsigned sx_ps_downconvert = 0;
124                 unsigned sx_blend_opt_epsilon = 0;
125                 unsigned sx_blend_opt_control = 0;
126
127                 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
128                         struct r600_surface *surf =
129                                 (struct r600_surface*)sctx->framebuffer.state.cbufs[i];
130                         unsigned format, swap, spi_format, colormask;
131                         bool has_alpha, has_rgb;
132
133                         if (!surf)
134                                 continue;
135
136                         format = G_028C70_FORMAT(surf->cb_color_info);
137                         swap = G_028C70_COMP_SWAP(surf->cb_color_info);
138                         spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;
139                         colormask = (cb_target_mask >> (i * 4)) & 0xf;
140
141                         /* Set if RGB and A are present. */
142                         has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib);
143
144                         if (format == V_028C70_COLOR_8 ||
145                             format == V_028C70_COLOR_16 ||
146                             format == V_028C70_COLOR_32)
147                                 has_rgb = !has_alpha;
148                         else
149                                 has_rgb = true;
150
151                         /* Check the colormask and export format. */
152                         if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A)))
153                                 has_rgb = false;
154                         if (!(colormask & PIPE_MASK_A))
155                                 has_alpha = false;
156
157                         if (spi_format == V_028714_SPI_SHADER_ZERO) {
158                                 has_rgb = false;
159                                 has_alpha = false;
160                         }
161
162                         /* Disable value checking for disabled channels. */
163                         if (!has_rgb)
164                                 sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
165                         if (!has_alpha)
166                                 sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
167
168                         /* Enable down-conversion for 32bpp and smaller formats. */
169                         switch (format) {
170                         case V_028C70_COLOR_8:
171                         case V_028C70_COLOR_8_8:
172                         case V_028C70_COLOR_8_8_8_8:
173                                 /* For 1 and 2-channel formats, use the superset thereof. */
174                                 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
175                                     spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
176                                     spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
177                                         sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
178                                         sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
179                                 }
180                                 break;
181
182                         case V_028C70_COLOR_5_6_5:
183                                 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
184                                         sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
185                                         sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
186                                 }
187                                 break;
188
189                         case V_028C70_COLOR_1_5_5_5:
190                                 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
191                                         sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
192                                         sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
193                                 }
194                                 break;
195
196                         case V_028C70_COLOR_4_4_4_4:
197                                 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
198                                         sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
199                                         sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
200                                 }
201                                 break;
202
203                         case V_028C70_COLOR_32:
204                                 if (swap == V_0280A0_SWAP_STD &&
205                                     spi_format == V_028714_SPI_SHADER_32_R)
206                                         sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
207                                 else if (swap == V_0280A0_SWAP_ALT_REV &&
208                                          spi_format == V_028714_SPI_SHADER_32_AR)
209                                         sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
210                                 break;
211
212                         case V_028C70_COLOR_16:
213                         case V_028C70_COLOR_16_16:
214                                 /* For 1-channel formats, use the superset thereof. */
215                                 if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
216                                     spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
217                                     spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
218                                     spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
219                                         if (swap == V_0280A0_SWAP_STD ||
220                                             swap == V_0280A0_SWAP_STD_REV)
221                                                 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
222                                         else
223                                                 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
224                                 }
225                                 break;
226
227                         case V_028C70_COLOR_10_11_11:
228                                 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
229                                         sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
230                                         sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4);
231                                 }
232                                 break;
233
234                         case V_028C70_COLOR_2_10_10_10:
235                                 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
236                                         sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
237                                         sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
238                                 }
239                                 break;
240                         }
241                 }
242
243                 if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) {
244                         sx_ps_downconvert = 0;
245                         sx_blend_opt_epsilon = 0;
246                         sx_blend_opt_control = 0;
247                 }
248
249                 radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
250                 radeon_emit(cs, sx_ps_downconvert);     /* R_028754_SX_PS_DOWNCONVERT */
251                 radeon_emit(cs, sx_blend_opt_epsilon);  /* R_028758_SX_BLEND_OPT_EPSILON */
252                 radeon_emit(cs, sx_blend_opt_control);  /* R_02875C_SX_BLEND_OPT_CONTROL */
253         }
254 }
255
256 /*
257  * Blender functions
258  */
259
260 static uint32_t si_translate_blend_function(int blend_func)
261 {
262         switch (blend_func) {
263         case PIPE_BLEND_ADD:
264                 return V_028780_COMB_DST_PLUS_SRC;
265         case PIPE_BLEND_SUBTRACT:
266                 return V_028780_COMB_SRC_MINUS_DST;
267         case PIPE_BLEND_REVERSE_SUBTRACT:
268                 return V_028780_COMB_DST_MINUS_SRC;
269         case PIPE_BLEND_MIN:
270                 return V_028780_COMB_MIN_DST_SRC;
271         case PIPE_BLEND_MAX:
272                 return V_028780_COMB_MAX_DST_SRC;
273         default:
274                 R600_ERR("Unknown blend function %d\n", blend_func);
275                 assert(0);
276                 break;
277         }
278         return 0;
279 }
280
281 static uint32_t si_translate_blend_factor(int blend_fact)
282 {
283         switch (blend_fact) {
284         case PIPE_BLENDFACTOR_ONE:
285                 return V_028780_BLEND_ONE;
286         case PIPE_BLENDFACTOR_SRC_COLOR:
287                 return V_028780_BLEND_SRC_COLOR;
288         case PIPE_BLENDFACTOR_SRC_ALPHA:
289                 return V_028780_BLEND_SRC_ALPHA;
290         case PIPE_BLENDFACTOR_DST_ALPHA:
291                 return V_028780_BLEND_DST_ALPHA;
292         case PIPE_BLENDFACTOR_DST_COLOR:
293                 return V_028780_BLEND_DST_COLOR;
294         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
295                 return V_028780_BLEND_SRC_ALPHA_SATURATE;
296         case PIPE_BLENDFACTOR_CONST_COLOR:
297                 return V_028780_BLEND_CONSTANT_COLOR;
298         case PIPE_BLENDFACTOR_CONST_ALPHA:
299                 return V_028780_BLEND_CONSTANT_ALPHA;
300         case PIPE_BLENDFACTOR_ZERO:
301                 return V_028780_BLEND_ZERO;
302         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
303                 return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
304         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
305                 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
306         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
307                 return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
308         case PIPE_BLENDFACTOR_INV_DST_COLOR:
309                 return V_028780_BLEND_ONE_MINUS_DST_COLOR;
310         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
311                 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
312         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
313                 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
314         case PIPE_BLENDFACTOR_SRC1_COLOR:
315                 return V_028780_BLEND_SRC1_COLOR;
316         case PIPE_BLENDFACTOR_SRC1_ALPHA:
317                 return V_028780_BLEND_SRC1_ALPHA;
318         case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
319                 return V_028780_BLEND_INV_SRC1_COLOR;
320         case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
321                 return V_028780_BLEND_INV_SRC1_ALPHA;
322         default:
323                 R600_ERR("Bad blend factor %d not supported!\n", blend_fact);
324                 assert(0);
325                 break;
326         }
327         return 0;
328 }
329
330 static uint32_t si_translate_blend_opt_function(int blend_func)
331 {
332         switch (blend_func) {
333         case PIPE_BLEND_ADD:
334                 return V_028760_OPT_COMB_ADD;
335         case PIPE_BLEND_SUBTRACT:
336                 return V_028760_OPT_COMB_SUBTRACT;
337         case PIPE_BLEND_REVERSE_SUBTRACT:
338                 return V_028760_OPT_COMB_REVSUBTRACT;
339         case PIPE_BLEND_MIN:
340                 return V_028760_OPT_COMB_MIN;
341         case PIPE_BLEND_MAX:
342                 return V_028760_OPT_COMB_MAX;
343         default:
344                 return V_028760_OPT_COMB_BLEND_DISABLED;
345         }
346 }
347
348 static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
349 {
350         switch (blend_fact) {
351         case PIPE_BLENDFACTOR_ZERO:
352                 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
353         case PIPE_BLENDFACTOR_ONE:
354                 return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
355         case PIPE_BLENDFACTOR_SRC_COLOR:
356                 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
357                                 : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
358         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
359                 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
360                                 : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
361         case PIPE_BLENDFACTOR_SRC_ALPHA:
362                 return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
363         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
364                 return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
365         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
366                 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
367                                 : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
368         default:
369                 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
370         }
371 }
372
373 /**
374  * Get rid of DST in the blend factors by commuting the operands:
375  *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
376  */
377 static void si_blend_remove_dst(unsigned *func, unsigned *src_factor,
378                                 unsigned *dst_factor, unsigned expected_dst,
379                                 unsigned replacement_src)
380 {
381         if (*src_factor == expected_dst &&
382             *dst_factor == PIPE_BLENDFACTOR_ZERO) {
383                 *src_factor = PIPE_BLENDFACTOR_ZERO;
384                 *dst_factor = replacement_src;
385
386                 /* Commuting the operands requires reversing subtractions. */
387                 if (*func == PIPE_BLEND_SUBTRACT)
388                         *func = PIPE_BLEND_REVERSE_SUBTRACT;
389                 else if (*func == PIPE_BLEND_REVERSE_SUBTRACT)
390                         *func = PIPE_BLEND_SUBTRACT;
391         }
392 }
393
394 static bool si_blend_factor_uses_dst(unsigned factor)
395 {
396         return factor == PIPE_BLENDFACTOR_DST_COLOR ||
397                 factor == PIPE_BLENDFACTOR_DST_ALPHA ||
398                 factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
399                 factor == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
400                 factor == PIPE_BLENDFACTOR_INV_DST_COLOR;
401 }
402
403 static void *si_create_blend_state_mode(struct pipe_context *ctx,
404                                         const struct pipe_blend_state *state,
405                                         unsigned mode)
406 {
407         struct si_context *sctx = (struct si_context*)ctx;
408         struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
409         struct si_pm4_state *pm4 = &blend->pm4;
410         uint32_t sx_mrt_blend_opt[8] = {0};
411         uint32_t color_control = 0;
412
413         if (!blend)
414                 return NULL;
415
416         blend->alpha_to_coverage = state->alpha_to_coverage;
417         blend->alpha_to_one = state->alpha_to_one;
418         blend->dual_src_blend = util_blend_state_is_dual(state, 0);
419
420         if (state->logicop_enable) {
421                 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
422         } else {
423                 color_control |= S_028808_ROP3(0xcc);
424         }
425
426         si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
427                        S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
428                        S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
429                        S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
430                        S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
431                        S_028B70_ALPHA_TO_MASK_OFFSET3(2));
432
433         if (state->alpha_to_coverage)
434                 blend->need_src_alpha_4bit |= 0xf;
435
436         blend->cb_target_mask = 0;
437         for (int i = 0; i < 8; i++) {
438                 /* state->rt entries > 0 only written if independent blending */
439                 const int j = state->independent_blend_enable ? i : 0;
440
441                 unsigned eqRGB = state->rt[j].rgb_func;
442                 unsigned srcRGB = state->rt[j].rgb_src_factor;
443                 unsigned dstRGB = state->rt[j].rgb_dst_factor;
444                 unsigned eqA = state->rt[j].alpha_func;
445                 unsigned srcA = state->rt[j].alpha_src_factor;
446                 unsigned dstA = state->rt[j].alpha_dst_factor;
447
448                 unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
449                 unsigned blend_cntl = 0;
450
451                 sx_mrt_blend_opt[i] =
452                         S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
453                         S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
454
455                 /* Only set dual source blending for MRT0 to avoid a hang. */
456                 if (i >= 1 && blend->dual_src_blend) {
457                         /* Vulkan does this for dual source blending. */
458                         if (i == 1)
459                                 blend_cntl |= S_028780_ENABLE(1);
460
461                         si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
462                         continue;
463                 }
464
465                 /* Only addition and subtraction equations are supported with
466                  * dual source blending.
467                  */
468                 if (blend->dual_src_blend &&
469                     (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX ||
470                      eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) {
471                         assert(!"Unsupported equation for dual source blending");
472                         si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
473                         continue;
474                 }
475
476                 /* cb_render_state will disable unused ones */
477                 blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i);
478
479                 if (!state->rt[j].colormask || !state->rt[j].blend_enable) {
480                         si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
481                         continue;
482                 }
483
484                 /* Blending optimizations for Stoney.
485                  * These transformations don't change the behavior.
486                  *
487                  * First, get rid of DST in the blend factors:
488                  *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
489                  */
490                 si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB,
491                                     PIPE_BLENDFACTOR_DST_COLOR,
492                                     PIPE_BLENDFACTOR_SRC_COLOR);
493                 si_blend_remove_dst(&eqA, &srcA, &dstA,
494                                     PIPE_BLENDFACTOR_DST_COLOR,
495                                     PIPE_BLENDFACTOR_SRC_COLOR);
496                 si_blend_remove_dst(&eqA, &srcA, &dstA,
497                                     PIPE_BLENDFACTOR_DST_ALPHA,
498                                     PIPE_BLENDFACTOR_SRC_ALPHA);
499
500                 /* Look up the ideal settings from tables. */
501                 srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
502                 dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
503                 srcA_opt = si_translate_blend_opt_factor(srcA, true);
504                 dstA_opt = si_translate_blend_opt_factor(dstA, true);
505
506                 /* Handle interdependencies. */
507                 if (si_blend_factor_uses_dst(srcRGB))
508                         dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
509                 if (si_blend_factor_uses_dst(srcA))
510                         dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
511
512                 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE &&
513                     (dstRGB == PIPE_BLENDFACTOR_ZERO ||
514                      dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
515                      dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE))
516                         dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
517
518                 /* Set the final value. */
519                 sx_mrt_blend_opt[i] =
520                         S_028760_COLOR_SRC_OPT(srcRGB_opt) |
521                         S_028760_COLOR_DST_OPT(dstRGB_opt) |
522                         S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
523                         S_028760_ALPHA_SRC_OPT(srcA_opt) |
524                         S_028760_ALPHA_DST_OPT(dstA_opt) |
525                         S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
526
527                 /* Set blend state. */
528                 blend_cntl |= S_028780_ENABLE(1);
529                 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
530                 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
531                 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
532
533                 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
534                         blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
535                         blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
536                         blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
537                         blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
538                 }
539                 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
540
541                 blend->blend_enable_4bit |= 0xfu << (i * 4);
542
543                 /* This is only important for formats without alpha. */
544                 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
545                     dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
546                     srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
547                     dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
548                     srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
549                     dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
550                         blend->need_src_alpha_4bit |= 0xfu << (i * 4);
551         }
552
553         if (blend->cb_target_mask) {
554                 color_control |= S_028808_MODE(mode);
555         } else {
556                 color_control |= S_028808_MODE(V_028808_CB_DISABLE);
557         }
558
559         if (sctx->b.family == CHIP_STONEY) {
560                 for (int i = 0; i < 8; i++)
561                         si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
562                                        sx_mrt_blend_opt[i]);
563
564                 /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */
565                 if (blend->dual_src_blend || state->logicop_enable ||
566                     mode == V_028808_CB_RESOLVE)
567                         color_control |= S_028808_DISABLE_DUAL_QUAD(1);
568         }
569
570         si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
571         return blend;
572 }
573
574 static void *si_create_blend_state(struct pipe_context *ctx,
575                                    const struct pipe_blend_state *state)
576 {
577         return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
578 }
579
580 static void si_bind_blend_state(struct pipe_context *ctx, void *state)
581 {
582         struct si_context *sctx = (struct si_context *)ctx;
583         si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
584         si_mark_atom_dirty(sctx, &sctx->cb_render_state);
585         sctx->do_update_shaders = true;
586 }
587
588 static void si_delete_blend_state(struct pipe_context *ctx, void *state)
589 {
590         struct si_context *sctx = (struct si_context *)ctx;
591         si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
592 }
593
594 static void si_set_blend_color(struct pipe_context *ctx,
595                                const struct pipe_blend_color *state)
596 {
597         struct si_context *sctx = (struct si_context *)ctx;
598
599         if (memcmp(&sctx->blend_color.state, state, sizeof(*state)) == 0)
600                 return;
601
602         sctx->blend_color.state = *state;
603         si_mark_atom_dirty(sctx, &sctx->blend_color.atom);
604 }
605
606 static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom)
607 {
608         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
609
610         radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
611         radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4);
612 }
613
614 /*
615  * Clipping
616  */
617
618 static void si_set_clip_state(struct pipe_context *ctx,
619                               const struct pipe_clip_state *state)
620 {
621         struct si_context *sctx = (struct si_context *)ctx;
622         struct pipe_constant_buffer cb;
623
624         if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0)
625                 return;
626
627         sctx->clip_state.state = *state;
628         si_mark_atom_dirty(sctx, &sctx->clip_state.atom);
629
630         cb.buffer = NULL;
631         cb.user_buffer = state->ucp;
632         cb.buffer_offset = 0;
633         cb.buffer_size = 4*4*8;
634         si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb);
635         pipe_resource_reference(&cb.buffer, NULL);
636 }
637
638 static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
639 {
640         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
641
642         radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
643         radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4);
644 }
645
646 #define SIX_BITS 0x3F
647
648 static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
649 {
650         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
651         struct tgsi_shader_info *info = si_get_vs_info(sctx);
652         unsigned window_space =
653            info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
654         unsigned clipdist_mask =
655                 info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask;
656         unsigned total_mask = clipdist_mask | (info->culldist_writemask << info->num_written_clipdistance);
657
658         radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
659                 S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
660                 S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
661                 S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
662                 S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) |
663                 S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
664                 S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
665                 S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize ||
666                                             info->writes_edgeflag ||
667                                             info->writes_layer ||
668                                              info->writes_viewport_index) |
669                 S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) |
670                 (sctx->queued.named.rasterizer->clip_plane_enable &
671                  clipdist_mask) | (info->culldist_writemask << 8));
672         radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
673                 sctx->queued.named.rasterizer->pa_cl_clip_cntl |
674                 (clipdist_mask ? 0 :
675                  sctx->queued.named.rasterizer->clip_plane_enable & SIX_BITS) |
676                 S_028810_CLIP_DISABLE(window_space));
677
678         /* reuse needs to be set off if we write oViewport */
679         radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF,
680                                S_028AB4_REUSE_OFF(info->writes_viewport_index));
681 }
682
683 /*
684  * inferred state between framebuffer and rasterizer
685  */
686 static void si_update_poly_offset_state(struct si_context *sctx)
687 {
688         struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
689
690         if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
691                 return;
692
693         /* Use the user format, not db_render_format, so that the polygon
694          * offset behaves as expected by applications.
695          */
696         switch (sctx->framebuffer.state.zsbuf->texture->format) {
697         case PIPE_FORMAT_Z16_UNORM:
698                 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
699                 break;
700         default: /* 24-bit */
701                 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]);
702                 break;
703         case PIPE_FORMAT_Z32_FLOAT:
704         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
705                 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]);
706                 break;
707         }
708 }
709
710 /*
711  * Rasterizer
712  */
713
714 static uint32_t si_translate_fill(uint32_t func)
715 {
716         switch(func) {
717         case PIPE_POLYGON_MODE_FILL:
718                 return V_028814_X_DRAW_TRIANGLES;
719         case PIPE_POLYGON_MODE_LINE:
720                 return V_028814_X_DRAW_LINES;
721         case PIPE_POLYGON_MODE_POINT:
722                 return V_028814_X_DRAW_POINTS;
723         default:
724                 assert(0);
725                 return V_028814_X_DRAW_POINTS;
726         }
727 }
728
729 static void *si_create_rs_state(struct pipe_context *ctx,
730                                 const struct pipe_rasterizer_state *state)
731 {
732         struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);
733         struct si_pm4_state *pm4 = &rs->pm4;
734         unsigned tmp, i;
735         float psize_min, psize_max;
736
737         if (!rs) {
738                 return NULL;
739         }
740
741         rs->scissor_enable = state->scissor;
742         rs->clip_halfz = state->clip_halfz;
743         rs->two_side = state->light_twoside;
744         rs->multisample_enable = state->multisample;
745         rs->force_persample_interp = state->force_persample_interp;
746         rs->clip_plane_enable = state->clip_plane_enable;
747         rs->line_stipple_enable = state->line_stipple_enable;
748         rs->poly_stipple_enable = state->poly_stipple_enable;
749         rs->line_smooth = state->line_smooth;
750         rs->poly_smooth = state->poly_smooth;
751         rs->uses_poly_offset = state->offset_point || state->offset_line ||
752                                state->offset_tri;
753         rs->clamp_fragment_color = state->clamp_fragment_color;
754         rs->flatshade = state->flatshade;
755         rs->sprite_coord_enable = state->sprite_coord_enable;
756         rs->rasterizer_discard = state->rasterizer_discard;
757         rs->pa_sc_line_stipple = state->line_stipple_enable ?
758                                 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
759                                 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
760         rs->pa_cl_clip_cntl =
761                 S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
762                 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
763                 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
764                 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
765                 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
766
767         si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0,
768                 S_0286D4_FLAT_SHADE_ENA(1) |
769                 S_0286D4_PNT_SPRITE_ENA(1) |
770                 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
771                 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
772                 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
773                 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
774                 S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT));
775
776         /* point size 12.4 fixed point */
777         tmp = (unsigned)(state->point_size * 8.0);
778         si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
779
780         if (state->point_size_per_vertex) {
781                 psize_min = util_get_min_point_size(state);
782                 psize_max = 8192;
783         } else {
784                 /* Force the point size to be as if the vertex output was disabled. */
785                 psize_min = state->point_size;
786                 psize_max = state->point_size;
787         }
788         /* Divide by two, because 0.5 = 1 pixel. */
789         si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,
790                         S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) |
791                         S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2)));
792
793         tmp = (unsigned)state->line_width * 8;
794         si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp));
795         si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0,
796                        S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
797                        S_028A48_MSAA_ENABLE(state->multisample ||
798                                             state->poly_smooth ||
799                                             state->line_smooth) |
800                        S_028A48_VPORT_SCISSOR_ENABLE(1));
801
802         si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL,
803                        S_028BE4_PIX_CENTER(state->half_pixel_center) |
804                        S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
805
806         si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
807         si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL,
808                 S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |
809                 S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
810                 S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
811                 S_028814_FACE(!state->front_ccw) |
812                 S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
813                 S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
814                 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
815                 S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL ||
816                                    state->fill_back != PIPE_POLYGON_MODE_FILL) |
817                 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
818                 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)));
819         si_pm4_set_reg(pm4, R_00B130_SPI_SHADER_USER_DATA_VS_0 +
820                        SI_SGPR_VS_STATE_BITS * 4, state->clamp_vertex_color);
821
822         /* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */
823         for (i = 0; i < 3; i++) {
824                 struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i];
825                 float offset_units = state->offset_units;
826                 float offset_scale = state->offset_scale * 16.0f;
827                 uint32_t pa_su_poly_offset_db_fmt_cntl = 0;
828
829                 if (!state->offset_units_unscaled) {
830                         switch (i) {
831                         case 0: /* 16-bit zbuffer */
832                                 offset_units *= 4.0f;
833                                 pa_su_poly_offset_db_fmt_cntl =
834                                         S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
835                                 break;
836                         case 1: /* 24-bit zbuffer */
837                                 offset_units *= 2.0f;
838                                 pa_su_poly_offset_db_fmt_cntl =
839                                         S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
840                                 break;
841                         case 2: /* 32-bit zbuffer */
842                                 offset_units *= 1.0f;
843                                 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
844                                                                 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
845                                 break;
846                         }
847                 }
848
849                 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
850                                fui(offset_scale));
851                 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
852                                fui(offset_units));
853                 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
854                                fui(offset_scale));
855                 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
856                                fui(offset_units));
857                 si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
858                                pa_su_poly_offset_db_fmt_cntl);
859         }
860
861         return rs;
862 }
863
864 static void si_bind_rs_state(struct pipe_context *ctx, void *state)
865 {
866         struct si_context *sctx = (struct si_context *)ctx;
867         struct si_state_rasterizer *old_rs =
868                 (struct si_state_rasterizer*)sctx->queued.named.rasterizer;
869         struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
870
871         if (!state)
872                 return;
873
874         if (sctx->framebuffer.nr_samples > 1 &&
875             (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) {
876                 si_mark_atom_dirty(sctx, &sctx->db_render_state);
877
878                 if (sctx->b.family >= CHIP_POLARIS10)
879                         si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
880         }
881
882         r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz);
883
884         si_pm4_bind_state(sctx, rasterizer, rs);
885         si_update_poly_offset_state(sctx);
886
887         si_mark_atom_dirty(sctx, &sctx->clip_regs);
888         sctx->do_update_shaders = true;
889 }
890
891 static void si_delete_rs_state(struct pipe_context *ctx, void *state)
892 {
893         struct si_context *sctx = (struct si_context *)ctx;
894
895         if (sctx->queued.named.rasterizer == state)
896                 si_pm4_bind_state(sctx, poly_offset, NULL);
897         si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state);
898 }
899
900 /*
901  * infeered state between dsa and stencil ref
902  */
903 static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom)
904 {
905         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
906         struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
907         struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;
908
909         radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
910         radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) |
911                         S_028430_STENCILMASK(dsa->valuemask[0]) |
912                         S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
913                         S_028430_STENCILOPVAL(1));
914         radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
915                         S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
916                         S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
917                         S_028434_STENCILOPVAL_BF(1));
918 }
919
920 static void si_set_stencil_ref(struct pipe_context *ctx,
921                                const struct pipe_stencil_ref *state)
922 {
923         struct si_context *sctx = (struct si_context *)ctx;
924
925         if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0)
926                 return;
927
928         sctx->stencil_ref.state = *state;
929         si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
930 }
931
932
933 /*
934  * DSA
935  */
936
937 static uint32_t si_translate_stencil_op(int s_op)
938 {
939         switch (s_op) {
940         case PIPE_STENCIL_OP_KEEP:
941                 return V_02842C_STENCIL_KEEP;
942         case PIPE_STENCIL_OP_ZERO:
943                 return V_02842C_STENCIL_ZERO;
944         case PIPE_STENCIL_OP_REPLACE:
945                 return V_02842C_STENCIL_REPLACE_TEST;
946         case PIPE_STENCIL_OP_INCR:
947                 return V_02842C_STENCIL_ADD_CLAMP;
948         case PIPE_STENCIL_OP_DECR:
949                 return V_02842C_STENCIL_SUB_CLAMP;
950         case PIPE_STENCIL_OP_INCR_WRAP:
951                 return V_02842C_STENCIL_ADD_WRAP;
952         case PIPE_STENCIL_OP_DECR_WRAP:
953                 return V_02842C_STENCIL_SUB_WRAP;
954         case PIPE_STENCIL_OP_INVERT:
955                 return V_02842C_STENCIL_INVERT;
956         default:
957                 R600_ERR("Unknown stencil op %d", s_op);
958                 assert(0);
959                 break;
960         }
961         return 0;
962 }
963
964 static void *si_create_dsa_state(struct pipe_context *ctx,
965                                  const struct pipe_depth_stencil_alpha_state *state)
966 {
967         struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
968         struct si_pm4_state *pm4 = &dsa->pm4;
969         unsigned db_depth_control;
970         uint32_t db_stencil_control = 0;
971
972         if (!dsa) {
973                 return NULL;
974         }
975
976         dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask;
977         dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask;
978         dsa->stencil_ref.writemask[0] = state->stencil[0].writemask;
979         dsa->stencil_ref.writemask[1] = state->stencil[1].writemask;
980
981         db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
982                 S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
983                 S_028800_ZFUNC(state->depth.func) |
984                 S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test);
985
986         /* stencil */
987         if (state->stencil[0].enabled) {
988                 db_depth_control |= S_028800_STENCIL_ENABLE(1);
989                 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func);
990                 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op));
991                 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op));
992                 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op));
993
994                 if (state->stencil[1].enabled) {
995                         db_depth_control |= S_028800_BACKFACE_ENABLE(1);
996                         db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func);
997                         db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op));
998                         db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op));
999                         db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op));
1000                 }
1001         }
1002
1003         /* alpha */
1004         if (state->alpha.enabled) {
1005                 dsa->alpha_func = state->alpha.func;
1006
1007                 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
1008                                SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value));
1009         } else {
1010                 dsa->alpha_func = PIPE_FUNC_ALWAYS;
1011         }
1012
1013         si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
1014         si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
1015         if (state->depth.bounds_test) {
1016                 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min));
1017                 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max));
1018         }
1019
1020         return dsa;
1021 }
1022
1023 static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
1024 {
1025         struct si_context *sctx = (struct si_context *)ctx;
1026         struct si_state_dsa *dsa = state;
1027
1028         if (!state)
1029                 return;
1030
1031         si_pm4_bind_state(sctx, dsa, dsa);
1032
1033         if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
1034                    sizeof(struct si_dsa_stencil_ref_part)) != 0) {
1035                 sctx->stencil_ref.dsa_part = dsa->stencil_ref;
1036                 si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
1037         }
1038         sctx->do_update_shaders = true;
1039 }
1040
1041 static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
1042 {
1043         struct si_context *sctx = (struct si_context *)ctx;
1044         si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
1045 }
1046
1047 static void *si_create_db_flush_dsa(struct si_context *sctx)
1048 {
1049         struct pipe_depth_stencil_alpha_state dsa = {};
1050
1051         return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa);
1052 }
1053
1054 /* DB RENDER STATE */
1055
1056 static void si_set_active_query_state(struct pipe_context *ctx, boolean enable)
1057 {
1058         struct si_context *sctx = (struct si_context*)ctx;
1059
1060         /* Pipeline stat & streamout queries. */
1061         if (enable) {
1062                 sctx->b.flags &= ~R600_CONTEXT_STOP_PIPELINE_STATS;
1063                 sctx->b.flags |= R600_CONTEXT_START_PIPELINE_STATS;
1064         } else {
1065                 sctx->b.flags &= ~R600_CONTEXT_START_PIPELINE_STATS;
1066                 sctx->b.flags |= R600_CONTEXT_STOP_PIPELINE_STATS;
1067         }
1068
1069         /* Occlusion queries. */
1070         if (sctx->occlusion_queries_disabled != !enable) {
1071                 sctx->occlusion_queries_disabled = !enable;
1072                 si_mark_atom_dirty(sctx, &sctx->db_render_state);
1073         }
1074 }
1075
1076 static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
1077 {
1078         struct si_context *sctx = (struct si_context*)ctx;
1079
1080         si_mark_atom_dirty(sctx, &sctx->db_render_state);
1081 }
1082
1083 static void si_save_qbo_state(struct pipe_context *ctx, struct r600_qbo_state *st)
1084 {
1085         struct si_context *sctx = (struct si_context*)ctx;
1086
1087         st->saved_compute = sctx->cs_shader_state.program;
1088
1089         si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
1090         si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
1091 }
1092
1093 static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
1094 {
1095         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
1096         struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
1097         unsigned db_shader_control;
1098
1099         radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
1100
1101         /* DB_RENDER_CONTROL */
1102         if (sctx->dbcb_depth_copy_enabled ||
1103             sctx->dbcb_stencil_copy_enabled) {
1104                 radeon_emit(cs,
1105                             S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
1106                             S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
1107                             S_028000_COPY_CENTROID(1) |
1108                             S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
1109         } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
1110                 radeon_emit(cs,
1111                             S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
1112                             S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
1113         } else {
1114                 radeon_emit(cs,
1115                             S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
1116                             S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear));
1117         }
1118
1119         /* DB_COUNT_CONTROL (occlusion queries) */
1120         if (sctx->b.num_occlusion_queries > 0 &&
1121             !sctx->occlusion_queries_disabled) {
1122                 bool perfect = sctx->b.num_perfect_occlusion_queries > 0;
1123
1124                 if (sctx->b.chip_class >= CIK) {
1125                         radeon_emit(cs,
1126                                     S_028004_PERFECT_ZPASS_COUNTS(perfect) |
1127                                     S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
1128                                     S_028004_ZPASS_ENABLE(1) |
1129                                     S_028004_SLICE_EVEN_ENABLE(1) |
1130                                     S_028004_SLICE_ODD_ENABLE(1));
1131                 } else {
1132                         radeon_emit(cs,
1133                                     S_028004_PERFECT_ZPASS_COUNTS(perfect) |
1134                                     S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples));
1135                 }
1136         } else {
1137                 /* Disable occlusion queries. */
1138                 if (sctx->b.chip_class >= CIK) {
1139                         radeon_emit(cs, 0);
1140                 } else {
1141                         radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1));
1142                 }
1143         }
1144
1145         /* DB_RENDER_OVERRIDE2 */
1146         radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
1147                 S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
1148                 S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
1149                 S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));
1150
1151         db_shader_control = sctx->ps_db_shader_control;
1152
1153         /* Bug workaround for smoothing (overrasterization) on SI. */
1154         if (sctx->b.chip_class == SI && sctx->smoothing_enabled) {
1155                 db_shader_control &= C_02880C_Z_ORDER;
1156                 db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
1157         }
1158
1159         /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
1160         if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable))
1161                 db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
1162
1163         if (sctx->b.family == CHIP_STONEY &&
1164             sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)
1165                 db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
1166
1167         radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
1168                                db_shader_control);
1169 }
1170
1171 /*
1172  * format translation
1173  */
1174 static uint32_t si_translate_colorformat(enum pipe_format format)
1175 {
1176         const struct util_format_description *desc = util_format_description(format);
1177
1178 #define HAS_SIZE(x,y,z,w) \
1179         (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
1180          desc->channel[2].size == (z) && desc->channel[3].size == (w))
1181
1182         if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
1183                 return V_028C70_COLOR_10_11_11;
1184
1185         if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
1186                 return V_028C70_COLOR_INVALID;
1187
1188         /* hw cannot support mixed formats (except depth/stencil, since
1189          * stencil is not written to). */
1190         if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
1191                 return V_028C70_COLOR_INVALID;
1192
1193         switch (desc->nr_channels) {
1194         case 1:
1195                 switch (desc->channel[0].size) {
1196                 case 8:
1197                         return V_028C70_COLOR_8;
1198                 case 16:
1199                         return V_028C70_COLOR_16;
1200                 case 32:
1201                         return V_028C70_COLOR_32;
1202                 }
1203                 break;
1204         case 2:
1205                 if (desc->channel[0].size == desc->channel[1].size) {
1206                         switch (desc->channel[0].size) {
1207                         case 8:
1208                                 return V_028C70_COLOR_8_8;
1209                         case 16:
1210                                 return V_028C70_COLOR_16_16;
1211                         case 32:
1212                                 return V_028C70_COLOR_32_32;
1213                         }
1214                 } else if (HAS_SIZE(8,24,0,0)) {
1215                         return V_028C70_COLOR_24_8;
1216                 } else if (HAS_SIZE(24,8,0,0)) {
1217                         return V_028C70_COLOR_8_24;
1218                 }
1219                 break;
1220         case 3:
1221                 if (HAS_SIZE(5,6,5,0)) {
1222                         return V_028C70_COLOR_5_6_5;
1223                 } else if (HAS_SIZE(32,8,24,0)) {
1224                         return V_028C70_COLOR_X24_8_32_FLOAT;
1225                 }
1226                 break;
1227         case 4:
1228                 if (desc->channel[0].size == desc->channel[1].size &&
1229                     desc->channel[0].size == desc->channel[2].size &&
1230                     desc->channel[0].size == desc->channel[3].size) {
1231                         switch (desc->channel[0].size) {
1232                         case 4:
1233                                 return V_028C70_COLOR_4_4_4_4;
1234                         case 8:
1235                                 return V_028C70_COLOR_8_8_8_8;
1236                         case 16:
1237                                 return V_028C70_COLOR_16_16_16_16;
1238                         case 32:
1239                                 return V_028C70_COLOR_32_32_32_32;
1240                         }
1241                 } else if (HAS_SIZE(5,5,5,1)) {
1242                         return V_028C70_COLOR_1_5_5_5;
1243                 } else if (HAS_SIZE(10,10,10,2)) {
1244                         return V_028C70_COLOR_2_10_10_10;
1245                 }
1246                 break;
1247         }
1248         return V_028C70_COLOR_INVALID;
1249 }
1250
1251 static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
1252 {
1253         if (SI_BIG_ENDIAN) {
1254                 switch(colorformat) {
1255                 /* 8-bit buffers. */
1256                 case V_028C70_COLOR_8:
1257                         return V_028C70_ENDIAN_NONE;
1258
1259                 /* 16-bit buffers. */
1260                 case V_028C70_COLOR_5_6_5:
1261                 case V_028C70_COLOR_1_5_5_5:
1262                 case V_028C70_COLOR_4_4_4_4:
1263                 case V_028C70_COLOR_16:
1264                 case V_028C70_COLOR_8_8:
1265                         return V_028C70_ENDIAN_8IN16;
1266
1267                 /* 32-bit buffers. */
1268                 case V_028C70_COLOR_8_8_8_8:
1269                 case V_028C70_COLOR_2_10_10_10:
1270                 case V_028C70_COLOR_8_24:
1271                 case V_028C70_COLOR_24_8:
1272                 case V_028C70_COLOR_16_16:
1273                         return V_028C70_ENDIAN_8IN32;
1274
1275                 /* 64-bit buffers. */
1276                 case V_028C70_COLOR_16_16_16_16:
1277                         return V_028C70_ENDIAN_8IN16;
1278
1279                 case V_028C70_COLOR_32_32:
1280                         return V_028C70_ENDIAN_8IN32;
1281
1282                 /* 128-bit buffers. */
1283                 case V_028C70_COLOR_32_32_32_32:
1284                         return V_028C70_ENDIAN_8IN32;
1285                 default:
1286                         return V_028C70_ENDIAN_NONE; /* Unsupported. */
1287                 }
1288         } else {
1289                 return V_028C70_ENDIAN_NONE;
1290         }
1291 }
1292
1293 static uint32_t si_translate_dbformat(enum pipe_format format)
1294 {
1295         switch (format) {
1296         case PIPE_FORMAT_Z16_UNORM:
1297                 return V_028040_Z_16;
1298         case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1299         case PIPE_FORMAT_X8Z24_UNORM:
1300         case PIPE_FORMAT_Z24X8_UNORM:
1301         case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1302                 return V_028040_Z_24; /* deprecated on SI */
1303         case PIPE_FORMAT_Z32_FLOAT:
1304         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1305                 return V_028040_Z_32_FLOAT;
1306         default:
1307                 return V_028040_Z_INVALID;
1308         }
1309 }
1310
1311 /*
1312  * Texture translation
1313  */
1314
1315 static uint32_t si_translate_texformat(struct pipe_screen *screen,
1316                                        enum pipe_format format,
1317                                        const struct util_format_description *desc,
1318                                        int first_non_void)
1319 {
1320         struct si_screen *sscreen = (struct si_screen*)screen;
1321         bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 &&
1322                                           sscreen->b.info.drm_minor >= 31) ||
1323                                          sscreen->b.info.drm_major == 3;
1324         bool uniform = true;
1325         int i;
1326
1327         /* Colorspace (return non-RGB formats directly). */
1328         switch (desc->colorspace) {
1329         /* Depth stencil formats */
1330         case UTIL_FORMAT_COLORSPACE_ZS:
1331                 switch (format) {
1332                 case PIPE_FORMAT_Z16_UNORM:
1333                         return V_008F14_IMG_DATA_FORMAT_16;
1334                 case PIPE_FORMAT_X24S8_UINT:
1335                 case PIPE_FORMAT_Z24X8_UNORM:
1336                 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1337                         return V_008F14_IMG_DATA_FORMAT_8_24;
1338                 case PIPE_FORMAT_X8Z24_UNORM:
1339                 case PIPE_FORMAT_S8X24_UINT:
1340                 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1341                         return V_008F14_IMG_DATA_FORMAT_24_8;
1342                 case PIPE_FORMAT_S8_UINT:
1343                         return V_008F14_IMG_DATA_FORMAT_8;
1344                 case PIPE_FORMAT_Z32_FLOAT:
1345                         return V_008F14_IMG_DATA_FORMAT_32;
1346                 case PIPE_FORMAT_X32_S8X24_UINT:
1347                 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1348                         return V_008F14_IMG_DATA_FORMAT_X24_8_32;
1349                 default:
1350                         goto out_unknown;
1351                 }
1352
1353         case UTIL_FORMAT_COLORSPACE_YUV:
1354                 goto out_unknown; /* TODO */
1355
1356         case UTIL_FORMAT_COLORSPACE_SRGB:
1357                 if (desc->nr_channels != 4 && desc->nr_channels != 1)
1358                         goto out_unknown;
1359                 break;
1360
1361         default:
1362                 break;
1363         }
1364
1365         if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
1366                 if (!enable_compressed_formats)
1367                         goto out_unknown;
1368
1369                 switch (format) {
1370                 case PIPE_FORMAT_RGTC1_SNORM:
1371                 case PIPE_FORMAT_LATC1_SNORM:
1372                 case PIPE_FORMAT_RGTC1_UNORM:
1373                 case PIPE_FORMAT_LATC1_UNORM:
1374                         return V_008F14_IMG_DATA_FORMAT_BC4;
1375                 case PIPE_FORMAT_RGTC2_SNORM:
1376                 case PIPE_FORMAT_LATC2_SNORM:
1377                 case PIPE_FORMAT_RGTC2_UNORM:
1378                 case PIPE_FORMAT_LATC2_UNORM:
1379                         return V_008F14_IMG_DATA_FORMAT_BC5;
1380                 default:
1381                         goto out_unknown;
1382                 }
1383         }
1384
1385         if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
1386             sscreen->b.family == CHIP_STONEY) {
1387                 switch (format) {
1388                 case PIPE_FORMAT_ETC1_RGB8:
1389                 case PIPE_FORMAT_ETC2_RGB8:
1390                 case PIPE_FORMAT_ETC2_SRGB8:
1391                         return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
1392                 case PIPE_FORMAT_ETC2_RGB8A1:
1393                 case PIPE_FORMAT_ETC2_SRGB8A1:
1394                         return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
1395                 case PIPE_FORMAT_ETC2_RGBA8:
1396                 case PIPE_FORMAT_ETC2_SRGBA8:
1397                         return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA;
1398                 case PIPE_FORMAT_ETC2_R11_UNORM:
1399                 case PIPE_FORMAT_ETC2_R11_SNORM:
1400                         return V_008F14_IMG_DATA_FORMAT_ETC2_R;
1401                 case PIPE_FORMAT_ETC2_RG11_UNORM:
1402                 case PIPE_FORMAT_ETC2_RG11_SNORM:
1403                         return V_008F14_IMG_DATA_FORMAT_ETC2_RG;
1404                 default:
1405                         goto out_unknown;
1406                 }
1407         }
1408
1409         if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
1410                 if (!enable_compressed_formats)
1411                         goto out_unknown;
1412
1413                 switch (format) {
1414                 case PIPE_FORMAT_BPTC_RGBA_UNORM:
1415                 case PIPE_FORMAT_BPTC_SRGBA:
1416                         return V_008F14_IMG_DATA_FORMAT_BC7;
1417                 case PIPE_FORMAT_BPTC_RGB_FLOAT:
1418                 case PIPE_FORMAT_BPTC_RGB_UFLOAT:
1419                         return V_008F14_IMG_DATA_FORMAT_BC6;
1420                 default:
1421                         goto out_unknown;
1422                 }
1423         }
1424
1425         if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
1426                 switch (format) {
1427                 case PIPE_FORMAT_R8G8_B8G8_UNORM:
1428                 case PIPE_FORMAT_G8R8_B8R8_UNORM:
1429                         return V_008F14_IMG_DATA_FORMAT_GB_GR;
1430                 case PIPE_FORMAT_G8R8_G8B8_UNORM:
1431                 case PIPE_FORMAT_R8G8_R8B8_UNORM:
1432                         return V_008F14_IMG_DATA_FORMAT_BG_RG;
1433                 default:
1434                         goto out_unknown;
1435                 }
1436         }
1437
1438         if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
1439                 if (!enable_compressed_formats)
1440                         goto out_unknown;
1441
1442                 if (!util_format_s3tc_enabled) {
1443                         goto out_unknown;
1444                 }
1445
1446                 switch (format) {
1447                 case PIPE_FORMAT_DXT1_RGB:
1448                 case PIPE_FORMAT_DXT1_RGBA:
1449                 case PIPE_FORMAT_DXT1_SRGB:
1450                 case PIPE_FORMAT_DXT1_SRGBA:
1451                         return V_008F14_IMG_DATA_FORMAT_BC1;
1452                 case PIPE_FORMAT_DXT3_RGBA:
1453                 case PIPE_FORMAT_DXT3_SRGBA:
1454                         return V_008F14_IMG_DATA_FORMAT_BC2;
1455                 case PIPE_FORMAT_DXT5_RGBA:
1456                 case PIPE_FORMAT_DXT5_SRGBA:
1457                         return V_008F14_IMG_DATA_FORMAT_BC3;
1458                 default:
1459                         goto out_unknown;
1460                 }
1461         }
1462
1463         if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
1464                 return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
1465         } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
1466                 return V_008F14_IMG_DATA_FORMAT_10_11_11;
1467         }
1468
1469         /* R8G8Bx_SNORM - TODO CxV8U8 */
1470
1471         /* hw cannot support mixed formats (except depth/stencil, since only
1472          * depth is read).*/
1473         if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
1474                 goto out_unknown;
1475
1476         /* See whether the components are of the same size. */
1477         for (i = 1; i < desc->nr_channels; i++) {
1478                 uniform = uniform && desc->channel[0].size == desc->channel[i].size;
1479         }
1480
1481         /* Non-uniform formats. */
1482         if (!uniform) {
1483                 switch(desc->nr_channels) {
1484                 case 3:
1485                         if (desc->channel[0].size == 5 &&
1486                             desc->channel[1].size == 6 &&
1487                             desc->channel[2].size == 5) {
1488                                 return V_008F14_IMG_DATA_FORMAT_5_6_5;
1489                         }
1490                         goto out_unknown;
1491                 case 4:
1492                         if (desc->channel[0].size == 5 &&
1493                             desc->channel[1].size == 5 &&
1494                             desc->channel[2].size == 5 &&
1495                             desc->channel[3].size == 1) {
1496                                 return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
1497                         }
1498                         if (desc->channel[0].size == 10 &&
1499                             desc->channel[1].size == 10 &&
1500                             desc->channel[2].size == 10 &&
1501                             desc->channel[3].size == 2) {
1502                                 return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
1503                         }
1504                         goto out_unknown;
1505                 }
1506                 goto out_unknown;
1507         }
1508
1509         if (first_non_void < 0 || first_non_void > 3)
1510                 goto out_unknown;
1511
1512         /* uniform formats */
1513         switch (desc->channel[first_non_void].size) {
1514         case 4:
1515                 switch (desc->nr_channels) {
1516 #if 0 /* Not supported for render targets */
1517                 case 2:
1518                         return V_008F14_IMG_DATA_FORMAT_4_4;
1519 #endif
1520                 case 4:
1521                         return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
1522                 }
1523                 break;
1524         case 8:
1525                 switch (desc->nr_channels) {
1526                 case 1:
1527                         return V_008F14_IMG_DATA_FORMAT_8;
1528                 case 2:
1529                         return V_008F14_IMG_DATA_FORMAT_8_8;
1530                 case 4:
1531                         return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
1532                 }
1533                 break;
1534         case 16:
1535                 switch (desc->nr_channels) {
1536                 case 1:
1537                         return V_008F14_IMG_DATA_FORMAT_16;
1538                 case 2:
1539                         return V_008F14_IMG_DATA_FORMAT_16_16;
1540                 case 4:
1541                         return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
1542                 }
1543                 break;
1544         case 32:
1545                 switch (desc->nr_channels) {
1546                 case 1:
1547                         return V_008F14_IMG_DATA_FORMAT_32;
1548                 case 2:
1549                         return V_008F14_IMG_DATA_FORMAT_32_32;
1550 #if 0 /* Not supported for render targets */
1551                 case 3:
1552                         return V_008F14_IMG_DATA_FORMAT_32_32_32;
1553 #endif
1554                 case 4:
1555                         return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
1556                 }
1557         }
1558
1559 out_unknown:
1560         /* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */
1561         return ~0;
1562 }
1563
1564 static unsigned si_tex_wrap(unsigned wrap)
1565 {
1566         switch (wrap) {
1567         default:
1568         case PIPE_TEX_WRAP_REPEAT:
1569                 return V_008F30_SQ_TEX_WRAP;
1570         case PIPE_TEX_WRAP_CLAMP:
1571                 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;
1572         case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1573                 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
1574         case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1575                 return V_008F30_SQ_TEX_CLAMP_BORDER;
1576         case PIPE_TEX_WRAP_MIRROR_REPEAT:
1577                 return V_008F30_SQ_TEX_MIRROR;
1578         case PIPE_TEX_WRAP_MIRROR_CLAMP:
1579                 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
1580         case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1581                 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
1582         case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1583                 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;
1584         }
1585 }
1586
1587 static unsigned si_tex_mipfilter(unsigned filter)
1588 {
1589         switch (filter) {
1590         case PIPE_TEX_MIPFILTER_NEAREST:
1591                 return V_008F38_SQ_TEX_Z_FILTER_POINT;
1592         case PIPE_TEX_MIPFILTER_LINEAR:
1593                 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
1594         default:
1595         case PIPE_TEX_MIPFILTER_NONE:
1596                 return V_008F38_SQ_TEX_Z_FILTER_NONE;
1597         }
1598 }
1599
1600 static unsigned si_tex_compare(unsigned compare)
1601 {
1602         switch (compare) {
1603         default:
1604         case PIPE_FUNC_NEVER:
1605                 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
1606         case PIPE_FUNC_LESS:
1607                 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
1608         case PIPE_FUNC_EQUAL:
1609                 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
1610         case PIPE_FUNC_LEQUAL:
1611                 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
1612         case PIPE_FUNC_GREATER:
1613                 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
1614         case PIPE_FUNC_NOTEQUAL:
1615                 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
1616         case PIPE_FUNC_GEQUAL:
1617                 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
1618         case PIPE_FUNC_ALWAYS:
1619                 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
1620         }
1621 }
1622
1623 static unsigned si_tex_dim(unsigned res_target, unsigned view_target,
1624                            unsigned nr_samples)
1625 {
1626         if (view_target == PIPE_TEXTURE_CUBE ||
1627             view_target == PIPE_TEXTURE_CUBE_ARRAY)
1628                 res_target = view_target;
1629         /* If interpreting cubemaps as something else, set 2D_ARRAY. */
1630         else if (res_target == PIPE_TEXTURE_CUBE ||
1631                  res_target == PIPE_TEXTURE_CUBE_ARRAY)
1632                 res_target = PIPE_TEXTURE_2D_ARRAY;
1633
1634         switch (res_target) {
1635         default:
1636         case PIPE_TEXTURE_1D:
1637                 return V_008F1C_SQ_RSRC_IMG_1D;
1638         case PIPE_TEXTURE_1D_ARRAY:
1639                 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
1640         case PIPE_TEXTURE_2D:
1641         case PIPE_TEXTURE_RECT:
1642                 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA :
1643                                         V_008F1C_SQ_RSRC_IMG_2D;
1644         case PIPE_TEXTURE_2D_ARRAY:
1645                 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY :
1646                                         V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
1647         case PIPE_TEXTURE_3D:
1648                 return V_008F1C_SQ_RSRC_IMG_3D;
1649         case PIPE_TEXTURE_CUBE:
1650         case PIPE_TEXTURE_CUBE_ARRAY:
1651                 return V_008F1C_SQ_RSRC_IMG_CUBE;
1652         }
1653 }
1654
1655 /*
1656  * Format support testing
1657  */
1658
1659 static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
1660 {
1661         return si_translate_texformat(screen, format, util_format_description(format),
1662                                       util_format_get_first_non_void_channel(format)) != ~0U;
1663 }
1664
1665 static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
1666                                                const struct util_format_description *desc,
1667                                                int first_non_void)
1668 {
1669         unsigned type;
1670         int i;
1671
1672         if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
1673                 return V_008F0C_BUF_DATA_FORMAT_10_11_11;
1674
1675         assert(first_non_void >= 0);
1676         type = desc->channel[first_non_void].type;
1677
1678         if (type == UTIL_FORMAT_TYPE_FIXED)
1679                 return V_008F0C_BUF_DATA_FORMAT_INVALID;
1680
1681         if (desc->nr_channels == 4 &&
1682             desc->channel[0].size == 10 &&
1683             desc->channel[1].size == 10 &&
1684             desc->channel[2].size == 10 &&
1685             desc->channel[3].size == 2)
1686                 return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;
1687
1688         /* See whether the components are of the same size. */
1689         for (i = 0; i < desc->nr_channels; i++) {
1690                 if (desc->channel[first_non_void].size != desc->channel[i].size)
1691                         return V_008F0C_BUF_DATA_FORMAT_INVALID;
1692         }
1693
1694         switch (desc->channel[first_non_void].size) {
1695         case 8:
1696                 switch (desc->nr_channels) {
1697                 case 1:
1698                         return V_008F0C_BUF_DATA_FORMAT_8;
1699                 case 2:
1700                         return V_008F0C_BUF_DATA_FORMAT_8_8;
1701                 case 3:
1702                 case 4:
1703                         return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
1704                 }
1705                 break;
1706         case 16:
1707                 switch (desc->nr_channels) {
1708                 case 1:
1709                         return V_008F0C_BUF_DATA_FORMAT_16;
1710                 case 2:
1711                         return V_008F0C_BUF_DATA_FORMAT_16_16;
1712                 case 3:
1713                 case 4:
1714                         return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
1715                 }
1716                 break;
1717         case 32:
1718                 /* From the Southern Islands ISA documentation about MTBUF:
1719                  * 'Memory reads of data in memory that is 32 or 64 bits do not
1720                  * undergo any format conversion.'
1721                  */
1722                 if (type != UTIL_FORMAT_TYPE_FLOAT &&
1723                     !desc->channel[first_non_void].pure_integer)
1724                         return V_008F0C_BUF_DATA_FORMAT_INVALID;
1725
1726                 switch (desc->nr_channels) {
1727                 case 1:
1728                         return V_008F0C_BUF_DATA_FORMAT_32;
1729                 case 2:
1730                         return V_008F0C_BUF_DATA_FORMAT_32_32;
1731                 case 3:
1732                         return V_008F0C_BUF_DATA_FORMAT_32_32_32;
1733                 case 4:
1734                         return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
1735                 }
1736                 break;
1737         }
1738
1739         return V_008F0C_BUF_DATA_FORMAT_INVALID;
1740 }
1741
1742 static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,
1743                                               const struct util_format_description *desc,
1744                                               int first_non_void)
1745 {
1746         if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
1747                 return V_008F0C_BUF_NUM_FORMAT_FLOAT;
1748
1749         assert(first_non_void >= 0);
1750
1751         switch (desc->channel[first_non_void].type) {
1752         case UTIL_FORMAT_TYPE_SIGNED:
1753                 if (desc->channel[first_non_void].normalized)
1754                         return V_008F0C_BUF_NUM_FORMAT_SNORM;
1755                 else if (desc->channel[first_non_void].pure_integer)
1756                         return V_008F0C_BUF_NUM_FORMAT_SINT;
1757                 else
1758                         return V_008F0C_BUF_NUM_FORMAT_SSCALED;
1759                 break;
1760         case UTIL_FORMAT_TYPE_UNSIGNED:
1761                 if (desc->channel[first_non_void].normalized)
1762                         return V_008F0C_BUF_NUM_FORMAT_UNORM;
1763                 else if (desc->channel[first_non_void].pure_integer)
1764                         return V_008F0C_BUF_NUM_FORMAT_UINT;
1765                 else
1766                         return V_008F0C_BUF_NUM_FORMAT_USCALED;
1767                 break;
1768         case UTIL_FORMAT_TYPE_FLOAT:
1769         default:
1770                 return V_008F0C_BUF_NUM_FORMAT_FLOAT;
1771         }
1772 }
1773
1774 static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format)
1775 {
1776         const struct util_format_description *desc;
1777         int first_non_void;
1778         unsigned data_format;
1779
1780         desc = util_format_description(format);
1781         first_non_void = util_format_get_first_non_void_channel(format);
1782         data_format = si_translate_buffer_dataformat(screen, desc, first_non_void);
1783         return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID;
1784 }
1785
1786 static bool si_is_colorbuffer_format_supported(enum pipe_format format)
1787 {
1788         return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
1789                 r600_translate_colorswap(format, false) != ~0U;
1790 }
1791
1792 static bool si_is_zs_format_supported(enum pipe_format format)
1793 {
1794         return si_translate_dbformat(format) != V_028040_Z_INVALID;
1795 }
1796
1797 static boolean si_is_format_supported(struct pipe_screen *screen,
1798                                       enum pipe_format format,
1799                                       enum pipe_texture_target target,
1800                                       unsigned sample_count,
1801                                       unsigned usage)
1802 {
1803         unsigned retval = 0;
1804
1805         if (target >= PIPE_MAX_TEXTURE_TYPES) {
1806                 R600_ERR("r600: unsupported texture type %d\n", target);
1807                 return false;
1808         }
1809
1810         if (!util_format_is_supported(format, usage))
1811                 return false;
1812
1813         if (sample_count > 1) {
1814                 if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))
1815                         return false;
1816
1817                 if (usage & PIPE_BIND_SHADER_IMAGE)
1818                         return false;
1819
1820                 switch (sample_count) {
1821                 case 2:
1822                 case 4:
1823                 case 8:
1824                         break;
1825                 case 16:
1826                         if (format == PIPE_FORMAT_NONE)
1827                                 return true;
1828                         else
1829                                 return false;
1830                 default:
1831                         return false;
1832                 }
1833         }
1834
1835         if (usage & (PIPE_BIND_SAMPLER_VIEW |
1836                      PIPE_BIND_SHADER_IMAGE)) {
1837                 if (target == PIPE_BUFFER) {
1838                         if (si_is_vertex_format_supported(screen, format))
1839                                 retval |= usage & (PIPE_BIND_SAMPLER_VIEW |
1840                                                    PIPE_BIND_SHADER_IMAGE);
1841                 } else {
1842                         if (si_is_sampler_format_supported(screen, format))
1843                                 retval |= usage & (PIPE_BIND_SAMPLER_VIEW |
1844                                                    PIPE_BIND_SHADER_IMAGE);
1845                 }
1846         }
1847
1848         if ((usage & (PIPE_BIND_RENDER_TARGET |
1849                       PIPE_BIND_DISPLAY_TARGET |
1850                       PIPE_BIND_SCANOUT |
1851                       PIPE_BIND_SHARED |
1852                       PIPE_BIND_BLENDABLE)) &&
1853             si_is_colorbuffer_format_supported(format)) {
1854                 retval |= usage &
1855                           (PIPE_BIND_RENDER_TARGET |
1856                            PIPE_BIND_DISPLAY_TARGET |
1857                            PIPE_BIND_SCANOUT |
1858                            PIPE_BIND_SHARED);
1859                 if (!util_format_is_pure_integer(format) &&
1860                     !util_format_is_depth_or_stencil(format))
1861                         retval |= usage & PIPE_BIND_BLENDABLE;
1862         }
1863
1864         if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
1865             si_is_zs_format_supported(format)) {
1866                 retval |= PIPE_BIND_DEPTH_STENCIL;
1867         }
1868
1869         if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
1870             si_is_vertex_format_supported(screen, format)) {
1871                 retval |= PIPE_BIND_VERTEX_BUFFER;
1872         }
1873
1874         if ((usage & PIPE_BIND_LINEAR) &&
1875             !util_format_is_compressed(format) &&
1876             !(usage & PIPE_BIND_DEPTH_STENCIL))
1877                 retval |= PIPE_BIND_LINEAR;
1878
1879         return retval == usage;
1880 }
1881
1882 /*
1883  * framebuffer handling
1884  */
1885
1886 static void si_choose_spi_color_formats(struct r600_surface *surf,
1887                                         unsigned format, unsigned swap,
1888                                         unsigned ntype, bool is_depth)
1889 {
1890         /* Alpha is needed for alpha-to-coverage.
1891          * Blending may be with or without alpha.
1892          */
1893         unsigned normal = 0; /* most optimal, may not support blending or export alpha */
1894         unsigned alpha = 0; /* exports alpha, but may not support blending */
1895         unsigned blend = 0; /* supports blending, but may not export alpha */
1896         unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */
1897
1898         /* Choose the SPI color formats. These are required values for Stoney/RB+.
1899          * Other chips have multiple choices, though they are not necessarily better.
1900          */
1901         switch (format) {
1902         case V_028C70_COLOR_5_6_5:
1903         case V_028C70_COLOR_1_5_5_5:
1904         case V_028C70_COLOR_5_5_5_1:
1905         case V_028C70_COLOR_4_4_4_4:
1906         case V_028C70_COLOR_10_11_11:
1907         case V_028C70_COLOR_11_11_10:
1908         case V_028C70_COLOR_8:
1909         case V_028C70_COLOR_8_8:
1910         case V_028C70_COLOR_8_8_8_8:
1911         case V_028C70_COLOR_10_10_10_2:
1912         case V_028C70_COLOR_2_10_10_10:
1913                 if (ntype == V_028C70_NUMBER_UINT)
1914                         alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
1915                 else if (ntype == V_028C70_NUMBER_SINT)
1916                         alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
1917                 else
1918                         alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
1919                 break;
1920
1921         case V_028C70_COLOR_16:
1922         case V_028C70_COLOR_16_16:
1923         case V_028C70_COLOR_16_16_16_16:
1924                 if (ntype == V_028C70_NUMBER_UNORM ||
1925                     ntype == V_028C70_NUMBER_SNORM) {
1926                         /* UNORM16 and SNORM16 don't support blending */
1927                         if (ntype == V_028C70_NUMBER_UNORM)
1928                                 normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR;
1929                         else
1930                                 normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR;
1931
1932                         /* Use 32 bits per channel for blending. */
1933                         if (format == V_028C70_COLOR_16) {
1934                                 if (swap == V_028C70_SWAP_STD) { /* R */
1935                                         blend = V_028714_SPI_SHADER_32_R;
1936                                         blend_alpha = V_028714_SPI_SHADER_32_AR;
1937                                 } else if (swap == V_028C70_SWAP_ALT_REV) /* A */
1938                                         blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
1939                                 else
1940                                         assert(0);
1941                         } else if (format == V_028C70_COLOR_16_16) {
1942                                 if (swap == V_028C70_SWAP_STD) { /* RG */
1943                                         blend = V_028714_SPI_SHADER_32_GR;
1944                                         blend_alpha = V_028714_SPI_SHADER_32_ABGR;
1945                                 } else if (swap == V_028C70_SWAP_ALT) /* RA */
1946                                         blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
1947                                 else
1948                                         assert(0);
1949                         } else /* 16_16_16_16 */
1950                                 blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
1951                 } else if (ntype == V_028C70_NUMBER_UINT)
1952                         alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
1953                 else if (ntype == V_028C70_NUMBER_SINT)
1954                         alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
1955                 else if (ntype == V_028C70_NUMBER_FLOAT)
1956                         alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
1957                 else
1958                         assert(0);
1959                 break;
1960
1961         case V_028C70_COLOR_32:
1962                 if (swap == V_028C70_SWAP_STD) { /* R */
1963                         blend = normal = V_028714_SPI_SHADER_32_R;
1964                         alpha = blend_alpha = V_028714_SPI_SHADER_32_AR;
1965                 } else if (swap == V_028C70_SWAP_ALT_REV) /* A */
1966                         alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
1967                 else
1968                         assert(0);
1969                 break;
1970
1971         case V_028C70_COLOR_32_32:
1972                 if (swap == V_028C70_SWAP_STD) { /* RG */
1973                         blend = normal = V_028714_SPI_SHADER_32_GR;
1974                         alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
1975                 } else if (swap == V_028C70_SWAP_ALT) /* RA */
1976                         alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
1977                 else
1978                         assert(0);
1979                 break;
1980
1981         case V_028C70_COLOR_32_32_32_32:
1982         case V_028C70_COLOR_8_24:
1983         case V_028C70_COLOR_24_8:
1984         case V_028C70_COLOR_X24_8_32_FLOAT:
1985                 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
1986                 break;
1987
1988         default:
1989                 assert(0);
1990                 return;
1991         }
1992
1993         /* The DB->CB copy needs 32_ABGR. */
1994         if (is_depth)
1995                 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
1996
1997         surf->spi_shader_col_format = normal;
1998         surf->spi_shader_col_format_alpha = alpha;
1999         surf->spi_shader_col_format_blend = blend;
2000         surf->spi_shader_col_format_blend_alpha = blend_alpha;
2001 }
2002
2003 static void si_initialize_color_surface(struct si_context *sctx,
2004                                         struct r600_surface *surf)
2005 {
2006         struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
2007         unsigned color_info, color_attrib, color_view;
2008         unsigned format, swap, ntype, endian;
2009         const struct util_format_description *desc;
2010         int i;
2011         unsigned blend_clamp = 0, blend_bypass = 0;
2012
2013         color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
2014                      S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);
2015
2016         desc = util_format_description(surf->base.format);
2017         for (i = 0; i < 4; i++) {
2018                 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
2019                         break;
2020                 }
2021         }
2022         if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
2023                 ntype = V_028C70_NUMBER_FLOAT;
2024         } else {
2025                 ntype = V_028C70_NUMBER_UNORM;
2026                 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
2027                         ntype = V_028C70_NUMBER_SRGB;
2028                 else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
2029                         if (desc->channel[i].pure_integer) {
2030                                 ntype = V_028C70_NUMBER_SINT;
2031                         } else {
2032                                 assert(desc->channel[i].normalized);
2033                                 ntype = V_028C70_NUMBER_SNORM;
2034                         }
2035                 } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
2036                         if (desc->channel[i].pure_integer) {
2037                                 ntype = V_028C70_NUMBER_UINT;
2038                         } else {
2039                                 assert(desc->channel[i].normalized);
2040                                 ntype = V_028C70_NUMBER_UNORM;
2041                         }
2042                 }
2043         }
2044
2045         format = si_translate_colorformat(surf->base.format);
2046         if (format == V_028C70_COLOR_INVALID) {
2047                 R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
2048         }
2049         assert(format != V_028C70_COLOR_INVALID);
2050         swap = r600_translate_colorswap(surf->base.format, false);
2051         endian = si_colorformat_endian_swap(format);
2052
2053         /* blend clamp should be set for all NORM/SRGB types */
2054         if (ntype == V_028C70_NUMBER_UNORM ||
2055             ntype == V_028C70_NUMBER_SNORM ||
2056             ntype == V_028C70_NUMBER_SRGB)
2057                 blend_clamp = 1;
2058
2059         /* set blend bypass according to docs if SINT/UINT or
2060            8/24 COLOR variants */
2061         if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2062             format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2063             format == V_028C70_COLOR_X24_8_32_FLOAT) {
2064                 blend_clamp = 0;
2065                 blend_bypass = 1;
2066         }
2067
2068         if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2069             (format == V_028C70_COLOR_8 ||
2070              format == V_028C70_COLOR_8_8 ||
2071              format == V_028C70_COLOR_8_8_8_8))
2072                 surf->color_is_int8 = true;
2073
2074         color_info = S_028C70_FORMAT(format) |
2075                 S_028C70_COMP_SWAP(swap) |
2076                 S_028C70_BLEND_CLAMP(blend_clamp) |
2077                 S_028C70_BLEND_BYPASS(blend_bypass) |
2078                 S_028C70_SIMPLE_FLOAT(1) |
2079                 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2080                                     ntype != V_028C70_NUMBER_SNORM &&
2081                                     ntype != V_028C70_NUMBER_SRGB &&
2082                                     format != V_028C70_COLOR_8_24 &&
2083                                     format != V_028C70_COLOR_24_8) |
2084                 S_028C70_NUMBER_TYPE(ntype) |
2085                 S_028C70_ENDIAN(endian);
2086
2087         /* Intensity is implemented as Red, so treat it that way. */
2088         color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 ||
2089                                                   util_format_is_intensity(surf->base.format));
2090
2091         if (rtex->resource.b.b.nr_samples > 1) {
2092                 unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
2093
2094                 color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2095                                 S_028C74_NUM_FRAGMENTS(log_samples);
2096
2097                 if (rtex->fmask.size) {
2098                         color_info |= S_028C70_COMPRESSION(1);
2099                         unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);
2100
2101                         if (sctx->b.chip_class == SI) {
2102                                 /* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
2103                                 color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
2104                         }
2105                 }
2106         }
2107
2108         surf->cb_color_view = color_view;
2109         surf->cb_color_info = color_info;
2110         surf->cb_color_attrib = color_attrib;
2111
2112         if (sctx->b.chip_class >= VI) {
2113                 unsigned max_uncompressed_block_size = 2;
2114
2115                 if (rtex->surface.nsamples > 1) {
2116                         if (rtex->surface.bpe == 1)
2117                                 max_uncompressed_block_size = 0;
2118                         else if (rtex->surface.bpe == 2)
2119                                 max_uncompressed_block_size = 1;
2120                 }
2121
2122                 surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2123                                        S_028C78_INDEPENDENT_64B_BLOCKS(1);
2124         }
2125
2126         /* This must be set for fast clear to work without FMASK. */
2127         if (!rtex->fmask.size && sctx->b.chip_class == SI) {
2128                 unsigned bankh = util_logbase2(rtex->surface.bankh);
2129                 surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2130         }
2131
2132         /* Determine pixel shader export format */
2133         si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);
2134
2135         surf->color_initialized = true;
2136 }
2137
2138 static void si_init_depth_surface(struct si_context *sctx,
2139                                   struct r600_surface *surf)
2140 {
2141         struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
2142         unsigned level = surf->base.u.tex.level;
2143         struct radeon_surf_level *levelinfo = &rtex->surface.level[level];
2144         unsigned format;
2145         uint32_t z_info, s_info, db_depth_info;
2146         uint64_t z_offs, s_offs;
2147         uint32_t db_htile_data_base, db_htile_surface;
2148
2149         format = si_translate_dbformat(rtex->db_render_format);
2150
2151         if (format == V_028040_Z_INVALID) {
2152                 R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
2153         }
2154         assert(format != V_028040_Z_INVALID);
2155
2156         s_offs = z_offs = rtex->resource.gpu_address;
2157         z_offs += rtex->surface.level[level].offset;
2158         s_offs += rtex->surface.stencil_level[level].offset;
2159
2160         db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile);
2161
2162         z_info = S_028040_FORMAT(format);
2163         if (rtex->resource.b.b.nr_samples > 1) {
2164                 z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
2165         }
2166
2167         if (rtex->surface.flags & RADEON_SURF_SBUFFER)
2168                 s_info = S_028044_FORMAT(V_028044_STENCIL_8);
2169         else
2170                 s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
2171
2172         if (sctx->b.chip_class >= CIK) {
2173                 struct radeon_info *info = &sctx->screen->b.info;
2174                 unsigned index = rtex->surface.tiling_index[level];
2175                 unsigned stencil_index = rtex->surface.stencil_tiling_index[level];
2176                 unsigned macro_index = rtex->surface.macro_tile_index;
2177                 unsigned tile_mode = info->si_tile_mode_array[index];
2178                 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2179                 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2180
2181                 db_depth_info |=
2182                         S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2183                         S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2184                         S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2185                         S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2186                         S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2187                         S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2188                 z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2189                 s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2190         } else {
2191                 unsigned tile_mode_index = si_tile_mode_index(rtex, level, false);
2192                 z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2193                 tile_mode_index = si_tile_mode_index(rtex, level, true);
2194                 s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2195         }
2196
2197         /* HiZ aka depth buffer htile */
2198         /* use htile only for first level */
2199         if (rtex->htile_buffer && !level) {
2200                 z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
2201                           S_028040_ALLOW_EXPCLEAR(1);
2202
2203                 if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
2204                         /* Workaround: For a not yet understood reason, the
2205                          * combination of MSAA, fast stencil clear and stencil
2206                          * decompress messes with subsequent stencil buffer
2207                          * uses. Problem was reproduced on Verde, Bonaire,
2208                          * Tonga, and Carrizo.
2209                          *
2210                          * Disabling EXPCLEAR works around the problem.
2211                          *
2212                          * Check piglit's arb_texture_multisample-stencil-clear
2213                          * test if you want to try changing this.
2214                          */
2215                         if (rtex->resource.b.b.nr_samples <= 1)
2216                                 s_info |= S_028044_ALLOW_EXPCLEAR(1);
2217                 } else if (!rtex->tc_compatible_htile) {
2218                         /* Use all of the htile_buffer for depth if there's no stencil.
2219                          * This must not be set when TC-compatible HTILE is enabled
2220                          * due to a hw bug.
2221                          */
2222                         s_info |= S_028044_TILE_STENCIL_DISABLE(1);
2223                 }
2224
2225                 uint64_t va = rtex->htile_buffer->gpu_address;
2226                 db_htile_data_base = va >> 8;
2227                 db_htile_surface = S_028ABC_FULL_CACHE(1);
2228
2229                 if (rtex->tc_compatible_htile) {
2230                         db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
2231
2232                         switch (rtex->resource.b.b.nr_samples) {
2233                         case 0:
2234                         case 1:
2235                                 z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
2236                                 break;
2237                         case 2:
2238                         case 4:
2239                                 z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
2240                                 break;
2241                         case 8:
2242                                 z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
2243                                 break;
2244                         default:
2245                                 assert(0);
2246                         }
2247                 }
2248         } else {
2249                 db_htile_data_base = 0;
2250                 db_htile_surface = 0;
2251         }
2252
2253         assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);
2254
2255         surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
2256                               S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
2257         surf->db_htile_data_base = db_htile_data_base;
2258         surf->db_depth_info = db_depth_info;
2259         surf->db_z_info = z_info;
2260         surf->db_stencil_info = s_info;
2261         surf->db_depth_base = z_offs >> 8;
2262         surf->db_stencil_base = s_offs >> 8;
2263         surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
2264                               S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
2265         surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
2266                                                         levelinfo->nblk_y) / 64 - 1);
2267         surf->db_htile_surface = db_htile_surface;
2268
2269         surf->depth_initialized = true;
2270 }
2271
2272 static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
2273 {
2274         for (int i = 0; i < state->nr_cbufs; ++i) {
2275                 struct r600_surface *surf = NULL;
2276                 struct r600_texture *rtex;
2277
2278                 if (!state->cbufs[i])
2279                         continue;
2280                 surf = (struct r600_surface*)state->cbufs[i];
2281                 rtex = (struct r600_texture*)surf->base.texture;
2282
2283                 p_atomic_dec(&rtex->framebuffers_bound);
2284         }
2285 }
2286
2287 static void si_set_framebuffer_state(struct pipe_context *ctx,
2288                                      const struct pipe_framebuffer_state *state)
2289 {
2290         struct si_context *sctx = (struct si_context *)ctx;
2291         struct pipe_constant_buffer constbuf = {0};
2292         struct r600_surface *surf = NULL;
2293         struct r600_texture *rtex;
2294         bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
2295         unsigned old_nr_samples = sctx->framebuffer.nr_samples;
2296         int i;
2297
2298         for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
2299                 if (!sctx->framebuffer.state.cbufs[i])
2300                         continue;
2301
2302                 rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
2303                 if (rtex->dcc_gather_statistics)
2304                         vi_separate_dcc_stop_query(ctx, rtex);
2305         }
2306
2307         /* Only flush TC when changing the framebuffer state, because
2308          * the only client not using TC that can change textures is
2309          * the framebuffer.
2310          *
2311          * Flush all CB and DB caches here because all buffers can be used
2312          * for write by both TC (with shader image stores) and CB/DB.
2313          */
2314         sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
2315                          SI_CONTEXT_INV_GLOBAL_L2 |
2316                          SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
2317                          SI_CONTEXT_CS_PARTIAL_FLUSH;
2318
2319         /* Take the maximum of the old and new count. If the new count is lower,
2320          * dirtying is needed to disable the unbound colorbuffers.
2321          */
2322         sctx->framebuffer.dirty_cbufs |=
2323                 (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
2324         sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
2325
2326         si_dec_framebuffer_counters(&sctx->framebuffer.state);
2327         util_copy_framebuffer_state(&sctx->framebuffer.state, state);
2328
2329         sctx->framebuffer.spi_shader_col_format = 0;
2330         sctx->framebuffer.spi_shader_col_format_alpha = 0;
2331         sctx->framebuffer.spi_shader_col_format_blend = 0;
2332         sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
2333         sctx->framebuffer.color_is_int8 = 0;
2334
2335         sctx->framebuffer.compressed_cb_mask = 0;
2336         sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
2337         sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
2338         sctx->framebuffer.any_dst_linear = false;
2339
2340         for (i = 0; i < state->nr_cbufs; i++) {
2341                 if (!state->cbufs[i])
2342                         continue;
2343
2344                 surf = (struct r600_surface*)state->cbufs[i];
2345                 rtex = (struct r600_texture*)surf->base.texture;
2346
2347                 if (!surf->color_initialized) {
2348                         si_initialize_color_surface(sctx, surf);
2349                 }
2350
2351                 sctx->framebuffer.spi_shader_col_format |=
2352                         surf->spi_shader_col_format << (i * 4);
2353                 sctx->framebuffer.spi_shader_col_format_alpha |=
2354                         surf->spi_shader_col_format_alpha << (i * 4);
2355                 sctx->framebuffer.spi_shader_col_format_blend |=
2356                         surf->spi_shader_col_format_blend << (i * 4);
2357                 sctx->framebuffer.spi_shader_col_format_blend_alpha |=
2358                         surf->spi_shader_col_format_blend_alpha << (i * 4);
2359
2360                 if (surf->color_is_int8)
2361                         sctx->framebuffer.color_is_int8 |= 1 << i;
2362
2363                 if (rtex->fmask.size && rtex->cmask.size) {
2364                         sctx->framebuffer.compressed_cb_mask |= 1 << i;
2365                 }
2366
2367                 if (surf->level_info->mode == RADEON_SURF_MODE_LINEAR_ALIGNED)
2368                         sctx->framebuffer.any_dst_linear = true;
2369
2370                 r600_context_add_resource_size(ctx, surf->base.texture);
2371
2372                 p_atomic_inc(&rtex->framebuffers_bound);
2373
2374                 if (rtex->dcc_gather_statistics) {
2375                         /* Dirty tracking must be enabled for DCC usage analysis. */
2376                         sctx->framebuffer.compressed_cb_mask |= 1 << i;
2377                         vi_separate_dcc_start_query(ctx, rtex);
2378                 }
2379         }
2380
2381         if (state->zsbuf) {
2382                 surf = (struct r600_surface*)state->zsbuf;
2383                 rtex = (struct r600_texture*)surf->base.texture;
2384
2385                 if (!surf->depth_initialized) {
2386                         si_init_depth_surface(sctx, surf);
2387                 }
2388                 r600_context_add_resource_size(ctx, surf->base.texture);
2389         }
2390
2391         si_update_poly_offset_state(sctx);
2392         si_mark_atom_dirty(sctx, &sctx->cb_render_state);
2393         si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
2394
2395         if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
2396                 si_mark_atom_dirty(sctx, &sctx->msaa_config);
2397
2398         if (sctx->framebuffer.nr_samples != old_nr_samples) {
2399                 si_mark_atom_dirty(sctx, &sctx->msaa_config);
2400                 si_mark_atom_dirty(sctx, &sctx->db_render_state);
2401
2402                 /* Set sample locations as fragment shader constants. */
2403                 switch (sctx->framebuffer.nr_samples) {
2404                 case 1:
2405                         constbuf.user_buffer = sctx->b.sample_locations_1x;
2406                         break;
2407                 case 2:
2408                         constbuf.user_buffer = sctx->b.sample_locations_2x;
2409                         break;
2410                 case 4:
2411                         constbuf.user_buffer = sctx->b.sample_locations_4x;
2412                         break;
2413                 case 8:
2414                         constbuf.user_buffer = sctx->b.sample_locations_8x;
2415                         break;
2416                 case 16:
2417                         constbuf.user_buffer = sctx->b.sample_locations_16x;
2418                         break;
2419                 default:
2420                         R600_ERR("Requested an invalid number of samples %i.\n",
2421                                  sctx->framebuffer.nr_samples);
2422                         assert(0);
2423                 }
2424                 constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
2425                 si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);
2426
2427                 si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
2428         }
2429
2430         sctx->need_check_render_feedback = true;
2431         sctx->do_update_shaders = true;
2432 }
2433
2434 static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
2435 {
2436         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
2437         struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
2438         unsigned i, nr_cbufs = state->nr_cbufs;
2439         struct r600_texture *tex = NULL;
2440         struct r600_surface *cb = NULL;
2441         unsigned cb_color_info = 0;
2442
2443         /* Colorbuffers. */
2444         for (i = 0; i < nr_cbufs; i++) {
2445                 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2446                 unsigned cb_color_base, cb_color_fmask, cb_color_attrib;
2447                 unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice;
2448
2449                 if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
2450                         continue;
2451
2452                 cb = (struct r600_surface*)state->cbufs[i];
2453                 if (!cb) {
2454                         radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
2455                                                S_028C70_FORMAT(V_028C70_COLOR_INVALID));
2456                         continue;
2457                 }
2458
2459                 tex = (struct r600_texture *)cb->base.texture;
2460                 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
2461                                       &tex->resource, RADEON_USAGE_READWRITE,
2462                                       tex->surface.nsamples > 1 ?
2463                                               RADEON_PRIO_COLOR_BUFFER_MSAA :
2464                                               RADEON_PRIO_COLOR_BUFFER);
2465
2466                 if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
2467                         radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
2468                                 tex->cmask_buffer, RADEON_USAGE_READWRITE,
2469                                 RADEON_PRIO_CMASK);
2470                 }
2471
2472                 if (tex->dcc_separate_buffer)
2473                         radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
2474                                                   tex->dcc_separate_buffer,
2475                                                   RADEON_USAGE_READWRITE,
2476                                                   RADEON_PRIO_DCC);
2477
2478                 /* Compute mutable surface parameters. */
2479                 pitch_tile_max = cb->level_info->nblk_x / 8 - 1;
2480                 slice_tile_max = cb->level_info->nblk_x *
2481                                  cb->level_info->nblk_y / 64 - 1;
2482                 tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false);
2483
2484                 cb_color_base = (tex->resource.gpu_address + cb->level_info->offset) >> 8;
2485                 cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2486                 cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2487                 cb_color_attrib = cb->cb_color_attrib |
2488                                   S_028C74_TILE_MODE_INDEX(tile_mode_index);
2489
2490                 if (tex->fmask.size) {
2491                         if (sctx->b.chip_class >= CIK)
2492                                 cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->fmask.pitch_in_pixels / 8 - 1);
2493                         cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->fmask.tile_mode_index);
2494                         cb_color_fmask = (tex->resource.gpu_address + tex->fmask.offset) >> 8;
2495                         cb_color_fmask_slice = S_028C88_TILE_MAX(tex->fmask.slice_tile_max);
2496                 } else {
2497                         /* This must be set for fast clear to work without FMASK. */
2498                         if (sctx->b.chip_class >= CIK)
2499                                 cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2500                         cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2501                         cb_color_fmask = cb_color_base;
2502                         cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2503                 }
2504
2505                 cb_color_info = cb->cb_color_info | tex->cb_color_info;
2506
2507                 if (tex->dcc_offset && cb->level_info->dcc_enabled) {
2508                         bool is_msaa_resolve_dst = state->cbufs[0] &&
2509                                                    state->cbufs[0]->texture->nr_samples > 1 &&
2510                                                    state->cbufs[1] == &cb->base &&
2511                                                    state->cbufs[1]->texture->nr_samples <= 1;
2512
2513                         if (!is_msaa_resolve_dst)
2514                                 cb_color_info |= S_028C70_DCC_ENABLE(1);
2515                 }
2516
2517                 radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
2518                                            sctx->b.chip_class >= VI ? 14 : 13);
2519                 radeon_emit(cs, cb_color_base);         /* R_028C60_CB_COLOR0_BASE */
2520                 radeon_emit(cs, cb_color_pitch);        /* R_028C64_CB_COLOR0_PITCH */
2521                 radeon_emit(cs, cb_color_slice);        /* R_028C68_CB_COLOR0_SLICE */
2522                 radeon_emit(cs, cb->cb_color_view);     /* R_028C6C_CB_COLOR0_VIEW */
2523                 radeon_emit(cs, cb_color_info);         /* R_028C70_CB_COLOR0_INFO */
2524                 radeon_emit(cs, cb_color_attrib);       /* R_028C74_CB_COLOR0_ATTRIB */
2525                 radeon_emit(cs, cb->cb_dcc_control);    /* R_028C78_CB_COLOR0_DCC_CONTROL */
2526                 radeon_emit(cs, tex->cmask.base_address_reg);   /* R_028C7C_CB_COLOR0_CMASK */
2527                 radeon_emit(cs, tex->cmask.slice_tile_max);     /* R_028C80_CB_COLOR0_CMASK_SLICE */
2528                 radeon_emit(cs, cb_color_fmask);                /* R_028C84_CB_COLOR0_FMASK */
2529                 radeon_emit(cs, cb_color_fmask_slice);          /* R_028C88_CB_COLOR0_FMASK_SLICE */
2530                 radeon_emit(cs, tex->color_clear_value[0]);     /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
2531                 radeon_emit(cs, tex->color_clear_value[1]);     /* R_028C90_CB_COLOR0_CLEAR_WORD1 */
2532
2533                 if (sctx->b.chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */
2534                         radeon_emit(cs, ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
2535                                          tex->dcc_offset +
2536                                          tex->surface.level[cb->base.u.tex.level].dcc_offset) >> 8);
2537         }
2538         for (; i < 8 ; i++)
2539                 if (sctx->framebuffer.dirty_cbufs & (1 << i))
2540                         radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
2541
2542         /* ZS buffer. */
2543         if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
2544                 struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
2545                 struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;
2546
2547                 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
2548                                       &rtex->resource, RADEON_USAGE_READWRITE,
2549                                       zb->base.texture->nr_samples > 1 ?
2550                                               RADEON_PRIO_DEPTH_BUFFER_MSAA :
2551                                               RADEON_PRIO_DEPTH_BUFFER);
2552
2553                 if (zb->db_htile_data_base) {
2554                         radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
2555                                               rtex->htile_buffer, RADEON_USAGE_READWRITE,
2556                                               RADEON_PRIO_HTILE);
2557                 }
2558
2559                 radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
2560                 radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
2561
2562                 radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
2563                 radeon_emit(cs, zb->db_depth_info);     /* R_02803C_DB_DEPTH_INFO */
2564                 radeon_emit(cs, zb->db_z_info |         /* R_028040_DB_Z_INFO */
2565                             S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0));
2566                 radeon_emit(cs, zb->db_stencil_info);   /* R_028044_DB_STENCIL_INFO */
2567                 radeon_emit(cs, zb->db_depth_base);     /* R_028048_DB_Z_READ_BASE */
2568                 radeon_emit(cs, zb->db_stencil_base);   /* R_02804C_DB_STENCIL_READ_BASE */
2569                 radeon_emit(cs, zb->db_depth_base);     /* R_028050_DB_Z_WRITE_BASE */
2570                 radeon_emit(cs, zb->db_stencil_base);   /* R_028054_DB_STENCIL_WRITE_BASE */
2571                 radeon_emit(cs, zb->db_depth_size);     /* R_028058_DB_DEPTH_SIZE */
2572                 radeon_emit(cs, zb->db_depth_slice);    /* R_02805C_DB_DEPTH_SLICE */
2573
2574                 radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
2575                 radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */
2576                 radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */
2577
2578                 radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
2579         } else if (sctx->framebuffer.dirty_zsbuf) {
2580                 radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
2581                 radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
2582                 radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
2583         }
2584
2585         /* Framebuffer dimensions. */
2586         /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
2587         radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
2588                                S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
2589
2590         sctx->framebuffer.dirty_cbufs = 0;
2591         sctx->framebuffer.dirty_zsbuf = false;
2592 }
2593
2594 static void si_emit_msaa_sample_locs(struct si_context *sctx,
2595                                      struct r600_atom *atom)
2596 {
2597         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
2598         unsigned nr_samples = sctx->framebuffer.nr_samples;
2599
2600         /* Smoothing (only possible with nr_samples == 1) uses the same
2601          * sample locations as the MSAA it simulates.
2602          */
2603         if (nr_samples <= 1 && sctx->smoothing_enabled)
2604                 nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;
2605
2606         /* On Polaris, the small primitive filter uses the sample locations
2607          * even when MSAA is off, so we need to make sure they're set to 0.
2608          */
2609         if ((nr_samples > 1 || sctx->b.family >= CHIP_POLARIS10) &&
2610             (nr_samples != sctx->msaa_sample_locs.nr_samples)) {
2611                 sctx->msaa_sample_locs.nr_samples = nr_samples;
2612                 cayman_emit_msaa_sample_locs(cs, nr_samples);
2613         }
2614
2615         if (sctx->b.family >= CHIP_POLARIS10) {
2616                 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
2617                 unsigned small_prim_filter_cntl =
2618                         S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
2619                         S_028830_LINE_FILTER_DISABLE(1); /* line bug */
2620
2621                 /* The alternative of setting sample locations to 0 would
2622                  * require a DB flush to avoid Z errors, see
2623                  * https://bugs.freedesktop.org/show_bug.cgi?id=96908
2624                  */
2625                 if (sctx->framebuffer.nr_samples > 1 && rs && !rs->multisample_enable)
2626                         small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;
2627
2628                 radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
2629                                        small_prim_filter_cntl);
2630         }
2631 }
2632
2633 static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
2634 {
2635         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
2636         unsigned num_tile_pipes = sctx->screen->b.info.num_tile_pipes;
2637         /* 33% faster rendering to linear color buffers */
2638         bool dst_is_linear = sctx->framebuffer.any_dst_linear;
2639         unsigned sc_mode_cntl_1 =
2640                 S_028A4C_WALK_SIZE(dst_is_linear) |
2641                 S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) |
2642                 S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
2643                 /* always 1: */
2644                 S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) |
2645                 S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
2646                 S_028A4C_TILE_WALK_ORDER_ENABLE(1) |
2647                 S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
2648                 S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
2649                 S_028A4C_FORCE_EOV_REZ_ENABLE(1);
2650
2651         cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
2652                                 sctx->ps_iter_samples,
2653                                 sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0,
2654                                 sc_mode_cntl_1);
2655 }
2656
2657 static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
2658 {
2659         struct si_context *sctx = (struct si_context *)ctx;
2660
2661         if (sctx->ps_iter_samples == min_samples)
2662                 return;
2663
2664         sctx->ps_iter_samples = min_samples;
2665         sctx->do_update_shaders = true;
2666
2667         if (sctx->framebuffer.nr_samples > 1)
2668                 si_mark_atom_dirty(sctx, &sctx->msaa_config);
2669 }
2670
2671 /*
2672  * Samplers
2673  */
2674
2675 /**
2676  * Build the sampler view descriptor for a buffer texture.
2677  * @param state 256-bit descriptor; only the high 128 bits are filled in
2678  */
2679 void
2680 si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
2681                           enum pipe_format format,
2682                           unsigned offset, unsigned size,
2683                           uint32_t *state)
2684 {
2685         const struct util_format_description *desc;
2686         int first_non_void;
2687         unsigned stride;
2688         unsigned num_records;
2689         unsigned num_format, data_format;
2690
2691         desc = util_format_description(format);
2692         first_non_void = util_format_get_first_non_void_channel(format);
2693         stride = desc->block.bits / 8;
2694         num_format = si_translate_buffer_numformat(&screen->b.b, desc, first_non_void);
2695         data_format = si_translate_buffer_dataformat(&screen->b.b, desc, first_non_void);
2696
2697         num_records = size / stride;
2698         num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride);
2699
2700         if (screen->b.chip_class >= VI)
2701                 num_records *= stride;
2702
2703         state[4] = 0;
2704         state[5] = S_008F04_STRIDE(stride);
2705         state[6] = num_records;
2706         state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
2707                    S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
2708                    S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
2709                    S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
2710                    S_008F0C_NUM_FORMAT(num_format) |
2711                    S_008F0C_DATA_FORMAT(data_format);
2712 }
2713
2714 /**
2715  * Build the sampler view descriptor for a texture.
      *
      * @param screen         screen; provides chip_class and the format translator
      * @param tex            texture; its resource target, nr_samples, array_size,
      *                       last_level, dcc_offset and fmask fields are read
      * @param sampler        if false and the resource is a cube, cube array or
      *                       3D texture, the descriptor type is forced to 2D array
      * @param target         view target, passed to si_tex_dim()
      * @param pipe_format    format described by the descriptor
      * @param state_swizzle  view swizzle, composed with the format swizzle
      * @param first_level    written to BASE_LEVEL (non-MSAA resources only)
      * @param last_level     written to LAST_LEVEL (non-MSAA resources only)
      * @param first_layer    written to BASE_ARRAY
      * @param last_layer     written to LAST_ARRAY
      * @param width          width; stored as width - 1
      * @param height         height; stored as height - 1 (forced to 1 for 1D arrays)
      * @param depth          depth; may be replaced by the array size depending on
      *                       the descriptor type, stored as depth - 1
      * @param state          receives state[0]..state[7]; state[0] is written as 0
      *                       here (no base address is filled in by this function)
      * @param fmask_state    receives fmask_state[0]..fmask_state[7], but only
      *                       when tex->fmask.size is non-zero
2716  */
2717 void
2718 si_make_texture_descriptor(struct si_screen *screen,
2719                            struct r600_texture *tex,
2720                            bool sampler,
2721                            enum pipe_texture_target target,
2722                            enum pipe_format pipe_format,
2723                            const unsigned char state_swizzle[4],
2724                            unsigned first_level, unsigned last_level,
2725                            unsigned first_layer, unsigned last_layer,
2726                            unsigned width, unsigned height, unsigned depth,
2727                            uint32_t *state,
2728                            uint32_t *fmask_state)
2729 {
2730         struct pipe_resource *res = &tex->resource.b.b;
2731         const struct util_format_description *desc;
2732         unsigned char swizzle[4];
2733         int first_non_void;
2734         unsigned num_format, data_format, type;
2735         uint64_t va;
2736
2737         desc = util_format_description(pipe_format);
2738
             /* Depth/stencil formats do not use desc->swizzle: the formats
              * listed below start from component 1 replicated to all
              * channels, every other Z/S format from component 0, and the
              * view swizzle is composed on top of that.
              */
2739         if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
2740                 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
2741                 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
2742
2743                 switch (pipe_format) {
2744                 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2745                 case PIPE_FORMAT_X24S8_UINT:
2746                 case PIPE_FORMAT_X32_S8X24_UINT:
2747                 case PIPE_FORMAT_X8Z24_UNORM:
2748                         util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
2749                         break;
2750                 default:
2751                         util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
2752                 }
2753         } else {
2754                 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
2755         }
2756
2757         first_non_void = util_format_get_first_non_void_channel(pipe_format);
2758
             /* Select the numeric format (IMG_NUM_FORMAT_*). */
2759         switch (pipe_format) {
2760         case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2761                 num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2762                 break;
2763         default:
                     /* No non-void channel: decide by layout. Compressed
                      * formats use the table below, subsampled formats are
                      * UNORM, and everything else is treated as FLOAT.
                      */
2764                 if (first_non_void < 0) {
2765                         if (util_format_is_compressed(pipe_format)) {
2766                                 switch (pipe_format) {
2767                                 case PIPE_FORMAT_DXT1_SRGB:
2768                                 case PIPE_FORMAT_DXT1_SRGBA:
2769                                 case PIPE_FORMAT_DXT3_SRGBA:
2770                                 case PIPE_FORMAT_DXT5_SRGBA:
2771                                 case PIPE_FORMAT_BPTC_SRGBA:
2772                                 case PIPE_FORMAT_ETC2_SRGB8:
2773                                 case PIPE_FORMAT_ETC2_SRGB8A1:
2774                                 case PIPE_FORMAT_ETC2_SRGBA8:
2775                                         num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2776                                         break;
2777                                 case PIPE_FORMAT_RGTC1_SNORM:
2778                                 case PIPE_FORMAT_LATC1_SNORM:
2779                                 case PIPE_FORMAT_RGTC2_SNORM:
2780                                 case PIPE_FORMAT_LATC2_SNORM:
2781                                 case PIPE_FORMAT_ETC2_R11_SNORM:
2782                                 case PIPE_FORMAT_ETC2_RG11_SNORM:
2783                                 /* implies float, so use SNORM/UNORM to determine
2784                                    whether data is signed or not */
2785                                 case PIPE_FORMAT_BPTC_RGB_FLOAT:
2786                                         num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2787                                         break;
2788                                 default:
2789                                         num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2790                                         break;
2791                                 }
2792                         } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
2793                                 num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2794                         } else {
2795                                 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2796                         }
2797                 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
2798                         num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2799                 } else {
                             /* Plain formats: derive the numeric format from
                              * the first non-void channel. UNORM is kept for
                              * channel types not handled by the switch.
                              */
2800                         num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2801
2802                         switch (desc->channel[first_non_void].type) {
2803                         case UTIL_FORMAT_TYPE_FLOAT:
2804                                 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2805                                 break;
2806                         case UTIL_FORMAT_TYPE_SIGNED:
2807                                 if (desc->channel[first_non_void].normalized)
2808                                         num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2809                                 else if (desc->channel[first_non_void].pure_integer)
2810                                         num_format = V_008F14_IMG_NUM_FORMAT_SINT;
2811                                 else
2812                                         num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
2813                                 break;
2814                         case UTIL_FORMAT_TYPE_UNSIGNED:
2815                                 if (desc->channel[first_non_void].normalized)
2816                                         num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2817                                 else if (desc->channel[first_non_void].pure_integer)
2818                                         num_format = V_008F14_IMG_NUM_FORMAT_UINT;
2819                                 else
2820                                         num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
2821                         }
2822                 }
2823         }
2824
             /* A result of ~0 from si_translate_texformat() is replaced by
              * data format 0. NOTE(review): ~0 presumably means "no matching
              * hardware format" - confirm against si_translate_texformat().
              */
2825         data_format = si_translate_texformat(&screen->b.b, pipe_format, desc, first_non_void);
2826         if (data_format == ~0) {
2827                 data_format = 0;
2828         }
2829
2830         if (!sampler &&
2831             (res->target == PIPE_TEXTURE_CUBE ||
2832              res->target == PIPE_TEXTURE_CUBE_ARRAY ||
2833              res->target == PIPE_TEXTURE_3D)) {
2834                 /* For the purpose of shader images, treat cube maps and 3D
2835                  * textures as 2D arrays. For 3D textures, the address
2836                  * calculations for mipmaps are different, so we rely on the
2837                  * caller to effectively disable mipmaps.
2838                  */
2839                 type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
2840
2841                 assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0));
2842         } else {
2843                 type = si_tex_dim(res->target, target, res->nr_samples);
2844         }
2845
             /* Adjust height/depth for the descriptor type: array types take
              * their depth from the resource's array size (cubes: array size
              * divided by 6). A 3D resource bound as a non-sampler 2D array
              * keeps the caller's depth.
              */
2846         if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
2847                 height = 1;
2848                 depth = res->array_size;
2849         } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
2850                    type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
2851                 if (sampler || res->target != PIPE_TEXTURE_3D)
2852                         depth = res->array_size;
2853         } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
2854                 depth = res->array_size / 6;
2855
             /* Pack the descriptor dwords. For multisampled resources
              * (nr_samples > 1) BASE_LEVEL is 0 and LAST_LEVEL holds
              * log2(nr_samples) instead of the mip level range.
              */
2856         state[0] = 0;
2857         state[1] = (S_008F14_DATA_FORMAT(data_format) |
2858                     S_008F14_NUM_FORMAT(num_format));
2859         state[2] = (S_008F18_WIDTH(width - 1) |
2860                     S_008F18_HEIGHT(height - 1) |
2861                     S_008F18_PERF_MOD(4));
2862         state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
2863                     S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
2864                     S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
2865                     S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
2866                     S_008F1C_BASE_LEVEL(res->nr_samples > 1 ?
2867                                         0 : first_level) |
2868                     S_008F1C_LAST_LEVEL(res->nr_samples > 1 ?
2869                                         util_logbase2(res->nr_samples) :
2870                                         last_level) |
2871                     S_008F1C_POW2_PAD(res->last_level > 0) |
2872                     S_008F1C_TYPE(type));
2873         state[4] = S_008F20_DEPTH(depth - 1);
2874         state[5] = (S_008F24_BASE_ARRAY(first_layer) |
2875                     S_008F24_LAST_ARRAY(last_layer));
2876         state[6] = 0;
2877         state[7] = 0;
2878
             /* With DCC (non-zero dcc_offset), state[6] carries
              * ALPHA_IS_ON_MSB, set when the color swap value is 0 or 1.
              */
2879         if (tex->dcc_offset) {
2880                 unsigned swap = r600_translate_colorswap(pipe_format, false);
2881
2882                 state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
2883         } else {
2884                 /* The last dword is unused by hw. The shader uses it to clear
2885                  * bits in the first dword of sampler state.
2886                  */
2887                 if (screen->b.chip_class <= CIK && res->nr_samples <= 1) {
2888                         if (first_level == last_level)
2889                                 state[7] = C_008F30_MAX_ANISO_RATIO;
2890                         else
2891                                 state[7] = 0xffffffff;
2892                 }
2893         }
2894
2895         /* Initialize the sampler view for FMASK. */
2896         if (tex->fmask.size) {
2897                 uint32_t fmask_format;
2898
2899                 va = tex->resource.gpu_address + tex->fmask.offset;
2900
                     /* The FMASK data format depends on the sample count;
                      * only 2, 4 and 8 samples are expected here.
                      */
2901                 switch (res->nr_samples) {
2902                 case 2:
2903                         fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
2904                         break;
2905                 case 4:
2906                         fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
2907                         break;
2908                 case 8:
2909                         fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
2910                         break;
2911                 default:
2912                         assert(0);
2913                         fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
2914                 }
2915
                     /* Unlike the main descriptor, the FMASK descriptor gets
                      * its address here (va >> 8 and va >> 40). Its type is
                      * computed by si_tex_dim() with a sample count of 0.
                      */
2916                 fmask_state[0] = va >> 8;
2917                 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
2918                                  S_008F14_DATA_FORMAT(fmask_format) |
2919                                  S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT);
2920                 fmask_state[2] = S_008F18_WIDTH(width - 1) |
2921                                  S_008F18_HEIGHT(height - 1);
2922                 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
2923                                  S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
2924                                  S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
2925                                  S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
2926                                  S_008F1C_TILING_INDEX(tex->fmask.tile_mode_index) |
2927                                  S_008F1C_TYPE(si_tex_dim(res->target, target, 0));
2928                 fmask_state[4] = S_008F20_DEPTH(depth - 1) |
2929                                  S_008F20_PITCH(tex->fmask.pitch_in_pixels - 1);
2930                 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer) |
2931                                  S_008F24_LAST_ARRAY(last_layer);
2932                 fmask_state[6] = 0;
2933                 fmask_state[7] = 0;
2934         }
2935 }
2936
2937 /**
2938  * Create a sampler view.
2939  *
2940  * @param ctx           context
2941  * @param texture       texture
2942  * @param state         sampler view template
2943  * @param width0        width0 override (for compressed textures as int)
2944  * @param height0       height0 override (for compressed textures as int)
2945  * @param force_level   set the base address to the level (for compressed textures)
      *                      0 means no level is forced
      * @return the base object of the new view, or NULL if allocating the view
      *         or creating the flushed depth texture fails
2946  */
2947 struct pipe_sampler_view *
2948 si_create_sampler_view_custom(struct pipe_context *ctx,
2949                               struct pipe_resource *texture,
2950                               const struct pipe_sampler_view *state,
2951                               unsigned width0, unsigned height0,
2952                               unsigned force_level)
2953 {
2954         struct si_context *sctx = (struct si_context*)ctx;
2955         struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view);
2956         struct r600_texture *tmp = (struct r600_texture*)texture;
2957         unsigned base_level, first_level, last_level;
2958         unsigned char state_swizzle[4];
2959         unsigned height, depth, width;
2960         unsigned last_layer = state->u.tex.last_layer;
2961         enum pipe_format pipe_format;
2962         const struct radeon_surf_level *surflevel;
2963
2964         if (!view)
2965                 return NULL;
2966
2967         /* initialize base object */
             /* The template is copied wholesale, then its texture pointer is
              * cleared so that pipe_resource_reference() below installs
              * 'texture' into a clean slot.
              */
2968         view->base = *state;
2969         view->base.texture = NULL;
2970         view->base.reference.count = 1;
2971         view->base.context = ctx;
2972
2973         assert(texture);
2974         pipe_resource_reference(&view->base.texture, texture);
2975
             /* Remember whether the view samples stencil; this is used below
              * for the flushed-depth decision and the DB-compatible format
              * override.
              */
2976         if (state->format == PIPE_FORMAT_X24S8_UINT ||
2977             state->format == PIPE_FORMAT_S8X24_UINT ||
2978             state->format == PIPE_FORMAT_X32_S8X24_UINT ||
2979             state->format == PIPE_FORMAT_S8_UINT)
2980                 view->is_stencil_sampler = true;
2981
2982         /* Buffer resource. */
             /* Buffers only need the buffer descriptor; none of the texture
              * setup below runs for them.
              */
2983         if (texture->target == PIPE_BUFFER) {
2984                 si_make_buffer_descriptor(sctx->screen,
2985                                           (struct r600_resource *)texture,
2986                                           state->format,
2987                                           state->u.buf.offset,
2988                                           state->u.buf.size,
2989                                           view->state);
2990                 return &view->base;
2991         }
2992
2993         state_swizzle[0] = state->swizzle_r;
2994         state_swizzle[1] = state->swizzle_g;
2995         state_swizzle[2] = state->swizzle_b;
2996         state_swizzle[3] = state->swizzle_a;
2997
2998         base_level = 0;
2999         first_level = state->u.tex.first_level;
3000         last_level = state->u.tex.last_level;
3001         width = width0;
3002         height = height0;
3003         depth = texture->depth0;
3004
             /* With a forced level, the view covers exactly that one level:
              * it becomes the base level, the level range collapses to 0..0
              * and the dimensions are minified to that level.
              */
3005         if (force_level) {
3006                 assert(force_level == first_level &&
3007                        force_level == last_level);
3008                 base_level = force_level;
3009                 first_level = 0;
3010                 last_level = 0;
3011                 width = u_minify(width, force_level);
3012                 height = u_minify(height, force_level);
3013                 depth = u_minify(depth, force_level);
3014         }
3015
3016         /* This is not needed if state trackers set last_layer correctly. */
3017         if (state->target == PIPE_TEXTURE_1D ||
3018             state->target == PIPE_TEXTURE_2D ||
3019             state->target == PIPE_TEXTURE_RECT ||
3020             state->target == PIPE_TEXTURE_CUBE)
3021                 last_layer = state->u.tex.first_layer;
3022
3023         /* Start from the requested view format; it may be overridden below. */
3024         pipe_format = state->format;
3025
3026         /* Depth/stencil texturing sometimes needs separate texture. */
3027         if (tmp->is_depth && !r600_can_sample_zs(tmp, view->is_stencil_sampler)) {
                     /* Create the flushed depth texture on demand. On failure
                      * drop the texture reference taken above and free the view.
                      */
3028                 if (!tmp->flushed_depth_texture &&
3029                     !r600_init_flushed_depth_texture(ctx, texture, NULL)) {
3030                         pipe_resource_reference(&view->base.texture, NULL);
3031                         FREE(view);
3032                         return NULL;
3033                 }
3034
3035                 assert(tmp->flushed_depth_texture);
3036
3037                 /* Override format for the case where the flushed texture
3038                  * contains only Z or only S.
3039                  */
3040                 if (tmp->flushed_depth_texture->resource.b.b.format != tmp->resource.b.b.format)
3041                         pipe_format = tmp->flushed_depth_texture->resource.b.b.format;
3042
                     /* From here on the descriptor describes the flushed copy. */
3043                 tmp = tmp->flushed_depth_texture;
3044         }
3045
3046         surflevel = tmp->surface.level;
3047
             /* DB-compatible textures: non-stencil views use the DB render
              * format, and combined Z/S formats are narrowed to the single
              * plane being sampled. Stencil views also switch to the stencil
              * surface levels.
              */
3048         if (tmp->db_compatible) {
3049                 if (!view->is_stencil_sampler)
3050                         pipe_format = tmp->db_render_format;
3051
3052                 switch (pipe_format) {
3053                 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
3054                         pipe_format = PIPE_FORMAT_Z32_FLOAT;
3055                         break;
3056                 case PIPE_FORMAT_X8Z24_UNORM:
3057                 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
3058                         /* Z24 is always stored like this for DB
3059                          * compatibility.
3060                          */
3061                         pipe_format = PIPE_FORMAT_Z24X8_UNORM;
3062                         break;
3063                 case PIPE_FORMAT_X24S8_UINT:
3064                 case PIPE_FORMAT_S8X24_UINT:
3065                 case PIPE_FORMAT_X32_S8X24_UINT:
3066                         pipe_format = PIPE_FORMAT_S8_UINT;
3067                         surflevel = tmp->surface.stencil_level;
3068                         break;
3069                 default:;
3070                 }
3071         }
3072
             /* Note: this call uses the original texture and the requested
              * view format/level, not the overridden tmp/pipe_format.
              */
3073         vi_dcc_disable_if_incompatible_format(&sctx->b, texture,
3074                                               state->u.tex.first_level,
3075                                               state->format);
3076
3077         si_make_texture_descriptor(sctx->screen, tmp, true,
3078                                    state->target, pipe_format, state_swizzle,
3079                                    first_level, last_level,
3080                                    state->u.tex.first_layer, last_layer,
3081                                    width, height, depth,
3082                                    view->state, view->fmask_state);
3083
             /* Record the base level's surface info and the block width of
              * the final format alongside the descriptor.
              */
3084         view->base_level_info = &surflevel[base_level];
3085         view->base_level = base_level;
3086         view->block_width = util_format_get_blockwidth(pipe_format);
3087         return &view->base;
3088 }
3089
3090 static struct pipe_sampler_view *
3091 si_create_sampler_view(struct pipe_context *ctx,
3092                        struct pipe_resource *texture,
3093                        const struct pipe_sampler_view *state)
3094 {
3095         return si_create_sampler_view_custom(ctx, texture, state,
3096                                              texture ? texture->width0 : 0,
3097                                              texture ? texture->height0 : 0, 0);
3098 }
3099
3100 static void si_sampler_view_destroy(struct pipe_context *ctx,
3101                                     struct pipe_sampler_view *state)
3102 {
3103         struct si_sampler_view *view = (struct si_sampler_view *)state;
3104
3105         pipe_resource_reference(&state->texture, NULL);
3106         FREE(view);
3107 }
3108
3109 static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
3110 {
3111         return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
3112                wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
3113                (linear_filter &&
3114                 (wrap == PIPE_TEX_WRAP_CLAMP ||
3115                  wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
3116 }
3117
3118 static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
3119 {
3120         bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
3121                              state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;
3122
3123         return (state->border_color.ui[0] || state->border_color.ui[1] ||
3124                 state->border_color.ui[2] || state->border_color.ui[3]) &&
3125                (wrap_mode_uses_border_color(state->wrap_s, linear_filter) ||
3126                 wrap_mode_uses_border_color(state->wrap_t, linear_filter) ||
3127                 wrap_mode_uses_border_color(state->wrap_r, linear_filter));
3128 }
3129
3130 static void *si_create_sampler_state(struct pipe_context *ctx,
3131                                      const struct pipe_sampler_state *state)
3132 {
3133         struct si_context *sctx = (struct si_context *)ctx;
3134         struct r600_common_screen *rscreen = sctx->b.screen;
3135         struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
3136         unsigned border_color_type, border_color_index = 0;
3137         unsigned max_aniso = rscreen->force_aniso >= 0 ? rscreen->force_aniso
3138                                                        : state->max_anisotropy;
3139         unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso);
3140
3141         if (!rstate) {
3142                 return NULL;
3143         }
3144
3145         if (!sampler_state_needs_border_color(state))
3146                 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3147         else if (state->border_color.f[0] == 0 &&
3148                  state->border_color.f[1] == 0 &&
3149                  state->border_color.f[2] == 0 &&
3150                  state->border_color.f[3] == 0)
3151                 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3152         else if (state->border_color.f[0] == 0 &&
3153                  state->border_color.f[1] == 0 &&
3154                  state->border_color.f[2] == 0 &&
3155                  state->border_color.f[3] == 1)
3156                 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
3157         else if (state->border_color.f[0] == 1 &&
3158                  state->border_color.f[1] == 1 &&
3159                  state->border_color.f[2] == 1 &&
3160                  state->border_color.f[3] == 1)
3161                 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
3162         else {
3163                 int i;
3164
3165                 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
3166
3167                 /* Check if the border has been uploaded already. */
3168                 for (i = 0; i < sctx->border_color_count; i++)
3169                         if (memcmp(&sctx->border_color_table[i], &state->border_color,
3170                                    sizeof(state->border_color)) == 0)
3171                                 break;
3172
3173                 if (i >= SI_MAX_BORDER_COLORS) {
3174                         /* Getting 4096 unique border colors is very unlikely. */
3175                         fprintf(stderr, "radeonsi: The border color table is full. "
3176                                 "Any new border colors will be just black. "
3177                                 "Please file a bug.\n");
3178                         border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3179                 } else {
3180                         if (i == sctx->border_color_count) {
3181                                 /* Upload a new border color. */
3182                                 memcpy(&sctx->border_color_table[i], &state->border_color,
3183                                        sizeof(state->border_color));
3184                                 util_memcpy_cpu_to_le32(&sctx->border_color_map[i],
3185                                                         &state->border_color,
3186                                                         sizeof(state->border_color));
3187                                 sctx->border_color_count++;
3188                         }
3189
3190                         border_color_index = i;
3191                 }
3192         }
3193
3194         rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
3195                           S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
3196                           S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
3197                           S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
3198                           S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
3199                           S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
3200                           S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
3201                           S_008F30_ANISO_BIAS(max_aniso_ratio) |
3202                           S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
3203                           S_008F30_COMPAT_MODE(sctx->b.chip_class >= VI));
3204         rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
3205                           S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) |
3206                           S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
3207         rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
3208                           S_008F38_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) |
3209                           S_008F38_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) |
3210                           S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) |
3211                           S_008F38_MIP_POINT_PRECLAMP(1) |
3212                           S_008F38_DISABLE_LSB_CEIL(1) |
3213                           S_008F38_FILTER_PREC_FIX(1) |
3214                           S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI));
3215         rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) |
3216                          S_008F3C_BORDER_COLOR_TYPE(border_color_type);
3217         return rstate;
3218 }
3219
3220 static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
3221 {
3222         struct si_context *sctx = (struct si_context *)ctx;
3223
3224         if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask)
3225                 return;
3226
3227         sctx->sample_mask.sample_mask = sample_mask;
3228         si_mark_atom_dirty(sctx, &sctx->sample_mask.atom);
3229 }
3230
3231 static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom)
3232 {
3233         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
3234         unsigned mask = sctx->sample_mask.sample_mask;
3235
3236         /* Needed for line and polygon smoothing as well as for the Polaris
3237          * small primitive filter. We expect the state tracker to take care of
3238          * this for us.
3239          */
3240         assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 ||
3241                (mask & 1 && sctx->blitter->running));
3242
3243         radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
3244         radeon_emit(cs, mask | (mask << 16));
3245         radeon_emit(cs, mask | (mask << 16));
3246 }
3247
/* Destroy a sampler state created by si_create_sampler_state().
 *
 * The object is allocated with CALLOC_STRUCT(), so it is released with the
 * matching FREE() macro rather than raw free(), as the other destructors in
 * this file do.
 */
static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
{
        FREE(state);
}
3252
3253 /*
3254  * Vertex elements & buffers
3255  */
3256
3257 static void *si_create_vertex_elements(struct pipe_context *ctx,
3258                                        unsigned count,
3259                                        const struct pipe_vertex_element *elements)
3260 {
3261         struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
3262         int i;
3263
3264         assert(count <= SI_MAX_ATTRIBS);
3265         if (!v)
3266                 return NULL;
3267
3268         v->count = count;
3269         for (i = 0; i < count; ++i) {
3270                 const struct util_format_description *desc;
3271                 unsigned data_format, num_format;
3272                 int first_non_void;
3273
3274                 desc = util_format_description(elements[i].src_format);
3275                 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
3276                 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
3277                 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
3278
3279                 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
3280                                    S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
3281                                    S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
3282                                    S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
3283                                    S_008F0C_NUM_FORMAT(num_format) |
3284                                    S_008F0C_DATA_FORMAT(data_format);
3285                 v->format_size[i] = desc->block.bits / 8;
3286         }
3287         memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
3288
3289         return v;
3290 }
3291
3292 static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
3293 {
3294         struct si_context *sctx = (struct si_context *)ctx;
3295         struct si_vertex_element *v = (struct si_vertex_element*)state;
3296
3297         sctx->vertex_elements = v;
3298         sctx->vertex_buffers_dirty = true;
3299         sctx->do_update_shaders = true;
3300 }
3301
3302 static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
3303 {
3304         struct si_context *sctx = (struct si_context *)ctx;
3305
3306         if (sctx->vertex_elements == state)
3307                 sctx->vertex_elements = NULL;
3308         FREE(state);
3309 }
3310
3311 static void si_set_vertex_buffers(struct pipe_context *ctx,
3312                                   unsigned start_slot, unsigned count,
3313                                   const struct pipe_vertex_buffer *buffers)
3314 {
3315         struct si_context *sctx = (struct si_context *)ctx;
3316         struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot;
3317         int i;
3318
3319         assert(start_slot + count <= ARRAY_SIZE(sctx->vertex_buffer));
3320
3321         if (buffers) {
3322                 for (i = 0; i < count; i++) {
3323                         const struct pipe_vertex_buffer *src = buffers + i;
3324                         struct pipe_vertex_buffer *dsti = dst + i;
3325                         struct pipe_resource *buf = src->buffer;
3326
3327                         pipe_resource_reference(&dsti->buffer, buf);
3328                         dsti->buffer_offset = src->buffer_offset;
3329                         dsti->stride = src->stride;
3330                         r600_context_add_resource_size(ctx, buf);
3331                         if (buf)
3332                                 r600_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER;
3333                 }
3334         } else {
3335                 for (i = 0; i < count; i++) {
3336                         pipe_resource_reference(&dst[i].buffer, NULL);
3337                 }
3338         }
3339         sctx->vertex_buffers_dirty = true;
3340 }
3341
3342 static void si_set_index_buffer(struct pipe_context *ctx,
3343                                 const struct pipe_index_buffer *ib)
3344 {
3345         struct si_context *sctx = (struct si_context *)ctx;
3346
3347         if (ib) {
3348                 struct pipe_resource *buf = ib->buffer;
3349
3350                 pipe_resource_reference(&sctx->index_buffer.buffer, buf);
3351                 memcpy(&sctx->index_buffer, ib, sizeof(*ib));
3352                 r600_context_add_resource_size(ctx, buf);
3353                 if (buf)
3354                         r600_resource(buf)->bind_history |= PIPE_BIND_INDEX_BUFFER;
3355         } else {
3356                 pipe_resource_reference(&sctx->index_buffer.buffer, NULL);
3357         }
3358 }
3359
3360 /*
3361  * Misc
3362  */
3363
3364 static void si_set_tess_state(struct pipe_context *ctx,
3365                               const float default_outer_level[4],
3366                               const float default_inner_level[2])
3367 {
3368         struct si_context *sctx = (struct si_context *)ctx;
3369         struct pipe_constant_buffer cb;
3370         float array[8];
3371
3372         memcpy(array, default_outer_level, sizeof(float) * 4);
3373         memcpy(array+4, default_inner_level, sizeof(float) * 2);
3374
3375         cb.buffer = NULL;
3376         cb.user_buffer = NULL;
3377         cb.buffer_size = sizeof(array);
3378
3379         si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer,
3380                                (void*)array, sizeof(array),
3381                                &cb.buffer_offset);
3382
3383         si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
3384         pipe_resource_reference(&cb.buffer, NULL);
3385 }
3386
3387 static void si_texture_barrier(struct pipe_context *ctx)
3388 {
3389         struct si_context *sctx = (struct si_context *)ctx;
3390
3391         sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
3392                          SI_CONTEXT_INV_GLOBAL_L2 |
3393                          SI_CONTEXT_FLUSH_AND_INV_CB |
3394                          SI_CONTEXT_CS_PARTIAL_FLUSH;
3395 }
3396
3397 /* This only ensures coherency for shader image/buffer stores. */
3398 static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
3399 {
3400         struct si_context *sctx = (struct si_context *)ctx;
3401
3402         /* Subsequent commands must wait for all shader invocations to
3403          * complete. */
3404         sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
3405                          SI_CONTEXT_CS_PARTIAL_FLUSH;
3406
3407         if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
3408                 sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
3409                                  SI_CONTEXT_INV_VMEM_L1;
3410
3411         if (flags & (PIPE_BARRIER_VERTEX_BUFFER |
3412                      PIPE_BARRIER_SHADER_BUFFER |
3413                      PIPE_BARRIER_TEXTURE |
3414                      PIPE_BARRIER_IMAGE |
3415                      PIPE_BARRIER_STREAMOUT_BUFFER |
3416                      PIPE_BARRIER_GLOBAL_BUFFER)) {
3417                 /* As far as I can tell, L1 contents are written back to L2
3418                  * automatically at end of shader, but the contents of other
3419                  * L1 caches might still be stale. */
3420                 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1;
3421         }
3422
3423         if (flags & PIPE_BARRIER_INDEX_BUFFER) {
3424                 /* Indices are read through TC L2 since VI.
3425                  * L1 isn't used.
3426                  */
3427                 if (sctx->screen->b.chip_class <= CIK)
3428                         sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
3429         }
3430
3431         if (flags & PIPE_BARRIER_FRAMEBUFFER)
3432                 sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
3433
3434         if (flags & (PIPE_BARRIER_FRAMEBUFFER |
3435                      PIPE_BARRIER_INDIRECT_BUFFER))
3436                 sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
3437 }
3438
3439 static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
3440 {
3441         struct pipe_blend_state blend;
3442
3443         memset(&blend, 0, sizeof(blend));
3444         blend.independent_blend_enable = true;
3445         blend.rt[0].colormask = 0xf;
3446         return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
3447 }
3448
/**
 * need_gfx_cs_space hook: forwards to si_need_cs_space().
 * The num_dw and include_draw_vbo arguments are not used here.
 */
static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
                                 bool include_draw_vbo)
{
        struct si_context *sctx = (struct si_context*)ctx;

        si_need_cs_space(sctx);
}
3454
3455 static void si_init_config(struct si_context *sctx);
3456
3457 void si_init_state_functions(struct si_context *sctx)
3458 {
3459         si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond);
3460         si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin);
3461         si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable);
3462         si_init_external_atom(sctx, &sctx->b.scissors.atom, &sctx->atoms.s.scissors);
3463         si_init_external_atom(sctx, &sctx->b.viewports.atom, &sctx->atoms.s.viewports);
3464
3465         si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
3466         si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
3467         si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
3468         si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
3469         si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
3470         si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state);
3471         si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color);
3472         si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs);
3473         si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state);
3474         si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref);
3475
3476         sctx->b.b.create_blend_state = si_create_blend_state;
3477         sctx->b.b.bind_blend_state = si_bind_blend_state;
3478         sctx->b.b.delete_blend_state = si_delete_blend_state;
3479         sctx->b.b.set_blend_color = si_set_blend_color;
3480
3481         sctx->b.b.create_rasterizer_state = si_create_rs_state;
3482         sctx->b.b.bind_rasterizer_state = si_bind_rs_state;
3483         sctx->b.b.delete_rasterizer_state = si_delete_rs_state;
3484
3485         sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state;
3486         sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
3487         sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state;
3488
3489         sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);
3490         sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
3491         sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
3492         sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);
3493         sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS);
3494
3495         sctx->b.b.set_clip_state = si_set_clip_state;
3496         sctx->b.b.set_stencil_ref = si_set_stencil_ref;
3497
3498         sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
3499         sctx->b.b.get_sample_position = cayman_get_sample_position;
3500
3501         sctx->b.b.create_sampler_state = si_create_sampler_state;
3502         sctx->b.b.delete_sampler_state = si_delete_sampler_state;
3503
3504         sctx->b.b.create_sampler_view = si_create_sampler_view;
3505         sctx->b.b.sampler_view_destroy = si_sampler_view_destroy;
3506
3507         sctx->b.b.set_sample_mask = si_set_sample_mask;
3508
3509         sctx->b.b.create_vertex_elements_state = si_create_vertex_elements;
3510         sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements;
3511         sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element;
3512         sctx->b.b.set_vertex_buffers = si_set_vertex_buffers;
3513         sctx->b.b.set_index_buffer = si_set_index_buffer;
3514
3515         sctx->b.b.texture_barrier = si_texture_barrier;
3516         sctx->b.b.memory_barrier = si_memory_barrier;
3517         sctx->b.b.set_min_samples = si_set_min_samples;
3518         sctx->b.b.set_tess_state = si_set_tess_state;
3519
3520         sctx->b.b.set_active_query_state = si_set_active_query_state;
3521         sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
3522         sctx->b.save_qbo_state = si_save_qbo_state;
3523         sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;
3524
3525         sctx->b.b.draw_vbo = si_draw_vbo;
3526
3527         si_init_config(sctx);
3528 }
3529
3530 static uint32_t si_get_bo_metadata_word1(struct r600_common_screen *rscreen)
3531 {
3532         return (ATI_VENDOR_ID << 16) | rscreen->info.pci_id;
3533 }
3534
3535 static void si_query_opaque_metadata(struct r600_common_screen *rscreen,
3536                                      struct r600_texture *rtex,
3537                                      struct radeon_bo_metadata *md)
3538 {
3539         struct si_screen *sscreen = (struct si_screen*)rscreen;
3540         struct pipe_resource *res = &rtex->resource.b.b;
3541         static const unsigned char swizzle[] = {
3542                 PIPE_SWIZZLE_X,
3543                 PIPE_SWIZZLE_Y,
3544                 PIPE_SWIZZLE_Z,
3545                 PIPE_SWIZZLE_W
3546         };
3547         uint32_t desc[8], i;
3548         bool is_array = util_resource_is_array_texture(res);
3549
3550         /* DRM 2.x.x doesn't support this. */
3551         if (rscreen->info.drm_major != 3)
3552                 return;
3553
3554         assert(rtex->dcc_separate_buffer == NULL);
3555         assert(rtex->fmask.size == 0);
3556
3557         /* Metadata image format format version 1:
3558          * [0] = 1 (metadata format identifier)
3559          * [1] = (VENDOR_ID << 16) | PCI_ID
3560          * [2:9] = image descriptor for the whole resource
3561          *         [2] is always 0, because the base address is cleared
3562          *         [9] is the DCC offset bits [39:8] from the beginning of
3563          *             the buffer
3564          * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
3565          */
3566
3567         md->metadata[0] = 1; /* metadata image format version 1 */
3568
3569         /* TILE_MODE_INDEX is ambiguous without a PCI ID. */
3570         md->metadata[1] = si_get_bo_metadata_word1(rscreen);
3571
3572         si_make_texture_descriptor(sscreen, rtex, true,
3573                                    res->target, res->format,
3574                                    swizzle, 0, res->last_level, 0,
3575                                    is_array ? res->array_size - 1 : 0,
3576                                    res->width0, res->height0, res->depth0,
3577                                    desc, NULL);
3578
3579         si_set_mutable_tex_desc_fields(rtex, &rtex->surface.level[0], 0, 0,
3580                                        rtex->surface.blk_w, false, desc);
3581
3582         /* Clear the base address and set the relative DCC offset. */
3583         desc[0] = 0;
3584         desc[1] &= C_008F14_BASE_ADDRESS_HI;
3585         desc[7] = rtex->dcc_offset >> 8;
3586
3587         /* Dwords [2:9] contain the image descriptor. */
3588         memcpy(&md->metadata[2], desc, sizeof(desc));
3589
3590         /* Dwords [10:..] contain the mipmap level offsets. */
3591         for (i = 0; i <= res->last_level; i++)
3592                 md->metadata[10+i] = rtex->surface.level[i].offset >> 8;
3593
3594         md->size_metadata = (11 + res->last_level) * 4;
3595 }
3596
3597 static void si_apply_opaque_metadata(struct r600_common_screen *rscreen,
3598                                      struct r600_texture *rtex,
3599                                      struct radeon_bo_metadata *md)
3600 {
3601         uint32_t *desc = &md->metadata[2];
3602
3603         if (rscreen->chip_class < VI)
3604                 return;
3605
3606         /* Return if DCC is enabled. The texture should be set up with it
3607          * already.
3608          */
3609         if (md->size_metadata >= 11 * 4 &&
3610             md->metadata[0] != 0 &&
3611             md->metadata[1] == si_get_bo_metadata_word1(rscreen) &&
3612             G_008F28_COMPRESSION_EN(desc[6])) {
3613                 assert(rtex->dcc_offset == ((uint64_t)desc[7] << 8));
3614                 return;
3615         }
3616
3617         /* Disable DCC. These are always set by texture_from_handle and must
3618          * be cleared here.
3619          */
3620         rtex->dcc_offset = 0;
3621 }
3622
3623 void si_init_screen_state_functions(struct si_screen *sscreen)
3624 {
3625         sscreen->b.b.is_format_supported = si_is_format_supported;
3626         sscreen->b.query_opaque_metadata = si_query_opaque_metadata;
3627         sscreen->b.apply_opaque_metadata = si_apply_opaque_metadata;
3628 }
3629
3630 static void
3631 si_write_harvested_raster_configs(struct si_context *sctx,
3632                                   struct si_pm4_state *pm4,
3633                                   unsigned raster_config,
3634                                   unsigned raster_config_1)
3635 {
3636         unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1);
3637         unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1);
3638         unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
3639         unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
3640         unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2);
3641         unsigned rb_per_se = num_rb / num_se;
3642         unsigned se_mask[4];
3643         unsigned se;
3644
3645         se_mask[0] = ((1 << rb_per_se) - 1);
3646         se_mask[1] = (se_mask[0] << rb_per_se);
3647         se_mask[2] = (se_mask[1] << rb_per_se);
3648         se_mask[3] = (se_mask[2] << rb_per_se);
3649
3650         se_mask[0] &= rb_mask;
3651         se_mask[1] &= rb_mask;
3652         se_mask[2] &= rb_mask;
3653         se_mask[3] &= rb_mask;
3654
3655         assert(num_se == 1 || num_se == 2 || num_se == 4);
3656         assert(sh_per_se == 1 || sh_per_se == 2);
3657         assert(rb_per_pkr == 1 || rb_per_pkr == 2);
3658
3659         /* XXX: I can't figure out what the *_XSEL and *_YSEL
3660          * fields are for, so I'm leaving them as their default
3661          * values. */
3662
3663         for (se = 0; se < num_se; se++) {
3664                 unsigned raster_config_se = raster_config;
3665                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3666                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3667                 int idx = (se / 2) * 2;
3668
3669                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3670                         raster_config_se &= C_028350_SE_MAP;
3671
3672                         if (!se_mask[idx]) {
3673                                 raster_config_se |=
3674                                         S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
3675                         } else {
3676                                 raster_config_se |=
3677                                         S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
3678                         }
3679                 }
3680
3681                 pkr0_mask &= rb_mask;
3682                 pkr1_mask &= rb_mask;
3683                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3684                         raster_config_se &= C_028350_PKR_MAP;
3685
3686                         if (!pkr0_mask) {
3687                                 raster_config_se |=
3688                                         S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3);
3689                         } else {
3690                                 raster_config_se |=
3691                                         S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0);
3692                         }
3693                 }
3694
3695                 if (rb_per_se >= 2) {
3696                         unsigned rb0_mask = 1 << (se * rb_per_se);
3697                         unsigned rb1_mask = rb0_mask << 1;
3698
3699                         rb0_mask &= rb_mask;
3700                         rb1_mask &= rb_mask;
3701                         if (!rb0_mask || !rb1_mask) {
3702                                 raster_config_se &= C_028350_RB_MAP_PKR0;
3703
3704                                 if (!rb0_mask) {
3705                                         raster_config_se |=
3706                                                 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3);
3707                                 } else {
3708                                         raster_config_se |=
3709                                                 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0);
3710                                 }
3711                         }
3712
3713                         if (rb_per_se > 2) {
3714                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3715                                 rb1_mask = rb0_mask << 1;
3716                                 rb0_mask &= rb_mask;
3717                                 rb1_mask &= rb_mask;
3718                                 if (!rb0_mask || !rb1_mask) {
3719                                         raster_config_se &= C_028350_RB_MAP_PKR1;
3720
3721                                         if (!rb0_mask) {
3722                                                 raster_config_se |=
3723                                                         S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3);
3724                                         } else {
3725                                                 raster_config_se |=
3726                                                         S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0);
3727                                         }
3728                                 }
3729                         }
3730                 }
3731
3732                 /* GRBM_GFX_INDEX has a different offset on SI and CI+ */
3733                 if (sctx->b.chip_class < CIK)
3734                         si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
3735                                        SE_INDEX(se) | SH_BROADCAST_WRITES |
3736                                        INSTANCE_BROADCAST_WRITES);
3737                 else
3738                         si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
3739                                        S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) |
3740                                        S_030800_INSTANCE_BROADCAST_WRITES(1));
3741                 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
3742         }
3743
3744         /* GRBM_GFX_INDEX has a different offset on SI and CI+ */
3745         if (sctx->b.chip_class < CIK)
3746                 si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
3747                                SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
3748                                INSTANCE_BROADCAST_WRITES);
3749         else {
3750                 si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
3751                                S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
3752                                S_030800_INSTANCE_BROADCAST_WRITES(1));
3753
3754                 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3755                                      (!se_mask[2] && !se_mask[3]))) {
3756                         raster_config_1 &= C_028354_SE_PAIR_MAP;
3757
3758                         if (!se_mask[0] && !se_mask[1]) {
3759                                 raster_config_1 |=
3760                                         S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3);
3761                         } else {
3762                                 raster_config_1 |=
3763                                         S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0);
3764                         }
3765                 }
3766
3767                 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
3768         }
3769 }
3770
3771 static void si_init_config(struct si_context *sctx)
3772 {
3773         struct si_screen *sscreen = sctx->screen;
3774         unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
3775         unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
3776         unsigned raster_config, raster_config_1;
3777         uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
3778         struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
3779
3780         if (!pm4)
3781                 return;
3782
3783         si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
3784         si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1));
3785         si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1));
3786         si_pm4_cmd_end(pm4, false);
3787
3788         si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
3789         si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
3790
3791         /* FIXME calculate these values somehow ??? */
3792         si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
3793         si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
3794         si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
3795
3796         si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
3797         si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
3798
3799         si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
3800         si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
3801         if (sctx->b.chip_class < CIK)
3802                 si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
3803                                S_008A14_CLIP_VTX_REORDER_ENA(1));
3804
3805         si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
3806         si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
3807
3808         si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
3809
3810         switch (sctx->screen->b.family) {
3811         case CHIP_TAHITI:
3812         case CHIP_PITCAIRN:
3813                 raster_config = 0x2a00126a;
3814                 raster_config_1 = 0x00000000;
3815                 break;
3816         case CHIP_VERDE:
3817                 raster_config = 0x0000124a;
3818                 raster_config_1 = 0x00000000;
3819                 break;
3820         case CHIP_OLAND:
3821                 raster_config = 0x00000082;
3822                 raster_config_1 = 0x00000000;
3823                 break;
3824         case CHIP_HAINAN:
3825                 raster_config = 0x00000000;
3826                 raster_config_1 = 0x00000000;
3827                 break;
3828         case CHIP_BONAIRE:
3829                 raster_config = 0x16000012;
3830                 raster_config_1 = 0x00000000;
3831                 break;
3832         case CHIP_HAWAII:
3833                 raster_config = 0x3a00161a;
3834                 raster_config_1 = 0x0000002e;
3835                 break;
3836         case CHIP_FIJI:
3837                 if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) {
3838                         /* old kernels with old tiling config */
3839                         raster_config = 0x16000012;
3840                         raster_config_1 = 0x0000002a;
3841                 } else {
3842                         raster_config = 0x3a00161a;
3843                         raster_config_1 = 0x0000002e;
3844                 }
3845                 break;
3846         case CHIP_POLARIS10:
3847                 raster_config = 0x16000012;
3848                 raster_config_1 = 0x0000002a;
3849                 break;
3850         case CHIP_POLARIS11:
3851                 raster_config = 0x16000012;
3852                 raster_config_1 = 0x00000000;
3853                 break;
3854         case CHIP_TONGA:
3855                 raster_config = 0x16000012;
3856                 raster_config_1 = 0x0000002a;
3857                 break;
3858         case CHIP_ICELAND:
3859                 if (num_rb == 1)
3860                         raster_config = 0x00000000;
3861                 else
3862                         raster_config = 0x00000002;
3863                 raster_config_1 = 0x00000000;
3864                 break;
3865         case CHIP_CARRIZO:
3866                 raster_config = 0x00000002;
3867                 raster_config_1 = 0x00000000;
3868                 break;
3869         case CHIP_KAVERI:
3870                 /* KV should be 0x00000002, but that causes problems with radeon */
3871                 raster_config = 0x00000000; /* 0x00000002 */
3872                 raster_config_1 = 0x00000000;
3873                 break;
3874         case CHIP_KABINI:
3875         case CHIP_MULLINS:
3876         case CHIP_STONEY:
3877                 raster_config = 0x00000000;
3878                 raster_config_1 = 0x00000000;
3879                 break;
3880         default:
3881                 fprintf(stderr,
3882                         "radeonsi: Unknown GPU, using 0 for raster_config\n");
3883                 raster_config = 0x00000000;
3884                 raster_config_1 = 0x00000000;
3885                 break;
3886         }
3887
3888         /* Always use the default config when all backends are enabled
3889          * (or when we failed to determine the enabled backends).
3890          */
3891         if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
3892                 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
3893                                raster_config);
3894                 if (sctx->b.chip_class >= CIK)
3895                         si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1,
3896                                        raster_config_1);
3897         } else {
3898                 si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
3899         }
3900
3901         si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
3902         si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
3903         si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
3904                        S_028244_BR_X(16384) | S_028244_BR_Y(16384));
3905         si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
3906         si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
3907                        S_028034_BR_X(16384) | S_028034_BR_Y(16384));
3908
3909         si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
3910         si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE,
3911                        S_028230_ER_TRI(0xA) |
3912                        S_028230_ER_POINT(0xA) |
3913                        S_028230_ER_RECT(0xA) |
3914                        /* Required by DX10_DIAMOND_TEST_ENA: */
3915                        S_028230_ER_LINE_LR(0x1A) |
3916                        S_028230_ER_LINE_RL(0x26) |
3917                        S_028230_ER_LINE_TB(0xA) |
3918                        S_028230_ER_LINE_BT(0xA));
3919         /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
3920         si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
3921         si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
3922         si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
3923         si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
3924         si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
3925         si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0);
3926
3927         si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
3928         si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
3929         si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
3930
3931         if (sctx->b.chip_class >= CIK) {
3932                 /* If this is 0, Bonaire can hang even if GS isn't being used.
3933                  * Other chips are unaffected. These are suboptimal values,
3934                  * but we don't use on-chip GS.
3935                  */
3936                 si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
3937                                S_028A44_ES_VERTS_PER_SUBGRP(64) |
3938                                S_028A44_GS_PRIMS_PER_SUBGRP(4));
3939
3940                 si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
3941                 si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
3942                 si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
3943                 si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
3944
3945                 if (sscreen->b.info.num_good_compute_units /
3946                     (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 4) {
3947                         /* Too few available compute units per SH. Disallowing
3948                          * VS to run on CU0 could hurt us more than late VS
3949                          * allocation would help.
3950                          *
3951                          * LATE_ALLOC_VS = 2 is the highest safe number.
3952                          */
3953                         si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
3954                         si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
3955                 } else {
3956                         /* Set LATE_ALLOC_VS == 31. It should be less than
3957                          * the number of scratch waves. Limitations:
3958                          * - VS can't execute on CU0.
3959                          * - If HS writes outputs to LDS, LS can't execute on CU0.
3960                          */
3961                         si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
3962                         si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
3963                 }
3964
3965                 si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
3966         }
3967
3968         if (sctx->b.chip_class >= VI) {
3969                 unsigned vgt_tess_distribution;
3970
3971                 si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
3972                                S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
3973                                S_028424_OVERWRITE_COMBINER_WATERMARK(4));
3974                 if (sctx->b.family < CHIP_POLARIS10)
3975                         si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
3976                 si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
3977
3978                 vgt_tess_distribution =
3979                         S_028B50_ACCUM_ISOLINE(32) |
3980                         S_028B50_ACCUM_TRI(11) |
3981                         S_028B50_ACCUM_QUAD(11) |
3982                         S_028B50_DONUT_SPLIT(16);
3983
3984                 /* Testing with Unigine Heaven extreme tessellation yielded best results
3985                  * with TRAP_SPLIT = 3.
3986                  */
3987                 if (sctx->b.family == CHIP_FIJI ||
3988                     sctx->b.family >= CHIP_POLARIS10)
3989                         vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
3990
3991                 si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
3992         } else {
3993                 si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
3994                 si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
3995         }
3996
3997         if (sctx->b.family == CHIP_STONEY)
3998                 si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
3999
4000         si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
4001         if (sctx->b.chip_class >= CIK)
4002                 si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
4003         si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
4004                       RADEON_PRIO_BORDER_COLORS);
4005
4006         si_pm4_upload_indirect_buffer(sctx, pm4);
4007         sctx->init_config = pm4;
4008 }