OSDN Git Service

radeonsi: flush TC L2 cache for indirect draw data
[android-x86/external-mesa.git] / src / gallium / drivers / radeonsi / si_state.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Christian König <christian.koenig@amd.com>
25  */
26
27 #include "si_pipe.h"
28 #include "si_shader.h"
29 #include "sid.h"
30 #include "radeon/r600_cs.h"
31
32 #include "util/u_dual_blend.h"
33 #include "util/u_format.h"
34 #include "util/u_format_s3tc.h"
35 #include "util/u_memory.h"
36 #include "util/u_pstipple.h"
37 #include "util/u_resource.h"
38
39 /* Initialize an external atom (owned by ../radeon). */
40 static void
41 si_init_external_atom(struct si_context *sctx, struct r600_atom *atom,
42                       struct r600_atom **list_elem)
43 {
44         atom->id = list_elem - sctx->atoms.array + 1;
45         *list_elem = atom;
46 }
47
48 /* Initialize an atom owned by radeonsi.  */
49 void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
50                   struct r600_atom **list_elem,
51                   void (*emit_func)(struct si_context *ctx, struct r600_atom *state))
52 {
53         atom->emit = (void*)emit_func;
54         atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */
55         *list_elem = atom;
56 }
57
58 static unsigned si_map_swizzle(unsigned swizzle)
59 {
60         switch (swizzle) {
61         case PIPE_SWIZZLE_Y:
62                 return V_008F0C_SQ_SEL_Y;
63         case PIPE_SWIZZLE_Z:
64                 return V_008F0C_SQ_SEL_Z;
65         case PIPE_SWIZZLE_W:
66                 return V_008F0C_SQ_SEL_W;
67         case PIPE_SWIZZLE_0:
68                 return V_008F0C_SQ_SEL_0;
69         case PIPE_SWIZZLE_1:
70                 return V_008F0C_SQ_SEL_1;
71         default: /* PIPE_SWIZZLE_X */
72                 return V_008F0C_SQ_SEL_X;
73         }
74 }
75
/**
 * Convert a float to fixed point with \p frac_bits fractional bits.
 *
 * The value is scaled by 2^frac_bits and truncated toward zero. Negative
 * results are returned in two's complement form.
 *
 * \param value      number to convert
 * \param frac_bits  number of fractional bits (must be below 31, because the
 *                   scale factor is built with a signed int shift)
 * \return the fixed-point encoding in a 32-bit word
 */
static uint32_t S_FIXED(float value, uint32_t frac_bits)
{
	float scaled = value * (1 << frac_bits);

	/* Converting a negative float directly to an unsigned integer is
	 * undefined behavior in C (some targets saturate to 0). Go through
	 * a signed integer so that negative inputs reliably produce their
	 * two's complement encoding.
	 */
	if (scaled < 0)
		return (uint32_t)(int32_t)scaled;

	return (uint32_t)scaled;
}
80
/**
 * Pack a float into unsigned 12.4 fixed point (12 integer bits, 4 fractional
 * bits).
 *
 * Values less than or equal to zero, as well as NaN, yield 0. Values greater
 * than or equal to 4096 are clamped to 0xffff. Everything in between is
 * multiplied by 16 and truncated.
 */
static unsigned si_pack_float_12p4(float x)
{
	/* Written as !(x > 0) so that NaN takes this branch too; otherwise
	 * NaN would fall through both range checks and be converted to an
	 * integer, which is undefined behavior.
	 */
	if (!(x > 0))
		return 0;

	if (x >= 4096)
		return 0xffff;

	return x * 16;
}
87
88 /*
89  * Inferred framebuffer and blender state.
90  *
91  * One of the reasons CB_TARGET_MASK must be derived from the framebuffer state
92  * is that:
93  * - The blend state mask is 0xf most of the time.
94  * - The COLOR1 format isn't INVALID because of possible dual-source blending,
95  *   so COLOR1 is enabled pretty much all the time.
96  * So CB_TARGET_MASK is the only register that can disable COLOR1.
97  *
98  * Another reason is to avoid a hang with dual source blending.
99  */
100 static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom)
101 {
102         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
103         struct si_state_blend *blend = sctx->queued.named.blend;
104         uint32_t cb_target_mask = 0, i;
105
106         for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++)
107                 if (sctx->framebuffer.state.cbufs[i])
108                         cb_target_mask |= 0xf << (4*i);
109
110         if (blend)
111                 cb_target_mask &= blend->cb_target_mask;
112
113         /* Avoid a hang that happens when dual source blending is enabled
114          * but there is not enough color outputs. This is undefined behavior,
115          * so disable color writes completely.
116          *
117          * Reproducible with Unigine Heaven 4.0 and drirc missing.
118          */
119         if (blend && blend->dual_src_blend &&
120             sctx->ps_shader.cso &&
121             (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3)
122                 cb_target_mask = 0;
123
124         radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask);
125
126         /* STONEY-specific register settings. */
127         if (sctx->b.family == CHIP_STONEY) {
128                 unsigned spi_shader_col_format =
129                         sctx->ps_shader.cso ?
130                         sctx->ps_shader.current->key.ps.epilog.spi_shader_col_format : 0;
131                 unsigned sx_ps_downconvert = 0;
132                 unsigned sx_blend_opt_epsilon = 0;
133                 unsigned sx_blend_opt_control = 0;
134
135                 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
136                         struct r600_surface *surf =
137                                 (struct r600_surface*)sctx->framebuffer.state.cbufs[i];
138                         unsigned format, swap, spi_format, colormask;
139                         bool has_alpha, has_rgb;
140
141                         if (!surf)
142                                 continue;
143
144                         format = G_028C70_FORMAT(surf->cb_color_info);
145                         swap = G_028C70_COMP_SWAP(surf->cb_color_info);
146                         spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;
147                         colormask = (cb_target_mask >> (i * 4)) & 0xf;
148
149                         /* Set if RGB and A are present. */
150                         has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib);
151
152                         if (format == V_028C70_COLOR_8 ||
153                             format == V_028C70_COLOR_16 ||
154                             format == V_028C70_COLOR_32)
155                                 has_rgb = !has_alpha;
156                         else
157                                 has_rgb = true;
158
159                         /* Check the colormask and export format. */
160                         if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A)))
161                                 has_rgb = false;
162                         if (!(colormask & PIPE_MASK_A))
163                                 has_alpha = false;
164
165                         if (spi_format == V_028714_SPI_SHADER_ZERO) {
166                                 has_rgb = false;
167                                 has_alpha = false;
168                         }
169
170                         /* Disable value checking for disabled channels. */
171                         if (!has_rgb)
172                                 sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
173                         if (!has_alpha)
174                                 sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
175
176                         /* Enable down-conversion for 32bpp and smaller formats. */
177                         switch (format) {
178                         case V_028C70_COLOR_8:
179                         case V_028C70_COLOR_8_8:
180                         case V_028C70_COLOR_8_8_8_8:
181                                 /* For 1 and 2-channel formats, use the superset thereof. */
182                                 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
183                                     spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
184                                     spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
185                                         sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
186                                         sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
187                                 }
188                                 break;
189
190                         case V_028C70_COLOR_5_6_5:
191                                 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
192                                         sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
193                                         sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
194                                 }
195                                 break;
196
197                         case V_028C70_COLOR_1_5_5_5:
198                                 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
199                                         sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
200                                         sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
201                                 }
202                                 break;
203
204                         case V_028C70_COLOR_4_4_4_4:
205                                 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
206                                         sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
207                                         sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
208                                 }
209                                 break;
210
211                         case V_028C70_COLOR_32:
212                                 if (swap == V_0280A0_SWAP_STD &&
213                                     spi_format == V_028714_SPI_SHADER_32_R)
214                                         sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
215                                 else if (swap == V_0280A0_SWAP_ALT_REV &&
216                                          spi_format == V_028714_SPI_SHADER_32_AR)
217                                         sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
218                                 break;
219
220                         case V_028C70_COLOR_16:
221                         case V_028C70_COLOR_16_16:
222                                 /* For 1-channel formats, use the superset thereof. */
223                                 if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
224                                     spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
225                                     spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
226                                     spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
227                                         if (swap == V_0280A0_SWAP_STD ||
228                                             swap == V_0280A0_SWAP_STD_REV)
229                                                 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
230                                         else
231                                                 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
232                                 }
233                                 break;
234
235                         case V_028C70_COLOR_10_11_11:
236                                 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
237                                         sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
238                                         sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4);
239                                 }
240                                 break;
241
242                         case V_028C70_COLOR_2_10_10_10:
243                                 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
244                                         sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
245                                         sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
246                                 }
247                                 break;
248                         }
249                 }
250
251                 if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) {
252                         sx_ps_downconvert = 0;
253                         sx_blend_opt_epsilon = 0;
254                         sx_blend_opt_control = 0;
255                 }
256
257                 radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
258                 radeon_emit(cs, sx_ps_downconvert);     /* R_028754_SX_PS_DOWNCONVERT */
259                 radeon_emit(cs, sx_blend_opt_epsilon);  /* R_028758_SX_BLEND_OPT_EPSILON */
260                 radeon_emit(cs, sx_blend_opt_control);  /* R_02875C_SX_BLEND_OPT_CONTROL */
261         }
262 }
263
264 /*
265  * Blender functions
266  */
267
268 static uint32_t si_translate_blend_function(int blend_func)
269 {
270         switch (blend_func) {
271         case PIPE_BLEND_ADD:
272                 return V_028780_COMB_DST_PLUS_SRC;
273         case PIPE_BLEND_SUBTRACT:
274                 return V_028780_COMB_SRC_MINUS_DST;
275         case PIPE_BLEND_REVERSE_SUBTRACT:
276                 return V_028780_COMB_DST_MINUS_SRC;
277         case PIPE_BLEND_MIN:
278                 return V_028780_COMB_MIN_DST_SRC;
279         case PIPE_BLEND_MAX:
280                 return V_028780_COMB_MAX_DST_SRC;
281         default:
282                 R600_ERR("Unknown blend function %d\n", blend_func);
283                 assert(0);
284                 break;
285         }
286         return 0;
287 }
288
289 static uint32_t si_translate_blend_factor(int blend_fact)
290 {
291         switch (blend_fact) {
292         case PIPE_BLENDFACTOR_ONE:
293                 return V_028780_BLEND_ONE;
294         case PIPE_BLENDFACTOR_SRC_COLOR:
295                 return V_028780_BLEND_SRC_COLOR;
296         case PIPE_BLENDFACTOR_SRC_ALPHA:
297                 return V_028780_BLEND_SRC_ALPHA;
298         case PIPE_BLENDFACTOR_DST_ALPHA:
299                 return V_028780_BLEND_DST_ALPHA;
300         case PIPE_BLENDFACTOR_DST_COLOR:
301                 return V_028780_BLEND_DST_COLOR;
302         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
303                 return V_028780_BLEND_SRC_ALPHA_SATURATE;
304         case PIPE_BLENDFACTOR_CONST_COLOR:
305                 return V_028780_BLEND_CONSTANT_COLOR;
306         case PIPE_BLENDFACTOR_CONST_ALPHA:
307                 return V_028780_BLEND_CONSTANT_ALPHA;
308         case PIPE_BLENDFACTOR_ZERO:
309                 return V_028780_BLEND_ZERO;
310         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
311                 return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
312         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
313                 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
314         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
315                 return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
316         case PIPE_BLENDFACTOR_INV_DST_COLOR:
317                 return V_028780_BLEND_ONE_MINUS_DST_COLOR;
318         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
319                 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
320         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
321                 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
322         case PIPE_BLENDFACTOR_SRC1_COLOR:
323                 return V_028780_BLEND_SRC1_COLOR;
324         case PIPE_BLENDFACTOR_SRC1_ALPHA:
325                 return V_028780_BLEND_SRC1_ALPHA;
326         case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
327                 return V_028780_BLEND_INV_SRC1_COLOR;
328         case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
329                 return V_028780_BLEND_INV_SRC1_ALPHA;
330         default:
331                 R600_ERR("Bad blend factor %d not supported!\n", blend_fact);
332                 assert(0);
333                 break;
334         }
335         return 0;
336 }
337
338 static uint32_t si_translate_blend_opt_function(int blend_func)
339 {
340         switch (blend_func) {
341         case PIPE_BLEND_ADD:
342                 return V_028760_OPT_COMB_ADD;
343         case PIPE_BLEND_SUBTRACT:
344                 return V_028760_OPT_COMB_SUBTRACT;
345         case PIPE_BLEND_REVERSE_SUBTRACT:
346                 return V_028760_OPT_COMB_REVSUBTRACT;
347         case PIPE_BLEND_MIN:
348                 return V_028760_OPT_COMB_MIN;
349         case PIPE_BLEND_MAX:
350                 return V_028760_OPT_COMB_MAX;
351         default:
352                 return V_028760_OPT_COMB_BLEND_DISABLED;
353         }
354 }
355
356 static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
357 {
358         switch (blend_fact) {
359         case PIPE_BLENDFACTOR_ZERO:
360                 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
361         case PIPE_BLENDFACTOR_ONE:
362                 return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
363         case PIPE_BLENDFACTOR_SRC_COLOR:
364                 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
365                                 : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
366         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
367                 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
368                                 : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
369         case PIPE_BLENDFACTOR_SRC_ALPHA:
370                 return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
371         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
372                 return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
373         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
374                 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
375                                 : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
376         default:
377                 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
378         }
379 }
380
381 /**
382  * Get rid of DST in the blend factors by commuting the operands:
383  *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
384  */
385 static void si_blend_remove_dst(unsigned *func, unsigned *src_factor,
386                                 unsigned *dst_factor, unsigned expected_dst,
387                                 unsigned replacement_src)
388 {
389         if (*src_factor == expected_dst &&
390             *dst_factor == PIPE_BLENDFACTOR_ZERO) {
391                 *src_factor = PIPE_BLENDFACTOR_ZERO;
392                 *dst_factor = replacement_src;
393
394                 /* Commuting the operands requires reversing subtractions. */
395                 if (*func == PIPE_BLEND_SUBTRACT)
396                         *func = PIPE_BLEND_REVERSE_SUBTRACT;
397                 else if (*func == PIPE_BLEND_REVERSE_SUBTRACT)
398                         *func = PIPE_BLEND_SUBTRACT;
399         }
400 }
401
402 static bool si_blend_factor_uses_dst(unsigned factor)
403 {
404         return factor == PIPE_BLENDFACTOR_DST_COLOR ||
405                 factor == PIPE_BLENDFACTOR_DST_ALPHA ||
406                 factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
407                 factor == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
408                 factor == PIPE_BLENDFACTOR_INV_DST_COLOR;
409 }
410
411 static void *si_create_blend_state_mode(struct pipe_context *ctx,
412                                         const struct pipe_blend_state *state,
413                                         unsigned mode)
414 {
415         struct si_context *sctx = (struct si_context*)ctx;
416         struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
417         struct si_pm4_state *pm4 = &blend->pm4;
418         uint32_t sx_mrt_blend_opt[8] = {0};
419         uint32_t color_control = 0;
420
421         if (!blend)
422                 return NULL;
423
424         blend->alpha_to_coverage = state->alpha_to_coverage;
425         blend->alpha_to_one = state->alpha_to_one;
426         blend->dual_src_blend = util_blend_state_is_dual(state, 0);
427
428         if (state->logicop_enable) {
429                 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
430         } else {
431                 color_control |= S_028808_ROP3(0xcc);
432         }
433
434         si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
435                        S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
436                        S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
437                        S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
438                        S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
439                        S_028B70_ALPHA_TO_MASK_OFFSET3(2));
440
441         if (state->alpha_to_coverage)
442                 blend->need_src_alpha_4bit |= 0xf;
443
444         blend->cb_target_mask = 0;
445         for (int i = 0; i < 8; i++) {
446                 /* state->rt entries > 0 only written if independent blending */
447                 const int j = state->independent_blend_enable ? i : 0;
448
449                 unsigned eqRGB = state->rt[j].rgb_func;
450                 unsigned srcRGB = state->rt[j].rgb_src_factor;
451                 unsigned dstRGB = state->rt[j].rgb_dst_factor;
452                 unsigned eqA = state->rt[j].alpha_func;
453                 unsigned srcA = state->rt[j].alpha_src_factor;
454                 unsigned dstA = state->rt[j].alpha_dst_factor;
455
456                 unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
457                 unsigned blend_cntl = 0;
458
459                 sx_mrt_blend_opt[i] =
460                         S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
461                         S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
462
463                 if (!state->rt[j].colormask)
464                         continue;
465
466                 /* cb_render_state will disable unused ones */
467                 blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i);
468
469                 if (!state->rt[j].blend_enable) {
470                         si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
471                         continue;
472                 }
473
474                 /* Blending optimizations for Stoney.
475                  * These transformations don't change the behavior.
476                  *
477                  * First, get rid of DST in the blend factors:
478                  *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
479                  */
480                 si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB,
481                                     PIPE_BLENDFACTOR_DST_COLOR,
482                                     PIPE_BLENDFACTOR_SRC_COLOR);
483                 si_blend_remove_dst(&eqA, &srcA, &dstA,
484                                     PIPE_BLENDFACTOR_DST_COLOR,
485                                     PIPE_BLENDFACTOR_SRC_COLOR);
486                 si_blend_remove_dst(&eqA, &srcA, &dstA,
487                                     PIPE_BLENDFACTOR_DST_ALPHA,
488                                     PIPE_BLENDFACTOR_SRC_ALPHA);
489
490                 /* Look up the ideal settings from tables. */
491                 srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
492                 dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
493                 srcA_opt = si_translate_blend_opt_factor(srcA, true);
494                 dstA_opt = si_translate_blend_opt_factor(dstA, true);
495
496                 /* Handle interdependencies. */
497                 if (si_blend_factor_uses_dst(srcRGB))
498                         dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
499                 if (si_blend_factor_uses_dst(srcA))
500                         dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
501
502                 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE &&
503                     (dstRGB == PIPE_BLENDFACTOR_ZERO ||
504                      dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
505                      dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE))
506                         dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
507
508                 /* Set the final value. */
509                 sx_mrt_blend_opt[i] =
510                         S_028760_COLOR_SRC_OPT(srcRGB_opt) |
511                         S_028760_COLOR_DST_OPT(dstRGB_opt) |
512                         S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
513                         S_028760_ALPHA_SRC_OPT(srcA_opt) |
514                         S_028760_ALPHA_DST_OPT(dstA_opt) |
515                         S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
516
517                 /* Set blend state. */
518                 blend_cntl |= S_028780_ENABLE(1);
519                 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
520                 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
521                 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
522
523                 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
524                         blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
525                         blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
526                         blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
527                         blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
528                 }
529                 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
530
531                 blend->blend_enable_4bit |= 0xfu << (i * 4);
532
533                 /* This is only important for formats without alpha. */
534                 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
535                     dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
536                     srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
537                     dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
538                     srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
539                     dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
540                         blend->need_src_alpha_4bit |= 0xfu << (i * 4);
541         }
542
543         if (blend->cb_target_mask) {
544                 color_control |= S_028808_MODE(mode);
545         } else {
546                 color_control |= S_028808_MODE(V_028808_CB_DISABLE);
547         }
548
549         if (sctx->b.family == CHIP_STONEY) {
550                 for (int i = 0; i < 8; i++)
551                         si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
552                                        sx_mrt_blend_opt[i]);
553
554                 /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */
555                 if (blend->dual_src_blend || state->logicop_enable ||
556                     mode == V_028808_CB_RESOLVE)
557                         color_control |= S_028808_DISABLE_DUAL_QUAD(1);
558         }
559
560         si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
561         return blend;
562 }
563
564 static void *si_create_blend_state(struct pipe_context *ctx,
565                                    const struct pipe_blend_state *state)
566 {
567         return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
568 }
569
570 static void si_bind_blend_state(struct pipe_context *ctx, void *state)
571 {
572         struct si_context *sctx = (struct si_context *)ctx;
573         si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
574         si_mark_atom_dirty(sctx, &sctx->cb_render_state);
575 }
576
577 static void si_delete_blend_state(struct pipe_context *ctx, void *state)
578 {
579         struct si_context *sctx = (struct si_context *)ctx;
580         si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
581 }
582
583 static void si_set_blend_color(struct pipe_context *ctx,
584                                const struct pipe_blend_color *state)
585 {
586         struct si_context *sctx = (struct si_context *)ctx;
587
588         if (memcmp(&sctx->blend_color.state, state, sizeof(*state)) == 0)
589                 return;
590
591         sctx->blend_color.state = *state;
592         si_mark_atom_dirty(sctx, &sctx->blend_color.atom);
593 }
594
595 static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom)
596 {
597         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
598
599         radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
600         radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4);
601 }
602
603 /*
604  * Clipping
605  */
606
607 static void si_set_clip_state(struct pipe_context *ctx,
608                               const struct pipe_clip_state *state)
609 {
610         struct si_context *sctx = (struct si_context *)ctx;
611         struct pipe_constant_buffer cb;
612
613         if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0)
614                 return;
615
616         sctx->clip_state.state = *state;
617         si_mark_atom_dirty(sctx, &sctx->clip_state.atom);
618
619         cb.buffer = NULL;
620         cb.user_buffer = state->ucp;
621         cb.buffer_offset = 0;
622         cb.buffer_size = 4*4*8;
623         si_set_constant_buffer(sctx, &sctx->rw_buffers,
624                                SI_VS_CONST_CLIP_PLANES, &cb);
625         pipe_resource_reference(&cb.buffer, NULL);
626 }
627
628 static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
629 {
630         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
631
632         radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
633         radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4);
634 }
635
636 #define SIX_BITS 0x3F
637
638 static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
639 {
640         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
641         struct tgsi_shader_info *info = si_get_vs_info(sctx);
642         unsigned window_space =
643            info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
644         unsigned clipdist_mask =
645                 info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask;
646
647         radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
648                 S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
649                 S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
650                 S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
651                 S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) |
652                 S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) |
653                 S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) |
654                 S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize ||
655                                             info->writes_edgeflag ||
656                                             info->writes_layer ||
657                                              info->writes_viewport_index) |
658                 S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) |
659                 (sctx->queued.named.rasterizer->clip_plane_enable &
660                  clipdist_mask));
661         radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
662                 sctx->queued.named.rasterizer->pa_cl_clip_cntl |
663                 (clipdist_mask ? 0 :
664                  sctx->queued.named.rasterizer->clip_plane_enable & SIX_BITS) |
665                 S_028810_CLIP_DISABLE(window_space));
666
667         /* reuse needs to be set off if we write oViewport */
668         radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF,
669                                S_028AB4_REUSE_OFF(info->writes_viewport_index));
670 }
671
672 /*
673  * inferred state between framebuffer and rasterizer
674  */
675 static void si_update_poly_offset_state(struct si_context *sctx)
676 {
677         struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
678
679         if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
680                 return;
681
682         switch (sctx->framebuffer.state.zsbuf->texture->format) {
683         case PIPE_FORMAT_Z16_UNORM:
684                 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
685                 break;
686         default: /* 24-bit */
687                 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]);
688                 break;
689         case PIPE_FORMAT_Z32_FLOAT:
690         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
691                 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]);
692                 break;
693         }
694 }
695
696 /*
697  * Rasterizer
698  */
699
700 static uint32_t si_translate_fill(uint32_t func)
701 {
702         switch(func) {
703         case PIPE_POLYGON_MODE_FILL:
704                 return V_028814_X_DRAW_TRIANGLES;
705         case PIPE_POLYGON_MODE_LINE:
706                 return V_028814_X_DRAW_LINES;
707         case PIPE_POLYGON_MODE_POINT:
708                 return V_028814_X_DRAW_POINTS;
709         default:
710                 assert(0);
711                 return V_028814_X_DRAW_POINTS;
712         }
713 }
714
715 static void *si_create_rs_state(struct pipe_context *ctx,
716                                 const struct pipe_rasterizer_state *state)
717 {
718         struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);
719         struct si_pm4_state *pm4 = &rs->pm4;
720         unsigned tmp, i;
721         float psize_min, psize_max;
722
723         if (!rs) {
724                 return NULL;
725         }
726
727         rs->scissor_enable = state->scissor;
728         rs->two_side = state->light_twoside;
729         rs->multisample_enable = state->multisample;
730         rs->force_persample_interp = state->force_persample_interp;
731         rs->clip_plane_enable = state->clip_plane_enable;
732         rs->line_stipple_enable = state->line_stipple_enable;
733         rs->poly_stipple_enable = state->poly_stipple_enable;
734         rs->line_smooth = state->line_smooth;
735         rs->poly_smooth = state->poly_smooth;
736         rs->uses_poly_offset = state->offset_point || state->offset_line ||
737                                state->offset_tri;
738         rs->clamp_fragment_color = state->clamp_fragment_color;
739         rs->flatshade = state->flatshade;
740         rs->sprite_coord_enable = state->sprite_coord_enable;
741         rs->rasterizer_discard = state->rasterizer_discard;
742         rs->pa_sc_line_stipple = state->line_stipple_enable ?
743                                 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
744                                 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
745         rs->pa_cl_clip_cntl =
746                 S_028810_PS_UCP_MODE(3) |
747                 S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
748                 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
749                 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
750                 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
751                 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
752
753         si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0,
754                 S_0286D4_FLAT_SHADE_ENA(1) |
755                 S_0286D4_PNT_SPRITE_ENA(1) |
756                 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
757                 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
758                 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
759                 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
760                 S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT));
761
762         /* point size 12.4 fixed point */
763         tmp = (unsigned)(state->point_size * 8.0);
764         si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
765
766         if (state->point_size_per_vertex) {
767                 psize_min = util_get_min_point_size(state);
768                 psize_max = 8192;
769         } else {
770                 /* Force the point size to be as if the vertex output was disabled. */
771                 psize_min = state->point_size;
772                 psize_max = state->point_size;
773         }
774         /* Divide by two, because 0.5 = 1 pixel. */
775         si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,
776                         S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) |
777                         S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2)));
778
779         tmp = (unsigned)state->line_width * 8;
780         si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp));
781         si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0,
782                        S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
783                        S_028A48_MSAA_ENABLE(state->multisample ||
784                                             state->poly_smooth ||
785                                             state->line_smooth) |
786                        S_028A48_VPORT_SCISSOR_ENABLE(1));
787
788         si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL,
789                        S_028BE4_PIX_CENTER(state->half_pixel_center) |
790                        S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
791
792         si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
793         si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL,
794                 S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |
795                 S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
796                 S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
797                 S_028814_FACE(!state->front_ccw) |
798                 S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
799                 S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
800                 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
801                 S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL ||
802                                    state->fill_back != PIPE_POLYGON_MODE_FILL) |
803                 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
804                 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)));
805         si_pm4_set_reg(pm4, R_00B130_SPI_SHADER_USER_DATA_VS_0 +
806                        SI_SGPR_VS_STATE_BITS * 4, state->clamp_vertex_color);
807
808         /* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */
809         for (i = 0; i < 3; i++) {
810                 struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i];
811                 float offset_units = state->offset_units;
812                 float offset_scale = state->offset_scale * 16.0f;
813
814                 switch (i) {
815                 case 0: /* 16-bit zbuffer */
816                         offset_units *= 4.0f;
817                         break;
818                 case 1: /* 24-bit zbuffer */
819                         offset_units *= 2.0f;
820                         break;
821                 case 2: /* 32-bit zbuffer */
822                         offset_units *= 1.0f;
823                         break;
824                 }
825
826                 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
827                                fui(offset_scale));
828                 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
829                                fui(offset_units));
830                 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
831                                fui(offset_scale));
832                 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
833                                fui(offset_units));
834         }
835
836         return rs;
837 }
838
839 static void si_bind_rs_state(struct pipe_context *ctx, void *state)
840 {
841         struct si_context *sctx = (struct si_context *)ctx;
842         struct si_state_rasterizer *old_rs =
843                 (struct si_state_rasterizer*)sctx->queued.named.rasterizer;
844         struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
845
846         if (!state)
847                 return;
848
849         if (sctx->framebuffer.nr_samples > 1 &&
850             (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) {
851                 si_mark_atom_dirty(sctx, &sctx->db_render_state);
852
853                 if (sctx->b.family >= CHIP_POLARIS10)
854                         si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
855         }
856
857         r600_set_scissor_enable(&sctx->b, rs->scissor_enable);
858
859         si_pm4_bind_state(sctx, rasterizer, rs);
860         si_update_poly_offset_state(sctx);
861
862         si_mark_atom_dirty(sctx, &sctx->clip_regs);
863 }
864
865 static void si_delete_rs_state(struct pipe_context *ctx, void *state)
866 {
867         struct si_context *sctx = (struct si_context *)ctx;
868
869         if (sctx->queued.named.rasterizer == state)
870                 si_pm4_bind_state(sctx, poly_offset, NULL);
871         si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state);
872 }
873
874 /*
875  * inferred state between dsa and stencil ref
876  */
877 static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom)
878 {
879         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
880         struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
881         struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;
882
883         radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
884         radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) |
885                         S_028430_STENCILMASK(dsa->valuemask[0]) |
886                         S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
887                         S_028430_STENCILOPVAL(1));
888         radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
889                         S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
890                         S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
891                         S_028434_STENCILOPVAL_BF(1));
892 }
893
894 static void si_set_stencil_ref(struct pipe_context *ctx,
895                                const struct pipe_stencil_ref *state)
896 {
897         struct si_context *sctx = (struct si_context *)ctx;
898
899         if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0)
900                 return;
901
902         sctx->stencil_ref.state = *state;
903         si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
904 }
905
906
907 /*
908  * DSA
909  */
910
911 static uint32_t si_translate_stencil_op(int s_op)
912 {
913         switch (s_op) {
914         case PIPE_STENCIL_OP_KEEP:
915                 return V_02842C_STENCIL_KEEP;
916         case PIPE_STENCIL_OP_ZERO:
917                 return V_02842C_STENCIL_ZERO;
918         case PIPE_STENCIL_OP_REPLACE:
919                 return V_02842C_STENCIL_REPLACE_TEST;
920         case PIPE_STENCIL_OP_INCR:
921                 return V_02842C_STENCIL_ADD_CLAMP;
922         case PIPE_STENCIL_OP_DECR:
923                 return V_02842C_STENCIL_SUB_CLAMP;
924         case PIPE_STENCIL_OP_INCR_WRAP:
925                 return V_02842C_STENCIL_ADD_WRAP;
926         case PIPE_STENCIL_OP_DECR_WRAP:
927                 return V_02842C_STENCIL_SUB_WRAP;
928         case PIPE_STENCIL_OP_INVERT:
929                 return V_02842C_STENCIL_INVERT;
930         default:
931                 R600_ERR("Unknown stencil op %d", s_op);
932                 assert(0);
933                 break;
934         }
935         return 0;
936 }
937
938 static void *si_create_dsa_state(struct pipe_context *ctx,
939                                  const struct pipe_depth_stencil_alpha_state *state)
940 {
941         struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
942         struct si_pm4_state *pm4 = &dsa->pm4;
943         unsigned db_depth_control;
944         uint32_t db_stencil_control = 0;
945
946         if (!dsa) {
947                 return NULL;
948         }
949
950         dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask;
951         dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask;
952         dsa->stencil_ref.writemask[0] = state->stencil[0].writemask;
953         dsa->stencil_ref.writemask[1] = state->stencil[1].writemask;
954
955         db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
956                 S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
957                 S_028800_ZFUNC(state->depth.func) |
958                 S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test);
959
960         /* stencil */
961         if (state->stencil[0].enabled) {
962                 db_depth_control |= S_028800_STENCIL_ENABLE(1);
963                 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func);
964                 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op));
965                 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op));
966                 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op));
967
968                 if (state->stencil[1].enabled) {
969                         db_depth_control |= S_028800_BACKFACE_ENABLE(1);
970                         db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func);
971                         db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op));
972                         db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op));
973                         db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op));
974                 }
975         }
976
977         /* alpha */
978         if (state->alpha.enabled) {
979                 dsa->alpha_func = state->alpha.func;
980
981                 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
982                                SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value));
983         } else {
984                 dsa->alpha_func = PIPE_FUNC_ALWAYS;
985         }
986
987         si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
988         si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
989         if (state->depth.bounds_test) {
990                 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min));
991                 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max));
992         }
993
994         return dsa;
995 }
996
997 static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
998 {
999         struct si_context *sctx = (struct si_context *)ctx;
1000         struct si_state_dsa *dsa = state;
1001
1002         if (!state)
1003                 return;
1004
1005         si_pm4_bind_state(sctx, dsa, dsa);
1006
1007         if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
1008                    sizeof(struct si_dsa_stencil_ref_part)) != 0) {
1009                 sctx->stencil_ref.dsa_part = dsa->stencil_ref;
1010                 si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
1011         }
1012 }
1013
1014 static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
1015 {
1016         struct si_context *sctx = (struct si_context *)ctx;
1017         si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
1018 }
1019
1020 static void *si_create_db_flush_dsa(struct si_context *sctx)
1021 {
1022         struct pipe_depth_stencil_alpha_state dsa = {};
1023
1024         return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa);
1025 }
1026
1027 /* DB RENDER STATE */
1028
1029 static void si_set_active_query_state(struct pipe_context *ctx, boolean enable)
1030 {
1031         struct si_context *sctx = (struct si_context*)ctx;
1032
1033         /* Pipeline stat & streamout queries. */
1034         if (enable) {
1035                 sctx->b.flags &= ~R600_CONTEXT_STOP_PIPELINE_STATS;
1036                 sctx->b.flags |= R600_CONTEXT_START_PIPELINE_STATS;
1037         } else {
1038                 sctx->b.flags &= ~R600_CONTEXT_START_PIPELINE_STATS;
1039                 sctx->b.flags |= R600_CONTEXT_STOP_PIPELINE_STATS;
1040         }
1041
1042         /* Occlusion queries. */
1043         if (sctx->occlusion_queries_disabled != !enable) {
1044                 sctx->occlusion_queries_disabled = !enable;
1045                 si_mark_atom_dirty(sctx, &sctx->db_render_state);
1046         }
1047 }
1048
1049 static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
1050 {
1051         struct si_context *sctx = (struct si_context*)ctx;
1052
1053         si_mark_atom_dirty(sctx, &sctx->db_render_state);
1054 }
1055
1056 static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
1057 {
1058         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
1059         struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
1060         unsigned db_shader_control;
1061
1062         radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
1063
1064         /* DB_RENDER_CONTROL */
1065         if (sctx->dbcb_depth_copy_enabled ||
1066             sctx->dbcb_stencil_copy_enabled) {
1067                 radeon_emit(cs,
1068                             S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
1069                             S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
1070                             S_028000_COPY_CENTROID(1) |
1071                             S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
1072         } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
1073                 radeon_emit(cs,
1074                             S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
1075                             S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
1076         } else {
1077                 radeon_emit(cs,
1078                             S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
1079                             S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear));
1080         }
1081
1082         /* DB_COUNT_CONTROL (occlusion queries) */
1083         if (sctx->b.num_occlusion_queries > 0 &&
1084             !sctx->occlusion_queries_disabled) {
1085                 bool perfect = sctx->b.num_perfect_occlusion_queries > 0;
1086
1087                 if (sctx->b.chip_class >= CIK) {
1088                         radeon_emit(cs,
1089                                     S_028004_PERFECT_ZPASS_COUNTS(perfect) |
1090                                     S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
1091                                     S_028004_ZPASS_ENABLE(1) |
1092                                     S_028004_SLICE_EVEN_ENABLE(1) |
1093                                     S_028004_SLICE_ODD_ENABLE(1));
1094                 } else {
1095                         radeon_emit(cs,
1096                                     S_028004_PERFECT_ZPASS_COUNTS(perfect) |
1097                                     S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples));
1098                 }
1099         } else {
1100                 /* Disable occlusion queries. */
1101                 if (sctx->b.chip_class >= CIK) {
1102                         radeon_emit(cs, 0);
1103                 } else {
1104                         radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1));
1105                 }
1106         }
1107
1108         /* DB_RENDER_OVERRIDE2 */
1109         radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
1110                 S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
1111                 S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
1112                 S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));
1113
1114         db_shader_control = S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) |
1115                             sctx->ps_db_shader_control;
1116
1117         /* Bug workaround for smoothing (overrasterization) on SI. */
1118         if (sctx->b.chip_class == SI && sctx->smoothing_enabled) {
1119                 db_shader_control &= C_02880C_Z_ORDER;
1120                 db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
1121         }
1122
1123         /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
1124         if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable))
1125                 db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
1126
1127         if (sctx->b.family == CHIP_STONEY &&
1128             sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)
1129                 db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
1130
1131         radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
1132                                db_shader_control);
1133 }
1134
1135 /*
1136  * format translation
1137  */
1138 static uint32_t si_translate_colorformat(enum pipe_format format)
1139 {
1140         const struct util_format_description *desc = util_format_description(format);
1141
1142 #define HAS_SIZE(x,y,z,w) \
1143         (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
1144          desc->channel[2].size == (z) && desc->channel[3].size == (w))
1145
1146         if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
1147                 return V_028C70_COLOR_10_11_11;
1148
1149         if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
1150                 return V_028C70_COLOR_INVALID;
1151
1152         /* hw cannot support mixed formats (except depth/stencil, since
1153          * stencil is not written to). */
1154         if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
1155                 return V_028C70_COLOR_INVALID;
1156
1157         switch (desc->nr_channels) {
1158         case 1:
1159                 switch (desc->channel[0].size) {
1160                 case 8:
1161                         return V_028C70_COLOR_8;
1162                 case 16:
1163                         return V_028C70_COLOR_16;
1164                 case 32:
1165                         return V_028C70_COLOR_32;
1166                 }
1167                 break;
1168         case 2:
1169                 if (desc->channel[0].size == desc->channel[1].size) {
1170                         switch (desc->channel[0].size) {
1171                         case 8:
1172                                 return V_028C70_COLOR_8_8;
1173                         case 16:
1174                                 return V_028C70_COLOR_16_16;
1175                         case 32:
1176                                 return V_028C70_COLOR_32_32;
1177                         }
1178                 } else if (HAS_SIZE(8,24,0,0)) {
1179                         return V_028C70_COLOR_24_8;
1180                 } else if (HAS_SIZE(24,8,0,0)) {
1181                         return V_028C70_COLOR_8_24;
1182                 }
1183                 break;
1184         case 3:
1185                 if (HAS_SIZE(5,6,5,0)) {
1186                         return V_028C70_COLOR_5_6_5;
1187                 } else if (HAS_SIZE(32,8,24,0)) {
1188                         return V_028C70_COLOR_X24_8_32_FLOAT;
1189                 }
1190                 break;
1191         case 4:
1192                 if (desc->channel[0].size == desc->channel[1].size &&
1193                     desc->channel[0].size == desc->channel[2].size &&
1194                     desc->channel[0].size == desc->channel[3].size) {
1195                         switch (desc->channel[0].size) {
1196                         case 4:
1197                                 return V_028C70_COLOR_4_4_4_4;
1198                         case 8:
1199                                 return V_028C70_COLOR_8_8_8_8;
1200                         case 16:
1201                                 return V_028C70_COLOR_16_16_16_16;
1202                         case 32:
1203                                 return V_028C70_COLOR_32_32_32_32;
1204                         }
1205                 } else if (HAS_SIZE(5,5,5,1)) {
1206                         return V_028C70_COLOR_1_5_5_5;
1207                 } else if (HAS_SIZE(10,10,10,2)) {
1208                         return V_028C70_COLOR_2_10_10_10;
1209                 }
1210                 break;
1211         }
1212         return V_028C70_COLOR_INVALID;
1213 }
1214
1215 static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
1216 {
1217         if (SI_BIG_ENDIAN) {
1218                 switch(colorformat) {
1219                 /* 8-bit buffers. */
1220                 case V_028C70_COLOR_8:
1221                         return V_028C70_ENDIAN_NONE;
1222
1223                 /* 16-bit buffers. */
1224                 case V_028C70_COLOR_5_6_5:
1225                 case V_028C70_COLOR_1_5_5_5:
1226                 case V_028C70_COLOR_4_4_4_4:
1227                 case V_028C70_COLOR_16:
1228                 case V_028C70_COLOR_8_8:
1229                         return V_028C70_ENDIAN_8IN16;
1230
1231                 /* 32-bit buffers. */
1232                 case V_028C70_COLOR_8_8_8_8:
1233                 case V_028C70_COLOR_2_10_10_10:
1234                 case V_028C70_COLOR_8_24:
1235                 case V_028C70_COLOR_24_8:
1236                 case V_028C70_COLOR_16_16:
1237                         return V_028C70_ENDIAN_8IN32;
1238
1239                 /* 64-bit buffers. */
1240                 case V_028C70_COLOR_16_16_16_16:
1241                         return V_028C70_ENDIAN_8IN16;
1242
1243                 case V_028C70_COLOR_32_32:
1244                         return V_028C70_ENDIAN_8IN32;
1245
1246                 /* 128-bit buffers. */
1247                 case V_028C70_COLOR_32_32_32_32:
1248                         return V_028C70_ENDIAN_8IN32;
1249                 default:
1250                         return V_028C70_ENDIAN_NONE; /* Unsupported. */
1251                 }
1252         } else {
1253                 return V_028C70_ENDIAN_NONE;
1254         }
1255 }
1256
1257 static uint32_t si_translate_dbformat(enum pipe_format format)
1258 {
1259         switch (format) {
1260         case PIPE_FORMAT_Z16_UNORM:
1261                 return V_028040_Z_16;
1262         case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1263         case PIPE_FORMAT_X8Z24_UNORM:
1264         case PIPE_FORMAT_Z24X8_UNORM:
1265         case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1266                 return V_028040_Z_24; /* deprecated on SI */
1267         case PIPE_FORMAT_Z32_FLOAT:
1268         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1269                 return V_028040_Z_32_FLOAT;
1270         default:
1271                 return V_028040_Z_INVALID;
1272         }
1273 }
1274
1275 /*
1276  * Texture translation
1277  */
1278
1279 static uint32_t si_translate_texformat(struct pipe_screen *screen,
1280                                        enum pipe_format format,
1281                                        const struct util_format_description *desc,
1282                                        int first_non_void)
1283 {
1284         struct si_screen *sscreen = (struct si_screen*)screen;
1285         bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 &&
1286                                           sscreen->b.info.drm_minor >= 31) ||
1287                                          sscreen->b.info.drm_major == 3;
1288         boolean uniform = TRUE;
1289         int i;
1290
1291         /* Colorspace (return non-RGB formats directly). */
1292         switch (desc->colorspace) {
1293         /* Depth stencil formats */
1294         case UTIL_FORMAT_COLORSPACE_ZS:
1295                 switch (format) {
1296                 case PIPE_FORMAT_Z16_UNORM:
1297                         return V_008F14_IMG_DATA_FORMAT_16;
1298                 case PIPE_FORMAT_X24S8_UINT:
1299                 case PIPE_FORMAT_Z24X8_UNORM:
1300                 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1301                         return V_008F14_IMG_DATA_FORMAT_8_24;
1302                 case PIPE_FORMAT_X8Z24_UNORM:
1303                 case PIPE_FORMAT_S8X24_UINT:
1304                 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1305                         return V_008F14_IMG_DATA_FORMAT_24_8;
1306                 case PIPE_FORMAT_S8_UINT:
1307                         return V_008F14_IMG_DATA_FORMAT_8;
1308                 case PIPE_FORMAT_Z32_FLOAT:
1309                         return V_008F14_IMG_DATA_FORMAT_32;
1310                 case PIPE_FORMAT_X32_S8X24_UINT:
1311                 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1312                         return V_008F14_IMG_DATA_FORMAT_X24_8_32;
1313                 default:
1314                         goto out_unknown;
1315                 }
1316
1317         case UTIL_FORMAT_COLORSPACE_YUV:
1318                 goto out_unknown; /* TODO */
1319
1320         case UTIL_FORMAT_COLORSPACE_SRGB:
1321                 if (desc->nr_channels != 4 && desc->nr_channels != 1)
1322                         goto out_unknown;
1323                 break;
1324
1325         default:
1326                 break;
1327         }
1328
1329         if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
1330                 if (!enable_compressed_formats)
1331                         goto out_unknown;
1332
1333                 switch (format) {
1334                 case PIPE_FORMAT_RGTC1_SNORM:
1335                 case PIPE_FORMAT_LATC1_SNORM:
1336                 case PIPE_FORMAT_RGTC1_UNORM:
1337                 case PIPE_FORMAT_LATC1_UNORM:
1338                         return V_008F14_IMG_DATA_FORMAT_BC4;
1339                 case PIPE_FORMAT_RGTC2_SNORM:
1340                 case PIPE_FORMAT_LATC2_SNORM:
1341                 case PIPE_FORMAT_RGTC2_UNORM:
1342                 case PIPE_FORMAT_LATC2_UNORM:
1343                         return V_008F14_IMG_DATA_FORMAT_BC5;
1344                 default:
1345                         goto out_unknown;
1346                 }
1347         }
1348
1349         if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
1350             sscreen->b.family == CHIP_STONEY) {
1351                 switch (format) {
1352                 case PIPE_FORMAT_ETC1_RGB8:
1353                 case PIPE_FORMAT_ETC2_RGB8:
1354                 case PIPE_FORMAT_ETC2_SRGB8:
1355                         return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
1356                 case PIPE_FORMAT_ETC2_RGB8A1:
1357                 case PIPE_FORMAT_ETC2_SRGB8A1:
1358                         return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
1359                 case PIPE_FORMAT_ETC2_RGBA8:
1360                 case PIPE_FORMAT_ETC2_SRGBA8:
1361                         return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA;
1362                 case PIPE_FORMAT_ETC2_R11_UNORM:
1363                 case PIPE_FORMAT_ETC2_R11_SNORM:
1364                         return V_008F14_IMG_DATA_FORMAT_ETC2_R;
1365                 case PIPE_FORMAT_ETC2_RG11_UNORM:
1366                 case PIPE_FORMAT_ETC2_RG11_SNORM:
1367                         return V_008F14_IMG_DATA_FORMAT_ETC2_RG;
1368                 default:
1369                         goto out_unknown;
1370                 }
1371         }
1372
1373         if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
1374                 if (!enable_compressed_formats)
1375                         goto out_unknown;
1376
1377                 switch (format) {
1378                 case PIPE_FORMAT_BPTC_RGBA_UNORM:
1379                 case PIPE_FORMAT_BPTC_SRGBA:
1380                         return V_008F14_IMG_DATA_FORMAT_BC7;
1381                 case PIPE_FORMAT_BPTC_RGB_FLOAT:
1382                 case PIPE_FORMAT_BPTC_RGB_UFLOAT:
1383                         return V_008F14_IMG_DATA_FORMAT_BC6;
1384                 default:
1385                         goto out_unknown;
1386                 }
1387         }
1388
1389         if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
1390                 switch (format) {
1391                 case PIPE_FORMAT_R8G8_B8G8_UNORM:
1392                 case PIPE_FORMAT_G8R8_B8R8_UNORM:
1393                         return V_008F14_IMG_DATA_FORMAT_GB_GR;
1394                 case PIPE_FORMAT_G8R8_G8B8_UNORM:
1395                 case PIPE_FORMAT_R8G8_R8B8_UNORM:
1396                         return V_008F14_IMG_DATA_FORMAT_BG_RG;
1397                 default:
1398                         goto out_unknown;
1399                 }
1400         }
1401
1402         if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
1403                 if (!enable_compressed_formats)
1404                         goto out_unknown;
1405
1406                 if (!util_format_s3tc_enabled) {
1407                         goto out_unknown;
1408                 }
1409
1410                 switch (format) {
1411                 case PIPE_FORMAT_DXT1_RGB:
1412                 case PIPE_FORMAT_DXT1_RGBA:
1413                 case PIPE_FORMAT_DXT1_SRGB:
1414                 case PIPE_FORMAT_DXT1_SRGBA:
1415                         return V_008F14_IMG_DATA_FORMAT_BC1;
1416                 case PIPE_FORMAT_DXT3_RGBA:
1417                 case PIPE_FORMAT_DXT3_SRGBA:
1418                         return V_008F14_IMG_DATA_FORMAT_BC2;
1419                 case PIPE_FORMAT_DXT5_RGBA:
1420                 case PIPE_FORMAT_DXT5_SRGBA:
1421                         return V_008F14_IMG_DATA_FORMAT_BC3;
1422                 default:
1423                         goto out_unknown;
1424                 }
1425         }
1426
1427         if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
1428                 return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
1429         } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
1430                 return V_008F14_IMG_DATA_FORMAT_10_11_11;
1431         }
1432
1433         /* R8G8Bx_SNORM - TODO CxV8U8 */
1434
1435         /* hw cannot support mixed formats (except depth/stencil, since only
1436          * depth is read).*/
1437         if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
1438                 goto out_unknown;
1439
1440         /* See whether the components are of the same size. */
1441         for (i = 1; i < desc->nr_channels; i++) {
1442                 uniform = uniform && desc->channel[0].size == desc->channel[i].size;
1443         }
1444
1445         /* Non-uniform formats. */
1446         if (!uniform) {
1447                 switch(desc->nr_channels) {
1448                 case 3:
1449                         if (desc->channel[0].size == 5 &&
1450                             desc->channel[1].size == 6 &&
1451                             desc->channel[2].size == 5) {
1452                                 return V_008F14_IMG_DATA_FORMAT_5_6_5;
1453                         }
1454                         goto out_unknown;
1455                 case 4:
1456                         if (desc->channel[0].size == 5 &&
1457                             desc->channel[1].size == 5 &&
1458                             desc->channel[2].size == 5 &&
1459                             desc->channel[3].size == 1) {
1460                                 return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
1461                         }
1462                         if (desc->channel[0].size == 10 &&
1463                             desc->channel[1].size == 10 &&
1464                             desc->channel[2].size == 10 &&
1465                             desc->channel[3].size == 2) {
1466                                 return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
1467                         }
1468                         goto out_unknown;
1469                 }
1470                 goto out_unknown;
1471         }
1472
1473         if (first_non_void < 0 || first_non_void > 3)
1474                 goto out_unknown;
1475
1476         /* uniform formats */
1477         switch (desc->channel[first_non_void].size) {
1478         case 4:
1479                 switch (desc->nr_channels) {
1480 #if 0 /* Not supported for render targets */
1481                 case 2:
1482                         return V_008F14_IMG_DATA_FORMAT_4_4;
1483 #endif
1484                 case 4:
1485                         return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
1486                 }
1487                 break;
1488         case 8:
1489                 switch (desc->nr_channels) {
1490                 case 1:
1491                         return V_008F14_IMG_DATA_FORMAT_8;
1492                 case 2:
1493                         return V_008F14_IMG_DATA_FORMAT_8_8;
1494                 case 4:
1495                         return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
1496                 }
1497                 break;
1498         case 16:
1499                 switch (desc->nr_channels) {
1500                 case 1:
1501                         return V_008F14_IMG_DATA_FORMAT_16;
1502                 case 2:
1503                         return V_008F14_IMG_DATA_FORMAT_16_16;
1504                 case 4:
1505                         return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
1506                 }
1507                 break;
1508         case 32:
1509                 switch (desc->nr_channels) {
1510                 case 1:
1511                         return V_008F14_IMG_DATA_FORMAT_32;
1512                 case 2:
1513                         return V_008F14_IMG_DATA_FORMAT_32_32;
1514 #if 0 /* Not supported for render targets */
1515                 case 3:
1516                         return V_008F14_IMG_DATA_FORMAT_32_32_32;
1517 #endif
1518                 case 4:
1519                         return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
1520                 }
1521         }
1522
1523 out_unknown:
1524         /* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */
1525         return ~0;
1526 }
1527
1528 static unsigned si_tex_wrap(unsigned wrap)
1529 {
1530         switch (wrap) {
1531         default:
1532         case PIPE_TEX_WRAP_REPEAT:
1533                 return V_008F30_SQ_TEX_WRAP;
1534         case PIPE_TEX_WRAP_CLAMP:
1535                 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;
1536         case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1537                 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
1538         case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1539                 return V_008F30_SQ_TEX_CLAMP_BORDER;
1540         case PIPE_TEX_WRAP_MIRROR_REPEAT:
1541                 return V_008F30_SQ_TEX_MIRROR;
1542         case PIPE_TEX_WRAP_MIRROR_CLAMP:
1543                 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
1544         case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1545                 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
1546         case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1547                 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;
1548         }
1549 }
1550
1551 static unsigned si_tex_mipfilter(unsigned filter)
1552 {
1553         switch (filter) {
1554         case PIPE_TEX_MIPFILTER_NEAREST:
1555                 return V_008F38_SQ_TEX_Z_FILTER_POINT;
1556         case PIPE_TEX_MIPFILTER_LINEAR:
1557                 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
1558         default:
1559         case PIPE_TEX_MIPFILTER_NONE:
1560                 return V_008F38_SQ_TEX_Z_FILTER_NONE;
1561         }
1562 }
1563
1564 static unsigned si_tex_compare(unsigned compare)
1565 {
1566         switch (compare) {
1567         default:
1568         case PIPE_FUNC_NEVER:
1569                 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
1570         case PIPE_FUNC_LESS:
1571                 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
1572         case PIPE_FUNC_EQUAL:
1573                 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
1574         case PIPE_FUNC_LEQUAL:
1575                 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
1576         case PIPE_FUNC_GREATER:
1577                 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
1578         case PIPE_FUNC_NOTEQUAL:
1579                 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
1580         case PIPE_FUNC_GEQUAL:
1581                 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
1582         case PIPE_FUNC_ALWAYS:
1583                 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
1584         }
1585 }
1586
1587 static unsigned si_tex_dim(unsigned res_target, unsigned view_target,
1588                            unsigned nr_samples)
1589 {
1590         if (view_target == PIPE_TEXTURE_CUBE ||
1591             view_target == PIPE_TEXTURE_CUBE_ARRAY)
1592                 res_target = view_target;
1593
1594         switch (res_target) {
1595         default:
1596         case PIPE_TEXTURE_1D:
1597                 return V_008F1C_SQ_RSRC_IMG_1D;
1598         case PIPE_TEXTURE_1D_ARRAY:
1599                 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
1600         case PIPE_TEXTURE_2D:
1601         case PIPE_TEXTURE_RECT:
1602                 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA :
1603                                         V_008F1C_SQ_RSRC_IMG_2D;
1604         case PIPE_TEXTURE_2D_ARRAY:
1605                 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY :
1606                                         V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
1607         case PIPE_TEXTURE_3D:
1608                 return V_008F1C_SQ_RSRC_IMG_3D;
1609         case PIPE_TEXTURE_CUBE:
1610         case PIPE_TEXTURE_CUBE_ARRAY:
1611                 return V_008F1C_SQ_RSRC_IMG_CUBE;
1612         }
1613 }
1614
1615 /*
1616  * Format support testing
1617  */
1618
1619 static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
1620 {
1621         return si_translate_texformat(screen, format, util_format_description(format),
1622                                       util_format_get_first_non_void_channel(format)) != ~0U;
1623 }
1624
1625 static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
1626                                                const struct util_format_description *desc,
1627                                                int first_non_void)
1628 {
1629         unsigned type;
1630         int i;
1631
1632         if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
1633                 return V_008F0C_BUF_DATA_FORMAT_10_11_11;
1634
1635         assert(first_non_void >= 0);
1636         type = desc->channel[first_non_void].type;
1637
1638         if (type == UTIL_FORMAT_TYPE_FIXED)
1639                 return V_008F0C_BUF_DATA_FORMAT_INVALID;
1640
1641         if (desc->nr_channels == 4 &&
1642             desc->channel[0].size == 10 &&
1643             desc->channel[1].size == 10 &&
1644             desc->channel[2].size == 10 &&
1645             desc->channel[3].size == 2)
1646                 return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;
1647
1648         /* See whether the components are of the same size. */
1649         for (i = 0; i < desc->nr_channels; i++) {
1650                 if (desc->channel[first_non_void].size != desc->channel[i].size)
1651                         return V_008F0C_BUF_DATA_FORMAT_INVALID;
1652         }
1653
1654         switch (desc->channel[first_non_void].size) {
1655         case 8:
1656                 switch (desc->nr_channels) {
1657                 case 1:
1658                         return V_008F0C_BUF_DATA_FORMAT_8;
1659                 case 2:
1660                         return V_008F0C_BUF_DATA_FORMAT_8_8;
1661                 case 3:
1662                 case 4:
1663                         return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
1664                 }
1665                 break;
1666         case 16:
1667                 switch (desc->nr_channels) {
1668                 case 1:
1669                         return V_008F0C_BUF_DATA_FORMAT_16;
1670                 case 2:
1671                         return V_008F0C_BUF_DATA_FORMAT_16_16;
1672                 case 3:
1673                 case 4:
1674                         return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
1675                 }
1676                 break;
1677         case 32:
1678                 /* From the Southern Islands ISA documentation about MTBUF:
1679                  * 'Memory reads of data in memory that is 32 or 64 bits do not
1680                  * undergo any format conversion.'
1681                  */
1682                 if (type != UTIL_FORMAT_TYPE_FLOAT &&
1683                     !desc->channel[first_non_void].pure_integer)
1684                         return V_008F0C_BUF_DATA_FORMAT_INVALID;
1685
1686                 switch (desc->nr_channels) {
1687                 case 1:
1688                         return V_008F0C_BUF_DATA_FORMAT_32;
1689                 case 2:
1690                         return V_008F0C_BUF_DATA_FORMAT_32_32;
1691                 case 3:
1692                         return V_008F0C_BUF_DATA_FORMAT_32_32_32;
1693                 case 4:
1694                         return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
1695                 }
1696                 break;
1697         }
1698
1699         return V_008F0C_BUF_DATA_FORMAT_INVALID;
1700 }
1701
1702 static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,
1703                                               const struct util_format_description *desc,
1704                                               int first_non_void)
1705 {
1706         if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
1707                 return V_008F0C_BUF_NUM_FORMAT_FLOAT;
1708
1709         assert(first_non_void >= 0);
1710
1711         switch (desc->channel[first_non_void].type) {
1712         case UTIL_FORMAT_TYPE_SIGNED:
1713                 if (desc->channel[first_non_void].normalized)
1714                         return V_008F0C_BUF_NUM_FORMAT_SNORM;
1715                 else if (desc->channel[first_non_void].pure_integer)
1716                         return V_008F0C_BUF_NUM_FORMAT_SINT;
1717                 else
1718                         return V_008F0C_BUF_NUM_FORMAT_SSCALED;
1719                 break;
1720         case UTIL_FORMAT_TYPE_UNSIGNED:
1721                 if (desc->channel[first_non_void].normalized)
1722                         return V_008F0C_BUF_NUM_FORMAT_UNORM;
1723                 else if (desc->channel[first_non_void].pure_integer)
1724                         return V_008F0C_BUF_NUM_FORMAT_UINT;
1725                 else
1726                         return V_008F0C_BUF_NUM_FORMAT_USCALED;
1727                 break;
1728         case UTIL_FORMAT_TYPE_FLOAT:
1729         default:
1730                 return V_008F0C_BUF_NUM_FORMAT_FLOAT;
1731         }
1732 }
1733
1734 static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format)
1735 {
1736         const struct util_format_description *desc;
1737         int first_non_void;
1738         unsigned data_format;
1739
1740         desc = util_format_description(format);
1741         first_non_void = util_format_get_first_non_void_channel(format);
1742         data_format = si_translate_buffer_dataformat(screen, desc, first_non_void);
1743         return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID;
1744 }
1745
1746 static bool si_is_colorbuffer_format_supported(enum pipe_format format)
1747 {
1748         return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
1749                 r600_translate_colorswap(format, FALSE) != ~0U;
1750 }
1751
1752 static bool si_is_zs_format_supported(enum pipe_format format)
1753 {
1754         return si_translate_dbformat(format) != V_028040_Z_INVALID;
1755 }
1756
1757 boolean si_is_format_supported(struct pipe_screen *screen,
1758                                enum pipe_format format,
1759                                enum pipe_texture_target target,
1760                                unsigned sample_count,
1761                                unsigned usage)
1762 {
1763         unsigned retval = 0;
1764
1765         if (target >= PIPE_MAX_TEXTURE_TYPES) {
1766                 R600_ERR("r600: unsupported texture type %d\n", target);
1767                 return FALSE;
1768         }
1769
1770         if (!util_format_is_supported(format, usage))
1771                 return FALSE;
1772
1773         if (sample_count > 1) {
1774                 if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))
1775                         return FALSE;
1776
1777                 switch (sample_count) {
1778                 case 2:
1779                 case 4:
1780                 case 8:
1781                         break;
1782                 case 16:
1783                         if (format == PIPE_FORMAT_NONE)
1784                                 return TRUE;
1785                         else
1786                                 return FALSE;
1787                 default:
1788                         return FALSE;
1789                 }
1790         }
1791
1792         if (usage & PIPE_BIND_SAMPLER_VIEW) {
1793                 if (target == PIPE_BUFFER) {
1794                         if (si_is_vertex_format_supported(screen, format))
1795                                 retval |= PIPE_BIND_SAMPLER_VIEW;
1796                 } else {
1797                         if (si_is_sampler_format_supported(screen, format))
1798                                 retval |= PIPE_BIND_SAMPLER_VIEW;
1799                 }
1800         }
1801
1802         if ((usage & (PIPE_BIND_RENDER_TARGET |
1803                       PIPE_BIND_DISPLAY_TARGET |
1804                       PIPE_BIND_SCANOUT |
1805                       PIPE_BIND_SHARED |
1806                       PIPE_BIND_BLENDABLE)) &&
1807             si_is_colorbuffer_format_supported(format)) {
1808                 retval |= usage &
1809                           (PIPE_BIND_RENDER_TARGET |
1810                            PIPE_BIND_DISPLAY_TARGET |
1811                            PIPE_BIND_SCANOUT |
1812                            PIPE_BIND_SHARED);
1813                 if (!util_format_is_pure_integer(format) &&
1814                     !util_format_is_depth_or_stencil(format))
1815                         retval |= usage & PIPE_BIND_BLENDABLE;
1816         }
1817
1818         if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
1819             si_is_zs_format_supported(format)) {
1820                 retval |= PIPE_BIND_DEPTH_STENCIL;
1821         }
1822
1823         if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
1824             si_is_vertex_format_supported(screen, format)) {
1825                 retval |= PIPE_BIND_VERTEX_BUFFER;
1826         }
1827
1828         if (usage & PIPE_BIND_TRANSFER_READ)
1829                 retval |= PIPE_BIND_TRANSFER_READ;
1830         if (usage & PIPE_BIND_TRANSFER_WRITE)
1831                 retval |= PIPE_BIND_TRANSFER_WRITE;
1832
1833         if ((usage & PIPE_BIND_LINEAR) &&
1834             !util_format_is_compressed(format) &&
1835             !(usage & PIPE_BIND_DEPTH_STENCIL))
1836                 retval |= PIPE_BIND_LINEAR;
1837
1838         return retval == usage;
1839 }
1840
1841 static unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level,
1842                                    bool stencil)
1843 {
1844         unsigned tile_mode_index = 0;
1845
1846         if (stencil) {
1847                 tile_mode_index = rtex->surface.stencil_tiling_index[level];
1848         } else {
1849                 tile_mode_index = rtex->surface.tiling_index[level];
1850         }
1851         return tile_mode_index;
1852 }
1853
1854 /*
1855  * framebuffer handling
1856  */
1857
1858 static void si_choose_spi_color_formats(struct r600_surface *surf,
1859                                         unsigned format, unsigned swap,
1860                                         unsigned ntype, bool is_depth)
1861 {
1862         /* Alpha is needed for alpha-to-coverage.
1863          * Blending may be with or without alpha.
1864          */
1865         unsigned normal = 0; /* most optimal, may not support blending or export alpha */
1866         unsigned alpha = 0; /* exports alpha, but may not support blending */
1867         unsigned blend = 0; /* supports blending, but may not export alpha */
1868         unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */
1869
1870         /* Choose the SPI color formats. These are required values for Stoney/RB+.
1871          * Other chips have multiple choices, though they are not necessarily better.
1872          */
1873         switch (format) {
1874         case V_028C70_COLOR_5_6_5:
1875         case V_028C70_COLOR_1_5_5_5:
1876         case V_028C70_COLOR_5_5_5_1:
1877         case V_028C70_COLOR_4_4_4_4:
1878         case V_028C70_COLOR_10_11_11:
1879         case V_028C70_COLOR_11_11_10:
1880         case V_028C70_COLOR_8:
1881         case V_028C70_COLOR_8_8:
1882         case V_028C70_COLOR_8_8_8_8:
1883         case V_028C70_COLOR_10_10_10_2:
1884         case V_028C70_COLOR_2_10_10_10:
1885                 if (ntype == V_028C70_NUMBER_UINT)
1886                         alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
1887                 else if (ntype == V_028C70_NUMBER_SINT)
1888                         alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
1889                 else
1890                         alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
1891                 break;
1892
1893         case V_028C70_COLOR_16:
1894         case V_028C70_COLOR_16_16:
1895         case V_028C70_COLOR_16_16_16_16:
1896                 if (ntype == V_028C70_NUMBER_UNORM ||
1897                     ntype == V_028C70_NUMBER_SNORM) {
1898                         /* UNORM16 and SNORM16 don't support blending */
1899                         if (ntype == V_028C70_NUMBER_UNORM)
1900                                 normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR;
1901                         else
1902                                 normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR;
1903
1904                         /* Use 32 bits per channel for blending. */
1905                         if (format == V_028C70_COLOR_16) {
1906                                 if (swap == V_028C70_SWAP_STD) { /* R */
1907                                         blend = V_028714_SPI_SHADER_32_R;
1908                                         blend_alpha = V_028714_SPI_SHADER_32_AR;
1909                                 } else if (swap == V_028C70_SWAP_ALT_REV) /* A */
1910                                         blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
1911                                 else
1912                                         assert(0);
1913                         } else if (format == V_028C70_COLOR_16_16) {
1914                                 if (swap == V_028C70_SWAP_STD) { /* RG */
1915                                         blend = V_028714_SPI_SHADER_32_GR;
1916                                         blend_alpha = V_028714_SPI_SHADER_32_ABGR;
1917                                 } else if (swap == V_028C70_SWAP_ALT) /* RA */
1918                                         blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
1919                                 else
1920                                         assert(0);
1921                         } else /* 16_16_16_16 */
1922                                 blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
1923                 } else if (ntype == V_028C70_NUMBER_UINT)
1924                         alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
1925                 else if (ntype == V_028C70_NUMBER_SINT)
1926                         alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
1927                 else if (ntype == V_028C70_NUMBER_FLOAT)
1928                         alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
1929                 else
1930                         assert(0);
1931                 break;
1932
1933         case V_028C70_COLOR_32:
1934                 if (swap == V_028C70_SWAP_STD) { /* R */
1935                         blend = normal = V_028714_SPI_SHADER_32_R;
1936                         alpha = blend_alpha = V_028714_SPI_SHADER_32_AR;
1937                 } else if (swap == V_028C70_SWAP_ALT_REV) /* A */
1938                         alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
1939                 else
1940                         assert(0);
1941                 break;
1942
1943         case V_028C70_COLOR_32_32:
1944                 if (swap == V_028C70_SWAP_STD) { /* RG */
1945                         blend = normal = V_028714_SPI_SHADER_32_GR;
1946                         alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
1947                 } else if (swap == V_028C70_SWAP_ALT) /* RA */
1948                         alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
1949                 else
1950                         assert(0);
1951                 break;
1952
1953         case V_028C70_COLOR_32_32_32_32:
1954         case V_028C70_COLOR_8_24:
1955         case V_028C70_COLOR_24_8:
1956         case V_028C70_COLOR_X24_8_32_FLOAT:
1957                 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
1958                 break;
1959
1960         default:
1961                 assert(0);
1962                 return;
1963         }
1964
1965         /* The DB->CB copy needs 32_ABGR. */
1966         if (is_depth)
1967                 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
1968
1969         surf->spi_shader_col_format = normal;
1970         surf->spi_shader_col_format_alpha = alpha;
1971         surf->spi_shader_col_format_blend = blend;
1972         surf->spi_shader_col_format_blend_alpha = blend_alpha;
1973 }
1974
1975 static void si_initialize_color_surface(struct si_context *sctx,
1976                                         struct r600_surface *surf)
1977 {
1978         struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
1979         unsigned level = surf->base.u.tex.level;
1980         uint64_t offset = rtex->surface.level[level].offset;
1981         unsigned pitch, slice;
1982         unsigned color_info, color_attrib, color_pitch, color_view;
1983         unsigned tile_mode_index;
1984         unsigned format, swap, ntype, endian;
1985         const struct util_format_description *desc;
1986         int i;
1987         unsigned blend_clamp = 0, blend_bypass = 0;
1988
1989         color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
1990                      S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);
1991
1992         pitch = (rtex->surface.level[level].nblk_x) / 8 - 1;
1993         slice = (rtex->surface.level[level].nblk_x * rtex->surface.level[level].nblk_y) / 64;
1994         if (slice) {
1995                 slice = slice - 1;
1996         }
1997
1998         tile_mode_index = si_tile_mode_index(rtex, level, false);
1999
2000         desc = util_format_description(surf->base.format);
2001         for (i = 0; i < 4; i++) {
2002                 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
2003                         break;
2004                 }
2005         }
2006         if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
2007                 ntype = V_028C70_NUMBER_FLOAT;
2008         } else {
2009                 ntype = V_028C70_NUMBER_UNORM;
2010                 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
2011                         ntype = V_028C70_NUMBER_SRGB;
2012                 else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
2013                         if (desc->channel[i].pure_integer) {
2014                                 ntype = V_028C70_NUMBER_SINT;
2015                         } else {
2016                                 assert(desc->channel[i].normalized);
2017                                 ntype = V_028C70_NUMBER_SNORM;
2018                         }
2019                 } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
2020                         if (desc->channel[i].pure_integer) {
2021                                 ntype = V_028C70_NUMBER_UINT;
2022                         } else {
2023                                 assert(desc->channel[i].normalized);
2024                                 ntype = V_028C70_NUMBER_UNORM;
2025                         }
2026                 }
2027         }
2028
2029         format = si_translate_colorformat(surf->base.format);
2030         if (format == V_028C70_COLOR_INVALID) {
2031                 R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
2032         }
2033         assert(format != V_028C70_COLOR_INVALID);
2034         swap = r600_translate_colorswap(surf->base.format, FALSE);
2035         endian = si_colorformat_endian_swap(format);
2036
2037         /* blend clamp should be set for all NORM/SRGB types */
2038         if (ntype == V_028C70_NUMBER_UNORM ||
2039             ntype == V_028C70_NUMBER_SNORM ||
2040             ntype == V_028C70_NUMBER_SRGB)
2041                 blend_clamp = 1;
2042
2043         /* set blend bypass according to docs if SINT/UINT or
2044            8/24 COLOR variants */
2045         if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2046             format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2047             format == V_028C70_COLOR_X24_8_32_FLOAT) {
2048                 blend_clamp = 0;
2049                 blend_bypass = 1;
2050         }
2051
2052         if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2053             (format == V_028C70_COLOR_8 ||
2054              format == V_028C70_COLOR_8_8 ||
2055              format == V_028C70_COLOR_8_8_8_8))
2056                 surf->color_is_int8 = true;
2057
2058         color_info = S_028C70_FORMAT(format) |
2059                 S_028C70_COMP_SWAP(swap) |
2060                 S_028C70_BLEND_CLAMP(blend_clamp) |
2061                 S_028C70_BLEND_BYPASS(blend_bypass) |
2062                 S_028C70_NUMBER_TYPE(ntype) |
2063                 S_028C70_ENDIAN(endian);
2064
2065         color_pitch = S_028C64_TILE_MAX(pitch);
2066
2067         /* Intensity is implemented as Red, so treat it that way. */
2068         color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) |
2069                 S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 ||
2070                                            util_format_is_intensity(surf->base.format));
2071
2072         if (rtex->resource.b.b.nr_samples > 1) {
2073                 unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
2074
2075                 color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2076                                 S_028C74_NUM_FRAGMENTS(log_samples);
2077
2078                 if (rtex->fmask.size) {
2079                         color_info |= S_028C70_COMPRESSION(1);
2080                         unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);
2081
2082                         color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index);
2083
2084                         if (sctx->b.chip_class == SI) {
2085                                 /* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
2086                                 color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
2087                         }
2088                         if (sctx->b.chip_class >= CIK) {
2089                                 color_pitch |= S_028C64_FMASK_TILE_MAX(rtex->fmask.pitch_in_pixels / 8 - 1);
2090                         }
2091                 }
2092         }
2093
2094         offset += rtex->resource.gpu_address;
2095
2096         surf->cb_color_base = offset >> 8;
2097         surf->cb_color_pitch = color_pitch;
2098         surf->cb_color_slice = S_028C68_TILE_MAX(slice);
2099         surf->cb_color_view = color_view;
2100         surf->cb_color_info = color_info;
2101         surf->cb_color_attrib = color_attrib;
2102
2103         if (sctx->b.chip_class >= VI && rtex->dcc_offset) {
2104                 unsigned max_uncompressed_block_size = 2;
2105
2106                 if (rtex->surface.nsamples > 1) {
2107                         if (rtex->surface.bpe == 1)
2108                                 max_uncompressed_block_size = 0;
2109                         else if (rtex->surface.bpe == 2)
2110                                 max_uncompressed_block_size = 1;
2111                 }
2112
2113                 surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2114                                        S_028C78_INDEPENDENT_64B_BLOCKS(1);
2115                 surf->cb_dcc_base = (rtex->resource.gpu_address +
2116                                      rtex->dcc_offset +
2117                                      rtex->surface.level[level].dcc_offset) >> 8;
2118         }
2119
2120         if (rtex->fmask.size) {
2121                 surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8;
2122                 surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
2123         } else {
2124                 /* This must be set for fast clear to work without FMASK. */
2125                 surf->cb_color_fmask = surf->cb_color_base;
2126                 surf->cb_color_fmask_slice = surf->cb_color_slice;
2127                 surf->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2128
2129                 if (sctx->b.chip_class == SI) {
2130                         unsigned bankh = util_logbase2(rtex->surface.bankh);
2131                         surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2132                 }
2133
2134                 if (sctx->b.chip_class >= CIK) {
2135                         surf->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch);
2136                 }
2137         }
2138
2139         /* Determine pixel shader export format */
2140         si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);
2141
2142         surf->color_initialized = true;
2143 }
2144
2145 static void si_init_depth_surface(struct si_context *sctx,
2146                                   struct r600_surface *surf)
2147 {
2148         struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
2149         unsigned level = surf->base.u.tex.level;
2150         struct radeon_surf_level *levelinfo = &rtex->surface.level[level];
2151         unsigned format;
2152         uint32_t z_info, s_info, db_depth_info;
2153         uint64_t z_offs, s_offs;
2154         uint32_t db_htile_data_base, db_htile_surface, pa_su_poly_offset_db_fmt_cntl = 0;
2155
2156         switch (sctx->framebuffer.state.zsbuf->texture->format) {
2157         case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2158         case PIPE_FORMAT_X8Z24_UNORM:
2159         case PIPE_FORMAT_Z24X8_UNORM:
2160         case PIPE_FORMAT_Z24_UNORM_S8_UINT:
2161                 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
2162                 break;
2163         case PIPE_FORMAT_Z32_FLOAT:
2164         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
2165                 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
2166                                                 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
2167                 break;
2168         case PIPE_FORMAT_Z16_UNORM:
2169                 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
2170                 break;
2171         default:
2172                 assert(0);
2173         }
2174
2175         format = si_translate_dbformat(rtex->resource.b.b.format);
2176
2177         if (format == V_028040_Z_INVALID) {
2178                 R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
2179         }
2180         assert(format != V_028040_Z_INVALID);
2181
2182         s_offs = z_offs = rtex->resource.gpu_address;
2183         z_offs += rtex->surface.level[level].offset;
2184         s_offs += rtex->surface.stencil_level[level].offset;
2185
2186         db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
2187
2188         z_info = S_028040_FORMAT(format);
2189         if (rtex->resource.b.b.nr_samples > 1) {
2190                 z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
2191         }
2192
2193         if (rtex->surface.flags & RADEON_SURF_SBUFFER)
2194                 s_info = S_028044_FORMAT(V_028044_STENCIL_8);
2195         else
2196                 s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
2197
2198         if (sctx->b.chip_class >= CIK) {
2199                 struct radeon_info *info = &sctx->screen->b.info;
2200                 unsigned index = rtex->surface.tiling_index[level];
2201                 unsigned stencil_index = rtex->surface.stencil_tiling_index[level];
2202                 unsigned macro_index = rtex->surface.macro_tile_index;
2203                 unsigned tile_mode = info->si_tile_mode_array[index];
2204                 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2205                 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2206
2207                 db_depth_info |=
2208                         S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2209                         S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2210                         S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2211                         S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2212                         S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2213                         S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2214                 z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2215                 s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2216         } else {
2217                 unsigned tile_mode_index = si_tile_mode_index(rtex, level, false);
2218                 z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2219                 tile_mode_index = si_tile_mode_index(rtex, level, true);
2220                 s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2221         }
2222
2223         /* HiZ aka depth buffer htile */
2224         /* use htile only for first level */
2225         if (rtex->htile_buffer && !level) {
2226                 z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
2227                           S_028040_ALLOW_EXPCLEAR(1);
2228
2229                 if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
2230                         /* Workaround: For a not yet understood reason, the
2231                          * combination of MSAA, fast stencil clear and stencil
2232                          * decompress messes with subsequent stencil buffer
2233                          * uses. Problem was reproduced on Verde, Bonaire,
2234                          * Tonga, and Carrizo.
2235                          *
2236                          * Disabling EXPCLEAR works around the problem.
2237                          *
2238                          * Check piglit's arb_texture_multisample-stencil-clear
2239                          * test if you want to try changing this.
2240                          */
2241                         if (rtex->resource.b.b.nr_samples <= 1)
2242                                 s_info |= S_028044_ALLOW_EXPCLEAR(1);
2243                 } else
2244                         /* Use all of the htile_buffer for depth if there's no stencil. */
2245                         s_info |= S_028044_TILE_STENCIL_DISABLE(1);
2246
2247                 uint64_t va = rtex->htile_buffer->gpu_address;
2248                 db_htile_data_base = va >> 8;
2249                 db_htile_surface = S_028ABC_FULL_CACHE(1);
2250         } else {
2251                 db_htile_data_base = 0;
2252                 db_htile_surface = 0;
2253         }
2254
2255         assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);
2256
2257         surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
2258                               S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
2259         surf->db_htile_data_base = db_htile_data_base;
2260         surf->db_depth_info = db_depth_info;
2261         surf->db_z_info = z_info;
2262         surf->db_stencil_info = s_info;
2263         surf->db_depth_base = z_offs >> 8;
2264         surf->db_stencil_base = s_offs >> 8;
2265         surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
2266                               S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
2267         surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
2268                                                         levelinfo->nblk_y) / 64 - 1);
2269         surf->db_htile_surface = db_htile_surface;
2270         surf->pa_su_poly_offset_db_fmt_cntl = pa_su_poly_offset_db_fmt_cntl;
2271
2272         surf->depth_initialized = true;
2273 }
2274
2275 static void si_set_framebuffer_state(struct pipe_context *ctx,
2276                                      const struct pipe_framebuffer_state *state)
2277 {
2278         struct si_context *sctx = (struct si_context *)ctx;
2279         struct pipe_constant_buffer constbuf = {0};
2280         struct r600_surface *surf = NULL;
2281         struct r600_texture *rtex;
2282         bool old_cb0_is_integer = sctx->framebuffer.cb0_is_integer;
2283         unsigned old_nr_samples = sctx->framebuffer.nr_samples;
2284         int i;
2285
2286         /* Only flush TC when changing the framebuffer state, because
2287          * the only client not using TC that can change textures is
2288          * the framebuffer.
2289          *
2290          * Flush all CB and DB caches here because all buffers can be used
2291          * for write by both TC (with shader image stores) and CB/DB.
2292          */
2293         sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
2294                          SI_CONTEXT_INV_GLOBAL_L2 |
2295                          SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
2296                          SI_CONTEXT_CS_PARTIAL_FLUSH;
2297
2298         /* Take the maximum of the old and new count. If the new count is lower,
2299          * dirtying is needed to disable the unbound colorbuffers.
2300          */
2301         sctx->framebuffer.dirty_cbufs |=
2302                 (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
2303         sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
2304
2305         util_copy_framebuffer_state(&sctx->framebuffer.state, state);
2306
2307         sctx->framebuffer.spi_shader_col_format = 0;
2308         sctx->framebuffer.spi_shader_col_format_alpha = 0;
2309         sctx->framebuffer.spi_shader_col_format_blend = 0;
2310         sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
2311         sctx->framebuffer.color_is_int8 = 0;
2312
2313         sctx->framebuffer.compressed_cb_mask = 0;
2314         sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
2315         sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
2316         sctx->framebuffer.cb0_is_integer = state->nr_cbufs && state->cbufs[0] &&
2317                                   util_format_is_pure_integer(state->cbufs[0]->format);
2318
2319         if (sctx->framebuffer.cb0_is_integer != old_cb0_is_integer)
2320                 si_mark_atom_dirty(sctx, &sctx->db_render_state);
2321
2322         for (i = 0; i < state->nr_cbufs; i++) {
2323                 if (!state->cbufs[i])
2324                         continue;
2325
2326                 surf = (struct r600_surface*)state->cbufs[i];
2327                 rtex = (struct r600_texture*)surf->base.texture;
2328
2329                 if (!surf->color_initialized) {
2330                         si_initialize_color_surface(sctx, surf);
2331                 }
2332
2333                 sctx->framebuffer.spi_shader_col_format |=
2334                         surf->spi_shader_col_format << (i * 4);
2335                 sctx->framebuffer.spi_shader_col_format_alpha |=
2336                         surf->spi_shader_col_format_alpha << (i * 4);
2337                 sctx->framebuffer.spi_shader_col_format_blend |=
2338                         surf->spi_shader_col_format_blend << (i * 4);
2339                 sctx->framebuffer.spi_shader_col_format_blend_alpha |=
2340                         surf->spi_shader_col_format_blend_alpha << (i * 4);
2341
2342                 if (surf->color_is_int8)
2343                         sctx->framebuffer.color_is_int8 |= 1 << i;
2344
2345                 if (rtex->fmask.size && rtex->cmask.size) {
2346                         sctx->framebuffer.compressed_cb_mask |= 1 << i;
2347                 }
2348                 r600_context_add_resource_size(ctx, surf->base.texture);
2349         }
2350         /* Set the second SPI format for possible dual-src blending. */
2351         if (i == 1 && surf) {
2352                 sctx->framebuffer.spi_shader_col_format |=
2353                         surf->spi_shader_col_format << (i * 4);
2354                 sctx->framebuffer.spi_shader_col_format_alpha |=
2355                         surf->spi_shader_col_format_alpha << (i * 4);
2356                 sctx->framebuffer.spi_shader_col_format_blend |=
2357                         surf->spi_shader_col_format_blend << (i * 4);
2358                 sctx->framebuffer.spi_shader_col_format_blend_alpha |=
2359                         surf->spi_shader_col_format_blend_alpha << (i * 4);
2360         }
2361
2362         if (state->zsbuf) {
2363                 surf = (struct r600_surface*)state->zsbuf;
2364
2365                 if (!surf->depth_initialized) {
2366                         si_init_depth_surface(sctx, surf);
2367                 }
2368                 r600_context_add_resource_size(ctx, surf->base.texture);
2369         }
2370
2371         si_update_poly_offset_state(sctx);
2372         si_mark_atom_dirty(sctx, &sctx->cb_render_state);
2373         si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
2374
2375         if (sctx->framebuffer.nr_samples != old_nr_samples) {
2376                 si_mark_atom_dirty(sctx, &sctx->msaa_config);
2377                 si_mark_atom_dirty(sctx, &sctx->db_render_state);
2378
2379                 /* Set sample locations as fragment shader constants. */
2380                 switch (sctx->framebuffer.nr_samples) {
2381                 case 1:
2382                         constbuf.user_buffer = sctx->b.sample_locations_1x;
2383                         break;
2384                 case 2:
2385                         constbuf.user_buffer = sctx->b.sample_locations_2x;
2386                         break;
2387                 case 4:
2388                         constbuf.user_buffer = sctx->b.sample_locations_4x;
2389                         break;
2390                 case 8:
2391                         constbuf.user_buffer = sctx->b.sample_locations_8x;
2392                         break;
2393                 case 16:
2394                         constbuf.user_buffer = sctx->b.sample_locations_16x;
2395                         break;
2396                 default:
2397                         R600_ERR("Requested an invalid number of samples %i.\n",
2398                                  sctx->framebuffer.nr_samples);
2399                         assert(0);
2400                 }
2401                 constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
2402                 si_set_constant_buffer(sctx, &sctx->rw_buffers,
2403                                        SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);
2404
2405                 si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
2406         }
2407 }
2408
2409 static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
2410 {
2411         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
2412         struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
2413         unsigned i, nr_cbufs = state->nr_cbufs;
2414         struct r600_texture *tex = NULL;
2415         struct r600_surface *cb = NULL;
2416
2417         /* Colorbuffers. */
2418         for (i = 0; i < nr_cbufs; i++) {
2419                 if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
2420                         continue;
2421
2422                 cb = (struct r600_surface*)state->cbufs[i];
2423                 if (!cb) {
2424                         radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
2425                                                S_028C70_FORMAT(V_028C70_COLOR_INVALID));
2426                         continue;
2427                 }
2428
2429                 tex = (struct r600_texture *)cb->base.texture;
2430                 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
2431                                       &tex->resource, RADEON_USAGE_READWRITE,
2432                                       tex->surface.nsamples > 1 ?
2433                                               RADEON_PRIO_COLOR_BUFFER_MSAA :
2434                                               RADEON_PRIO_COLOR_BUFFER);
2435
2436                 if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
2437                         radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
2438                                 tex->cmask_buffer, RADEON_USAGE_READWRITE,
2439                                 RADEON_PRIO_CMASK);
2440                 }
2441
2442                 radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
2443                                            sctx->b.chip_class >= VI ? 14 : 13);
2444                 radeon_emit(cs, cb->cb_color_base);     /* R_028C60_CB_COLOR0_BASE */
2445                 radeon_emit(cs, cb->cb_color_pitch);    /* R_028C64_CB_COLOR0_PITCH */
2446                 radeon_emit(cs, cb->cb_color_slice);    /* R_028C68_CB_COLOR0_SLICE */
2447                 radeon_emit(cs, cb->cb_color_view);     /* R_028C6C_CB_COLOR0_VIEW */
2448                 radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */
2449                 radeon_emit(cs, cb->cb_color_attrib);   /* R_028C74_CB_COLOR0_ATTRIB */
2450                 radeon_emit(cs, cb->cb_dcc_control);    /* R_028C78_CB_COLOR0_DCC_CONTROL */
2451                 radeon_emit(cs, tex->cmask.base_address_reg);   /* R_028C7C_CB_COLOR0_CMASK */
2452                 radeon_emit(cs, tex->cmask.slice_tile_max);     /* R_028C80_CB_COLOR0_CMASK_SLICE */
2453                 radeon_emit(cs, cb->cb_color_fmask);            /* R_028C84_CB_COLOR0_FMASK */
2454                 radeon_emit(cs, cb->cb_color_fmask_slice);      /* R_028C88_CB_COLOR0_FMASK_SLICE */
2455                 radeon_emit(cs, tex->color_clear_value[0]);     /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
2456                 radeon_emit(cs, tex->color_clear_value[1]);     /* R_028C90_CB_COLOR0_CLEAR_WORD1 */
2457
2458                 if (sctx->b.chip_class >= VI)
2459                         radeon_emit(cs, cb->cb_dcc_base);       /* R_028C94_CB_COLOR0_DCC_BASE */
2460         }
2461         /* set CB_COLOR1_INFO for possible dual-src blending */
2462         if (i == 1 && state->cbufs[0] &&
2463             sctx->framebuffer.dirty_cbufs & (1 << 0)) {
2464                 radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
2465                                        cb->cb_color_info | tex->cb_color_info);
2466                 i++;
2467         }
2468         for (; i < 8 ; i++)
2469                 if (sctx->framebuffer.dirty_cbufs & (1 << i))
2470                         radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
2471
2472         /* ZS buffer. */
2473         if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
2474                 struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
2475                 struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;
2476
2477                 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
2478                                       &rtex->resource, RADEON_USAGE_READWRITE,
2479                                       zb->base.texture->nr_samples > 1 ?
2480                                               RADEON_PRIO_DEPTH_BUFFER_MSAA :
2481                                               RADEON_PRIO_DEPTH_BUFFER);
2482
2483                 if (zb->db_htile_data_base) {
2484                         radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
2485                                               rtex->htile_buffer, RADEON_USAGE_READWRITE,
2486                                               RADEON_PRIO_HTILE);
2487                 }
2488
2489                 radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
2490                 radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
2491
2492                 radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
2493                 radeon_emit(cs, zb->db_depth_info);     /* R_02803C_DB_DEPTH_INFO */
2494                 radeon_emit(cs, zb->db_z_info |         /* R_028040_DB_Z_INFO */
2495                             S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0));
2496                 radeon_emit(cs, zb->db_stencil_info);   /* R_028044_DB_STENCIL_INFO */
2497                 radeon_emit(cs, zb->db_depth_base);     /* R_028048_DB_Z_READ_BASE */
2498                 radeon_emit(cs, zb->db_stencil_base);   /* R_02804C_DB_STENCIL_READ_BASE */
2499                 radeon_emit(cs, zb->db_depth_base);     /* R_028050_DB_Z_WRITE_BASE */
2500                 radeon_emit(cs, zb->db_stencil_base);   /* R_028054_DB_STENCIL_WRITE_BASE */
2501                 radeon_emit(cs, zb->db_depth_size);     /* R_028058_DB_DEPTH_SIZE */
2502                 radeon_emit(cs, zb->db_depth_slice);    /* R_02805C_DB_DEPTH_SLICE */
2503
2504                 radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
2505                 radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */
2506                 radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */
2507
2508                 radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
2509                 radeon_set_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
2510                                        zb->pa_su_poly_offset_db_fmt_cntl);
2511         } else if (sctx->framebuffer.dirty_zsbuf) {
2512                 radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
2513                 radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
2514                 radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
2515         }
2516
2517         /* Framebuffer dimensions. */
2518         /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
2519         radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
2520                                S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
2521
2522         sctx->framebuffer.dirty_cbufs = 0;
2523         sctx->framebuffer.dirty_zsbuf = false;
2524 }
2525
2526 static void si_emit_msaa_sample_locs(struct si_context *sctx,
2527                                      struct r600_atom *atom)
2528 {
2529         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
2530         unsigned nr_samples = sctx->framebuffer.nr_samples;
2531
2532         /* Smoothing (only possible with nr_samples == 1) uses the same
2533          * sample locations as the MSAA it simulates.
2534          */
2535         if (nr_samples <= 1 && sctx->smoothing_enabled)
2536                 nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;
2537
2538         /* On Polaris, the small primitive filter uses the sample locations
2539          * even when MSAA is off, so we need to make sure they're set to 0.
2540          */
2541         if ((nr_samples > 1 || sctx->b.family >= CHIP_POLARIS10) &&
2542             (nr_samples != sctx->msaa_sample_locs.nr_samples)) {
2543                 sctx->msaa_sample_locs.nr_samples = nr_samples;
2544                 cayman_emit_msaa_sample_locs(cs, nr_samples);
2545         }
2546
2547         if (sctx->b.family >= CHIP_POLARIS10) {
2548                 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
2549                 unsigned small_prim_filter_cntl =
2550                         S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
2551                         S_028830_LINE_FILTER_DISABLE(1); /* line bug */
2552
2553                 /* The alternative of setting sample locations to 0 would
2554                  * require a DB flush to avoid Z errors, see
2555                  * https://bugs.freedesktop.org/show_bug.cgi?id=96908
2556                  */
2557                 if (sctx->framebuffer.nr_samples > 1 && rs && !rs->multisample_enable)
2558                         small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;
2559
2560                 radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
2561                                        small_prim_filter_cntl);
2562         }
2563 }
2564
2565 static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
2566 {
2567         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
2568
2569         cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
2570                                 sctx->ps_iter_samples,
2571                                 sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0);
2572 }
2573
2574
2575 static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
2576 {
2577         struct si_context *sctx = (struct si_context *)ctx;
2578
2579         if (sctx->ps_iter_samples == min_samples)
2580                 return;
2581
2582         sctx->ps_iter_samples = min_samples;
2583
2584         if (sctx->framebuffer.nr_samples > 1)
2585                 si_mark_atom_dirty(sctx, &sctx->msaa_config);
2586 }
2587
2588 /*
2589  * Samplers
2590  */
2591
2592 /**
2593  * Build the sampler view descriptor for a buffer texture.
2594  * @param state 256-bit descriptor; only the high 128 bits are filled in
2595  */
2596 void
2597 si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
2598                           enum pipe_format format,
2599                           unsigned first_element, unsigned last_element,
2600                           uint32_t *state)
2601 {
2602         const struct util_format_description *desc;
2603         int first_non_void;
2604         uint64_t va;
2605         unsigned stride;
2606         unsigned num_records;
2607         unsigned num_format, data_format;
2608
2609         desc = util_format_description(format);
2610         first_non_void = util_format_get_first_non_void_channel(format);
2611         stride = desc->block.bits / 8;
2612         va = buf->gpu_address + first_element * stride;
2613         num_format = si_translate_buffer_numformat(&screen->b.b, desc, first_non_void);
2614         data_format = si_translate_buffer_dataformat(&screen->b.b, desc, first_non_void);
2615
2616         num_records = last_element + 1 - first_element;
2617         num_records = MIN2(num_records, buf->b.b.width0 / stride);
2618
2619         if (screen->b.chip_class >= VI)
2620                 num_records *= stride;
2621
2622         state[4] = va;
2623         state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
2624                    S_008F04_STRIDE(stride);
2625         state[6] = num_records;
2626         state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
2627                    S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
2628                    S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
2629                    S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
2630                    S_008F0C_NUM_FORMAT(num_format) |
2631                    S_008F0C_DATA_FORMAT(data_format);
2632 }
2633
2634 /**
2635  * Build the sampler view descriptor for a texture.
2636  */
2637 void
2638 si_make_texture_descriptor(struct si_screen *screen,
2639                            struct r600_texture *tex,
2640                            bool sampler,
2641                            enum pipe_texture_target target,
2642                            enum pipe_format pipe_format,
2643                            const unsigned char state_swizzle[4],
2644                            unsigned base_level, unsigned first_level, unsigned last_level,
2645                            unsigned first_layer, unsigned last_layer,
2646                            unsigned width, unsigned height, unsigned depth,
2647                            uint32_t *state,
2648                            uint32_t *fmask_state)
2649 {
2650         struct pipe_resource *res = &tex->resource.b.b;
2651         const struct radeon_surf_level *surflevel = tex->surface.level;
2652         const struct util_format_description *desc;
2653         unsigned char swizzle[4];
2654         int first_non_void;
2655         unsigned num_format, data_format, type;
2656         uint32_t pitch;
2657         uint64_t va;
2658
2659         /* Texturing with separate depth and stencil. */
2660         if (tex->is_depth && !tex->is_flushing_texture) {
2661                 switch (pipe_format) {
2662                 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
2663                         pipe_format = PIPE_FORMAT_Z32_FLOAT;
2664                         break;
2665                 case PIPE_FORMAT_X8Z24_UNORM:
2666                 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2667                         /* Z24 is always stored like this. */
2668                         pipe_format = PIPE_FORMAT_Z24X8_UNORM;
2669                         break;
2670                 case PIPE_FORMAT_X24S8_UINT:
2671                 case PIPE_FORMAT_S8X24_UINT:
2672                 case PIPE_FORMAT_X32_S8X24_UINT:
2673                         pipe_format = PIPE_FORMAT_S8_UINT;
2674                         surflevel = tex->surface.stencil_level;
2675                         break;
2676                 default:;
2677                 }
2678         }
2679
2680         desc = util_format_description(pipe_format);
2681
2682         if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
2683                 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
2684                 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
2685
2686                 switch (pipe_format) {
2687                 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2688                 case PIPE_FORMAT_X24S8_UINT:
2689                 case PIPE_FORMAT_X32_S8X24_UINT:
2690                 case PIPE_FORMAT_X8Z24_UNORM:
2691                         util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
2692                         break;
2693                 default:
2694                         util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
2695                 }
2696         } else {
2697                 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
2698         }
2699
2700         first_non_void = util_format_get_first_non_void_channel(pipe_format);
2701
2702         switch (pipe_format) {
2703         case PIPE_FORMAT_S8_UINT_Z24_UNORM:
2704                 num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2705                 break;
2706         default:
2707                 if (first_non_void < 0) {
2708                         if (util_format_is_compressed(pipe_format)) {
2709                                 switch (pipe_format) {
2710                                 case PIPE_FORMAT_DXT1_SRGB:
2711                                 case PIPE_FORMAT_DXT1_SRGBA:
2712                                 case PIPE_FORMAT_DXT3_SRGBA:
2713                                 case PIPE_FORMAT_DXT5_SRGBA:
2714                                 case PIPE_FORMAT_BPTC_SRGBA:
2715                                 case PIPE_FORMAT_ETC2_SRGB8:
2716                                 case PIPE_FORMAT_ETC2_SRGB8A1:
2717                                 case PIPE_FORMAT_ETC2_SRGBA8:
2718                                         num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2719                                         break;
2720                                 case PIPE_FORMAT_RGTC1_SNORM:
2721                                 case PIPE_FORMAT_LATC1_SNORM:
2722                                 case PIPE_FORMAT_RGTC2_SNORM:
2723                                 case PIPE_FORMAT_LATC2_SNORM:
2724                                 case PIPE_FORMAT_ETC2_R11_SNORM:
2725                                 case PIPE_FORMAT_ETC2_RG11_SNORM:
2726                                 /* implies float, so use SNORM/UNORM to determine
2727                                    whether data is signed or not */
2728                                 case PIPE_FORMAT_BPTC_RGB_FLOAT:
2729                                         num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2730                                         break;
2731                                 default:
2732                                         num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2733                                         break;
2734                                 }
2735                         } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
2736                                 num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2737                         } else {
2738                                 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2739                         }
2740                 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
2741                         num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
2742                 } else {
2743                         num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2744
2745                         switch (desc->channel[first_non_void].type) {
2746                         case UTIL_FORMAT_TYPE_FLOAT:
2747                                 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
2748                                 break;
2749                         case UTIL_FORMAT_TYPE_SIGNED:
2750                                 if (desc->channel[first_non_void].normalized)
2751                                         num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
2752                                 else if (desc->channel[first_non_void].pure_integer)
2753                                         num_format = V_008F14_IMG_NUM_FORMAT_SINT;
2754                                 else
2755                                         num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
2756                                 break;
2757                         case UTIL_FORMAT_TYPE_UNSIGNED:
2758                                 if (desc->channel[first_non_void].normalized)
2759                                         num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
2760                                 else if (desc->channel[first_non_void].pure_integer)
2761                                         num_format = V_008F14_IMG_NUM_FORMAT_UINT;
2762                                 else
2763                                         num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
2764                         }
2765                 }
2766         }
2767
2768         data_format = si_translate_texformat(&screen->b.b, pipe_format, desc, first_non_void);
2769         if (data_format == ~0) {
2770                 data_format = 0;
2771         }
2772
2773         if (!sampler &&
2774             (res->target == PIPE_TEXTURE_CUBE ||
2775              res->target == PIPE_TEXTURE_CUBE_ARRAY ||
2776              res->target == PIPE_TEXTURE_3D)) {
2777                 /* For the purpose of shader images, treat cube maps and 3D
2778                  * textures as 2D arrays. For 3D textures, the address
2779                  * calculations for mipmaps are different, so we rely on the
2780                  * caller to effectively disable mipmaps.
2781                  */
2782                 type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
2783
2784                 assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0));
2785         } else {
2786                 type = si_tex_dim(res->target, target, res->nr_samples);
2787         }
2788
2789         if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
2790                 height = 1;
2791                 depth = res->array_size;
2792         } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
2793                    type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
2794                 if (sampler || res->target != PIPE_TEXTURE_3D)
2795                         depth = res->array_size;
2796         } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
2797                 depth = res->array_size / 6;
2798
2799         pitch = surflevel[base_level].nblk_x * util_format_get_blockwidth(pipe_format);
2800         va = tex->resource.gpu_address + surflevel[base_level].offset;
2801
2802         state[0] = va >> 8;
2803         state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) |
2804                     S_008F14_DATA_FORMAT(data_format) |
2805                     S_008F14_NUM_FORMAT(num_format));
2806         state[2] = (S_008F18_WIDTH(width - 1) |
2807                     S_008F18_HEIGHT(height - 1));
2808         state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
2809                     S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
2810                     S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
2811                     S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
2812                     S_008F1C_BASE_LEVEL(res->nr_samples > 1 ?
2813                                         0 : first_level) |
2814                     S_008F1C_LAST_LEVEL(res->nr_samples > 1 ?
2815                                         util_logbase2(res->nr_samples) :
2816                                         last_level) |
2817                     S_008F1C_TILING_INDEX(si_tile_mode_index(tex, base_level, false)) |
2818                     S_008F1C_POW2_PAD(res->last_level > 0) |
2819                     S_008F1C_TYPE(type));
2820         state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
2821         state[5] = (S_008F24_BASE_ARRAY(first_layer) |
2822                     S_008F24_LAST_ARRAY(last_layer));
2823
2824         if (tex->dcc_offset) {
2825                 unsigned swap = r600_translate_colorswap(pipe_format, FALSE);
2826
2827                 state[6] = S_008F28_COMPRESSION_EN(1) | S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
2828                 state[7] = (tex->resource.gpu_address +
2829                             tex->dcc_offset +
2830                             surflevel[base_level].dcc_offset) >> 8;
2831         } else {
2832                 state[6] = 0;
2833                 state[7] = 0;
2834
2835                 /* The last dword is unused by hw. The shader uses it to clear
2836                  * bits in the first dword of sampler state.
2837                  */
2838                 if (screen->b.chip_class <= CIK && res->nr_samples <= 1) {
2839                         if (first_level == last_level)
2840                                 state[7] = C_008F30_MAX_ANISO_RATIO;
2841                         else
2842                                 state[7] = 0xffffffff;
2843                 }
2844         }
2845
2846         /* Initialize the sampler view for FMASK. */
2847         if (tex->fmask.size) {
2848                 uint32_t fmask_format;
2849
2850                 va = tex->resource.gpu_address + tex->fmask.offset;
2851
2852                 switch (res->nr_samples) {
2853                 case 2:
2854                         fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
2855                         break;
2856                 case 4:
2857                         fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
2858                         break;
2859                 case 8:
2860                         fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
2861                         break;
2862                 default:
2863                         assert(0);
2864                         fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
2865                 }
2866
2867                 fmask_state[0] = va >> 8;
2868                 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
2869                                  S_008F14_DATA_FORMAT(fmask_format) |
2870                                  S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT);
2871                 fmask_state[2] = S_008F18_WIDTH(width - 1) |
2872                                  S_008F18_HEIGHT(height - 1);
2873                 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
2874                                  S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
2875                                  S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
2876                                  S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
2877                                  S_008F1C_TILING_INDEX(tex->fmask.tile_mode_index) |
2878                                  S_008F1C_TYPE(si_tex_dim(res->target, target, 0));
2879                 fmask_state[4] = S_008F20_DEPTH(depth - 1) |
2880                                  S_008F20_PITCH(tex->fmask.pitch_in_pixels - 1);
2881                 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer) |
2882                                  S_008F24_LAST_ARRAY(last_layer);
2883                 fmask_state[6] = 0;
2884                 fmask_state[7] = 0;
2885         }
2886 }
2887
2888 /**
2889  * Create a sampler view.
2890  *
2891  * @param ctx           context
2892  * @param texture       texture
2893  * @param state         sampler view template
2894  * @param width0        width0 override (for compressed textures as int)
2895  * @param height0       height0 override (for compressed textures as int)
2896  * @param force_level   set the base address to the level (for compressed textures)
2897  */
2898 struct pipe_sampler_view *
2899 si_create_sampler_view_custom(struct pipe_context *ctx,
2900                               struct pipe_resource *texture,
2901                               const struct pipe_sampler_view *state,
2902                               unsigned width0, unsigned height0,
2903                               unsigned force_level)
2904 {
2905         struct si_context *sctx = (struct si_context*)ctx;
2906         struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view);
2907         struct r600_texture *tmp = (struct r600_texture*)texture;
2908         unsigned base_level, first_level, last_level;
2909         unsigned char state_swizzle[4];
2910         unsigned height, depth, width;
2911         unsigned last_layer = state->u.tex.last_layer;
2912
2913         if (!view)
2914                 return NULL;
2915
2916         /* initialize base object */
2917         view->base = *state;
2918         view->base.texture = NULL;
2919         view->base.reference.count = 1;
2920         view->base.context = ctx;
2921
2922         /* NULL resource, obey swizzle (only ZERO and ONE make sense). */
2923         if (!texture) {
2924                 view->state[3] = S_008F1C_DST_SEL_X(si_map_swizzle(state->swizzle_r)) |
2925                                  S_008F1C_DST_SEL_Y(si_map_swizzle(state->swizzle_g)) |
2926                                  S_008F1C_DST_SEL_Z(si_map_swizzle(state->swizzle_b)) |
2927                                  S_008F1C_DST_SEL_W(si_map_swizzle(state->swizzle_a)) |
2928                                  S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D);
2929                 return &view->base;
2930         }
2931
2932         pipe_resource_reference(&view->base.texture, texture);
2933
2934         if (state->format == PIPE_FORMAT_X24S8_UINT ||
2935             state->format == PIPE_FORMAT_S8X24_UINT ||
2936             state->format == PIPE_FORMAT_X32_S8X24_UINT ||
2937             state->format == PIPE_FORMAT_S8_UINT)
2938                 view->is_stencil_sampler = true;
2939
2940         /* Buffer resource. */
2941         if (texture->target == PIPE_BUFFER) {
2942                 si_make_buffer_descriptor(sctx->screen,
2943                                           (struct r600_resource *)texture,
2944                                           state->format,
2945                                           state->u.buf.first_element,
2946                                           state->u.buf.last_element,
2947                                           view->state);
2948
2949                 LIST_ADDTAIL(&view->list, &sctx->b.texture_buffers);
2950                 return &view->base;
2951         }
2952
2953         state_swizzle[0] = state->swizzle_r;
2954         state_swizzle[1] = state->swizzle_g;
2955         state_swizzle[2] = state->swizzle_b;
2956         state_swizzle[3] = state->swizzle_a;
2957
2958         base_level = 0;
2959         first_level = state->u.tex.first_level;
2960         last_level = state->u.tex.last_level;
2961         width = width0;
2962         height = height0;
2963         depth = texture->depth0;
2964
2965         if (force_level) {
2966                 assert(force_level == first_level &&
2967                        force_level == last_level);
2968                 base_level = force_level;
2969                 first_level = 0;
2970                 last_level = 0;
2971                 width = u_minify(width, force_level);
2972                 height = u_minify(height, force_level);
2973                 depth = u_minify(depth, force_level);
2974         }
2975
2976         /* This is not needed if state trackers set last_layer correctly. */
2977         if (state->target == PIPE_TEXTURE_1D ||
2978             state->target == PIPE_TEXTURE_2D ||
2979             state->target == PIPE_TEXTURE_RECT ||
2980             state->target == PIPE_TEXTURE_CUBE)
2981                 last_layer = state->u.tex.first_layer;
2982
2983         si_make_texture_descriptor(sctx->screen, tmp, true, state->target,
2984                                    state->format, state_swizzle,
2985                                    base_level, first_level, last_level,
2986                                    state->u.tex.first_layer, last_layer,
2987                                    width, height, depth,
2988                                    view->state, view->fmask_state);
2989
2990         return &view->base;
2991 }
2992
2993 static struct pipe_sampler_view *
2994 si_create_sampler_view(struct pipe_context *ctx,
2995                        struct pipe_resource *texture,
2996                        const struct pipe_sampler_view *state)
2997 {
2998         return si_create_sampler_view_custom(ctx, texture, state,
2999                                              texture ? texture->width0 : 0,
3000                                              texture ? texture->height0 : 0, 0);
3001 }
3002
3003 static void si_sampler_view_destroy(struct pipe_context *ctx,
3004                                     struct pipe_sampler_view *state)
3005 {
3006         struct si_sampler_view *view = (struct si_sampler_view *)state;
3007
3008         if (state->texture && state->texture->target == PIPE_BUFFER)
3009                 LIST_DELINIT(&view->list);
3010
3011         pipe_resource_reference(&state->texture, NULL);
3012         FREE(view);
3013 }
3014
3015 static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
3016 {
3017         return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
3018                wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
3019                (linear_filter &&
3020                 (wrap == PIPE_TEX_WRAP_CLAMP ||
3021                  wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
3022 }
3023
3024 static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
3025 {
3026         bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
3027                              state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;
3028
3029         return (state->border_color.ui[0] || state->border_color.ui[1] ||
3030                 state->border_color.ui[2] || state->border_color.ui[3]) &&
3031                (wrap_mode_uses_border_color(state->wrap_s, linear_filter) ||
3032                 wrap_mode_uses_border_color(state->wrap_t, linear_filter) ||
3033                 wrap_mode_uses_border_color(state->wrap_r, linear_filter));
3034 }
3035
3036 static void *si_create_sampler_state(struct pipe_context *ctx,
3037                                      const struct pipe_sampler_state *state)
3038 {
3039         struct si_context *sctx = (struct si_context *)ctx;
3040         struct r600_common_screen *rscreen = sctx->b.screen;
3041         struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
3042         unsigned border_color_type, border_color_index = 0;
3043         unsigned max_aniso = rscreen->force_aniso >= 0 ? rscreen->force_aniso
3044                                                        : state->max_anisotropy;
3045         unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso);
3046
3047         if (!rstate) {
3048                 return NULL;
3049         }
3050
3051         if (!sampler_state_needs_border_color(state))
3052                 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3053         else if (state->border_color.f[0] == 0 &&
3054                  state->border_color.f[1] == 0 &&
3055                  state->border_color.f[2] == 0 &&
3056                  state->border_color.f[3] == 0)
3057                 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3058         else if (state->border_color.f[0] == 0 &&
3059                  state->border_color.f[1] == 0 &&
3060                  state->border_color.f[2] == 0 &&
3061                  state->border_color.f[3] == 1)
3062                 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
3063         else if (state->border_color.f[0] == 1 &&
3064                  state->border_color.f[1] == 1 &&
3065                  state->border_color.f[2] == 1 &&
3066                  state->border_color.f[3] == 1)
3067                 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
3068         else {
3069                 int i;
3070
3071                 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
3072
3073                 /* Check if the border has been uploaded already. */
3074                 for (i = 0; i < sctx->border_color_count; i++)
3075                         if (memcmp(&sctx->border_color_table[i], &state->border_color,
3076                                    sizeof(state->border_color)) == 0)
3077                                 break;
3078
3079                 if (i >= SI_MAX_BORDER_COLORS) {
3080                         /* Getting 4096 unique border colors is very unlikely. */
3081                         fprintf(stderr, "radeonsi: The border color table is full. "
3082                                 "Any new border colors will be just black. "
3083                                 "Please file a bug.\n");
3084                         border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3085                 } else {
3086                         if (i == sctx->border_color_count) {
3087                                 /* Upload a new border color. */
3088                                 memcpy(&sctx->border_color_table[i], &state->border_color,
3089                                        sizeof(state->border_color));
3090                                 util_memcpy_cpu_to_le32(&sctx->border_color_map[i],
3091                                                         &state->border_color,
3092                                                         sizeof(state->border_color));
3093                                 sctx->border_color_count++;
3094                         }
3095
3096                         border_color_index = i;
3097                 }
3098         }
3099
3100         rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
3101                           S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
3102                           S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
3103                           S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
3104                           S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
3105                           S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
3106                           S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
3107                           S_008F30_COMPAT_MODE(sctx->b.chip_class >= VI));
3108         rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
3109                           S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)));
3110         rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
3111                           S_008F38_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) |
3112                           S_008F38_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) |
3113                           S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) |
3114                           S_008F38_MIP_POINT_PRECLAMP(1) |
3115                           S_008F38_DISABLE_LSB_CEIL(1) |
3116                           S_008F38_FILTER_PREC_FIX(1) |
3117                           S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI));
3118         rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) |
3119                          S_008F3C_BORDER_COLOR_TYPE(border_color_type);
3120         return rstate;
3121 }
3122
3123 static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
3124 {
3125         struct si_context *sctx = (struct si_context *)ctx;
3126
3127         if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask)
3128                 return;
3129
3130         sctx->sample_mask.sample_mask = sample_mask;
3131         si_mark_atom_dirty(sctx, &sctx->sample_mask.atom);
3132 }
3133
3134 static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom)
3135 {
3136         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
3137         unsigned mask = sctx->sample_mask.sample_mask;
3138
3139         radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
3140         radeon_emit(cs, mask | (mask << 16));
3141         radeon_emit(cs, mask | (mask << 16));
3142 }
3143
/* Destroy a sampler state created by si_create_sampler_state().
 *
 * The object is allocated with CALLOC_STRUCT, so it is released with the
 * matching FREE wrapper (as the other destroy callbacks in this file do)
 * rather than with a raw free().
 */
static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
{
        FREE(state);
}
3148
3149 /*
3150  * Vertex elements & buffers
3151  */
3152
3153 static void *si_create_vertex_elements(struct pipe_context *ctx,
3154                                        unsigned count,
3155                                        const struct pipe_vertex_element *elements)
3156 {
3157         struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
3158         int i;
3159
3160         assert(count <= SI_MAX_ATTRIBS);
3161         if (!v)
3162                 return NULL;
3163
3164         v->count = count;
3165         for (i = 0; i < count; ++i) {
3166                 const struct util_format_description *desc;
3167                 unsigned data_format, num_format;
3168                 int first_non_void;
3169
3170                 desc = util_format_description(elements[i].src_format);
3171                 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
3172                 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
3173                 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
3174
3175                 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
3176                                    S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
3177                                    S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
3178                                    S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
3179                                    S_008F0C_NUM_FORMAT(num_format) |
3180                                    S_008F0C_DATA_FORMAT(data_format);
3181                 v->format_size[i] = desc->block.bits / 8;
3182         }
3183         memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
3184
3185         return v;
3186 }
3187
3188 static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
3189 {
3190         struct si_context *sctx = (struct si_context *)ctx;
3191         struct si_vertex_element *v = (struct si_vertex_element*)state;
3192
3193         sctx->vertex_elements = v;
3194         sctx->vertex_buffers_dirty = true;
3195 }
3196
3197 static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
3198 {
3199         struct si_context *sctx = (struct si_context *)ctx;
3200
3201         if (sctx->vertex_elements == state)
3202                 sctx->vertex_elements = NULL;
3203         FREE(state);
3204 }
3205
3206 static void si_set_vertex_buffers(struct pipe_context *ctx,
3207                                   unsigned start_slot, unsigned count,
3208                                   const struct pipe_vertex_buffer *buffers)
3209 {
3210         struct si_context *sctx = (struct si_context *)ctx;
3211         struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot;
3212         int i;
3213
3214         assert(start_slot + count <= ARRAY_SIZE(sctx->vertex_buffer));
3215
3216         if (buffers) {
3217                 for (i = 0; i < count; i++) {
3218                         const struct pipe_vertex_buffer *src = buffers + i;
3219                         struct pipe_vertex_buffer *dsti = dst + i;
3220
3221                         pipe_resource_reference(&dsti->buffer, src->buffer);
3222                         dsti->buffer_offset = src->buffer_offset;
3223                         dsti->stride = src->stride;
3224                         r600_context_add_resource_size(ctx, src->buffer);
3225                 }
3226         } else {
3227                 for (i = 0; i < count; i++) {
3228                         pipe_resource_reference(&dst[i].buffer, NULL);
3229                 }
3230         }
3231         sctx->vertex_buffers_dirty = true;
3232 }
3233
3234 static void si_set_index_buffer(struct pipe_context *ctx,
3235                                 const struct pipe_index_buffer *ib)
3236 {
3237         struct si_context *sctx = (struct si_context *)ctx;
3238
3239         if (ib) {
3240                 pipe_resource_reference(&sctx->index_buffer.buffer, ib->buffer);
3241                 memcpy(&sctx->index_buffer, ib, sizeof(*ib));
3242                 r600_context_add_resource_size(ctx, ib->buffer);
3243         } else {
3244                 pipe_resource_reference(&sctx->index_buffer.buffer, NULL);
3245         }
3246 }
3247
3248 /*
3249  * Misc
3250  */
3251
3252 static void si_set_tess_state(struct pipe_context *ctx,
3253                               const float default_outer_level[4],
3254                               const float default_inner_level[2])
3255 {
3256         struct si_context *sctx = (struct si_context *)ctx;
3257         struct pipe_constant_buffer cb;
3258         float array[8];
3259
3260         memcpy(array, default_outer_level, sizeof(float) * 4);
3261         memcpy(array+4, default_inner_level, sizeof(float) * 2);
3262
3263         cb.buffer = NULL;
3264         cb.user_buffer = NULL;
3265         cb.buffer_size = sizeof(array);
3266
3267         si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer,
3268                                (void*)array, sizeof(array),
3269                                &cb.buffer_offset);
3270
3271         si_set_constant_buffer(sctx, &sctx->rw_buffers,
3272                                SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
3273         pipe_resource_reference(&cb.buffer, NULL);
3274 }
3275
3276 static void si_texture_barrier(struct pipe_context *ctx)
3277 {
3278         struct si_context *sctx = (struct si_context *)ctx;
3279
3280         sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
3281                          SI_CONTEXT_INV_GLOBAL_L2 |
3282                          SI_CONTEXT_FLUSH_AND_INV_CB |
3283                          SI_CONTEXT_CS_PARTIAL_FLUSH;
3284 }
3285
3286 static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
3287 {
3288         struct si_context *sctx = (struct si_context *)ctx;
3289
3290         /* Subsequent commands must wait for all shader invocations to
3291          * complete. */
3292         sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
3293                          SI_CONTEXT_CS_PARTIAL_FLUSH;
3294
3295         if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
3296                 sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
3297                                  SI_CONTEXT_INV_VMEM_L1;
3298
3299         if (flags & (PIPE_BARRIER_VERTEX_BUFFER |
3300                      PIPE_BARRIER_SHADER_BUFFER |
3301                      PIPE_BARRIER_TEXTURE |
3302                      PIPE_BARRIER_IMAGE |
3303                      PIPE_BARRIER_STREAMOUT_BUFFER |
3304                      PIPE_BARRIER_GLOBAL_BUFFER)) {
3305                 /* As far as I can tell, L1 contents are written back to L2
3306                  * automatically at end of shader, but the contents of other
3307                  * L1 caches might still be stale. */
3308                 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1;
3309         }
3310
3311         if (flags & PIPE_BARRIER_INDEX_BUFFER) {
3312                 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1;
3313
3314                 /* Indices are read through TC L2 since VI. */
3315                 if (sctx->screen->b.chip_class <= CIK)
3316                         sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
3317         }
3318
3319         if (flags & PIPE_BARRIER_FRAMEBUFFER)
3320                 sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
3321
3322         if (flags & (PIPE_BARRIER_MAPPED_BUFFER |
3323                      PIPE_BARRIER_FRAMEBUFFER |
3324                      PIPE_BARRIER_INDIRECT_BUFFER)) {
3325                 /* Not sure if INV_GLOBAL_L2 is the best thing here.
3326                  *
3327                  * We need to make sure that TC L1 & L2 are written back to
3328                  * memory, because neither CPU accesses nor CB fetches consider
3329                  * TC, but there's no need to invalidate any TC cache lines. */
3330                 sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
3331         }
3332 }
3333
3334 static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
3335 {
3336         struct pipe_blend_state blend;
3337
3338         memset(&blend, 0, sizeof(blend));
3339         blend.independent_blend_enable = true;
3340         blend.rt[0].colormask = 0xf;
3341         return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
3342 }
3343
/**
 * Adapter with the signature of the need_gfx_cs_space hook.
 *
 * Both \p num_dw and \p include_draw_vbo are ignored; the call is simply
 * forwarded to si_need_cs_space() for the owning si_context.
 */
static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
                                 bool include_draw_vbo)
{
        struct si_context *sctx = (struct si_context *)ctx;

        si_need_cs_space(sctx);
}
3349
3350 static void si_init_config(struct si_context *sctx);
3351
3352 void si_init_state_functions(struct si_context *sctx)
3353 {
3354         si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond);
3355         si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin);
3356         si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable);
3357         si_init_external_atom(sctx, &sctx->b.scissors.atom, &sctx->atoms.s.scissors);
3358         si_init_external_atom(sctx, &sctx->b.viewports.atom, &sctx->atoms.s.viewports);
3359
3360         si_init_atom(sctx, &sctx->cache_flush, &sctx->atoms.s.cache_flush, si_emit_cache_flush);
3361         si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
3362         si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
3363         si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
3364         si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
3365         si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
3366         si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state);
3367         si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color);
3368         si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs);
3369         si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state);
3370         si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref);
3371
3372         sctx->b.b.create_blend_state = si_create_blend_state;
3373         sctx->b.b.bind_blend_state = si_bind_blend_state;
3374         sctx->b.b.delete_blend_state = si_delete_blend_state;
3375         sctx->b.b.set_blend_color = si_set_blend_color;
3376
3377         sctx->b.b.create_rasterizer_state = si_create_rs_state;
3378         sctx->b.b.bind_rasterizer_state = si_bind_rs_state;
3379         sctx->b.b.delete_rasterizer_state = si_delete_rs_state;
3380
3381         sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state;
3382         sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
3383         sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state;
3384
3385         sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);
3386         sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
3387         sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
3388         sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);
3389         sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS);
3390
3391         sctx->b.b.set_clip_state = si_set_clip_state;
3392         sctx->b.b.set_stencil_ref = si_set_stencil_ref;
3393
3394         sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
3395         sctx->b.b.get_sample_position = cayman_get_sample_position;
3396
3397         sctx->b.b.create_sampler_state = si_create_sampler_state;
3398         sctx->b.b.delete_sampler_state = si_delete_sampler_state;
3399
3400         sctx->b.b.create_sampler_view = si_create_sampler_view;
3401         sctx->b.b.sampler_view_destroy = si_sampler_view_destroy;
3402
3403         sctx->b.b.set_sample_mask = si_set_sample_mask;
3404
3405         sctx->b.b.create_vertex_elements_state = si_create_vertex_elements;
3406         sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements;
3407         sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element;
3408         sctx->b.b.set_vertex_buffers = si_set_vertex_buffers;
3409         sctx->b.b.set_index_buffer = si_set_index_buffer;
3410
3411         sctx->b.b.texture_barrier = si_texture_barrier;
3412         sctx->b.b.memory_barrier = si_memory_barrier;
3413         sctx->b.b.set_min_samples = si_set_min_samples;
3414         sctx->b.b.set_tess_state = si_set_tess_state;
3415
3416         sctx->b.b.set_active_query_state = si_set_active_query_state;
3417         sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
3418         sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;
3419
3420         sctx->b.b.draw_vbo = si_draw_vbo;
3421
3422         si_init_config(sctx);
3423 }
3424
3425 static void si_query_opaque_metadata(struct r600_common_screen *rscreen,
3426                                      struct r600_texture *rtex,
3427                                      struct radeon_bo_metadata *md)
3428 {
3429         struct si_screen *sscreen = (struct si_screen*)rscreen;
3430         struct pipe_resource *res = &rtex->resource.b.b;
3431         static const unsigned char swizzle[] = {
3432                 PIPE_SWIZZLE_X,
3433                 PIPE_SWIZZLE_Y,
3434                 PIPE_SWIZZLE_Z,
3435                 PIPE_SWIZZLE_W
3436         };
3437         uint32_t desc[8], i;
3438         bool is_array = util_resource_is_array_texture(res);
3439
3440         /* DRM 2.x.x doesn't support this. */
3441         if (rscreen->info.drm_major != 3)
3442                 return;
3443
3444         assert(rtex->fmask.size == 0);
3445
3446         /* Metadata image format format version 1:
3447          * [0] = 1 (metadata format identifier)
3448          * [1] = (VENDOR_ID << 16) | PCI_ID
3449          * [2:9] = image descriptor for the whole resource
3450          *         [2] is always 0, because the base address is cleared
3451          *         [9] is the DCC offset bits [39:8] from the beginning of
3452          *             the buffer
3453          * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
3454          */
3455
3456         md->metadata[0] = 1; /* metadata image format version 1 */
3457
3458         /* TILE_MODE_INDEX is ambiguous without a PCI ID. */
3459         md->metadata[1] = (ATI_VENDOR_ID << 16) | rscreen->info.pci_id;
3460
3461         si_make_texture_descriptor(sscreen, rtex, true,
3462                                    res->target, res->format,
3463                                    swizzle, 0, 0, res->last_level, 0,
3464                                    is_array ? res->array_size - 1 : 0,
3465                                    res->width0, res->height0, res->depth0,
3466                                    desc, NULL);
3467
3468         /* Clear the base address and set the relative DCC offset. */
3469         desc[0] = 0;
3470         desc[1] &= C_008F14_BASE_ADDRESS_HI;
3471         desc[7] = rtex->dcc_offset >> 8;
3472
3473         /* Dwords [2:9] contain the image descriptor. */
3474         memcpy(&md->metadata[2], desc, sizeof(desc));
3475
3476         /* Dwords [10:..] contain the mipmap level offsets. */
3477         for (i = 0; i <= res->last_level; i++)
3478                 md->metadata[10+i] = rtex->surface.level[i].offset >> 8;
3479
3480         md->size_metadata = (11 + res->last_level) * 4;
3481 }
3482
3483 void si_init_screen_state_functions(struct si_screen *sscreen)
3484 {
3485         sscreen->b.query_opaque_metadata = si_query_opaque_metadata;
3486 }
3487
3488 static void
3489 si_write_harvested_raster_configs(struct si_context *sctx,
3490                                   struct si_pm4_state *pm4,
3491                                   unsigned raster_config,
3492                                   unsigned raster_config_1)
3493 {
3494         unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1);
3495         unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1);
3496         unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
3497         unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
3498         unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2);
3499         unsigned rb_per_se = num_rb / num_se;
3500         unsigned se_mask[4];
3501         unsigned se;
3502
3503         se_mask[0] = ((1 << rb_per_se) - 1);
3504         se_mask[1] = (se_mask[0] << rb_per_se);
3505         se_mask[2] = (se_mask[1] << rb_per_se);
3506         se_mask[3] = (se_mask[2] << rb_per_se);
3507
3508         se_mask[0] &= rb_mask;
3509         se_mask[1] &= rb_mask;
3510         se_mask[2] &= rb_mask;
3511         se_mask[3] &= rb_mask;
3512
3513         assert(num_se == 1 || num_se == 2 || num_se == 4);
3514         assert(sh_per_se == 1 || sh_per_se == 2);
3515         assert(rb_per_pkr == 1 || rb_per_pkr == 2);
3516
3517         /* XXX: I can't figure out what the *_XSEL and *_YSEL
3518          * fields are for, so I'm leaving them as their default
3519          * values. */
3520
3521         if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3522                              (!se_mask[2] && !se_mask[3]))) {
3523                 raster_config_1 &= C_028354_SE_PAIR_MAP;
3524
3525                 if (!se_mask[0] && !se_mask[1]) {
3526                         raster_config_1 |=
3527                                 S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3);
3528                 } else {
3529                         raster_config_1 |=
3530                                 S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0);
3531                 }
3532         }
3533
3534         for (se = 0; se < num_se; se++) {
3535                 unsigned raster_config_se = raster_config;
3536                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3537                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3538                 int idx = (se / 2) * 2;
3539
3540                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3541                         raster_config_se &= C_028350_SE_MAP;
3542
3543                         if (!se_mask[idx]) {
3544                                 raster_config_se |=
3545                                         S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
3546                         } else {
3547                                 raster_config_se |=
3548                                         S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
3549                         }
3550                 }
3551
3552                 pkr0_mask &= rb_mask;
3553                 pkr1_mask &= rb_mask;
3554                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3555                         raster_config_se &= C_028350_PKR_MAP;
3556
3557                         if (!pkr0_mask) {
3558                                 raster_config_se |=
3559                                         S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3);
3560                         } else {
3561                                 raster_config_se |=
3562                                         S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0);
3563                         }
3564                 }
3565
3566                 if (rb_per_se >= 2) {
3567                         unsigned rb0_mask = 1 << (se * rb_per_se);
3568                         unsigned rb1_mask = rb0_mask << 1;
3569
3570                         rb0_mask &= rb_mask;
3571                         rb1_mask &= rb_mask;
3572                         if (!rb0_mask || !rb1_mask) {
3573                                 raster_config_se &= C_028350_RB_MAP_PKR0;
3574
3575                                 if (!rb0_mask) {
3576                                         raster_config_se |=
3577                                                 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3);
3578                                 } else {
3579                                         raster_config_se |=
3580                                                 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0);
3581                                 }
3582                         }
3583
3584                         if (rb_per_se > 2) {
3585                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3586                                 rb1_mask = rb0_mask << 1;
3587                                 rb0_mask &= rb_mask;
3588                                 rb1_mask &= rb_mask;
3589                                 if (!rb0_mask || !rb1_mask) {
3590                                         raster_config_se &= C_028350_RB_MAP_PKR1;
3591
3592                                         if (!rb0_mask) {
3593                                                 raster_config_se |=
3594                                                         S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3);
3595                                         } else {
3596                                                 raster_config_se |=
3597                                                         S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0);
3598                                         }
3599                                 }
3600                         }
3601                 }
3602
3603                 /* GRBM_GFX_INDEX has a different offset on SI and CI+ */
3604                 if (sctx->b.chip_class < CIK)
3605                         si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
3606                                        SE_INDEX(se) | SH_BROADCAST_WRITES |
3607                                        INSTANCE_BROADCAST_WRITES);
3608                 else
3609                         si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
3610                                        S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) |
3611                                        S_030800_INSTANCE_BROADCAST_WRITES(1));
3612                 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
3613                 if (sctx->b.chip_class >= CIK)
3614                         si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
3615         }
3616
3617         /* GRBM_GFX_INDEX has a different offset on SI and CI+ */
3618         if (sctx->b.chip_class < CIK)
3619                 si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
3620                                SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
3621                                INSTANCE_BROADCAST_WRITES);
3622         else
3623                 si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
3624                                S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
3625                                S_030800_INSTANCE_BROADCAST_WRITES(1));
3626 }
3627
3628 static void si_init_config(struct si_context *sctx)
3629 {
3630         struct si_screen *sscreen = sctx->screen;
3631         unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
3632         unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
3633         unsigned raster_config, raster_config_1;
3634         uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
3635         struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
3636         int i;
3637
3638         if (!pm4)
3639                 return;
3640
3641         si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
3642         si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1));
3643         si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1));
3644         si_pm4_cmd_end(pm4, false);
3645
3646         si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
3647         si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
3648
3649         /* FIXME calculate these values somehow ??? */
3650         si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
3651         si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
3652         si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
3653
3654         si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
3655         si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
3656
3657         si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
3658         si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
3659         if (sctx->b.chip_class < CIK)
3660                 si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
3661                                S_008A14_CLIP_VTX_REORDER_ENA(1));
3662
3663         si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
3664         si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
3665
3666         si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
3667
3668         for (i = 0; i < 16; i++) {
3669                 si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0);
3670                 si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0));
3671         }
3672
3673         switch (sctx->screen->b.family) {
3674         case CHIP_TAHITI:
3675         case CHIP_PITCAIRN:
3676                 raster_config = 0x2a00126a;
3677                 raster_config_1 = 0x00000000;
3678                 break;
3679         case CHIP_VERDE:
3680                 raster_config = 0x0000124a;
3681                 raster_config_1 = 0x00000000;
3682                 break;
3683         case CHIP_OLAND:
3684                 raster_config = 0x00000082;
3685                 raster_config_1 = 0x00000000;
3686                 break;
3687         case CHIP_HAINAN:
3688                 raster_config = 0x00000000;
3689                 raster_config_1 = 0x00000000;
3690                 break;
3691         case CHIP_BONAIRE:
3692                 raster_config = 0x16000012;
3693                 raster_config_1 = 0x00000000;
3694                 break;
3695         case CHIP_HAWAII:
3696                 raster_config = 0x3a00161a;
3697                 raster_config_1 = 0x0000002e;
3698                 break;
3699         case CHIP_FIJI:
3700                 if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) {
3701                         /* old kernels with old tiling config */
3702                         raster_config = 0x16000012;
3703                         raster_config_1 = 0x0000002a;
3704                 } else {
3705                         raster_config = 0x3a00161a;
3706                         raster_config_1 = 0x0000002e;
3707                 }
3708                 break;
3709         case CHIP_POLARIS10:
3710                 raster_config = 0x16000012;
3711                 raster_config_1 = 0x0000002a;
3712                 break;
3713         case CHIP_POLARIS11:
3714                 raster_config = 0x16000012;
3715                 raster_config_1 = 0x00000000;
3716                 break;
3717         case CHIP_TONGA:
3718                 raster_config = 0x16000012;
3719                 raster_config_1 = 0x0000002a;
3720                 break;
3721         case CHIP_ICELAND:
3722                 if (num_rb == 1)
3723                         raster_config = 0x00000000;
3724                 else
3725                         raster_config = 0x00000002;
3726                 raster_config_1 = 0x00000000;
3727                 break;
3728         case CHIP_CARRIZO:
3729                 raster_config = 0x00000002;
3730                 raster_config_1 = 0x00000000;
3731                 break;
3732         case CHIP_KAVERI:
3733                 /* KV should be 0x00000002, but that causes problems with radeon */
3734                 raster_config = 0x00000000; /* 0x00000002 */
3735                 raster_config_1 = 0x00000000;
3736                 break;
3737         case CHIP_KABINI:
3738         case CHIP_MULLINS:
3739         case CHIP_STONEY:
3740                 raster_config = 0x00000000;
3741                 raster_config_1 = 0x00000000;
3742                 break;
3743         default:
3744                 fprintf(stderr,
3745                         "radeonsi: Unknown GPU, using 0 for raster_config\n");
3746                 raster_config = 0x00000000;
3747                 raster_config_1 = 0x00000000;
3748                 break;
3749         }
3750
3751         /* Always use the default config when all backends are enabled
3752          * (or when we failed to determine the enabled backends).
3753          */
3754         if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
3755                 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
3756                                raster_config);
3757                 if (sctx->b.chip_class >= CIK)
3758                         si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1,
3759                                        raster_config_1);
3760         } else {
3761                 si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
3762         }
3763
3764         si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
3765         si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
3766         si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
3767                        S_028244_BR_X(16384) | S_028244_BR_Y(16384));
3768         si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
3769         si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
3770                        S_028034_BR_X(16384) | S_028034_BR_Y(16384));
3771
3772         si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
3773         si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
3774         /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
3775         si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
3776         si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
3777         si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
3778         si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
3779         si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
3780         si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE,
3781                        S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
3782                        S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));
3783
3784         si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
3785         si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
3786         si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
3787
3788         if (sctx->b.chip_class >= CIK) {
3789                 si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
3790                 si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
3791                 si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
3792
3793                 if (sscreen->b.info.num_good_compute_units /
3794                     (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 4) {
3795                         /* Too few available compute units per SH. Disallowing
3796                          * VS to run on CU0 could hurt us more than late VS
3797                          * allocation would help.
3798                          *
3799                          * LATE_ALLOC_VS = 2 is the highest safe number.
3800                          */
3801                         si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
3802                         si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
3803                         si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
3804                 } else {
3805                         /* Set LATE_ALLOC_VS == 31. It should be less than
3806                          * the number of scratch waves. Limitations:
3807                          * - VS can't execute on CU0.
3808                          * - If HS writes outputs to LDS, LS can't execute on CU0.
3809                          */
3810                         si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffe));
3811                         si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
3812                         si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
3813                 }
3814
3815                 si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
3816         }
3817
3818         if (sctx->b.chip_class >= VI) {
3819                 si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
3820                                S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
3821                                S_028424_OVERWRITE_COMBINER_WATERMARK(4));
3822                 if (sctx->b.family < CHIP_POLARIS10)
3823                         si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
3824                 si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
3825                 si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
3826                                S_028B50_ACCUM_ISOLINE(32) |
3827                                S_028B50_ACCUM_TRI(11) |
3828                                S_028B50_ACCUM_QUAD(11) |
3829                                S_028B50_DONUT_SPLIT(16));
3830         }
3831
3832         if (sctx->b.family == CHIP_STONEY)
3833                 si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
3834
3835         si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
3836         if (sctx->b.chip_class >= CIK)
3837                 si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
3838         si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
3839                       RADEON_PRIO_BORDER_COLORS);
3840
3841         si_pm4_upload_indirect_buffer(sctx, pm4);
3842         sctx->init_config = pm4;
3843 }