OSDN Git Service

Merge remote-tracking branch 'mesa/12.0' into marshmallow-x86
[android-x86/external-mesa.git] / src / gallium / drivers / freedreno / a4xx / fd4_emit.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  * Authors:
26  *    Rob Clark <robclark@freedesktop.org>
27  */
28
29 #include "pipe/p_state.h"
30 #include "util/u_string.h"
31 #include "util/u_memory.h"
32 #include "util/u_helpers.h"
33 #include "util/u_format.h"
34 #include "util/u_viewport.h"
35
36 #include "freedreno_resource.h"
37 #include "freedreno_query_hw.h"
38
39 #include "fd4_emit.h"
40 #include "fd4_blend.h"
41 #include "fd4_context.h"
42 #include "fd4_program.h"
43 #include "fd4_rasterizer.h"
44 #include "fd4_texture.h"
45 #include "fd4_format.h"
46 #include "fd4_zsa.h"
47
48 static const enum adreno_state_block sb[] = {
49         [SHADER_VERTEX]   = SB_VERT_SHADER,
50         [SHADER_FRAGMENT] = SB_FRAG_SHADER,
51 };
52
53 /* regid:          base const register
54  * prsc or dwords: buffer containing constant values
55  * sizedwords:     size of const value buffer
56  */
57 void
58 fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
59                 uint32_t regid, uint32_t offset, uint32_t sizedwords,
60                 const uint32_t *dwords, struct pipe_resource *prsc)
61 {
62         uint32_t i, sz;
63         enum adreno_state_src src;
64
65         debug_assert((regid % 4) == 0);
66         debug_assert((sizedwords % 4) == 0);
67
68         if (prsc) {
69                 sz = 0;
70                 src = 0x2;  // TODO ??
71         } else {
72                 sz = sizedwords;
73                 src = SS_DIRECT;
74         }
75
76         OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
77         OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
78                         CP_LOAD_STATE_0_STATE_SRC(src) |
79                         CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
80                         CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4));
81         if (prsc) {
82                 struct fd_bo *bo = fd_resource(prsc)->bo;
83                 OUT_RELOC(ring, bo, offset,
84                                 CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
85         } else {
86                 OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
87                                 CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
88                 dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
89         }
90         for (i = 0; i < sz; i++) {
91                 OUT_RING(ring, dwords[i]);
92         }
93 }
94
95 static void
96 fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
97                 uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets)
98 {
99         uint32_t i;
100
101         debug_assert((regid % 4) == 0);
102         debug_assert((num % 4) == 0);
103
104         OUT_PKT3(ring, CP_LOAD_STATE, 2 + num);
105         OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
106                         CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
107                         CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
108                         CP_LOAD_STATE_0_NUM_UNIT(num/4));
109         OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
110                         CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
111
112         for (i = 0; i < num; i++) {
113                 if (bos[i]) {
114                         if (write) {
115                                 OUT_RELOCW(ring, bos[i], offsets[i], 0, 0);
116                         } else {
117                                 OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
118                         }
119                 } else {
120                         OUT_RING(ring, 0xbad00000 | (i << 16));
121                 }
122         }
123 }
124
125 static void
126 emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
127                 enum adreno_state_block sb, struct fd_texture_stateobj *tex,
128                 const struct ir3_shader_variant *v)
129 {
130         static const uint32_t bcolor_reg[] = {
131                         [SB_VERT_TEX] = REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR,
132                         [SB_FRAG_TEX] = REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
133         };
134         struct fd4_context *fd4_ctx = fd4_context(ctx);
135         unsigned i, off;
136         void *ptr;
137
138         u_upload_alloc(fd4_ctx->border_color_uploader,
139                         0, BORDER_COLOR_UPLOAD_SIZE,
140                        BORDER_COLOR_UPLOAD_SIZE, &off,
141                         &fd4_ctx->border_color_buf,
142                         &ptr);
143
144         fd_setup_border_colors(tex, ptr, 0);
145
146         if (tex->num_samplers > 0) {
147                 int num_samplers;
148
149                 /* not sure if this is an a420.0 workaround, but we seem
150                  * to need to emit these in pairs.. emit a final dummy
151                  * entry if odd # of samplers:
152                  */
153                 num_samplers = align(tex->num_samplers, 2);
154
155                 /* output sampler state: */
156                 OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * num_samplers));
157                 OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
158                                 CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
159                                 CP_LOAD_STATE_0_STATE_BLOCK(sb) |
160                                 CP_LOAD_STATE_0_NUM_UNIT(num_samplers));
161                 OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
162                                 CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
163                 for (i = 0; i < tex->num_samplers; i++) {
164                         static const struct fd4_sampler_stateobj dummy_sampler = {};
165                         const struct fd4_sampler_stateobj *sampler = tex->samplers[i] ?
166                                         fd4_sampler_stateobj(tex->samplers[i]) :
167                                         &dummy_sampler;
168                         OUT_RING(ring, sampler->texsamp0);
169                         OUT_RING(ring, sampler->texsamp1);
170                 }
171
172                 for (; i < num_samplers; i++) {
173                         OUT_RING(ring, 0x00000000);
174                         OUT_RING(ring, 0x00000000);
175                 }
176         }
177
178         if (tex->num_textures > 0) {
179                 unsigned num_textures = tex->num_textures + v->astc_srgb.count;
180
181                 /* emit texture state: */
182                 OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * num_textures));
183                 OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
184                                 CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
185                                 CP_LOAD_STATE_0_STATE_BLOCK(sb) |
186                                 CP_LOAD_STATE_0_NUM_UNIT(num_textures));
187                 OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
188                                 CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
189                 for (i = 0; i < tex->num_textures; i++) {
190                         static const struct fd4_pipe_sampler_view dummy_view = {};
191                         const struct fd4_pipe_sampler_view *view = tex->textures[i] ?
192                                         fd4_pipe_sampler_view(tex->textures[i]) :
193                                         &dummy_view;
194
195                         OUT_RING(ring, view->texconst0);
196                         OUT_RING(ring, view->texconst1);
197                         OUT_RING(ring, view->texconst2);
198                         OUT_RING(ring, view->texconst3);
199                         if (view->base.texture) {
200                                 struct fd_resource *rsc = fd_resource(view->base.texture);
201                                 OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0);
202                         } else {
203                                 OUT_RING(ring, 0x00000000);
204                         }
205                         OUT_RING(ring, 0x00000000);
206                         OUT_RING(ring, 0x00000000);
207                         OUT_RING(ring, 0x00000000);
208                 }
209
210                 for (i = 0; i < v->astc_srgb.count; i++) {
211                         static const struct fd4_pipe_sampler_view dummy_view = {};
212                         const struct fd4_pipe_sampler_view *view;
213                         unsigned idx = v->astc_srgb.orig_idx[i];
214
215                         view = tex->textures[idx] ?
216                                         fd4_pipe_sampler_view(tex->textures[idx]) :
217                                         &dummy_view;
218
219                         debug_assert(view->texconst0 & A4XX_TEX_CONST_0_SRGB);
220
221                         OUT_RING(ring, view->texconst0 & ~A4XX_TEX_CONST_0_SRGB);
222                         OUT_RING(ring, view->texconst1);
223                         OUT_RING(ring, view->texconst2);
224                         OUT_RING(ring, view->texconst3);
225                         if (view->base.texture) {
226                                 struct fd_resource *rsc = fd_resource(view->base.texture);
227                                 OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0);
228                         } else {
229                                 OUT_RING(ring, 0x00000000);
230                         }
231                         OUT_RING(ring, 0x00000000);
232                         OUT_RING(ring, 0x00000000);
233                         OUT_RING(ring, 0x00000000);
234                 }
235         } else {
236                 debug_assert(v->astc_srgb.count == 0);
237         }
238
239         OUT_PKT0(ring, bcolor_reg[sb], 1);
240         OUT_RELOC(ring, fd_resource(fd4_ctx->border_color_buf)->bo, off, 0, 0);
241
242         u_upload_unmap(fd4_ctx->border_color_uploader);
243 }
244
245 /* emit texture state for mem->gmem restore operation.. eventually it would
246  * be good to get rid of this and use normal CSO/etc state for more of these
247  * special cases..
248  */
249 void
250 fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs,
251                 struct pipe_surface **bufs)
252 {
253         unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS];
254         int i;
255
256         for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
257                 mrt_comp[i] = (i < nr_bufs) ? 0xf : 0;
258         }
259
260         /* output sampler state: */
261         OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * nr_bufs));
262         OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
263                         CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
264                         CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
265                         CP_LOAD_STATE_0_NUM_UNIT(nr_bufs));
266         OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
267                         CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
268         for (i = 0; i < nr_bufs; i++) {
269                 OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) |
270                                 A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) |
271                                 A4XX_TEX_SAMP_0_WRAP_S(A4XX_TEX_CLAMP_TO_EDGE) |
272                                 A4XX_TEX_SAMP_0_WRAP_T(A4XX_TEX_CLAMP_TO_EDGE) |
273                                 A4XX_TEX_SAMP_0_WRAP_R(A4XX_TEX_REPEAT));
274                 OUT_RING(ring, 0x00000000);
275         }
276
277         /* emit texture state: */
278         OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * nr_bufs));
279         OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
280                         CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
281                         CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
282                         CP_LOAD_STATE_0_NUM_UNIT(nr_bufs));
283         OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
284                         CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
285         for (i = 0; i < nr_bufs; i++) {
286                 if (bufs[i]) {
287                         struct fd_resource *rsc = fd_resource(bufs[i]->texture);
288                         enum pipe_format format = fd4_gmem_restore_format(bufs[i]->format);
289
290                         /* The restore blit_zs shader expects stencil in sampler 0,
291                          * and depth in sampler 1
292                          */
293                         if (rsc->stencil && (i == 0)) {
294                                 rsc = rsc->stencil;
295                                 format = fd4_gmem_restore_format(rsc->base.b.format);
296                         }
297
298                         /* note: PIPE_BUFFER disallowed for surfaces */
299                         unsigned lvl = bufs[i]->u.tex.level;
300                         struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
301                         unsigned offset = fd_resource_offset(rsc, lvl, bufs[i]->u.tex.first_layer);
302
303                         /* z32 restore is accomplished using depth write.  If there is
304                          * no stencil component (ie. PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
305                          * then no render target:
306                          *
307                          * (The same applies for z32_s8x24, since for stencil sampler
308                          * state the above 'if' will replace 'format' with s8)
309                          */
310                         if ((format == PIPE_FORMAT_Z32_FLOAT) ||
311                                         (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT))
312                                 mrt_comp[i] = 0;
313
314                         debug_assert(bufs[i]->u.tex.first_layer == bufs[i]->u.tex.last_layer);
315
316                         OUT_RING(ring, A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
317                                         A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
318                                         fd4_tex_swiz(format,  PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
319                                                         PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W));
320                         OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(bufs[i]->width) |
321                                         A4XX_TEX_CONST_1_HEIGHT(bufs[i]->height));
322                         OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) |
323                                         A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(format)));
324                         OUT_RING(ring, 0x00000000);
325                         OUT_RELOC(ring, rsc->bo, offset, 0, 0);
326                         OUT_RING(ring, 0x00000000);
327                         OUT_RING(ring, 0x00000000);
328                         OUT_RING(ring, 0x00000000);
329                 } else {
330                         OUT_RING(ring, A4XX_TEX_CONST_0_FMT(0) |
331                                         A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
332                                         A4XX_TEX_CONST_0_SWIZ_X(A4XX_TEX_ONE) |
333                                         A4XX_TEX_CONST_0_SWIZ_Y(A4XX_TEX_ONE) |
334                                         A4XX_TEX_CONST_0_SWIZ_Z(A4XX_TEX_ONE) |
335                                         A4XX_TEX_CONST_0_SWIZ_W(A4XX_TEX_ONE));
336                         OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(0) |
337                                         A4XX_TEX_CONST_1_HEIGHT(0));
338                         OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(0));
339                         OUT_RING(ring, 0x00000000);
340                         OUT_RING(ring, 0x00000000);
341                         OUT_RING(ring, 0x00000000);
342                         OUT_RING(ring, 0x00000000);
343                         OUT_RING(ring, 0x00000000);
344                 }
345         }
346
347         OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
348         OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
349                         A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
350                         A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
351                         A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
352                         A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
353                         A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
354                         A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
355                         A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
356 }
357
358 void
359 fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
360 {
361         int32_t i, j, last = -1;
362         uint32_t total_in = 0;
363         const struct fd_vertex_state *vtx = emit->vtx;
364         const struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
365         unsigned vertex_regid = regid(63, 0);
366         unsigned instance_regid = regid(63, 0);
367         unsigned vtxcnt_regid = regid(63, 0);
368
369         /* Note that sysvals come *after* normal inputs: */
370         for (i = 0; i < vp->inputs_count; i++) {
371                 if (!vp->inputs[i].compmask)
372                         continue;
373                 if (vp->inputs[i].sysval) {
374                         switch(vp->inputs[i].slot) {
375                         case SYSTEM_VALUE_BASE_VERTEX:
376                                 /* handled elsewhere */
377                                 break;
378                         case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
379                                 vertex_regid = vp->inputs[i].regid;
380                                 break;
381                         case SYSTEM_VALUE_INSTANCE_ID:
382                                 instance_regid = vp->inputs[i].regid;
383                                 break;
384                         case SYSTEM_VALUE_VERTEX_CNT:
385                                 vtxcnt_regid = vp->inputs[i].regid;
386                                 break;
387                         default:
388                                 unreachable("invalid system value");
389                                 break;
390                         }
391                 } else if (i < vtx->vtx->num_elements) {
392                         last = i;
393                 }
394         }
395
396         for (i = 0, j = 0; i <= last; i++) {
397                 assert(!vp->inputs[i].sysval);
398                 if (vp->inputs[i].compmask) {
399                         struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
400                         const struct pipe_vertex_buffer *vb =
401                                         &vtx->vertexbuf.vb[elem->vertex_buffer_index];
402                         struct fd_resource *rsc = fd_resource(vb->buffer);
403                         enum pipe_format pfmt = elem->src_format;
404                         enum a4xx_vtx_fmt fmt = fd4_pipe2vtx(pfmt);
405                         bool switchnext = (i != last) ||
406                                         (vertex_regid != regid(63, 0)) ||
407                                         (instance_regid != regid(63, 0)) ||
408                                         (vtxcnt_regid != regid(63, 0));
409                         bool isint = util_format_is_pure_integer(pfmt);
410                         uint32_t fs = util_format_get_blocksize(pfmt);
411                         uint32_t off = vb->buffer_offset + elem->src_offset;
412                         uint32_t size = fd_bo_size(rsc->bo) - off;
413                         debug_assert(fmt != ~0);
414
415                         OUT_PKT0(ring, REG_A4XX_VFD_FETCH(j), 4);
416                         OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
417                                         A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
418                                         COND(elem->instance_divisor, A4XX_VFD_FETCH_INSTR_0_INSTANCED) |
419                                         COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
420                         OUT_RELOC(ring, rsc->bo, off, 0, 0);
421                         OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(size));
422                         OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(MAX2(1, elem->instance_divisor)));
423
424                         OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(j), 1);
425                         OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
426                                         A4XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
427                                         A4XX_VFD_DECODE_INSTR_FORMAT(fmt) |
428                                         A4XX_VFD_DECODE_INSTR_SWAP(fd4_pipe2swap(pfmt)) |
429                                         A4XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
430                                         A4XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
431                                         A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
432                                         COND(isint, A4XX_VFD_DECODE_INSTR_INT) |
433                                         COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));
434
435                         total_in += vp->inputs[i].ncomp;
436                         j++;
437                 }
438         }
439
440         /* hw doesn't like to be configured for zero vbo's, it seems: */
441         if (last < 0) {
442                 /* just recycle the shader bo, we just need to point to *something*
443                  * valid:
444                  */
445                 struct fd_bo *dummy_vbo = vp->bo;
446                 bool switchnext = (vertex_regid != regid(63, 0)) ||
447                                 (instance_regid != regid(63, 0)) ||
448                                 (vtxcnt_regid != regid(63, 0));
449
450                 OUT_PKT0(ring, REG_A4XX_VFD_FETCH(0), 4);
451                 OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) |
452                                 A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) |
453                                 COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
454                 OUT_RELOC(ring, dummy_vbo, 0, 0, 0);
455                 OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(1));
456                 OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(1));
457
458                 OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(0), 1);
459                 OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
460                                 A4XX_VFD_DECODE_INSTR_WRITEMASK(0x1) |
461                                 A4XX_VFD_DECODE_INSTR_FORMAT(VFMT4_8_UNORM) |
462                                 A4XX_VFD_DECODE_INSTR_SWAP(XYZW) |
463                                 A4XX_VFD_DECODE_INSTR_REGID(regid(0,0)) |
464                                 A4XX_VFD_DECODE_INSTR_SHIFTCNT(1) |
465                                 A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
466                                 COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));
467
468                 total_in = 1;
469                 j = 1;
470         }
471
472         OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5);
473         OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
474                         0xa0000 | /* XXX */
475                         A4XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
476                         A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
477         OUT_RING(ring, A4XX_VFD_CONTROL_1_MAXSTORAGE(129) | // XXX
478                         A4XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
479                         A4XX_VFD_CONTROL_1_REGID4INST(instance_regid));
480         OUT_RING(ring, 0x00000000);   /* XXX VFD_CONTROL_2 */
481         OUT_RING(ring, A4XX_VFD_CONTROL_3_REGID_VTXCNT(vtxcnt_regid));
482         OUT_RING(ring, 0x00000000);   /* XXX VFD_CONTROL_4 */
483
484         /* cache invalidate, otherwise vertex fetch could see
485          * stale vbo contents:
486          */
487         OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
488         OUT_RING(ring, 0x00000000);
489         OUT_RING(ring, 0x00000012);
490 }
491
492 void
493 fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
494                 struct fd4_emit *emit)
495 {
496         const struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
497         const struct ir3_shader_variant *fp = fd4_emit_get_fp(emit);
498         uint32_t dirty = emit->dirty;
499
500         emit_marker(ring, 5);
501
502         if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->key.binning_pass) {
503                 struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
504                 unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
505
506                 for (unsigned i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
507                         mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
508                 }
509
510                 OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
511                 OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
512                                 A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
513                                 A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
514                                 A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
515                                 A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
516                                 A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
517                                 A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
518                                 A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
519         }
520
521         if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) {
522                 struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
523                 struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
524                 uint32_t rb_alpha_control = zsa->rb_alpha_control;
525
526                 if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])))
527                         rb_alpha_control &= ~A4XX_RB_ALPHA_CONTROL_ALPHA_TEST;
528
529                 OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
530                 OUT_RING(ring, rb_alpha_control);
531
532                 OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
533                 OUT_RING(ring, zsa->rb_stencil_control);
534                 OUT_RING(ring, zsa->rb_stencil_control2);
535         }
536
537         if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
538                 struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
539                 struct pipe_stencil_ref *sr = &ctx->stencil_ref;
540
541                 OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
542                 OUT_RING(ring, zsa->rb_stencilrefmask |
543                                 A4XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
544                 OUT_RING(ring, zsa->rb_stencilrefmask_bf |
545                                 A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
546         }
547
548         if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
549                 struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
550                 bool fragz = fp->has_kill | fp->writes_pos;
551                 bool clamp = !ctx->rasterizer->depth_clip;
552
553                 OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
554                 OUT_RING(ring, zsa->rb_depth_control |
555                                 COND(clamp, A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE) |
556                                 COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE) |
557                                 COND(fragz && fp->frag_coord, A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS));
558
559                 /* maybe this register/bitfield needs a better name.. this
560                  * appears to be just disabling early-z
561                  */
562                 OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
563                 OUT_RING(ring, zsa->gras_alpha_control |
564                                 COND(fragz, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE) |
565                                 COND(fragz && fp->frag_coord, A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS));
566         }
567
568         if (dirty & FD_DIRTY_RASTERIZER) {
569                 struct fd4_rasterizer_stateobj *rasterizer =
570                                 fd4_rasterizer_stateobj(ctx->rasterizer);
571
572                 OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
573                 OUT_RING(ring, rasterizer->gras_su_mode_control |
574                                 A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);
575
576                 OUT_PKT0(ring, REG_A4XX_GRAS_SU_POINT_MINMAX, 2);
577                 OUT_RING(ring, rasterizer->gras_su_point_minmax);
578                 OUT_RING(ring, rasterizer->gras_su_point_size);
579
580                 OUT_PKT0(ring, REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE, 2);
581                 OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
582                 OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
583
584                 OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
585                 OUT_RING(ring, rasterizer->gras_cl_clip_cntl);
586         }
587
588         /* NOTE: since primitive_restart is not actually part of any
589          * state object, we need to make sure that we always emit
590          * PRIM_VTX_CNTL.. either that or be more clever and detect
591          * when it changes.
592          */
593         if (emit->info) {
594                 const struct pipe_draw_info *info = emit->info;
595                 struct fd4_rasterizer_stateobj *rast =
596                         fd4_rasterizer_stateobj(ctx->rasterizer);
597                 uint32_t val = rast->pc_prim_vtx_cntl;
598
599                 if (info->indexed && info->primitive_restart)
600                         val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART;
601
602                 val |= COND(vp->writes_psize, A4XX_PC_PRIM_VTX_CNTL_PSIZE);
603
604                 if (fp->total_in > 0) {
605                         uint32_t varout = align(fp->total_in, 16) / 16;
606                         if (varout > 1)
607                                 varout = align(varout, 2);
608                         val |= A4XX_PC_PRIM_VTX_CNTL_VAROUT(varout);
609                 }
610
611                 OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2);
612                 OUT_RING(ring, val);
613                 OUT_RING(ring, rast->pc_prim_vtx_cntl2);
614         }
615
616         if (dirty & FD_DIRTY_SCISSOR) {
617                 struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
618
619                 OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
620                 OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) |
621                                 A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1));
622                 OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) |
623                                 A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny));
624
625                 ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, scissor->minx);
626                 ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, scissor->miny);
627                 ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, scissor->maxx);
628                 ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, scissor->maxy);
629         }
630
631         if (dirty & FD_DIRTY_VIEWPORT) {
632                 fd_wfi(ctx, ring);
633                 OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
634                 OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0]));
635                 OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0]));
636                 OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1]));
637                 OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1]));
638                 OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2]));
639                 OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
640         }
641
642         if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
643                 float zmin, zmax;
644                 int depth = 24;
645                 if (ctx->framebuffer.zsbuf) {
646                         depth = util_format_get_component_bits(
647                                         pipe_surface_format(ctx->framebuffer.zsbuf),
648                                         UTIL_FORMAT_COLORSPACE_ZS, 0);
649                 }
650                 util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz,
651                                                                 &zmin, &zmax);
652
653                 OUT_PKT0(ring, REG_A4XX_RB_VPORT_Z_CLAMP(0), 2);
654                 if (depth == 32) {
655                         OUT_RING(ring, fui(zmin));
656                         OUT_RING(ring, fui(zmax));
657                 } else if (depth == 16) {
658                         OUT_RING(ring, (uint32_t)(zmin * 0xffff));
659                         OUT_RING(ring, (uint32_t)(zmax * 0xffff));
660                 } else {
661                         OUT_RING(ring, (uint32_t)(zmin * 0xffffff));
662                         OUT_RING(ring, (uint32_t)(zmax * 0xffffff));
663                 }
664         }
665
666         if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
667                 struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
668                 unsigned n = pfb->nr_cbufs;
669                 /* if we have depth/stencil, we need at least one MRT: */
670                 if (pfb->zsbuf)
671                         n = MAX2(1, n);
672                 fd4_program_emit(ring, emit, n, pfb->cbufs);
673         }
674
675         if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */
676                 ir3_emit_consts(vp, ring, ctx, emit->info, dirty);
677                 if (!emit->key.binning_pass)
678                         ir3_emit_consts(fp, ring, ctx, emit->info, dirty);
679         }
680
681         if ((dirty & FD_DIRTY_BLEND)) {
682                 struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
683                 uint32_t i;
684
685                 for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
686                         enum pipe_format format = pipe_surface_format(
687                                         ctx->framebuffer.cbufs[i]);
688                         bool is_int = util_format_is_pure_integer(format);
689                         bool has_alpha = util_format_has_alpha(format);
690                         uint32_t control = blend->rb_mrt[i].control;
691                         uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha;
692
693                         if (is_int) {
694                                 control &= A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
695                                 control |= A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
696                         }
697
698                         if (has_alpha) {
699                                 blend_control |= blend->rb_mrt[i].blend_control_rgb;
700                         } else {
701                                 blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb;
702                                 control &= ~A4XX_RB_MRT_CONTROL_BLEND2;
703                         }
704
705                         OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
706                         OUT_RING(ring, control);
707
708                         OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
709                         OUT_RING(ring, blend_control);
710                 }
711
712                 OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
713                 OUT_RING(ring, blend->rb_fs_output |
714                                 A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
715         }
716
717         if (dirty & FD_DIRTY_BLEND_COLOR) {
718                 struct pipe_blend_color *bcolor = &ctx->blend_color;
719
720                 OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 8);
721                 OUT_RING(ring, A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0]) |
722                                 A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 0xff) |
723                                 A4XX_RB_BLEND_RED_SINT(bcolor->color[0] * 0x7f));
724                 OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[0]));
725                 OUT_RING(ring, A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]) |
726                                 A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 0xff) |
727                                 A4XX_RB_BLEND_GREEN_SINT(bcolor->color[1] * 0x7f));
728                 OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[1]));
729                 OUT_RING(ring, A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]) |
730                                 A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 0xff) |
731                                 A4XX_RB_BLEND_BLUE_SINT(bcolor->color[2] * 0x7f));
732                 OUT_RING(ring, A4XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
733                 OUT_RING(ring, A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]) |
734                                 A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 0xff) |
735                                 A4XX_RB_BLEND_ALPHA_SINT(bcolor->color[3] * 0x7f));
736                 OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
737         }
738
739         if (dirty & FD_DIRTY_VERTTEX) {
740                 if (vp->has_samp)
741                         emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex, vp);
742                 else
743                         dirty &= ~FD_DIRTY_VERTTEX;
744         }
745
746         if (dirty & FD_DIRTY_FRAGTEX) {
747                 if (fp->has_samp)
748                         emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex, fp);
749                 else
750                         dirty &= ~FD_DIRTY_FRAGTEX;
751         }
752
753         ctx->dirty &= ~dirty;
754 }
755
756 /* emit setup at begin of new cmdstream buffer (don't rely on previous
757  * state, there could have been a context switch between ioctls):
758  */
759 void
760 fd4_emit_restore(struct fd_context *ctx)
761 {
762         struct fd4_context *fd4_ctx = fd4_context(ctx);
763         struct fd_ringbuffer *ring = ctx->ring;
764
765         OUT_PKT0(ring, REG_A4XX_RBBM_PERFCTR_CTL, 1);
766         OUT_RING(ring, 0x00000001);
767
768         OUT_PKT0(ring, REG_A4XX_GRAS_DEBUG_ECO_CONTROL, 1);
769         OUT_RING(ring, 0x00000000);
770
771         OUT_PKT0(ring, REG_A4XX_SP_MODE_CONTROL, 1);
772         OUT_RING(ring, 0x00000006);
773
774         OUT_PKT0(ring, REG_A4XX_TPL1_TP_MODE_CONTROL, 1);
775         OUT_RING(ring, 0x0000003a);
776
777         OUT_PKT0(ring, REG_A4XX_UNKNOWN_0D01, 1);
778         OUT_RING(ring, 0x00000001);
779
780         OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E42, 1);
781         OUT_RING(ring, 0x00000000);
782
783         OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_WAYS_VFD, 1);
784         OUT_RING(ring, 0x00000007);
785
786         OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_MODE_CONTROL, 1);
787         OUT_RING(ring, 0x00000000);
788
789         OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
790         OUT_RING(ring, 0x00000000);
791         OUT_RING(ring, 0x00000012);
792
793         OUT_PKT0(ring, REG_A4XX_HLSQ_MODE_CONTROL, 1);
794         OUT_RING(ring, 0x00000000);
795
796         OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC5, 1);
797         OUT_RING(ring, 0x00000006);
798
799         OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC6, 1);
800         OUT_RING(ring, 0x00000000);
801
802         OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC2, 1);
803         OUT_RING(ring, 0x00040000);
804
805         OUT_PKT0(ring, REG_A4XX_UNKNOWN_2001, 1);
806         OUT_RING(ring, 0x00000000);
807
808         OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
809         OUT_RING(ring, 0x00001000);
810
811         OUT_PKT0(ring, REG_A4XX_UNKNOWN_20EF, 1);
812         OUT_RING(ring, 0x00000000);
813
814         OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4);
815         OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(0) |
816                         A4XX_RB_BLEND_RED_FLOAT(0.0));
817         OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(0) |
818                         A4XX_RB_BLEND_GREEN_FLOAT(0.0));
819         OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(0) |
820                         A4XX_RB_BLEND_BLUE_FLOAT(0.0));
821         OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(0x7fff) |
822                         A4XX_RB_BLEND_ALPHA_FLOAT(1.0));
823
824         OUT_PKT0(ring, REG_A4XX_UNKNOWN_2152, 1);
825         OUT_RING(ring, 0x00000000);
826
827         OUT_PKT0(ring, REG_A4XX_UNKNOWN_2153, 1);
828         OUT_RING(ring, 0x00000000);
829
830         OUT_PKT0(ring, REG_A4XX_UNKNOWN_2154, 1);
831         OUT_RING(ring, 0x00000000);
832
833         OUT_PKT0(ring, REG_A4XX_UNKNOWN_2155, 1);
834         OUT_RING(ring, 0x00000000);
835
836         OUT_PKT0(ring, REG_A4XX_UNKNOWN_2156, 1);
837         OUT_RING(ring, 0x00000000);
838
839         OUT_PKT0(ring, REG_A4XX_UNKNOWN_2157, 1);
840         OUT_RING(ring, 0x00000000);
841
842         OUT_PKT0(ring, REG_A4XX_UNKNOWN_21C3, 1);
843         OUT_RING(ring, 0x0000001d);
844
845         OUT_PKT0(ring, REG_A4XX_PC_GS_PARAM, 1);
846         OUT_RING(ring, 0x00000000);
847
848         OUT_PKT0(ring, REG_A4XX_UNKNOWN_21E6, 1);
849         OUT_RING(ring, 0x00000001);
850
851         OUT_PKT0(ring, REG_A4XX_PC_HS_PARAM, 1);
852         OUT_RING(ring, 0x00000000);
853
854         OUT_PKT0(ring, REG_A4XX_UNKNOWN_22D7, 1);
855         OUT_RING(ring, 0x00000000);
856
857         OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_OFFSET, 1);
858         OUT_RING(ring, 0x00000000);
859
860         OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_COUNT, 1);
861         OUT_RING(ring, A4XX_TPL1_TP_TEX_COUNT_VS(16) |
862                         A4XX_TPL1_TP_TEX_COUNT_HS(0) |
863                         A4XX_TPL1_TP_TEX_COUNT_DS(0) |
864                         A4XX_TPL1_TP_TEX_COUNT_GS(0));
865
866         OUT_PKT0(ring, REG_A4XX_TPL1_TP_FS_TEX_COUNT, 1);
867         OUT_RING(ring, 16);
868
869         /* we don't use this yet.. probably best to disable.. */
870         OUT_PKT3(ring, CP_SET_DRAW_STATE, 2);
871         OUT_RING(ring, CP_SET_DRAW_STATE_0_COUNT(0) |
872                         CP_SET_DRAW_STATE_0_DISABLE_ALL_GROUPS |
873                         CP_SET_DRAW_STATE_0_GROUP_ID(0));
874         OUT_RING(ring, CP_SET_DRAW_STATE_1_ADDR(0));
875
876         OUT_PKT0(ring, REG_A4XX_SP_VS_PVT_MEM_PARAM, 2);
877         OUT_RING(ring, 0x08000001);                  /* SP_VS_PVT_MEM_PARAM */
878         OUT_RELOC(ring, fd4_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR */
879
880         OUT_PKT0(ring, REG_A4XX_SP_FS_PVT_MEM_PARAM, 2);
881         OUT_RING(ring, 0x08000001);                  /* SP_FS_PVT_MEM_PARAM */
882         OUT_RELOC(ring, fd4_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR */
883
884         OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
885         OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
886                         A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
887                         A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
888                         A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
889
890         OUT_PKT0(ring, REG_A4XX_RB_MSAA_CONTROL, 1);
891         OUT_RING(ring, A4XX_RB_MSAA_CONTROL_DISABLE |
892                         A4XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE));
893
894         OUT_PKT0(ring, REG_A4XX_GRAS_CL_GB_CLIP_ADJ, 1);
895         OUT_RING(ring, A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
896                         A4XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
897
898         OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
899         OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS));
900
901         OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
902         OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
903
904         OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
905         OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR);
906
907         OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
908         OUT_RING(ring, 0x0);
909
910         fd_hw_query_enable(ctx, ring);
911
912         ctx->needs_rb_fbd = true;
913 }
914
/* a4xx implementation of the context's emit_ib hook: forwards the marker
 * pair [start, end) to __OUT_IB() for the given ring.
 *
 * NOTE(review): the constant 'true' is passed as __OUT_IB()'s second
 * argument; its meaning is defined by that helper, not here -- confirm
 * against __OUT_IB() before changing it.
 */
static void
fd4_emit_ib(struct fd_ringbuffer *ring,
		struct fd_ringmarker *start, struct fd_ringmarker *end)
{
	__OUT_IB(ring, true, start, end);
}
921
922 void
923 fd4_emit_init(struct pipe_context *pctx)
924 {
925         struct fd_context *ctx = fd_context(pctx);
926         ctx->emit_const = fd4_emit_const;
927         ctx->emit_const_bo = fd4_emit_const_bo;
928         ctx->emit_ib = fd4_emit_ib;
929 }