2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "util/u_dual_blend.h"
29 #include "util/u_half.h"
30 #include "brw_defines.h"
31 #include "intel_reg.h"
33 #include "ilo_context.h"
35 #include "ilo_format.h"
36 #include "ilo_resource.h"
37 #include "ilo_shader.h"
38 #include "ilo_state.h"
39 #include "ilo_gpe_gen6.h"
42 * Translate winsys tiling to hardware tiling.
/*
 * Map a winsys tiling mode (enum intel_tiling_mode) to BRW_SURFACE_TILED*
 * bits.  One visible branch returns BRW_SURFACE_TILED alone and another
 * returns it OR'ed with BRW_SURFACE_TILED_Y; an unrecognized mode trips the
 * assert.
 *
 * NOTE(review): the switch header, some case labels and some return
 * statements of this function are not visible in this listing.
 */
45 ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
48 case INTEL_TILING_NONE:
51 return BRW_SURFACE_TILED;
53 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
55 assert(!"unknown tiling");
61 * Translate a pipe primitive type to the matching hardware primitive type.
/*
 * Look up the _3DPRIM_* value for a PIPE_PRIM_* primitive type.
 *
 * The lookup table has PIPE_PRIM_MAX slots and is filled with designated
 * initializers, so any primitive type not listed keeps the value 0 and is
 * caught by the assert below.  The visible lines do not range-check `prim`
 * against PIPE_PRIM_MAX before indexing.
 */
64 ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
66 static const int prim_mapping[PIPE_PRIM_MAX] = {
67 [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,
68 [PIPE_PRIM_LINES] = _3DPRIM_LINELIST,
69 [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,
70 [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,
71 [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,
72 [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
73 [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
74 [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,
75 [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
76 [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,
77 [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
78 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
79 [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
80 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
/* a zero entry means the primitive type has no mapping in the table */
83 assert(prim_mapping[prim]);
85 return prim_mapping[prim];
89 * Translate a pipe texture target to the matching hardware surface type.
/*
 * Map a pipe texture target to a BRW_SURFACE_* surface type.
 *
 * Array targets share the surface type of their non-array counterpart
 * (1D_ARRAY -> 1D, 2D_ARRAY -> 2D, CUBE_ARRAY -> CUBE), and RECT is treated
 * as 2D.  An unknown target asserts and then falls back to
 * BRW_SURFACE_BUFFER.
 *
 * NOTE(review): the switch header and the first case labels are not visible
 * in this listing.
 */
92 ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
96 return BRW_SURFACE_BUFFER;
98 case PIPE_TEXTURE_1D_ARRAY:
99 return BRW_SURFACE_1D;
100 case PIPE_TEXTURE_2D:
101 case PIPE_TEXTURE_RECT:
102 case PIPE_TEXTURE_2D_ARRAY:
103 return BRW_SURFACE_2D;
104 case PIPE_TEXTURE_3D:
105 return BRW_SURFACE_3D;
106 case PIPE_TEXTURE_CUBE:
107 case PIPE_TEXTURE_CUBE_ARRAY:
108 return BRW_SURFACE_CUBE;
110 assert(!"unknown texture target");
111 return BRW_SURFACE_BUFFER;
116 * Translate a depth/stencil pipe format to the matching hardware
117 * format. Return -1 on errors.
/*
 * Map a depth/stencil pipe format to a BRW_DEPTHFORMAT_* value.
 *
 * Five formats are handled by the visible cases: Z16_UNORM, Z32_FLOAT,
 * Z24X8_UNORM, Z24_UNORM_S8_UINT and Z32_FLOAT_S8X24_UINT.
 *
 * NOTE(review): the handling of any other format is on lines not visible in
 * this listing.
 */
120 gen6_translate_depth_format(enum pipe_format format)
123 case PIPE_FORMAT_Z16_UNORM:
124 return BRW_DEPTHFORMAT_D16_UNORM;
125 case PIPE_FORMAT_Z32_FLOAT:
126 return BRW_DEPTHFORMAT_D32_FLOAT;
127 case PIPE_FORMAT_Z24X8_UNORM:
128 return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
129 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
130 return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
131 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
132 return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
139 * Translate a pipe logicop to the matching hardware logicop.
/*
 * Map a PIPE_LOGICOP_* value to the BRW_LOGICOPFUNCTION_* value of the same
 * name.  All sixteen logic ops are listed one-to-one; anything else asserts
 * and falls back to BRW_LOGICOPFUNCTION_CLEAR.
 */
142 gen6_translate_pipe_logicop(unsigned logicop)
145 case PIPE_LOGICOP_CLEAR: return BRW_LOGICOPFUNCTION_CLEAR;
146 case PIPE_LOGICOP_NOR: return BRW_LOGICOPFUNCTION_NOR;
147 case PIPE_LOGICOP_AND_INVERTED: return BRW_LOGICOPFUNCTION_AND_INVERTED;
148 case PIPE_LOGICOP_COPY_INVERTED: return BRW_LOGICOPFUNCTION_COPY_INVERTED;
149 case PIPE_LOGICOP_AND_REVERSE: return BRW_LOGICOPFUNCTION_AND_REVERSE;
150 case PIPE_LOGICOP_INVERT: return BRW_LOGICOPFUNCTION_INVERT;
151 case PIPE_LOGICOP_XOR: return BRW_LOGICOPFUNCTION_XOR;
152 case PIPE_LOGICOP_NAND: return BRW_LOGICOPFUNCTION_NAND;
153 case PIPE_LOGICOP_AND: return BRW_LOGICOPFUNCTION_AND;
154 case PIPE_LOGICOP_EQUIV: return BRW_LOGICOPFUNCTION_EQUIV;
155 case PIPE_LOGICOP_NOOP: return BRW_LOGICOPFUNCTION_NOOP;
156 case PIPE_LOGICOP_OR_INVERTED: return BRW_LOGICOPFUNCTION_OR_INVERTED;
157 case PIPE_LOGICOP_COPY: return BRW_LOGICOPFUNCTION_COPY;
158 case PIPE_LOGICOP_OR_REVERSE: return BRW_LOGICOPFUNCTION_OR_REVERSE;
159 case PIPE_LOGICOP_OR: return BRW_LOGICOPFUNCTION_OR;
160 case PIPE_LOGICOP_SET: return BRW_LOGICOPFUNCTION_SET;
162 assert(!"unknown logicop function");
163 return BRW_LOGICOPFUNCTION_CLEAR;
168 * Translate a pipe blend function to the matching hardware blend function.
/*
 * Map a PIPE_BLEND_* equation to the BRW_BLENDFUNCTION_* value of the same
 * name.  An unknown value asserts and falls back to BRW_BLENDFUNCTION_ADD.
 */
171 gen6_translate_pipe_blend(unsigned blend)
174 case PIPE_BLEND_ADD: return BRW_BLENDFUNCTION_ADD;
175 case PIPE_BLEND_SUBTRACT: return BRW_BLENDFUNCTION_SUBTRACT;
176 case PIPE_BLEND_REVERSE_SUBTRACT: return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
177 case PIPE_BLEND_MIN: return BRW_BLENDFUNCTION_MIN;
178 case PIPE_BLEND_MAX: return BRW_BLENDFUNCTION_MAX;
180 assert(!"unknown blend function");
181 return BRW_BLENDFUNCTION_ADD;
186 * Translate a pipe blend factor to the matching hardware blend factor.
/*
 * Map a PIPE_BLENDFACTOR_* value to the BRW_BLENDFACTOR_* value of the same
 * name.  An unknown value asserts and falls back to BRW_BLENDFACTOR_ONE.
 */
189 gen6_translate_pipe_blendfactor(unsigned blendfactor)
191 switch (blendfactor) {
192 case PIPE_BLENDFACTOR_ONE: return BRW_BLENDFACTOR_ONE;
193 case PIPE_BLENDFACTOR_SRC_COLOR: return BRW_BLENDFACTOR_SRC_COLOR;
194 case PIPE_BLENDFACTOR_SRC_ALPHA: return BRW_BLENDFACTOR_SRC_ALPHA;
195 case PIPE_BLENDFACTOR_DST_ALPHA: return BRW_BLENDFACTOR_DST_ALPHA;
196 case PIPE_BLENDFACTOR_DST_COLOR: return BRW_BLENDFACTOR_DST_COLOR;
197 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
198 case PIPE_BLENDFACTOR_CONST_COLOR: return BRW_BLENDFACTOR_CONST_COLOR;
199 case PIPE_BLENDFACTOR_CONST_ALPHA: return BRW_BLENDFACTOR_CONST_ALPHA;
200 case PIPE_BLENDFACTOR_SRC1_COLOR: return BRW_BLENDFACTOR_SRC1_COLOR;
201 case PIPE_BLENDFACTOR_SRC1_ALPHA: return BRW_BLENDFACTOR_SRC1_ALPHA;
202 case PIPE_BLENDFACTOR_ZERO: return BRW_BLENDFACTOR_ZERO;
203 case PIPE_BLENDFACTOR_INV_SRC_COLOR: return BRW_BLENDFACTOR_INV_SRC_COLOR;
204 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return BRW_BLENDFACTOR_INV_SRC_ALPHA;
205 case PIPE_BLENDFACTOR_INV_DST_ALPHA: return BRW_BLENDFACTOR_INV_DST_ALPHA;
206 case PIPE_BLENDFACTOR_INV_DST_COLOR: return BRW_BLENDFACTOR_INV_DST_COLOR;
207 case PIPE_BLENDFACTOR_INV_CONST_COLOR: return BRW_BLENDFACTOR_INV_CONST_COLOR;
208 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return BRW_BLENDFACTOR_INV_CONST_ALPHA;
209 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return BRW_BLENDFACTOR_INV_SRC1_COLOR;
210 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return BRW_BLENDFACTOR_INV_SRC1_ALPHA;
212 assert(!"unknown blend factor");
213 return BRW_BLENDFACTOR_ONE;
218 * Translate a pipe stencil op to the matching hardware stencil op.
/*
 * Map a PIPE_STENCIL_OP_* value to a BRW_STENCILOP_* value.
 *
 * The increment/decrement names do not line up one-to-one: the pipe
 * INCR/DECR ops map to the BRW *SAT variants, while the pipe *_WRAP ops map
 * to the plain BRW INCR/DECR.  An unknown value asserts and falls back to
 * BRW_STENCILOP_KEEP.
 */
221 gen6_translate_pipe_stencil_op(unsigned stencil_op)
223 switch (stencil_op) {
224 case PIPE_STENCIL_OP_KEEP: return BRW_STENCILOP_KEEP;
225 case PIPE_STENCIL_OP_ZERO: return BRW_STENCILOP_ZERO;
226 case PIPE_STENCIL_OP_REPLACE: return BRW_STENCILOP_REPLACE;
227 case PIPE_STENCIL_OP_INCR: return BRW_STENCILOP_INCRSAT;
228 case PIPE_STENCIL_OP_DECR: return BRW_STENCILOP_DECRSAT;
229 case PIPE_STENCIL_OP_INCR_WRAP: return BRW_STENCILOP_INCR;
230 case PIPE_STENCIL_OP_DECR_WRAP: return BRW_STENCILOP_DECR;
231 case PIPE_STENCIL_OP_INVERT: return BRW_STENCILOP_INVERT;
233 assert(!"unknown stencil op");
234 return BRW_STENCILOP_KEEP;
239 * Translate a pipe texture mipfilter to the matching hardware mipfilter.
/*
 * Map a PIPE_TEX_MIPFILTER_* value to the BRW_MIPFILTER_* value of the same
 * name.  An unknown value asserts and falls back to BRW_MIPFILTER_NONE.
 */
242 gen6_translate_tex_mipfilter(unsigned filter)
245 case PIPE_TEX_MIPFILTER_NEAREST: return BRW_MIPFILTER_NEAREST;
246 case PIPE_TEX_MIPFILTER_LINEAR: return BRW_MIPFILTER_LINEAR;
247 case PIPE_TEX_MIPFILTER_NONE: return BRW_MIPFILTER_NONE;
249 assert(!"unknown mipfilter");
250 return BRW_MIPFILTER_NONE;
255 * Translate a pipe texture filter to the matching hardware mapfilter.
/*
 * Map a PIPE_TEX_FILTER_* value to the BRW_MAPFILTER_* value of the same
 * name.  An unknown value asserts and falls back to BRW_MAPFILTER_NEAREST.
 */
258 gen6_translate_tex_filter(unsigned filter)
261 case PIPE_TEX_FILTER_NEAREST: return BRW_MAPFILTER_NEAREST;
262 case PIPE_TEX_FILTER_LINEAR: return BRW_MAPFILTER_LINEAR;
264 assert(!"unknown sampler filter");
265 return BRW_MAPFILTER_NEAREST;
270 * Translate a pipe texture coordinate wrapping mode to the matching hardware
/*
 * Map a PIPE_TEX_WRAP_* mode to a BRW_TEXCOORDMODE_* value.
 *
 * PIPE_TEX_WRAP_CLAMP is first rewritten to CLAMP_TO_EDGE or
 * CLAMP_TO_BORDER depending on `clamp_to_edge`, so it never reaches its own
 * case label.  The mirror-clamp variants are grouped with the failure path:
 * they assert and fall back to BRW_TEXCOORDMODE_WRAP.
 */
274 gen6_translate_tex_wrap(unsigned wrap, bool clamp_to_edge)
276 /* clamp to edge or border? */
277 if (wrap == PIPE_TEX_WRAP_CLAMP) {
278 wrap = (clamp_to_edge) ?
279 PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER;
283 case PIPE_TEX_WRAP_REPEAT: return BRW_TEXCOORDMODE_WRAP;
284 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return BRW_TEXCOORDMODE_CLAMP;
285 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return BRW_TEXCOORDMODE_CLAMP_BORDER;
286 case PIPE_TEX_WRAP_MIRROR_REPEAT: return BRW_TEXCOORDMODE_MIRROR;
287 case PIPE_TEX_WRAP_CLAMP:
288 case PIPE_TEX_WRAP_MIRROR_CLAMP:
289 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
290 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
292 assert(!"unknown sampler wrap mode");
293 return BRW_TEXCOORDMODE_WRAP;
298 * Translate a pipe DSA test function to the matching hardware compare
/*
 * Map a PIPE_FUNC_* test function to the BRW_COMPAREFUNCTION_* value of the
 * same name.  An unknown value asserts and falls back to
 * BRW_COMPAREFUNCTION_NEVER.
 */
302 gen6_translate_dsa_func(unsigned func)
305 case PIPE_FUNC_NEVER: return BRW_COMPAREFUNCTION_NEVER;
306 case PIPE_FUNC_LESS: return BRW_COMPAREFUNCTION_LESS;
307 case PIPE_FUNC_EQUAL: return BRW_COMPAREFUNCTION_EQUAL;
308 case PIPE_FUNC_LEQUAL: return BRW_COMPAREFUNCTION_LEQUAL;
309 case PIPE_FUNC_GREATER: return BRW_COMPAREFUNCTION_GREATER;
310 case PIPE_FUNC_NOTEQUAL: return BRW_COMPAREFUNCTION_NOTEQUAL;
311 case PIPE_FUNC_GEQUAL: return BRW_COMPAREFUNCTION_GEQUAL;
312 case PIPE_FUNC_ALWAYS: return BRW_COMPAREFUNCTION_ALWAYS;
314 assert(!"unknown depth/stencil/alpha test function");
315 return BRW_COMPAREFUNCTION_NEVER;
320 * Translate a pipe shadow compare function to the matching hardware shadow
/*
 * Map a PIPE_FUNC_* shadow compare function to a BRW_PREFILTER_* value.
 *
 * The mapping is deliberately not name-to-name.  As the in-function comment
 * explains, the two conventions place the reference value on opposite sides
 * of the comparison and return opposite results for "true", so each pipe
 * function is paired with a different BRW prefilter (for example
 * LESS -> LEQUAL, NEVER -> ALWAYS).  An unknown value asserts and falls
 * back to BRW_PREFILTER_NEVER.
 */
324 gen6_translate_shadow_func(unsigned func)
327 * For PIPE_FUNC_x, the reference value is on the left-hand side of the
328 * comparison, and 1.0 is returned when the comparison is true.
330 * For BRW_PREFILTER_x, the reference value is on the right-hand side of
331 * the comparison, and 0.0 is returned when the comparison is true.
334 case PIPE_FUNC_NEVER: return BRW_PREFILTER_ALWAYS;
335 case PIPE_FUNC_LESS: return BRW_PREFILTER_LEQUAL;
336 case PIPE_FUNC_EQUAL: return BRW_PREFILTER_NOTEQUAL;
337 case PIPE_FUNC_LEQUAL: return BRW_PREFILTER_LESS;
338 case PIPE_FUNC_GREATER: return BRW_PREFILTER_GEQUAL;
339 case PIPE_FUNC_NOTEQUAL: return BRW_PREFILTER_EQUAL;
340 case PIPE_FUNC_GEQUAL: return BRW_PREFILTER_GREATER;
341 case PIPE_FUNC_ALWAYS: return BRW_PREFILTER_NEVER;
343 assert(!"unknown shadow compare function");
344 return BRW_PREFILTER_NEVER;
349 * Translate an index size to the matching hardware index format.
/*
 * Map an index size (4, 2 or 1) to BRW_INDEX_DWORD, BRW_INDEX_WORD or
 * BRW_INDEX_BYTE respectively.  Any other size asserts and falls back to
 * BRW_INDEX_BYTE.
 */
352 gen6_translate_index_size(int size)
355 case 4: return BRW_INDEX_DWORD;
356 case 2: return BRW_INDEX_WORD;
357 case 1: return BRW_INDEX_BYTE;
359 assert(!"unknown index size");
360 return BRW_INDEX_BYTE;
/*
 * Emit STATE_BASE_ADDRESS, a 10-dword command.
 *
 * After the header, five base-address dwords are written as relocations
 * against the general, surface, dynamic, indirect-object and instruction
 * buffer objects, each with bit 0 set in the written value.  Four upper-bound
 * dwords follow: when a size is non-zero the bound is written as a
 * relocation of (size | 1) against the matching bo, otherwise a constant is
 * written to skip the range check.
 *
 * All four sizes must be multiples of 4096 (asserted).
 *
 * NOTE(review): several argument lines of the ilo_cp_write_bo() calls and
 * some else-branch writes are not visible in this listing.
 */
365 gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
366 struct intel_bo *general_state_bo,
367 struct intel_bo *surface_state_bo,
368 struct intel_bo *dynamic_state_bo,
369 struct intel_bo *indirect_object_bo,
370 struct intel_bo *instruction_bo,
371 uint32_t general_state_size,
372 uint32_t dynamic_state_size,
373 uint32_t indirect_object_size,
374 uint32_t instruction_size,
377 const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
378 const uint8_t cmd_len = 10;
380 ILO_GPE_VALID_GEN(dev, 6, 7);
382 /* 4K-page aligned */
383 assert(((general_state_size | dynamic_state_size |
384 indirect_object_size | instruction_size) & 0xfff) == 0);
386 ilo_cp_begin(cp, cmd_len);
387 ilo_cp_write(cp, cmd | (cmd_len - 2));
/* base addresses: the value 1 sets bit 0 of each relocated dword */
389 ilo_cp_write_bo(cp, 1, general_state_bo,
392 ilo_cp_write_bo(cp, 1, surface_state_bo,
393 INTEL_DOMAIN_SAMPLER,
395 ilo_cp_write_bo(cp, 1, dynamic_state_bo,
396 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
398 ilo_cp_write_bo(cp, 1, indirect_object_bo,
401 ilo_cp_write_bo(cp, 1, instruction_bo,
402 INTEL_DOMAIN_INSTRUCTION,
/* upper bounds: one per sized region, or a constant when the size is 0 */
405 if (general_state_size) {
406 ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
411 /* skip range check */
415 if (dynamic_state_size) {
416 ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
417 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
421 /* skip range check */
422 ilo_cp_write(cp, 0xfffff000 + 1);
425 if (indirect_object_size) {
426 ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
431 /* skip range check */
432 ilo_cp_write(cp, 0xfffff000 + 1);
435 if (instruction_size) {
436 ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
437 INTEL_DOMAIN_INSTRUCTION,
441 /* skip range check */
/*
 * Emit STATE_SIP, a 2-dword command whose payload dword is `sip`.
 *
 * The header follows the convention used by every other emitter in this
 * file: ilo_cp_begin() receives the plain dword count, and the length field
 * (cmd_len - 2) is OR'ed into the command header dword.  The previous code
 * OR'ed the length field into the ilo_cp_begin() count instead; that only
 * produced the right values because cmd_len - 2 is 0 for this command.
 *
 * NOTE(review): the remaining parameter lines and the closing lines of this
 * function are not visible in this listing.
 */
449 gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
453 const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
454 const uint8_t cmd_len = 2;
456 ILO_GPE_VALID_GEN(dev, 6, 7);
458 ilo_cp_begin(cp, cmd_len);
459 ilo_cp_write(cp, cmd | (cmd_len - 2));
460 ilo_cp_write(cp, sip);
/*
 * Emit 3DSTATE_VF_STATISTICS, a single-dword command.  `enable` is OR'ed
 * directly into the command dword, and no length field is added.
 */
465 gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
469 const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b);
470 const uint8_t cmd_len = 1;
472 ILO_GPE_VALID_GEN(dev, 6, 7);
474 ilo_cp_begin(cp, cmd_len);
475 ilo_cp_write(cp, cmd | enable);
/*
 * Emit PIPELINE_SELECT, a single-dword command.  `pipeline` must be 0x0 or
 * 0x1 (asserted) and is OR'ed directly into the command dword.
 *
 * `cmd` is declared uint32_t, like the command dword of every other emitter
 * in this file, so the header is built with unsigned arithmetic.
 */
480 gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
484 const uint32_t cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);
485 const uint8_t cmd_len = 1;
487 ILO_GPE_VALID_GEN(dev, 6, 7);
490 assert(pipeline == 0x0 || pipeline == 0x1);
492 ilo_cp_begin(cp, cmd_len);
493 ilo_cp_write(cp, cmd | pipeline);
/*
 * Emit MEDIA_VFE_STATE, an 8-dword command valid on gen6 only.
 *
 * dw2 packs (max_threads - 1) at bit 16 and num_urb_entries at bit 8, and
 * sets bits 7 and 6 (Reset Gateway Timer, Bypass Gateway Control).
 * dw4 packs urb_entry_size at bit 16 with a fixed CURBE allocation size of
 * 480.  The scratch, MBZ and scoreboard dwords are written as 0.
 *
 * NOTE(review): the trailing dword writes are not all visible in this
 * listing.
 */
498 gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
499 int max_threads, int num_urb_entries,
503 const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00);
504 const uint8_t cmd_len = 8;
507 ILO_GPE_VALID_GEN(dev, 6, 6);
509 dw2 = (max_threads - 1) << 16 |
510 num_urb_entries << 8 |
511 1 << 7 | /* Reset Gateway Timer */
512 1 << 6; /* Bypass Gateway Control */
514 dw4 = urb_entry_size << 16 | /* URB Entry Allocation Size */
515 480; /* CURBE Allocation Size */
517 ilo_cp_begin(cp, cmd_len);
518 ilo_cp_write(cp, cmd | (cmd_len - 2));
519 ilo_cp_write(cp, 0); /* scratch */
520 ilo_cp_write(cp, dw2);
521 ilo_cp_write(cp, 0); /* MBZ */
522 ilo_cp_write(cp, dw4);
523 ilo_cp_write(cp, 0); /* scoreboard */
/*
 * Emit MEDIA_CURBE_LOAD, a 4-dword command valid on gen6 only.
 *
 * `buf` must be a multiple of 32 (asserted).  `size` is rounded up to a
 * multiple of 32 before being written.  The payload is: a zero MBZ dword,
 * the rounded size, then `buf`.
 */
530 gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
531 uint32_t buf, int size,
534 const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01);
535 const uint8_t cmd_len = 4;
537 ILO_GPE_VALID_GEN(dev, 6, 6);
539 assert(buf % 32 == 0);
540 /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
541 size = align(size, 32);
543 ilo_cp_begin(cp, cmd_len);
544 ilo_cp_write(cp, cmd | (cmd_len - 2));
545 ilo_cp_write(cp, 0); /* MBZ */
546 ilo_cp_write(cp, size);
547 ilo_cp_write(cp, buf);
/*
 * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD, a 4-dword command valid on gen6
 * only.
 *
 * `offset` must be a multiple of 32 (asserted).  The length dword is
 * num_ids * 8 * 4, i.e. the size in bytes of num_ids descriptors of 8
 * dwords each.
 */
552 gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
553 uint32_t offset, int num_ids,
556 const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02);
557 const uint8_t cmd_len = 4;
559 ILO_GPE_VALID_GEN(dev, 6, 6);
561 assert(offset % 32 == 0);
563 ilo_cp_begin(cp, cmd_len);
564 ilo_cp_write(cp, cmd | (cmd_len - 2));
565 ilo_cp_write(cp, 0); /* MBZ */
566 /* every ID has 8 DWords */
567 ilo_cp_write(cp, num_ids * 8 * 4);
568 ilo_cp_write(cp, offset);
/*
 * Emit MEDIA_GATEWAY_STATE, a 2-dword command valid on gen6 only: the
 * header followed by dw1.
 *
 * NOTE(review): the lines that compute dw1 from id, byte and thread_count
 * are not visible in this listing.
 */
573 gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
574 int id, int byte, int thread_count,
577 const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x03);
578 const uint8_t cmd_len = 2;
581 ILO_GPE_VALID_GEN(dev, 6, 6);
587 ilo_cp_begin(cp, cmd_len);
588 ilo_cp_write(cp, cmd | (cmd_len - 2));
589 ilo_cp_write(cp, dw1);
/*
 * Emit MEDIA_STATE_FLUSH, a 2-dword command valid on gen6 only.
 * thread_count_water_mark is placed at bit 16 of dw1.
 *
 * NOTE(review): the rest of the dw1 expression is not visible in this
 * listing.
 */
594 gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
595 int thread_count_water_mark,
599 const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x04);
600 const uint8_t cmd_len = 2;
603 ILO_GPE_VALID_GEN(dev, 6, 6);
605 dw1 = thread_count_water_mark << 16 |
608 ilo_cp_begin(cp, cmd_len);
609 ilo_cp_write(cp, cmd | (cmd_len - 2));
610 ilo_cp_write(cp, dw1);
/*
 * Placeholder for MEDIA_OBJECT_WALKER: the only visible statement is an
 * assert that always fails, flagging the command as unsupported.
 */
615 gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
618 assert(!"MEDIA_OBJECT_WALKER unsupported");
/*
 * Emit 3DSTATE_BINDING_TABLE_POINTERS, a 4-dword command valid on gen6
 * only.
 *
 * The header always sets the VS, GS and PS modify flags together, and the
 * three binding table values follow in VS, GS, PS order.
 */
622 gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
623 uint32_t vs_binding_table,
624 uint32_t gs_binding_table,
625 uint32_t ps_binding_table,
628 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01);
629 const uint8_t cmd_len = 4;
631 ILO_GPE_VALID_GEN(dev, 6, 6);
633 ilo_cp_begin(cp, cmd_len);
634 ilo_cp_write(cp, cmd | (cmd_len - 2) |
635 GEN6_BINDING_TABLE_MODIFY_VS |
636 GEN6_BINDING_TABLE_MODIFY_GS |
637 GEN6_BINDING_TABLE_MODIFY_PS);
638 ilo_cp_write(cp, vs_binding_table);
639 ilo_cp_write(cp, gs_binding_table);
640 ilo_cp_write(cp, ps_binding_table);
/*
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS, a 4-dword command valid on gen6
 * only.
 *
 * The header always sets the VS, GS and PS change flags together, and the
 * three sampler state values follow in VS, GS, PS order.
 */
645 gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
646 uint32_t vs_sampler_state,
647 uint32_t gs_sampler_state,
648 uint32_t ps_sampler_state,
651 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02);
652 const uint8_t cmd_len = 4;
654 ILO_GPE_VALID_GEN(dev, 6, 6);
656 ilo_cp_begin(cp, cmd_len);
657 ilo_cp_write(cp, cmd | (cmd_len - 2) |
658 VS_SAMPLER_STATE_CHANGE |
659 GS_SAMPLER_STATE_CHANGE |
660 PS_SAMPLER_STATE_CHANGE);
661 ilo_cp_write(cp, vs_sampler_state);
662 ilo_cp_write(cp, gs_sampler_state);
663 ilo_cp_write(cp, ps_sampler_state);
/*
 * Emit 3DSTATE_URB, a 3-dword command valid on gen6 only.
 *
 * Entry sizes are converted to allocation sizes in 128-byte rows, rounding
 * up.  The entry counts are then derived as total_size / row_size /
 * alloc_size, rounded down to a multiple of 4 and capped at 256.  The VS
 * count must end up at least 24 (asserted); the GS count may be 0.
 * Allocation sizes are written as (alloc_size - 1).
 *
 * NOTE(review): the lines between the "[1, 5]" comment and the assert are
 * not visible in this listing.  Both alloc sizes are used as divisors
 * below, so confirm that zero is handled there.
 */
668 gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
669 int vs_total_size, int gs_total_size,
670 int vs_entry_size, int gs_entry_size,
673 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
674 const uint8_t cmd_len = 3;
675 const int row_size = 128; /* 1024 bits */
676 int vs_alloc_size, gs_alloc_size;
677 int vs_num_entries, gs_num_entries;
679 ILO_GPE_VALID_GEN(dev, 6, 6);
681 /* in 1024-bit URB rows */
682 vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
683 gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
685 /* the valid range is [1, 5] */
690 assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
692 /* the valid range is [24, 256] in multiples of 4 */
693 vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
694 if (vs_num_entries > 256)
695 vs_num_entries = 256;
696 assert(vs_num_entries >= 24);
698 /* the valid range is [0, 256] in multiples of 4 */
699 gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
700 if (gs_num_entries > 256)
701 gs_num_entries = 256;
703 ilo_cp_begin(cp, cmd_len);
704 ilo_cp_write(cp, cmd | (cmd_len - 2));
705 ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT |
706 vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT);
707 ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT |
708 (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT);
/*
 * Emit 3DSTATE_VERTEX_BUFFERS for the hardware vertex buffers in `ve`.
 *
 * Each hardware VB index maps to a pipe VB index through ve->vb_mapping.  A
 * hardware VB is emitted only when the bit for its pipe index is set in
 * `vbuffer_mask`.  The first loop visits the selected VBs (the statements
 * that size the command are not visible in this listing); the second writes
 * four dwords per VB: a control dword, start address, end address and
 * instance divisor.  A VB with no buffer, or with a stride above 2048,
 * takes the null-VB path instead.
 *
 * `vbuffer_mask` is 64 bits wide, so every mask shift uses a 1ULL operand.
 * A plain `1 << pipe_idx` is an int shift: it is undefined for
 * pipe_idx >= 31 and can never test the upper bits.  `1UL << 33` is
 * undefined where unsigned long is 32 bits wide.
 *
 * NOTE(review): "1 to 33 VBs" implies the mask is below (1ULL << 33), but
 * the assert uses <= and so also accepts exactly bit 33.  Left as-is;
 * confirm the intended bound.
 */
713 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
714 const struct pipe_vertex_buffer *vbuffers,
715 uint64_t vbuffer_mask,
716 const struct ilo_ve_state *ve,
719 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
723 ILO_GPE_VALID_GEN(dev, 6, 7);
726 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
728 * "From 1 to 33 VBs can be specified..."
730 assert(vbuffer_mask <= (1ULL << 33));
737 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
738 const unsigned pipe_idx = ve->vb_mapping[hw_idx];
740 if (vbuffer_mask & (1ULL << pipe_idx))
744 ilo_cp_begin(cp, cmd_len);
745 ilo_cp_write(cp, cmd | (cmd_len - 2));
747 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
748 const unsigned instance_divisor = ve->instance_divisors[hw_idx];
749 const unsigned pipe_idx = ve->vb_mapping[hw_idx];
750 const struct pipe_vertex_buffer *vb = &vbuffers[pipe_idx];
753 if (!(vbuffer_mask & (1ULL << pipe_idx)))
756 dw = hw_idx << GEN6_VB0_INDEX_SHIFT;
/* a non-zero divisor selects per-instance rather than per-vertex data */
758 if (instance_divisor)
759 dw |= GEN6_VB0_ACCESS_INSTANCEDATA;
761 dw |= GEN6_VB0_ACCESS_VERTEXDATA;
763 if (dev->gen >= ILO_GEN(7))
764 dw |= GEN7_VB0_ADDRESS_MODIFYENABLE;
766 /* use null vb if there is no buffer or the stride is out of range */
767 if (vb->buffer && vb->stride <= 2048) {
768 const struct ilo_buffer *buf = ilo_buffer(vb->buffer);
769 const uint32_t start_offset = vb->buffer_offset;
/* the end address is inclusive, hence size - 1 */
770 const uint32_t end_offset = buf->bo->get_size(buf->bo) - 1;
772 dw |= vb->stride << BRW_VB0_PITCH_SHIFT;
774 ilo_cp_write(cp, dw);
775 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
776 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
777 ilo_cp_write(cp, instance_divisor);
782 ilo_cp_write(cp, dw);
785 ilo_cp_write(cp, instance_divisor);
/*
 * Turn a vertex element CSO into an edge-flag element.
 *
 * The edge-flag enable bit is set in payload[0], and the component controls
 * are set to STORE_SRC for component 0 and NOSTORE for components 1-3, as
 * the quoted PRM text requires.  The source format is then read back from
 * payload[0]: R32_FLOAT is converted to R32_UINT by subtracting one from
 * the format field (the STATIC_ASSERT guarantees the two enum values are
 * adjacent), and any other format must be R8_UINT.
 *
 * NOTE(review): the assignment target of the component-control expression
 * is on a line not visible in this listing.
 */
793 ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
794 struct ilo_ve_cso *cso)
798 ILO_GPE_VALID_GEN(dev, 6, 7);
801 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
803 * "- This bit (Edge Flag Enable) must only be ENABLED on the last
804 * valid VERTEX_ELEMENT structure.
806 * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
807 * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
809 * - The Source Element Format must be set to the UINT format.
811 * - [DevSNB]: Edge Flags are not supported for QUADLIST
812 * primitives. Software may elect to convert QUADLIST primitives
813 * to some set of corresponding edge-flag-supported primitive
814 * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
817 cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE;
819 BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
820 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT |
821 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
822 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT;
825 * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
826 * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
827 * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
829 * Since all the hardware cares about is whether the flags are zero or not,
830 * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
/* the format field is masked to 9 bits */
832 format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff;
833 if (format == BRW_SURFACEFORMAT_R32_FLOAT) {
834 STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT ==
835 BRW_SURFACEFORMAT_R32_FLOAT - 1);
/* decrementing the format field turns R32_FLOAT into R32_UINT */
837 cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT);
840 assert(format == BRW_SURFACEFORMAT_R8_UINT);
/*
 * Initialize a vertex element CSO from four explicit component controls.
 *
 * payload[0] is set to GEN6_VE0_VALID only, with no vertex buffer index,
 * format or offset.  The four component controls are packed into a single
 * dword at their BRW_VE1_COMPONENT_n_SHIFT positions.
 *
 * NOTE(review): the assignment target of the component expression is on a
 * line not visible in this listing.
 */
845 ve_init_cso_with_components(const struct ilo_dev_info *dev,
846 int comp0, int comp1, int comp2, int comp3,
847 struct ilo_ve_cso *cso)
849 ILO_GPE_VALID_GEN(dev, 6, 7);
851 STATIC_ASSERT(Elements(cso->payload) >= 2);
852 cso->payload[0] = GEN6_VE0_VALID;
854 comp0 << BRW_VE1_COMPONENT_0_SHIFT |
855 comp1 << BRW_VE1_COMPONENT_1_SHIFT |
856 comp2 << BRW_VE1_COMPONENT_2_SHIFT |
857 comp3 << BRW_VE1_COMPONENT_3_SHIFT;
/*
 * Initialize a vertex element CSO from a pipe vertex element.
 *
 * All four components start as STORE_SRC.  The switch on the number of
 * source components has no breaks between its cases, so a format with
 * fewer than four components also overrides every later slot: missing
 * components 1 and 2 become STORE_0, and component 3 becomes STORE_1_INT
 * for pure-integer formats or STORE_1_FLT otherwise.
 *
 * The first packed dword combines the hardware VB index, the translated
 * vertex format and the source offset; the second packs the four component
 * controls.
 *
 * NOTE(review): the vb_index parameter line, the comp[] and format
 * declarations, and the payload assignment targets are not visible in this
 * listing.
 */
861 ve_init_cso(const struct ilo_dev_info *dev,
862 const struct pipe_vertex_element *state,
864 struct ilo_ve_cso *cso)
867 BRW_VE1_COMPONENT_STORE_SRC,
868 BRW_VE1_COMPONENT_STORE_SRC,
869 BRW_VE1_COMPONENT_STORE_SRC,
870 BRW_VE1_COMPONENT_STORE_SRC,
874 ILO_GPE_VALID_GEN(dev, 6, 7);
/* intentional fallthrough: each case also fills the later components */
876 switch (util_format_get_nr_components(state->src_format)) {
877 case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0;
878 case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0;
879 case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
880 BRW_VE1_COMPONENT_STORE_1_INT :
881 BRW_VE1_COMPONENT_STORE_1_FLT;
884 format = ilo_translate_vertex_format(state->src_format);
886 STATIC_ASSERT(Elements(cso->payload) >= 2);
888 vb_index << GEN6_VE0_INDEX_SHIFT |
890 format << BRW_VE0_FORMAT_SHIFT |
891 state->src_offset << BRW_VE0_SRC_OFFSET_SHIFT;
894 comp[0] << BRW_VE1_COMPONENT_0_SHIFT |
895 comp[1] << BRW_VE1_COMPONENT_1_SHIFT |
896 comp[2] << BRW_VE1_COMPONENT_2_SHIFT |
897 comp[3] << BRW_VE1_COMPONENT_3_SHIFT;
/*
 * Build an ilo_ve_state from an array of pipe vertex elements.
 *
 * For each element, the existing hardware VBs are searched for one with the
 * same pipe VB index and the same instance divisor.  If none matches, a new
 * hardware VB is appended and recorded in vb_mapping / instance_divisors.
 * The element's CSO is then initialized against that hardware VB index.
 *
 * NOTE(review): the num_states parameter line, the initialization of
 * ve->vb_count, and the statement that leaves the search loop on a match
 * are not visible in this listing.
 */
901 ilo_gpe_init_ve(const struct ilo_dev_info *dev,
903 const struct pipe_vertex_element *states,
904 struct ilo_ve_state *ve)
908 ILO_GPE_VALID_GEN(dev, 6, 7);
910 ve->count = num_states;
913 for (i = 0; i < num_states; i++) {
914 const unsigned pipe_idx = states[i].vertex_buffer_index;
915 const unsigned instance_divisor = states[i].instance_divisor;
919 * map the pipe vb to the hardware vb, which has a fixed instance
922 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
923 if (ve->vb_mapping[hw_idx] == pipe_idx &&
924 ve->instance_divisors[hw_idx] == instance_divisor)
928 /* create one if there is no matching hardware vb */
929 if (hw_idx >= ve->vb_count) {
930 hw_idx = ve->vb_count++;
932 ve->vb_mapping[hw_idx] = pipe_idx;
933 ve->instance_divisors[hw_idx] = instance_divisor;
936 ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
/*
 * Emit 3DSTATE_VERTEX_ELEMENTS.
 *
 * With no elements and no generated IDs, a single dummy element is emitted
 * whose components are (STORE_0, STORE_0, STORE_0, STORE_1_FLT).
 *
 * Otherwise the command is 2 * (count + prepend_generated_ids) + 1 dwords.
 * An optional leading element stores the vertex ID and instance ID in
 * components 0 and 1.  The elements from `ve` follow.  When
 * last_velement_edgeflag is set, the last element is copied and passed
 * through ve_set_cso_edgeflag() before being written, so `ve` itself is
 * not modified.
 *
 * NOTE(review): the edge-flag loop runs to ve->count - 1.  Whether
 * ve->count can be 0 on that path (generated IDs only) is not established
 * by the visible lines; confirm against callers.
 */
941 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
942 const struct ilo_ve_state *ve,
943 bool last_velement_edgeflag,
944 bool prepend_generated_ids,
947 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
951 ILO_GPE_VALID_GEN(dev, 6, 7);
954 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
956 * "Up to 34 (DevSNB+) vertex elements are supported."
958 assert(ve->count + prepend_generated_ids <= 34);
960 if (!ve->count && !prepend_generated_ids) {
961 struct ilo_ve_cso dummy;
963 ve_init_cso_with_components(dev,
964 BRW_VE1_COMPONENT_STORE_0,
965 BRW_VE1_COMPONENT_STORE_0,
966 BRW_VE1_COMPONENT_STORE_0,
967 BRW_VE1_COMPONENT_STORE_1_FLT,
971 ilo_cp_begin(cp, cmd_len);
972 ilo_cp_write(cp, cmd | (cmd_len - 2));
973 ilo_cp_write_multi(cp, dummy.payload, 2);
/* one header dword plus two dwords per element */
979 cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
981 ilo_cp_begin(cp, cmd_len);
982 ilo_cp_write(cp, cmd | (cmd_len - 2));
984 if (prepend_generated_ids) {
985 struct ilo_ve_cso gen_ids;
987 ve_init_cso_with_components(dev,
988 BRW_VE1_COMPONENT_STORE_VID,
989 BRW_VE1_COMPONENT_STORE_IID,
990 BRW_VE1_COMPONENT_NOSTORE,
991 BRW_VE1_COMPONENT_NOSTORE,
994 ilo_cp_write_multi(cp, gen_ids.payload, 2);
997 if (last_velement_edgeflag) {
998 struct ilo_ve_cso edgeflag;
1000 for (i = 0; i < ve->count - 1; i++)
1001 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
/* i now indexes the last element; patch a local copy only */
1003 edgeflag = ve->cso[i];
1004 ve_set_cso_edgeflag(dev, &edgeflag);
1005 ilo_cp_write_multi(cp, edgeflag.payload, 2);
1008 for (i = 0; i < ve->count; i++)
1009 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
/*
 * Emit 3DSTATE_INDEX_BUFFER, a 3-dword command.
 *
 * The start offset is ib->offset.  An offset not aligned to the index size
 * trips the assert and is then rounded down.  The end offset starts as the
 * bo size rounded down to a multiple of the index size.  The header
 * optionally carries BRW_CUT_INDEX_ENABLE, and start and end are written as
 * relocations against the buffer's bo.
 *
 * NOTE(review): the lines between ILO_GPE_VALID_GEN and the format
 * translation, the statement after the "it is inclusive" comment, and the
 * last operand of the header expression are not visible in this listing.
 * The `%` operations assume ib->index_size is non-zero; confirm where that
 * is guaranteed.
 */
1016 gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
1017 const struct pipe_index_buffer *ib,
1018 bool enable_cut_index,
1021 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
1022 const uint8_t cmd_len = 3;
1023 const struct ilo_buffer *buf = ilo_buffer(ib->buffer);
1024 uint32_t start_offset, end_offset;
1027 ILO_GPE_VALID_GEN(dev, 6, 7);
1032 format = gen6_translate_index_size(ib->index_size);
1034 start_offset = ib->offset;
1035 /* start_offset must be aligned to index size */
1036 if (start_offset % ib->index_size) {
1037 /* TODO need a u_upload_mgr to upload the IB to an aligned address */
1038 assert(!"unaligned index buffer offset");
1039 start_offset -= start_offset % ib->index_size;
1042 /* end_offset must also be aligned */
1043 end_offset = buf->bo->get_size(buf->bo);
1044 end_offset -= (end_offset % ib->index_size);
1045 /* it is inclusive */
1048 ilo_cp_begin(cp, cmd_len);
1049 ilo_cp_write(cp, cmd | (cmd_len - 2) |
1050 ((enable_cut_index) ? BRW_CUT_INDEX_ENABLE : 0) |
1052 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
1053 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
/*
 * Emit 3DSTATE_VIEWPORT_STATE_POINTERS, a 4-dword command valid on gen6
 * only.
 *
 * The header always sets the CLIP, SF and CC viewport modify flags
 * together, and the three viewport values follow in CLIP, SF, CC order.
 */
1058 gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
1059 uint32_t clip_viewport,
1060 uint32_t sf_viewport,
1061 uint32_t cc_viewport,
1064 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d);
1065 const uint8_t cmd_len = 4;
1067 ILO_GPE_VALID_GEN(dev, 6, 6);
1069 ilo_cp_begin(cp, cmd_len);
1070 ilo_cp_write(cp, cmd | (cmd_len - 2) |
1071 GEN6_CLIP_VIEWPORT_MODIFY |
1072 GEN6_SF_VIEWPORT_MODIFY |
1073 GEN6_CC_VIEWPORT_MODIFY);
1074 ilo_cp_write(cp, clip_viewport);
1075 ilo_cp_write(cp, sf_viewport);
1076 ilo_cp_write(cp, cc_viewport);
/*
 * Emit 3DSTATE_CC_STATE_POINTERS, a 4-dword command valid on gen6 only.
 *
 * Unlike the other pointer commands in this file, nothing is added to the
 * header.  Instead, bit 0 is set in each of the three payload dwords
 * (blend, depth/stencil and color-calc state).
 * NOTE(review): bit 0 is presumably the per-pointer "modify enable" flag;
 * confirm against the PRM.
 */
1081 gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
1082 uint32_t blend_state,
1083 uint32_t depth_stencil_state,
1084 uint32_t color_calc_state,
1087 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0e);
1088 const uint8_t cmd_len = 4;
1090 ILO_GPE_VALID_GEN(dev, 6, 6);
1092 ilo_cp_begin(cp, cmd_len);
1093 ilo_cp_write(cp, cmd | (cmd_len - 2));
1094 ilo_cp_write(cp, blend_state | 1);
1095 ilo_cp_write(cp, depth_stencil_state | 1);
1096 ilo_cp_write(cp, color_calc_state | 1);
/*
 * Emit 3DSTATE_SCISSOR_STATE_POINTERS, a 2-dword command: the header
 * followed by `scissor_rect`, written unmodified.
 */
1101 gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
1102 uint32_t scissor_rect,
1105 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0f);
1106 const uint8_t cmd_len = 2;
1108 ILO_GPE_VALID_GEN(dev, 6, 7);
1110 ilo_cp_begin(cp, cmd_len);
1111 ilo_cp_write(cp, cmd | (cmd_len - 2));
1112 ilo_cp_write(cp, scissor_rect);
/*
 * Emit 3DSTATE_VS, a 6-dword command.
 *
 * The function has two emission paths.  The first writes the header
 * followed by five zero dwords.
 * NOTE(review): the condition selecting that path is not visible in this
 * listing; it is presumably the "no vertex shader" case.
 *
 * The second path programs the shader:
 *   - the kernel dword is vs->cache_offset;
 *   - dw2 holds the sampler count in units of four samplers, rounded up;
 *   - dw4 holds the dispatch start GRF and the URB read length, which is
 *     the number of input pairs, (in.count + 1) / 2;
 *   - dw5 holds statistics enable and (max_threads - 1), placed at the HSW
 *     shift on gen >= 7.5 and at the GEN6 shift otherwise.
 *
 * max_threads is chosen from dev->gt with three different value pairs.
 * NOTE(review): the selector between those pairs (presumably a switch on
 * dev->gen), the num_samplers parameter line, the guard on the
 * FLOATING_POINT_MODE_ALT line, and the rest of the dw5 expression are not
 * visible in this listing.
 */
1117 gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
1118 const struct ilo_shader *vs,
1122 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
1123 const uint8_t cmd_len = 6;
1124 uint32_t dw2, dw4, dw5;
1125 int vue_read_len, max_threads;
1127 ILO_GPE_VALID_GEN(dev, 6, 7);
1130 ilo_cp_begin(cp, cmd_len);
1131 ilo_cp_write(cp, cmd | (cmd_len - 2));
1132 ilo_cp_write(cp, 0);
1133 ilo_cp_write(cp, 0);
1134 ilo_cp_write(cp, 0);
1135 ilo_cp_write(cp, 0);
1136 ilo_cp_write(cp, 0);
1142 * From the Sandy Bridge PRM, volume 2 part 1, page 135:
1144 * "(Vertex URB Entry Read Length) Specifies the number of pairs of
1145 * 128-bit vertex elements to be passed into the payload for each
1148 * "It is UNDEFINED to set this field to 0 indicating no Vertex URB
1149 * data to be read and passed to the thread."
1151 vue_read_len = (vs->in.count + 1) / 2;
1158 * From the Sandy Bridge PRM, volume 1 part 1, page 22:
1160 * "Device # of EUs #Threads/EU
1164 max_threads = (dev->gt == 2) ? 60 : 24;
1168 * From the Ivy Bridge PRM, volume 1 part 1, page 18:
1170 * "Device # of EUs #Threads/EU
1171 * Ivy Bridge (GT2) 16 8
1172 * Ivy Bridge (GT1) 6 6"
1174 max_threads = (dev->gt == 2) ? 128 : 36;
1177 /* see brwCreateContext() */
1178 max_threads = (dev->gt == 2) ? 280 : 70;
1185 dw2 = ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;
1187 dw2 |= GEN6_VS_FLOATING_POINT_MODE_ALT;
1189 dw4 = vs->in.start_grf << GEN6_VS_DISPATCH_START_GRF_SHIFT |
1190 vue_read_len << GEN6_VS_URB_READ_LENGTH_SHIFT |
1191 0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT;
1193 dw5 = GEN6_VS_STATISTICS_ENABLE |
1196 if (dev->gen >= ILO_GEN(7.5))
1197 dw5 |= (max_threads - 1) << HSW_VS_MAX_THREADS_SHIFT;
1199 dw5 |= (max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT;
1201 ilo_cp_begin(cp, cmd_len);
1202 ilo_cp_write(cp, cmd | (cmd_len - 2));
1203 ilo_cp_write(cp, vs->cache_offset);
1204 ilo_cp_write(cp, dw2);
1205 ilo_cp_write(cp, 0); /* scratch */
1206 ilo_cp_write(cp, dw4);
1207 ilo_cp_write(cp, dw5);
/**
 * Emit the 3DSTATE_GS command: a 7-dword packet (cmd_len) made of the
 * header, dw1, dw2, a zero dword, dw4, dw5 and dw6.
 * ILO_GPE_VALID_GEN(dev, 6, 6) restricts it to GEN6.
 *
 * dev: device info checked by ILO_GPE_VALID_GEN().
 * gs:  geometry shader; tested against NULL below.
 * vs:  vertex shader; tested against NULL below.  Its stream_output,
 *      out.count, gs_start_grf, in.discard_adj and svbi_post_inc fields
 *      are read.
 * cp:  receives the packet through ilo_cp_begin()/ilo_cp_write().
 *
 * NOTE(review): the guards that choose between the gs-based and the
 * vs-based DW4/DW6 setup, and the max_threads assignments, are not visible
 * in this copy -- confirm against the full source.
 */
1212 gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
1213 const struct ilo_shader *gs,
1214 const struct ilo_shader *vs,
1218 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
1219 const uint8_t cmd_len = 7;
1220 uint32_t dw1, dw2, dw4, dw5, dw6;
1223 ILO_GPE_VALID_GEN(dev, 6, 6);
/* neither a GS nor a VS that performs stream output */
1225 if (!gs && (!vs || !vs->stream_output)) {
1228 dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
1229 dw5 = GEN6_GS_STATISTICS_ENABLE;
1233 int max_threads, vue_read_len;
1236 * From the Sandy Bridge PRM, volume 2 part 1, page 154:
1238 * "Maximum Number of Threads valid range is [0,27] when Rendering
1239 * Enabled bit is set."
1241 * From the Sandy Bridge PRM, volume 2 part 1, page 173:
1243 * "Programming Note: If the GS stage is enabled, software must
1244 * always allocate at least one GS URB Entry. This is true even if
1245 * the GS thread never needs to output vertices to the pipeline,
1246 * e.g., when only performing stream output. This is an artifact of
1247 * the need to pass the GS thread an initial destination URB
1250 * As such, we always enable rendering, and limit the number of threads.
1253 /* maximum is 60, but limited to 28 */
1257 /* maximum is 24, but limited to 21 (see brwCreateContext()) */
1261 if (max_threads > 28)
1264 dw2 = GEN6_GS_SPF_MODE;
1266 dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
1267 GEN6_GS_STATISTICS_ENABLE |
1268 GEN6_GS_SO_STATISTICS_ENABLE |
1269 GEN6_GS_RENDERING_ENABLE;
1272 * we cannot make use of GEN6_GS_REORDER because it will reorder
1273 * triangle strips according to D3D rules (triangle 2N+1 uses vertices
1274 * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
1275 * (2N+2, 2N+1, 2N+3)).
1277 dw6 = GEN6_GS_ENABLE;
1280 /* VS outputs must match GS inputs */
1281 assert(gs->in.count == vs->out.count);
1282 for (i = 0; i < gs->in.count; i++) {
1283 assert(gs->in.semantic_names[i] == vs->out.semantic_names[i]);
1284 assert(gs->in.semantic_indices[i] == vs->out.semantic_indices[i]);
1288 * From the Sandy Bridge PRM, volume 2 part 1, page 153:
1290 * "It is UNDEFINED to set this field (Vertex URB Entry Read
1291 * Length) to 0 indicating no Vertex URB data to be read and
1292 * passed to the thread."
/* half of the GS input count, rounded up */
1294 vue_read_len = (gs->in.count + 1) / 2;
1298 dw1 = gs->cache_offset;
1299 dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
1300 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
1301 gs->in.start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
1303 if (gs->in.discard_adj)
1304 dw6 |= GEN6_GS_DISCARD_ADJACENCY;
1306 if (gs->stream_output) {
1307 dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
1308 if (gs->svbi_post_inc) {
1309 dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
1310 gs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
1316 * From the Sandy Bridge PRM, volume 2 part 1, page 153:
1318 * "It is UNDEFINED to set this field (Vertex URB Entry Read
1319 * Length) to 0 indicating no Vertex URB data to be read and
1320 * passed to the thread."
/* half of the VS output count, rounded up */
1322 vue_read_len = (vs->out.count + 1) / 2;
1327 dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
1328 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
1329 vs->gs_start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
1331 if (vs->in.discard_adj)
1332 dw6 |= GEN6_GS_DISCARD_ADJACENCY;
1334 dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
1335 if (vs->svbi_post_inc) {
1336 dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
1337 vs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
1342 ilo_cp_begin(cp, cmd_len);
1343 ilo_cp_write(cp, cmd | (cmd_len - 2));
1344 ilo_cp_write(cp, dw1);
1345 ilo_cp_write(cp, dw2);
1346 ilo_cp_write(cp, 0);
1347 ilo_cp_write(cp, dw4);
1348 ilo_cp_write(cp, dw5);
1349 ilo_cp_write(cp, dw6);
/**
 * Emit the 3DSTATE_CLIP command: a 4-dword packet (header, dw1, dw2, dw3).
 * ILO_GPE_VALID_GEN(dev, 6, 7) accepts GEN6 through GEN7.
 *
 * dev:               device info; dev->gen selects the GEN7-only dw1 bits.
 * rasterizer:        source of front_ccw, cull_face, clip_plane_enable,
 *                    clip_halfz, depth_clip, point/line size and smoothing,
 *                    and flatshade_first.
 * has_linear_interp: when set, dw2 gets
 *                    GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE.
 * enable_guardband:  request for GEN6_CLIP_GB_TEST; cleared locally for
 *                    per-vertex or larger-than-1.0 point sizes and for
 *                    smooth or wider-than-1.0 lines.
 * num_viewports:     (num_viewports - 1) is OR'ed into dw3.
 * cp:                receives the packet through ilo_cp_begin()/
 *                    ilo_cp_write().
 *
 * NOTE(review): the first ilo_cp_begin() sequence writes three zero dwords
 * and is presumably an early-out path; its guarding condition is not
 * visible in this copy -- confirm against the full source.
 */
1354 gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
1355 const struct pipe_rasterizer_state *rasterizer,
1356 bool has_linear_interp,
1357 bool enable_guardband,
1361 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
1362 const uint8_t cmd_len = 4;
1363 uint32_t dw1, dw2, dw3;
1365 ILO_GPE_VALID_GEN(dev, 6, 7);
1368 ilo_cp_begin(cp, cmd_len);
1369 ilo_cp_write(cp, cmd | (cmd_len - 2));
1370 ilo_cp_write(cp, 0);
1371 ilo_cp_write(cp, 0);
1372 ilo_cp_write(cp, 0);
1378 dw1 = GEN6_CLIP_STATISTICS_ENABLE;
1380 if (dev->gen >= ILO_GEN(7)) {
1382 * From the Ivy Bridge PRM, volume 2 part 1, page 219:
1384 * "Workaround : Due to Hardware issue "EarlyCull" needs to be
1385 * enabled only for the cases where the incoming primitive topology
1386 * into the clipper guaranteed to be Trilist."
1388 * What does this mean?
1391 GEN7_CLIP_EARLY_CULL;
1393 if (rasterizer->front_ccw)
1394 dw1 |= GEN7_CLIP_WINDING_CCW;
1396 switch (rasterizer->cull_face) {
1397 case PIPE_FACE_NONE:
1398 dw1 |= GEN7_CLIP_CULLMODE_NONE;
1400 case PIPE_FACE_FRONT:
1401 dw1 |= GEN7_CLIP_CULLMODE_FRONT;
1403 case PIPE_FACE_BACK:
1404 dw1 |= GEN7_CLIP_CULLMODE_BACK;
1406 case PIPE_FACE_FRONT_AND_BACK:
1407 dw1 |= GEN7_CLIP_CULLMODE_BOTH;
1412 dw2 = GEN6_CLIP_ENABLE |
1414 rasterizer->clip_plane_enable << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
1415 GEN6_CLIP_MODE_NORMAL;
1417 if (rasterizer->clip_halfz)
1418 dw2 |= GEN6_CLIP_API_D3D;
1420 dw2 |= GEN6_CLIP_API_OGL;
1422 if (rasterizer->depth_clip)
1423 dw2 |= GEN6_CLIP_Z_TEST;
1426 * There are several reasons that guard band test should be disabled
1428 * - when the renderer does not perform 2D clipping
1429 * - GL wide points (to avoid partially visibie object)
1430 * - GL wide or AA lines (to avoid partially visibie object)
1432 if (enable_guardband && true /* API_GL */) {
1433 if (rasterizer->point_size_per_vertex || rasterizer->point_size > 1.0f)
1434 enable_guardband = false;
1435 if (rasterizer->line_smooth || rasterizer->line_width > 1.0f)
1436 enable_guardband = false;
1439 if (enable_guardband)
1440 dw2 |= GEN6_CLIP_GB_TEST;
1442 if (has_linear_interp)
1443 dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
/* provoking vertex selection; mirrors the values used for 3DSTATE_SF */
1445 if (rasterizer->flatshade_first) {
1446 dw2 |= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
1447 0 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
1448 1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
1451 dw2 |= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
1452 1 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
1453 2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
1456 dw3 = 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
1457 0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
1458 GEN6_CLIP_FORCE_ZERO_RTAINDEX |
1459 (num_viewports - 1);
1461 ilo_cp_begin(cp, cmd_len);
1462 ilo_cp_write(cp, cmd | (cmd_len - 2));
1463 ilo_cp_write(cp, dw1);
1464 ilo_cp_write(cp, dw2);
1465 ilo_cp_write(cp, dw3);
1470 * Fill in DW2 to DW7 of 3DSTATE_SF.
/**
 * Fill the six rasterizer dwords of 3DSTATE_SF into dw[0..5]
 * (num_dwords must be 6, see the assert).  ILO_GPE_VALID_GEN(dev, 6, 7)
 * accepts GEN6 through GEN7.
 *
 * dw[0]: statistics/viewport-transform enables, depth offset enables,
 *        front/back fill modes and winding.
 * dw[1]: line AA, cull mode, line width, scissor and multisample
 *        rasterization bits.
 * dw[2]: line AA mode, vertex sub-pixel precision, provoking vertices and
 *        point width.
 * dw[3], dw[4], dw[5]: offset_const, offset_scale and offset_clamp, each
 *        passed through fui().
 *
 * rasterizer:       rasterizer state the fields above are derived from.
 * num_samples:      used for the multisample rasterization bits (its
 *                   declaration line is not visible in this copy).
 * depth_format:     depth/stencil format; remapped locally when
 *                   separate_stencil is set, then translated with
 *                   gen6_translate_depth_format().
 * separate_stencil: whether stencil lives in a separate buffer.
 *
 * NOTE(review): several guards (for example the one around the early
 * dw[1] assignment and the ones around the line_width adjustments) are not
 * visible in this copy -- confirm against the full source.
 */
1473 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
1474 const struct pipe_rasterizer_state *rasterizer,
1476 enum pipe_format depth_format,
1477 bool separate_stencil,
1478 uint32_t *dw, int num_dwords)
1480 float offset_const, offset_scale, offset_clamp;
1481 int format, line_width, point_width;
1483 ILO_GPE_VALID_GEN(dev, 6, 7);
1484 assert(num_dwords == 6);
1488 dw[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
1498 * Scale the constant term. The minimum representable value used by the HW
1499 * is not large enouch to be the minimum resolvable difference.
1501 offset_const = rasterizer->offset_units * 2.0f;
1503 offset_scale = rasterizer->offset_scale;
1504 offset_clamp = rasterizer->offset_clamp;
/* with a separate stencil buffer, keep only the depth part of the format */
1506 if (separate_stencil) {
1507 switch (depth_format) {
1508 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1509 depth_format = PIPE_FORMAT_Z24X8_UNORM;
1511 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1512 depth_format = PIPE_FORMAT_Z32_FLOAT;;
1514 case PIPE_FORMAT_S8_UINT:
1515 depth_format = PIPE_FORMAT_NONE;
1522 format = gen6_translate_depth_format(depth_format);
1523 /* FLOAT surface is assumed when there is no depth buffer */
1525 format = BRW_DEPTHFORMAT_D32_FLOAT;
1528 * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
1529 * pixels in the minor direction. We have to make the lines slightly
1530 * thicker, 0.5 pixel on both sides, so that they intersect that many
1531 * pixels are considered into the lines.
1533 * Line width is in U3.7.
1535 line_width = (int) ((rasterizer->line_width +
1536 (float) rasterizer->line_smooth) * 128.0f + 0.5f);
1537 line_width = CLAMP(line_width, 0, 1023);
1540 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
1542 * "Software must not program a value of 0.0 when running in
1543 * MSRASTMODE_ON_xxx modes - zero-width lines are not available when
1544 * multisampling rasterization is enabled."
1546 if (rasterizer->multisample) {
1548 line_width = 128; /* 1.0f */
1550 else if (line_width == 128 && !rasterizer->line_smooth) {
/* point size scaled by 8 and rounded, then clamped to [1, 2047] */
1556 point_width = (int) (rasterizer->point_size * 8.0f + 0.5f);
1557 point_width = CLAMP(point_width, 1, 2047);
1560 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1562 * "This bit (Statistics Enable) should be set whenever clipping is
1563 * enabled and the Statistics Enable bit is set in CLIP_STATE. It
1564 * should be cleared if clipping is disabled or Statistics Enable in
1565 * CLIP_STATE is clear."
1567 dw[0] = GEN6_SF_STATISTICS_ENABLE |
1568 GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
1570 /* XXX GEN6 path seems to work fine for GEN7 */
1571 if (false && dev->gen >= ILO_GEN(7)) {
1572 dw[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
1575 * From the Ivy Bridge PRM, volume 2 part 1, page 258:
1577 * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
1578 * Enable Solid , Global Depth Offset Enable Wireframe, and Global
1579 * Depth Offset Enable Point) should be set whenever non zero depth
1580 * bias (Slope, Bias) values are used. Setting this bit may have
1581 * some degradation of performance for some workloads."
1583 if (rasterizer->offset_tri ||
1584 rasterizer->offset_line ||
1585 rasterizer->offset_point) {
1586 /* XXX need to scale offset_const according to the depth format */
1587 dw[0] |= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS;
1589 dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID |
1590 GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME |
1591 GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
1594 offset_const = 0.0f;
1595 offset_scale = 0.0f;
1596 offset_clamp = 0.0f;
1600 if (dev->gen >= ILO_GEN(7))
1601 dw[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
1603 if (rasterizer->offset_tri)
1604 dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
1605 if (rasterizer->offset_line)
1606 dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
1607 if (rasterizer->offset_point)
1608 dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
1611 switch (rasterizer->fill_front) {
1612 case PIPE_POLYGON_MODE_FILL:
1613 dw[0] |= GEN6_SF_FRONT_SOLID;
1615 case PIPE_POLYGON_MODE_LINE:
1616 dw[0] |= GEN6_SF_FRONT_WIREFRAME;
1618 case PIPE_POLYGON_MODE_POINT:
1619 dw[0] |= GEN6_SF_FRONT_POINT;
1623 switch (rasterizer->fill_back) {
1624 case PIPE_POLYGON_MODE_FILL:
1625 dw[0] |= GEN6_SF_BACK_SOLID;
1627 case PIPE_POLYGON_MODE_LINE:
1628 dw[0] |= GEN6_SF_BACK_WIREFRAME;
1630 case PIPE_POLYGON_MODE_POINT:
1631 dw[0] |= GEN6_SF_BACK_POINT;
1635 if (rasterizer->front_ccw)
1636 dw[0] |= GEN6_SF_WINDING_CCW;
1640 if (rasterizer->line_smooth) {
1642 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
1644 * "This field (Anti-aliasing Enable) must be disabled if any of the
1645 * render targets have integer (UINT or SINT) surface format."
1647 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
1649 * "This field (Hierarchical Depth Buffer Enable) must be disabled
1650 * if Anti-aliasing Enable in 3DSTATE_SF is enabled.
1652 * TODO We do not check those yet.
1654 dw[1] |= GEN6_SF_LINE_AA_ENABLE |
1655 GEN6_SF_LINE_END_CAP_WIDTH_1_0;
1658 switch (rasterizer->cull_face) {
1659 case PIPE_FACE_NONE:
1660 dw[1] |= GEN6_SF_CULL_NONE;
1662 case PIPE_FACE_FRONT:
1663 dw[1] |= GEN6_SF_CULL_FRONT;
1665 case PIPE_FACE_BACK:
1666 dw[1] |= GEN6_SF_CULL_BACK;
1668 case PIPE_FACE_FRONT_AND_BACK:
1669 dw[1] |= GEN6_SF_CULL_BOTH;
1673 dw[1] |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
1675 if (rasterizer->scissor)
1676 dw[1] |= GEN6_SF_SCISSOR_ENABLE;
1678 if (num_samples > 1 && rasterizer->multisample)
1679 dw[1] |= GEN6_SF_MSRAST_ON_PATTERN;
1681 dw[2] = GEN6_SF_LINE_AA_MODE_TRUE |
1682 GEN6_SF_VERTEX_SUBPIXEL_8BITS;
1684 if (rasterizer->line_last_pixel)
1687 if (rasterizer->flatshade_first) {
1688 dw[2] |= 0 << GEN6_SF_TRI_PROVOKE_SHIFT |
1689 0 << GEN6_SF_LINE_PROVOKE_SHIFT |
1690 1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
1693 dw[2] |= 2 << GEN6_SF_TRI_PROVOKE_SHIFT |
1694 1 << GEN6_SF_LINE_PROVOKE_SHIFT |
1695 2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
1698 if (!rasterizer->point_size_per_vertex)
1699 dw[2] |= GEN6_SF_USE_STATE_POINT_WIDTH;
1701 dw[2] |= point_width;
1703 dw[3] = fui(offset_const);
1704 dw[4] = fui(offset_scale);
1705 dw[5] = fui(offset_clamp);
1709 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
/**
 * Fill the thirteen attribute-setup dwords of 3DSTATE_SF into dw[0..12]
 * (num_dwords must be 13, see the assert).  ILO_GPE_VALID_GEN(dev, 6, 7)
 * accepts GEN6 through GEN7.
 *
 * dw[0]:     number of FS inputs, URB entry read length and read offset,
 *            plus swizzle and point sprite origin bits; the shifts differ
 *            between GEN6 and GEN7.
 * dw[1..8]:  the first 16 attr_ctrl[] entries packed two per dword, the odd
 *            entry in the upper 16 bits.
 * dw[9]:     point_sprite_enable, one bit per FS input.
 * dw[10]:    const_interp_enable, one bit per FS input.
 *
 * rasterizer: source of sprite_coord_enable, sprite_coord_mode, flatshade
 *             and light_twoside.
 * fs:         fragment shader whose inputs are being set up.
 * last_sh:    last shader stage before the FS whose outputs are matched
 *             against the FS inputs; tested against NULL when computing
 *             max_src.
 *
 * NOTE(review): the guards selecting the early dw[0] values, the
 * vue_offset/vue_len setup and the swizzle-enable bits, as well as the
 * tail that fills dw[11..12], are not visible in this copy -- confirm
 * against the full source.
 */
1712 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
1713 const struct pipe_rasterizer_state *rasterizer,
1714 const struct ilo_shader *fs,
1715 const struct ilo_shader *last_sh,
1716 uint32_t *dw, int num_dwords)
1718 uint32_t point_sprite_enable, const_interp_enable;
1719 uint16_t attr_ctrl[PIPE_MAX_SHADER_INPUTS];
1720 int vue_offset, vue_len;
1721 int dst, max_src, i;
1723 ILO_GPE_VALID_GEN(dev, 6, 7);
1724 assert(num_dwords == 13);
1727 if (dev->gen >= ILO_GEN(7))
1728 dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT;
1730 dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;
1732 for (i = 1; i < num_dwords; i++)
1739 /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
1740 assert(last_sh->out.semantic_names[0] == TGSI_SEMANTIC_PSIZE);
1741 assert(last_sh->out.semantic_names[1] == TGSI_SEMANTIC_POSITION);
1743 vue_len = last_sh->out.count - vue_offset;
1747 vue_len = fs->in.count;
1750 point_sprite_enable = 0;
1751 const_interp_enable = 0;
1752 max_src = (last_sh) ? 0 : fs->in.count - 1;
/* one iteration per FS input; dst is the FS input slot */
1754 for (dst = 0; dst < fs->in.count; dst++) {
1755 const int semantic = fs->in.semantic_names[dst];
1756 const int index = fs->in.semantic_indices[dst];
1757 const int interp = fs->in.interp[dst];
1762 * From the Ivy Bridge PRM, volume 2 part 1, page 268:
1764 * "This field (Point Sprite Texture Coordinate Enable) must be
1765 * programmed to 0 when non-point primitives are rendered."
1767 * TODO We do not check that yet.
1769 if (semantic == TGSI_SEMANTIC_GENERIC &&
1770 (rasterizer->sprite_coord_enable & (1 << index)))
1771 point_sprite_enable |= 1 << dst;
1773 if (interp == TGSI_INTERPOLATE_CONSTANT ||
1774 (interp == TGSI_INTERPOLATE_COLOR && rasterizer->flatshade))
1775 const_interp_enable |= 1 << dst;
1782 /* find the matching VS/GS OUT for FS IN[i] */
1784 for (src = 0; src < vue_len; src++) {
1785 if (last_sh->out.semantic_names[vue_offset + src] != semantic ||
1786 last_sh->out.semantic_indices[vue_offset + src] != index)
1791 if (semantic == TGSI_SEMANTIC_COLOR && rasterizer->light_twoside &&
1792 src < vue_len - 1) {
1793 const int next = src + 1;
1795 if (last_sh->out.semantic_names[vue_offset + next] ==
1796 TGSI_SEMANTIC_BCOLOR &&
1797 last_sh->out.semantic_indices[vue_offset + next] == index) {
1798 ctrl |= ATTRIBUTE_SWIZZLE_INPUTATTR_FACING <<
1799 ATTRIBUTE_SWIZZLE_SHIFT;
1807 /* if there is no COLOR, try BCOLOR */
1808 if (src >= vue_len && semantic == TGSI_SEMANTIC_COLOR) {
1809 for (src = 0; src < vue_len; src++) {
1810 if (last_sh->out.semantic_names[vue_offset + src] !=
1811 TGSI_SEMANTIC_BCOLOR ||
1812 last_sh->out.semantic_indices[vue_offset + src] != index)
1820 if (src < vue_len) {
1821 attr_ctrl[dst] = ctrl;
1827 * The previous shader stage does not output this attribute. The
1828 * value is supposed to be undefined for fs, unless the attribute
1829 * goes through point sprite replacement or the attribute is
1830 * TGSI_SEMANTIC_POSITION. In all cases, we do not care which source
1831 * attribute is picked.
1833 * We should update the fs code and omit the output of
1834 * TGSI_SEMANTIC_POSITION here.
1840 for (; dst < Elements(attr_ctrl); dst++)
1843 /* only the first 16 attributes can be remapped */
1844 for (dst = 16; dst < Elements(attr_ctrl); dst++)
1845 assert(attr_ctrl[dst] == 0 || attr_ctrl[dst] == dst);
1848 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1850 * "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
1851 * 0 indicating no Vertex URB data to be read.
1853 * This field should be set to the minimum length required to read the
1854 * maximum source attribute. The maximum source attribute is indicated
1855 * by the maximum value of the enabled Attribute # Source Attribute if
1856 * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
1857 * enable is not set.
1859 * read_length = ceiling((max_source_attr+1)/2)
1861 * [errata] Corruption/Hang possible if length programmed larger than
1864 vue_len = max_src + 1;
1866 assert(fs->in.count <= 32);
1867 assert(vue_offset % 2 == 0);
1869 if (dev->gen >= ILO_GEN(7)) {
1870 dw[0] = fs->in.count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
1871 (vue_len + 1) / 2 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
1872 vue_offset / 2 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
1875 dw[0] |= GEN7_SBE_SWIZZLE_ENABLE;
1878 dw[0] = fs->in.count << GEN6_SF_NUM_OUTPUTS_SHIFT |
1879 (vue_len + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
1880 vue_offset / 2 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
1883 dw[0] |= GEN6_SF_SWIZZLE_ENABLE;
1886 switch (rasterizer->sprite_coord_mode) {
1887 case PIPE_SPRITE_COORD_UPPER_LEFT:
1888 dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT;
1890 case PIPE_SPRITE_COORD_LOWER_LEFT:
1891 dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
/* pack two 16-bit attribute controls per dword, odd entry in the high half */
1895 for (i = 0; i < 8; i++)
1896 dw[1 + i] = attr_ctrl[2 * i + 1] << 16 | attr_ctrl[2 * i];
1898 dw[9] = point_sprite_enable;
1899 dw[10] = const_interp_enable;
1901 /* WrapShortest enables */
/**
 * Emit the 3DSTATE_SF command: a 20-dword packet made of the header,
 * dw_sbe[0], the six dwords from ilo_gpe_gen6_fill_3dstate_sf_raster() and
 * the remaining twelve dwords from ilo_gpe_gen6_fill_3dstate_sf_sbe().
 * ILO_GPE_VALID_GEN(dev, 6, 6) restricts it to GEN6.
 *
 * rasterizer: rasterizer state forwarded to both fill helpers.
 * fs:         fragment shader forwarded to the sbe helper.
 * last_sh:    last shader stage before the FS, forwarded to the sbe helper.
 * cp:         receives the packet through ilo_cp_begin()/ilo_cp_write()/
 *             ilo_cp_write_multi().
 *
 * The raster helper is called with 1, PIPE_FORMAT_NONE and false for its
 * sample-count, depth-format and separate-stencil arguments.
 */
1907 gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
1908 const struct pipe_rasterizer_state *rasterizer,
1909 const struct ilo_shader *fs,
1910 const struct ilo_shader *last_sh,
1913 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
1914 const uint8_t cmd_len = 20;
1915 uint32_t dw_raster[6], dw_sbe[13];
1917 ILO_GPE_VALID_GEN(dev, 6, 6);
1919 ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
1920 1, PIPE_FORMAT_NONE, false, dw_raster, Elements(dw_raster));
1921 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
1922 fs, last_sh, dw_sbe, Elements(dw_sbe));
1924 ilo_cp_begin(cp, cmd_len);
1925 ilo_cp_write(cp, cmd | (cmd_len - 2));
/* DW1 comes from the sbe helper, DW2-DW7 from the raster helper */
1926 ilo_cp_write(cp, dw_sbe[0]);
1927 ilo_cp_write_multi(cp, dw_raster, 6);
1928 ilo_cp_write_multi(cp, &dw_sbe[1], 12);
/**
 * Emit the 3DSTATE_WM command: a 9-dword packet made of the header,
 * fs->cache_offset, dw2, a zero scratch dword, dw4, dw5, dw6 and two zero
 * kernel pointers.  ILO_GPE_VALID_GEN(dev, 6, 6) restricts it to GEN6.
 *
 * dev:          device info; dev->gt selects max_threads (80 for gt == 2,
 *               40 otherwise).
 * fs:           fragment shader; in.start_grf, has_kill, out.has_pos,
 *               dispatch_16, in.count and
 *               in.barycentric_interpolation_mode are read.
 * num_samplers: rounded up to a multiple of four and stored in dw2 in
 *               units of four (its declaration line is not visible in this
 *               copy).
 * rasterizer:   source of line_smooth, stipple enables, bottom_edge_rule
 *               and multisample.
 * dual_blend:   dual source blending flag.
 * cc_may_kill:  when set, GEN6_WM_KILL_ENABLE is set even if the shader
 *               has no kill.
 * cp:           receives the packet through ilo_cp_begin()/ilo_cp_write().
 *
 * NOTE(review): many statements here (the mostly-zero first packet, the
 * depth clear/resolve bits, the dispatch enable, the dual source blend
 * bit) sit under guards that are not visible in this copy -- do not assume
 * they execute unconditionally; confirm against the full source.
 */
1933 gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
1934 const struct ilo_shader *fs,
1936 const struct pipe_rasterizer_state *rasterizer,
1937 bool dual_blend, bool cc_may_kill,
1940 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
1941 const uint8_t cmd_len = 9;
1942 const int num_samples = 1;
1943 uint32_t dw2, dw4, dw5, dw6;
1946 ILO_GPE_VALID_GEN(dev, 6, 6);
1948 /* see brwCreateContext() */
1949 max_threads = (dev->gt == 2) ? 80 : 40;
1952 ilo_cp_begin(cp, cmd_len);
1953 ilo_cp_write(cp, cmd | (cmd_len - 2));
1954 ilo_cp_write(cp, 0);
1955 ilo_cp_write(cp, 0);
1956 ilo_cp_write(cp, 0);
1957 ilo_cp_write(cp, 0);
1958 /* honor the valid range even if dispatching is disabled */
1959 ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
1960 ilo_cp_write(cp, 0);
1961 ilo_cp_write(cp, 0);
1962 ilo_cp_write(cp, 0);
1968 dw2 = (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT;
1970 dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;
1972 dw4 = fs->in.start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 |
1973 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 |
1974 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
1977 dw4 |= GEN6_WM_STATISTICS_ENABLE;
1981 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1983 * "This bit (Statistics Enable) must be disabled if either of these
1984 * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
1985 * Resolve Enable or Depth Buffer Resolve Enable."
1987 dw4 |= GEN6_WM_DEPTH_CLEAR;
1988 dw4 |= GEN6_WM_DEPTH_RESOLVE;
1989 dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
1992 dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT |
1993 GEN6_WM_LINE_AA_WIDTH_2_0;
1996 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
1998 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
1999 * PS kernel or color calculator has the ability to kill (discard)
2000 * pixels or samples, other than due to depth or stencil testing.
2001 * This bit is required to be ENABLED in the following situations:
2003 * The API pixel shader program contains "killpix" or "discard"
2004 * instructions, or other code in the pixel shader kernel that can
2005 * cause the final pixel mask to differ from the pixel mask received
2008 * A sampler with chroma key enabled with kill pixel mode is used by
2011 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
2014 * The pixel shader kernel generates and outputs oMask.
2016 * Note: As ClipDistance clipping is fully supported in hardware and
2017 * therefore not via PS instructions, there should be no need to
2018 * ENABLE this bit due to ClipDistance clipping."
2020 if (fs->has_kill || cc_may_kill)
2021 dw5 |= GEN6_WM_KILL_ENABLE;
2024 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
2026 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
2027 * field must be set to disabled."
2029 * TODO This is not checked yet.
2031 if (fs->out.has_pos)
2032 dw5 |= GEN6_WM_COMPUTED_DEPTH;
2035 dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W;
2040 * a) fs writes colors and color is not masked, or
2041 * b) fs writes depth, or
2045 dw5 |= GEN6_WM_DISPATCH_ENABLE;
2047 /* same value as in 3DSTATE_SF */
2048 if (rasterizer->line_smooth)
2049 dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0;
2051 if (rasterizer->poly_stipple_enable)
2052 dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
2053 if (rasterizer->line_stipple_enable)
2054 dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
2057 dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
2059 if (fs->dispatch_16)
2060 dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
2062 dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
2064 dw6 = fs->in.count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT |
2065 GEN6_WM_POSOFFSET_NONE |
2066 GEN6_WM_POSITION_ZW_PIXEL |
2067 fs->in.barycentric_interpolation_mode <<
2068 GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
2070 if (rasterizer->bottom_edge_rule)
2071 dw6 |= GEN6_WM_POINT_RASTRULE_UPPER_RIGHT;
/* num_samples is the constant 1 above, so this test is currently false */
2073 if (num_samples > 1) {
2074 if (rasterizer->multisample)
2075 dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
2077 dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
2078 dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
2081 dw6 |= GEN6_WM_MSRAST_OFF_PIXEL |
2082 GEN6_WM_MSDISPMODE_PERSAMPLE;
2085 ilo_cp_begin(cp, cmd_len);
2086 ilo_cp_write(cp, cmd | (cmd_len - 2));
2087 ilo_cp_write(cp, fs->cache_offset);
2088 ilo_cp_write(cp, dw2);
2089 ilo_cp_write(cp, 0); /* scratch */
2090 ilo_cp_write(cp, dw4);
2091 ilo_cp_write(cp, dw5);
2092 ilo_cp_write(cp, dw6);
2093 ilo_cp_write(cp, 0); /* kernel 1 */
2094 ilo_cp_write(cp, 0); /* kernel 2 */
/**
 * Fill the four buffer dwords shared by the 3DSTATE_CONSTANT_{VS,GS,PS}
 * commands (num_dwords must be 4, see the assert).
 *
 * bufs:            buffer offsets; each used one must be a multiple of 32
 *                  (asserted).
 * sizes:           buffer sizes; a zero size means the slot is unused.
 * num_bufs:        number of valid entries in bufs/sizes.
 * max_read_length: upper bound, asserted, on the sum of the per-buffer
 *                  read lengths (each counted as read_len + 1).
 * dw:              output; a used slot i gets bufs[i] | read_len, where
 *                  read_len is the size in 32-byte units, rounded up,
 *                  minus one (asserted to be below 32).
 *
 * NOTE(review): the callers use the return value as a buffer-enable mask;
 * the lines updating and returning "enabled", and the handling of unused
 * slots, are not visible in this copy -- confirm against the full source.
 */
2099 gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
2100 const uint32_t *bufs, const int *sizes,
2101 int num_bufs, int max_read_length,
2102 uint32_t *dw, int num_dwords)
2104 unsigned enabled = 0x0;
2105 int total_read_length, i;
2107 assert(num_dwords == 4);
2109 total_read_length = 0;
2110 for (i = 0; i < 4; i++) {
2111 if (i < num_bufs && sizes[i]) {
2112 /* in 256-bit units minus one */
2113 const int read_len = (sizes[i] + 31) / 32 - 1;
2115 assert(bufs[i] % 32 == 0);
2116 assert(read_len < 32);
/* the offset is 32-byte aligned, so its low bits are free for the length */
2119 dw[i] = bufs[i] | read_len;
2121 total_read_length += read_len + 1;
2128 assert(total_read_length <= max_read_length);
/**
 * Emit the 3DSTATE_CONSTANT_VS command: a 5-dword packet made of the
 * header and the four buffer dwords from gen6_fill_3dstate_constant().
 * The value returned by that helper is shifted left by 12 and OR'ed into
 * the header.  ILO_GPE_VALID_GEN(dev, 6, 6) restricts it to GEN6.
 *
 * bufs, sizes: per-buffer offsets and sizes, forwarded to the helper.
 * num_bufs:    number of buffers, at most 4 (asserted; its declaration
 *              line is not visible in this copy).
 * cp:          receives the packet through ilo_cp_begin()/ilo_cp_write().
 *
 * The total read length is limited to 32 for the VS.
 */
2134 gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
2135 const uint32_t *bufs, const int *sizes,
2139 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15);
2140 const uint8_t cmd_len = 5;
2141 uint32_t buf_dw[4], buf_enabled;
2143 ILO_GPE_VALID_GEN(dev, 6, 6);
2144 assert(num_bufs <= 4);
2147 * From the Sandy Bridge PRM, volume 2 part 1, page 138:
2149 * "The sum of all four read length fields (each incremented to
2150 * represent the actual read length) must be less than or equal to 32"
2152 buf_enabled = gen6_fill_3dstate_constant(dev,
2153 bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw));
2155 ilo_cp_begin(cp, cmd_len);
2156 ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
2157 ilo_cp_write(cp, buf_dw[0]);
2158 ilo_cp_write(cp, buf_dw[1]);
2159 ilo_cp_write(cp, buf_dw[2]);
2160 ilo_cp_write(cp, buf_dw[3]);
/**
 * Emit the 3DSTATE_CONSTANT_GS command: a 5-dword packet made of the
 * header and the four buffer dwords from gen6_fill_3dstate_constant().
 * The value returned by that helper is shifted left by 12 and OR'ed into
 * the header.  ILO_GPE_VALID_GEN(dev, 6, 6) restricts it to GEN6.
 *
 * bufs, sizes: per-buffer offsets and sizes, forwarded to the helper.
 * num_bufs:    number of buffers, at most 4 (asserted; its declaration
 *              line is not visible in this copy).
 * cp:          receives the packet through ilo_cp_begin()/ilo_cp_write().
 *
 * The total read length is limited to 64 for the GS.
 */
2165 gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
2166 const uint32_t *bufs, const int *sizes,
2170 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16);
2171 const uint8_t cmd_len = 5;
2172 uint32_t buf_dw[4], buf_enabled;
2174 ILO_GPE_VALID_GEN(dev, 6, 6);
2175 assert(num_bufs <= 4);
2178 * From the Sandy Bridge PRM, volume 2 part 1, page 161:
2180 * "The sum of all four read length fields (each incremented to
2181 * represent the actual read length) must be less than or equal to 64"
2183 buf_enabled = gen6_fill_3dstate_constant(dev,
2184 bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
2186 ilo_cp_begin(cp, cmd_len);
2187 ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
2188 ilo_cp_write(cp, buf_dw[0]);
2189 ilo_cp_write(cp, buf_dw[1]);
2190 ilo_cp_write(cp, buf_dw[2]);
2191 ilo_cp_write(cp, buf_dw[3]);
/**
 * Emit the 3DSTATE_CONSTANT_PS command: a 5-dword packet made of the
 * header and the four buffer dwords from gen6_fill_3dstate_constant().
 * The value returned by that helper is shifted left by 12 and OR'ed into
 * the header.  ILO_GPE_VALID_GEN(dev, 6, 6) restricts it to GEN6.
 *
 * bufs, sizes: per-buffer offsets and sizes, forwarded to the helper.
 * num_bufs:    number of buffers, at most 4 (asserted; its declaration
 *              line is not visible in this copy).
 * cp:          receives the packet through ilo_cp_begin()/ilo_cp_write().
 *
 * The total read length is limited to 64 for the PS.
 */
2196 gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
2197 const uint32_t *bufs, const int *sizes,
2201 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17);
2202 const uint8_t cmd_len = 5;
2203 uint32_t buf_dw[4], buf_enabled;
2205 ILO_GPE_VALID_GEN(dev, 6, 6);
2206 assert(num_bufs <= 4);
2209 * From the Sandy Bridge PRM, volume 2 part 1, page 287:
2211 * "The sum of all four read length fields (each incremented to
2212 * represent the actual read length) must be less than or equal to 64"
2214 buf_enabled = gen6_fill_3dstate_constant(dev,
2215 bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
2217 ilo_cp_begin(cp, cmd_len);
2218 ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
2219 ilo_cp_write(cp, buf_dw[0]);
2220 ilo_cp_write(cp, buf_dw[1]);
2221 ilo_cp_write(cp, buf_dw[2]);
2222 ilo_cp_write(cp, buf_dw[3]);
/**
 * Emit the 3DSTATE_SAMPLE_MASK command: a 2-dword packet made of the
 * header and the sample mask.  ILO_GPE_VALID_GEN(dev, 6, 6) restricts it
 * to GEN6.
 *
 * sample_mask: requested mask; only its low four bits (valid_mask, 0xf)
 *              are kept.
 * cp:          receives the packet through ilo_cp_begin()/ilo_cp_write().
 */
2227 gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
2228 unsigned sample_mask,
2231 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
2232 const uint8_t cmd_len = 2;
2233 const unsigned valid_mask = 0xf;
2235 ILO_GPE_VALID_GEN(dev, 6, 6);
/* drop any bits outside the valid mask */
2237 sample_mask &= valid_mask;
2239 ilo_cp_begin(cp, cmd_len);
2240 ilo_cp_write(cp, cmd | (cmd_len - 2));
2241 ilo_cp_write(cp, sample_mask);
/**
 * Emit the 3DSTATE_DRAWING_RECTANGLE command: a 4-dword packet made of the
 * header, the minimum corner (y << 16 | x), the maximum corner
 * (ymax << 16 | xmax) and a zero origin dword.
 * ILO_GPE_VALID_GEN(dev, 6, 7) accepts GEN6 through GEN7.
 *
 * x, y:          minimum corner of the rectangle.
 * width, height: size of the rectangle; the maximum corner is computed as
 *                x + width - 1 and y + height - 1.
 * cp:            receives the packet through ilo_cp_begin()/ilo_cp_write().
 *
 * All four coordinates are clamped to rect_limit before being written.
 *
 * NOTE(review): the declaration of rect_limit and the per-generation
 * values assigned to it are not visible in this copy -- confirm against
 * the full source.
 */
2246 gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
2247 unsigned x, unsigned y,
2248 unsigned width, unsigned height,
2251 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00);
2252 const uint8_t cmd_len = 4;
2253 unsigned xmax = x + width - 1;
2254 unsigned ymax = y + height - 1;
2257 ILO_GPE_VALID_GEN(dev, 6, 7);
2259 if (dev->gen >= ILO_GEN(7)) {
2264 * From the Sandy Bridge PRM, volume 2 part 1, page 230:
2266 * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
2267 * must be an even number"
2274 if (x > rect_limit) x = rect_limit;
2275 if (y > rect_limit) y = rect_limit;
2276 if (xmax > rect_limit) xmax = rect_limit;
2277 if (ymax > rect_limit) ymax = rect_limit;
2279 ilo_cp_begin(cp, cmd_len);
2280 ilo_cp_write(cp, cmd | (cmd_len - 2));
2281 ilo_cp_write(cp, y << 16 | x);
2282 ilo_cp_write(cp, ymax << 16 | xmax);
2285 * There is no need to set the origin. It is intended to support front
2288 ilo_cp_write(cp, 0);
/**
 * Map a pipe depth/stencil format to a BRW_DEPTHFORMAT_* value and return
 * it.  ILO_GPE_VALID_GEN(dev, 6, 7) accepts GEN6 through GEN7.
 *
 * format:           pipe format of the depth/stencil surface.
 * separate_stencil: when set, combined depth/stencil formats map to their
 *                   depth-only hardware format (D24_UNORM_X8_UINT or
 *                   D32_FLOAT) instead of the packed one.
 * has_stencil:      output; set to true for the formats that carry
 *                   stencil.
 *
 * Unsupported formats hit an assert and fall back to
 * BRW_DEPTHFORMAT_D32_FLOAT.
 *
 * NOTE(review): the caller passes a "hiz" flag and a has_depth output as
 * well; their declaration lines, the has_depth assignments and the
 * non-separate-stencil handling of PIPE_FORMAT_S8_UINT are not visible in
 * this copy -- confirm against the full source.
 */
2294 gen6_get_depth_buffer_format(const struct ilo_dev_info *dev,
2295 enum pipe_format format,
2297 bool separate_stencil,
2303 ILO_GPE_VALID_GEN(dev, 6, 7);
2306 *has_stencil = false;
2309 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
2311 * "If this field (Hierarchical Depth Buffer Enable) is enabled, the
2312 * Surface Format of the depth buffer cannot be
2313 * D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
2314 * requires the separate stencil buffer."
2316 * From the Ironlake PRM, volume 2 part 1, page 330:
2318 * "If this field (Separate Stencil Buffer Enable) is disabled, the
2319 * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
2321 * There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT
2322 * is indeed used, the depth values output by the fragment shaders will
2323 * be different when read back.
2325 * As for GEN7+, separate_stencil_buffer is always true.
2328 case PIPE_FORMAT_Z16_UNORM:
2329 depth_format = BRW_DEPTHFORMAT_D16_UNORM;
2331 case PIPE_FORMAT_Z32_FLOAT:
2332 depth_format = BRW_DEPTHFORMAT_D32_FLOAT;
2334 case PIPE_FORMAT_Z24X8_UNORM:
2335 depth_format = (separate_stencil) ?
2336 BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
2337 BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
2339 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
2340 depth_format = (separate_stencil) ?
2341 BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
2342 BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
2343 *has_stencil = true;
2345 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
2346 depth_format = (separate_stencil) ?
2347 BRW_DEPTHFORMAT_D32_FLOAT :
2348 BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
2349 *has_stencil = true;
2351 case PIPE_FORMAT_S8_UINT:
2352 if (separate_stencil) {
2353 depth_format = BRW_DEPTHFORMAT_D32_FLOAT;
2355 *has_stencil = true;
2360 assert(!"unsupported depth/stencil format");
2361 depth_format = BRW_DEPTHFORMAT_D32_FLOAT;
2363 *has_stencil = false;
2367 return depth_format;
/**
 * Emit the 3DSTATE_DEPTH_BUFFER command: a 7-dword packet.  The opcode
 * differs between GEN6 and GEN7+ (see cmd), and
 * ILO_GPE_VALID_GEN(dev, 6, 7) accepts GEN6 through GEN7.
 *
 * dev:     device info; dev->gen selects the opcode, the size limits
 *          (max_2d_size, max_array_size) and the dword layout.
 * surface: depth surface; its format, texture and u.tex level/layer
 *          fields are read.
 * cp:      receives the packet through ilo_cp_begin()/ilo_cp_write()/
 *          ilo_cp_write_bo().
 *
 * HiZ is hard-wired off here (hiz is the constant false).  On GEN7+
 * separate_stencil is set to true; in the other visible assignment it
 * follows hiz.
 *
 * When the surface has neither depth nor stencil, dw1 selects a NULL
 * surface with the D32_FLOAT format.  Otherwise the resource is addressed
 * through ilo_texture_get_slice_offset(), which yields the slice offset
 * and the x/y offsets written in the packet; only a single layer is
 * supported (asserted).
 *
 * NOTE(review): a number of guards and short statements (for example the
 * conditions around the NULL-surface packet, the two cube-surface
 * rewrites and the continuation lines of dw3/dw4) are not visible in this
 * copy -- confirm against the full source.
 */
2371 gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
2372 const struct pipe_surface *surface,
2375 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
2376 ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
2377 const uint8_t cmd_len = 7;
2378 const int max_2d_size = (dev->gen >= ILO_GEN(7)) ? 16384 : 8192;
2379 const int max_array_size = (dev->gen >= ILO_GEN(7)) ? 2048 : 512;
2380 const bool hiz = false;
2381 struct ilo_texture *tex;
2382 uint32_t dw1, dw3, dw4, dw6;
2383 uint32_t slice_offset, x_offset, y_offset;
2384 int surface_type, depth_format;
2385 unsigned lod, first_layer, num_layers;
2386 unsigned width, height, depth;
2387 bool separate_stencil, has_depth, has_stencil;
2389 ILO_GPE_VALID_GEN(dev, 6, 7);
2391 if (dev->gen >= ILO_GEN(7)) {
2392 separate_stencil = true;
2396 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
2398 * "This field (Separate Stencil Buffer Enable) must be set to the
2399 * same value (enabled or disabled) as Hierarchical Depth Buffer
2402 separate_stencil = hiz;
2406 depth_format = gen6_get_depth_buffer_format(dev,
2407 surface->format, hiz, separate_stencil, &has_depth, &has_stencil);
2411 has_stencil = false;
2414 if (!has_depth && !has_stencil) {
2415 dw1 = BRW_SURFACE_NULL << 29 |
2416 BRW_DEPTHFORMAT_D32_FLOAT << 18;
2419 if (dev->gen == ILO_GEN(6)) {
2424 ilo_cp_begin(cp, cmd_len);
2425 ilo_cp_write(cp, cmd | (cmd_len - 2));
2426 ilo_cp_write(cp, dw1);
2427 ilo_cp_write(cp, 0);
2428 ilo_cp_write(cp, 0);
2429 ilo_cp_write(cp, 0);
2430 ilo_cp_write(cp, 0);
2431 ilo_cp_write(cp, 0);
2437 tex = ilo_texture(surface->texture);
2439 surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
2440 lod = surface->u.tex.level;
2441 first_layer = surface->u.tex.first_layer;
2442 num_layers = surface->u.tex.last_layer - first_layer + 1;
2444 width = tex->base.width0;
2445 height = tex->base.height0;
/* 3D textures use their own depth; other targets use the layer count */
2446 depth = (tex->base.target == PIPE_TEXTURE_3D) ?
2447 tex->base.depth0 : num_layers;
2449 if (surface_type == BRW_SURFACE_CUBE) {
2451 * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
2453 * "For Other Surfaces (Cube Surfaces):
2454 * This field (Minimum Array Element) is ignored."
2456 * "For Other Surfaces (Cube Surfaces):
2457 * This field (Render Target View Extent) is ignored."
2459 * As such, we cannot set first_layer and num_layers on cube surfaces.
2460 * To work around that, treat it as a 2D surface.
2462 surface_type = BRW_SURFACE_2D;
2466 * we always treat the resource as non-mipmapped and set the slice/x/y
2470 /* no layered rendering */
2471 assert(num_layers == 1);
2473 slice_offset = ilo_texture_get_slice_offset(tex,
2474 lod, first_layer, &x_offset, &y_offset);
2477 * From the Sandy Bridge PRM, volume 2 part 1, page 326:
2479 * "The 3 LSBs of both offsets (Depth Coordinate Offset Y and Depth
2480 * Coordinate Offset X) must be zero to ensure correct alignment"
2482 * XXX Skip the check for gen6, which seems to be fine. We need to make
2483 * sure that does not happen eventually.
2485 if (dev->gen >= ILO_GEN(7)) {
2486 assert((x_offset & 7) == 0 && (y_offset & 7) == 0);
2491 /* the size of the layer */
2492 width = u_minify(width, lod);
2493 height = u_minify(height, lod);
2494 if (surface_type == BRW_SURFACE_3D)
2495 depth = u_minify(depth, lod);
2505 /* we have to treat them as 2D surfaces */
2506 if (surface_type == BRW_SURFACE_CUBE) {
2507 assert(tex->base.width0 == tex->base.height0);
2508 /* we will set slice_offset to point to the single face */
2509 surface_type = BRW_SURFACE_2D;
2511 else if (surface_type == BRW_SURFACE_1D && height > 1) {
2512 assert(tex->base.height0 == 1);
2513 surface_type = BRW_SURFACE_2D;
2522 /* required for GEN6+ */
2523 assert(tex->tiling == INTEL_TILING_Y);
2524 assert(tex->bo_stride > 0 && tex->bo_stride < 128 * 1024 &&
2525 tex->bo_stride % 128 == 0);
2526 assert(width <= tex->bo_stride);
/* per-surface-type sanity checks on sizes, layers and offsets */
2528 switch (surface_type) {
2529 case BRW_SURFACE_1D:
2530 assert(width <= max_2d_size && height == 1 &&
2531 depth <= max_array_size);
2532 assert(first_layer < max_array_size - 1 &&
2533 num_layers <= max_array_size);
2535 case BRW_SURFACE_2D:
2536 assert(width <= max_2d_size && height <= max_2d_size &&
2537 depth <= max_array_size);
2538 assert(first_layer < max_array_size - 1 &&
2539 num_layers <= max_array_size);
2541 case BRW_SURFACE_3D:
2542 assert(width <= 2048 && height <= 2048 && depth <= 2048);
2543 assert(first_layer < 2048 && num_layers <= max_array_size);
2544 assert(x_offset == 0 && y_offset == 0);
2546 case BRW_SURFACE_CUBE:
2547 assert(width <= max_2d_size && height <= max_2d_size && depth == 1);
2548 assert(first_layer == 0 && num_layers == 1);
2549 assert(width == height);
2550 assert(x_offset == 0 && y_offset == 0);
2553 assert(!"unexpected depth surface type");
2557 dw1 = surface_type << 29 |
2558 depth_format << 18 |
2559 (tex->bo_stride - 1);
2561 if (dev->gen >= ILO_GEN(7)) {
2571 dw3 = (height - 1) << 18 |
2575 dw4 = (depth - 1) << 21 |
2578 dw6 = (num_layers - 1) << 21;
2581 dw1 |= (tex->tiling != INTEL_TILING_NONE) << 27 |
2582 (tex->tiling == INTEL_TILING_Y) << 26;
2589 dw3 = (height - 1) << 19 |
2592 BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1;
2594 dw4 = (depth - 1) << 21 |
2596 (num_layers - 1) << 1;
2601 ilo_cp_begin(cp, cmd_len);
2602 ilo_cp_write(cp, cmd | (cmd_len - 2));
2603 ilo_cp_write(cp, dw1);
2606 ilo_cp_write_bo(cp, slice_offset, tex->bo,
2607 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
2610 ilo_cp_write(cp, 0);
2613 ilo_cp_write(cp, dw3);
2614 ilo_cp_write(cp, dw4);
2615 ilo_cp_write(cp, y_offset << 16 | x_offset);
2616 ilo_cp_write(cp, dw6);
/*
 * Emit 3DSTATE_POLY_STIPPLE_OFFSET as a two-dword command.
 *
 * Both offsets are asserted to lie in [0, 31].  The payload dword packs
 * them as (x_offset << 8 | y_offset).
 *
 * NOTE(review): cp is used below but its declaration is not visible in this
 * listing; presumably it is the trailing parameter -- confirm.
 */
2621 gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
2622 int x_offset, int y_offset,
2625 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06);
2626 const uint8_t cmd_len = 2;
2628 ILO_GPE_VALID_GEN(dev, 6, 7);
2629 assert(x_offset >= 0 && x_offset <= 31);
2630 assert(y_offset >= 0 && y_offset <= 31);
2632 ilo_cp_begin(cp, cmd_len);
/* header dword: opcode combined with (cmd_len - 2) */
2633 ilo_cp_write(cp, cmd | (cmd_len - 2));
2634 ilo_cp_write(cp, x_offset << 8 | y_offset);
/*
 * Emit 3DSTATE_POLY_STIPPLE_PATTERN: one header dword followed by the 32
 * entries of pattern->stipple, for a total command length of 33 dwords.
 *
 * The assert checks that pattern->stipple really has 32 elements, which is
 * what the fixed loop bound and cmd_len rely on.
 *
 * NOTE(review): the declarations of cp and i are not visible in this
 * listing -- confirm against the full file.
 */
2639 gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
2640 const struct pipe_poly_stipple *pattern,
2643 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07);
2644 const uint8_t cmd_len = 33;
2647 ILO_GPE_VALID_GEN(dev, 6, 7);
2648 assert(Elements(pattern->stipple) == 32);
2650 ilo_cp_begin(cp, cmd_len);
2651 ilo_cp_write(cp, cmd | (cmd_len - 2));
/* one dword per stipple row */
2652 for (i = 0; i < 32; i++)
2653 ilo_cp_write(cp, pattern->stipple[i]);
/*
 * Emit 3DSTATE_LINE_STIPPLE as a three-dword command.
 *
 * pattern is asserted to fit in 16 bits and factor to lie in [1, 256].
 * The second dword is the pattern.  The third dword packs the inverse of
 * the repeat factor together with the factor itself:
 *   - GEN7 and later: (unsigned) (65536.0f / factor), shifted left by 15
 *   - otherwise:      (unsigned) (8192.0f / factor), shifted left by 16
 *
 * NOTE(review): the line separating the two variants (presumably an else)
 * and the declarations of cp and inverse are not visible in this listing.
 */
2658 gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
2659 unsigned pattern, unsigned factor,
2662 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08);
2663 const uint8_t cmd_len = 3;
2666 ILO_GPE_VALID_GEN(dev, 6, 7);
2667 assert((pattern & 0xffff) == pattern);
2668 assert(factor >= 1 && factor <= 256);
2670 ilo_cp_begin(cp, cmd_len);
2671 ilo_cp_write(cp, cmd | (cmd_len - 2));
2672 ilo_cp_write(cp, pattern);
2674 if (dev->gen >= ILO_GEN(7)) {
/* fixed-point reciprocal with 16 fractional bits */
2676 inverse = (unsigned) (65536.0f / factor);
2677 ilo_cp_write(cp, inverse << 15 | factor);
/* fixed-point reciprocal with 13 fractional bits */
2681 inverse = (unsigned) (8192.0f / factor);
2682 ilo_cp_write(cp, inverse << 16 | factor);
/*
 * Emit 3DSTATE_AA_LINE_PARAMETERS as a three-dword command.
 *
 * Both payload dwords are written as zero; the (0 << 16 | 0) spelling keeps
 * the two 16-bit halves of each dword visible in the code.
 *
 * NOTE(review): the declaration of cp is not visible in this listing.
 */
2689 gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
2692 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a);
2693 const uint8_t cmd_len = 3;
2695 ILO_GPE_VALID_GEN(dev, 6, 7);
2697 ilo_cp_begin(cp, cmd_len);
2698 ilo_cp_write(cp, cmd | (cmd_len - 2));
2699 ilo_cp_write(cp, 0 << 16 | 0);
2700 ilo_cp_write(cp, 0 << 16 | 0);
/*
 * Emit 3DSTATE_GS_SVB_INDEX as a four-dword command (GEN6 only, per the
 * ILO_GPE_VALID_GEN(dev, 6, 6) check).
 *
 * index is asserted to be in [0, 4).  The dwords after the header are:
 *   dw1      - index << SVB_INDEX_SHIFT, with
 *              SVB_LOAD_INTERNAL_VERTEX_COUNT ORed in when
 *              load_vertex_count is true
 *   svbi     - written unmodified
 *   max_svbi - written unmodified
 *
 * NOTE(review): the declarations of max_svbi, cp and dw1 are not visible in
 * this listing -- confirm against the full file.
 */
2705 gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
2706 int index, unsigned svbi,
2708 bool load_vertex_count,
2711 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b);
2712 const uint8_t cmd_len = 4;
2715 ILO_GPE_VALID_GEN(dev, 6, 6);
2716 assert(index >= 0 && index < 4);
2718 dw1 = index << SVB_INDEX_SHIFT;
2719 if (load_vertex_count)
2720 dw1 |= SVB_LOAD_INTERNAL_VERTEX_COUNT;
2722 ilo_cp_begin(cp, cmd_len);
2723 ilo_cp_write(cp, cmd | (cmd_len - 2));
2724 ilo_cp_write(cp, dw1);
2725 ilo_cp_write(cp, svbi);
2726 ilo_cp_write(cp, max_svbi);
/*
 * Emit 3DSTATE_MULTISAMPLE.  The command is four dwords long on GEN7 and
 * later and three dwords long otherwise; dw3 is only written on GEN7+.
 *
 * dw1 starts as the pixel location (center or upper-left, chosen by
 * pixel_location_center) and then receives a sample-count field selected
 * by a switch on num_samples:
 *   - MS_NUMSAMPLES_1
 *   - MS_NUMSAMPLES_4, with dw2 = packed_sample_pos[0]
 *   - MS_NUMSAMPLES_8 (asserted GEN7+), with dw2 = packed_sample_pos[0]
 *     and dw3 = packed_sample_pos[1]
 *   - any other value trips an assert and falls back to MS_NUMSAMPLES_1
 *
 * NOTE(review): the case labels, the num_samples and cp declarations, and
 * whatever dw2/dw3 are set to on the single-sample paths are not visible in
 * this listing -- confirm that dw2 and dw3 are always initialized before
 * they are written out.
 */
2731 gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
2733 const uint32_t *packed_sample_pos,
2734 bool pixel_location_center,
2737 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d);
2738 const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3;
2739 uint32_t dw1, dw2, dw3;
2741 ILO_GPE_VALID_GEN(dev, 6, 7);
2743 dw1 = (pixel_location_center) ?
2744 MS_PIXEL_LOCATION_CENTER : MS_PIXEL_LOCATION_UPPER_LEFT;
2746 switch (num_samples) {
2749 dw1 |= MS_NUMSAMPLES_1;
2754 dw1 |= MS_NUMSAMPLES_4;
2755 dw2 = packed_sample_pos[0];
2759 assert(dev->gen >= ILO_GEN(7));
2760 dw1 |= MS_NUMSAMPLES_8;
2761 dw2 = packed_sample_pos[0];
2762 dw3 = packed_sample_pos[1];
2765 assert(!"unsupported sample count");
2766 dw1 |= MS_NUMSAMPLES_1;
2772 ilo_cp_begin(cp, cmd_len);
2773 ilo_cp_write(cp, cmd | (cmd_len - 2));
2774 ilo_cp_write(cp, dw1);
2775 ilo_cp_write(cp, dw2);
2776 if (dev->gen >= ILO_GEN(7))
2777 ilo_cp_write(cp, dw3);
/*
 * Emit 3DSTATE_STENCIL_BUFFER as a three-dword command.  The opcode differs
 * between GEN7+ and earlier generations.
 *
 * The texture is taken from surface->texture (NULL when surface is NULL).
 * When the surface format is not PIPE_FORMAT_S8_UINT, the separate stencil
 * texture tex->separate_s8 is used instead.
 *
 * Two emissions are visible:
 *   - a command whose two payload dwords are zero (presumably the path
 *     taken when there is no stencil texture; the guarding lines are not
 *     visible in this listing -- confirm)
 *   - a command carrying (pitch - 1) and a relocation to tex->bo at
 *     slice_offset, where pitch is twice tex->bo_stride and slice_offset
 *     comes from ilo_texture_get_slice_offset() for the surface level and
 *     first layer
 *
 * x_offset and y_offset are computed but not emitted in the visible lines.
 */
2782 gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
2783 const struct pipe_surface *surface,
2786 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
2787 ILO_GPE_CMD(0x3, 0x0, 0x06) :
2788 ILO_GPE_CMD(0x3, 0x1, 0x0e);
2789 const uint8_t cmd_len = 3;
2790 struct ilo_texture *tex;
2791 uint32_t slice_offset, x_offset, y_offset;
2794 ILO_GPE_VALID_GEN(dev, 6, 7);
2796 tex = (surface) ? ilo_texture(surface->texture) : NULL;
2797 if (tex && surface->format != PIPE_FORMAT_S8_UINT)
2798 tex = tex->separate_s8;
2801 ilo_cp_begin(cp, cmd_len);
2802 ilo_cp_write(cp, cmd | (cmd_len - 2));
2803 ilo_cp_write(cp, 0);
2804 ilo_cp_write(cp, 0);
2811 slice_offset = ilo_texture_get_slice_offset(tex,
2812 surface->u.tex.level, surface->u.tex.first_layer,
2813 &x_offset, &y_offset);
2814 /* XXX X/Y offsets inherit from 3DSTATE_DEPTH_BUFFER */
2823 * From the Sandy Bridge PRM, volume 2 part 1, page 329:
2825 * "The pitch must be set to 2x the value computed based on width, as
2826 * the stencil buffer is stored with two rows interleaved."
2828 * According to the classic driver, we need to do the same for GEN7+ even
2829 * though the Ivy Bridge PRM does not say anything about it.
2831 pitch = 2 * tex->bo_stride;
2832 assert(pitch > 0 && pitch < 128 * 1024 && pitch % 128 == 0);
2834 ilo_cp_begin(cp, cmd_len);
2835 ilo_cp_write(cp, cmd | (cmd_len - 2));
2836 ilo_cp_write(cp, pitch - 1);
2837 ilo_cp_write_bo(cp, slice_offset, tex->bo,
2838 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
/*
 * Emit 3DSTATE_HIER_DEPTH_BUFFER as a three-dword command.  The opcode
 * differs between GEN7+ and earlier generations.
 *
 * hiz is a local constant set to false, so the (!surface || !hiz) test is
 * always true and the command with two zero payload dwords is always
 * emitted.
 *
 * The lines after that test describe the enabled case: bo_stride is
 * asserted to be non-zero, below 128 * 1024 and a multiple of 128, and the
 * command carries (bo_stride - 1) plus a relocation to tex->bo at
 * slice_offset.
 *
 * NOTE(review): the end of the zero path (presumably an early return) and
 * the assignment of slice_offset are not visible in this listing --
 * confirm against the full file.
 */
2843 gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
2844 const struct pipe_surface *surface,
2847 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
2848 ILO_GPE_CMD(0x3, 0x0, 0x07) :
2849 ILO_GPE_CMD(0x3, 0x1, 0x0f);
2850 const uint8_t cmd_len = 3;
2851 const bool hiz = false;
2852 struct ilo_texture *tex;
2853 uint32_t slice_offset;
2855 ILO_GPE_VALID_GEN(dev, 6, 7);
2857 if (!surface || !hiz) {
2858 ilo_cp_begin(cp, cmd_len);
2859 ilo_cp_write(cp, cmd | (cmd_len - 2));
2860 ilo_cp_write(cp, 0);
2861 ilo_cp_write(cp, 0);
2867 tex = ilo_texture(surface->texture);
2872 assert(tex->bo_stride > 0 && tex->bo_stride < 128 * 1024 &&
2873 tex->bo_stride % 128 == 0);
2875 ilo_cp_begin(cp, cmd_len);
2876 ilo_cp_write(cp, cmd | (cmd_len - 2));
2877 ilo_cp_write(cp, tex->bo_stride - 1);
2878 ilo_cp_write_bo(cp, slice_offset, tex->bo,
2879 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
/*
 * Emit 3DSTATE_CLEAR_PARAMS as a two-dword command (GEN6 only, per the
 * ILO_GPE_VALID_GEN(dev, 6, 6) check).
 *
 * The header dword always has GEN5_DEPTH_CLEAR_VALID ORed in; the second
 * dword is clear_val, written unmodified.
 *
 * NOTE(review): the declarations of clear_val and cp are not visible in
 * this listing.
 */
2884 gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
2888 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10);
2889 const uint8_t cmd_len = 2;
2891 ILO_GPE_VALID_GEN(dev, 6, 6);
2893 ilo_cp_begin(cp, cmd_len);
2894 ilo_cp_write(cp, cmd | (cmd_len - 2) |
2895 GEN5_DEPTH_CLEAR_VALID);
2896 ilo_cp_write(cp, clear_val);
/*
 * Emit PIPE_CONTROL.  The command is five dwords long when write_qword is
 * set and four dwords long otherwise.
 *
 * Two hardware restrictions on dw1 are checked with asserts before the
 * command is written:
 *   - when PIPE_CONTROL_CS_STALL is set, at least one bit of bit_test must
 *     also be set.  bit_test is built from the render target flush, depth
 *     cache flush, stall-at-scoreboard and depth stall bits, then extended
 *     with the three post-sync write operations, and on GEN6 also with
 *     PIPE_CONTROL_INTERRUPT_ENABLE.
 *   - when PIPE_CONTROL_DEPTH_STALL is set, the render target flush and
 *     depth cache flush bits must both be clear.
 *
 * The payload is dw1, a relocation to bo at bo_offset (read and write
 * domains are both INTEL_DOMAIN_INSTRUCTION), and then zero dwords.
 *
 * NOTE(review): the declarations of dw1, write_qword and cp, and the guard
 * on the final zero dword (presumably write_qword, given cmd_len), are not
 * visible in this listing -- confirm.
 */
2901 gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
2903 struct intel_bo *bo, uint32_t bo_offset,
2907 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00);
2908 const uint8_t cmd_len = (write_qword) ? 5 : 4;
2909 const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
2910 const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
2912 ILO_GPE_VALID_GEN(dev, 6, 7);
2914 if (dw1 & PIPE_CONTROL_CS_STALL) {
2916 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
2918 * "1 of the following must also be set (when CS stall is set):
2920 * * Depth Cache Flush Enable ([0] of DW1)
2921 * * Stall at Pixel Scoreboard ([1] of DW1)
2922 * * Depth Stall ([13] of DW1)
2923 * * Post-Sync Operation ([13] of DW1)
2924 * * Render Target Cache Flush Enable ([12] of DW1)
2925 * * Notify Enable ([8] of DW1)"
2927 * From the Ivy Bridge PRM, volume 2 part 1, page 61:
2929 * "One of the following must also be set (when CS stall is set):
2931 * * Render Target Cache Flush Enable ([12] of DW1)
2932 * * Depth Cache Flush Enable ([0] of DW1)
2933 * * Stall at Pixel Scoreboard ([1] of DW1)
2934 * * Depth Stall ([13] of DW1)
2935 * * Post-Sync Operation ([13] of DW1)"
2937 uint32_t bit_test = PIPE_CONTROL_WRITE_FLUSH |
2938 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
2939 PIPE_CONTROL_STALL_AT_SCOREBOARD |
2940 PIPE_CONTROL_DEPTH_STALL;
2943 bit_test |= PIPE_CONTROL_WRITE_IMMEDIATE |
2944 PIPE_CONTROL_WRITE_DEPTH_COUNT |
2945 PIPE_CONTROL_WRITE_TIMESTAMP;
2947 if (dev->gen == ILO_GEN(6))
2948 bit_test |= PIPE_CONTROL_INTERRUPT_ENABLE;
2950 assert(dw1 & bit_test);
2953 if (dw1 & PIPE_CONTROL_DEPTH_STALL) {
2955 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
2957 * "Following bits must be clear (when Depth Stall is set):
2959 * * Render Target Cache Flush Enable ([12] of DW1)
2960 * * Depth Cache Flush Enable ([0] of DW1)"
2962 assert(!(dw1 & (PIPE_CONTROL_WRITE_FLUSH |
2963 PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
2966 ilo_cp_begin(cp, cmd_len);
2967 ilo_cp_write(cp, cmd | (cmd_len - 2));
2968 ilo_cp_write(cp, dw1);
2969 ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
2970 ilo_cp_write(cp, 0);
2972 ilo_cp_write(cp, 0);
/*
 * Emit 3DPRIMITIVE as a six-dword command (GEN6 only, per the
 * ILO_GPE_VALID_GEN(dev, 6, 6) check).
 *
 * The topology is _3DPRIM_RECTLIST when rectlist is set, otherwise the
 * translation of info->mode.  The vertex buffer access mode is random for
 * indexed draws and sequential otherwise.
 *
 * After the header, which carries the topology type, the dwords are, in
 * order: info->count, info->start, info->instance_count,
 * info->start_instance and info->index_bias.
 *
 * NOTE(review): the header expression continues on a line that is not
 * visible in this listing (presumably ORing in vb_access); the rectlist and
 * cp declarations are not visible either -- confirm.
 */
2977 gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
2978 const struct pipe_draw_info *info,
2982 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
2983 const uint8_t cmd_len = 6;
2984 const int prim = (rectlist) ?
2985 _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
2986 const int vb_access = (info->indexed) ?
2987 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
2988 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
2990 ILO_GPE_VALID_GEN(dev, 6, 6);
2992 ilo_cp_begin(cp, cmd_len);
2993 ilo_cp_write(cp, cmd | (cmd_len - 2) |
2994 prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
2996 ilo_cp_write(cp, info->count);
2997 ilo_cp_write(cp, info->start);
2998 ilo_cp_write(cp, info->instance_count);
2999 ilo_cp_write(cp, info->start_instance);
3000 ilo_cp_write(cp, info->index_bias);
/*
 * Write an array of INTERFACE_DESCRIPTOR_DATA entries and return the
 * offset reported by ilo_cp_steal_ptr() (GEN6 only, per the
 * ILO_GPE_VALID_GEN(dev, 6, 6) check).
 *
 * Each descriptor takes 32 / 4 = 8 dwords, and the requested alignment is
 * also 32 / 4, matching the 32-byte requirement quoted below.  For
 * descriptor i the visible dwords are:
 *   dw[0] - cs[i]->cache_offset
 *   dw[1] - bit 18 set (tagged SPF in the original comment)
 *   dw[2] - sampler_state[i] ORed with the sampler count rounded up to a
 *           multiple of four, in units of four, shifted left by 2
 *   dw[3] - binding_table_state[i] ORed with a value on a line that is not
 *           visible in this listing
 *   dw[4] - CURBE read length in bits 16 and up; read offset is zero
 *   dw[5] - zero (barrier ID)
 *
 * curbe_read_len is cs[i]->pcb.clip_state_size rounded up to units of 32.
 *
 * NOTE(review): the remaining dwords of each descriptor, the advance of dw
 * between iterations, and several parameter declarations (num_samplers,
 * num_ids, cp among them) are not visible in this listing -- confirm.
 */
3005 gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
3006 const struct ilo_shader **cs,
3007 uint32_t *sampler_state,
3009 uint32_t *binding_table_state,
3015 * From the Sandy Bridge PRM, volume 2 part 2, page 34:
3017 * "(Interface Descriptor Total Length) This field must have the same
3018 * alignment as the Interface Descriptor Data Start Address.
3020 * It must be DQWord (32-byte) aligned..."
3022 * From the Sandy Bridge PRM, volume 2 part 2, page 35:
3024 * "(Interface Descriptor Data Start Address) Specifies the 32-byte
3025 * aligned address of the Interface Descriptor data."
3027 const int state_align = 32 / 4;
3028 const int state_len = (32 / 4) * num_ids;
3029 uint32_t state_offset, *dw;
3032 ILO_GPE_VALID_GEN(dev, 6, 6);
3034 dw = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
3035 state_len, state_align, &state_offset);
3037 for (i = 0; i < num_ids; i++) {
3040 curbe_read_len = (cs[i]->pcb.clip_state_size + 31) / 32;
3042 dw[0] = cs[i]->cache_offset;
3043 dw[1] = 1 << 18; /* SPF */
3044 dw[2] = sampler_state[i] |
3045 (num_samplers[i] + 3) / 4 << 2;
3046 dw[3] = binding_table_state[i] |
3048 dw[4] = curbe_read_len << 16 | /* CURBE Read Length */
3049 0; /* CURBE Read Offset */
3050 dw[5] = 0; /* Barrier ID */
3057 return state_offset;
/*
 * Compute a square guardband of side 8192 around a viewport center.
 *
 * center_x and center_y are the viewport center in screen space.  The
 * center is first moved, independently per axis, so that the square stays
 * inside the device guardband extent: max_extent is 32768 on GEN7 and later
 * and 16384 otherwise.  The four edges are then stored through min_gbx,
 * max_gbx, min_gby and max_gby as center -/+ 4096.
 *
 * NOTE(review): when the high-side clamp triggers, the center becomes
 * (max_extent - half_len), so the stored max edge equals max_extent, while
 * the test itself compares against (max_extent - 1).  Confirm whether the
 * max edge is meant to be allowed to reach max_extent.
 *
 * NOTE(review): the results are cast to float before being stored into int
 * outputs; the values are small integers, so the cast looks redundant --
 * confirm.
 */
3061 viewport_get_guardband(const struct ilo_dev_info *dev,
3062 int center_x, int center_y,
3063 int *min_gbx, int *max_gbx,
3064 int *min_gby, int *max_gby)
3067 * From the Sandy Bridge PRM, volume 2 part 1, page 234:
3069 * "Per-Device Guardband Extents
3071 * - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
3072 * - Maximum Post-Clamp Delta (X or Y): 16K"
3074 * "In addition, in order to be correctly rendered, objects must have a
3075 * screenspace bounding box not exceeding 8K in the X or Y direction.
3076 * This additional restriction must also be comprehended by software,
3077 * i.e., enforced by use of clipping."
3079 * From the Ivy Bridge PRM, volume 2 part 1, page 248:
3081 * "Per-Device Guardband Extents
3083 * - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
3084 * - Maximum Post-Clamp Delta (X or Y): N/A"
3086 * "In addition, in order to be correctly rendered, objects must have a
3087 * screenspace bounding box not exceeding 8K in the X or Y direction.
3088 * This additional restriction must also be comprehended by software,
3089 * i.e., enforced by use of clipping."
3091 * Combined, the bounding box of any object can not exceed 8K in both
3094 * Below we set the guardband as a squre of length 8K, centered at where
3095 * the viewport is. This makes sure all objects passing the GB test are
3096 * valid to the renderer, and those failing the XY clipping have a
3097 * better chance of passing the GB test.
3099 const int max_extent = (dev->gen >= ILO_GEN(7)) ? 32768 : 16384;
3100 const int half_len = 8192 / 2;
3102 /* make sure the guardband is within the valid range */
3103 if (center_x - half_len < -max_extent)
3104 center_x = -max_extent + half_len;
3105 else if (center_x + half_len > max_extent - 1)
3106 center_x = max_extent - half_len;
3108 if (center_y - half_len < -max_extent)
3109 center_y = -max_extent + half_len;
3110 else if (center_y + half_len > max_extent - 1)
3111 center_y = max_extent - half_len;
3113 *min_gbx = (float) (center_x - half_len);
3114 *max_gbx = (float) (center_x + half_len);
3115 *min_gby = (float) (center_y - half_len);
3116 *max_gby = (float) (center_y + half_len);
/*
 * Fill an ilo_viewport_cso from a pipe_viewport_state.
 *
 * Three groups of fields are written to vp:
 *   - m00, m11, m22 and m30, m31, m32: the scale and translate values,
 *     copied with their sign intact
 *   - min_gbx .. max_gby: the guardband returned by
 *     viewport_get_guardband() for the viewport center (the translate
 *     values truncated to int), converted by subtracting the translate and
 *     dividing by the absolute scale
 *   - min_x .. max_z: the viewport extents, computed as translate minus and
 *     plus the absolute scale on each axis
 *
 * NOTE(review): the guardband conversion divides by scale_x and scale_y
 * without a visible check for zero -- confirm callers never pass a zero
 * scale.
 */
3120 ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev,
3121 const struct pipe_viewport_state *state,
3122 struct ilo_viewport_cso *vp)
3124 const float scale_x = fabs(state->scale[0]);
3125 const float scale_y = fabs(state->scale[1]);
3126 const float scale_z = fabs(state->scale[2]);
3127 int min_gbx, max_gbx, min_gby, max_gby;
3129 ILO_GPE_VALID_GEN(dev, 6, 7);
3131 viewport_get_guardband(dev,
3132 (int) state->translate[0],
3133 (int) state->translate[1],
3134 &min_gbx, &max_gbx, &min_gby, &max_gby);
3137 vp->m00 = state->scale[0];
3138 vp->m11 = state->scale[1];
3139 vp->m22 = state->scale[2];
3140 vp->m30 = state->translate[0];
3141 vp->m31 = state->translate[1];
3142 vp->m32 = state->translate[2];
3144 /* guardband in NDC space */
3145 vp->min_gbx = ((float) min_gbx - state->translate[0]) / scale_x;
3146 vp->max_gbx = ((float) max_gbx - state->translate[0]) / scale_x;
3147 vp->min_gby = ((float) min_gby - state->translate[1]) / scale_y;
3148 vp->max_gby = ((float) max_gby - state->translate[1]) / scale_y;
3150 /* viewport in screen space */
3151 vp->min_x = scale_x * -1.0f + state->translate[0];
3152 vp->max_x = scale_x * 1.0f + state->translate[0];
3153 vp->min_y = scale_y * -1.0f + state->translate[1];
3154 vp->max_y = scale_y * 1.0f + state->translate[1];
3155 vp->min_z = scale_z * -1.0f + state->translate[2];
3156 vp->max_z = scale_z * 1.0f + state->translate[2];
/*
 * Write an array of SF_VIEWPORT entries and return the offset reported by
 * ilo_cp_steal_ptr() (GEN6 only, per the ILO_GPE_VALID_GEN(dev, 6, 6)
 * check).
 *
 * num_viewports is asserted to be in [1, 16].  Eight dwords are reserved
 * per viewport; the first six hold the bit patterns (via fui) of m00, m11,
 * m22, m30, m31 and m32 in that order.
 *
 * NOTE(review): the two remaining dwords of each entry, the advance of dw
 * between iterations, and the declarations of cp and i are not visible in
 * this listing -- confirm against the full file.
 */
3160 gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
3161 const struct ilo_viewport_cso *viewports,
3162 unsigned num_viewports,
3165 const int state_align = 32 / 4;
3166 const int state_len = 8 * num_viewports;
3167 uint32_t state_offset, *dw;
3170 ILO_GPE_VALID_GEN(dev, 6, 6);
3173 * From the Sandy Bridge PRM, volume 2 part 1, page 262:
3175 * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
3176 * stored as an array of up to 16 elements..."
3178 assert(num_viewports && num_viewports <= 16);
3180 dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
3181 state_len, state_align, &state_offset);
3183 for (i = 0; i < num_viewports; i++) {
3184 const struct ilo_viewport_cso *vp = &viewports[i];
3186 dw[0] = fui(vp->m00);
3187 dw[1] = fui(vp->m11);
3188 dw[2] = fui(vp->m22);
3189 dw[3] = fui(vp->m30);
3190 dw[4] = fui(vp->m31);
3191 dw[5] = fui(vp->m32);
3198 return state_offset;
/*
 * Write an array of CLIP_VIEWPORT entries and return the offset reported by
 * ilo_cp_steal_ptr() (GEN6 only, per the ILO_GPE_VALID_GEN(dev, 6, 6)
 * check).
 *
 * num_viewports is asserted to be in [1, 16].  Each entry is four dwords:
 * the bit patterns (via fui) of min_gbx, max_gbx, min_gby and max_gby, in
 * that order.
 *
 * NOTE(review): the advance of dw between iterations and the declarations
 * of cp and i are not visible in this listing -- confirm.
 */
3202 gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
3203 const struct ilo_viewport_cso *viewports,
3204 unsigned num_viewports,
3207 const int state_align = 32 / 4;
3208 const int state_len = 4 * num_viewports;
3209 uint32_t state_offset, *dw;
3212 ILO_GPE_VALID_GEN(dev, 6, 6);
3215 * From the Sandy Bridge PRM, volume 2 part 1, page 193:
3217 * "The viewport-related state is stored as an array of up to 16
3220 assert(num_viewports && num_viewports <= 16);
3222 dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
3223 state_len, state_align, &state_offset);
3225 for (i = 0; i < num_viewports; i++) {
3226 const struct ilo_viewport_cso *vp = &viewports[i];
3228 dw[0] = fui(vp->min_gbx);
3229 dw[1] = fui(vp->max_gbx);
3230 dw[2] = fui(vp->min_gby);
3231 dw[3] = fui(vp->max_gby);
3236 return state_offset;
/*
 * Write an array of CC_VIEWPORT entries and return the offset reported by
 * ilo_cp_steal_ptr().
 *
 * num_viewports is asserted to be in [1, 16].  Each entry is two dwords:
 * the bit patterns (via fui) of min_z and max_z.
 *
 * NOTE(review): the advance of dw between iterations and the declarations
 * of cp and i are not visible in this listing -- confirm.
 */
3240 gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
3241 const struct ilo_viewport_cso *viewports,
3242 unsigned num_viewports,
3245 const int state_align = 32 / 4;
3246 const int state_len = 2 * num_viewports;
3247 uint32_t state_offset, *dw;
3250 ILO_GPE_VALID_GEN(dev, 6, 7);
3253 * From the Sandy Bridge PRM, volume 2 part 1, page 385:
3255 * "The viewport state is stored as an array of up to 16 elements..."
3257 assert(num_viewports && num_viewports <= 16);
3259 dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
3260 state_len, state_align, &state_offset);
3262 for (i = 0; i < num_viewports; i++) {
3263 const struct ilo_viewport_cso *vp = &viewports[i];
3265 dw[0] = fui(vp->min_z);
3266 dw[1] = fui(vp->max_z);
3271 return state_offset;
/*
 * Write a six-dword COLOR_CALC_STATE and return the offset reported by
 * ilo_cp_steal_ptr().  The requested alignment is 64 / 4.
 *
 * Layout written below:
 *   dw[0]    - stencil reference values: ref_value[0] shifted left by 24,
 *              ref_value[1] shifted left by 16, plus
 *              BRW_ALPHATEST_FORMAT_UNORM8
 *   dw[1]    - alpha_ref converted with float_to_ubyte()
 *   dw[2..5] - bit patterns (via fui) of the four blend color channels
 *
 * NOTE(review): the declarations of alpha_ref and cp are not visible in
 * this listing.
 */
3275 gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev,
3276 const struct pipe_stencil_ref *stencil_ref,
3278 const struct pipe_blend_color *blend_color,
3281 const int state_align = 64 / 4;
3282 const int state_len = 6;
3283 uint32_t state_offset, *dw;
3285 ILO_GPE_VALID_GEN(dev, 6, 7);
3287 dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE",
3288 state_len, state_align, &state_offset);
3290 dw[0] = stencil_ref->ref_value[0] << 24 |
3291 stencil_ref->ref_value[1] << 16 |
3292 BRW_ALPHATEST_FORMAT_UNORM8;
3293 dw[1] = float_to_ubyte(alpha_ref);
3294 dw[2] = fui(blend_color->color[0]);
3295 dw[3] = fui(blend_color->color[1]);
3296 dw[4] = fui(blend_color->color[2]);
3297 dw[5] = fui(blend_color->color[3]);
3299 return state_offset;
/*
 * Rewrite a hardware blend factor under the assumption that destination
 * alpha is always one.
 *
 *   BRW_BLENDFACTOR_DST_ALPHA          -> BRW_BLENDFACTOR_ONE
 *   BRW_BLENDFACTOR_INV_DST_ALPHA      -> BRW_BLENDFACTOR_ZERO
 *   BRW_BLENDFACTOR_SRC_ALPHA_SATURATE -> BRW_BLENDFACTOR_ZERO
 *
 * NOTE(review): the switch header and the path for every other factor are
 * not visible in this listing; presumably other factors are returned
 * unchanged -- confirm.
 */
3303 gen6_blend_factor_dst_alpha_forced_one(int factor)
3306 case BRW_BLENDFACTOR_DST_ALPHA:
3307 return BRW_BLENDFACTOR_ONE;
3308 case BRW_BLENDFACTOR_INV_DST_ALPHA:
3309 case BRW_BLENDFACTOR_SRC_ALPHA_SATURATE:
3310 return BRW_BLENDFACTOR_ZERO;
/*
 * Build the blend-enable dword for one render target.
 *
 * The four pipe blend factors of rt are translated to hardware factors.
 * When dst_alpha_forced_one is set, each of them is additionally passed
 * through gen6_blend_factor_dst_alpha_forced_one().  The visible parts of
 * the resulting dword place the translated alpha blend function at bit 26
 * and the translated RGB blend function at bit 11.
 *
 * The final test detects the case where the RGB and alpha halves differ in
 * function or in either factor.
 *
 * NOTE(review): the early exit taken when rt->blend_enable is false, the
 * remaining fields of the dword, and the action taken when the RGB and
 * alpha halves differ are not visible in this listing -- confirm against
 * the full file.
 */
3317 blend_get_rt_blend_enable(const struct ilo_dev_info *dev,
3318 const struct pipe_rt_blend_state *rt,
3319 bool dst_alpha_forced_one)
3321 int rgb_src, rgb_dst, a_src, a_dst;
3324 if (!rt->blend_enable)
3327 rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
3328 rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
3329 a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
3330 a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
3332 if (dst_alpha_forced_one) {
3333 rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src);
3334 rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst);
3335 a_src = gen6_blend_factor_dst_alpha_forced_one(a_src);
3336 a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst);
3340 gen6_translate_pipe_blend(rt->alpha_func) << 26 |
3343 gen6_translate_pipe_blend(rt->rgb_func) << 11 |
3347 if (rt->rgb_func != rt->alpha_func ||
3348 rgb_src != a_src || rgb_dst != a_dst)
/*
 * Precompute the per-render-target blend dwords of an ilo_blend_state from
 * a pipe_blend_state, so that they can later be combined at emit time.
 *
 * With independent blending, every element of blend->cso is initialized;
 * the other path clears blend->cso with memset first.
 *
 * For each cso processed:
 *   - payload[0] starts at zero; payload[1] starts from the render target
 *     clamp range setting
 *   - each channel absent from rt->colormask sets one bit of payload[1]:
 *     alpha 27, red 26, green 25, blue 24
 *   - when state->logicop_enable is set, dw_logicop gets bit 22 plus the
 *     translated logic op at bit 18, and the forced-one blend dword is
 *     cleared; otherwise dw_logicop is zero and both blend dwords are
 *     computed with blend_get_rt_blend_enable()
 *   - dw_alpha_mod gets bit 31 for alpha-to-coverage (plus bit 29 on GEN7
 *     and later) and bit 30 for alpha-to-one, the latter only when dual
 *     source blending is not in use
 *
 * blend->dual_blend starts false and is set to true by the last visible
 * statement.
 *
 * NOTE(review): several guarding lines are not visible in this listing:
 * the non-independent branch (presumably num_cso = 1), the condition on
 * bit 12 of payload[1], the rest of the payload[1] initializer, and the
 * condition under which blend->dual_blend becomes true (presumably the
 * local dual_blend) -- confirm against the full file.
 */
3355 ilo_gpe_init_blend(const struct ilo_dev_info *dev,
3356 const struct pipe_blend_state *state,
3357 struct ilo_blend_state *blend)
3359 unsigned num_cso, i;
3361 ILO_GPE_VALID_GEN(dev, 6, 7);
3363 if (state->independent_blend_enable) {
3364 num_cso = Elements(blend->cso);
3367 memset(blend->cso, 0, sizeof(blend->cso));
3371 blend->independent_blend_enable = state->independent_blend_enable;
3372 blend->alpha_to_coverage = state->alpha_to_coverage;
3373 blend->dual_blend = false;
3375 for (i = 0; i < num_cso; i++) {
3376 const struct pipe_rt_blend_state *rt = &state->rt[i];
3377 struct ilo_blend_cso *cso = &blend->cso[i];
3380 cso->payload[0] = 0;
3381 cso->payload[1] = BRW_RENDERTARGET_CLAMPRANGE_FORMAT << 2 |
3384 if (!(rt->colormask & PIPE_MASK_A))
3385 cso->payload[1] |= 1 << 27;
3386 if (!(rt->colormask & PIPE_MASK_R))
3387 cso->payload[1] |= 1 << 26;
3388 if (!(rt->colormask & PIPE_MASK_G))
3389 cso->payload[1] |= 1 << 25;
3390 if (!(rt->colormask & PIPE_MASK_B))
3391 cso->payload[1] |= 1 << 24;
3394 cso->payload[1] |= 1 << 12;
3397 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
3399 * "Color Buffer Blending and Logic Ops must not be enabled
3400 * simultaneously, or behavior is UNDEFINED."
3402 * Since state->logicop_enable takes precedence over rt->blend_enable,
3403 * no special care is needed.
3405 if (state->logicop_enable) {
3406 cso->dw_logicop = 1 << 22 |
3407 gen6_translate_pipe_logicop(state->logicop_func) << 18;
3410 cso->dw_blend_dst_alpha_forced_one = 0;
3415 cso->dw_logicop = 0;
3417 cso->dw_blend = blend_get_rt_blend_enable(dev, rt, false);
3418 cso->dw_blend_dst_alpha_forced_one =
3419 blend_get_rt_blend_enable(dev, rt, true);
3421 dual_blend = (rt->blend_enable &&
3422 util_blend_state_is_dual(state, i));
3425 cso->dw_alpha_mod = 0;
3427 if (state->alpha_to_coverage) {
3428 cso->dw_alpha_mod |= 1 << 31;
3430 if (dev->gen >= ILO_GEN(7))
3431 cso->dw_alpha_mod |= 1 << 29;
3435 * From the Sandy Bridge PRM, volume 2 part 1, page 378:
3437 * "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
3438 * must be disabled."
3440 if (state->alpha_to_one && !dual_blend)
3441 cso->dw_alpha_mod |= 1 << 30;
3444 blend->dual_blend = true;
/*
 * Write the BLEND_STATE array and return the offset reported by
 * ilo_cp_steal_ptr().  Two dwords are written per render target and the
 * requested alignment is 64 / 4.
 *
 * The number of targets is fb->state.nr_cbufs, asserted to be at most 8.
 * For each target the precomputed dwords of the matching ilo_blend_cso are
 * combined according to properties of the color buffer format:
 *   - blend bits are ORed into dw[0] unless the format is pure integer;
 *     the dst-alpha-forced-one variant is used for
 *     PIPE_FORMAT_B8G8R8X8_UNORM, where the hardware format has an alpha
 *     channel that the pipe format lacks
 *   - logic op bits are ORed into dw[1]
 *   - alpha-to-coverage and alpha-to-one bits are ORed into dw[1] only when
 *     fb->num_samples is greater than one
 *   - the alpha test function is added when alpha->enabled is set and the
 *     format is not pure integer
 *
 * The cso index is i with independent blending and 0 otherwise.
 *
 * NOTE(review): the color buffer format is looked up with that same idx
 * rather than with i, so without independent blending every target is
 * classified by the format of cbufs[0] -- confirm this is intended.
 *
 * NOTE(review): the handling of zero render targets (alpha test without a
 * color buffer), the initial value of rt_is_unorm, the guard on the
 * logic op OR (presumably rt_is_unorm), and the advance of dw between
 * iterations are not visible in this listing -- confirm.
 */
3449 gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
3450 const struct ilo_blend_state *blend,
3451 const struct ilo_fb_state *fb,
3452 const struct pipe_alpha_state *alpha,
3455 const int state_align = 64 / 4;
3457 uint32_t state_offset, *dw;
3458 unsigned num_targets, i;
3460 ILO_GPE_VALID_GEN(dev, 6, 7);
3463 * From the Sandy Bridge PRM, volume 2 part 1, page 376:
3465 * "The blend state is stored as an array of up to 8 elements..."
3467 num_targets = fb->state.nr_cbufs;
3468 assert(num_targets <= 8);
3471 if (!alpha->enabled)
3473 /* to be able to reference alpha func */
3477 state_len = 2 * num_targets;
3479 dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
3480 state_len, state_align, &state_offset);
3482 for (i = 0; i < num_targets; i++) {
3483 const unsigned idx = (blend->independent_blend_enable) ? i : 0;
3484 const struct ilo_blend_cso *cso = &blend->cso[idx];
3485 const int num_samples = fb->num_samples;
3486 const struct util_format_description *format_desc =
3487 (idx < fb->state.nr_cbufs) ?
3488 util_format_description(fb->state.cbufs[idx]->format) : NULL;
3489 bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
3492 rt_is_pure_integer = false;
3493 rt_dst_alpha_forced_one = false;
3498 switch (format_desc->format) {
3499 case PIPE_FORMAT_B8G8R8X8_UNORM:
3500 /* force alpha to one when the HW format has alpha */
3501 assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
3502 == BRW_SURFACEFORMAT_B8G8R8A8_UNORM);
3503 rt_dst_alpha_forced_one = true;
3509 for (ch = 0; ch < 4; ch++) {
3510 if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
3513 if (format_desc->channel[ch].pure_integer) {
3514 rt_is_unorm = false;
3515 rt_is_pure_integer = true;
3519 if (!format_desc->channel[ch].normalized ||
3520 format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
3521 rt_is_unorm = false;
3525 dw[0] = cso->payload[0];
3526 dw[1] = cso->payload[1];
3528 if (!rt_is_pure_integer) {
3529 if (rt_dst_alpha_forced_one)
3530 dw[0] |= cso->dw_blend_dst_alpha_forced_one;
3532 dw[0] |= cso->dw_blend;
3536 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
3538 * "Logic Ops are only supported on *_UNORM surfaces (excluding
3539 * _SRGB variants), otherwise Logic Ops must be DISABLED."
3541 * Since logicop is ignored for non-UNORM color buffers, no special care
3545 dw[1] |= cso->dw_logicop;
3548 * From the Sandy Bridge PRM, volume 2 part 1, page 356:
3550 * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
3551 * Dither both must be disabled."
3553 * There is no such limitation on GEN7, or for AlphaToOne. But GL
3554 * requires that anyway.
3556 if (num_samples > 1)
3557 dw[1] |= cso->dw_alpha_mod;
3560 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
3562 * "Alpha Test can only be enabled if Pixel Shader outputs a float
3565 if (alpha->enabled && !rt_is_pure_integer) {
3567 gen6_translate_dsa_func(alpha->func) << 13;
3573 return state_offset;
/*
 * Precompute the depth/stencil dwords of an ilo_dsa_state from a
 * pipe_depth_stencil_alpha_state.  The alpha state is copied verbatim into
 * dsa->alpha for later use.
 *
 * Three dwords are produced (the static assert requires dsa->payload to
 * hold at least three):
 *   dw[0] - stencil functions and ops.  The front face (stencil0) uses
 *           bits 28, 25, 22 and 19 for func, fail, zfail and zpass; the
 *           back face (stencil1) uses bits 12, 9, 6 and 3.
 *   dw[1] - stencil masks.  Front valuemask at bit 24 and writemask at
 *           bit 16; back valuemask at bit 8 and writemask at bit 0.
 *   dw[2] - depth enable at bit 31, depth write mask at bit 26, and the
 *           compare function at bit 27, taken either from depth->func or
 *           forced to BRW_COMPAREFUNCTION_ALWAYS.
 *
 * NOTE(review): where dw points (presumably dsa->payload), the start of
 * each dw[0] expression, the action taken when a stencil writemask is
 * non-zero, the paths for disabled stencil, and the condition selecting
 * between the two depth compare functions are not visible in this
 * listing -- confirm against the full file.
 */
3577 ilo_gpe_init_dsa(const struct ilo_dev_info *dev,
3578 const struct pipe_depth_stencil_alpha_state *state,
3579 struct ilo_dsa_state *dsa)
3581 const struct pipe_depth_state *depth = &state->depth;
3582 const struct pipe_stencil_state *stencil0 = &state->stencil[0];
3583 const struct pipe_stencil_state *stencil1 = &state->stencil[1];
3586 ILO_GPE_VALID_GEN(dev, 6, 7);
3588 /* copy alpha state for later use */
3589 dsa->alpha = state->alpha;
3591 STATIC_ASSERT(Elements(dsa->payload) >= 3);
3595 * From the Sandy Bridge PRM, volume 2 part 1, page 359:
3597 * "If the Depth Buffer is either undefined or does not have a surface
3598 * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
3599 * stencil buffer is disabled, Stencil Test Enable must be DISABLED"
3601 * From the Sandy Bridge PRM, volume 2 part 1, page 370:
3603 * "This field (Stencil Test Enable) cannot be enabled if
3604 * Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
3606 * TODO We do not check these yet.
3608 if (stencil0->enabled) {
3610 gen6_translate_dsa_func(stencil0->func) << 28 |
3611 gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 |
3612 gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 |
3613 gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19;
3614 if (stencil0->writemask)
3617 dw[1] = stencil0->valuemask << 24 |
3618 stencil0->writemask << 16;
3620 if (stencil1->enabled) {
3622 gen6_translate_dsa_func(stencil1->func) << 12 |
3623 gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 |
3624 gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 |
3625 gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3;
3626 if (stencil1->writemask)
3629 dw[1] |= stencil1->valuemask << 8 |
3630 stencil1->writemask;
3639 * From the Sandy Bridge PRM, volume 2 part 1, page 360:
3641 * "Enabling the Depth Test function without defining a Depth Buffer is
3644 * From the Sandy Bridge PRM, volume 2 part 1, page 375:
3646 * "A Depth Buffer must be defined before enabling writes to it, or
3647 * operation is UNDEFINED."
3649 * TODO We do not check these yet.
3651 dw[2] = depth->enabled << 31 |
3652 depth->writemask << 26;
3654 dw[2] |= gen6_translate_dsa_func(depth->func) << 27;
3656 dw[2] |= BRW_COMPAREFUNCTION_ALWAYS << 27;
/*
 * Write a three-dword DEPTH_STENCIL_STATE and return the offset reported by
 * ilo_cp_steal_ptr().  The requested alignment is 64 / 4.
 *
 * The three dwords are copied unchanged from dsa->payload[0..2].
 *
 * NOTE(review): the declaration of cp is not visible in this listing.
 */
3660 gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
3661 const struct ilo_dsa_state *dsa,
3664 const int state_align = 64 / 4;
3665 const int state_len = 3;
3666 uint32_t state_offset, *dw;
3669 ILO_GPE_VALID_GEN(dev, 6, 7);
3671 dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
3672 state_len, state_align, &state_offset);
/* straight copy of the precomputed payload */
3674 dw[0] = dsa->payload[0];
3675 dw[1] = dsa->payload[1];
3676 dw[2] = dsa->payload[2];
3678 return state_offset;
/*
 * Convert pipe scissor rectangles into packed SCISSOR_RECT dwords stored in
 * scissor->payload, starting at slot start_slot.
 *
 * Each slot takes two dwords: (min_y << 16 | min_x) followed by
 * (max_y << 16 | max_x).  The pipe state uses exclusive maxima while the
 * hardware uses inclusive ones, so a non-empty rectangle stores maxx - 1
 * and maxy - 1.
 *
 * NOTE(review): the values used for an empty rectangle are not visible in
 * this listing; the surviving comment only says that min has to be made
 * greater than max.
 *
 * NOTE(review): the visible stores index the payload by start_slot alone
 * and never by i.  Presumably start_slot is advanced on a line that is not
 * visible here; if it is not, every state would overwrite the same slot --
 * confirm against the full file.
 */
3682 ilo_gpe_set_scissor(const struct ilo_dev_info *dev,
3683 unsigned start_slot,
3684 unsigned num_states,
3685 const struct pipe_scissor_state *states,
3686 struct ilo_scissor_state *scissor)
3690 ILO_GPE_VALID_GEN(dev, 6, 7);
3692 for (i = 0; i < num_states; i++) {
3693 uint16_t min_x, min_y, max_x, max_y;
3695 /* both max and min are inclusive in SCISSOR_RECT */
3696 if (states[i].minx < states[i].maxx &&
3697 states[i].miny < states[i].maxy) {
3698 min_x = states[i].minx;
3699 min_y = states[i].miny;
3700 max_x = states[i].maxx - 1;
3701 max_y = states[i].maxy - 1;
3704 /* we have to make min greater than max */
3711 scissor->payload[start_slot * 2 + 0] = min_y << 16 | min_x;
3712 scissor->payload[start_slot * 2 + 1] = max_y << 16 | max_x;
/*
 * Fill every slot of scissor->payload with an empty rectangle.
 *
 * Each pair of dwords is set to min = (1, 1) and max = (0, 0); with the
 * packing used by ilo_gpe_set_scissor (y in the upper 16 bits, x in the
 * lower 16 bits) that makes min greater than max on both axes.
 *
 * dev is not referenced in the visible lines.
 */
3718 ilo_gpe_set_scissor_null(const struct ilo_dev_info *dev,
3719 struct ilo_scissor_state *scissor)
3723 for (i = 0; i < Elements(scissor->payload); i += 2) {
3724 scissor->payload[i + 0] = 1 << 16 | 1;
3725 scissor->payload[i + 1] = 0;
/*
 * Write the SCISSOR_RECT array and return the offset reported by
 * ilo_cp_steal_ptr().
 *
 * num_viewports is asserted to be in [1, 16].  Two dwords per viewport are
 * copied with memcpy from the precomputed scissor->payload; the byte count
 * is state_len * 4.
 *
 * NOTE(review): the declaration of cp is not visible in this listing.
 */
3730 gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev,
3731 const struct ilo_scissor_state *scissor,
3732 unsigned num_viewports,
3735 const int state_align = 32 / 4;
3736 const int state_len = 2 * num_viewports;
3737 uint32_t state_offset, *dw;
3739 ILO_GPE_VALID_GEN(dev, 6, 7);
3742 * From the Sandy Bridge PRM, volume 2 part 1, page 263:
3744 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
3745 * stored as an array of up to 16 elements..."
3747 assert(num_viewports && num_viewports <= 16);
3749 dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT",
3750 state_len, state_align, &state_offset);
3752 memcpy(dw, scissor->payload, state_len * 4);
3754 return state_offset;
/*
 * Write a BINDING_TABLE_STATE holding num_surface_states entries and return
 * the offset reported by ilo_cp_steal_ptr().
 *
 * The entry count is asserted to be at most 256.  Entries are copied with
 * memcpy from surface_states, one dword each.
 *
 * NOTE(review): the statement executed when num_surface_states is zero is
 * not visible in this listing (presumably an early return) -- confirm.  The
 * declaration of cp is not visible either.
 */
3758 gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
3759 uint32_t *surface_states,
3760 int num_surface_states,
3763 const int state_align = 32 / 4;
3764 const int state_len = num_surface_states;
3765 uint32_t state_offset, *dw;
3767 ILO_GPE_VALID_GEN(dev, 6, 7);
3770 * From the Sandy Bridge PRM, volume 4 part 1, page 69:
3772 * "It is stored as an array of up to 256 elements..."
3774 assert(num_surface_states <= 256);
3776 if (!num_surface_states)
3779 dw = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
3780 state_len, state_align, &state_offset);
3781 memcpy(dw, surface_states,
3782 num_surface_states * sizeof(surface_states[0]));
3784 return state_offset;
/*
 * Fill surf->payload with a GEN6 null SURFACE_STATE (GEN6 only, per the
 * ILO_GPE_VALID_GEN(dev, 6, 6) check).  The static assert requires the
 * payload to hold at least six dwords.
 *
 * Visible fields:
 *   dw[0] - surface type BRW_SURFACE_NULL with format
 *           BRW_SURFACEFORMAT_B8G8R8A8_UNORM
 *   dw[2] - (height - 1), (width - 1) and level in their respective fields
 *   dw[3] - (depth - 1) in the depth field, ORed with a value on a line
 *           that is not visible in this listing
 *
 * width, height, depth and level are programmed even though the surface is
 * null because, per the PRM text quoted below, they must match the depth
 * buffer state for render targets.
 *
 * NOTE(review): the PRM quote asks for R8G8B8A8_UNORM while the code
 * programs B8G8R8A8_UNORM -- confirm which one is intended.
 *
 * NOTE(review): where dw points (presumably surf->payload) and the values
 * of the remaining dwords are not visible in this listing -- confirm.
 */
3788 ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev,
3789 unsigned width, unsigned height,
3790 unsigned depth, unsigned level,
3791 struct ilo_view_surface *surf)
3795 ILO_GPE_VALID_GEN(dev, 6, 6);
3798 * From the Sandy Bridge PRM, volume 4 part 1, page 71:
3800 * "A null surface will be used in instances where an actual surface is
3801 * not bound. When a write message is generated to a null surface, no
3802 * actual surface is written to. When a read message (including any
3803 * sampling engine message) is generated to a null surface, the result
3804 * is all zeros. Note that a null surface type is allowed to be used
3805 * with all messages, even if it is not specificially indicated as
3806 * supported. All of the remaining fields in surface state are ignored
3807 * for null surfaces, with the following exceptions:
3809 * * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
3810 * depth buffer's corresponding state for all render target
3811 * surfaces, including null.
3812 * * Surface Format must be R8G8B8A8_UNORM."
3814 * From the Sandy Bridge PRM, volume 4 part 1, page 82:
3816 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
3820 STATIC_ASSERT(Elements(surf->payload) >= 6);
3823 dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
3824 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT;
3828 dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
3829 (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
3830 level << BRW_SURFACE_LOD_SHIFT;
3832 dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
/*
 * Fill surf->payload with a GEN6 SURFACE_STATE of type BRW_SURFACE_BUFFER
 * that views a range of buf as an array of structures.
 *
 * dev              device info; restricted to GEN6 by ILO_GPE_VALID_GEN
 * buf              the buffer being viewed
 * offset           byte offset of the first element; asserted to be a
 *                  multiple of the element size
 * size             size of the viewed range in bytes
 * struct_size      size of one structure in bytes; it is used as the
 *                  surface pitch and as the divisor that yields the entry
 *                  count (NOTE(review): a zero struct_size would divide by
 *                  zero; no guard is made here)
 * elem_format      format of the element, translated with
 *                  ilo_translate_color_format(); its block size is the
 *                  element size
 * is_rt            presumably marks the view as a render target; verify
 *                  against the callers
 * render_cache_rw  when true, BRW_SURFACE_RC_READ_WRITE is set in dw[0]
 * surf             view surface whose payload (at least 6 dwords) is written
 *
 * The entry count starts as size / struct_size; a trailing partial
 * structure is considered when it still has room for one element.  The
 * count must be within [1, 2^27] and is then split over three fields:
 * bits 6:0 go to width, bits 19:7 to height and bits 26:20 to depth.
 */
3842 ilo_gpe_init_view_surface_for_buffer_gen6(const struct ilo_dev_info *dev,
3843 const struct ilo_buffer *buf,
3844 unsigned offset, unsigned size,
3845 unsigned struct_size,
3846 enum pipe_format elem_format,
3847 bool is_rt, bool render_cache_rw,
3848 struct ilo_view_surface *surf)
3850 const int elem_size = util_format_get_blocksize(elem_format);
3851 int width, height, depth, pitch;
3852 int surface_format, num_entries;
3855 ILO_GPE_VALID_GEN(dev, 6, 6);
3858 * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
3859 * structure in a buffer.
3862 surface_format = ilo_translate_color_format(elem_format);
3864 num_entries = size / struct_size;
3865 /* see if there is enough space to fit another element */
3866 if (size % struct_size >= elem_size)
3870 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
3872 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
3873 * Address) specifies the base address of first element of the
3874 * surface. The surface is interpreted as a simple array of that
3875 * single element type. The address must be naturally-aligned to the
3876 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
3877 * must be 16-byte aligned).
3879 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
3880 * the base address of the first element of the surface, computed in
3881 * software by adding the surface base address to the byte offset of
3882 * the element in the buffer."
3885 assert(offset % elem_size == 0);
3888 * From the Sandy Bridge PRM, volume 4 part 1, page 77:
3890 * "For buffer surfaces, the number of entries in the buffer ranges
3893 assert(num_entries >= 1 && num_entries <= 1 << 27);
3896 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
3898 * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
3899 * indicates the size of the structure."
3901 pitch = struct_size;
3906 width = (num_entries & 0x0000007f);
3908 height = (num_entries & 0x000fff80) >> 7;
3910 depth = (num_entries & 0x07f00000) >> 20;
3912 STATIC_ASSERT(Elements(surf->payload) >= 6);
3915 dw[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
3916 surface_format << BRW_SURFACE_FORMAT_SHIFT;
3917 if (render_cache_rw)
3918 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
3922 dw[2] = height << BRW_SURFACE_HEIGHT_SHIFT |
3923 width << BRW_SURFACE_WIDTH_SHIFT;
3925 dw[3] = depth << BRW_SURFACE_DEPTH_SHIFT |
3926 pitch << BRW_SURFACE_PITCH_SHIFT;
3931 /* do not increment reference count */
/*
 * Fill surf->payload with a GEN6 SURFACE_STATE that views a range of
 * levels and layers of a texture.
 *
 * dev              device info; restricted to GEN6 by ILO_GPE_VALID_GEN
 * tex              the texture; its target selects the surface type, which
 *                  must not be BRW_SURFACE_BUFFER
 * format           view format; PIPE_FORMAT_Z32_FLOAT_S8X24_UINT is replaced
 *                  by PIPE_FORMAT_Z32_FLOAT when tex->separate_s8 is set
 * first_level      first mip level of the view, programmed as the surface
 *                  minimum LOD in dw[4]
 * num_levels       number of mip levels of the view
 * first_layer      first array layer of the view
 * num_layers       number of array layers; for non-3D targets this is the
 *                  surface depth, and (num_layers - 1) is stored in dw[4]
 * is_rt            whether the view is used as a render target; a cube map
 *                  is treated as a 2D surface in that case, and the cube
 *                  face enables are only set when is_rt is false
 * render_cache_rw  when true, BRW_SURFACE_RC_READ_WRITE is set in dw[0]
 * surf             view surface whose payload (at least 6 dwords) is written
 *
 * Size limits per surface type are checked with assertions only.  The
 * function also asserts tex->array_spacing_full and, for multisampled
 * textures, tex->interleaved, because the alternatives need GEN7+.
 *
 * For the render path the slice offset is obtained from
 * ilo_texture_get_slice_offset() and the dimensions are minified to
 * first_level; x_offset and y_offset must be multiples of 4 and 2.  For
 * linear (INTEL_TILING_NONE) textures the layer offset and the pitch are
 * asserted to be multiples of the element size.
 *
 * Width, height, depth and pitch are stored minus one.  Multisampled
 * textures (nr_samples > 1) are programmed as BRW_SURFACE_MULTISAMPLECOUNT_4.
 */
3936 ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
3937 const struct ilo_texture *tex,
3938 enum pipe_format format,
3939 unsigned first_level,
3940 unsigned num_levels,
3941 unsigned first_layer,
3942 unsigned num_layers,
3943 bool is_rt, bool render_cache_rw,
3944 struct ilo_view_surface *surf)
3946 int surface_type, surface_format;
3947 int width, height, depth, pitch, lod;
3948 unsigned layer_offset, x_offset, y_offset;
3951 ILO_GPE_VALID_GEN(dev, 6, 6);
3953 surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
3954 assert(surface_type != BRW_SURFACE_BUFFER);
3956 if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
3957 format = PIPE_FORMAT_Z32_FLOAT;
3960 surface_format = ilo_translate_render_format(format);
3962 surface_format = ilo_translate_texture_format(format);
3963 assert(surface_format >= 0);
3965 width = tex->base.width0;
3966 height = tex->base.height0;
3967 depth = (tex->base.target == PIPE_TEXTURE_3D) ?
3968 tex->base.depth0 : num_layers;
3969 pitch = tex->bo_stride;
3971 if (surface_type == BRW_SURFACE_CUBE) {
3973 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
3975 * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
3976 * range of this field (Depth) is [0,84], indicating the number of
3977 * cube array elements (equal to the number of underlying 2D array
3978 * elements divided by 6). For other surfaces, this field must be
3981 * When is_rt is true, we treat the texture as a 2D one to avoid the
3985 surface_type = BRW_SURFACE_2D;
3988 assert(num_layers % 6 == 0);
3989 depth = num_layers / 6;
3993 /* sanity check the size */
3994 assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
3995 switch (surface_type) {
3996 case BRW_SURFACE_1D:
3997 assert(width <= 8192 && height == 1 && depth <= 512);
3998 assert(first_layer < 512 && num_layers <= 512);
4000 case BRW_SURFACE_2D:
4001 assert(width <= 8192 && height <= 8192 && depth <= 512);
4002 assert(first_layer < 512 && num_layers <= 512);
4004 case BRW_SURFACE_3D:
4005 assert(width <= 2048 && height <= 2048 && depth <= 2048);
4006 assert(first_layer < 2048 && num_layers <= 512);
4008 assert(first_layer == 0);
4010 case BRW_SURFACE_CUBE:
4011 assert(width <= 8192 && height <= 8192 && depth <= 85);
4012 assert(width == height);
4013 assert(first_layer < 512 && num_layers <= 512);
4015 assert(first_layer == 0);
4018 assert(!"unexpected surface type");
4022 /* non-full array spacing is supported only on GEN7+ */
4023 assert(tex->array_spacing_full);
4024 /* non-interleaved samples are supported only on GEN7+ */
4025 if (tex->base.nr_samples > 1)
4026 assert(tex->interleaved);
4030 * Compute the offset to the layer manually.
4032 * For rendering, the hardware requires LOD to be the same for all
4033 * render targets and the depth buffer. We need to compute the offset
4034 * to the layer manually and always set LOD to 0.
4037 /* we lose the capability for layered rendering */
4038 assert(num_layers == 1);
4040 layer_offset = ilo_texture_get_slice_offset(tex,
4041 first_level, first_layer, &x_offset, &y_offset);
4043 assert(x_offset % 4 == 0);
4044 assert(y_offset % 2 == 0);
4048 /* derive the size for the LOD */
4049 width = u_minify(width, first_level);
4050 height = u_minify(height, first_level);
4051 if (surface_type == BRW_SURFACE_3D)
4052 depth = u_minify(depth, first_level);
4066 assert(num_levels == 1);
4074 lod = num_levels - 1;
4078 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
4080 * "Linear render target surface base addresses must be element-size
4081 * aligned, for non-YUV surface formats, or a multiple of 2
4082 * element-sizes for YUV surface formats. Other linear surfaces have
4083 * no alignment requirements (byte alignment is sufficient.)"
4085 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
4087 * "For linear render target surfaces, the pitch must be a multiple
4088 * of the element size for non-YUV surface formats. Pitch must be a
4089 * multiple of 2 * element size for YUV surface formats."
4091 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
4093 * "For linear surfaces, this field (X Offset) must be zero"
4095 if (tex->tiling == INTEL_TILING_NONE) {
4097 const int elem_size = util_format_get_blocksize(format);
4098 assert(layer_offset % elem_size == 0);
4099 assert(pitch % elem_size == 0);
4105 STATIC_ASSERT(Elements(surf->payload) >= 6);
4108 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
4109 surface_format << BRW_SURFACE_FORMAT_SHIFT |
4110 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT;
4112 if (surface_type == BRW_SURFACE_CUBE && !is_rt) {
4114 BRW_SURFACE_CUBEFACE_ENABLES;
4117 if (render_cache_rw)
4118 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
4120 dw[1] = layer_offset;
4122 dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
4123 (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
4124 lod << BRW_SURFACE_LOD_SHIFT;
4126 dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
4127 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT |
4128 ilo_gpe_gen6_translate_winsys_tiling(tex->tiling);
4130 dw[4] = first_level << BRW_SURFACE_MIN_LOD_SHIFT |
4132 (num_layers - 1) << 8 |
4133 ((tex->base.nr_samples > 1) ? BRW_SURFACE_MULTISAMPLECOUNT_4 :
4134 BRW_SURFACE_MULTISAMPLECOUNT_1);
4136 dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
4137 y_offset << BRW_SURFACE_Y_OFFSET_SHIFT;
4139 dw[5] |= BRW_SURFACE_VERTICAL_ALIGN_ENABLE;
4141 /* do not increment reference count */
/*
 * Copy a prepared SURFACE_STATE from surf->payload into the command parser
 * and return the offset reported by ilo_cp_steal().
 *
 * dev   device info; GEN6 and GEN7 are accepted
 * surf  view surface holding the prepared payload and the backing bo
 * cp    command parser that receives the state
 *
 * The state is 6 dwords long on GEN6 and 8 dwords long on GEN7+, and is
 * requested with an alignment of 32 / 4.  The read domain is either
 * INTEL_DOMAIN_RENDER (together with a render write domain) or
 * INTEL_DOMAIN_SAMPLER.
 */
4146 gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
4147 const struct ilo_view_surface *surf,
4151 const int state_align = 32 / 4;
4152 const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6;
4153 uint32_t state_offset;
4154 uint32_t read_domains, write_domain;
4156 ILO_GPE_VALID_GEN(dev, 6, 7);
4159 read_domains = INTEL_DOMAIN_RENDER;
4160 write_domain = INTEL_DOMAIN_RENDER;
4163 read_domains = INTEL_DOMAIN_SAMPLER;
4167 ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
4169 STATIC_ASSERT(Elements(surf->payload) >= 8);
4171 ilo_cp_write(cp, surf->payload[0]);
/*
 * payload[1] is the only dword written together with surf->bo and the
 * domains; presumably this records a relocation against the bo -- confirm
 * against ilo_cp_write_bo().
 */
4172 ilo_cp_write_bo(cp, surf->payload[1],
4173 surf->bo, read_domains, write_domain);
4174 ilo_cp_write(cp, surf->payload[2]);
4175 ilo_cp_write(cp, surf->payload[3]);
4176 ilo_cp_write(cp, surf->payload[4]);
4177 ilo_cp_write(cp, surf->payload[5]);
/* the two extra dwords exist only in the 8-dword GEN7+ layout */
4179 if (dev->gen >= ILO_GEN(7)) {
4180 ilo_cp_write(cp, surf->payload[6]);
4181 ilo_cp_write(cp, surf->payload[7]);
4186 return state_offset;
/*
 * Build and emit a buffer SURFACE_STATE for one stream output entry.
 *
 * dev      device info; restricted to GEN6 by ILO_GPE_VALID_GEN
 * so       stream output target; provides the buffer, buffer_offset and
 *          buffer_size
 * so_info  stream output layout; output[so_index] selects the entry
 *
 * The buffer offset is so->buffer_offset plus the dst_offset of the output
 * multiplied by 4, and the structure size is the stride of the output
 * buffer multiplied by 4 (dst_offset and stride are presumably counted in
 * dwords -- confirm against pipe_stream_output_info).  The element format
 * is a 32-bit float format chosen by num_components, with R32_FLOAT as
 * the fallback after the assertion for an unexpected count.
 *
 * Returns the value of gen6_emit_SURFACE_STATE().
 */
4190 gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
4191 const struct pipe_stream_output_target *so,
4192 const struct pipe_stream_output_info *so_info,
4196 struct ilo_buffer *buf = ilo_buffer(so->buffer);
4197 unsigned bo_offset, struct_size;
4198 enum pipe_format elem_format;
4199 struct ilo_view_surface surf;
4201 ILO_GPE_VALID_GEN(dev, 6, 6);
4203 bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
4204 struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
4206 switch (so_info->output[so_index].num_components) {
4208 elem_format = PIPE_FORMAT_R32_FLOAT;
4211 elem_format = PIPE_FORMAT_R32G32_FLOAT;
4214 elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
4217 elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
4220 assert(!"unexpected SO components length");
4221 elem_format = PIPE_FORMAT_R32_FLOAT;
/* is_rt is passed as false and render_cache_rw as true */
4225 ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size,
4226 struct_size, elem_format, false, true, &surf);
4228 return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
/*
 * Encode a border color into the 12-dword GEN6 border color layout.
 *
 * dev         device info; restricted to GEN6 by ILO_GPE_VALID_GEN
 * color       border color; only the float members color->f[0..3] are read
 * dw          destination array
 * num_dwords  capacity of dw; asserted to be at least 12
 *
 * The same color is stored several times, once per encoding.  The values
 * are copied into a local array first because they are clamped in place
 * between the encodings.
 */
4232 sampler_init_border_color_gen6(const struct ilo_dev_info *dev,
4233 const union pipe_color_union *color,
4234 uint32_t *dw, int num_dwords)
4237 color->f[0], color->f[1], color->f[2], color->f[3],
4240 ILO_GPE_VALID_GEN(dev, 6, 6);
4242 assert(num_dwords >= 12);
4245 * This state is not documented in the Sandy Bridge PRM, but in the
4246 * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1.
/* DW1-DW4: one dword per channel via fui() (presumably the raw float bits) */
4250 dw[1] = fui(rgba[0]);
4251 dw[2] = fui(rgba[1]);
4252 dw[3] = fui(rgba[2]);
4253 dw[4] = fui(rgba[3]);
/* DW5-DW6: half floats, two channels per dword */
4256 dw[5] = util_float_to_half(rgba[0]) |
4257 util_float_to_half(rgba[1]) << 16;
4258 dw[6] = util_float_to_half(rgba[2]) |
4259 util_float_to_half(rgba[3]) << 16;
4261 /* clamp to [-1.0f, 1.0f] */
4262 rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
4263 rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
4264 rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
4265 rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);
/* DW9-DW10: signed 16-bit values scaled by 32767, two channels per dword */
4268 dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) |
4269 (int16_t) util_iround(rgba[1] * 32767.0f) << 16;
4270 dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) |
4271 (int16_t) util_iround(rgba[3] * 32767.0f) << 16;
/* DW11: signed 8-bit values scaled by 127, four channels in one dword */
4274 dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) |
4275 (int8_t) util_iround(rgba[1] * 127.0f) << 8 |
4276 (int8_t) util_iround(rgba[2] * 127.0f) << 16 |
4277 (int8_t) util_iround(rgba[3] * 127.0f) << 24;
4279 /* clamp to [0.0f, 1.0f] */
4280 rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
4281 rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
4282 rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
4283 rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);
/* DW0: unsigned 8-bit values scaled by 255, four channels in one dword */
4286 dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) |
4287 (uint8_t) util_iround(rgba[1] * 255.0f) << 8 |
4288 (uint8_t) util_iround(rgba[2] * 255.0f) << 16 |
4289 (uint8_t) util_iround(rgba[3] * 255.0f) << 24;
/* DW7-DW8: unsigned 16-bit values scaled by 65535, two channels per dword */
4292 dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) |
4293 (uint16_t) util_iround(rgba[1] * 65535.0f) << 16;
4294 dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) |
4295 (uint16_t) util_iround(rgba[3] * 65535.0f) << 16;
/*
 * Translate a pipe_sampler_state into the precomputed hardware words kept
 * in an ilo_sampler_cso.
 *
 * dev      device info; GEN6 and GEN7 are accepted and select the layout
 * state    the Gallium sampler state to translate
 * sampler  destination; it is zeroed with memset() first
 *
 * What is computed:
 *  - mip, min and mag filters through gen6_translate_tex_mipfilter() and
 *    gen6_translate_tex_filter()
 *  - sampler->anisotropic is set to state->max_anisotropy; the hardware
 *    ratio is max_anisotropy / 2 - 1 for values in [2, 16],
 *    BRW_ANISORATIO_16 above 16 and BRW_ANISORATIO_2 otherwise
 *  - LOD bias, min LOD and max LOD as fixed point: scaled by 256 with the
 *    LODs clamped to [0, 14] on GEN7+, scaled by 64 with the LODs clamped
 *    to [0, 13] otherwise; the bias is clamped to [-16, 15.9] on both
 *  - when mipmapping is disabled and min LOD is non-zero, the mag filter
 *    is replaced by the min filter
 *  - the saturate_s/t/r flags are set for PIPE_TEX_WRAP_CLAMP when the min
 *    image filter is not nearest
 *  - three wrap words: the regular one, a 1D variant that uses
 *    BRW_TEXCOORDMODE_WRAP for the two unused coordinates, and a cube
 *    variant that uses BRW_TEXCOORDMODE_CUBE for seamless cube maps with
 *    a non-nearest filter and BRW_TEXCOORDMODE_CLAMP otherwise
 *  - two filter words: the regular one and an anisotropic variant
 *
 * For non-normalized coordinates the wrap modes and filters are only
 * checked with assertions, and the mip filter is forced to
 * BRW_MIPFILTER_NONE.
 *
 * payload[0..2] receive dw0, dw1 and dw3.  The border color follows at
 * payload[3]: it is copied from state->border_color.ui in the branch that
 * requires at least 7 payload dwords, and encoded into 12 dwords by
 * sampler_init_border_color_gen6() in the branch that requires at least 15.
 */
4299 ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
4300 const struct pipe_sampler_state *state,
4301 struct ilo_sampler_cso *sampler)
4303 int mip_filter, min_filter, mag_filter, max_aniso;
4304 int lod_bias, max_lod, min_lod;
4305 int wrap_s, wrap_t, wrap_r, wrap_cube;
4306 bool clamp_is_to_edge;
4307 uint32_t dw0, dw1, dw3;
4309 ILO_GPE_VALID_GEN(dev, 6, 7);
4311 memset(sampler, 0, sizeof(*sampler));
4313 mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter);
4314 min_filter = gen6_translate_tex_filter(state->min_img_filter);
4315 mag_filter = gen6_translate_tex_filter(state->mag_img_filter);
4317 sampler->anisotropic = state->max_anisotropy;
4319 if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
4320 max_aniso = state->max_anisotropy / 2 - 1;
4321 else if (state->max_anisotropy > 16)
4322 max_aniso = BRW_ANISORATIO_16;
4324 max_aniso = BRW_ANISORATIO_2;
4328 * Here is how the hardware calculate per-pixel LOD, from my reading of the
4331 * 1) LOD is set to log2(ratio of texels to pixels) if not specified in
4332 * other ways. The number of texels is measured using level
4334 * 2) Bias is added to LOD.
4335 * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
4336 * compared with Base to determine whether magnification or
4337 * minification is needed. (if preclamp is disabled, LOD is compared
4338 * with Base before clamping)
4339 * 4) If magnification is needed, or no mipmapping is requested, LOD is
4340 * set to floor(MinLod).
4341 * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
4343 * With Gallium interface, Base is always zero and
4344 * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
4346 if (dev->gen >= ILO_GEN(7)) {
4347 const float scale = 256.0f;
4349 /* [-16.0, 16.0) in S4.8 */
4351 (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
4354 /* [0.0, 14.0] in U4.8 */
4355 max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale);
4356 min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale);
4359 const float scale = 64.0f;
4361 /* [-16.0, 16.0) in S4.6 */
4363 (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
4366 /* [0.0, 13.0] in U4.6 */
4367 max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale);
4368 min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale);
4372 * We want LOD to be clamped to determine magnification/minification, and
4373 * get set to zero when it is magnification or when mipmapping is disabled.
4374 * The hardware would set LOD to floor(MinLod) and that is a problem when
4375 * MinLod is greater than or equal to 1.0f.
4377 * With Base being zero, it is always minification when MinLod is non-zero.
4378 * To achieve our goal, we just need to set MinLod to zero and set
4379 * MagFilter to MinFilter when mipmapping is disabled.
4381 if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
4383 mag_filter = min_filter;
4387 * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
4388 * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering, PIPE_TEX_WRAP_CLAMP
4389 * means PIPE_TEX_WRAP_CLAMP_TO_BORDER while additionally clamping the
4390 * texture coordinates to [0.0, 1.0].
4392 * The clamping will be taken care of in the shaders. There are two
4393 * filters here, but let the minification one has a say.
4395 clamp_is_to_edge = (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
4396 if (!clamp_is_to_edge) {
4397 sampler->saturate_s = (state->wrap_s == PIPE_TEX_WRAP_CLAMP);
4398 sampler->saturate_t = (state->wrap_t == PIPE_TEX_WRAP_CLAMP);
4399 sampler->saturate_r = (state->wrap_r == PIPE_TEX_WRAP_CLAMP);
4402 /* determine wrap s/t/r */
4403 wrap_s = gen6_translate_tex_wrap(state->wrap_s, clamp_is_to_edge);
4404 wrap_t = gen6_translate_tex_wrap(state->wrap_t, clamp_is_to_edge);
4405 wrap_r = gen6_translate_tex_wrap(state->wrap_r, clamp_is_to_edge);
4408 * From the Sandy Bridge PRM, volume 4 part 1, page 107:
4410 * "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
4411 * and TEXCOORDMODE_CUBE settings are valid, and each TC component
4412 * must have the same Address Control mode."
4414 * From the Ivy Bridge PRM, volume 4 part 1, page 96:
4416 * "This field (Cube Surface Control Mode) must be set to
4417 * CUBECTRLMODE_PROGRAMMED"
4419 * Therefore, we cannot use "Cube Surface Control Mode" for semless cube
4422 if (state->seamless_cube_map &&
4423 (state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
4424 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
4425 wrap_cube = BRW_TEXCOORDMODE_CUBE;
4428 wrap_cube = BRW_TEXCOORDMODE_CLAMP;
4431 if (!state->normalized_coords) {
4433 * From the Ivy Bridge PRM, volume 4 part 1, page 98:
4435 * "The following state must be set as indicated if this field
4436 * (Non-normalized Coordinate Enable) is enabled:
4438 * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
4439 * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
4440 * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
4441 * - Mag Mode Filter must be MAPFILTER_NEAREST or
4443 * - Min Mode Filter must be MAPFILTER_NEAREST or
4445 * - Mip Mode Filter must be MIPFILTER_NONE.
4446 * - Min LOD must be 0.
4447 * - Max LOD must be 0.
4448 * - MIP Count must be 0.
4449 * - Surface Min LOD must be 0.
4450 * - Texture LOD Bias must be 0."
4452 assert(wrap_s == BRW_TEXCOORDMODE_CLAMP ||
4453 wrap_s == BRW_TEXCOORDMODE_CLAMP_BORDER);
4454 assert(wrap_t == BRW_TEXCOORDMODE_CLAMP ||
4455 wrap_t == BRW_TEXCOORDMODE_CLAMP_BORDER);
4456 assert(wrap_r == BRW_TEXCOORDMODE_CLAMP ||
4457 wrap_r == BRW_TEXCOORDMODE_CLAMP_BORDER);
4459 assert(mag_filter == BRW_MAPFILTER_NEAREST ||
4460 mag_filter == BRW_MAPFILTER_LINEAR);
4461 assert(min_filter == BRW_MAPFILTER_NEAREST ||
4462 min_filter == BRW_MAPFILTER_LINEAR);
4464 /* work around a bug in util_blitter */
4465 mip_filter = BRW_MIPFILTER_NONE;
4467 assert(mip_filter == BRW_MIPFILTER_NONE);
4470 if (dev->gen >= ILO_GEN(7)) {
4475 sampler->dw_filter = mag_filter << 17 |
4478 sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
4479 BRW_MAPFILTER_ANISOTROPIC << 14 |
4482 dw1 = min_lod << 20 |
4485 if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
4486 dw1 |= gen6_translate_shadow_func(state->compare_func) << 1;
4488 dw3 = max_aniso << 19;
4490 /* round the coordinates for linear filtering */
4491 if (min_filter != BRW_MAPFILTER_NEAREST) {
4492 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
4493 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
4494 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
4496 if (mag_filter != BRW_MAPFILTER_NEAREST) {
4497 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
4498 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
4499 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
4502 if (!state->normalized_coords)
4505 sampler->dw_wrap = wrap_s << 6 |
4510 * As noted in the classic i965 driver, the HW may still reference
4511 * wrap_t and wrap_r for 1D textures. We need to set them to a safe
4514 sampler->dw_wrap_1d = wrap_s << 6 |
4515 BRW_TEXCOORDMODE_WRAP << 3 |
4516 BRW_TEXCOORDMODE_WRAP;
4518 sampler->dw_wrap_cube = wrap_cube << 6 |
4522 STATIC_ASSERT(Elements(sampler->payload) >= 7);
4524 sampler->payload[0] = dw0;
4525 sampler->payload[1] = dw1;
4526 sampler->payload[2] = dw3;
4528 memcpy(&sampler->payload[3],
4529 state->border_color.ui, sizeof(state->border_color.ui));
4536 if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
4537 dw0 |= gen6_translate_shadow_func(state->compare_func);
4539 sampler->dw_filter = (min_filter != mag_filter) << 27 |
4543 sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
4544 BRW_MAPFILTER_ANISOTROPIC << 14;
4546 dw1 = min_lod << 22 |
4549 sampler->dw_wrap = wrap_s << 6 |
4553 sampler->dw_wrap_1d = wrap_s << 6 |
4554 BRW_TEXCOORDMODE_WRAP << 3 |
4555 BRW_TEXCOORDMODE_WRAP;
4557 sampler->dw_wrap_cube = wrap_cube << 6 |
4561 dw3 = max_aniso << 19;
4563 /* round the coordinates for linear filtering */
4564 if (min_filter != BRW_MAPFILTER_NEAREST) {
4565 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
4566 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
4567 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
4569 if (mag_filter != BRW_MAPFILTER_NEAREST) {
4570 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
4571 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
4572 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
4575 if (!state->normalized_coords)
4578 STATIC_ASSERT(Elements(sampler->payload) >= 15);
4580 sampler->payload[0] = dw0;
4581 sampler->payload[1] = dw1;
4582 sampler->payload[2] = dw3;
4584 sampler_init_border_color_gen6(dev,
4585 &state->border_color, &sampler->payload[3], 12);
/*
 * Emit an array of SAMPLER_STATE entries, 4 dwords per sampler, and return
 * the offset reported by ilo_cp_steal_ptr().
 *
 * dev                    device info; GEN6 and GEN7 are accepted
 * samplers               per-slot sampler CSOs; entries may be NULL
 * views                  per-slot sampler views; entries may be NULL
 * sampler_border_colors  per-slot border color values; the low 5 bits of
 *                        each must be zero (asserted), so they are
 *                        presumably 32-byte aligned state offsets --
 *                        confirm against the caller
 *
 * At most 16 samplers are accepted (asserted).  A slot with a NULL sampler
 * or a NULL view is emitted as a disabled sampler.
 *
 * For every other slot the filter and wrap words are picked by the texture
 * target of the view: 1D uses dw_wrap_1d, cube uses dw_wrap_cube, and all
 * other targets use dw_wrap.  The anisotropic filter word is used when
 * sampler->anisotropic is non-zero, except for 3D textures, which always
 * use the regular filter word.
 *
 * The four dwords are payload[0], payload[1], the border color value and
 * payload[2] of the CSO.  As a workaround for the DevSNB errata quoted in
 * the loop, view->u.tex.first_level is OR-ed into dw[0] at bit 22.
 */
4590 gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
4591 const struct ilo_sampler_cso * const *samplers,
4592 const struct pipe_sampler_view * const *views,
4593 const uint32_t *sampler_border_colors,
4597 const int state_align = 32 / 4;
4598 const int state_len = 4 * num_samplers;
4599 uint32_t state_offset, *dw;
4602 ILO_GPE_VALID_GEN(dev, 6, 7);
4605 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
4607 * "The sampler state is stored as an array of up to 16 elements..."
4609 assert(num_samplers <= 16);
4614 dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE",
4615 state_len, state_align, &state_offset);
4617 for (i = 0; i < num_samplers; i++) {
4618 const struct ilo_sampler_cso *sampler = samplers[i];
4619 const struct pipe_sampler_view *view = views[i];
4620 const uint32_t border_color = sampler_border_colors[i];
4621 uint32_t dw_filter, dw_wrap;
4623 /* there may be holes */
4624 if (!sampler || !view) {
4625 /* disabled sampler */
4635 /* determine filter and wrap modes */
4636 switch (view->texture->target) {
4637 case PIPE_TEXTURE_1D:
4638 dw_filter = (sampler->anisotropic) ?
4639 sampler->dw_filter_aniso : sampler->dw_filter;
4640 dw_wrap = sampler->dw_wrap_1d;
4642 case PIPE_TEXTURE_3D:
4644 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
4646 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
4647 * surfaces of type SURFTYPE_3D."
4649 dw_filter = sampler->dw_filter;
4650 dw_wrap = sampler->dw_wrap;
4652 case PIPE_TEXTURE_CUBE:
4653 dw_filter = (sampler->anisotropic) ?
4654 sampler->dw_filter_aniso : sampler->dw_filter;
4655 dw_wrap = sampler->dw_wrap_cube;
4658 dw_filter = (sampler->anisotropic) ?
4659 sampler->dw_filter_aniso : sampler->dw_filter;
4660 dw_wrap = sampler->dw_wrap;
4664 dw[0] = sampler->payload[0];
4665 dw[1] = sampler->payload[1];
4666 assert(!(border_color & 0x1f));
4667 dw[2] = border_color;
4668 dw[3] = sampler->payload[2];
4672 if (dev->gen >= ILO_GEN(7)) {
4677 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
4679 * "[DevSNB] Errata: Incorrect behavior is observed in cases
4680 * where the min and mag mode filters are different and
4681 * SurfMinLOD is nonzero. The determination of MagMode uses the
4682 * following equation instead of the one in the above
4683 * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
4685 * As a way to work around that, we set Base to
4686 * view->u.tex.first_level.
4688 dw[0] |= view->u.tex.first_level << 22;
4696 return state_offset;
/*
 * Emit the SAMPLER_BORDER_COLOR_STATE of a sampler CSO and return the
 * offset reported by ilo_cp_steal_ptr().
 *
 * dev      device info; GEN6 and GEN7 are accepted
 * sampler  sampler CSO; the border color is read from payload[3] onwards
 *
 * The state is 4 dwords long on GEN7+ and 12 dwords long otherwise, and is
 * requested with an alignment of 32 / 4.
 */
4700 gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
4701 const struct ilo_sampler_cso *sampler,
4704 const int state_align = 32 / 4;
4705 const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12;
4706 uint32_t state_offset, *dw;
4708 ILO_GPE_VALID_GEN(dev, 6, 7);
4710 dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
4711 state_len, state_align, &state_offset);
/* state_len counts dwords, hence the factor of 4 for the byte count */
4713 memcpy(dw, &sampler->payload[3], state_len * 4);
4715 return state_offset;
/*
 * Reserve space for a push constant buffer in the command parser and
 * return the offset reported by ilo_cp_steal_ptr().
 *
 * dev   device info; GEN6 and GEN7 are accepted
 * size  requested size in bytes; the reservation is rounded up to a
 *       multiple of 32 bytes
 * pcb   output pointer (NOTE(review): presumably receives the address of
 *       the reserved storage so that the caller can fill it in -- confirm)
 *
 * The bytes between size and the rounded-up size are cleared to zero; the
 * first size bytes are not initialized here.
 */
4719 gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
4720 int size, void **pcb,
4724 * For all VS, GS, FS, and CS push constant buffers, they must be aligned
4725 * to 32 bytes, and their sizes are specified in 256-bit units.
4727 const int state_align = 32 / 4;
/* length in dwords after rounding the byte size up to 32 bytes */
4728 const int state_len = align(size, 32) / 4;
4729 uint32_t state_offset;
4732 ILO_GPE_VALID_GEN(dev, 6, 7);
4734 buf = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
4735 state_len, state_align, &state_offset);
4737 /* zero out the unused range */
4738 if (size < state_len * 4)
4739 memset(&buf[size], 0, state_len * 4 - size);
4744 return state_offset;
4748 gen6_estimate_command_size(const struct ilo_dev_info *dev,
4749 enum ilo_gpe_gen6_command cmd,
4752 static const struct {
4755 } gen6_command_size_table[ILO_GPE_GEN6_COMMAND_COUNT] = {
4756 [ILO_GPE_GEN6_STATE_BASE_ADDRESS] = { 0, 10 },
4757 [ILO_GPE_GEN6_STATE_SIP] = { 0, 2 },
4758 [ILO_GPE_GEN6_3DSTATE_VF_STATISTICS] = { 0, 1 },
4759 [ILO_GPE_GEN6_PIPELINE_SELECT] = { 0, 1 },
4760 [ILO_GPE_GEN6_MEDIA_VFE_STATE] = { 0, 8 },
4761 [ILO_GPE_GEN6_MEDIA_CURBE_LOAD] = { 0, 4 },
4762 [ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 },
4763 [ILO_GPE_GEN6_MEDIA_GATEWAY_STATE] = { 0, 2 },
4764 [ILO_GPE_GEN6_MEDIA_STATE_FLUSH] = { 0, 2 },
4765 [ILO_GPE_GEN6_MEDIA_OBJECT_WALKER] = { 17, 1 },
4766 [ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS] = { 0, 4 },
4767 [ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS] = { 0, 4 },
4768 [ILO_GPE_GEN6_3DSTATE_URB] = { 0, 3 },
4769 [ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS] = { 1, 4 },
4770 [ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 },
4771 [ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER] = { 0, 3 },
4772 [ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS] = { 0, 4 },
4773 [ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS] = { 0, 4 },
4774 [ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 },
4775 [ILO_GPE_GEN6_3DSTATE_VS] = { 0, 6 },
4776 [ILO_GPE_GEN6_3DSTATE_GS] = { 0, 7 },
4777 [ILO_GPE_GEN6_3DSTATE_CLIP] = { 0, 4 },
4778 [ILO_GPE_GEN6_3DSTATE_SF] = { 0, 20 },
4779 [ILO_GPE_GEN6_3DSTATE_WM] = { 0, 9 },
4780 [ILO_GPE_GEN6_3DSTATE_CONSTANT_VS] = { 0, 5 },
4781 [ILO_GPE_GEN6_3DSTATE_CONSTANT_GS] = { 0, 5 },
4782 [ILO_GPE_GEN6_3DSTATE_CONSTANT_PS] = { 0, 5 },
4783 [ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK] = { 0, 2 },
4784 [ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 },
4785 [ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER] = { 0, 7 },
4786 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 },
4787 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33 },
4788 [ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE] = { 0, 3 },
4789 [ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 },
4790 [ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX] = { 0, 4 },
4791 [ILO_GPE_GEN6_3DSTATE_MULTISAMPLE] = { 0, 3 },
4792 [ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER] = { 0, 3 },
4793 [ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 },
4794 [ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS] = { 0, 2 },
4795 [ILO_GPE_GEN6_PIPE_CONTROL] = { 0, 5 },
4796 [ILO_GPE_GEN6_3DPRIMITIVE] = { 0, 6 },
4798 const int header = gen6_command_size_table[cmd].header;
4799 const int body = gen6_command_size_table[arg].body;
4800 const int count = arg;
4802 ILO_GPE_VALID_GEN(dev, 6, 6);
4803 assert(cmd < ILO_GPE_GEN6_COMMAND_COUNT);
4805 return (likely(count)) ? header + body * count : 0;
/*
 * Estimate the size of a GEN6 dynamic or surface state, including the
 * worst-case padding needed to align it.
 *
 * dev    device info; restricted to GEN6 by ILO_GPE_VALID_GEN
 * state  the state to estimate; must be less than ILO_GPE_GEN6_STATE_COUNT
 *
 * Each table entry holds the alignment, the size of one element (body)
 * and whether the state is an array.  With count taken from arg:
 *  - array states are estimated as (alignment - 1) + body * count, so
 *    only the start of the array is padded
 *  - the other states are estimated as (alignment - 1) + body, plus
 *    util_align_npot(body, alignment) for each of the remaining
 *    (count - 1) instances, because every instance is aligned
 *
 * NOTE(review): the table is indexed by state in the initializers of the
 * locals, which run before the range assertion on state.
 */
4809 gen6_estimate_state_size(const struct ilo_dev_info *dev,
4810 enum ilo_gpe_gen6_state state,
4813 static const struct {
4817 } gen6_state_size_table[ILO_GPE_GEN6_STATE_COUNT] = {
4818 [ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true },
4819 [ILO_GPE_GEN6_SF_VIEWPORT] = { 8, 8, true },
4820 [ILO_GPE_GEN6_CLIP_VIEWPORT] = { 8, 4, true },
4821 [ILO_GPE_GEN6_CC_VIEWPORT] = { 8, 2, true },
4822 [ILO_GPE_GEN6_COLOR_CALC_STATE] = { 16, 6, false },
4823 [ILO_GPE_GEN6_BLEND_STATE] = { 16, 2, true },
4824 [ILO_GPE_GEN6_DEPTH_STENCIL_STATE] = { 16, 3, false },
4825 [ILO_GPE_GEN6_SCISSOR_RECT] = { 8, 2, true },
4826 [ILO_GPE_GEN6_BINDING_TABLE_STATE] = { 8, 1, true },
4827 [ILO_GPE_GEN6_SURFACE_STATE] = { 8, 6, false },
4828 [ILO_GPE_GEN6_SAMPLER_STATE] = { 8, 4, true },
4829 [ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE] = { 8, 12, false },
4830 [ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER] = { 8, 1, true },
4832 const int alignment = gen6_state_size_table[state].alignment;
4833 const int body = gen6_state_size_table[state].body;
4834 const bool is_array = gen6_state_size_table[state].is_array;
4835 const int count = arg;
4838 ILO_GPE_VALID_GEN(dev, 6, 6);
4839 assert(state < ILO_GPE_GEN6_STATE_COUNT);
4841 if (likely(count)) {
4843 estimate = (alignment - 1) + body * count;
4846 estimate = (alignment - 1) + body;
4847 /* all states are aligned */
4849 estimate += util_align_npot(body, alignment) * (count - 1);
/*
 * The GEN6 dispatch table.
 *
 * It holds the two size estimators and one emit callback per command or
 * state.  GEN6_SET(name) expands to the designated initializer
 * .emit_name = gen6_emit_name by token pasting, so every name listed here
 * needs both an emit_name member in struct ilo_gpe_gen6 and a
 * gen6_emit_name function in this file.
 */
4859 static const struct ilo_gpe_gen6 gen6_gpe = {
4860 .estimate_command_size = gen6_estimate_command_size,
4861 .estimate_state_size = gen6_estimate_state_size,
4863 #define GEN6_SET(name) .emit_ ## name = gen6_emit_ ## name
4864 GEN6_SET(STATE_BASE_ADDRESS),
4865 GEN6_SET(STATE_SIP),
4866 GEN6_SET(3DSTATE_VF_STATISTICS),
4867 GEN6_SET(PIPELINE_SELECT),
4868 GEN6_SET(MEDIA_VFE_STATE),
4869 GEN6_SET(MEDIA_CURBE_LOAD),
4870 GEN6_SET(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
4871 GEN6_SET(MEDIA_GATEWAY_STATE),
4872 GEN6_SET(MEDIA_STATE_FLUSH),
4873 GEN6_SET(MEDIA_OBJECT_WALKER),
4874 GEN6_SET(3DSTATE_BINDING_TABLE_POINTERS),
4875 GEN6_SET(3DSTATE_SAMPLER_STATE_POINTERS),
4876 GEN6_SET(3DSTATE_URB),
4877 GEN6_SET(3DSTATE_VERTEX_BUFFERS),
4878 GEN6_SET(3DSTATE_VERTEX_ELEMENTS),
4879 GEN6_SET(3DSTATE_INDEX_BUFFER),
4880 GEN6_SET(3DSTATE_VIEWPORT_STATE_POINTERS),
4881 GEN6_SET(3DSTATE_CC_STATE_POINTERS),
4882 GEN6_SET(3DSTATE_SCISSOR_STATE_POINTERS),
4883 GEN6_SET(3DSTATE_VS),
4884 GEN6_SET(3DSTATE_GS),
4885 GEN6_SET(3DSTATE_CLIP),
4886 GEN6_SET(3DSTATE_SF),
4887 GEN6_SET(3DSTATE_WM),
4888 GEN6_SET(3DSTATE_CONSTANT_VS),
4889 GEN6_SET(3DSTATE_CONSTANT_GS),
4890 GEN6_SET(3DSTATE_CONSTANT_PS),
4891 GEN6_SET(3DSTATE_SAMPLE_MASK),
4892 GEN6_SET(3DSTATE_DRAWING_RECTANGLE),
4893 GEN6_SET(3DSTATE_DEPTH_BUFFER),
4894 GEN6_SET(3DSTATE_POLY_STIPPLE_OFFSET),
4895 GEN6_SET(3DSTATE_POLY_STIPPLE_PATTERN),
4896 GEN6_SET(3DSTATE_LINE_STIPPLE),
4897 GEN6_SET(3DSTATE_AA_LINE_PARAMETERS),
4898 GEN6_SET(3DSTATE_GS_SVB_INDEX),
4899 GEN6_SET(3DSTATE_MULTISAMPLE),
4900 GEN6_SET(3DSTATE_STENCIL_BUFFER),
4901 GEN6_SET(3DSTATE_HIER_DEPTH_BUFFER),
4902 GEN6_SET(3DSTATE_CLEAR_PARAMS),
4903 GEN6_SET(PIPE_CONTROL),
4904 GEN6_SET(3DPRIMITIVE),
4905 GEN6_SET(INTERFACE_DESCRIPTOR_DATA),
4906 GEN6_SET(SF_VIEWPORT),
4907 GEN6_SET(CLIP_VIEWPORT),
4908 GEN6_SET(CC_VIEWPORT),
4909 GEN6_SET(COLOR_CALC_STATE),
4910 GEN6_SET(BLEND_STATE),
4911 GEN6_SET(DEPTH_STENCIL_STATE),
4912 GEN6_SET(SCISSOR_RECT),
4913 GEN6_SET(BINDING_TABLE_STATE),
4914 GEN6_SET(SURFACE_STATE),
4915 GEN6_SET(so_SURFACE_STATE),
4916 GEN6_SET(SAMPLER_STATE),
4917 GEN6_SET(SAMPLER_BORDER_COLOR_STATE),
4918 GEN6_SET(push_constant_buffer),
4922 const struct ilo_gpe_gen6 *
4923 ilo_gpe_gen6_get(void)