OSDN Git Service

intel: Drop program size pointer from vec4/fs assembly getters.
[android-x86/external-mesa.git] / src / intel / compiler / brw_shader.cpp
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23
24 #include "brw_cfg.h"
25 #include "brw_eu.h"
26 #include "brw_fs.h"
27 #include "brw_nir.h"
28 #include "brw_vec4_tes.h"
29 #include "common/gen_debug.h"
30 #include "main/uniforms.h"
31 #include "util/macros.h"
32
33 enum brw_reg_type
34 brw_type_for_base_type(const struct glsl_type *type)
35 {
36    switch (type->base_type) {
37    case GLSL_TYPE_FLOAT16:
38       return BRW_REGISTER_TYPE_HF;
39    case GLSL_TYPE_FLOAT:
40       return BRW_REGISTER_TYPE_F;
41    case GLSL_TYPE_INT:
42    case GLSL_TYPE_BOOL:
43    case GLSL_TYPE_SUBROUTINE:
44       return BRW_REGISTER_TYPE_D;
45    case GLSL_TYPE_INT16:
46       return BRW_REGISTER_TYPE_W;
47    case GLSL_TYPE_UINT:
48       return BRW_REGISTER_TYPE_UD;
49    case GLSL_TYPE_UINT16:
50       return BRW_REGISTER_TYPE_UW;
51    case GLSL_TYPE_ARRAY:
52       return brw_type_for_base_type(type->fields.array);
53    case GLSL_TYPE_STRUCT:
54    case GLSL_TYPE_SAMPLER:
55    case GLSL_TYPE_ATOMIC_UINT:
56       /* These should be overridden with the type of the member when
57        * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
58        * way to trip up if we don't.
59        */
60       return BRW_REGISTER_TYPE_UD;
61    case GLSL_TYPE_IMAGE:
62       return BRW_REGISTER_TYPE_UD;
63    case GLSL_TYPE_DOUBLE:
64       return BRW_REGISTER_TYPE_DF;
65    case GLSL_TYPE_UINT64:
66       return BRW_REGISTER_TYPE_UQ;
67    case GLSL_TYPE_INT64:
68       return BRW_REGISTER_TYPE_Q;
69    case GLSL_TYPE_VOID:
70    case GLSL_TYPE_ERROR:
71    case GLSL_TYPE_INTERFACE:
72    case GLSL_TYPE_FUNCTION:
73       unreachable("not reached");
74    }
75
76    return BRW_REGISTER_TYPE_F;
77 }
78
79 enum brw_conditional_mod
80 brw_conditional_for_comparison(unsigned int op)
81 {
82    switch (op) {
83    case ir_binop_less:
84       return BRW_CONDITIONAL_L;
85    case ir_binop_gequal:
86       return BRW_CONDITIONAL_GE;
87    case ir_binop_equal:
88    case ir_binop_all_equal: /* same as equal for scalars */
89       return BRW_CONDITIONAL_Z;
90    case ir_binop_nequal:
91    case ir_binop_any_nequal: /* same as nequal for scalars */
92       return BRW_CONDITIONAL_NZ;
93    default:
94       unreachable("not reached: bad operation for comparison");
95    }
96 }
97
98 uint32_t
99 brw_math_function(enum opcode op)
100 {
101    switch (op) {
102    case SHADER_OPCODE_RCP:
103       return BRW_MATH_FUNCTION_INV;
104    case SHADER_OPCODE_RSQ:
105       return BRW_MATH_FUNCTION_RSQ;
106    case SHADER_OPCODE_SQRT:
107       return BRW_MATH_FUNCTION_SQRT;
108    case SHADER_OPCODE_EXP2:
109       return BRW_MATH_FUNCTION_EXP;
110    case SHADER_OPCODE_LOG2:
111       return BRW_MATH_FUNCTION_LOG;
112    case SHADER_OPCODE_POW:
113       return BRW_MATH_FUNCTION_POW;
114    case SHADER_OPCODE_SIN:
115       return BRW_MATH_FUNCTION_SIN;
116    case SHADER_OPCODE_COS:
117       return BRW_MATH_FUNCTION_COS;
118    case SHADER_OPCODE_INT_QUOTIENT:
119       return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT;
120    case SHADER_OPCODE_INT_REMAINDER:
121       return BRW_MATH_FUNCTION_INT_DIV_REMAINDER;
122    default:
123       unreachable("not reached: unknown math function");
124    }
125 }
126
/**
 * Pack constant texel offsets into a single dword of 4-bit fields.
 *
 * \param offsets         Per-component offsets, or NULL when the offset is
 *                        not a compile-time constant.
 * \param num_components  Number of valid entries in \p offsets (at most 3).
 * \param offset_bits     Receives the packed value on success; it is left
 *                        untouched when the function returns false.
 *
 * \return true if every offset fits in a signed 4-bit field ([-8, 7]) and
 *         \p offset_bits was written; false if \p offsets is NULL or any
 *         component is out of range, in which case the caller has to
 *         handle the offset some other way.
 */
bool
brw_texture_offset(int *offsets, unsigned num_components, uint32_t *offset_bits)
{
   if (!offsets) return false;  /* nonconstant offset; caller will handle it. */

   /* Only three fields exist.  A fourth component would make the unsigned
    * expression 4 * (2 - i) below wrap around into an invalid shift count.
    */
   assert(num_components <= 3);

   /* offset out of bounds; caller will handle it. */
   for (unsigned i = 0; i < num_components; i++) {
      if (offsets[i] > 7 || offsets[i] < -8)
         return false;
   }

   /* Combine all three offsets into a single unsigned dword:
    *
    *    bits 11:8 - U Offset (X component)
    *    bits  7:4 - V Offset (Y component)
    *    bits  3:0 - R Offset (Z component)
    */
   uint32_t packed = 0;
   for (unsigned i = 0; i < num_components; i++) {
      const unsigned shift = 4 * (2 - i);

      /* Truncate to the two's complement nibble in unsigned arithmetic
       * before shifting: left-shifting a negative signed value is undefined.
       */
      packed |= ((uint32_t)offsets[i] & 0xFu) << shift;
   }

   *offset_bits = packed;
   return true;
}
150
151 const char *
152 brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
153 {
154    switch (op) {
155    case BRW_OPCODE_ILLEGAL ... BRW_OPCODE_NOP:
156       /* The DO instruction doesn't exist on Gen6+, but we use it to mark the
157        * start of a loop in the IR.
158        */
159       if (devinfo->gen >= 6 && op == BRW_OPCODE_DO)
160          return "do";
161
162       /* The following conversion opcodes doesn't exist on Gen8+, but we use
163        * then to mark that we want to do the conversion.
164        */
165       if (devinfo->gen > 7 && op == BRW_OPCODE_F32TO16)
166          return "f32to16";
167
168       if (devinfo->gen > 7 && op == BRW_OPCODE_F16TO32)
169          return "f16to32";
170
171       assert(brw_opcode_desc(devinfo, op)->name);
172       return brw_opcode_desc(devinfo, op)->name;
173    case FS_OPCODE_FB_WRITE:
174       return "fb_write";
175    case FS_OPCODE_FB_WRITE_LOGICAL:
176       return "fb_write_logical";
177    case FS_OPCODE_REP_FB_WRITE:
178       return "rep_fb_write";
179    case FS_OPCODE_FB_READ:
180       return "fb_read";
181    case FS_OPCODE_FB_READ_LOGICAL:
182       return "fb_read_logical";
183
184    case SHADER_OPCODE_RCP:
185       return "rcp";
186    case SHADER_OPCODE_RSQ:
187       return "rsq";
188    case SHADER_OPCODE_SQRT:
189       return "sqrt";
190    case SHADER_OPCODE_EXP2:
191       return "exp2";
192    case SHADER_OPCODE_LOG2:
193       return "log2";
194    case SHADER_OPCODE_POW:
195       return "pow";
196    case SHADER_OPCODE_INT_QUOTIENT:
197       return "int_quot";
198    case SHADER_OPCODE_INT_REMAINDER:
199       return "int_rem";
200    case SHADER_OPCODE_SIN:
201       return "sin";
202    case SHADER_OPCODE_COS:
203       return "cos";
204
205    case SHADER_OPCODE_TEX:
206       return "tex";
207    case SHADER_OPCODE_TEX_LOGICAL:
208       return "tex_logical";
209    case SHADER_OPCODE_TXD:
210       return "txd";
211    case SHADER_OPCODE_TXD_LOGICAL:
212       return "txd_logical";
213    case SHADER_OPCODE_TXF:
214       return "txf";
215    case SHADER_OPCODE_TXF_LOGICAL:
216       return "txf_logical";
217    case SHADER_OPCODE_TXF_LZ:
218       return "txf_lz";
219    case SHADER_OPCODE_TXL:
220       return "txl";
221    case SHADER_OPCODE_TXL_LOGICAL:
222       return "txl_logical";
223    case SHADER_OPCODE_TXL_LZ:
224       return "txl_lz";
225    case SHADER_OPCODE_TXS:
226       return "txs";
227    case SHADER_OPCODE_TXS_LOGICAL:
228       return "txs_logical";
229    case FS_OPCODE_TXB:
230       return "txb";
231    case FS_OPCODE_TXB_LOGICAL:
232       return "txb_logical";
233    case SHADER_OPCODE_TXF_CMS:
234       return "txf_cms";
235    case SHADER_OPCODE_TXF_CMS_LOGICAL:
236       return "txf_cms_logical";
237    case SHADER_OPCODE_TXF_CMS_W:
238       return "txf_cms_w";
239    case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
240       return "txf_cms_w_logical";
241    case SHADER_OPCODE_TXF_UMS:
242       return "txf_ums";
243    case SHADER_OPCODE_TXF_UMS_LOGICAL:
244       return "txf_ums_logical";
245    case SHADER_OPCODE_TXF_MCS:
246       return "txf_mcs";
247    case SHADER_OPCODE_TXF_MCS_LOGICAL:
248       return "txf_mcs_logical";
249    case SHADER_OPCODE_LOD:
250       return "lod";
251    case SHADER_OPCODE_LOD_LOGICAL:
252       return "lod_logical";
253    case SHADER_OPCODE_TG4:
254       return "tg4";
255    case SHADER_OPCODE_TG4_LOGICAL:
256       return "tg4_logical";
257    case SHADER_OPCODE_TG4_OFFSET:
258       return "tg4_offset";
259    case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
260       return "tg4_offset_logical";
261    case SHADER_OPCODE_SAMPLEINFO:
262       return "sampleinfo";
263    case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
264       return "sampleinfo_logical";
265
266    case SHADER_OPCODE_SHADER_TIME_ADD:
267       return "shader_time_add";
268
269    case SHADER_OPCODE_UNTYPED_ATOMIC:
270       return "untyped_atomic";
271    case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
272       return "untyped_atomic_logical";
273    case SHADER_OPCODE_UNTYPED_SURFACE_READ:
274       return "untyped_surface_read";
275    case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
276       return "untyped_surface_read_logical";
277    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
278       return "untyped_surface_write";
279    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
280       return "untyped_surface_write_logical";
281    case SHADER_OPCODE_TYPED_ATOMIC:
282       return "typed_atomic";
283    case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
284       return "typed_atomic_logical";
285    case SHADER_OPCODE_TYPED_SURFACE_READ:
286       return "typed_surface_read";
287    case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
288       return "typed_surface_read_logical";
289    case SHADER_OPCODE_TYPED_SURFACE_WRITE:
290       return "typed_surface_write";
291    case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
292       return "typed_surface_write_logical";
293    case SHADER_OPCODE_MEMORY_FENCE:
294       return "memory_fence";
295
296    case SHADER_OPCODE_BYTE_SCATTERED_READ:
297       return "byte_scattered_read";
298    case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
299       return "byte_scattered_read_logical";
300    case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
301       return "byte_scattered_write";
302    case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
303       return "byte_scattered_write_logical";
304
305    case SHADER_OPCODE_LOAD_PAYLOAD:
306       return "load_payload";
307    case FS_OPCODE_PACK:
308       return "pack";
309
310    case SHADER_OPCODE_GEN4_SCRATCH_READ:
311       return "gen4_scratch_read";
312    case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
313       return "gen4_scratch_write";
314    case SHADER_OPCODE_GEN7_SCRATCH_READ:
315       return "gen7_scratch_read";
316    case SHADER_OPCODE_URB_WRITE_SIMD8:
317       return "gen8_urb_write_simd8";
318    case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
319       return "gen8_urb_write_simd8_per_slot";
320    case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
321       return "gen8_urb_write_simd8_masked";
322    case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
323       return "gen8_urb_write_simd8_masked_per_slot";
324    case SHADER_OPCODE_URB_READ_SIMD8:
325       return "urb_read_simd8";
326    case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
327       return "urb_read_simd8_per_slot";
328
329    case SHADER_OPCODE_FIND_LIVE_CHANNEL:
330       return "find_live_channel";
331    case SHADER_OPCODE_BROADCAST:
332       return "broadcast";
333
334    case SHADER_OPCODE_GET_BUFFER_SIZE:
335       return "get_buffer_size";
336
337    case VEC4_OPCODE_MOV_BYTES:
338       return "mov_bytes";
339    case VEC4_OPCODE_PACK_BYTES:
340       return "pack_bytes";
341    case VEC4_OPCODE_UNPACK_UNIFORM:
342       return "unpack_uniform";
343    case VEC4_OPCODE_DOUBLE_TO_F32:
344       return "double_to_f32";
345    case VEC4_OPCODE_DOUBLE_TO_D32:
346       return "double_to_d32";
347    case VEC4_OPCODE_DOUBLE_TO_U32:
348       return "double_to_u32";
349    case VEC4_OPCODE_TO_DOUBLE:
350       return "single_to_double";
351    case VEC4_OPCODE_PICK_LOW_32BIT:
352       return "pick_low_32bit";
353    case VEC4_OPCODE_PICK_HIGH_32BIT:
354       return "pick_high_32bit";
355    case VEC4_OPCODE_SET_LOW_32BIT:
356       return "set_low_32bit";
357    case VEC4_OPCODE_SET_HIGH_32BIT:
358       return "set_high_32bit";
359
360    case FS_OPCODE_DDX_COARSE:
361       return "ddx_coarse";
362    case FS_OPCODE_DDX_FINE:
363       return "ddx_fine";
364    case FS_OPCODE_DDY_COARSE:
365       return "ddy_coarse";
366    case FS_OPCODE_DDY_FINE:
367       return "ddy_fine";
368
369    case FS_OPCODE_CINTERP:
370       return "cinterp";
371    case FS_OPCODE_LINTERP:
372       return "linterp";
373
374    case FS_OPCODE_PIXEL_X:
375       return "pixel_x";
376    case FS_OPCODE_PIXEL_Y:
377       return "pixel_y";
378
379    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
380       return "uniform_pull_const";
381    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
382       return "uniform_pull_const_gen7";
383    case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:
384       return "varying_pull_const_gen4";
385    case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
386       return "varying_pull_const_gen7";
387    case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
388       return "varying_pull_const_logical";
389
390    case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
391       return "mov_dispatch_to_flags";
392    case FS_OPCODE_DISCARD_JUMP:
393       return "discard_jump";
394
395    case FS_OPCODE_SET_SAMPLE_ID:
396       return "set_sample_id";
397
398    case FS_OPCODE_PACK_HALF_2x16_SPLIT:
399       return "pack_half_2x16_split";
400    case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
401       return "unpack_half_2x16_split_x";
402    case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
403       return "unpack_half_2x16_split_y";
404
405    case FS_OPCODE_PLACEHOLDER_HALT:
406       return "placeholder_halt";
407
408    case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
409       return "interp_sample";
410    case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
411       return "interp_shared_offset";
412    case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
413       return "interp_per_slot_offset";
414
415    case VS_OPCODE_URB_WRITE:
416       return "vs_urb_write";
417    case VS_OPCODE_PULL_CONSTANT_LOAD:
418       return "pull_constant_load";
419    case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
420       return "pull_constant_load_gen7";
421
422    case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
423       return "set_simd4x2_header_gen9";
424
425    case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
426       return "unpack_flags_simd4x2";
427
428    case GS_OPCODE_URB_WRITE:
429       return "gs_urb_write";
430    case GS_OPCODE_URB_WRITE_ALLOCATE:
431       return "gs_urb_write_allocate";
432    case GS_OPCODE_THREAD_END:
433       return "gs_thread_end";
434    case GS_OPCODE_SET_WRITE_OFFSET:
435       return "set_write_offset";
436    case GS_OPCODE_SET_VERTEX_COUNT:
437       return "set_vertex_count";
438    case GS_OPCODE_SET_DWORD_2:
439       return "set_dword_2";
440    case GS_OPCODE_PREPARE_CHANNEL_MASKS:
441       return "prepare_channel_masks";
442    case GS_OPCODE_SET_CHANNEL_MASKS:
443       return "set_channel_masks";
444    case GS_OPCODE_GET_INSTANCE_ID:
445       return "get_instance_id";
446    case GS_OPCODE_FF_SYNC:
447       return "ff_sync";
448    case GS_OPCODE_SET_PRIMITIVE_ID:
449       return "set_primitive_id";
450    case GS_OPCODE_SVB_WRITE:
451       return "gs_svb_write";
452    case GS_OPCODE_SVB_SET_DST_INDEX:
453       return "gs_svb_set_dst_index";
454    case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
455       return "gs_ff_sync_set_primitives";
456    case CS_OPCODE_CS_TERMINATE:
457       return "cs_terminate";
458    case SHADER_OPCODE_BARRIER:
459       return "barrier";
460    case SHADER_OPCODE_MULH:
461       return "mulh";
462    case SHADER_OPCODE_MOV_INDIRECT:
463       return "mov_indirect";
464
465    case VEC4_OPCODE_URB_READ:
466       return "urb_read";
467    case TCS_OPCODE_GET_INSTANCE_ID:
468       return "tcs_get_instance_id";
469    case TCS_OPCODE_URB_WRITE:
470       return "tcs_urb_write";
471    case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
472       return "tcs_set_input_urb_offsets";
473    case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
474       return "tcs_set_output_urb_offsets";
475    case TCS_OPCODE_GET_PRIMITIVE_ID:
476       return "tcs_get_primitive_id";
477    case TCS_OPCODE_CREATE_BARRIER_HEADER:
478       return "tcs_create_barrier_header";
479    case TCS_OPCODE_SRC0_010_IS_ZERO:
480       return "tcs_src0<0,1,0>_is_zero";
481    case TCS_OPCODE_RELEASE_INPUT:
482       return "tcs_release_input";
483    case TCS_OPCODE_THREAD_END:
484       return "tcs_thread_end";
485    case TES_OPCODE_CREATE_INPUT_READ_HEADER:
486       return "tes_create_input_read_header";
487    case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
488       return "tes_add_indirect_urb_offset";
489    case TES_OPCODE_GET_PRIMITIVE_ID:
490       return "tes_get_primitive_id";
491
492    case SHADER_OPCODE_RND_MODE:
493       return "rnd_mode";
494    }
495
496    unreachable("not reached");
497 }
498
499 bool
500 brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg)
501 {
502    union {
503       unsigned ud;
504       int d;
505       float f;
506       double df;
507    } imm, sat_imm = { 0 };
508
509    const unsigned size = type_sz(type);
510
511    /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise
512     * irrelevant, so just check the size of the type and copy from/to an
513     * appropriately sized field.
514     */
515    if (size < 8)
516       imm.ud = reg->ud;
517    else
518       imm.df = reg->df;
519
520    switch (type) {
521    case BRW_REGISTER_TYPE_UD:
522    case BRW_REGISTER_TYPE_D:
523    case BRW_REGISTER_TYPE_UW:
524    case BRW_REGISTER_TYPE_W:
525    case BRW_REGISTER_TYPE_UQ:
526    case BRW_REGISTER_TYPE_Q:
527       /* Nothing to do. */
528       return false;
529    case BRW_REGISTER_TYPE_F:
530       sat_imm.f = CLAMP(imm.f, 0.0f, 1.0f);
531       break;
532    case BRW_REGISTER_TYPE_DF:
533       sat_imm.df = CLAMP(imm.df, 0.0, 1.0);
534       break;
535    case BRW_REGISTER_TYPE_UB:
536    case BRW_REGISTER_TYPE_B:
537       unreachable("no UB/B immediates");
538    case BRW_REGISTER_TYPE_V:
539    case BRW_REGISTER_TYPE_UV:
540    case BRW_REGISTER_TYPE_VF:
541       unreachable("unimplemented: saturate vector immediate");
542    case BRW_REGISTER_TYPE_HF:
543       unreachable("unimplemented: saturate HF immediate");
544    case BRW_REGISTER_TYPE_NF:
545       unreachable("no NF immediates");
546    }
547
548    if (size < 8) {
549       if (imm.ud != sat_imm.ud) {
550          reg->ud = sat_imm.ud;
551          return true;
552       }
553    } else {
554       if (imm.df != sat_imm.df) {
555          reg->df = sat_imm.df;
556          return true;
557       }
558    }
559    return false;
560 }
561
562 bool
563 brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg)
564 {
565    switch (type) {
566    case BRW_REGISTER_TYPE_D:
567    case BRW_REGISTER_TYPE_UD:
568       reg->d = -reg->d;
569       return true;
570    case BRW_REGISTER_TYPE_W:
571    case BRW_REGISTER_TYPE_UW:
572       reg->d = -(int16_t)reg->ud;
573       return true;
574    case BRW_REGISTER_TYPE_F:
575       reg->f = -reg->f;
576       return true;
577    case BRW_REGISTER_TYPE_VF:
578       reg->ud ^= 0x80808080;
579       return true;
580    case BRW_REGISTER_TYPE_DF:
581       reg->df = -reg->df;
582       return true;
583    case BRW_REGISTER_TYPE_UQ:
584    case BRW_REGISTER_TYPE_Q:
585       reg->d64 = -reg->d64;
586       return true;
587    case BRW_REGISTER_TYPE_UB:
588    case BRW_REGISTER_TYPE_B:
589       unreachable("no UB/B immediates");
590    case BRW_REGISTER_TYPE_UV:
591    case BRW_REGISTER_TYPE_V:
592       assert(!"unimplemented: negate UV/V immediate");
593    case BRW_REGISTER_TYPE_HF:
594       assert(!"unimplemented: negate HF immediate");
595    case BRW_REGISTER_TYPE_NF:
596       unreachable("no NF immediates");
597    }
598
599    return false;
600 }
601
602 bool
603 brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg)
604 {
605    switch (type) {
606    case BRW_REGISTER_TYPE_D:
607       reg->d = abs(reg->d);
608       return true;
609    case BRW_REGISTER_TYPE_W:
610       reg->d = abs((int16_t)reg->ud);
611       return true;
612    case BRW_REGISTER_TYPE_F:
613       reg->f = fabsf(reg->f);
614       return true;
615    case BRW_REGISTER_TYPE_DF:
616       reg->df = fabs(reg->df);
617       return true;
618    case BRW_REGISTER_TYPE_VF:
619       reg->ud &= ~0x80808080;
620       return true;
621    case BRW_REGISTER_TYPE_Q:
622       reg->d64 = imaxabs(reg->d64);
623       return true;
624    case BRW_REGISTER_TYPE_UB:
625    case BRW_REGISTER_TYPE_B:
626       unreachable("no UB/B immediates");
627    case BRW_REGISTER_TYPE_UQ:
628    case BRW_REGISTER_TYPE_UD:
629    case BRW_REGISTER_TYPE_UW:
630    case BRW_REGISTER_TYPE_UV:
631       /* Presumably the absolute value modifier on an unsigned source is a
632        * nop, but it would be nice to confirm.
633        */
634       assert(!"unimplemented: abs unsigned immediate");
635    case BRW_REGISTER_TYPE_V:
636       assert(!"unimplemented: abs V immediate");
637    case BRW_REGISTER_TYPE_HF:
638       assert(!"unimplemented: abs HF immediate");
639    case BRW_REGISTER_TYPE_NF:
640       unreachable("no NF immediates");
641    }
642
643    return false;
644 }
645
646 backend_shader::backend_shader(const struct brw_compiler *compiler,
647                                void *log_data,
648                                void *mem_ctx,
649                                const nir_shader *shader,
650                                struct brw_stage_prog_data *stage_prog_data)
651    : compiler(compiler),
652      log_data(log_data),
653      devinfo(compiler->devinfo),
654      nir(shader),
655      stage_prog_data(stage_prog_data),
656      mem_ctx(mem_ctx),
657      cfg(NULL),
658      stage(shader->info.stage)
659 {
660    debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
661    stage_name = _mesa_shader_stage_to_string(stage);
662    stage_abbrev = _mesa_shader_stage_to_abbrev(stage);
663 }
664
665 backend_shader::~backend_shader()
666 {
667 }
668
669 bool
670 backend_reg::equals(const backend_reg &r) const
671 {
672    return brw_regs_equal(this, &r) && offset == r.offset;
673 }
674
675 bool
676 backend_reg::is_zero() const
677 {
678    if (file != IMM)
679       return false;
680
681    switch (type) {
682    case BRW_REGISTER_TYPE_F:
683       return f == 0;
684    case BRW_REGISTER_TYPE_DF:
685       return df == 0;
686    case BRW_REGISTER_TYPE_D:
687    case BRW_REGISTER_TYPE_UD:
688       return d == 0;
689    case BRW_REGISTER_TYPE_UQ:
690    case BRW_REGISTER_TYPE_Q:
691       return u64 == 0;
692    default:
693       return false;
694    }
695 }
696
697 bool
698 backend_reg::is_one() const
699 {
700    if (file != IMM)
701       return false;
702
703    switch (type) {
704    case BRW_REGISTER_TYPE_F:
705       return f == 1.0f;
706    case BRW_REGISTER_TYPE_DF:
707       return df == 1.0;
708    case BRW_REGISTER_TYPE_D:
709    case BRW_REGISTER_TYPE_UD:
710       return d == 1;
711    case BRW_REGISTER_TYPE_UQ:
712    case BRW_REGISTER_TYPE_Q:
713       return u64 == 1;
714    default:
715       return false;
716    }
717 }
718
719 bool
720 backend_reg::is_negative_one() const
721 {
722    if (file != IMM)
723       return false;
724
725    switch (type) {
726    case BRW_REGISTER_TYPE_F:
727       return f == -1.0;
728    case BRW_REGISTER_TYPE_DF:
729       return df == -1.0;
730    case BRW_REGISTER_TYPE_D:
731       return d == -1;
732    case BRW_REGISTER_TYPE_Q:
733       return d64 == -1;
734    default:
735       return false;
736    }
737 }
738
739 bool
740 backend_reg::is_null() const
741 {
742    return file == ARF && nr == BRW_ARF_NULL;
743 }
744
745
746 bool
747 backend_reg::is_accumulator() const
748 {
749    return file == ARF && nr == BRW_ARF_ACCUMULATOR;
750 }
751
752 bool
753 backend_instruction::is_commutative() const
754 {
755    switch (opcode) {
756    case BRW_OPCODE_AND:
757    case BRW_OPCODE_OR:
758    case BRW_OPCODE_XOR:
759    case BRW_OPCODE_ADD:
760    case BRW_OPCODE_MUL:
761    case SHADER_OPCODE_MULH:
762       return true;
763    case BRW_OPCODE_SEL:
764       /* MIN and MAX are commutative. */
765       if (conditional_mod == BRW_CONDITIONAL_GE ||
766           conditional_mod == BRW_CONDITIONAL_L) {
767          return true;
768       }
769       /* fallthrough */
770    default:
771       return false;
772    }
773 }
774
775 bool
776 backend_instruction::is_3src(const struct gen_device_info *devinfo) const
777 {
778    return ::is_3src(devinfo, opcode);
779 }
780
781 bool
782 backend_instruction::is_tex() const
783 {
784    return (opcode == SHADER_OPCODE_TEX ||
785            opcode == FS_OPCODE_TXB ||
786            opcode == SHADER_OPCODE_TXD ||
787            opcode == SHADER_OPCODE_TXF ||
788            opcode == SHADER_OPCODE_TXF_LZ ||
789            opcode == SHADER_OPCODE_TXF_CMS ||
790            opcode == SHADER_OPCODE_TXF_CMS_W ||
791            opcode == SHADER_OPCODE_TXF_UMS ||
792            opcode == SHADER_OPCODE_TXF_MCS ||
793            opcode == SHADER_OPCODE_TXL ||
794            opcode == SHADER_OPCODE_TXL_LZ ||
795            opcode == SHADER_OPCODE_TXS ||
796            opcode == SHADER_OPCODE_LOD ||
797            opcode == SHADER_OPCODE_TG4 ||
798            opcode == SHADER_OPCODE_TG4_OFFSET ||
799            opcode == SHADER_OPCODE_SAMPLEINFO);
800 }
801
802 bool
803 backend_instruction::is_math() const
804 {
805    return (opcode == SHADER_OPCODE_RCP ||
806            opcode == SHADER_OPCODE_RSQ ||
807            opcode == SHADER_OPCODE_SQRT ||
808            opcode == SHADER_OPCODE_EXP2 ||
809            opcode == SHADER_OPCODE_LOG2 ||
810            opcode == SHADER_OPCODE_SIN ||
811            opcode == SHADER_OPCODE_COS ||
812            opcode == SHADER_OPCODE_INT_QUOTIENT ||
813            opcode == SHADER_OPCODE_INT_REMAINDER ||
814            opcode == SHADER_OPCODE_POW);
815 }
816
817 bool
818 backend_instruction::is_control_flow() const
819 {
820    switch (opcode) {
821    case BRW_OPCODE_DO:
822    case BRW_OPCODE_WHILE:
823    case BRW_OPCODE_IF:
824    case BRW_OPCODE_ELSE:
825    case BRW_OPCODE_ENDIF:
826    case BRW_OPCODE_BREAK:
827    case BRW_OPCODE_CONTINUE:
828       return true;
829    default:
830       return false;
831    }
832 }
833
834 bool
835 backend_instruction::can_do_source_mods() const
836 {
837    switch (opcode) {
838    case BRW_OPCODE_ADDC:
839    case BRW_OPCODE_BFE:
840    case BRW_OPCODE_BFI1:
841    case BRW_OPCODE_BFI2:
842    case BRW_OPCODE_BFREV:
843    case BRW_OPCODE_CBIT:
844    case BRW_OPCODE_FBH:
845    case BRW_OPCODE_FBL:
846    case BRW_OPCODE_SUBB:
847    case SHADER_OPCODE_BROADCAST:
848    case SHADER_OPCODE_MOV_INDIRECT:
849       return false;
850    default:
851       return true;
852    }
853 }
854
855 bool
856 backend_instruction::can_do_saturate() const
857 {
858    switch (opcode) {
859    case BRW_OPCODE_ADD:
860    case BRW_OPCODE_ASR:
861    case BRW_OPCODE_AVG:
862    case BRW_OPCODE_DP2:
863    case BRW_OPCODE_DP3:
864    case BRW_OPCODE_DP4:
865    case BRW_OPCODE_DPH:
866    case BRW_OPCODE_F16TO32:
867    case BRW_OPCODE_F32TO16:
868    case BRW_OPCODE_LINE:
869    case BRW_OPCODE_LRP:
870    case BRW_OPCODE_MAC:
871    case BRW_OPCODE_MAD:
872    case BRW_OPCODE_MATH:
873    case BRW_OPCODE_MOV:
874    case BRW_OPCODE_MUL:
875    case SHADER_OPCODE_MULH:
876    case BRW_OPCODE_PLN:
877    case BRW_OPCODE_RNDD:
878    case BRW_OPCODE_RNDE:
879    case BRW_OPCODE_RNDU:
880    case BRW_OPCODE_RNDZ:
881    case BRW_OPCODE_SEL:
882    case BRW_OPCODE_SHL:
883    case BRW_OPCODE_SHR:
884    case FS_OPCODE_LINTERP:
885    case SHADER_OPCODE_COS:
886    case SHADER_OPCODE_EXP2:
887    case SHADER_OPCODE_LOG2:
888    case SHADER_OPCODE_POW:
889    case SHADER_OPCODE_RCP:
890    case SHADER_OPCODE_RSQ:
891    case SHADER_OPCODE_SIN:
892    case SHADER_OPCODE_SQRT:
893       return true;
894    default:
895       return false;
896    }
897 }
898
899 bool
900 backend_instruction::can_do_cmod() const
901 {
902    switch (opcode) {
903    case BRW_OPCODE_ADD:
904    case BRW_OPCODE_ADDC:
905    case BRW_OPCODE_AND:
906    case BRW_OPCODE_ASR:
907    case BRW_OPCODE_AVG:
908    case BRW_OPCODE_CMP:
909    case BRW_OPCODE_CMPN:
910    case BRW_OPCODE_DP2:
911    case BRW_OPCODE_DP3:
912    case BRW_OPCODE_DP4:
913    case BRW_OPCODE_DPH:
914    case BRW_OPCODE_F16TO32:
915    case BRW_OPCODE_F32TO16:
916    case BRW_OPCODE_FRC:
917    case BRW_OPCODE_LINE:
918    case BRW_OPCODE_LRP:
919    case BRW_OPCODE_LZD:
920    case BRW_OPCODE_MAC:
921    case BRW_OPCODE_MACH:
922    case BRW_OPCODE_MAD:
923    case BRW_OPCODE_MOV:
924    case BRW_OPCODE_MUL:
925    case BRW_OPCODE_NOT:
926    case BRW_OPCODE_OR:
927    case BRW_OPCODE_PLN:
928    case BRW_OPCODE_RNDD:
929    case BRW_OPCODE_RNDE:
930    case BRW_OPCODE_RNDU:
931    case BRW_OPCODE_RNDZ:
932    case BRW_OPCODE_SAD2:
933    case BRW_OPCODE_SADA2:
934    case BRW_OPCODE_SHL:
935    case BRW_OPCODE_SHR:
936    case BRW_OPCODE_SUBB:
937    case BRW_OPCODE_XOR:
938    case FS_OPCODE_CINTERP:
939    case FS_OPCODE_LINTERP:
940       return true;
941    default:
942       return false;
943    }
944 }
945
946 bool
947 backend_instruction::reads_accumulator_implicitly() const
948 {
949    switch (opcode) {
950    case BRW_OPCODE_MAC:
951    case BRW_OPCODE_MACH:
952    case BRW_OPCODE_SADA2:
953       return true;
954    default:
955       return false;
956    }
957 }
958
959 bool
960 backend_instruction::writes_accumulator_implicitly(const struct gen_device_info *devinfo) const
961 {
962    return writes_accumulator ||
963           (devinfo->gen < 6 &&
964            ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) ||
965             (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP &&
966              opcode != FS_OPCODE_CINTERP)));
967 }
968
969 bool
970 backend_instruction::has_side_effects() const
971 {
972    switch (opcode) {
973    case SHADER_OPCODE_UNTYPED_ATOMIC:
974    case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
975    case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
976    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
977    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
978    case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
979    case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
980    case SHADER_OPCODE_TYPED_ATOMIC:
981    case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
982    case SHADER_OPCODE_TYPED_SURFACE_WRITE:
983    case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
984    case SHADER_OPCODE_MEMORY_FENCE:
985    case SHADER_OPCODE_URB_WRITE_SIMD8:
986    case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
987    case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
988    case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
989    case FS_OPCODE_FB_WRITE:
990    case FS_OPCODE_FB_WRITE_LOGICAL:
991    case SHADER_OPCODE_BARRIER:
992    case TCS_OPCODE_URB_WRITE:
993    case TCS_OPCODE_RELEASE_INPUT:
994    case SHADER_OPCODE_RND_MODE:
995       return true;
996    default:
997       return eot;
998    }
999 }
1000
1001 bool
1002 backend_instruction::is_volatile() const
1003 {
1004    switch (opcode) {
1005    case SHADER_OPCODE_UNTYPED_SURFACE_READ:
1006    case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
1007    case SHADER_OPCODE_TYPED_SURFACE_READ:
1008    case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
1009    case SHADER_OPCODE_BYTE_SCATTERED_READ:
1010    case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
1011    case SHADER_OPCODE_URB_READ_SIMD8:
1012    case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
1013    case VEC4_OPCODE_URB_READ:
1014       return true;
1015    default:
1016       return false;
1017    }
1018 }
1019
1020 #ifndef NDEBUG
1021 static bool
1022 inst_is_in_block(const bblock_t *block, const backend_instruction *inst)
1023 {
1024    bool found = false;
1025    foreach_inst_in_block (backend_instruction, i, block) {
1026       if (inst == i) {
1027          found = true;
1028       }
1029    }
1030    return found;
1031 }
1032 #endif
1033
1034 static void
1035 adjust_later_block_ips(bblock_t *start_block, int ip_adjustment)
1036 {
1037    for (bblock_t *block_iter = start_block->next();
1038         block_iter;
1039         block_iter = block_iter->next()) {
1040       block_iter->start_ip += ip_adjustment;
1041       block_iter->end_ip += ip_adjustment;
1042    }
1043 }
1044
1045 void
1046 backend_instruction::insert_after(bblock_t *block, backend_instruction *inst)
1047 {
1048    assert(this != inst);
1049
1050    if (!this->is_head_sentinel())
1051       assert(inst_is_in_block(block, this) || !"Instruction not in block");
1052
1053    block->end_ip++;
1054
1055    adjust_later_block_ips(block, 1);
1056
1057    exec_node::insert_after(inst);
1058 }
1059
1060 void
1061 backend_instruction::insert_before(bblock_t *block, backend_instruction *inst)
1062 {
1063    assert(this != inst);
1064
1065    if (!this->is_tail_sentinel())
1066       assert(inst_is_in_block(block, this) || !"Instruction not in block");
1067
1068    block->end_ip++;
1069
1070    adjust_later_block_ips(block, 1);
1071
1072    exec_node::insert_before(inst);
1073 }
1074
1075 void
1076 backend_instruction::insert_before(bblock_t *block, exec_list *list)
1077 {
1078    assert(inst_is_in_block(block, this) || !"Instruction not in block");
1079
1080    unsigned num_inst = list->length();
1081
1082    block->end_ip += num_inst;
1083
1084    adjust_later_block_ips(block, num_inst);
1085
1086    exec_node::insert_before(list);
1087 }
1088
1089 void
1090 backend_instruction::remove(bblock_t *block)
1091 {
1092    assert(inst_is_in_block(block, this) || !"Instruction not in block");
1093
1094    adjust_later_block_ips(block, -1);
1095
1096    if (block->start_ip == block->end_ip) {
1097       block->cfg->remove_block(block);
1098    } else {
1099       block->end_ip--;
1100    }
1101
1102    exec_node::remove();
1103 }
1104
1105 void
1106 backend_shader::dump_instructions()
1107 {
1108    dump_instructions(NULL);
1109 }
1110
1111 void
1112 backend_shader::dump_instructions(const char *name)
1113 {
1114    FILE *file = stderr;
1115    if (name && geteuid() != 0) {
1116       file = fopen(name, "w");
1117       if (!file)
1118          file = stderr;
1119    }
1120
1121    if (cfg) {
1122       int ip = 0;
1123       foreach_block_and_inst(block, backend_instruction, inst, cfg) {
1124          if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER))
1125             fprintf(file, "%4d: ", ip++);
1126          dump_instruction(inst, file);
1127       }
1128    } else {
1129       int ip = 0;
1130       foreach_in_list(backend_instruction, inst, &instructions) {
1131          if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER))
1132             fprintf(file, "%4d: ", ip++);
1133          dump_instruction(inst, file);
1134       }
1135    }
1136
1137    if (file != stderr) {
1138       fclose(file);
1139    }
1140 }
1141
1142 void
1143 backend_shader::calculate_cfg()
1144 {
1145    if (this->cfg)
1146       return;
1147    cfg = new(mem_ctx) cfg_t(&this->instructions);
1148 }
1149
1150 extern "C" const unsigned *
1151 brw_compile_tes(const struct brw_compiler *compiler,
1152                 void *log_data,
1153                 void *mem_ctx,
1154                 const struct brw_tes_prog_key *key,
1155                 const struct brw_vue_map *input_vue_map,
1156                 struct brw_tes_prog_data *prog_data,
1157                 const nir_shader *src_shader,
1158                 struct gl_program *prog,
1159                 int shader_time_index,
1160                 char **error_str)
1161 {
1162    const struct gen_device_info *devinfo = compiler->devinfo;
1163    const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL];
1164    const unsigned *assembly;
1165
1166    nir_shader *nir = nir_shader_clone(mem_ctx, src_shader);
1167    nir->info.inputs_read = key->inputs_read;
1168    nir->info.patch_inputs_read = key->patch_inputs_read;
1169
1170    nir = brw_nir_apply_sampler_key(nir, compiler, &key->tex, is_scalar);
1171    brw_nir_lower_tes_inputs(nir, input_vue_map);
1172    brw_nir_lower_vue_outputs(nir, is_scalar);
1173    nir = brw_postprocess_nir(nir, compiler, is_scalar);
1174
1175    brw_compute_vue_map(devinfo, &prog_data->base.vue_map,
1176                        nir->info.outputs_written,
1177                        nir->info.separate_shader);
1178
1179    unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;
1180
1181    assert(output_size_bytes >= 1);
1182    if (output_size_bytes > GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES) {
1183       if (error_str)
1184          *error_str = ralloc_strdup(mem_ctx, "DS outputs exceed maximum size");
1185       return NULL;
1186    }
1187
1188    prog_data->base.clip_distance_mask =
1189       ((1 << nir->info.clip_distance_array_size) - 1);
1190    prog_data->base.cull_distance_mask =
1191       ((1 << nir->info.cull_distance_array_size) - 1) <<
1192       nir->info.clip_distance_array_size;
1193
1194    /* URB entry sizes are stored as a multiple of 64 bytes. */
1195    prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
1196
1197    /* On Cannonlake software shall not program an allocation size that
1198     * specifies a size that is a multiple of 3 64B (512-bit) cachelines.
1199     */
1200    if (devinfo->gen == 10 &&
1201        prog_data->base.urb_entry_size % 3 == 0)
1202       prog_data->base.urb_entry_size++;
1203
1204    prog_data->base.urb_read_length = 0;
1205
1206    STATIC_ASSERT(BRW_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1);
1207    STATIC_ASSERT(BRW_TESS_PARTITIONING_ODD_FRACTIONAL ==
1208                  TESS_SPACING_FRACTIONAL_ODD - 1);
1209    STATIC_ASSERT(BRW_TESS_PARTITIONING_EVEN_FRACTIONAL ==
1210                  TESS_SPACING_FRACTIONAL_EVEN - 1);
1211
1212    prog_data->partitioning =
1213       (enum brw_tess_partitioning) (nir->info.tess.spacing - 1);
1214
1215    switch (nir->info.tess.primitive_mode) {
1216    case GL_QUADS:
1217       prog_data->domain = BRW_TESS_DOMAIN_QUAD;
1218       break;
1219    case GL_TRIANGLES:
1220       prog_data->domain = BRW_TESS_DOMAIN_TRI;
1221       break;
1222    case GL_ISOLINES:
1223       prog_data->domain = BRW_TESS_DOMAIN_ISOLINE;
1224       break;
1225    default:
1226       unreachable("invalid domain shader primitive mode");
1227    }
1228
1229    if (nir->info.tess.point_mode) {
1230       prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_POINT;
1231    } else if (nir->info.tess.primitive_mode == GL_ISOLINES) {
1232       prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_LINE;
1233    } else {
1234       /* Hardware winding order is backwards from OpenGL */
1235       prog_data->output_topology =
1236          nir->info.tess.ccw ? BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW
1237                              : BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW;
1238    }
1239
1240    if (unlikely(INTEL_DEBUG & DEBUG_TES)) {
1241       fprintf(stderr, "TES Input ");
1242       brw_print_vue_map(stderr, input_vue_map);
1243       fprintf(stderr, "TES Output ");
1244       brw_print_vue_map(stderr, &prog_data->base.vue_map);
1245    }
1246
1247    if (is_scalar) {
1248       fs_visitor v(compiler, log_data, mem_ctx, (void *) key,
1249                    &prog_data->base.base, NULL, nir, 8,
1250                    shader_time_index, input_vue_map);
1251       if (!v.run_tes()) {
1252          if (error_str)
1253             *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
1254          return NULL;
1255       }
1256
1257       prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
1258       prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
1259
1260       fs_generator g(compiler, log_data, mem_ctx, (void *) key,
1261                      &prog_data->base.base, v.promoted_constants, false,
1262                      MESA_SHADER_TESS_EVAL);
1263       if (unlikely(INTEL_DEBUG & DEBUG_TES)) {
1264          g.enable_debug(ralloc_asprintf(mem_ctx,
1265                                         "%s tessellation evaluation shader %s",
1266                                         nir->info.label ? nir->info.label
1267                                                         : "unnamed",
1268                                         nir->info.name));
1269       }
1270
1271       g.generate_code(v.cfg, 8);
1272
1273       assembly = g.get_assembly();
1274    } else {
1275       brw::vec4_tes_visitor v(compiler, log_data, key, prog_data,
1276                               nir, mem_ctx, shader_time_index);
1277       if (!v.run()) {
1278          if (error_str)
1279             *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
1280          return NULL;
1281       }
1282
1283       if (unlikely(INTEL_DEBUG & DEBUG_TES))
1284          v.dump_instructions();
1285
1286       assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
1287                                             &prog_data->base, v.cfg);
1288    }
1289
1290    return assembly;
1291 }