OSDN Git Service

i965: new integrated graphics chipset support
[android-x86/external-mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4  develop this 3D driver.
5  
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13  
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17  
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <keith@tungstengraphics.com>
30   */
31      
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41  * Internal helper for constructing instructions
42  */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45                                   struct brw_reg reg )
46 {
47    if (reg.width == BRW_WIDTH_8 && 
48        insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) 
49       insn->header.execution_size = BRW_EXECUTE_16;
50    else
51       insn->header.execution_size = reg.width;  /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56                           struct brw_reg dest )
57 {
58    insn->bits1.da1.dest_reg_file = dest.file;
59    insn->bits1.da1.dest_reg_type = dest.type;
60    insn->bits1.da1.dest_address_mode = dest.address_mode;
61
62    if (dest.address_mode == BRW_ADDRESS_DIRECT) {   
63       insn->bits1.da1.dest_reg_nr = dest.nr;
64
65       if (insn->header.access_mode == BRW_ALIGN_1) {
66          insn->bits1.da1.dest_subreg_nr = dest.subnr;
67          insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
68       }
69       else {
70          insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
71          insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
72       }
73    }
74    else {
75       insn->bits1.ia1.dest_subreg_nr = dest.subnr;
76
77       /* These are different sizes in align1 vs align16:
78        */
79       if (insn->header.access_mode == BRW_ALIGN_1) {
80          insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
81          insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
82       }
83       else {
84          insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
85       }
86    }
87
88    /* NEW: Set the execution size based on dest.width and
89     * insn->compression_control:
90     */
91    guess_execution_size(insn, dest);
92 }
93
94 static void brw_set_src0( struct brw_instruction *insn,
95                       struct brw_reg reg )
96 {
97    assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
98
99    insn->bits1.da1.src0_reg_file = reg.file;
100    insn->bits1.da1.src0_reg_type = reg.type;
101    insn->bits2.da1.src0_abs = reg.abs;
102    insn->bits2.da1.src0_negate = reg.negate;
103    insn->bits2.da1.src0_address_mode = reg.address_mode;
104
105    if (reg.file == BRW_IMMEDIATE_VALUE) {
106       insn->bits3.ud = reg.dw1.ud;
107    
108       /* Required to set some fields in src1 as well:
109        */
110       insn->bits1.da1.src1_reg_file = 0; /* arf */
111       insn->bits1.da1.src1_reg_type = reg.type;
112    }
113    else 
114    {
115       if (reg.address_mode == BRW_ADDRESS_DIRECT) {
116          if (insn->header.access_mode == BRW_ALIGN_1) {
117             insn->bits2.da1.src0_subreg_nr = reg.subnr;
118             insn->bits2.da1.src0_reg_nr = reg.nr;
119          }
120          else {
121             insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
122             insn->bits2.da16.src0_reg_nr = reg.nr;
123          }
124       }
125       else {
126          insn->bits2.ia1.src0_subreg_nr = reg.subnr;
127
128          if (insn->header.access_mode == BRW_ALIGN_1) {
129             insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 
130          }
131          else {
132             insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
133          }
134       }
135
136       if (insn->header.access_mode == BRW_ALIGN_1) {
137          if (reg.width == BRW_WIDTH_1 && 
138              insn->header.execution_size == BRW_EXECUTE_1) {
139             insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
140             insn->bits2.da1.src0_width = BRW_WIDTH_1;
141             insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
142          }
143          else {
144             insn->bits2.da1.src0_horiz_stride = reg.hstride;
145             insn->bits2.da1.src0_width = reg.width;
146             insn->bits2.da1.src0_vert_stride = reg.vstride;
147          }
148       }
149       else {
150          insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
151          insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
152          insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
153          insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
154
155          /* This is an oddity of the fact we're using the same
156           * descriptions for registers in align_16 as align_1:
157           */
158          if (reg.vstride == BRW_VERTICAL_STRIDE_8)
159             insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
160          else
161             insn->bits2.da16.src0_vert_stride = reg.vstride;
162       }
163    }
164 }
165
166
167 void brw_set_src1( struct brw_instruction *insn,
168                           struct brw_reg reg )
169 {
170    assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
171
172    insn->bits1.da1.src1_reg_file = reg.file;
173    insn->bits1.da1.src1_reg_type = reg.type;
174    insn->bits3.da1.src1_abs = reg.abs;
175    insn->bits3.da1.src1_negate = reg.negate;
176
177    /* Only src1 can be immediate in two-argument instructions.
178     */
179    assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
180
181    if (reg.file == BRW_IMMEDIATE_VALUE) {
182       insn->bits3.ud = reg.dw1.ud;
183    }
184    else {
185       /* This is a hardware restriction, which may or may not be lifted
186        * in the future:
187        */
188       assert (reg.address_mode == BRW_ADDRESS_DIRECT);
189       //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
190
191       if (insn->header.access_mode == BRW_ALIGN_1) {
192          insn->bits3.da1.src1_subreg_nr = reg.subnr;
193          insn->bits3.da1.src1_reg_nr = reg.nr;
194       }
195       else {
196          insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
197          insn->bits3.da16.src1_reg_nr = reg.nr;
198       }
199
200       if (insn->header.access_mode == BRW_ALIGN_1) {
201          if (reg.width == BRW_WIDTH_1 && 
202              insn->header.execution_size == BRW_EXECUTE_1) {
203             insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
204             insn->bits3.da1.src1_width = BRW_WIDTH_1;
205             insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
206          }
207          else {
208             insn->bits3.da1.src1_horiz_stride = reg.hstride;
209             insn->bits3.da1.src1_width = reg.width;
210             insn->bits3.da1.src1_vert_stride = reg.vstride;
211          }
212       }
213       else {
214          insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
215          insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
216          insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
217          insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
218
219          /* This is an oddity of the fact we're using the same
220           * descriptions for registers in align_16 as align_1:
221           */
222          if (reg.vstride == BRW_VERTICAL_STRIDE_8)
223             insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
224          else
225             insn->bits3.da16.src1_vert_stride = reg.vstride;
226       }
227    }
228 }
229
230
231
232 static void brw_set_math_message( struct brw_instruction *insn,
233                                   GLuint msg_length,
234                                   GLuint response_length,
235                                   GLuint function,
236                                   GLuint integer_type,
237                                   GLboolean low_precision,
238                                   GLboolean saturate,
239                                   GLuint dataType )
240 {
241    brw_set_src1(insn, brw_imm_d(0));
242
243    insn->bits3.math.function = function;
244    insn->bits3.math.int_type = integer_type;
245    insn->bits3.math.precision = low_precision;
246    insn->bits3.math.saturate = saturate;
247    insn->bits3.math.data_type = dataType;
248    insn->bits3.math.response_length = response_length;
249    insn->bits3.math.msg_length = msg_length;
250    insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
251    insn->bits3.math.end_of_thread = 0;
252 }
253
254 static void brw_set_urb_message( struct brw_instruction *insn,
255                                  GLboolean allocate,
256                                  GLboolean used,
257                                  GLuint msg_length,
258                                  GLuint response_length,
259                                  GLboolean end_of_thread,
260                                  GLboolean complete,
261                                  GLuint offset,
262                                  GLuint swizzle_control )
263 {
264    brw_set_src1(insn, brw_imm_d(0));
265
266    insn->bits3.urb.opcode = 0;  /* ? */
267    insn->bits3.urb.offset = offset;
268    insn->bits3.urb.swizzle_control = swizzle_control;
269    insn->bits3.urb.allocate = allocate;
270    insn->bits3.urb.used = used; /* ? */
271    insn->bits3.urb.complete = complete;
272    insn->bits3.urb.response_length = response_length;
273    insn->bits3.urb.msg_length = msg_length;
274    insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
275    insn->bits3.urb.end_of_thread = end_of_thread;
276 }
277
278 static void brw_set_dp_write_message( struct brw_instruction *insn,
279                                       GLuint binding_table_index,
280                                       GLuint msg_control,
281                                       GLuint msg_type,
282                                       GLuint msg_length,
283                                       GLuint pixel_scoreboard_clear,
284                                       GLuint response_length,
285                                       GLuint end_of_thread )
286 {
287    brw_set_src1(insn, brw_imm_d(0));
288
289    insn->bits3.dp_write.binding_table_index = binding_table_index;
290    insn->bits3.dp_write.msg_control = msg_control;
291    insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
292    insn->bits3.dp_write.msg_type = msg_type;
293    insn->bits3.dp_write.send_commit_msg = 0;
294    insn->bits3.dp_write.response_length = response_length;
295    insn->bits3.dp_write.msg_length = msg_length;
296    insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
297    insn->bits3.urb.end_of_thread = end_of_thread;
298 }
299
300 static void brw_set_dp_read_message( struct brw_instruction *insn,
301                                       GLuint binding_table_index,
302                                       GLuint msg_control,
303                                       GLuint msg_type,
304                                       GLuint target_cache,
305                                       GLuint msg_length,
306                                       GLuint response_length,
307                                       GLuint end_of_thread )
308 {
309    brw_set_src1(insn, brw_imm_d(0));
310
311    insn->bits3.dp_read.binding_table_index = binding_table_index;
312    insn->bits3.dp_read.msg_control = msg_control;
313    insn->bits3.dp_read.msg_type = msg_type;
314    insn->bits3.dp_read.target_cache = target_cache;
315    insn->bits3.dp_read.response_length = response_length;
316    insn->bits3.dp_read.msg_length = msg_length;
317    insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
318    insn->bits3.dp_read.end_of_thread = end_of_thread;
319 }
320
321 static void brw_set_sampler_message(struct brw_context *brw,
322                  struct brw_instruction *insn,
323                                      GLuint binding_table_index,
324                                      GLuint sampler,
325                                      GLuint msg_type,
326                                      GLuint response_length,
327                                      GLuint msg_length,
328                                      GLboolean eot)
329 {
330    brw_set_src1(insn, brw_imm_d(0));
331
332    if (BRW_IS_IGD(brw)) {
333       insn->bits3.sampler_igd.binding_table_index = binding_table_index;
334       insn->bits3.sampler_igd.sampler = sampler;
335       insn->bits3.sampler_igd.msg_type = msg_type;
336       insn->bits3.sampler_igd.response_length = response_length;
337       insn->bits3.sampler_igd.msg_length = msg_length;
338       insn->bits3.sampler_igd.end_of_thread = eot;
339       insn->bits3.sampler_igd.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
340    } else {
341       insn->bits3.sampler.binding_table_index = binding_table_index;
342       insn->bits3.sampler.sampler = sampler;
343       insn->bits3.sampler.msg_type = msg_type;
344       insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
345       insn->bits3.sampler.response_length = response_length;
346       insn->bits3.sampler.msg_length = msg_length;
347       insn->bits3.sampler.end_of_thread = eot;
348       insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
349    }
350 }
351
352
353
354 static struct brw_instruction *next_insn( struct brw_compile *p, 
355                                           GLuint opcode )
356 {
357    struct brw_instruction *insn;
358
359    assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
360
361    insn = &p->store[p->nr_insn++];
362    memcpy(insn, p->current, sizeof(*insn));
363
364    /* Reset this one-shot flag: 
365     */
366
367    if (p->current->header.destreg__conditonalmod) {
368       p->current->header.destreg__conditonalmod = 0;   
369       p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
370    }
371
372    insn->header.opcode = opcode;
373    return insn;
374 }
375
376
377 static struct brw_instruction *brw_alu1( struct brw_compile *p,
378                                          GLuint opcode,
379                                          struct brw_reg dest,
380                                          struct brw_reg src )
381 {
382    struct brw_instruction *insn = next_insn(p, opcode);
383    brw_set_dest(insn, dest);
384    brw_set_src0(insn, src);   
385    return insn;
386 }
387
388 static struct brw_instruction *brw_alu2(struct brw_compile *p,
389                                         GLuint opcode,
390                                         struct brw_reg dest,
391                                         struct brw_reg src0,
392                                         struct brw_reg src1 )
393 {
394    struct brw_instruction *insn = next_insn(p, opcode);   
395    brw_set_dest(insn, dest);
396    brw_set_src0(insn, src0);
397    brw_set_src1(insn, src1);
398    return insn;
399 }
400
401
402 /***********************************************************************
403  * Convenience routines.
404  */
405 #define ALU1(OP)                                        \
406 struct brw_instruction *brw_##OP(struct brw_compile *p,                 \
407               struct brw_reg dest,                      \
408               struct brw_reg src0)                      \
409 {                                                       \
410    return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);     \
411 }
412
413 #define ALU2(OP)                                        \
414 struct brw_instruction *brw_##OP(struct brw_compile *p,                 \
415               struct brw_reg dest,                      \
416               struct brw_reg src0,                      \
417               struct brw_reg src1)                      \
418 {                                                       \
419    return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);       \
420 }
421
422
423 ALU1(MOV)
424 ALU2(SEL)
425 ALU1(NOT)
426 ALU2(AND)
427 ALU2(OR)
428 ALU2(XOR)
429 ALU2(SHR)
430 ALU2(SHL)
431 ALU2(RSR)
432 ALU2(RSL)
433 ALU2(ASR)
434 ALU2(ADD)
435 ALU2(MUL)
436 ALU1(FRC)
437 ALU1(RNDD)
438 ALU2(MAC)
439 ALU2(MACH)
440 ALU1(LZD)
441 ALU2(DP4)
442 ALU2(DPH)
443 ALU2(DP3)
444 ALU2(DP2)
445 ALU2(LINE)
446
447
448
449
450 void brw_NOP(struct brw_compile *p)
451 {
452    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);   
453    brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
454    brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
455    brw_set_src1(insn, brw_imm_ud(0x0));
456 }
457
458
459
460
461
462 /***********************************************************************
463  * Comparisons, if/else/endif
464  */
465
466 struct brw_instruction *brw_JMPI(struct brw_compile *p, 
467               struct brw_reg dest,
468               struct brw_reg src0,
469               struct brw_reg src1)
470 {
471    struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
472
473    p->current->header.predicate_control = BRW_PREDICATE_NONE;
474
475    return insn;
476 }
477
478 /* EU takes the value from the flag register and pushes it onto some
479  * sort of a stack (presumably merging with any flag value already on
480  * the stack).  Within an if block, the flags at the top of the stack
481  * control execution on each channel of the unit, eg. on each of the
482  * 16 pixel values in our wm programs.
483  *
484  * When the matching 'else' instruction is reached (presumably by
485  * countdown of the instruction count patched in by our ELSE/ENDIF
486  * functions), the relevent flags are inverted.
487  *
488  * When the matching 'endif' instruction is reached, the flags are
489  * popped off.  If the stack is now empty, normal execution resumes.
490  *
491  * No attempt is made to deal with stack overflow (14 elements?).
492  */
493 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
494 {
495    struct brw_instruction *insn;
496
497    if (p->single_program_flow) {
498       assert(execute_size == BRW_EXECUTE_1);
499
500       insn = next_insn(p, BRW_OPCODE_ADD);
501       insn->header.predicate_inverse = 1;
502    } else {
503       insn = next_insn(p, BRW_OPCODE_IF);
504    }
505
506    /* Override the defaults for this instruction:
507     */
508    brw_set_dest(insn, brw_ip_reg());
509    brw_set_src0(insn, brw_ip_reg());
510    brw_set_src1(insn, brw_imm_d(0x0));
511
512    insn->header.execution_size = execute_size;
513    insn->header.compression_control = BRW_COMPRESSION_NONE;
514    insn->header.predicate_control = BRW_PREDICATE_NORMAL;
515    insn->header.mask_control = BRW_MASK_ENABLE;
516
517    p->current->header.predicate_control = BRW_PREDICATE_NONE;
518
519    return insn;
520 }
521
522
523 struct brw_instruction *brw_ELSE(struct brw_compile *p, 
524                                  struct brw_instruction *if_insn)
525 {
526    struct brw_instruction *insn;
527
528    if (p->single_program_flow) {
529       insn = next_insn(p, BRW_OPCODE_ADD);
530    } else {
531       insn = next_insn(p, BRW_OPCODE_ELSE);
532    }
533
534    brw_set_dest(insn, brw_ip_reg());
535    brw_set_src0(insn, brw_ip_reg());
536    brw_set_src1(insn, brw_imm_d(0x0));
537
538    insn->header.compression_control = BRW_COMPRESSION_NONE;
539    insn->header.execution_size = if_insn->header.execution_size;
540    insn->header.mask_control = BRW_MASK_ENABLE;
541
542    /* Patch the if instruction to point at this instruction.
543     */
544    if (p->single_program_flow) {
545       assert(if_insn->header.opcode == BRW_OPCODE_ADD);
546
547       if_insn->bits3.ud = (insn - if_insn + 1) * 16;
548    } else {
549       assert(if_insn->header.opcode == BRW_OPCODE_IF);
550
551       if_insn->bits3.if_else.jump_count = insn - if_insn;
552       if_insn->bits3.if_else.pop_count = 1;
553       if_insn->bits3.if_else.pad0 = 0;
554    }
555
556    return insn;
557 }
558
559 void brw_ENDIF(struct brw_compile *p, 
560                struct brw_instruction *patch_insn)
561 {
562    if (p->single_program_flow) {
563       /* In single program flow mode, there's no need to execute an ENDIF,
564        * since we don't need to do any stack operations, and if we're executing
565        * currently, we want to just continue executing.
566        */
567       struct brw_instruction *next = &p->store[p->nr_insn];
568
569       assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
570
571       patch_insn->bits3.ud = (next - patch_insn) * 16;
572    } else {
573       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
574
575       brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
576       brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
577       brw_set_src1(insn, brw_imm_d(0x0));
578
579       insn->header.compression_control = BRW_COMPRESSION_NONE;
580       insn->header.execution_size = patch_insn->header.execution_size;
581       insn->header.mask_control = BRW_MASK_ENABLE;
582
583       assert(patch_insn->bits3.if_else.jump_count == 0);
584
585       /* Patch the if or else instructions to point at this or the next
586        * instruction respectively.
587        */
588       if (patch_insn->header.opcode == BRW_OPCODE_IF) {
589          /* Automagically turn it into an IFF:
590           */
591          patch_insn->header.opcode = BRW_OPCODE_IFF;
592          patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
593          patch_insn->bits3.if_else.pop_count = 0;
594          patch_insn->bits3.if_else.pad0 = 0;
595       } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
596          patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
597          patch_insn->bits3.if_else.pop_count = 1;
598          patch_insn->bits3.if_else.pad0 = 0;
599       } else {
600          assert(0);
601       }
602
603       /* Also pop item off the stack in the endif instruction:
604        */
605       insn->bits3.if_else.jump_count = 0;
606       insn->bits3.if_else.pop_count = 1;
607       insn->bits3.if_else.pad0 = 0;
608    }
609 }
610
611 struct brw_instruction *brw_BREAK(struct brw_compile *p)
612 {
613    struct brw_instruction *insn;
614    insn = next_insn(p, BRW_OPCODE_BREAK);
615    brw_set_dest(insn, brw_ip_reg());
616    brw_set_src0(insn, brw_ip_reg());
617    brw_set_src1(insn, brw_imm_d(0x0));
618    insn->header.compression_control = BRW_COMPRESSION_NONE;
619    insn->header.execution_size = BRW_EXECUTE_8;
620    insn->header.mask_control = BRW_MASK_DISABLE;
621    insn->bits3.if_else.pad0 = 0;
622    return insn;
623 }
624
625 struct brw_instruction *brw_CONT(struct brw_compile *p)
626 {
627    struct brw_instruction *insn;
628    insn = next_insn(p, BRW_OPCODE_CONTINUE);
629    brw_set_dest(insn, brw_ip_reg());
630    brw_set_src0(insn, brw_ip_reg());
631    brw_set_src1(insn, brw_imm_d(0x0));
632    insn->header.compression_control = BRW_COMPRESSION_NONE;
633    insn->header.execution_size = BRW_EXECUTE_8;
634    insn->header.mask_control = BRW_MASK_DISABLE;
635    insn->bits3.if_else.pad0 = 0;
636    return insn;
637 }
638
639 /* DO/WHILE loop:
640  */
641 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
642 {
643    if (p->single_program_flow) {
644       return &p->store[p->nr_insn];
645    } else {
646       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
647
648       /* Override the defaults for this instruction:
649        */
650       brw_set_dest(insn, brw_null_reg());
651       brw_set_src0(insn, brw_null_reg());
652       brw_set_src1(insn, brw_null_reg());
653
654       insn->header.compression_control = BRW_COMPRESSION_NONE;
655       insn->header.execution_size = execute_size;
656       insn->header.predicate_control = BRW_PREDICATE_NONE;
657       /* insn->header.mask_control = BRW_MASK_ENABLE; */
658       insn->header.mask_control = BRW_MASK_DISABLE;
659
660       return insn;
661    }
662 }
663
664
665
666 struct brw_instruction *brw_WHILE(struct brw_compile *p, 
667                struct brw_instruction *do_insn)
668 {
669    struct brw_instruction *insn;
670
671    if (p->single_program_flow)
672       insn = next_insn(p, BRW_OPCODE_ADD);
673    else
674       insn = next_insn(p, BRW_OPCODE_WHILE);
675
676    brw_set_dest(insn, brw_ip_reg());
677    brw_set_src0(insn, brw_ip_reg());
678    brw_set_src1(insn, brw_imm_d(0x0));
679
680    insn->header.compression_control = BRW_COMPRESSION_NONE;
681
682    if (p->single_program_flow) {
683       insn->header.execution_size = BRW_EXECUTE_1;
684
685       insn->bits3.d = (do_insn - insn) * 16;
686    } else {
687       insn->header.execution_size = do_insn->header.execution_size;
688
689       assert(do_insn->header.opcode == BRW_OPCODE_DO);
690       insn->bits3.if_else.jump_count = do_insn - insn + 1;
691       insn->bits3.if_else.pop_count = 0;
692       insn->bits3.if_else.pad0 = 0;
693    }
694
695 /*    insn->header.mask_control = BRW_MASK_ENABLE; */
696
697    insn->header.mask_control = BRW_MASK_DISABLE;
698    p->current->header.predicate_control = BRW_PREDICATE_NONE;   
699    return insn;
700 }
701
702
703 /* FORWARD JUMPS:
704  */
705 void brw_land_fwd_jump(struct brw_compile *p, 
706                        struct brw_instruction *jmp_insn)
707 {
708    struct brw_instruction *landing = &p->store[p->nr_insn];
709
710    assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
711    assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
712
713    jmp_insn->bits3.ud = (landing - jmp_insn) - 1; 
714 }
715
716
717
718 /* To integrate with the above, it makes sense that the comparison
719  * instruction should populate the flag register.  It might be simpler
720  * just to use the flag reg for most WM tasks?
721  */
722 void brw_CMP(struct brw_compile *p,
723              struct brw_reg dest,
724              GLuint conditional,
725              struct brw_reg src0,
726              struct brw_reg src1)
727 {
728    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
729
730    insn->header.destreg__conditonalmod = conditional;
731    brw_set_dest(insn, dest);
732    brw_set_src0(insn, src0);
733    brw_set_src1(insn, src1);
734
735 /*    guess_execution_size(insn, src0); */
736
737
738    /* Make it so that future instructions will use the computed flag
739     * value until brw_set_predicate_control_flag_value() is called
740     * again.  
741     */
742    if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
743        dest.nr == 0) {
744       p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
745       p->flag_value = 0xff;
746    }
747 }
748
749
750
751 /***********************************************************************
752  * Helpers for the various SEND message types:
753  */
754
755 /* Invert 8 values
756  */
757 void brw_math( struct brw_compile *p,
758                struct brw_reg dest,
759                GLuint function,
760                GLuint saturate,
761                GLuint msg_reg_nr,
762                struct brw_reg src,
763                GLuint data_type,
764                GLuint precision )
765 {
766    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
767    GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 
768    GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 
769
770    /* Example code doesn't set predicate_control for send
771     * instructions.
772     */
773    insn->header.predicate_control = 0; 
774    insn->header.destreg__conditonalmod = msg_reg_nr;
775
776    brw_set_dest(insn, dest);
777    brw_set_src0(insn, src);
778    brw_set_math_message(insn, 
779                         msg_length, response_length, 
780                         function,
781                         BRW_MATH_INTEGER_UNSIGNED,
782                         precision,
783                         saturate,
784                         data_type);
785 }
786
787 /* Use 2 send instructions to invert 16 elements
788  */
789 void brw_math_16( struct brw_compile *p,
790                   struct brw_reg dest,
791                   GLuint function,
792                   GLuint saturate,
793                   GLuint msg_reg_nr,
794                   struct brw_reg src,
795                   GLuint precision )
796 {
797    struct brw_instruction *insn;
798    GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 
799    GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 
800
801    /* First instruction:
802     */
803    brw_push_insn_state(p);
804    brw_set_predicate_control_flag_value(p, 0xff);
805    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
806
807    insn = next_insn(p, BRW_OPCODE_SEND);
808    insn->header.destreg__conditonalmod = msg_reg_nr;
809
810    brw_set_dest(insn, dest);
811    brw_set_src0(insn, src);
812    brw_set_math_message(insn, 
813                         msg_length, response_length, 
814                         function,
815                         BRW_MATH_INTEGER_UNSIGNED,
816                         precision,
817                         saturate,
818                         BRW_MATH_DATA_VECTOR);
819
820    /* Second instruction:
821     */
822    insn = next_insn(p, BRW_OPCODE_SEND);
823    insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
824    insn->header.destreg__conditonalmod = msg_reg_nr+1;
825
826    brw_set_dest(insn, offset(dest,1));
827    brw_set_src0(insn, src);
828    brw_set_math_message(insn, 
829                         msg_length, response_length, 
830                         function,
831                         BRW_MATH_INTEGER_UNSIGNED,
832                         precision,
833                         saturate,
834                         BRW_MATH_DATA_VECTOR);
835
836    brw_pop_insn_state(p);
837 }
838
839
840
841
/**
 * Emit the instruction pair for a dataport OWORD block write.
 *
 * Two instructions are appended to the program in p:
 *   1. A MOV, with masking disabled and compression off, that stores the
 *      immediate scratch_offset into brw_vec1_grf(0, 2) retyped as D.
 *      The previous contents of that GRF location are overwritten.
 *   2. A SEND with a null (UW) destination and src as source 0, carrying
 *      an OWORD-block-write dataport message of length 3 with no response.
 *
 * \param p               compile context the instructions are appended to
 * \param src             register passed as source 0 of the SEND
 * \param msg_reg_nr      stored in the SEND's destreg__conditonalmod
 *                        header field (presumably the message register
 *                        number, going by the name -- confirm)
 * \param scratch_offset  immediate written by the MOV; assumed to be the
 *                        offset of the block in scratch space -- TODO
 *                        confirm units against callers
 */
842 void brw_dp_WRITE_16( struct brw_compile *p,
843                       struct brw_reg src,
844                       GLuint msg_reg_nr,
845                       GLuint scratch_offset )
846 {
       /* Step 1: write scratch_offset into brw_vec1_grf(0, 2).  The mask
        * and compression overrides are scoped by the push/pop pair, so
        * the caller's default instruction state is restored afterwards.
        */
847    {
848       brw_push_insn_state(p);
849       brw_set_mask_control(p, BRW_MASK_DISABLE);
850       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
851
852       brw_MOV(p,
853               retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
854               brw_imm_d(scratch_offset));
855                            
856       brw_pop_insn_state(p);
857    }
858
       /* Step 2: the SEND itself.  The header fields are set directly on
        * the instruction rather than through the brw_set_*_control()
        * helpers used above.
        */
859    {
860       GLuint msg_length = 3;
861       struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
862       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
863    
864       insn->header.predicate_control = 0; /* XXX */
865       insn->header.compression_control = BRW_COMPRESSION_NONE; 
866       insn->header.destreg__conditonalmod = msg_reg_nr;
867   
868       brw_set_dest(insn, dest);
869       brw_set_src0(insn, src);
870
          /* NOTE(review): bti 255 presumably selects stateless (scratch)
           * access rather than a real binding table entry -- confirm.
           */
871       brw_set_dp_write_message(insn,
872                                255, /* bti */
873                                BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
874                                BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
875                                msg_length,
876                                0, /* pixel scoreboard */
877                                0, /* response_length */
878                                0); /* eot */
879    }
880
881 }
882
883
/**
 * Emit the instruction pair for a dataport OWORD block read.
 *
 * Two instructions are appended to the program in p:
 *   1. A MOV, with masking disabled and compression off, that stores the
 *      immediate scratch_offset into brw_vec1_grf(0, 2) retyped as D.
 *      The previous contents of that GRF location are overwritten.
 *   2. A SEND whose destination is dest and whose source 0 is
 *      brw_vec8_grf(0, 0) retyped as UW, carrying an OWORD-block-read
 *      dataport message with msg_length 1 and response_length 2.
 *
 * Unlike brw_dp_WRITE_16(), source 0 of the SEND is fixed to GRF 0
 * instead of being supplied by the caller.
 *
 * \param p               compile context the instructions are appended to
 * \param dest            destination register of the SEND; the message
 *                        asks for a response of length 2
 * \param msg_reg_nr      stored in the SEND's destreg__conditonalmod
 *                        header field (presumably the message register
 *                        number, going by the name -- confirm)
 * \param scratch_offset  immediate written by the MOV; assumed to be the
 *                        offset of the block in scratch space -- TODO
 *                        confirm units against callers
 */
884 void brw_dp_READ_16( struct brw_compile *p,
885                       struct brw_reg dest,
886                       GLuint msg_reg_nr,
887                       GLuint scratch_offset )
888 {
       /* Step 1: write scratch_offset into brw_vec1_grf(0, 2); the state
        * overrides are undone by the matching pop.
        */
889    {
890       brw_push_insn_state(p);
891       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
892       brw_set_mask_control(p, BRW_MASK_DISABLE);
893
894       brw_MOV(p,
895               retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
896               brw_imm_d(scratch_offset));
897                            
898       brw_pop_insn_state(p);
899    }
900
       /* Step 2: the SEND that performs the read. */
901    {
902       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
903    
904       insn->header.predicate_control = 0; /* XXX */
905       insn->header.compression_control = BRW_COMPRESSION_NONE; 
906       insn->header.destreg__conditonalmod = msg_reg_nr;
907   
908       brw_set_dest(insn, dest); /* UW? */
909       brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
910
          /* NOTE(review): the literal 3 passed as msg_control is presumably
           * the same encoding as BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, which
           * brw_dp_WRITE_16() passes by name -- confirm against
           * brw_defines.h and consider using the named constant here too.
           */
911       brw_set_dp_read_message(insn,
912                               255, /* bti */
913                               3,  /* msg_control */
914                               BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
915                               1, /* target cache */
916                               1, /* msg_length */
917                               2, /* response_length */
918                               0); /* eot */
919    }
920 }
921
922
/**
 * Emit a single SEND carrying a render-target-write dataport message.
 *
 * The message control is fixed to
 * BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE and the pixel
 * scoreboard argument is always 1; everything else comes from the caller.
 * The instruction is emitted with predicate_control 0 and compression
 * off, set directly on the instruction header.
 *
 * \param p                    compile context the SEND is appended to
 * \param dest                 destination register of the SEND
 * \param msg_reg_nr           stored in the destreg__conditonalmod header
 *                             field (presumably the message register
 *                             number, going by the name -- confirm)
 * \param src0                 source 0 of the SEND
 * \param binding_table_index  forwarded to brw_set_dp_write_message()
 * \param msg_length           forwarded to brw_set_dp_write_message()
 * \param response_length      forwarded to brw_set_dp_write_message()
 * \param eot                  forwarded to brw_set_dp_write_message()
 *                             (presumably "end of thread" -- confirm)
 */
923 void brw_fb_WRITE(struct brw_compile *p,
924                    struct brw_reg dest,
925                    GLuint msg_reg_nr,
926                    struct brw_reg src0,
927                    GLuint binding_table_index,
928                    GLuint msg_length,
929                    GLuint response_length,
930                    GLboolean eot)
931 {
932    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
933    
934    insn->header.predicate_control = 0; /* XXX */
935    insn->header.compression_control = BRW_COMPRESSION_NONE; 
936    insn->header.destreg__conditonalmod = msg_reg_nr;
937   
938    brw_set_dest(insn, dest);
939    brw_set_src0(insn, src0);
940    brw_set_dp_write_message(insn,
941                             binding_table_index,
942                             BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
943                             BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
944                             msg_length,
945                             1,  /* pixel scoreboard */
946                             response_length, 
947                             eot);
948 }
949
950
951
/**
 * Emit a sampler SEND, plus the extra instructions used to work around
 * the destination-dependency problem described in the comment below.
 *
 * Behaviour by writemask:
 *   - 0: nothing is emitted and the function returns immediately.
 *   - WRITEMASK_XYZW: a single SEND is emitted with the arguments as
 *     given.
 *   - a single contiguous run of channels: two MOVs build a message
 *     header in the message register (a copy of GRF 0 with the inverted
 *     channel mask placed in dword 2), then the SEND is emitted with a
 *     null source 0, with dest advanced past the skipped leading
 *     channels and with response_length reduced to cover only the
 *     enabled channels.
 *   - anything else (enabled channels are not contiguous): the SEND is
 *     emitted unchanged and followed by a MOV of the last response
 *     register onto itself.
 *
 * \param p                    compile context instructions are appended to
 * \param dest                 first destination register of the response
 * \param msg_reg_nr           stored in the destreg__conditonalmod header
 *                             field and used to build the header register
 * \param src0                 source 0 of the SEND (replaced by the null
 *                             register in the contiguous-mask case)
 * \param binding_table_index  forwarded to brw_set_sampler_message()
 * \param sampler              forwarded to brw_set_sampler_message()
 * \param writemask            channel mask; bits 0..3 are examined
 * \param msg_type             forwarded to brw_set_sampler_message()
 * \param response_length      response length; overridden in the
 *                             contiguous-mask case
 * \param msg_length           forwarded to brw_set_sampler_message()
 * \param eot                  forwarded to brw_set_sampler_message()
 */
952 void brw_SAMPLE(struct brw_compile *p,
953                 struct brw_reg dest,
954                 GLuint msg_reg_nr,
955                 struct brw_reg src0,
956                 GLuint binding_table_index,
957                 GLuint sampler,
958                 GLuint writemask,
959                 GLuint msg_type,
960                 GLuint response_length,
961                 GLuint msg_length,
962                 GLboolean eot)
963 {
964    GLboolean need_stall = 0;
965    
966    if(writemask == 0) {
967 /*       _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
968       return;
969    }
970    
971    /* Hardware doesn't do destination dependency checking on send
972     * instructions properly.  Add a workaround which generates the
973     * dependency by other means.  In practice it seems like this bug
974     * only crops up for texture samples, and only where registers are
975     * written by the send and then written again later without being
976     * read in between.  Luckily for us, we already track that
977     * information and use it to modify the writemask for the
978     * instruction, so that is a guide for whether a workaround is
979     * needed.
980     */
981    if (writemask != WRITEMASK_XYZW) {
982       GLuint dst_offset = 0;
983       GLuint i, newmask = 0, len = 0;
984
          /* Skip the disabled leading channels.  Each one advances the
           * destination by 2 registers (presumably because a channel
           * occupies two registers in this dispatch mode -- confirm).
           */
985       for (i = 0; i < 4; i++) {
986          if (writemask & (1<<i))
987             break;
988          dst_offset += 2;
989       }
          /* Collect the first contiguous run of enabled channels into
           * newmask and count them in len.
           */
990       for (; i < 4; i++) {
991          if (!(writemask & (1<<i)))
992             break;
993          newmask |= 1<<i;
994          len++;
995       }
996
          /* If the run does not account for every bit of writemask, the
           * mask has a hole in it and the header trick below is not used;
           * fall back to the post-SEND MOV instead.
           */
997       if (newmask != writemask) {
998          need_stall = 1;
999 /*       _mesa_printf("need stall %x %x\n", newmask , writemask); */
1000       }
1001       else {
1002          struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1003          
              /* From here on newmask holds the channels that are NOT
               * wanted.
               */
1004          newmask = ~newmask & WRITEMASK_XYZW;
1005
1006          brw_push_insn_state(p);
1007
1008          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1009          brw_set_mask_control(p, BRW_MASK_DISABLE);
1010
              /* Build the header: copy GRF 0 into the message register,
               * then overwrite its dword 2 with the inverted mask shifted
               * up by 12 bits.
               */
1011          brw_MOV(p, m1, brw_vec8_grf(0,0));      
1012          brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); 
1013
1014          brw_pop_insn_state(p);
1015
              /* Shrink the SEND to the enabled channels only. */
1016          src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 
1017          dest = offset(dest, dst_offset);
1018          response_length = len * 2;
1019       }
1020    }
1021
1022    {
1023       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1024    
1025       insn->header.predicate_control = 0; /* XXX */
1026       insn->header.compression_control = BRW_COMPRESSION_NONE;
1027       insn->header.destreg__conditonalmod = msg_reg_nr;
1028
1029       brw_set_dest(insn, dest);
1030       brw_set_src0(insn, src0);
1031       brw_set_sampler_message(p->brw, insn,
1032                               binding_table_index,
1033                               sampler,
1034                               msg_type,
1035                               response_length, 
1036                               msg_length,
1037                               eot);
1038    }
1039
1040    if (need_stall)
1041    {
           /* NOTE(review): response_length is a GLuint, so
            * response_length-1 would wrap if a caller passes 0 together
            * with a non-contiguous writemask -- confirm callers never do.
            */
1042       struct brw_reg reg = vec8(offset(dest, response_length-1));
1043
1044       /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
1045        */
1046       brw_push_insn_state(p);
           /* NOTE(review): GL_FALSE is passed here where the rest of this
            * function passes BRW_COMPRESSION_NONE; presumably both are 0
            * and the intent is "no compression" -- confirm and consider
            * using the named constant.
            */
1047       brw_set_compression_control(p, GL_FALSE);
1048       brw_MOV(p, reg, reg);           
1049       brw_pop_insn_state(p);
1050    }
1051
1052 }
1053
1054 /* All these variables are pretty confusing - we might be better off
1055  * using bitmasks and macros for this, in the old style.  Or perhaps
1056  * just having the caller instantiate the fields in dword3 itself.
1057  */
/**
 * Emit a single SEND carrying a URB message.
 *
 * The instruction gets dest and src0 from the caller and an immediate
 * D-typed 0 as source 1 before brw_set_urb_message() fills in the
 * message fields.  msg_length must be below 16 (checked with assert()).
 *
 * Inside this function the parameter named offset hides the offset()
 * register helper that other functions in this file call.
 *
 * \param p                compile context the SEND is appended to
 * \param dest             destination register of the SEND
 * \param msg_reg_nr       stored in the destreg__conditonalmod header
 *                         field (presumably the message register number,
 *                         going by the name -- confirm)
 * \param src0             source 0 of the SEND
 * \param allocate         forwarded to brw_set_urb_message()
 * \param used             forwarded to brw_set_urb_message()
 * \param msg_length       forwarded to brw_set_urb_message(); asserted
 *                         to be < 16
 * \param response_length  forwarded to brw_set_urb_message()
 * \param eot              forwarded to brw_set_urb_message()
 * \param writes_complete  forwarded to brw_set_urb_message()
 * \param offset           forwarded to brw_set_urb_message()
 * \param swizzle          forwarded to brw_set_urb_message()
 */
1058 void brw_urb_WRITE(struct brw_compile *p,
1059                    struct brw_reg dest,
1060                    GLuint msg_reg_nr,
1061                    struct brw_reg src0,
1062                    GLboolean allocate,
1063                    GLboolean used,
1064                    GLuint msg_length,
1065                    GLuint response_length,
1066                    GLboolean eot,
1067                    GLboolean writes_complete,
1068                    GLuint offset,
1069                    GLuint swizzle)
1070 {
1071    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1072
1073    assert(msg_length < 16);
1074
1075    brw_set_dest(insn, dest);
1076    brw_set_src0(insn, src0);
1077    brw_set_src1(insn, brw_imm_d(0));
1078
1079    insn->header.destreg__conditonalmod = msg_reg_nr;
1080
1081    brw_set_urb_message(insn,
1082                        allocate,
1083                        used,
1084                        msg_length,
1085                        response_length, 
1086                        eot, 
1087                        writes_complete, 
1088                        offset,
1089                        swizzle);
1090 }
1091