OSDN Git Service

284307341cb159fce35730b5bf9f929e5c36c134
[android-x86/external-mesa.git] / src / mesa / drivers / dri / i965 / brw_eu_emit.c
1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4  develop this 3D driver.
5  
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13  
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17  
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <keith@tungstengraphics.com>
30   */
31      
32
33 #include "brw_context.h"
34 #include "brw_defines.h"
35 #include "brw_eu.h"
36
37
38
39
40 /***********************************************************************
41  * Internal helper for constructing instructions
42  */
43
44 static void guess_execution_size( struct brw_instruction *insn,
45                                   struct brw_reg reg )
46 {
47    if (reg.width == BRW_WIDTH_8 && 
48        insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) 
49       insn->header.execution_size = BRW_EXECUTE_16;
50    else
51       insn->header.execution_size = reg.width;  /* note - definitions are compatible */
52 }
53
54
55 static void brw_set_dest( struct brw_instruction *insn,
56                           struct brw_reg dest )
57 {
58    insn->bits1.da1.dest_reg_file = dest.file;
59    insn->bits1.da1.dest_reg_type = dest.type;
60    insn->bits1.da1.dest_address_mode = dest.address_mode;
61
62    if (dest.address_mode == BRW_ADDRESS_DIRECT) {   
63       insn->bits1.da1.dest_reg_nr = dest.nr;
64
65       if (insn->header.access_mode == BRW_ALIGN_1) {
66          insn->bits1.da1.dest_subreg_nr = dest.subnr;
67          insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
68       }
69       else {
70          insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
71          insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
72       }
73    }
74    else {
75       insn->bits1.ia1.dest_subreg_nr = dest.subnr;
76
77       /* These are different sizes in align1 vs align16:
78        */
79       if (insn->header.access_mode == BRW_ALIGN_1) {
80          insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
81          insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
82       }
83       else {
84          insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
85       }
86    }
87
88    /* NEW: Set the execution size based on dest.width and
89     * insn->compression_control:
90     */
91    guess_execution_size(insn, dest);
92 }
93
94 static void brw_set_src0( struct brw_instruction *insn,
95                       struct brw_reg reg )
96 {
97    assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
98
99    insn->bits1.da1.src0_reg_file = reg.file;
100    insn->bits1.da1.src0_reg_type = reg.type;
101    insn->bits2.da1.src0_abs = reg.abs;
102    insn->bits2.da1.src0_negate = reg.negate;
103    insn->bits2.da1.src0_address_mode = reg.address_mode;
104
105    if (reg.file == BRW_IMMEDIATE_VALUE) {
106       insn->bits3.ud = reg.dw1.ud;
107    
108       /* Required to set some fields in src1 as well:
109        */
110       insn->bits1.da1.src1_reg_file = 0; /* arf */
111       insn->bits1.da1.src1_reg_type = reg.type;
112    }
113    else 
114    {
115       if (reg.address_mode == BRW_ADDRESS_DIRECT) {
116          if (insn->header.access_mode == BRW_ALIGN_1) {
117             insn->bits2.da1.src0_subreg_nr = reg.subnr;
118             insn->bits2.da1.src0_reg_nr = reg.nr;
119          }
120          else {
121             insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
122             insn->bits2.da16.src0_reg_nr = reg.nr;
123          }
124       }
125       else {
126          insn->bits2.ia1.src0_subreg_nr = reg.subnr;
127
128          if (insn->header.access_mode == BRW_ALIGN_1) {
129             insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 
130          }
131          else {
132             insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
133          }
134       }
135
136       if (insn->header.access_mode == BRW_ALIGN_1) {
137          if (reg.width == BRW_WIDTH_1 && 
138              insn->header.execution_size == BRW_EXECUTE_1) {
139             insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
140             insn->bits2.da1.src0_width = BRW_WIDTH_1;
141             insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
142          }
143          else {
144             insn->bits2.da1.src0_horiz_stride = reg.hstride;
145             insn->bits2.da1.src0_width = reg.width;
146             insn->bits2.da1.src0_vert_stride = reg.vstride;
147          }
148       }
149       else {
150          insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
151          insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
152          insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
153          insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
154
155          /* This is an oddity of the fact we're using the same
156           * descriptions for registers in align_16 as align_1:
157           */
158          if (reg.vstride == BRW_VERTICAL_STRIDE_8)
159             insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
160          else
161             insn->bits2.da16.src0_vert_stride = reg.vstride;
162       }
163    }
164 }
165
166
167 void brw_set_src1( struct brw_instruction *insn,
168                           struct brw_reg reg )
169 {
170    assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
171
172    insn->bits1.da1.src1_reg_file = reg.file;
173    insn->bits1.da1.src1_reg_type = reg.type;
174    insn->bits3.da1.src1_abs = reg.abs;
175    insn->bits3.da1.src1_negate = reg.negate;
176
177    /* Only src1 can be immediate in two-argument instructions.
178     */
179    assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
180
181    if (reg.file == BRW_IMMEDIATE_VALUE) {
182       insn->bits3.ud = reg.dw1.ud;
183    }
184    else {
185       /* This is a hardware restriction, which may or may not be lifted
186        * in the future:
187        */
188       assert (reg.address_mode == BRW_ADDRESS_DIRECT);
189       //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
190
191       if (insn->header.access_mode == BRW_ALIGN_1) {
192          insn->bits3.da1.src1_subreg_nr = reg.subnr;
193          insn->bits3.da1.src1_reg_nr = reg.nr;
194       }
195       else {
196          insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
197          insn->bits3.da16.src1_reg_nr = reg.nr;
198       }
199
200       if (insn->header.access_mode == BRW_ALIGN_1) {
201          if (reg.width == BRW_WIDTH_1 && 
202              insn->header.execution_size == BRW_EXECUTE_1) {
203             insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
204             insn->bits3.da1.src1_width = BRW_WIDTH_1;
205             insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
206          }
207          else {
208             insn->bits3.da1.src1_horiz_stride = reg.hstride;
209             insn->bits3.da1.src1_width = reg.width;
210             insn->bits3.da1.src1_vert_stride = reg.vstride;
211          }
212       }
213       else {
214          insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
215          insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
216          insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
217          insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
218
219          /* This is an oddity of the fact we're using the same
220           * descriptions for registers in align_16 as align_1:
221           */
222          if (reg.vstride == BRW_VERTICAL_STRIDE_8)
223             insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
224          else
225             insn->bits3.da16.src1_vert_stride = reg.vstride;
226       }
227    }
228 }
229
230
231
232 static void brw_set_math_message( struct brw_instruction *insn,
233                                   GLuint msg_length,
234                                   GLuint response_length,
235                                   GLuint function,
236                                   GLuint integer_type,
237                                   GLboolean low_precision,
238                                   GLboolean saturate,
239                                   GLuint dataType )
240 {
241    brw_set_src1(insn, brw_imm_d(0));
242
243    insn->bits3.math.function = function;
244    insn->bits3.math.int_type = integer_type;
245    insn->bits3.math.precision = low_precision;
246    insn->bits3.math.saturate = saturate;
247    insn->bits3.math.data_type = dataType;
248    insn->bits3.math.response_length = response_length;
249    insn->bits3.math.msg_length = msg_length;
250    insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
251    insn->bits3.math.end_of_thread = 0;
252 }
253
254 static void brw_set_urb_message( struct brw_instruction *insn,
255                                  GLboolean allocate,
256                                  GLboolean used,
257                                  GLuint msg_length,
258                                  GLuint response_length,
259                                  GLboolean end_of_thread,
260                                  GLboolean complete,
261                                  GLuint offset,
262                                  GLuint swizzle_control )
263 {
264    brw_set_src1(insn, brw_imm_d(0));
265
266    insn->bits3.urb.opcode = 0;  /* ? */
267    insn->bits3.urb.offset = offset;
268    insn->bits3.urb.swizzle_control = swizzle_control;
269    insn->bits3.urb.allocate = allocate;
270    insn->bits3.urb.used = used; /* ? */
271    insn->bits3.urb.complete = complete;
272    insn->bits3.urb.response_length = response_length;
273    insn->bits3.urb.msg_length = msg_length;
274    insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
275    insn->bits3.urb.end_of_thread = end_of_thread;
276 }
277
278 static void brw_set_dp_write_message( struct brw_instruction *insn,
279                                       GLuint binding_table_index,
280                                       GLuint msg_control,
281                                       GLuint msg_type,
282                                       GLuint msg_length,
283                                       GLuint pixel_scoreboard_clear,
284                                       GLuint response_length,
285                                       GLuint end_of_thread )
286 {
287    brw_set_src1(insn, brw_imm_d(0));
288
289    insn->bits3.dp_write.binding_table_index = binding_table_index;
290    insn->bits3.dp_write.msg_control = msg_control;
291    insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
292    insn->bits3.dp_write.msg_type = msg_type;
293    insn->bits3.dp_write.send_commit_msg = 0;
294    insn->bits3.dp_write.response_length = response_length;
295    insn->bits3.dp_write.msg_length = msg_length;
296    insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
297    insn->bits3.urb.end_of_thread = end_of_thread;
298 }
299
300 static void brw_set_dp_read_message( struct brw_instruction *insn,
301                                       GLuint binding_table_index,
302                                       GLuint msg_control,
303                                       GLuint msg_type,
304                                       GLuint target_cache,
305                                       GLuint msg_length,
306                                       GLuint response_length,
307                                       GLuint end_of_thread )
308 {
309    brw_set_src1(insn, brw_imm_d(0));
310
311    insn->bits3.dp_read.binding_table_index = binding_table_index;
312    insn->bits3.dp_read.msg_control = msg_control;
313    insn->bits3.dp_read.msg_type = msg_type;
314    insn->bits3.dp_read.target_cache = target_cache;
315    insn->bits3.dp_read.response_length = response_length;
316    insn->bits3.dp_read.msg_length = msg_length;
317    insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
318    insn->bits3.dp_read.end_of_thread = end_of_thread;
319 }
320
321 static void brw_set_sampler_message( struct brw_instruction *insn,
322                                      GLuint binding_table_index,
323                                      GLuint sampler,
324                                      GLuint msg_type,
325                                      GLuint response_length,
326                                      GLuint msg_length,
327                                      GLboolean eot)
328 {
329    brw_set_src1(insn, brw_imm_d(0));
330
331    insn->bits3.sampler.binding_table_index = binding_table_index;
332    insn->bits3.sampler.sampler = sampler;
333    insn->bits3.sampler.msg_type = msg_type;
334    insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
335    insn->bits3.sampler.response_length = response_length;
336    insn->bits3.sampler.msg_length = msg_length;
337    insn->bits3.sampler.end_of_thread = eot;
338    insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
339 }
340
341
342
343 static struct brw_instruction *next_insn( struct brw_compile *p, 
344                                           GLuint opcode )
345 {
346    struct brw_instruction *insn;
347
348    assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
349
350    insn = &p->store[p->nr_insn++];
351    memcpy(insn, p->current, sizeof(*insn));
352
353    /* Reset this one-shot flag: 
354     */
355
356    if (p->current->header.destreg__conditonalmod) {
357       p->current->header.destreg__conditonalmod = 0;   
358       p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
359    }
360
361    insn->header.opcode = opcode;
362    return insn;
363 }
364
365
366 static struct brw_instruction *brw_alu1( struct brw_compile *p,
367                                          GLuint opcode,
368                                          struct brw_reg dest,
369                                          struct brw_reg src )
370 {
371    struct brw_instruction *insn = next_insn(p, opcode);
372    brw_set_dest(insn, dest);
373    brw_set_src0(insn, src);   
374    return insn;
375 }
376
377 static struct brw_instruction *brw_alu2(struct brw_compile *p,
378                                         GLuint opcode,
379                                         struct brw_reg dest,
380                                         struct brw_reg src0,
381                                         struct brw_reg src1 )
382 {
383    struct brw_instruction *insn = next_insn(p, opcode);   
384    brw_set_dest(insn, dest);
385    brw_set_src0(insn, src0);
386    brw_set_src1(insn, src1);
387    return insn;
388 }
389
390
391 /***********************************************************************
392  * Convenience routines.
393  */
394 #define ALU1(OP)                                        \
395 struct brw_instruction *brw_##OP(struct brw_compile *p,                 \
396               struct brw_reg dest,                      \
397               struct brw_reg src0)                      \
398 {                                                       \
399    return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);     \
400 }
401
402 #define ALU2(OP)                                        \
403 struct brw_instruction *brw_##OP(struct brw_compile *p,                 \
404               struct brw_reg dest,                      \
405               struct brw_reg src0,                      \
406               struct brw_reg src1)                      \
407 {                                                       \
408    return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);       \
409 }
410
411
412 ALU1(MOV)
413 ALU2(SEL)
414 ALU1(NOT)
415 ALU2(AND)
416 ALU2(OR)
417 ALU2(XOR)
418 ALU2(SHR)
419 ALU2(SHL)
420 ALU2(RSR)
421 ALU2(RSL)
422 ALU2(ASR)
423 ALU2(ADD)
424 ALU2(MUL)
425 ALU1(FRC)
426 ALU1(RNDD)
427 ALU2(MAC)
428 ALU2(MACH)
429 ALU1(LZD)
430 ALU2(DP4)
431 ALU2(DPH)
432 ALU2(DP3)
433 ALU2(DP2)
434 ALU2(LINE)
435
436
437
438
439 void brw_NOP(struct brw_compile *p)
440 {
441    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);   
442    brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
443    brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
444    brw_set_src1(insn, brw_imm_ud(0x0));
445 }
446
447
448
449
450
451 /***********************************************************************
452  * Comparisons, if/else/endif
453  */
454
455 struct brw_instruction *brw_JMPI(struct brw_compile *p, 
456               struct brw_reg dest,
457               struct brw_reg src0,
458               struct brw_reg src1)
459 {
460    struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
461
462    p->current->header.predicate_control = BRW_PREDICATE_NONE;
463
464    return insn;
465 }
466
467 /* EU takes the value from the flag register and pushes it onto some
468  * sort of a stack (presumably merging with any flag value already on
469  * the stack).  Within an if block, the flags at the top of the stack
470  * control execution on each channel of the unit, eg. on each of the
471  * 16 pixel values in our wm programs.
472  *
473  * When the matching 'else' instruction is reached (presumably by
474  * countdown of the instruction count patched in by our ELSE/ENDIF
475  * functions), the relevent flags are inverted.
476  *
477  * When the matching 'endif' instruction is reached, the flags are
478  * popped off.  If the stack is now empty, normal execution resumes.
479  *
480  * No attempt is made to deal with stack overflow (14 elements?).
481  */
482 struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
483 {
484    struct brw_instruction *insn;
485
486    if (p->single_program_flow) {
487       assert(execute_size == BRW_EXECUTE_1);
488
489       insn = next_insn(p, BRW_OPCODE_ADD);
490       insn->header.predicate_inverse = 1;
491    } else {
492       insn = next_insn(p, BRW_OPCODE_IF);
493    }
494
495    /* Override the defaults for this instruction:
496     */
497    brw_set_dest(insn, brw_ip_reg());
498    brw_set_src0(insn, brw_ip_reg());
499    brw_set_src1(insn, brw_imm_d(0x0));
500
501    insn->header.execution_size = execute_size;
502    insn->header.compression_control = BRW_COMPRESSION_NONE;
503    insn->header.predicate_control = BRW_PREDICATE_NORMAL;
504    insn->header.mask_control = BRW_MASK_ENABLE;
505
506    p->current->header.predicate_control = BRW_PREDICATE_NONE;
507
508    return insn;
509 }
510
511
512 struct brw_instruction *brw_ELSE(struct brw_compile *p, 
513                                  struct brw_instruction *if_insn)
514 {
515    struct brw_instruction *insn;
516
517    if (p->single_program_flow) {
518       insn = next_insn(p, BRW_OPCODE_ADD);
519    } else {
520       insn = next_insn(p, BRW_OPCODE_ELSE);
521    }
522
523    brw_set_dest(insn, brw_ip_reg());
524    brw_set_src0(insn, brw_ip_reg());
525    brw_set_src1(insn, brw_imm_d(0x0));
526
527    insn->header.compression_control = BRW_COMPRESSION_NONE;
528    insn->header.execution_size = if_insn->header.execution_size;
529    insn->header.mask_control = BRW_MASK_ENABLE;
530
531    /* Patch the if instruction to point at this instruction.
532     */
533    if (p->single_program_flow) {
534       assert(if_insn->header.opcode == BRW_OPCODE_ADD);
535
536       if_insn->bits3.ud = (insn - if_insn + 1) * 16;
537    } else {
538       assert(if_insn->header.opcode == BRW_OPCODE_IF);
539
540       if_insn->bits3.if_else.jump_count = insn - if_insn;
541       if_insn->bits3.if_else.pop_count = 1;
542       if_insn->bits3.if_else.pad0 = 0;
543    }
544
545    return insn;
546 }
547
548 void brw_ENDIF(struct brw_compile *p, 
549                struct brw_instruction *patch_insn)
550 {
551    if (p->single_program_flow) {
552       /* In single program flow mode, there's no need to execute an ENDIF,
553        * since we don't need to do any stack operations, and if we're executing
554        * currently, we want to just continue executing.
555        */
556       struct brw_instruction *next = &p->store[p->nr_insn];
557
558       assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
559
560       patch_insn->bits3.ud = (next - patch_insn) * 16;
561    } else {
562       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
563
564       brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
565       brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
566       brw_set_src1(insn, brw_imm_d(0x0));
567
568       insn->header.compression_control = BRW_COMPRESSION_NONE;
569       insn->header.execution_size = patch_insn->header.execution_size;
570       insn->header.mask_control = BRW_MASK_ENABLE;
571
572       assert(patch_insn->bits3.if_else.jump_count == 0);
573
574       /* Patch the if or else instructions to point at this or the next
575        * instruction respectively.
576        */
577       if (patch_insn->header.opcode == BRW_OPCODE_IF) {
578          /* Automagically turn it into an IFF:
579           */
580          patch_insn->header.opcode = BRW_OPCODE_IFF;
581          patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
582          patch_insn->bits3.if_else.pop_count = 0;
583          patch_insn->bits3.if_else.pad0 = 0;
584       } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
585          patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
586          patch_insn->bits3.if_else.pop_count = 1;
587          patch_insn->bits3.if_else.pad0 = 0;
588       } else {
589          assert(0);
590       }
591
592       /* Also pop item off the stack in the endif instruction:
593        */
594       insn->bits3.if_else.jump_count = 0;
595       insn->bits3.if_else.pop_count = 1;
596       insn->bits3.if_else.pad0 = 0;
597    }
598 }
599
600 struct brw_instruction *brw_BREAK(struct brw_compile *p)
601 {
602    struct brw_instruction *insn;
603    insn = next_insn(p, BRW_OPCODE_BREAK);
604    brw_set_dest(insn, brw_ip_reg());
605    brw_set_src0(insn, brw_ip_reg());
606    brw_set_src1(insn, brw_imm_d(0x0));
607    insn->header.compression_control = BRW_COMPRESSION_NONE;
608    insn->header.execution_size = BRW_EXECUTE_8;
609    insn->header.mask_control = BRW_MASK_DISABLE;
610    insn->bits3.if_else.pad0 = 0;
611    return insn;
612 }
613
614 struct brw_instruction *brw_CONT(struct brw_compile *p)
615 {
616    struct brw_instruction *insn;
617    insn = next_insn(p, BRW_OPCODE_CONTINUE);
618    brw_set_dest(insn, brw_ip_reg());
619    brw_set_src0(insn, brw_ip_reg());
620    brw_set_src1(insn, brw_imm_d(0x0));
621    insn->header.compression_control = BRW_COMPRESSION_NONE;
622    insn->header.execution_size = BRW_EXECUTE_8;
623    insn->header.mask_control = BRW_MASK_DISABLE;
624    insn->bits3.if_else.pad0 = 0;
625    return insn;
626 }
627
628 /* DO/WHILE loop:
629  */
630 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
631 {
632    if (p->single_program_flow) {
633       return &p->store[p->nr_insn];
634    } else {
635       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
636
637       /* Override the defaults for this instruction:
638        */
639       brw_set_dest(insn, brw_null_reg());
640       brw_set_src0(insn, brw_null_reg());
641       brw_set_src1(insn, brw_null_reg());
642
643       insn->header.compression_control = BRW_COMPRESSION_NONE;
644       insn->header.execution_size = execute_size;
645       insn->header.predicate_control = BRW_PREDICATE_NONE;
646       /* insn->header.mask_control = BRW_MASK_ENABLE; */
647       insn->header.mask_control = BRW_MASK_DISABLE;
648
649       return insn;
650    }
651 }
652
653
654
655 struct brw_instruction *brw_WHILE(struct brw_compile *p, 
656                struct brw_instruction *do_insn)
657 {
658    struct brw_instruction *insn;
659
660    if (p->single_program_flow)
661       insn = next_insn(p, BRW_OPCODE_ADD);
662    else
663       insn = next_insn(p, BRW_OPCODE_WHILE);
664
665    brw_set_dest(insn, brw_ip_reg());
666    brw_set_src0(insn, brw_ip_reg());
667    brw_set_src1(insn, brw_imm_d(0x0));
668
669    insn->header.compression_control = BRW_COMPRESSION_NONE;
670
671    if (p->single_program_flow) {
672       insn->header.execution_size = BRW_EXECUTE_1;
673
674       insn->bits3.d = (do_insn - insn) * 16;
675    } else {
676       insn->header.execution_size = do_insn->header.execution_size;
677
678       assert(do_insn->header.opcode == BRW_OPCODE_DO);
679       insn->bits3.if_else.jump_count = do_insn - insn + 1;
680       insn->bits3.if_else.pop_count = 0;
681       insn->bits3.if_else.pad0 = 0;
682    }
683
684 /*    insn->header.mask_control = BRW_MASK_ENABLE; */
685
686    insn->header.mask_control = BRW_MASK_DISABLE;
687    p->current->header.predicate_control = BRW_PREDICATE_NONE;   
688    return insn;
689 }
690
691
692 /* FORWARD JUMPS:
693  */
694 void brw_land_fwd_jump(struct brw_compile *p, 
695                        struct brw_instruction *jmp_insn)
696 {
697    struct brw_instruction *landing = &p->store[p->nr_insn];
698
699    assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
700    assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
701
702    jmp_insn->bits3.ud = (landing - jmp_insn) - 1; 
703 }
704
705
706
707 /* To integrate with the above, it makes sense that the comparison
708  * instruction should populate the flag register.  It might be simpler
709  * just to use the flag reg for most WM tasks?
710  */
711 void brw_CMP(struct brw_compile *p,
712              struct brw_reg dest,
713              GLuint conditional,
714              struct brw_reg src0,
715              struct brw_reg src1)
716 {
717    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
718
719    insn->header.destreg__conditonalmod = conditional;
720    brw_set_dest(insn, dest);
721    brw_set_src0(insn, src0);
722    brw_set_src1(insn, src1);
723
724 /*    guess_execution_size(insn, src0); */
725
726
727    /* Make it so that future instructions will use the computed flag
728     * value until brw_set_predicate_control_flag_value() is called
729     * again.  
730     */
731    if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
732        dest.nr == 0) {
733       p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
734       p->flag_value = 0xff;
735    }
736 }
737
738
739
740 /***********************************************************************
741  * Helpers for the various SEND message types:
742  */
743
744 /* Invert 8 values
745  */
746 void brw_math( struct brw_compile *p,
747                struct brw_reg dest,
748                GLuint function,
749                GLuint saturate,
750                GLuint msg_reg_nr,
751                struct brw_reg src,
752                GLuint data_type,
753                GLuint precision )
754 {
755    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
756    GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 
757    GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 
758
759    /* Example code doesn't set predicate_control for send
760     * instructions.
761     */
762    insn->header.predicate_control = 0; 
763    insn->header.destreg__conditonalmod = msg_reg_nr;
764
765    brw_set_dest(insn, dest);
766    brw_set_src0(insn, src);
767    brw_set_math_message(insn, 
768                         msg_length, response_length, 
769                         function,
770                         BRW_MATH_INTEGER_UNSIGNED,
771                         precision,
772                         saturate,
773                         data_type);
774 }
775
776 /* Use 2 send instructions to invert 16 elements
777  */
778 void brw_math_16( struct brw_compile *p,
779                   struct brw_reg dest,
780                   GLuint function,
781                   GLuint saturate,
782                   GLuint msg_reg_nr,
783                   struct brw_reg src,
784                   GLuint precision )
785 {
786    struct brw_instruction *insn;
787    GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 
788    GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 
789
790    /* First instruction:
791     */
792    brw_push_insn_state(p);
793    brw_set_predicate_control_flag_value(p, 0xff);
794    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
795
796    insn = next_insn(p, BRW_OPCODE_SEND);
797    insn->header.destreg__conditonalmod = msg_reg_nr;
798
799    brw_set_dest(insn, dest);
800    brw_set_src0(insn, src);
801    brw_set_math_message(insn, 
802                         msg_length, response_length, 
803                         function,
804                         BRW_MATH_INTEGER_UNSIGNED,
805                         precision,
806                         saturate,
807                         BRW_MATH_DATA_VECTOR);
808
809    /* Second instruction:
810     */
811    insn = next_insn(p, BRW_OPCODE_SEND);
812    insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
813    insn->header.destreg__conditonalmod = msg_reg_nr+1;
814
815    brw_set_dest(insn, offset(dest,1));
816    brw_set_src0(insn, src);
817    brw_set_math_message(insn, 
818                         msg_length, response_length, 
819                         function,
820                         BRW_MATH_INTEGER_UNSIGNED,
821                         precision,
822                         saturate,
823                         BRW_MATH_DATA_VECTOR);
824
825    brw_pop_insn_state(p);
826 }
827
828
829
830
831 void brw_dp_WRITE_16( struct brw_compile *p,
832                       struct brw_reg src,
833                       GLuint msg_reg_nr,
834                       GLuint scratch_offset )
835 {
836    {
837       brw_push_insn_state(p);
838       brw_set_mask_control(p, BRW_MASK_DISABLE);
839       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
840
841       brw_MOV(p,
842               retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
843               brw_imm_d(scratch_offset));
844                            
845       brw_pop_insn_state(p);
846    }
847
848    {
849       GLuint msg_length = 3;
850       struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
851       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
852    
853       insn->header.predicate_control = 0; /* XXX */
854       insn->header.compression_control = BRW_COMPRESSION_NONE; 
855       insn->header.destreg__conditonalmod = msg_reg_nr;
856   
857       brw_set_dest(insn, dest);
858       brw_set_src0(insn, src);
859
860       brw_set_dp_write_message(insn,
861                                255, /* bti */
862                                BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
863                                BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
864                                msg_length,
865                                0, /* pixel scoreboard */
866                                0, /* response_length */
867                                0); /* eot */
868    }
869
870 }
871
872
873 void brw_dp_READ_16( struct brw_compile *p,
874                       struct brw_reg dest,
875                       GLuint msg_reg_nr,
876                       GLuint scratch_offset )
877 {
878    {
879       brw_push_insn_state(p);
880       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
881       brw_set_mask_control(p, BRW_MASK_DISABLE);
882
883       brw_MOV(p,
884               retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
885               brw_imm_d(scratch_offset));
886                            
887       brw_pop_insn_state(p);
888    }
889
890    {
891       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
892    
893       insn->header.predicate_control = 0; /* XXX */
894       insn->header.compression_control = BRW_COMPRESSION_NONE; 
895       insn->header.destreg__conditonalmod = msg_reg_nr;
896   
897       brw_set_dest(insn, dest); /* UW? */
898       brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
899
900       brw_set_dp_read_message(insn,
901                               255, /* bti */
902                               3,  /* msg_control */
903                               BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
904                               1, /* target cache */
905                               1, /* msg_length */
906                               2, /* response_length */
907                               0); /* eot */
908    }
909 }
910
911
912 void brw_fb_WRITE(struct brw_compile *p,
913                    struct brw_reg dest,
914                    GLuint msg_reg_nr,
915                    struct brw_reg src0,
916                    GLuint binding_table_index,
917                    GLuint msg_length,
918                    GLuint response_length,
919                    GLboolean eot)
920 {
921    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
922    
923    insn->header.predicate_control = 0; /* XXX */
924    insn->header.compression_control = BRW_COMPRESSION_NONE; 
925    insn->header.destreg__conditonalmod = msg_reg_nr;
926   
927    brw_set_dest(insn, dest);
928    brw_set_src0(insn, src0);
929    brw_set_dp_write_message(insn,
930                             binding_table_index,
931                             BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
932                             BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
933                             msg_length,
934                             1,  /* pixel scoreboard */
935                             response_length, 
936                             eot);
937 }
938
939
940
941 void brw_SAMPLE(struct brw_compile *p,
942                 struct brw_reg dest,
943                 GLuint msg_reg_nr,
944                 struct brw_reg src0,
945                 GLuint binding_table_index,
946                 GLuint sampler,
947                 GLuint writemask,
948                 GLuint msg_type,
949                 GLuint response_length,
950                 GLuint msg_length,
951                 GLboolean eot)
952 {
953    GLboolean need_stall = 0;
954    
955    if(writemask == 0) {
956 /*       _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
957       return;
958    }
959    
960    /* Hardware doesn't do destination dependency checking on send
961     * instructions properly.  Add a workaround which generates the
962     * dependency by other means.  In practice it seems like this bug
963     * only crops up for texture samples, and only where registers are
964     * written by the send and then written again later without being
965     * read in between.  Luckily for us, we already track that
966     * information and use it to modify the writemask for the
967     * instruction, so that is a guide for whether a workaround is
968     * needed.
969     */
970    if (writemask != WRITEMASK_XYZW) {
971       GLuint dst_offset = 0;
972       GLuint i, newmask = 0, len = 0;
973
974       for (i = 0; i < 4; i++) {
975          if (writemask & (1<<i))
976             break;
977          dst_offset += 2;
978       }
979       for (; i < 4; i++) {
980          if (!(writemask & (1<<i)))
981             break;
982          newmask |= 1<<i;
983          len++;
984       }
985
986       if (newmask != writemask) {
987          need_stall = 1;
988 /*       _mesa_printf("need stall %x %x\n", newmask , writemask); */
989       }
990       else {
991          struct brw_reg m1 = brw_message_reg(msg_reg_nr);
992          
993          newmask = ~newmask & WRITEMASK_XYZW;
994
995          brw_push_insn_state(p);
996
997          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
998          brw_set_mask_control(p, BRW_MASK_DISABLE);
999
1000          brw_MOV(p, m1, brw_vec8_grf(0,0));      
1001          brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); 
1002
1003          brw_pop_insn_state(p);
1004
1005          src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 
1006          dest = offset(dest, dst_offset);
1007          response_length = len * 2;
1008       }
1009    }
1010
1011    {
1012       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1013    
1014       insn->header.predicate_control = 0; /* XXX */
1015       insn->header.compression_control = BRW_COMPRESSION_NONE;
1016       insn->header.destreg__conditonalmod = msg_reg_nr;
1017
1018       brw_set_dest(insn, dest);
1019       brw_set_src0(insn, src0);
1020       brw_set_sampler_message(insn,
1021                               binding_table_index,
1022                               sampler,
1023                               msg_type,
1024                               response_length, 
1025                               msg_length,
1026                               eot);
1027    }
1028
1029    if (need_stall)
1030    {
1031       struct brw_reg reg = vec8(offset(dest, response_length-1));
1032
1033       /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
1034        */
1035       brw_push_insn_state(p);
1036       brw_set_compression_control(p, GL_FALSE);
1037       brw_MOV(p, reg, reg);           
1038       brw_pop_insn_state(p);
1039    }
1040
1041 }
1042
1043 /* All these variables are pretty confusing - we might be better off
1044  * using bitmasks and macros for this, in the old style.  Or perhaps
1045  * just having the caller instantiate the fields in dword3 itself.
1046  */
1047 void brw_urb_WRITE(struct brw_compile *p,
1048                    struct brw_reg dest,
1049                    GLuint msg_reg_nr,
1050                    struct brw_reg src0,
1051                    GLboolean allocate,
1052                    GLboolean used,
1053                    GLuint msg_length,
1054                    GLuint response_length,
1055                    GLboolean eot,
1056                    GLboolean writes_complete,
1057                    GLuint offset,
1058                    GLuint swizzle)
1059 {
1060    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1061
1062    assert(msg_length < 16);
1063
1064    brw_set_dest(insn, dest);
1065    brw_set_src0(insn, src0);
1066    brw_set_src1(insn, brw_imm_d(0));
1067
1068    insn->header.destreg__conditonalmod = msg_reg_nr;
1069
1070    brw_set_urb_message(insn,
1071                        allocate,
1072                        used,
1073                        msg_length,
1074                        response_length, 
1075                        eot, 
1076                        writes_complete, 
1077                        offset,
1078                        swizzle);
1079 }
1080