OSDN Git Service

Matrix determinant and inverse implementation
[android-x86/external-swiftshader.git] / src / Shader / PixelProgram.cpp
1 // SwiftShader Software Renderer\r
2 //\r
3 // Copyright(c) 2015 Google Inc.\r
4 //\r
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,\r
6 // transcribed, stored in a retrieval system, translated into any human or computer\r
7 // language by any means, or disclosed to third parties without the explicit written\r
8 // agreement of Google Inc. Without such an agreement, no rights or licenses, express\r
9 // or implied, including but not limited to any patent rights, are granted to you.\r
10 //\r
11 \r
12 #include "PixelProgram.hpp"\r
13 #include "Primitive.hpp"\r
14 #include "Renderer.hpp"\r
15 #include "SamplerCore.hpp"\r
16 \r
17 namespace sw\r
18 {\r
// Configuration flags that are only declared here (extern); their definitions are not in this listing.
19         extern bool postBlendSRGB;\r
// Read by setBuiltins: when true, vFace receives the CmpNLT(area, 0) result; otherwise the raw primitive area.
20         extern bool booleanFaceRegister;
21         extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
// Read by setBuiltins: when true, vPos.z and vPos.w are also written (from z[0] and w).
22         extern bool fullPixelPositionRegister;\r
23 \r
24         void PixelProgram::setBuiltins(PixelRoutine::Registers &rBase, Int &x, Int &y, Float4(&z)[4], Float4 &w)\r
25         {\r
26                 Registers& r = *static_cast<Registers*>(&rBase);\r
27 \r
28                 if(shader->getVersion() >= 0x0300)
29                 {
30                         if(shader->vPosDeclared)
31                         {
32                                 if(!halfIntegerCoordinates)
33                                 {
34                                         r.vPos.x = Float4(Float(x)) + Float4(0, 1, 0, 1);
35                                         r.vPos.y = Float4(Float(y)) + Float4(0, 0, 1, 1);
36                                 }
37                                 else
38                                 {
39                                         r.vPos.x = Float4(Float(x)) + Float4(0.5f, 1.5f, 0.5f, 1.5f);
40                                         r.vPos.y = Float4(Float(y)) + Float4(0.5f, 0.5f, 1.5f, 1.5f);
41                                 }
42
43                                 if(fullPixelPositionRegister)
44                                 {
45                                         r.vPos.z = z[0]; // FIXME: Centroid?
46                                         r.vPos.w = w;    // FIXME: Centroid?
47                                 }
48                         }
49
50                         if(shader->vFaceDeclared)
51                         {
52                                 Float4 area = *Pointer<Float>(r.primitive + OFFSET(Primitive, area));
53                                 Float4 face = booleanFaceRegister ? Float4(As<Float4>(CmpNLT(area, Float4(0.0f)))) : area;
54
55                                 r.vFace.x = face;
56                                 r.vFace.y = face;
57                                 r.vFace.z = face;
58                                 r.vFace.w = face;
59                         }
60                 }\r
61         }\r
62 \r
63         void PixelProgram::applyShader(PixelRoutine::Registers &rBase, Int cMask[4])
64         {\r
65                 Registers& r = *static_cast<Registers*>(&rBase);
66
67                 r.enableIndex = 0;
68                 r.stackIndex = 0;
69
70                 if(shader->containsLeaveInstruction())
71                 {
72                         r.enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
73                 }
74
75                 bool out[4][4] = { false };
76
77                 // Create all call site return blocks up front
78                 for(size_t i = 0; i < shader->getLength(); i++)
79                 {
80                         const Shader::Instruction *instruction = shader->getInstruction(i);
81                         Shader::Opcode opcode = instruction->opcode;
82
83                         if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
84                         {
85                                 const Dst &dst = instruction->dst;
86
87                                 ASSERT(callRetBlock[dst.label].size() == dst.callSite);
88                                 callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
89                         }
90                 }
91
92                 for(size_t i = 0; i < shader->getLength(); i++)
93                 {
94                         const Shader::Instruction *instruction = shader->getInstruction(i);
95                         Shader::Opcode opcode = instruction->opcode;
96
97                         if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
98                         {
99                                 continue;
100                         }
101
102                         const Dst &dst = instruction->dst;
103                         const Src &src0 = instruction->src[0];
104                         const Src &src1 = instruction->src[1];
105                         const Src &src2 = instruction->src[2];
106                         const Src &src3 = instruction->src[3];
107
108                         bool predicate = instruction->predicate;
109                         Control control = instruction->control;
110                         bool pp = dst.partialPrecision;
111                         bool project = instruction->project;
112                         bool bias = instruction->bias;
113
114                         Vector4f d;
115                         Vector4f s0;
116                         Vector4f s1;
117                         Vector4f s2;
118                         Vector4f s3;
119
120                         if(opcode == Shader::OPCODE_TEXKILL)   // Takes destination as input
121                         {
122                                 if(dst.type == Shader::PARAMETER_TEXTURE)
123                                 {
124                                         d.x = r.v[2 + dst.index].x;
125                                         d.y = r.v[2 + dst.index].y;
126                                         d.z = r.v[2 + dst.index].z;
127                                         d.w = r.v[2 + dst.index].w;
128                                 }
129                                 else
130                                 {
131                                         d = r.r[dst.index];
132                                 }
133                         }
134
135                         if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegisterF(r, src0);
136                         if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegisterF(r, src1);
137                         if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegisterF(r, src2);
138                         if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegisterF(r, src3);
139
140                         switch(opcode)
141                         {
142                         case Shader::OPCODE_PS_2_0:                                                    break;
143                         case Shader::OPCODE_PS_2_x:                                                    break;
144                         case Shader::OPCODE_PS_3_0:                                                    break;
145                         case Shader::OPCODE_DEF:                                                       break;
146                         case Shader::OPCODE_DCL:                                                       break;
147                         case Shader::OPCODE_NOP:                                                       break;
148                         case Shader::OPCODE_MOV:        mov(d, s0);                                    break;
149                         case Shader::OPCODE_NEG:        neg(d, s0);                                    break;
150                         case Shader::OPCODE_INEG:       ineg(d, s0);                                   break;
151                         case Shader::OPCODE_F2B:        f2b(d, s0);                                    break;
152                         case Shader::OPCODE_B2F:        b2f(d, s0);                                    break;
153                         case Shader::OPCODE_F2I:        f2i(d, s0);                                    break;
154                         case Shader::OPCODE_I2F:        i2f(d, s0);                                    break;
155                         case Shader::OPCODE_F2U:        f2u(d, s0);                                    break;
156                         case Shader::OPCODE_U2F:        u2f(d, s0);                                    break;
157                         case Shader::OPCODE_I2B:        i2b(d, s0);                                    break;
158                         case Shader::OPCODE_B2I:        b2i(d, s0);                                    break;
159                         case Shader::OPCODE_U2B:        u2b(d, s0);                                    break;
160                         case Shader::OPCODE_B2U:        b2u(d, s0);                                    break;
161                         case Shader::OPCODE_ADD:        add(d, s0, s1);                                break;
162                         case Shader::OPCODE_IADD:       iadd(d, s0, s1);                               break;
163                         case Shader::OPCODE_SUB:        sub(d, s0, s1);                                break;
164                         case Shader::OPCODE_ISUB:       isub(d, s0, s1);                               break;
165                         case Shader::OPCODE_MUL:        mul(d, s0, s1);                                break;
166                         case Shader::OPCODE_IMUL:       imul(d, s0, s1);                               break;
167                         case Shader::OPCODE_MAD:        mad(d, s0, s1, s2);                            break;
168                         case Shader::OPCODE_IMAD:       imad(d, s0, s1, s2);                           break;
169                         case Shader::OPCODE_DP1:        dp1(d, s0, s1);                                break;
170                         case Shader::OPCODE_DP2:        dp2(d, s0, s1);                                break;
171                         case Shader::OPCODE_DP2ADD:     dp2add(d, s0, s1, s2);                         break;
172                         case Shader::OPCODE_DP3:        dp3(d, s0, s1);                                break;
173                         case Shader::OPCODE_DP4:        dp4(d, s0, s1);                                break;
174                         case Shader::OPCODE_DET2:       det2(d, s0, s1);                               break;
175                         case Shader::OPCODE_DET3:       det3(d, s0, s1, s2);                           break;
176                         case Shader::OPCODE_DET4:       det4(d, s0, s1, s2, s3);                       break;
177                         case Shader::OPCODE_CMP0:       cmp0(d, s0, s1, s2);                           break;
178                         case Shader::OPCODE_ICMP:       icmp(d, s0, s1, control);                      break;
179                         case Shader::OPCODE_UCMP:       ucmp(d, s0, s1, control);                      break;
180                         case Shader::OPCODE_SELECT:     select(d, s0, s1, s2);                         break;
181                         case Shader::OPCODE_EXTRACT:    extract(d.x, s0, s1.x);                        break;
182                         case Shader::OPCODE_INSERT:     insert(d, s0, s1.x, s2.x);                     break;
183                         case Shader::OPCODE_FRC:        frc(d, s0);                                    break;
184                         case Shader::OPCODE_TRUNC:      trunc(d, s0);                                  break;
185                         case Shader::OPCODE_FLOOR:      floor(d, s0);                                  break;
186                         case Shader::OPCODE_ROUND:      round(d, s0);                                  break;
187                         case Shader::OPCODE_ROUNDEVEN:  roundEven(d, s0);                              break;
188                         case Shader::OPCODE_CEIL:       ceil(d, s0);                                   break;
189                         case Shader::OPCODE_EXP2X:      exp2x(d, s0, pp);                              break;
190                         case Shader::OPCODE_EXP2:       exp2(d, s0, pp);                               break;
191                         case Shader::OPCODE_LOG2X:      log2x(d, s0, pp);                              break;
192                         case Shader::OPCODE_LOG2:       log2(d, s0, pp);                               break;
193                         case Shader::OPCODE_EXP:        exp(d, s0, pp);                                break;
194                         case Shader::OPCODE_LOG:        log(d, s0, pp);                                break;
195                         case Shader::OPCODE_RCPX:       rcpx(d, s0, pp);                               break;
196                         case Shader::OPCODE_DIV:        div(d, s0, s1);                                break;
197                         case Shader::OPCODE_IDIV:       idiv(d, s0, s1);                               break;
198                         case Shader::OPCODE_UDIV:       udiv(d, s0, s1);                               break;
199                         case Shader::OPCODE_MOD:        mod(d, s0, s1);                                break;
200                         case Shader::OPCODE_IMOD:       imod(d, s0, s1);                               break;
201                         case Shader::OPCODE_UMOD:       umod(d, s0, s1);                               break;
202                         case Shader::OPCODE_SHL:        shl(d, s0, s1);                                break;
203                         case Shader::OPCODE_ISHR:       ishr(d, s0, s1);                               break;
204                         case Shader::OPCODE_USHR:       ushr(d, s0, s1);                               break;
205                         case Shader::OPCODE_RSQX:       rsqx(d, s0, pp);                               break;
206                         case Shader::OPCODE_SQRT:       sqrt(d, s0, pp);                               break;
207                         case Shader::OPCODE_RSQ:        rsq(d, s0, pp);                                break;
208                         case Shader::OPCODE_LEN2:       len2(d.x, s0, pp);                             break;
209                         case Shader::OPCODE_LEN3:       len3(d.x, s0, pp);                             break;
210                         case Shader::OPCODE_LEN4:       len4(d.x, s0, pp);                             break;
211                         case Shader::OPCODE_DIST1:      dist1(d.x, s0, s1, pp);                        break;
212                         case Shader::OPCODE_DIST2:      dist2(d.x, s0, s1, pp);                        break;
213                         case Shader::OPCODE_DIST3:      dist3(d.x, s0, s1, pp);                        break;
214                         case Shader::OPCODE_DIST4:      dist4(d.x, s0, s1, pp);                        break;
215                         case Shader::OPCODE_MIN:        min(d, s0, s1);                                break;
216                         case Shader::OPCODE_IMIN:       imin(d, s0, s1);                               break;
217                         case Shader::OPCODE_UMIN:       umin(d, s0, s1);                               break;
218                         case Shader::OPCODE_MAX:        max(d, s0, s1);                                break;
219                         case Shader::OPCODE_IMAX:       imax(d, s0, s1);                               break;
220                         case Shader::OPCODE_UMAX:       umax(d, s0, s1);                               break;
221                         case Shader::OPCODE_LRP:        lrp(d, s0, s1, s2);                            break;
222                         case Shader::OPCODE_STEP:       step(d, s0, s1);                               break;
223                         case Shader::OPCODE_SMOOTH:     smooth(d, s0, s1, s2);                         break;
224                         case Shader::OPCODE_FLOATBITSTOINT:
225                         case Shader::OPCODE_FLOATBITSTOUINT:
226                         case Shader::OPCODE_INTBITSTOFLOAT:
227                         case Shader::OPCODE_UINTBITSTOFLOAT: d = s0;                                   break;
228                         case Shader::OPCODE_POWX:       powx(d, s0, s1, pp);                           break;
229                         case Shader::OPCODE_POW:        pow(d, s0, s1, pp);                            break;
230                         case Shader::OPCODE_SGN:        sgn(d, s0);                                    break;
231                         case Shader::OPCODE_CRS:        crs(d, s0, s1);                                break;
232                         case Shader::OPCODE_FORWARD1:   forward1(d, s0, s1, s2);                       break;
233                         case Shader::OPCODE_FORWARD2:   forward2(d, s0, s1, s2);                       break;
234                         case Shader::OPCODE_FORWARD3:   forward3(d, s0, s1, s2);                       break;
235                         case Shader::OPCODE_FORWARD4:   forward4(d, s0, s1, s2);                       break;
236                         case Shader::OPCODE_REFLECT1:   reflect1(d, s0, s1);                           break;
237                         case Shader::OPCODE_REFLECT2:   reflect2(d, s0, s1);                           break;
238                         case Shader::OPCODE_REFLECT3:   reflect3(d, s0, s1);                           break;
239                         case Shader::OPCODE_REFLECT4:   reflect4(d, s0, s1);                           break;
240                         case Shader::OPCODE_REFRACT1:   refract1(d, s0, s1, s2.x);                     break;
241                         case Shader::OPCODE_REFRACT2:   refract2(d, s0, s1, s2.x);                     break;
242                         case Shader::OPCODE_REFRACT3:   refract3(d, s0, s1, s2.x);                     break;
243                         case Shader::OPCODE_REFRACT4:   refract4(d, s0, s1, s2.x);                     break;
244                         case Shader::OPCODE_NRM2:       nrm2(d, s0, pp);                               break;
245                         case Shader::OPCODE_NRM3:       nrm3(d, s0, pp);                               break;
246                         case Shader::OPCODE_NRM4:       nrm4(d, s0, pp);                               break;
247                         case Shader::OPCODE_ABS:        abs(d, s0);                                    break;
248                         case Shader::OPCODE_SINCOS:     sincos(d, s0, pp);                             break;
249                         case Shader::OPCODE_COS:        cos(d, s0, pp);                                break;
250                         case Shader::OPCODE_SIN:        sin(d, s0, pp);                                break;
251                         case Shader::OPCODE_TAN:        tan(d, s0, pp);                                break;
252                         case Shader::OPCODE_ACOS:       acos(d, s0, pp);                               break;
253                         case Shader::OPCODE_ASIN:       asin(d, s0, pp);                               break;
254                         case Shader::OPCODE_ATAN:       atan(d, s0, pp);                               break;
255                         case Shader::OPCODE_ATAN2:      atan2(d, s0, s1, pp);                          break;
256                         case Shader::OPCODE_COSH:       cosh(d, s0, pp);                               break;
257                         case Shader::OPCODE_SINH:       sinh(d, s0, pp);                               break;
258                         case Shader::OPCODE_TANH:       tanh(d, s0, pp);                               break;
259                         case Shader::OPCODE_ACOSH:      acosh(d, s0, pp);                              break;
260                         case Shader::OPCODE_ASINH:      asinh(d, s0, pp);                              break;
261                         case Shader::OPCODE_ATANH:      atanh(d, s0, pp);                              break;
262                         case Shader::OPCODE_M4X4:       M4X4(r, d, s0, src1);                          break;
263                         case Shader::OPCODE_M4X3:       M4X3(r, d, s0, src1);                          break;
264                         case Shader::OPCODE_M3X4:       M3X4(r, d, s0, src1);                          break;
265                         case Shader::OPCODE_M3X3:       M3X3(r, d, s0, src1);                          break;
266                         case Shader::OPCODE_M3X2:       M3X2(r, d, s0, src1);                          break;
267                         case Shader::OPCODE_TEX:        TEXLD(r, d, s0, src1, project, bias);          break;
268                         case Shader::OPCODE_TEXLDD:     TEXLDD(r, d, s0, src1, s2, s3, project, bias); break;
269                         case Shader::OPCODE_TEXLDL:     TEXLDL(r, d, s0, src1, project, bias);         break;
270                         case Shader::OPCODE_TEXKILL:    TEXKILL(cMask, d, dst.mask);                   break;
271                         case Shader::OPCODE_DISCARD:    DISCARD(r, cMask, instruction);                break;
272                         case Shader::OPCODE_DFDX:       DFDX(d, s0);                                   break;
273                         case Shader::OPCODE_DFDY:       DFDY(d, s0);                                   break;
274                         case Shader::OPCODE_FWIDTH:     FWIDTH(d, s0);                                 break;
275                         case Shader::OPCODE_BREAK:      BREAK(r);                                      break;
276                         case Shader::OPCODE_BREAKC:     BREAKC(r, s0, s1, control);                    break;
277                         case Shader::OPCODE_BREAKP:     BREAKP(r, src0);                               break;
278                         case Shader::OPCODE_CONTINUE:   CONTINUE(r);                                   break;
279                         case Shader::OPCODE_TEST:       TEST();                                        break;
280                         case Shader::OPCODE_CALL:       CALL(r, dst.label, dst.callSite);              break;
281                         case Shader::OPCODE_CALLNZ:     CALLNZ(r, dst.label, dst.callSite, src0);      break;
282                         case Shader::OPCODE_ELSE:       ELSE(r);                                       break;
283                         case Shader::OPCODE_ENDIF:      ENDIF(r);                                      break;
284                         case Shader::OPCODE_ENDLOOP:    ENDLOOP(r);                                    break;
285                         case Shader::OPCODE_ENDREP:     ENDREP(r);                                     break;
286                         case Shader::OPCODE_ENDWHILE:   ENDWHILE(r);                                   break;
287                         case Shader::OPCODE_IF:         IF(r, src0);                                   break;
288                         case Shader::OPCODE_IFC:        IFC(r, s0, s1, control);                       break;
289                         case Shader::OPCODE_LABEL:      LABEL(dst.index);                              break;
290                         case Shader::OPCODE_LOOP:       LOOP(r, src1);                                 break;
291                         case Shader::OPCODE_REP:        REP(r, src0);                                  break;
292                         case Shader::OPCODE_WHILE:      WHILE(r, src0);                                break;
293                         case Shader::OPCODE_RET:        RET(r);                                        break;
294                         case Shader::OPCODE_LEAVE:      LEAVE(r);                                      break;
295                         case Shader::OPCODE_CMP:        cmp(d, s0, s1, control);                       break;
296                         case Shader::OPCODE_ALL:        all(d.x, s0);                                  break;
297                         case Shader::OPCODE_ANY:        any(d.x, s0);                                  break;
298                         case Shader::OPCODE_NOT:        not(d, s0);                                    break;
299                         case Shader::OPCODE_OR:         or(d, s0, s1);                                 break;
300                         case Shader::OPCODE_XOR:        xor(d, s0, s1);                                break;
301                         case Shader::OPCODE_AND:        and(d, s0, s1);                                break;
302                         case Shader::OPCODE_EQ:         equal(d, s0, s1);                              break;
303                         case Shader::OPCODE_NE:         notEqual(d, s0, s1);                           break;
304                         case Shader::OPCODE_END:                                                       break;
305                         default:
306                                 ASSERT(false);
307                         }
308
309                         if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_TEXKILL && opcode != Shader::OPCODE_NOP)
310                         {
311                                 if(dst.integer)
312                                 {
313                                         switch(opcode)
314                                         {
315                                         case Shader::OPCODE_DIV:
316                                                 if(dst.x) d.x = Trunc(d.x);
317                                                 if(dst.y) d.y = Trunc(d.y);
318                                                 if(dst.z) d.z = Trunc(d.z);
319                                                 if(dst.w) d.w = Trunc(d.w);
320                                                 break;
321                                         default:
322                                                 break;   // No truncation to integer required when arguments are integer
323                                         }
324                                 }
325
326                                 if(dst.saturate)
327                                 {
328                                         if(dst.x) d.x = Max(d.x, Float4(0.0f));
329                                         if(dst.y) d.y = Max(d.y, Float4(0.0f));
330                                         if(dst.z) d.z = Max(d.z, Float4(0.0f));
331                                         if(dst.w) d.w = Max(d.w, Float4(0.0f));
332
333                                         if(dst.x) d.x = Min(d.x, Float4(1.0f));
334                                         if(dst.y) d.y = Min(d.y, Float4(1.0f));
335                                         if(dst.z) d.z = Min(d.z, Float4(1.0f));
336                                         if(dst.w) d.w = Min(d.w, Float4(1.0f));
337                                 }
338
339                                 if(instruction->isPredicated())
340                                 {
341                                         Vector4f pDst;   // FIXME: Rename
342
343                                         switch(dst.type)
344                                         {
345                                         case Shader::PARAMETER_TEMP:
346                                                 if(dst.rel.type == Shader::PARAMETER_VOID)
347                                                 {
348                                                         if(dst.x) pDst.x = r.r[dst.index].x;
349                                                         if(dst.y) pDst.y = r.r[dst.index].y;
350                                                         if(dst.z) pDst.z = r.r[dst.index].z;
351                                                         if(dst.w) pDst.w = r.r[dst.index].w;
352                                                 }
353                                                 else
354                                                 {
355                                                         Int a = relativeAddress(r, dst);
356
357                                                         if(dst.x) pDst.x = r.r[dst.index + a].x;
358                                                         if(dst.y) pDst.y = r.r[dst.index + a].y;
359                                                         if(dst.z) pDst.z = r.r[dst.index + a].z;
360                                                         if(dst.w) pDst.w = r.r[dst.index + a].w;
361                                                 }
362                                                 break;
363                                         case Shader::PARAMETER_COLOROUT:
364                                                 ASSERT(dst.rel.type == Shader::PARAMETER_VOID);
365                                                 if(dst.x) pDst.x = r.oC[dst.index].x;
366                                                 if(dst.y) pDst.y = r.oC[dst.index].y;
367                                                 if(dst.z) pDst.z = r.oC[dst.index].z;
368                                                 if(dst.w) pDst.w = r.oC[dst.index].w;
369                                                 break;
370                                         case Shader::PARAMETER_PREDICATE:
371                                                 if(dst.x) pDst.x = r.p0.x;
372                                                 if(dst.y) pDst.y = r.p0.y;
373                                                 if(dst.z) pDst.z = r.p0.z;
374                                                 if(dst.w) pDst.w = r.p0.w;
375                                                 break;
376                                         case Shader::PARAMETER_DEPTHOUT:
377                                                 pDst.x = r.oDepth;
378                                                 break;
379                                         default:
380                                                 ASSERT(false);
381                                         }
382
383                                         Int4 enable = enableMask(r, instruction);
384
385                                         Int4 xEnable = enable;
386                                         Int4 yEnable = enable;
387                                         Int4 zEnable = enable;
388                                         Int4 wEnable = enable;
389
390                                         if(predicate)
391                                         {
392                                                 unsigned char pSwizzle = instruction->predicateSwizzle;
393
394                                                 Float4 xPredicate = r.p0[(pSwizzle >> 0) & 0x03];
395                                                 Float4 yPredicate = r.p0[(pSwizzle >> 2) & 0x03];
396                                                 Float4 zPredicate = r.p0[(pSwizzle >> 4) & 0x03];
397                                                 Float4 wPredicate = r.p0[(pSwizzle >> 6) & 0x03];
398
399                                                 if(!instruction->predicateNot)
400                                                 {
401                                                         if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
402                                                         if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
403                                                         if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
404                                                         if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
405                                                 }
406                                                 else
407                                                 {
408                                                         if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
409                                                         if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
410                                                         if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
411                                                         if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
412                                                 }
413                                         }
414
415                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
416                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
417                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
418                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
419
420                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
421                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
422                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
423                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
424                                 }
425
426                                 switch(dst.type)
427                                 {
428                                 case Shader::PARAMETER_TEMP:
429                                         if(dst.rel.type == Shader::PARAMETER_VOID)
430                                         {
431                                                 if(dst.x) r.r[dst.index].x = d.x;
432                                                 if(dst.y) r.r[dst.index].y = d.y;
433                                                 if(dst.z) r.r[dst.index].z = d.z;
434                                                 if(dst.w) r.r[dst.index].w = d.w;
435                                         }
436                                         else
437                                         {
438                                                 Int a = relativeAddress(r, dst);
439
440                                                 if(dst.x) r.r[dst.index + a].x = d.x;
441                                                 if(dst.y) r.r[dst.index + a].y = d.y;
442                                                 if(dst.z) r.r[dst.index + a].z = d.z;
443                                                 if(dst.w) r.r[dst.index + a].w = d.w;
444                                         }
445                                         break;
446                                 case Shader::PARAMETER_COLOROUT:
447                                         ASSERT(dst.rel.type == Shader::PARAMETER_VOID);
448                                         if(dst.x) { r.oC[dst.index].x = d.x; out[dst.index][0] = true; }
449                                         if(dst.y) { r.oC[dst.index].y = d.y; out[dst.index][1] = true; }
450                                         if(dst.z) { r.oC[dst.index].z = d.z; out[dst.index][2] = true; }
451                                         if(dst.w) { r.oC[dst.index].w = d.w; out[dst.index][3] = true; }
452                                         break;
453                                 case Shader::PARAMETER_PREDICATE:
454                                         if(dst.x) r.p0.x = d.x;
455                                         if(dst.y) r.p0.y = d.y;
456                                         if(dst.z) r.p0.z = d.z;
457                                         if(dst.w) r.p0.w = d.w;
458                                         break;
459                                 case Shader::PARAMETER_DEPTHOUT:
460                                         r.oDepth = d.x;
461                                         break;
462                                 default:
463                                         ASSERT(false);
464                                 }
465                         }
466                 }
467
468                 if(currentLabel != -1)
469                 {
470                         Nucleus::setInsertBlock(returnBlock);
471                 }
472
473                 for(int i = 0; i < 4; i++)
474                 {
475                         if(state.targetFormat[i] != FORMAT_NULL)
476                         {
477                                 if(!out[i][0]) r.oC[i].x = Float4(0.0f);
478                                 if(!out[i][1]) r.oC[i].y = Float4(0.0f);
479                                 if(!out[i][2]) r.oC[i].z = Float4(0.0f);
480                                 if(!out[i][3]) r.oC[i].w = Float4(0.0f);
481                         }
482                 }
483         }
484 \r
485         Bool PixelProgram::alphaTest(PixelRoutine::Registers &rBase, Int cMask[4])\r
486         {\r
487                 Registers& r = *static_cast<Registers*>(&rBase);\r
488 \r
489                 clampColor(r.oC);\r
490 \r
491                 if(!state.alphaTestActive())
492                 {
493                         return true;
494                 }
495
496                 Int aMask;
497
498                 if(state.transparencyAntialiasing == TRANSPARENCY_NONE)
499                 {
500                         Short4 alpha = RoundShort4(r.oC[0].w * Float4(0x1000));
501
502                         PixelRoutine::alphaTest(r, aMask, alpha);
503
504                         for(unsigned int q = 0; q < state.multiSample; q++)
505                         {
506                                 cMask[q] &= aMask;
507                         }
508                 }
509                 else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
510                 {
511                         alphaToCoverage(r, cMask, r.oC[0].w);
512                 }
513                 else ASSERT(false);
514
515                 Int pass = cMask[0];
516
517                 for(unsigned int q = 1; q < state.multiSample; q++)
518                 {
519                         pass = pass | cMask[q];
520                 }
521
522                 return pass != 0x0;\r
523         }\r
524 \r
525         void PixelProgram::rasterOperation(PixelRoutine::Registers &rBase, Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])\r
526         {\r
527                 Registers& r = *static_cast<Registers*>(&rBase);\r
528 \r
529                 for(int index = 0; index < 4; index++)
530                 {
531                         if(!state.colorWriteActive(index))
532                         {
533                                 continue;
534                         }
535
536                         if(!postBlendSRGB && state.writeSRGB)
537                         {
538                                 r.oC[index].x = linearToSRGB(r.oC[index].x);
539                                 r.oC[index].y = linearToSRGB(r.oC[index].y);
540                                 r.oC[index].z = linearToSRGB(r.oC[index].z);
541                         }
542
543                         if(index == 0)
544                         {
545                                 fogBlend(r, r.oC[index], fog);
546                         }
547
548                         switch(state.targetFormat[index])
549                         {
550                         case FORMAT_R5G6B5:
551                         case FORMAT_X8R8G8B8:
552                         case FORMAT_X8B8G8R8:
553                         case FORMAT_A8R8G8B8:
554                         case FORMAT_A8B8G8R8:
555                         case FORMAT_A8:
556                         case FORMAT_G16R16:
557                         case FORMAT_A16B16G16R16:
558                                 for(unsigned int q = 0; q < state.multiSample; q++)
559                                 {
560                                         Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(r.data + OFFSET(DrawData, colorSliceB[index]));
561                                         Vector4s color;
562
563                                         color.x = convertFixed16(r.oC[index].x, false);
564                                         color.y = convertFixed16(r.oC[index].y, false);
565                                         color.z = convertFixed16(r.oC[index].z, false);
566                                         color.w = convertFixed16(r.oC[index].w, false);
567
568                                         if(state.multiSampleMask & (1 << q))
569                                         {
570                                                 alphaBlend(r, index, buffer, color, x);
571                                                 logicOperation(r, index, buffer, color, x);
572                                                 writeColor(r, index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
573                                         }
574                                 }
575                                 break;
576                         case FORMAT_R32F:
577                         case FORMAT_G32R32F:
578                         case FORMAT_A32B32G32R32F:
579                                 for(unsigned int q = 0; q < state.multiSample; q++)
580                                 {
581                                         Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(r.data + OFFSET(DrawData, colorSliceB[index]));
582                                         Vector4f color = r.oC[index];
583
584                                         if(state.multiSampleMask & (1 << q))
585                                         {
586                                                 alphaBlend(r, index, buffer, color, x);
587                                                 writeColor(r, index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
588                                         }
589                                 }
590                                 break;
591                         default:
592                                 ASSERT(false);
593                         }
594                 }
595         }\r
596
597         void PixelProgram::sampleTexture(Registers &r, Vector4f &c, const Src &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project, bool bias, bool gradients, bool lodProvided)
598         {
599                 if(sampler.type == Shader::PARAMETER_SAMPLER && sampler.rel.type == Shader::PARAMETER_VOID)
600                 {
601                         sampleTexture(r, c, sampler.index, u, v, w, q, dsx, dsy, project, bias, gradients, lodProvided);
602                 }
603                 else
604                 {
605                         Int index = As<Int>(Float(fetchRegisterF(r, sampler).x.x));
606
607                         for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
608                         {
609                                 if(shader->usesSampler(i))
610                                 {
611                                         If(index == i)
612                                         {
613                                                 sampleTexture(r, c, i, u, v, w, q, dsx, dsy, project, bias, gradients, lodProvided);
614                                                 // FIXME: When the sampler states are the same, we could use one sampler and just index the texture
615                                         }
616                                 }
617                         }
618                 }
619         }
620
621         void PixelProgram::sampleTexture(Registers &r, Vector4f &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project, bool bias, bool gradients, bool lodProvided)
622         {
623 #if PERF_PROFILE
624                 Long texTime = Ticks();
625 #endif
626
627                 Pointer<Byte> texture = r.data + OFFSET(DrawData, mipmap) + stage * sizeof(Texture);
628
629                 if(!project)
630                 {
631                         sampler[stage]->sampleTexture(texture, c, u, v, w, q, dsx, dsy, bias, gradients, lodProvided);
632                 }
633                 else
634                 {
635                         Float4 rq = reciprocal(q);
636
637                         Float4 u_q = u * rq;
638                         Float4 v_q = v * rq;
639                         Float4 w_q = w * rq;
640
641                         sampler[stage]->sampleTexture(texture, c, u_q, v_q, w_q, q, dsx, dsy, bias, gradients, lodProvided);
642                 }
643
644 #if PERF_PROFILE
645                 r.cycles[PERF_TEX] += Ticks() - texTime;
646 #endif
647         }
648
649         void PixelProgram::clampColor(Vector4f oC[4])
650         {
651                 for(int index = 0; index < 4; index++)
652                 {
653                         if(!state.colorWriteActive(index) && !(index == 0 && state.alphaTestActive()))
654                         {
655                                 continue;
656                         }
657
658                         switch(state.targetFormat[index])
659                         {
660                         case FORMAT_NULL:
661                                 break;
662                         case FORMAT_R5G6B5:
663                         case FORMAT_A8R8G8B8:
664                         case FORMAT_A8B8G8R8:
665                         case FORMAT_X8R8G8B8:
666                         case FORMAT_X8B8G8R8:
667                         case FORMAT_A8:
668                         case FORMAT_G16R16:
669                         case FORMAT_A16B16G16R16:
670                                 oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f));
671                                 oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f));
672                                 oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f));
673                                 oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f));
674                                 break;
675                         case FORMAT_R32F:
676                         case FORMAT_G32R32F:
677                         case FORMAT_A32B32G32R32F:
678                                 break;
679                         default:
680                                 ASSERT(false);
681                         }
682                 }
683         }
684
685         Int4 PixelProgram::enableMask(Registers &r, const Shader::Instruction *instruction)
686         {
687                 Int4 enable = instruction->analysisBranch ? Int4(r.enableStack[r.enableIndex]) : Int4(0xFFFFFFFF);
688
689                 if(!whileTest)
690                 {
691                         if(shader->containsBreakInstruction() && instruction->analysisBreak)
692                         {
693                                 enable &= r.enableBreak;
694                         }
695
696                         if(shader->containsContinueInstruction() && instruction->analysisContinue)
697                         {
698                                 enable &= r.enableContinue;
699                         }
700
701                         if(shader->containsLeaveInstruction() && instruction->analysisLeave)
702                         {
703                                 enable &= r.enableLeave;
704                         }
705                 }
706
707                 return enable;
708         }
709
710         Vector4f PixelProgram::fetchRegisterF(Registers &r, const Src &src, int offset)
711         {
712                 Vector4f reg;
713                 int i = src.index + offset;
714
715                 switch(src.type)
716                 {
717                 case Shader::PARAMETER_TEMP:
718                         if(src.rel.type == Shader::PARAMETER_VOID)
719                         {
720                                 reg = r.r[i];
721                         }
722                         else
723                         {
724                                 Int a = relativeAddress(r, src);
725
726                                 reg = r.r[i + a];
727                         }
728                         break;
729                 case Shader::PARAMETER_INPUT:
730                         {
731                                 if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
732                                 {
733                                         reg = r.v[i];
734                                 }
735                                 else if(src.rel.type == Shader::PARAMETER_LOOP)
736                                 {
737                                         Int aL = r.aL[r.loopDepth];
738
739                                         reg = r.v[i + aL];
740                                 }
741                                 else
742                                 {
743                                         Int a = relativeAddress(r, src);
744
745                                         reg = r.v[i + a];
746                                 }
747                         }
748                         break;
749                 case Shader::PARAMETER_CONST:
750                         reg = readConstant(r, src, offset);
751                         break;
752                 case Shader::PARAMETER_TEXTURE:
753                         reg = r.v[2 + i];
754                         break;
755                 case Shader::PARAMETER_MISCTYPE:
756                         if(src.index == 0) reg = r.vPos;
757                         if(src.index == 1) reg = r.vFace;
758                         break;
759                 case Shader::PARAMETER_SAMPLER:
760                         if(src.rel.type == Shader::PARAMETER_VOID)
761                         {
762                                 reg.x = As<Float4>(Int4(i));
763                         }
764                         else if(src.rel.type == Shader::PARAMETER_TEMP)
765                         {
766                                 reg.x = As<Float4>(Int4(i) + As<Int4>(r.r[src.rel.index].x));
767                         }
768                         return reg;
769                 case Shader::PARAMETER_PREDICATE:   return reg; // Dummy
770                 case Shader::PARAMETER_VOID:        return reg; // Dummy
771                 case Shader::PARAMETER_FLOAT4LITERAL:
772                         reg.x = Float4(src.value[0]);
773                         reg.y = Float4(src.value[1]);
774                         reg.z = Float4(src.value[2]);
775                         reg.w = Float4(src.value[3]);
776                         break;
777                 case Shader::PARAMETER_CONSTINT:    return reg; // Dummy
778                 case Shader::PARAMETER_CONSTBOOL:   return reg; // Dummy
779                 case Shader::PARAMETER_LOOP:        return reg; // Dummy
780                 case Shader::PARAMETER_COLOROUT:
781                         reg = r.oC[i];
782                         break;
783                 case Shader::PARAMETER_DEPTHOUT:
784                         reg.x = r.oDepth;
785                         break;
786                 default:
787                         ASSERT(false);
788                 }
789
790                 const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
791                 const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
792                 const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
793                 const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
794
795                 Vector4f mod;
796
797                 switch(src.modifier)
798                 {
799                 case Shader::MODIFIER_NONE:
800                         mod.x = x;
801                         mod.y = y;
802                         mod.z = z;
803                         mod.w = w;
804                         break;
805                 case Shader::MODIFIER_NEGATE:
806                         mod.x = -x;
807                         mod.y = -y;
808                         mod.z = -z;
809                         mod.w = -w;
810                         break;
811                 case Shader::MODIFIER_ABS:
812                         mod.x = Abs(x);
813                         mod.y = Abs(y);
814                         mod.z = Abs(z);
815                         mod.w = Abs(w);
816                         break;
817                 case Shader::MODIFIER_ABS_NEGATE:
818                         mod.x = -Abs(x);
819                         mod.y = -Abs(y);
820                         mod.z = -Abs(z);
821                         mod.w = -Abs(w);
822                         break;
823                 case Shader::MODIFIER_NOT:
824                         mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
825                         mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
826                         mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
827                         mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
828                         break;
829                 default:
830                         ASSERT(false);
831                 }
832
833                 return mod;
834         }
835
836         Vector4f PixelProgram::readConstant(Registers &r, const Src &src, int offset)
837         {
838                 Vector4f c;
839
840                 int i = src.index + offset;
841
842                 if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
843                 {
844                         c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData, ps.c[i]));
845
846                         c.x = c.x.xxxx;
847                         c.y = c.y.yyyy;
848                         c.z = c.z.zzzz;
849                         c.w = c.w.wwww;
850
851                         if(shader->containsDefineInstruction())   // Constant may be known at compile time
852                         {
853                                 for(size_t j = 0; j < shader->getLength(); j++)
854                                 {
855                                         const Shader::Instruction &instruction = *shader->getInstruction(j);
856
857                                         if(instruction.opcode == Shader::OPCODE_DEF)
858                                         {
859                                                 if(instruction.dst.index == i)
860                                                 {
861                                                         c.x = Float4(instruction.src[0].value[0]);
862                                                         c.y = Float4(instruction.src[0].value[1]);
863                                                         c.z = Float4(instruction.src[0].value[2]);
864                                                         c.w = Float4(instruction.src[0].value[3]);
865
866                                                         break;
867                                                 }
868                                         }
869                                 }
870                         }
871                 }
872                 else if(src.rel.type == Shader::PARAMETER_LOOP)
873                 {
874                         Int loopCounter = r.aL[r.loopDepth];
875
876                         c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData, ps.c[i]) + loopCounter * 16);
877
878                         c.x = c.x.xxxx;
879                         c.y = c.y.yyyy;
880                         c.z = c.z.zzzz;
881                         c.w = c.w.wwww;
882                 }
883                 else
884                 {
885                         Int a = relativeAddress(r, src);
886
887                         c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData, ps.c[i]) + a * 16);
888
889                         c.x = c.x.xxxx;
890                         c.y = c.y.yyyy;
891                         c.z = c.z.zzzz;
892                         c.w = c.w.wwww;
893                 }
894
895                 return c;
896         }
897
898         Int PixelProgram::relativeAddress(Registers &r, const Shader::Parameter &var)
899         {
900                 ASSERT(var.rel.deterministic);
901
902                 if(var.rel.type == Shader::PARAMETER_TEMP)
903                 {
904                         return As<Int>(Extract(r.r[var.rel.index].x, 0)) * var.rel.scale;
905                 }
906                 else if(var.rel.type == Shader::PARAMETER_INPUT)
907                 {
908                         return As<Int>(Extract(r.v[var.rel.index].x, 0)) * var.rel.scale;
909                 }
910                 else if(var.rel.type == Shader::PARAMETER_OUTPUT)
911                 {
912                         return As<Int>(Extract(r.oC[var.rel.index].x, 0)) * var.rel.scale;
913                 }
914                 else if(var.rel.type == Shader::PARAMETER_CONST)
915                 {
916                         RValue<Int4> c = *Pointer<Int4>(r.data + OFFSET(DrawData, ps.c[var.rel.index]));
917
918                         return Extract(c, 0) * var.rel.scale;
919                 }
920                 else ASSERT(false);
921
922                 return 0;
923         }
924
925         Float4 PixelProgram::linearToSRGB(const Float4 &x)   // Approximates x^(1.0/2.2)
926         {
927                 Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x));
928                 Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f);
929
930                 return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f));
931         }
932
933         void PixelProgram::M3X2(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
934         {
935                 Vector4f row0 = fetchRegisterF(r, src1, 0);
936                 Vector4f row1 = fetchRegisterF(r, src1, 1);
937
938                 dst.x = dot3(src0, row0);
939                 dst.y = dot3(src0, row1);
940         }
941
942         void PixelProgram::M3X3(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
943         {
944                 Vector4f row0 = fetchRegisterF(r, src1, 0);
945                 Vector4f row1 = fetchRegisterF(r, src1, 1);
946                 Vector4f row2 = fetchRegisterF(r, src1, 2);
947
948                 dst.x = dot3(src0, row0);
949                 dst.y = dot3(src0, row1);
950                 dst.z = dot3(src0, row2);
951         }
952
953         void PixelProgram::M3X4(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
954         {
955                 Vector4f row0 = fetchRegisterF(r, src1, 0);
956                 Vector4f row1 = fetchRegisterF(r, src1, 1);
957                 Vector4f row2 = fetchRegisterF(r, src1, 2);
958                 Vector4f row3 = fetchRegisterF(r, src1, 3);
959
960                 dst.x = dot3(src0, row0);
961                 dst.y = dot3(src0, row1);
962                 dst.z = dot3(src0, row2);
963                 dst.w = dot3(src0, row3);
964         }
965
966         void PixelProgram::M4X3(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
967         {
968                 Vector4f row0 = fetchRegisterF(r, src1, 0);
969                 Vector4f row1 = fetchRegisterF(r, src1, 1);
970                 Vector4f row2 = fetchRegisterF(r, src1, 2);
971
972                 dst.x = dot4(src0, row0);
973                 dst.y = dot4(src0, row1);
974                 dst.z = dot4(src0, row2);
975         }
976
977         void PixelProgram::M4X4(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
978         {
979                 Vector4f row0 = fetchRegisterF(r, src1, 0);
980                 Vector4f row1 = fetchRegisterF(r, src1, 1);
981                 Vector4f row2 = fetchRegisterF(r, src1, 2);
982                 Vector4f row3 = fetchRegisterF(r, src1, 3);
983
984                 dst.x = dot4(src0, row0);
985                 dst.y = dot4(src0, row1);
986                 dst.z = dot4(src0, row2);
987                 dst.w = dot4(src0, row3);
988         }
989
990         void PixelProgram::TEXLD(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias)
991         {
992                 Vector4f tmp;
993                 sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, src0.w, src0, src0, project, bias);
994
995                 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
996                 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
997                 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
998                 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
999         }
1000
1001         void PixelProgram::TEXLDD(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2, Vector4f &src3, bool project, bool bias)
1002         {
1003                 Vector4f tmp;
1004                 sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, src0.w, src2, src3, project, bias, true);
1005
1006                 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
1007                 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
1008                 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
1009                 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
1010         }
1011
1012         void PixelProgram::TEXLDL(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias)
1013         {
1014                 Vector4f tmp;
1015                 sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, src0.w, src0, src0, project, bias, false, true);
1016
1017                 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
1018                 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
1019                 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
1020                 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
1021         }
1022
1023         void PixelProgram::TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask)
1024         {
1025                 Int kill = -1;
1026
1027                 if(mask & 0x1) kill &= SignMask(CmpNLT(src.x, Float4(0.0f)));
1028                 if(mask & 0x2) kill &= SignMask(CmpNLT(src.y, Float4(0.0f)));
1029                 if(mask & 0x4) kill &= SignMask(CmpNLT(src.z, Float4(0.0f)));
1030                 if(mask & 0x8) kill &= SignMask(CmpNLT(src.w, Float4(0.0f)));
1031
1032                 // FIXME: Dynamic branching affects TEXKILL?
1033                 //      if(shader->containsDynamicBranching())
1034                 //      {
1035                 //              kill = ~SignMask(enableMask(r));
1036                 //      }
1037
1038                 for(unsigned int q = 0; q < state.multiSample; q++)
1039                 {
1040                         cMask[q] &= kill;
1041                 }
1042
1043                 // FIXME: Branch to end of shader if all killed?
1044         }
1045
1046         void PixelProgram::DISCARD(Registers &r, Int cMask[4], const Shader::Instruction *instruction)
1047         {
1048                 Int kill = 0;
1049
1050                 if(shader->containsDynamicBranching())
1051                 {
1052                         kill = ~SignMask(enableMask(r, instruction));
1053                 }
1054
1055                 for(unsigned int q = 0; q < state.multiSample; q++)
1056                 {
1057                         cMask[q] &= kill;
1058                 }
1059
1060                 // FIXME: Branch to end of shader if all killed?
1061         }
1062
1063         void PixelProgram::DFDX(Vector4f &dst, Vector4f &src)
1064         {
1065                 dst.x = src.x.yyww - src.x.xxzz;
1066                 dst.y = src.y.yyww - src.y.xxzz;
1067                 dst.z = src.z.yyww - src.z.xxzz;
1068                 dst.w = src.w.yyww - src.w.xxzz;
1069         }
1070
1071         void PixelProgram::DFDY(Vector4f &dst, Vector4f &src)
1072         {
1073                 dst.x = src.x.zwzw - src.x.xyxy;
1074                 dst.y = src.y.zwzw - src.y.xyxy;
1075                 dst.z = src.z.zwzw - src.z.xyxy;
1076                 dst.w = src.w.zwzw - src.w.xyxy;
1077         }
1078
1079         void PixelProgram::FWIDTH(Vector4f &dst, Vector4f &src)
1080         {
1081                 // abs(dFdx(src)) + abs(dFdy(src));
1082                 dst.x = Abs(src.x.yyww - src.x.xxzz) + Abs(src.x.zwzw - src.x.xyxy);
1083                 dst.y = Abs(src.y.yyww - src.y.xxzz) + Abs(src.y.zwzw - src.y.xyxy);
1084                 dst.z = Abs(src.z.yyww - src.z.xxzz) + Abs(src.z.zwzw - src.z.xyxy);
1085                 dst.w = Abs(src.w.yyww - src.w.xxzz) + Abs(src.w.zwzw - src.w.xyxy);
1086         }
1087
1088         void PixelProgram::BREAK(Registers &r)
1089         {
1090                 llvm::BasicBlock *deadBlock = Nucleus::createBasicBlock();
1091                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
1092
1093                 if(breakDepth == 0)
1094                 {
1095                         r.enableIndex = r.enableIndex - breakDepth;
1096                         Nucleus::createBr(endBlock);
1097                 }
1098                 else
1099                 {
1100                         r.enableBreak = r.enableBreak & ~r.enableStack[r.enableIndex];
1101                         Bool allBreak = SignMask(r.enableBreak) == 0x0;
1102
1103                         r.enableIndex = r.enableIndex - breakDepth;
1104                         branch(allBreak, endBlock, deadBlock);
1105                 }
1106
1107                 Nucleus::setInsertBlock(deadBlock);
1108                 r.enableIndex = r.enableIndex + breakDepth;
1109         }
1110
1111         void PixelProgram::BREAKC(Registers &r, Vector4f &src0, Vector4f &src1, Control control)
1112         {
1113                 Int4 condition;
1114
1115                 switch(control)
1116                 {
1117                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
1118                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
1119                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
1120                 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
1121                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
1122                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
1123                 default:
1124                         ASSERT(false);
1125                 }
1126
1127                 BREAK(r, condition);
1128         }
1129
1130         void PixelProgram::BREAKP(Registers &r, const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
1131         {
1132                 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
1133
1134                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1135                 {
1136                         condition = ~condition;
1137                 }
1138
1139                 BREAK(r, condition);
1140         }
1141
1142         void PixelProgram::BREAK(Registers &r, Int4 &condition)
1143         {
1144                 condition &= r.enableStack[r.enableIndex];
1145
1146                 llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock();
1147                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
1148
1149                 r.enableBreak = r.enableBreak & ~condition;
1150                 Bool allBreak = SignMask(r.enableBreak) == 0x0;
1151
1152                 r.enableIndex = r.enableIndex - breakDepth;
1153                 branch(allBreak, endBlock, continueBlock);
1154
1155                 Nucleus::setInsertBlock(continueBlock);
1156                 r.enableIndex = r.enableIndex + breakDepth;
1157         }
1158
1159         void PixelProgram::CONTINUE(Registers &r)
1160         {
1161                 r.enableContinue = r.enableContinue & ~r.enableStack[r.enableIndex];
1162         }
1163
1164         void PixelProgram::TEST()
1165         {
1166                 whileTest = true;
1167         }
1168
1169         void PixelProgram::CALL(Registers &r, int labelIndex, int callSiteIndex)
1170         {
1171                 if(!labelBlock[labelIndex])
1172                 {
1173                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1174                 }
1175
1176                 if(callRetBlock[labelIndex].size() > 1)
1177                 {
1178                         r.callStack[r.stackIndex++] = UInt(callSiteIndex);
1179                 }
1180
1181                 Int4 restoreLeave = r.enableLeave;
1182
1183                 Nucleus::createBr(labelBlock[labelIndex]);
1184                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1185
1186                 r.enableLeave = restoreLeave;
1187         }
1188
1189         void PixelProgram::CALLNZ(Registers &r, int labelIndex, int callSiteIndex, const Src &src)
1190         {
1191                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1192                 {
1193                         CALLNZb(r, labelIndex, callSiteIndex, src);
1194                 }
1195                 else if(src.type == Shader::PARAMETER_PREDICATE)
1196                 {
1197                         CALLNZp(r, labelIndex, callSiteIndex, src);
1198                 }
1199                 else ASSERT(false);
1200         }
1201
1202         void PixelProgram::CALLNZb(Registers &r, int labelIndex, int callSiteIndex, const Src &boolRegister)
1203         {
1204                 Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0));   // FIXME
1205
1206                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1207                 {
1208                         condition = !condition;
1209                 }
1210
1211                 if(!labelBlock[labelIndex])
1212                 {
1213                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1214                 }
1215
1216                 if(callRetBlock[labelIndex].size() > 1)
1217                 {
1218                         r.callStack[r.stackIndex++] = UInt(callSiteIndex);
1219                 }
1220
1221                 Int4 restoreLeave = r.enableLeave;
1222
1223                 branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1224                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1225
1226                 r.enableLeave = restoreLeave;
1227         }
1228
1229         void PixelProgram::CALLNZp(Registers &r, int labelIndex, int callSiteIndex, const Src &predicateRegister)
1230         {
1231                 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
1232
1233                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1234                 {
1235                         condition = ~condition;
1236                 }
1237
1238                 condition &= r.enableStack[r.enableIndex];
1239
1240                 if(!labelBlock[labelIndex])
1241                 {
1242                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1243                 }
1244
1245                 if(callRetBlock[labelIndex].size() > 1)
1246                 {
1247                         r.callStack[r.stackIndex++] = UInt(callSiteIndex);
1248                 }
1249
1250                 r.enableIndex++;
1251                 r.enableStack[r.enableIndex] = condition;
1252                 Int4 restoreLeave = r.enableLeave;
1253
1254                 Bool notAllFalse = SignMask(condition) != 0;
1255                 branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1256                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1257
1258                 r.enableIndex--;
1259                 r.enableLeave = restoreLeave;
1260         }
1261
1262         void PixelProgram::ELSE(Registers &r)
1263         {
1264                 ifDepth--;
1265
1266                 llvm::BasicBlock *falseBlock = ifFalseBlock[ifDepth];
1267                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1268
1269                 if(isConditionalIf[ifDepth])
1270                 {
1271                         Int4 condition = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];
1272                         Bool notAllFalse = SignMask(condition) != 0;
1273
1274                         branch(notAllFalse, falseBlock, endBlock);
1275
1276                         r.enableStack[r.enableIndex] = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];
1277                 }
1278                 else
1279                 {
1280                         Nucleus::createBr(endBlock);
1281                         Nucleus::setInsertBlock(falseBlock);
1282                 }
1283
1284                 ifFalseBlock[ifDepth] = endBlock;
1285
1286                 ifDepth++;
1287         }
1288
1289         void PixelProgram::ENDIF(Registers &r)
1290         {
1291                 ifDepth--;
1292
1293                 llvm::BasicBlock *endBlock = ifFalseBlock[ifDepth];
1294
1295                 Nucleus::createBr(endBlock);
1296                 Nucleus::setInsertBlock(endBlock);
1297
1298                 if(isConditionalIf[ifDepth])
1299                 {
1300                         breakDepth--;
1301                         r.enableIndex--;
1302                 }
1303         }
1304
1305         void PixelProgram::ENDLOOP(Registers &r)
1306         {
1307                 loopRepDepth--;
1308
1309                 r.aL[r.loopDepth] = r.aL[r.loopDepth] + r.increment[r.loopDepth];   // FIXME: +=
1310
1311                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1312                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1313
1314                 Nucleus::createBr(testBlock);
1315                 Nucleus::setInsertBlock(endBlock);
1316
1317                 r.loopDepth--;
1318                 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1319         }
1320
1321         void PixelProgram::ENDREP(Registers &r)
1322         {
1323                 loopRepDepth--;
1324
1325                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1326                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1327
1328                 Nucleus::createBr(testBlock);
1329                 Nucleus::setInsertBlock(endBlock);
1330
1331                 r.loopDepth--;
1332                 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1333         }
1334
1335         void PixelProgram::ENDWHILE(Registers &r)
1336         {
1337                 loopRepDepth--;
1338
1339                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1340                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1341
1342                 Nucleus::createBr(testBlock);
1343                 Nucleus::setInsertBlock(endBlock);
1344
1345                 r.enableIndex--;
1346                 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1347                 whileTest = false;
1348         }
1349
1350         void PixelProgram::IF(Registers &r, const Src &src)
1351         {
1352                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1353                 {
1354                         IFb(r, src);
1355                 }
1356                 else if(src.type == Shader::PARAMETER_PREDICATE)
1357                 {
1358                         IFp(r, src);
1359                 }
1360                 else
1361                 {
1362                         Int4 condition = As<Int4>(fetchRegisterF(r, src).x);
1363                         IF(r, condition);
1364                 }
1365         }
1366
1367         void PixelProgram::IFb(Registers &r, const Src &boolRegister)
1368         {
1369                 ASSERT(ifDepth < 24 + 4);
1370
1371                 Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0));   // FIXME
1372
1373                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1374                 {
1375                         condition = !condition;
1376                 }
1377
1378                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1379                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1380
1381                 branch(condition, trueBlock, falseBlock);
1382
1383                 isConditionalIf[ifDepth] = false;
1384                 ifFalseBlock[ifDepth] = falseBlock;
1385
1386                 ifDepth++;
1387         }
1388
1389         void PixelProgram::IFp(Registers &r, const Src &predicateRegister)
1390         {
1391                 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
1392
1393                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1394                 {
1395                         condition = ~condition;
1396                 }
1397
1398                 IF(r, condition);
1399         }
1400
1401         void PixelProgram::IFC(Registers &r, Vector4f &src0, Vector4f &src1, Control control)
1402         {
1403                 Int4 condition;
1404
1405                 switch(control)
1406                 {
1407                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
1408                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
1409                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
1410                 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
1411                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
1412                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
1413                 default:
1414                         ASSERT(false);
1415                 }
1416
1417                 IF(r, condition);
1418         }
1419
1420         void PixelProgram::IF(Registers &r, Int4 &condition)
1421         {
1422                 condition &= r.enableStack[r.enableIndex];
1423
1424                 r.enableIndex++;
1425                 r.enableStack[r.enableIndex] = condition;
1426
1427                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1428                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1429
1430                 Bool notAllFalse = SignMask(condition) != 0;
1431
1432                 branch(notAllFalse, trueBlock, falseBlock);
1433
1434                 isConditionalIf[ifDepth] = true;
1435                 ifFalseBlock[ifDepth] = falseBlock;
1436
1437                 ifDepth++;
1438                 breakDepth++;
1439         }
1440
1441         void PixelProgram::LABEL(int labelIndex)
1442         {
1443                 if(!labelBlock[labelIndex])
1444                 {
1445                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1446                 }
1447
1448                 Nucleus::setInsertBlock(labelBlock[labelIndex]);
1449                 currentLabel = labelIndex;
1450         }
1451
1452         void PixelProgram::LOOP(Registers &r, const Src &integerRegister)
1453         {
1454                 r.loopDepth++;
1455
1456                 r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData, ps.i[integerRegister.index][0]));
1457                 r.aL[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData, ps.i[integerRegister.index][1]));
1458                 r.increment[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData, ps.i[integerRegister.index][2]));
1459
1460                 //      If(r.increment[r.loopDepth] == 0)
1461                 //      {
1462                 //              r.increment[r.loopDepth] = 1;
1463                 //      }
1464
1465                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1466                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1467                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1468
1469                 loopRepTestBlock[loopRepDepth] = testBlock;
1470                 loopRepEndBlock[loopRepDepth] = endBlock;
1471
1472                 // FIXME: jump(testBlock)
1473                 Nucleus::createBr(testBlock);
1474                 Nucleus::setInsertBlock(testBlock);
1475
1476                 branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
1477                 Nucleus::setInsertBlock(loopBlock);
1478
1479                 r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1;   // FIXME: --
1480
1481                 loopRepDepth++;
1482                 breakDepth = 0;
1483         }
1484
1485         void PixelProgram::REP(Registers &r, const Src &integerRegister)
1486         {
1487                 r.loopDepth++;
1488
1489                 r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData, ps.i[integerRegister.index][0]));
1490                 r.aL[r.loopDepth] = r.aL[r.loopDepth - 1];
1491
1492                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1493                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1494                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1495
1496                 loopRepTestBlock[loopRepDepth] = testBlock;
1497                 loopRepEndBlock[loopRepDepth] = endBlock;
1498
1499                 // FIXME: jump(testBlock)
1500                 Nucleus::createBr(testBlock);
1501                 Nucleus::setInsertBlock(testBlock);
1502
1503                 branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
1504                 Nucleus::setInsertBlock(loopBlock);
1505
1506                 r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1;   // FIXME: --
1507
1508                 loopRepDepth++;
1509                 breakDepth = 0;
1510         }
1511
1512         void PixelProgram::WHILE(Registers &r, const Src &temporaryRegister)
1513         {
1514                 r.enableIndex++;
1515
1516                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1517                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1518                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1519
1520                 loopRepTestBlock[loopRepDepth] = testBlock;
1521                 loopRepEndBlock[loopRepDepth] = endBlock;
1522
1523                 Int4 restoreBreak = r.enableBreak;
1524                 Int4 restoreContinue = r.enableContinue;
1525
1526                 // FIXME: jump(testBlock)
1527                 Nucleus::createBr(testBlock);
1528                 Nucleus::setInsertBlock(testBlock);
1529                 r.enableContinue = restoreContinue;
1530
1531                 const Vector4f &src = fetchRegisterF(r, temporaryRegister);
1532                 Int4 condition = As<Int4>(src.x);
1533                 condition &= r.enableStack[r.enableIndex - 1];
1534                 r.enableStack[r.enableIndex] = condition;
1535
1536                 Bool notAllFalse = SignMask(condition) != 0;
1537                 branch(notAllFalse, loopBlock, endBlock);
1538
1539                 Nucleus::setInsertBlock(endBlock);
1540                 r.enableBreak = restoreBreak;
1541
1542                 Nucleus::setInsertBlock(loopBlock);
1543
1544                 loopRepDepth++;
1545                 breakDepth = 0;
1546         }
1547
1548         void PixelProgram::RET(Registers &r)
1549         {
1550                 if(currentLabel == -1)
1551                 {
1552                         returnBlock = Nucleus::createBasicBlock();
1553                         Nucleus::createBr(returnBlock);
1554                 }
1555                 else
1556                 {
1557                         llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
1558
1559                         if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
1560                         {
1561                                 // FIXME: Encapsulate
1562                                 UInt index = r.callStack[--r.stackIndex];
1563
1564                                 llvm::Value *value = index.loadValue();
1565                                 llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
1566
1567                                 for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
1568                                 {
1569                                         Nucleus::addSwitchCase(switchInst, i, callRetBlock[currentLabel][i]);
1570                                 }
1571                         }
1572                         else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
1573                         {
1574                                 Nucleus::createBr(callRetBlock[currentLabel][0]);
1575                         }
1576                         else   // Function isn't called
1577                         {
1578                                 Nucleus::createBr(unreachableBlock);
1579                         }
1580
1581                         Nucleus::setInsertBlock(unreachableBlock);
1582                         Nucleus::createUnreachable();
1583                 }
1584         }
1585
1586         void PixelProgram::LEAVE(Registers &r)
1587         {
1588                 r.enableLeave = r.enableLeave & ~r.enableStack[r.enableIndex];
1589
1590                 // FIXME: Return from function if all instances left
1591                 // FIXME: Use enableLeave in other control-flow constructs
1592         }
1593 \r
1594 }\r