OSDN Git Service

Signed and unsigned integer packing and unpacking intrinsic functions
[android-x86/external-swiftshader.git] / src / Shader / PixelProgram.cpp
1 // SwiftShader Software Renderer
2 //
3 // Copyright(c) 2015 Google Inc.
4 //
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of Google Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
10 //
11
12 #include "PixelProgram.hpp"
13 #include "Primitive.hpp"
14 #include "Renderer.hpp"
15 #include "SamplerCore.hpp"
16
17 namespace sw
18 {
19         extern bool postBlendSRGB;
20         extern bool booleanFaceRegister;
21         extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
22         extern bool fullPixelPositionRegister;
23
24         void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w)
25         {
26                 if(shader->getVersion() >= 0x0300)
27                 {
28                         if(shader->vPosDeclared)
29                         {
30                                 if(!halfIntegerCoordinates)
31                                 {
32                                         vPos.x = Float4(Float(x)) + Float4(0, 1, 0, 1);
33                                         vPos.y = Float4(Float(y)) + Float4(0, 0, 1, 1);
34                                 }
35                                 else
36                                 {
37                                         vPos.x = Float4(Float(x)) + Float4(0.5f, 1.5f, 0.5f, 1.5f);
38                                         vPos.y = Float4(Float(y)) + Float4(0.5f, 0.5f, 1.5f, 1.5f);
39                                 }
40
41                                 if(fullPixelPositionRegister)
42                                 {
43                                         vPos.z = z[0]; // FIXME: Centroid?
44                                         vPos.w = w;    // FIXME: Centroid?
45                                 }
46                         }
47
48                         if(shader->vFaceDeclared)
49                         {
50                                 Float4 area = *Pointer<Float>(primitive + OFFSET(Primitive, area));
51                                 Float4 face = booleanFaceRegister ? Float4(As<Float4>(CmpNLT(area, Float4(0.0f)))) : area;
52
53                                 vFace.x = face;
54                                 vFace.y = face;
55                                 vFace.z = face;
56                                 vFace.w = face;
57                         }
58                 }
59         }
60
61         void PixelProgram::applyShader(Int cMask[4])
62         {
63                 enableIndex = 0;
64                 stackIndex = 0;
65
66                 if(shader->containsLeaveInstruction())
67                 {
68                         enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
69                 }
70
71                 for(int i = 0; i < RENDERTARGETS; i++)
72                 {
73                         if(state.targetFormat[i] != FORMAT_NULL)
74                         {
75                                 oC[i] = Vector4f(0.0f, 0.0f, 0.0f, 0.0f);
76                         }
77                 }
78
79                 // Create all call site return blocks up front
80                 for(size_t i = 0; i < shader->getLength(); i++)
81                 {
82                         const Shader::Instruction *instruction = shader->getInstruction(i);
83                         Shader::Opcode opcode = instruction->opcode;
84
85                         if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
86                         {
87                                 const Dst &dst = instruction->dst;
88
89                                 ASSERT(callRetBlock[dst.label].size() == dst.callSite);
90                                 callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
91                         }
92                 }
93
94                 for(size_t i = 0; i < shader->getLength(); i++)
95                 {
96                         const Shader::Instruction *instruction = shader->getInstruction(i);
97                         Shader::Opcode opcode = instruction->opcode;
98
99                         if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
100                         {
101                                 continue;
102                         }
103
104                         const Dst &dst = instruction->dst;
105                         const Src &src0 = instruction->src[0];
106                         const Src &src1 = instruction->src[1];
107                         const Src &src2 = instruction->src[2];
108                         const Src &src3 = instruction->src[3];
109                         const Src &src4 = instruction->src[4];
110
111                         bool predicate = instruction->predicate;
112                         Control control = instruction->control;
113                         bool pp = dst.partialPrecision;
114                         bool project = instruction->project;
115                         bool bias = instruction->bias;
116
117                         Vector4f d;
118                         Vector4f s0;
119                         Vector4f s1;
120                         Vector4f s2;
121                         Vector4f s3;
122                         Vector4f s4;
123
124                         if(opcode == Shader::OPCODE_TEXKILL)   // Takes destination as input
125                         {
126                                 if(dst.type == Shader::PARAMETER_TEXTURE)
127                                 {
128                                         d.x = v[2 + dst.index].x;
129                                         d.y = v[2 + dst.index].y;
130                                         d.z = v[2 + dst.index].z;
131                                         d.w = v[2 + dst.index].w;
132                                 }
133                                 else
134                                 {
135                                         d = r[dst.index];
136                                 }
137                         }
138
139                         if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0);
140                         if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1);
141                         if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2);
142                         if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3);
143                         if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4);
144
145                         switch(opcode)
146                         {
147                         case Shader::OPCODE_PS_2_0:                                                    break;
148                         case Shader::OPCODE_PS_2_x:                                                    break;
149                         case Shader::OPCODE_PS_3_0:                                                    break;
150                         case Shader::OPCODE_DEF:                                                       break;
151                         case Shader::OPCODE_DCL:                                                       break;
152                         case Shader::OPCODE_NOP:                                                       break;
153                         case Shader::OPCODE_MOV:        mov(d, s0);                                    break;
154                         case Shader::OPCODE_NEG:        neg(d, s0);                                    break;
155                         case Shader::OPCODE_INEG:       ineg(d, s0);                                   break;
156                         case Shader::OPCODE_F2B:        f2b(d, s0);                                    break;
157                         case Shader::OPCODE_B2F:        b2f(d, s0);                                    break;
158                         case Shader::OPCODE_F2I:        f2i(d, s0);                                    break;
159                         case Shader::OPCODE_I2F:        i2f(d, s0);                                    break;
160                         case Shader::OPCODE_F2U:        f2u(d, s0);                                    break;
161                         case Shader::OPCODE_U2F:        u2f(d, s0);                                    break;
162                         case Shader::OPCODE_I2B:        i2b(d, s0);                                    break;
163                         case Shader::OPCODE_B2I:        b2i(d, s0);                                    break;
164                         case Shader::OPCODE_ADD:        add(d, s0, s1);                                break;
165                         case Shader::OPCODE_IADD:       iadd(d, s0, s1);                               break;
166                         case Shader::OPCODE_SUB:        sub(d, s0, s1);                                break;
167                         case Shader::OPCODE_ISUB:       isub(d, s0, s1);                               break;
168                         case Shader::OPCODE_MUL:        mul(d, s0, s1);                                break;
169                         case Shader::OPCODE_IMUL:       imul(d, s0, s1);                               break;
170                         case Shader::OPCODE_MAD:        mad(d, s0, s1, s2);                            break;
171                         case Shader::OPCODE_IMAD:       imad(d, s0, s1, s2);                           break;
172                         case Shader::OPCODE_DP1:        dp1(d, s0, s1);                                break;
173                         case Shader::OPCODE_DP2:        dp2(d, s0, s1);                                break;
174                         case Shader::OPCODE_DP2ADD:     dp2add(d, s0, s1, s2);                         break;
175                         case Shader::OPCODE_DP3:        dp3(d, s0, s1);                                break;
176                         case Shader::OPCODE_DP4:        dp4(d, s0, s1);                                break;
177                         case Shader::OPCODE_DET2:       det2(d, s0, s1);                               break;
178                         case Shader::OPCODE_DET3:       det3(d, s0, s1, s2);                           break;
179                         case Shader::OPCODE_DET4:       det4(d, s0, s1, s2, s3);                       break;
180                         case Shader::OPCODE_CMP0:       cmp0(d, s0, s1, s2);                           break;
181                         case Shader::OPCODE_ICMP:       icmp(d, s0, s1, control);                      break;
182                         case Shader::OPCODE_UCMP:       ucmp(d, s0, s1, control);                      break;
183                         case Shader::OPCODE_SELECT:     select(d, s0, s1, s2);                         break;
184                         case Shader::OPCODE_EXTRACT:    extract(d.x, s0, s1.x);                        break;
185                         case Shader::OPCODE_INSERT:     insert(d, s0, s1.x, s2.x);                     break;
186                         case Shader::OPCODE_FRC:        frc(d, s0);                                    break;
187                         case Shader::OPCODE_TRUNC:      trunc(d, s0);                                  break;
188                         case Shader::OPCODE_FLOOR:      floor(d, s0);                                  break;
189                         case Shader::OPCODE_ROUND:      round(d, s0);                                  break;
190                         case Shader::OPCODE_ROUNDEVEN:  roundEven(d, s0);                              break;
191                         case Shader::OPCODE_CEIL:       ceil(d, s0);                                   break;
192                         case Shader::OPCODE_EXP2X:      exp2x(d, s0, pp);                              break;
193                         case Shader::OPCODE_EXP2:       exp2(d, s0, pp);                               break;
194                         case Shader::OPCODE_LOG2X:      log2x(d, s0, pp);                              break;
195                         case Shader::OPCODE_LOG2:       log2(d, s0, pp);                               break;
196                         case Shader::OPCODE_EXP:        exp(d, s0, pp);                                break;
197                         case Shader::OPCODE_LOG:        log(d, s0, pp);                                break;
198                         case Shader::OPCODE_RCPX:       rcpx(d, s0, pp);                               break;
199                         case Shader::OPCODE_DIV:        div(d, s0, s1);                                break;
200                         case Shader::OPCODE_IDIV:       idiv(d, s0, s1);                               break;
201                         case Shader::OPCODE_UDIV:       udiv(d, s0, s1);                               break;
202                         case Shader::OPCODE_MOD:        mod(d, s0, s1);                                break;
203                         case Shader::OPCODE_IMOD:       imod(d, s0, s1);                               break;
204                         case Shader::OPCODE_UMOD:       umod(d, s0, s1);                               break;
205                         case Shader::OPCODE_SHL:        shl(d, s0, s1);                                break;
206                         case Shader::OPCODE_ISHR:       ishr(d, s0, s1);                               break;
207                         case Shader::OPCODE_USHR:       ushr(d, s0, s1);                               break;
208                         case Shader::OPCODE_RSQX:       rsqx(d, s0, pp);                               break;
209                         case Shader::OPCODE_SQRT:       sqrt(d, s0, pp);                               break;
210                         case Shader::OPCODE_RSQ:        rsq(d, s0, pp);                                break;
211                         case Shader::OPCODE_LEN2:       len2(d.x, s0, pp);                             break;
212                         case Shader::OPCODE_LEN3:       len3(d.x, s0, pp);                             break;
213                         case Shader::OPCODE_LEN4:       len4(d.x, s0, pp);                             break;
214                         case Shader::OPCODE_DIST1:      dist1(d.x, s0, s1, pp);                        break;
215                         case Shader::OPCODE_DIST2:      dist2(d.x, s0, s1, pp);                        break;
216                         case Shader::OPCODE_DIST3:      dist3(d.x, s0, s1, pp);                        break;
217                         case Shader::OPCODE_DIST4:      dist4(d.x, s0, s1, pp);                        break;
218                         case Shader::OPCODE_MIN:        min(d, s0, s1);                                break;
219                         case Shader::OPCODE_IMIN:       imin(d, s0, s1);                               break;
220                         case Shader::OPCODE_UMIN:       umin(d, s0, s1);                               break;
221                         case Shader::OPCODE_MAX:        max(d, s0, s1);                                break;
222                         case Shader::OPCODE_IMAX:       imax(d, s0, s1);                               break;
223                         case Shader::OPCODE_UMAX:       umax(d, s0, s1);                               break;
224                         case Shader::OPCODE_LRP:        lrp(d, s0, s1, s2);                            break;
225                         case Shader::OPCODE_STEP:       step(d, s0, s1);                               break;
226                         case Shader::OPCODE_SMOOTH:     smooth(d, s0, s1, s2);                         break;
227                         case Shader::OPCODE_FLOATBITSTOINT:
228                         case Shader::OPCODE_FLOATBITSTOUINT:
229                         case Shader::OPCODE_INTBITSTOFLOAT:
230                         case Shader::OPCODE_UINTBITSTOFLOAT: d = s0;                                   break;
231                         case Shader::OPCODE_PACKSNORM2x16:   packSnorm2x16(d, s0);                     break;
232                         case Shader::OPCODE_PACKUNORM2x16:   packUnorm2x16(d, s0);                     break;
233                         case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0);                   break;
234                         case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0);                   break;
235                         case Shader::OPCODE_POWX:       powx(d, s0, s1, pp);                           break;
236                         case Shader::OPCODE_POW:        pow(d, s0, s1, pp);                            break;
237                         case Shader::OPCODE_SGN:        sgn(d, s0);                                    break;
238                         case Shader::OPCODE_ISGN:       isgn(d, s0);                                   break;
239                         case Shader::OPCODE_CRS:        crs(d, s0, s1);                                break;
240                         case Shader::OPCODE_FORWARD1:   forward1(d, s0, s1, s2);                       break;
241                         case Shader::OPCODE_FORWARD2:   forward2(d, s0, s1, s2);                       break;
242                         case Shader::OPCODE_FORWARD3:   forward3(d, s0, s1, s2);                       break;
243                         case Shader::OPCODE_FORWARD4:   forward4(d, s0, s1, s2);                       break;
244                         case Shader::OPCODE_REFLECT1:   reflect1(d, s0, s1);                           break;
245                         case Shader::OPCODE_REFLECT2:   reflect2(d, s0, s1);                           break;
246                         case Shader::OPCODE_REFLECT3:   reflect3(d, s0, s1);                           break;
247                         case Shader::OPCODE_REFLECT4:   reflect4(d, s0, s1);                           break;
248                         case Shader::OPCODE_REFRACT1:   refract1(d, s0, s1, s2.x);                     break;
249                         case Shader::OPCODE_REFRACT2:   refract2(d, s0, s1, s2.x);                     break;
250                         case Shader::OPCODE_REFRACT3:   refract3(d, s0, s1, s2.x);                     break;
251                         case Shader::OPCODE_REFRACT4:   refract4(d, s0, s1, s2.x);                     break;
252                         case Shader::OPCODE_NRM2:       nrm2(d, s0, pp);                               break;
253                         case Shader::OPCODE_NRM3:       nrm3(d, s0, pp);                               break;
254                         case Shader::OPCODE_NRM4:       nrm4(d, s0, pp);                               break;
255                         case Shader::OPCODE_ABS:        abs(d, s0);                                    break;
256                         case Shader::OPCODE_IABS:       iabs(d, s0);                                   break;
257                         case Shader::OPCODE_SINCOS:     sincos(d, s0, pp);                             break;
258                         case Shader::OPCODE_COS:        cos(d, s0, pp);                                break;
259                         case Shader::OPCODE_SIN:        sin(d, s0, pp);                                break;
260                         case Shader::OPCODE_TAN:        tan(d, s0, pp);                                break;
261                         case Shader::OPCODE_ACOS:       acos(d, s0, pp);                               break;
262                         case Shader::OPCODE_ASIN:       asin(d, s0, pp);                               break;
263                         case Shader::OPCODE_ATAN:       atan(d, s0, pp);                               break;
264                         case Shader::OPCODE_ATAN2:      atan2(d, s0, s1, pp);                          break;
265                         case Shader::OPCODE_COSH:       cosh(d, s0, pp);                               break;
266                         case Shader::OPCODE_SINH:       sinh(d, s0, pp);                               break;
267                         case Shader::OPCODE_TANH:       tanh(d, s0, pp);                               break;
268                         case Shader::OPCODE_ACOSH:      acosh(d, s0, pp);                              break;
269                         case Shader::OPCODE_ASINH:      asinh(d, s0, pp);                              break;
270                         case Shader::OPCODE_ATANH:      atanh(d, s0, pp);                              break;
271                         case Shader::OPCODE_M4X4:       M4X4(d, s0, src1);                             break;
272                         case Shader::OPCODE_M4X3:       M4X3(d, s0, src1);                             break;
273                         case Shader::OPCODE_M3X4:       M3X4(d, s0, src1);                             break;
274                         case Shader::OPCODE_M3X3:       M3X3(d, s0, src1);                             break;
275                         case Shader::OPCODE_M3X2:       M3X2(d, s0, src1);                             break;
276                         case Shader::OPCODE_TEX:        TEXLD(d, s0, src1, project, bias);             break;
277                         case Shader::OPCODE_TEXLDD:     TEXLDD(d, s0, src1, s2, s3, project);          break;
278                         case Shader::OPCODE_TEXLDL:     TEXLDL(d, s0, src1, project);                  break;
279                         case Shader::OPCODE_TEXSIZE:    TEXSIZE(d, s0.x, src1);                        break;
280                         case Shader::OPCODE_TEXKILL:    TEXKILL(cMask, d, dst.mask);                   break;
281                         case Shader::OPCODE_TEXOFFSET:  TEXOFFSET(d, s0, src1, s2, s3, project, bias); break;
282                         case Shader::OPCODE_TEXLDLOFFSET: TEXLDL(d, s0, src1, s2, project, bias);      break;
283                         case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2);                   break;
284                         case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCH(d, s0, src1, s2, s3);         break;
285                         case Shader::OPCODE_TEXGRAD:    TEXGRAD(d, s0, src1, s2, s3);                  break;
286                         case Shader::OPCODE_TEXGRADOFFSET: TEXGRAD(d, s0, src1, s2, s3, s4);           break;
287                         case Shader::OPCODE_DISCARD:    DISCARD(cMask, instruction);                   break;
288                         case Shader::OPCODE_DFDX:       DFDX(d, s0);                                   break;
289                         case Shader::OPCODE_DFDY:       DFDY(d, s0);                                   break;
290                         case Shader::OPCODE_FWIDTH:     FWIDTH(d, s0);                                 break;
291                         case Shader::OPCODE_BREAK:      BREAK();                                       break;
292                         case Shader::OPCODE_BREAKC:     BREAKC(s0, s1, control);                       break;
293                         case Shader::OPCODE_BREAKP:     BREAKP(src0);                                  break;
294                         case Shader::OPCODE_CONTINUE:   CONTINUE();                                    break;
295                         case Shader::OPCODE_TEST:       TEST();                                        break;
296                         case Shader::OPCODE_CALL:       CALL(dst.label, dst.callSite);                 break;
297                         case Shader::OPCODE_CALLNZ:     CALLNZ(dst.label, dst.callSite, src0);         break;
298                         case Shader::OPCODE_ELSE:       ELSE();                                        break;
299                         case Shader::OPCODE_ENDIF:      ENDIF();                                       break;
300                         case Shader::OPCODE_ENDLOOP:    ENDLOOP();                                     break;
301                         case Shader::OPCODE_ENDREP:     ENDREP();                                      break;
302                         case Shader::OPCODE_ENDWHILE:   ENDWHILE();                                    break;
303                         case Shader::OPCODE_IF:         IF(src0);                                      break;
304                         case Shader::OPCODE_IFC:        IFC(s0, s1, control);                          break;
305                         case Shader::OPCODE_LABEL:      LABEL(dst.index);                              break;
306                         case Shader::OPCODE_LOOP:       LOOP(src1);                                    break;
307                         case Shader::OPCODE_REP:        REP(src0);                                     break;
308                         case Shader::OPCODE_WHILE:      WHILE(src0);                                   break;
309                         case Shader::OPCODE_RET:        RET();                                         break;
310                         case Shader::OPCODE_LEAVE:      LEAVE();                                       break;
311                         case Shader::OPCODE_CMP:        cmp(d, s0, s1, control);                       break;
312                         case Shader::OPCODE_ALL:        all(d.x, s0);                                  break;
313                         case Shader::OPCODE_ANY:        any(d.x, s0);                                  break;
314                         case Shader::OPCODE_NOT:        not(d, s0);                                    break;
315                         case Shader::OPCODE_OR:         or(d, s0, s1);                                 break;
316                         case Shader::OPCODE_XOR:        xor(d, s0, s1);                                break;
317                         case Shader::OPCODE_AND:        and(d, s0, s1);                                break;
318                         case Shader::OPCODE_EQ:         equal(d, s0, s1);                              break;
319                         case Shader::OPCODE_NE:         notEqual(d, s0, s1);                           break;
320                         case Shader::OPCODE_END:                                                       break;
321                         default:
322                                 ASSERT(false);
323                         }
324
325                         if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_TEXKILL && opcode != Shader::OPCODE_NOP)
326                         {
327                                 if(dst.integer)
328                                 {
329                                         switch(opcode)
330                                         {
331                                         case Shader::OPCODE_DIV:
332                                                 if(dst.x) d.x = Trunc(d.x);
333                                                 if(dst.y) d.y = Trunc(d.y);
334                                                 if(dst.z) d.z = Trunc(d.z);
335                                                 if(dst.w) d.w = Trunc(d.w);
336                                                 break;
337                                         default:
338                                                 break;   // No truncation to integer required when arguments are integer
339                                         }
340                                 }
341
342                                 if(dst.saturate)
343                                 {
344                                         if(dst.x) d.x = Max(d.x, Float4(0.0f));
345                                         if(dst.y) d.y = Max(d.y, Float4(0.0f));
346                                         if(dst.z) d.z = Max(d.z, Float4(0.0f));
347                                         if(dst.w) d.w = Max(d.w, Float4(0.0f));
348
349                                         if(dst.x) d.x = Min(d.x, Float4(1.0f));
350                                         if(dst.y) d.y = Min(d.y, Float4(1.0f));
351                                         if(dst.z) d.z = Min(d.z, Float4(1.0f));
352                                         if(dst.w) d.w = Min(d.w, Float4(1.0f));
353                                 }
354
355                                 if(instruction->isPredicated())
356                                 {
357                                         Vector4f pDst;   // FIXME: Rename
358
359                                         switch(dst.type)
360                                         {
361                                         case Shader::PARAMETER_TEMP:
362                                                 if(dst.rel.type == Shader::PARAMETER_VOID)
363                                                 {
364                                                         if(dst.x) pDst.x = r[dst.index].x;
365                                                         if(dst.y) pDst.y = r[dst.index].y;
366                                                         if(dst.z) pDst.z = r[dst.index].z;
367                                                         if(dst.w) pDst.w = r[dst.index].w;
368                                                 }
369                                                 else
370                                                 {
371                                                         Int a = relativeAddress(dst);
372
373                                                         if(dst.x) pDst.x = r[dst.index + a].x;
374                                                         if(dst.y) pDst.y = r[dst.index + a].y;
375                                                         if(dst.z) pDst.z = r[dst.index + a].z;
376                                                         if(dst.w) pDst.w = r[dst.index + a].w;
377                                                 }
378                                                 break;
379                                         case Shader::PARAMETER_COLOROUT:
380                                                 if(dst.rel.type == Shader::PARAMETER_VOID)
381                                                 {
382                                                         if(dst.x) pDst.x = oC[dst.index].x;
383                                                         if(dst.y) pDst.y = oC[dst.index].y;
384                                                         if(dst.z) pDst.z = oC[dst.index].z;
385                                                         if(dst.w) pDst.w = oC[dst.index].w;
386                                                 }
387                                                 else
388                                                 {
389                                                         Int a = relativeAddress(dst) + dst.index;
390
391                                                         if(dst.x) pDst.x = oC[a].x;
392                                                         if(dst.y) pDst.y = oC[a].y;
393                                                         if(dst.z) pDst.z = oC[a].z;
394                                                         if(dst.w) pDst.w = oC[a].w;
395                                                 }
396                                                 break;
397                                         case Shader::PARAMETER_PREDICATE:
398                                                 if(dst.x) pDst.x = p0.x;
399                                                 if(dst.y) pDst.y = p0.y;
400                                                 if(dst.z) pDst.z = p0.z;
401                                                 if(dst.w) pDst.w = p0.w;
402                                                 break;
403                                         case Shader::PARAMETER_DEPTHOUT:
404                                                 pDst.x = oDepth;
405                                                 break;
406                                         default:
407                                                 ASSERT(false);
408                                         }
409
410                                         Int4 enable = enableMask(instruction);
411
412                                         Int4 xEnable = enable;
413                                         Int4 yEnable = enable;
414                                         Int4 zEnable = enable;
415                                         Int4 wEnable = enable;
416
417                                         if(predicate)
418                                         {
419                                                 unsigned char pSwizzle = instruction->predicateSwizzle;
420
421                                                 Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03];
422                                                 Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03];
423                                                 Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03];
424                                                 Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03];
425
426                                                 if(!instruction->predicateNot)
427                                                 {
428                                                         if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
429                                                         if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
430                                                         if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
431                                                         if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
432                                                 }
433                                                 else
434                                                 {
435                                                         if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
436                                                         if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
437                                                         if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
438                                                         if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
439                                                 }
440                                         }
441
442                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
443                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
444                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
445                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
446
447                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
448                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
449                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
450                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
451                                 }
452
453                                 switch(dst.type)
454                                 {
455                                 case Shader::PARAMETER_TEMP:
456                                         if(dst.rel.type == Shader::PARAMETER_VOID)
457                                         {
458                                                 if(dst.x) r[dst.index].x = d.x;
459                                                 if(dst.y) r[dst.index].y = d.y;
460                                                 if(dst.z) r[dst.index].z = d.z;
461                                                 if(dst.w) r[dst.index].w = d.w;
462                                         }
463                                         else
464                                         {
465                                                 Int a = relativeAddress(dst);
466
467                                                 if(dst.x) r[dst.index + a].x = d.x;
468                                                 if(dst.y) r[dst.index + a].y = d.y;
469                                                 if(dst.z) r[dst.index + a].z = d.z;
470                                                 if(dst.w) r[dst.index + a].w = d.w;
471                                         }
472                                         break;
473                                 case Shader::PARAMETER_COLOROUT:
474                                         if(dst.rel.type == Shader::PARAMETER_VOID)
475                                         {
476                                                 if(dst.x) { oC[dst.index].x = d.x; }
477                                                 if(dst.y) { oC[dst.index].y = d.y; }
478                                                 if(dst.z) { oC[dst.index].z = d.z; }
479                                                 if(dst.w) { oC[dst.index].w = d.w; }
480                                         }
481                                         else
482                                         {
483                                                 Int a = relativeAddress(dst) + dst.index;
484
485                                                 if(dst.x) { oC[a].x = d.x; }
486                                                 if(dst.y) { oC[a].y = d.y; }
487                                                 if(dst.z) { oC[a].z = d.z; }
488                                                 if(dst.w) { oC[a].w = d.w; }
489                                         }
490                                         break;
491                                 case Shader::PARAMETER_PREDICATE:
492                                         if(dst.x) p0.x = d.x;
493                                         if(dst.y) p0.y = d.y;
494                                         if(dst.z) p0.z = d.z;
495                                         if(dst.w) p0.w = d.w;
496                                         break;
497                                 case Shader::PARAMETER_DEPTHOUT:
498                                         oDepth = d.x;
499                                         break;
500                                 default:
501                                         ASSERT(false);
502                                 }
503                         }
504                 }
505
506                 if(currentLabel != -1)
507                 {
508                         Nucleus::setInsertBlock(returnBlock);
509                 }
510
511                 for(int i = 0; i < RENDERTARGETS; i++)
512                 {
513                         c[i] = oC[i];
514                 }
515         }
516
517         Bool PixelProgram::alphaTest(Int cMask[4])
518         {
519                 clampColor(c);
520
521                 if(!state.alphaTestActive())
522                 {
523                         return true;
524                 }
525
526                 Int aMask;
527
528                 if(state.transparencyAntialiasing == TRANSPARENCY_NONE)
529                 {
530                         Short4 alpha = RoundShort4(c[0].w * Float4(0x1000));
531
532                         PixelRoutine::alphaTest(aMask, alpha);
533
534                         for(unsigned int q = 0; q < state.multiSample; q++)
535                         {
536                                 cMask[q] &= aMask;
537                         }
538                 }
539                 else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
540                 {
541                         alphaToCoverage(cMask, c[0].w);
542                 }
543                 else ASSERT(false);
544
545                 Int pass = cMask[0];
546
547                 for(unsigned int q = 1; q < state.multiSample; q++)
548                 {
549                         pass = pass | cMask[q];
550                 }
551
552                 return pass != 0x0;
553         }
554
555         void PixelProgram::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
556         {
557                 for(int index = 0; index < RENDERTARGETS; index++)
558                 {
559                         if(!state.colorWriteActive(index))
560                         {
561                                 continue;
562                         }
563
564                         if(!postBlendSRGB && state.writeSRGB)
565                         {
566                                 c[index].x = linearToSRGB(c[index].x);
567                                 c[index].y = linearToSRGB(c[index].y);
568                                 c[index].z = linearToSRGB(c[index].z);
569                         }
570
571                         if(index == 0)
572                         {
573                                 fogBlend(c[index], fog);
574                         }
575
576                         switch(state.targetFormat[index])
577                         {
578                         case FORMAT_R5G6B5:
579                         case FORMAT_X8R8G8B8:
580                         case FORMAT_X8B8G8R8:
581                         case FORMAT_A8R8G8B8:
582                         case FORMAT_A8B8G8R8:
583                         case FORMAT_A8:
584                         case FORMAT_G16R16:
585                         case FORMAT_A16B16G16R16:
586                                 for(unsigned int q = 0; q < state.multiSample; q++)
587                                 {
588                                         Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
589                                         Vector4s color;
590
591                                         if(state.targetFormat[index] == FORMAT_R5G6B5)
592                                         {
593                                                 color.x = UShort4(c[index].x * Float4(0xFBFF), false);
594                                                 color.y = UShort4(c[index].y * Float4(0xFDFF), false);
595                                                 color.z = UShort4(c[index].z * Float4(0xFBFF), false);
596                                                 color.w = UShort4(c[index].w * Float4(0xFFFF), false);
597                                         }
598                                         else
599                                         {
600                                                 color.x = convertFixed16(c[index].x, false);
601                                                 color.y = convertFixed16(c[index].y, false);
602                                                 color.z = convertFixed16(c[index].z, false);
603                                                 color.w = convertFixed16(c[index].w, false);
604                                         }
605
606                                         if(state.multiSampleMask & (1 << q))
607                                         {
608                                                 alphaBlend(index, buffer, color, x);
609                                                 logicOperation(index, buffer, color, x);
610                                                 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
611                                         }
612                                 }
613                                 break;
614                         case FORMAT_R32F:
615                         case FORMAT_G32R32F:
616                         case FORMAT_A32B32G32R32F:
617                         case FORMAT_R32I:
618                         case FORMAT_G32R32I:
619                         case FORMAT_A32B32G32R32I:
620                         case FORMAT_R32UI:
621                         case FORMAT_G32R32UI:
622                         case FORMAT_A32B32G32R32UI:
623                                 for(unsigned int q = 0; q < state.multiSample; q++)
624                                 {
625                                         Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
626                                         Vector4f color = c[index];
627
628                                         if(state.multiSampleMask & (1 << q))
629                                         {
630                                                 alphaBlend(index, buffer, color, x);
631                                                 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
632                                         }
633                                 }
634                                 break;
635                         default:
636                                 ASSERT(false);
637                         }
638                 }
639         }
640
641         void PixelProgram::sampleTexture(Vector4f &c, const Src &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project, SamplerMethod method)
642         {
643                 Vector4f tmp;
644
645                 if(sampler.type == Shader::PARAMETER_SAMPLER && sampler.rel.type == Shader::PARAMETER_VOID)
646                 {
647                         sampleTexture(tmp, sampler.index, u, v, w, q, dsx, dsy, project, method);
648                 }
649                 else
650                 {
651                         Int index = As<Int>(Float(fetchRegister(sampler).x.x));
652
653                         for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
654                         {
655                                 if(shader->usesSampler(i))
656                                 {
657                                         If(index == i)
658                                         {
659                                                 sampleTexture(tmp, i, u, v, w, q, dsx, dsy, project, method);
660                                                 // FIXME: When the sampler states are the same, we could use one sampler and just index the texture
661                                         }
662                                 }
663                         }
664                 }
665
666                 c.x = tmp[(sampler.swizzle >> 0) & 0x3];
667                 c.y = tmp[(sampler.swizzle >> 2) & 0x3];
668                 c.z = tmp[(sampler.swizzle >> 4) & 0x3];
669                 c.w = tmp[(sampler.swizzle >> 6) & 0x3];
670         }
671
672         void PixelProgram::sampleTexture(Vector4f &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project, SamplerMethod method)
673         {
674                 #if PERF_PROFILE
675                         Long texTime = Ticks();
676                 #endif
677
678                 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + stage * sizeof(Texture);
679
680                 if(!project)
681                 {
682                         sampler[stage]->sampleTexture(texture, c, u, v, w, q, dsx, dsy, method);
683                 }
684                 else
685                 {
686                         Float4 rq = reciprocal(q);
687
688                         Float4 u_q = u * rq;
689                         Float4 v_q = v * rq;
690                         Float4 w_q = w * rq;
691
692                         sampler[stage]->sampleTexture(texture, c, u_q, v_q, w_q, q, dsx, dsy, method);
693                 }
694
695                 #if PERF_PROFILE
696                         cycles[PERF_TEX] += Ticks() - texTime;
697                 #endif
698         }
699
700         void PixelProgram::clampColor(Vector4f oC[RENDERTARGETS])
701         {
702                 for(int index = 0; index < RENDERTARGETS; index++)
703                 {
704                         if(!state.colorWriteActive(index) && !(index == 0 && state.alphaTestActive()))
705                         {
706                                 continue;
707                         }
708
709                         switch(state.targetFormat[index])
710                         {
711                         case FORMAT_NULL:
712                                 break;
713                         case FORMAT_R5G6B5:
714                         case FORMAT_A8R8G8B8:
715                         case FORMAT_A8B8G8R8:
716                         case FORMAT_X8R8G8B8:
717                         case FORMAT_X8B8G8R8:
718                         case FORMAT_A8:
719                         case FORMAT_G16R16:
720                         case FORMAT_A16B16G16R16:
721                                 oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f));
722                                 oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f));
723                                 oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f));
724                                 oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f));
725                                 break;
726                         case FORMAT_R32F:
727                         case FORMAT_G32R32F:
728                         case FORMAT_A32B32G32R32F:
729                         case FORMAT_R32I:
730                         case FORMAT_G32R32I:
731                         case FORMAT_A32B32G32R32I:
732                         case FORMAT_R32UI:
733                         case FORMAT_G32R32UI:
734                         case FORMAT_A32B32G32R32UI:
735                                 break;
736                         default:
737                                 ASSERT(false);
738                         }
739                 }
740         }
741
742         Int4 PixelProgram::enableMask(const Shader::Instruction *instruction)
743         {
744                 Int4 enable = instruction->analysisBranch ? Int4(enableStack[enableIndex]) : Int4(0xFFFFFFFF);
745
746                 if(!whileTest)
747                 {
748                         if(shader->containsBreakInstruction() && instruction->analysisBreak)
749                         {
750                                 enable &= enableBreak;
751                         }
752
753                         if(shader->containsContinueInstruction() && instruction->analysisContinue)
754                         {
755                                 enable &= enableContinue;
756                         }
757
758                         if(shader->containsLeaveInstruction() && instruction->analysisLeave)
759                         {
760                                 enable &= enableLeave;
761                         }
762                 }
763
764                 return enable;
765         }
766
767         Vector4f PixelProgram::fetchRegister(const Src &src, unsigned int offset)
768         {
769                 Vector4f reg;
770                 unsigned int i = src.index + offset;
771
772                 switch(src.type)
773                 {
774                 case Shader::PARAMETER_TEMP:
775                         if(src.rel.type == Shader::PARAMETER_VOID)
776                         {
777                                 reg = r[i];
778                         }
779                         else
780                         {
781                                 Int a = relativeAddress(src, src.bufferIndex);
782
783                                 reg = r[i + a];
784                         }
785                         break;
786                 case Shader::PARAMETER_INPUT:
787                         {
788                                 if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
789                                 {
790                                         reg = v[i];
791                                 }
792                                 else
793                                 {
794                                         Int a = relativeAddress(src, src.bufferIndex);
795
796                                         reg = v[i + a];
797                                 }
798                         }
799                         break;
800                 case Shader::PARAMETER_CONST:
801                         reg = readConstant(src, offset);
802                         break;
803                 case Shader::PARAMETER_TEXTURE:
804                         reg = v[2 + i];
805                         break;
806                 case Shader::PARAMETER_MISCTYPE:
807                         if(src.index == 0) reg = vPos;
808                         if(src.index == 1) reg = vFace;
809                         break;
810                 case Shader::PARAMETER_SAMPLER:
811                         if(src.rel.type == Shader::PARAMETER_VOID)
812                         {
813                                 reg.x = As<Float4>(Int4(i));
814                         }
815                         else if(src.rel.type == Shader::PARAMETER_TEMP)
816                         {
817                                 reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x));
818                         }
819                         return reg;
820                 case Shader::PARAMETER_PREDICATE:   return reg; // Dummy
821                 case Shader::PARAMETER_VOID:        return reg; // Dummy
822                 case Shader::PARAMETER_FLOAT4LITERAL:
823                         reg.x = Float4(src.value[0]);
824                         reg.y = Float4(src.value[1]);
825                         reg.z = Float4(src.value[2]);
826                         reg.w = Float4(src.value[3]);
827                         break;
828                 case Shader::PARAMETER_CONSTINT:    return reg; // Dummy
829                 case Shader::PARAMETER_CONSTBOOL:   return reg; // Dummy
830                 case Shader::PARAMETER_LOOP:        return reg; // Dummy
831                 case Shader::PARAMETER_COLOROUT:
832                         if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
833                         {
834                                 reg = oC[i];
835                         }
836                         else
837                         {
838                                 Int a = relativeAddress(src, src.bufferIndex);
839
840                                 reg = oC[i + a];
841                         }
842                         break;
843                 case Shader::PARAMETER_DEPTHOUT:
844                         reg.x = oDepth;
845                         break;
846                 default:
847                         ASSERT(false);
848                 }
849
850                 const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
851                 const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
852                 const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
853                 const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
854
855                 Vector4f mod;
856
857                 switch(src.modifier)
858                 {
859                 case Shader::MODIFIER_NONE:
860                         mod.x = x;
861                         mod.y = y;
862                         mod.z = z;
863                         mod.w = w;
864                         break;
865                 case Shader::MODIFIER_NEGATE:
866                         mod.x = -x;
867                         mod.y = -y;
868                         mod.z = -z;
869                         mod.w = -w;
870                         break;
871                 case Shader::MODIFIER_ABS:
872                         mod.x = Abs(x);
873                         mod.y = Abs(y);
874                         mod.z = Abs(z);
875                         mod.w = Abs(w);
876                         break;
877                 case Shader::MODIFIER_ABS_NEGATE:
878                         mod.x = -Abs(x);
879                         mod.y = -Abs(y);
880                         mod.z = -Abs(z);
881                         mod.w = -Abs(w);
882                         break;
883                 case Shader::MODIFIER_NOT:
884                         mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
885                         mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
886                         mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
887                         mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
888                         break;
889                 default:
890                         ASSERT(false);
891                 }
892
893                 return mod;
894         }
895
896         RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index)
897         {
898                 if(bufferIndex == -1)
899                 {
900                         return data + OFFSET(DrawData, ps.c[index]);
901                 }
902                 else
903                 {
904                         return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, ps.u[bufferIndex])) + index;
905                 }
906         }
907
908         RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index, Int& offset)
909         {
910                 return uniformAddress(bufferIndex, index) + offset * sizeof(float4);
911         }
912
913         Vector4f PixelProgram::readConstant(const Src &src, unsigned int offset)
914         {
915                 Vector4f c;
916                 unsigned int i = src.index + offset;
917
918                 if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
919                 {
920                         c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i));
921
922                         c.x = c.x.xxxx;
923                         c.y = c.y.yyyy;
924                         c.z = c.z.zzzz;
925                         c.w = c.w.wwww;
926
927                         if(shader->containsDefineInstruction())   // Constant may be known at compile time
928                         {
929                                 for(size_t j = 0; j < shader->getLength(); j++)
930                                 {
931                                         const Shader::Instruction &instruction = *shader->getInstruction(j);
932
933                                         if(instruction.opcode == Shader::OPCODE_DEF)
934                                         {
935                                                 if(instruction.dst.index == i)
936                                                 {
937                                                         c.x = Float4(instruction.src[0].value[0]);
938                                                         c.y = Float4(instruction.src[0].value[1]);
939                                                         c.z = Float4(instruction.src[0].value[2]);
940                                                         c.w = Float4(instruction.src[0].value[3]);
941
942                                                         break;
943                                                 }
944                                         }
945                                 }
946                         }
947                 }
948                 else if(src.rel.type == Shader::PARAMETER_LOOP)
949                 {
950                         Int loopCounter = aL[loopDepth];
951
952                         c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, loopCounter));
953
954                         c.x = c.x.xxxx;
955                         c.y = c.y.yyyy;
956                         c.z = c.z.zzzz;
957                         c.w = c.w.wwww;
958                 }
959                 else
960                 {
961                         Int a = relativeAddress(src, src.bufferIndex);
962
963                         c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));
964
965                         c.x = c.x.xxxx;
966                         c.y = c.y.yyyy;
967                         c.z = c.z.zzzz;
968                         c.w = c.w.wwww;
969                 }
970
971                 return c;
972         }
973
974         Int PixelProgram::relativeAddress(const Shader::Parameter &var, int bufferIndex)
975         {
976                 ASSERT(var.rel.deterministic);
977
978                 if(var.rel.type == Shader::PARAMETER_TEMP)
979                 {
980                         return As<Int>(Extract(r[var.rel.index].x, 0)) * var.rel.scale;
981                 }
982                 else if(var.rel.type == Shader::PARAMETER_INPUT)
983                 {
984                         return As<Int>(Extract(v[var.rel.index].x, 0)) * var.rel.scale;
985                 }
986                 else if(var.rel.type == Shader::PARAMETER_OUTPUT)
987                 {
988                         return As<Int>(Extract(oC[var.rel.index].x, 0)) * var.rel.scale;
989                 }
990                 else if(var.rel.type == Shader::PARAMETER_CONST)
991                 {
992                         return *Pointer<Int>(uniformAddress(bufferIndex, var.rel.index)) * var.rel.scale;
993                 }
994                 else if(var.rel.type == Shader::PARAMETER_LOOP)
995                 {
996                         return aL[loopDepth];
997                 }
998                 else ASSERT(false);
999
1000                 return 0;
1001         }
1002
1003         Float4 PixelProgram::linearToSRGB(const Float4 &x)   // Approximates x^(1.0/2.2)
1004         {
1005                 Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x));
1006                 Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f);
1007
1008                 return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f));
1009         }
1010
1011         void PixelProgram::M3X2(Vector4f &dst, Vector4f &src0, const Src &src1)
1012         {
1013                 Vector4f row0 = fetchRegister(src1, 0);
1014                 Vector4f row1 = fetchRegister(src1, 1);
1015
1016                 dst.x = dot3(src0, row0);
1017                 dst.y = dot3(src0, row1);
1018         }
1019
1020         void PixelProgram::M3X3(Vector4f &dst, Vector4f &src0, const Src &src1)
1021         {
1022                 Vector4f row0 = fetchRegister(src1, 0);
1023                 Vector4f row1 = fetchRegister(src1, 1);
1024                 Vector4f row2 = fetchRegister(src1, 2);
1025
1026                 dst.x = dot3(src0, row0);
1027                 dst.y = dot3(src0, row1);
1028                 dst.z = dot3(src0, row2);
1029         }
1030
1031         void PixelProgram::M3X4(Vector4f &dst, Vector4f &src0, const Src &src1)
1032         {
1033                 Vector4f row0 = fetchRegister(src1, 0);
1034                 Vector4f row1 = fetchRegister(src1, 1);
1035                 Vector4f row2 = fetchRegister(src1, 2);
1036                 Vector4f row3 = fetchRegister(src1, 3);
1037
1038                 dst.x = dot3(src0, row0);
1039                 dst.y = dot3(src0, row1);
1040                 dst.z = dot3(src0, row2);
1041                 dst.w = dot3(src0, row3);
1042         }
1043
1044         void PixelProgram::M4X3(Vector4f &dst, Vector4f &src0, const Src &src1)
1045         {
1046                 Vector4f row0 = fetchRegister(src1, 0);
1047                 Vector4f row1 = fetchRegister(src1, 1);
1048                 Vector4f row2 = fetchRegister(src1, 2);
1049
1050                 dst.x = dot4(src0, row0);
1051                 dst.y = dot4(src0, row1);
1052                 dst.z = dot4(src0, row2);
1053         }
1054
1055         void PixelProgram::M4X4(Vector4f &dst, Vector4f &src0, const Src &src1)
1056         {
1057                 Vector4f row0 = fetchRegister(src1, 0);
1058                 Vector4f row1 = fetchRegister(src1, 1);
1059                 Vector4f row2 = fetchRegister(src1, 2);
1060                 Vector4f row3 = fetchRegister(src1, 3);
1061
1062                 dst.x = dot4(src0, row0);
1063                 dst.y = dot4(src0, row1);
1064                 dst.z = dot4(src0, row2);
1065                 dst.w = dot4(src0, row3);
1066         }
1067
1068         void PixelProgram::TEXLD(Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias)
1069         {
1070                 sampleTexture(dst, src1, src0.x, src0.y, src0.z, src0.w, src0, src0, project, bias ? Bias : Implicit);
1071         }
1072
1073         void PixelProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3, bool project, bool bias)
1074         {
1075                 UNIMPLEMENTED();
1076         }
1077
1078         void PixelProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, bool project, bool bias)
1079         {
1080                 UNIMPLEMENTED();
1081         }
1082
1083         void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2)
1084         {
1085                 UNIMPLEMENTED();
1086         }
1087
1088         void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &offset)
1089         {
1090                 UNIMPLEMENTED();
1091         }
1092
1093         void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3)
1094         {
1095                 UNIMPLEMENTED();
1096         }
1097
1098         void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3, Vector4f &offset)
1099         {
1100                 UNIMPLEMENTED();
1101         }
1102
1103         void PixelProgram::TEXLDD(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2, Vector4f &src3, bool project)
1104         {
1105                 sampleTexture(dst, src1, src0.x, src0.y, src0.z, src0.w, src2, src3, project, Grad);
1106         }
1107
1108         void PixelProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1, bool project)
1109         {
1110                 sampleTexture(dst, src1, src0.x, src0.y, src0.z, src0.w, src0, src0, project, Lod);
1111         }
1112
1113         void PixelProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1)
1114         {
1115                 Pointer<Byte> textureMipmap = data + OFFSET(DrawData, mipmap) + src1.index * sizeof(Texture) + OFFSET(Texture, mipmap);
1116                 for(int i = 0; i < 4; ++i)
1117                 {
1118                         Pointer<Byte> mipmap = textureMipmap + (As<Int>(Extract(lod, i)) + Int(1)) * sizeof(Mipmap);
1119                         dst.x = Insert(dst.x, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, width)))), i);
1120                         dst.y = Insert(dst.y, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, height)))), i);
1121                         dst.z = Insert(dst.z, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)))), i);
1122                 }
1123         }
1124
1125         void PixelProgram::TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask)
1126         {
1127                 Int kill = -1;
1128
1129                 if(mask & 0x1) kill &= SignMask(CmpNLT(src.x, Float4(0.0f)));
1130                 if(mask & 0x2) kill &= SignMask(CmpNLT(src.y, Float4(0.0f)));
1131                 if(mask & 0x4) kill &= SignMask(CmpNLT(src.z, Float4(0.0f)));
1132                 if(mask & 0x8) kill &= SignMask(CmpNLT(src.w, Float4(0.0f)));
1133
1134                 // FIXME: Dynamic branching affects TEXKILL?
1135                 //      if(shader->containsDynamicBranching())
1136                 //      {
1137                 //              kill = ~SignMask(enableMask());
1138                 //      }
1139
1140                 for(unsigned int q = 0; q < state.multiSample; q++)
1141                 {
1142                         cMask[q] &= kill;
1143                 }
1144
1145                 // FIXME: Branch to end of shader if all killed?
1146         }
1147
1148         void PixelProgram::DISCARD(Int cMask[4], const Shader::Instruction *instruction)
1149         {
1150                 Int kill = 0;
1151
1152                 if(shader->containsDynamicBranching())
1153                 {
1154                         kill = ~SignMask(enableMask(instruction));
1155                 }
1156
1157                 for(unsigned int q = 0; q < state.multiSample; q++)
1158                 {
1159                         cMask[q] &= kill;
1160                 }
1161
1162                 // FIXME: Branch to end of shader if all killed?
1163         }
1164
1165         void PixelProgram::DFDX(Vector4f &dst, Vector4f &src)
1166         {
1167                 dst.x = src.x.yyww - src.x.xxzz;
1168                 dst.y = src.y.yyww - src.y.xxzz;
1169                 dst.z = src.z.yyww - src.z.xxzz;
1170                 dst.w = src.w.yyww - src.w.xxzz;
1171         }
1172
1173         void PixelProgram::DFDY(Vector4f &dst, Vector4f &src)
1174         {
1175                 dst.x = src.x.zwzw - src.x.xyxy;
1176                 dst.y = src.y.zwzw - src.y.xyxy;
1177                 dst.z = src.z.zwzw - src.z.xyxy;
1178                 dst.w = src.w.zwzw - src.w.xyxy;
1179         }
1180
1181         void PixelProgram::FWIDTH(Vector4f &dst, Vector4f &src)
1182         {
1183                 // abs(dFdx(src)) + abs(dFdy(src));
1184                 dst.x = Abs(src.x.yyww - src.x.xxzz) + Abs(src.x.zwzw - src.x.xyxy);
1185                 dst.y = Abs(src.y.yyww - src.y.xxzz) + Abs(src.y.zwzw - src.y.xyxy);
1186                 dst.z = Abs(src.z.yyww - src.z.xxzz) + Abs(src.z.zwzw - src.z.xyxy);
1187                 dst.w = Abs(src.w.yyww - src.w.xxzz) + Abs(src.w.zwzw - src.w.xyxy);
1188         }
1189
1190         void PixelProgram::BREAK()
1191         {
1192                 llvm::BasicBlock *deadBlock = Nucleus::createBasicBlock();
1193                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
1194
1195                 if(breakDepth == 0)
1196                 {
1197                         enableIndex = enableIndex - breakDepth;
1198                         Nucleus::createBr(endBlock);
1199                 }
1200                 else
1201                 {
1202                         enableBreak = enableBreak & ~enableStack[enableIndex];
1203                         Bool allBreak = SignMask(enableBreak) == 0x0;
1204
1205                         enableIndex = enableIndex - breakDepth;
1206                         branch(allBreak, endBlock, deadBlock);
1207                 }
1208
1209                 Nucleus::setInsertBlock(deadBlock);
1210                 enableIndex = enableIndex + breakDepth;
1211         }
1212
1213         void PixelProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control)
1214         {
1215                 Int4 condition;
1216
1217                 switch(control)
1218                 {
1219                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
1220                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
1221                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
1222                 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
1223                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
1224                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
1225                 default:
1226                         ASSERT(false);
1227                 }
1228
1229                 BREAK(condition);
1230         }
1231
1232         void PixelProgram::BREAKP(const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
1233         {
1234                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1235
1236                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1237                 {
1238                         condition = ~condition;
1239                 }
1240
1241                 BREAK(condition);
1242         }
1243
1244         void PixelProgram::BREAK(Int4 &condition)
1245         {
1246                 condition &= enableStack[enableIndex];
1247
1248                 llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock();
1249                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
1250
1251                 enableBreak = enableBreak & ~condition;
1252                 Bool allBreak = SignMask(enableBreak) == 0x0;
1253
1254                 enableIndex = enableIndex - breakDepth;
1255                 branch(allBreak, endBlock, continueBlock);
1256
1257                 Nucleus::setInsertBlock(continueBlock);
1258                 enableIndex = enableIndex + breakDepth;
1259         }
1260
1261         void PixelProgram::CONTINUE()
1262         {
1263                 enableContinue = enableContinue & ~enableStack[enableIndex];
1264         }
1265
1266         void PixelProgram::TEST()
1267         {
1268                 whileTest = true;
1269         }
1270
1271         void PixelProgram::CALL(int labelIndex, int callSiteIndex)
1272         {
1273                 if(!labelBlock[labelIndex])
1274                 {
1275                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1276                 }
1277
1278                 if(callRetBlock[labelIndex].size() > 1)
1279                 {
1280                         callStack[stackIndex++] = UInt(callSiteIndex);
1281                 }
1282
1283                 Int4 restoreLeave = enableLeave;
1284
1285                 Nucleus::createBr(labelBlock[labelIndex]);
1286                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1287
1288                 enableLeave = restoreLeave;
1289         }
1290
1291         void PixelProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src)
1292         {
1293                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1294                 {
1295                         CALLNZb(labelIndex, callSiteIndex, src);
1296                 }
1297                 else if(src.type == Shader::PARAMETER_PREDICATE)
1298                 {
1299                         CALLNZp(labelIndex, callSiteIndex, src);
1300                 }
1301                 else ASSERT(false);
1302         }
1303
1304         void PixelProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister)
1305         {
1306                 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0));   // FIXME
1307
1308                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1309                 {
1310                         condition = !condition;
1311                 }
1312
1313                 if(!labelBlock[labelIndex])
1314                 {
1315                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1316                 }
1317
1318                 if(callRetBlock[labelIndex].size() > 1)
1319                 {
1320                         callStack[stackIndex++] = UInt(callSiteIndex);
1321                 }
1322
1323                 Int4 restoreLeave = enableLeave;
1324
1325                 branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1326                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1327
1328                 enableLeave = restoreLeave;
1329         }
1330
1331         void PixelProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister)
1332         {
1333                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1334
1335                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1336                 {
1337                         condition = ~condition;
1338                 }
1339
1340                 condition &= enableStack[enableIndex];
1341
1342                 if(!labelBlock[labelIndex])
1343                 {
1344                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1345                 }
1346
1347                 if(callRetBlock[labelIndex].size() > 1)
1348                 {
1349                         callStack[stackIndex++] = UInt(callSiteIndex);
1350                 }
1351
1352                 enableIndex++;
1353                 enableStack[enableIndex] = condition;
1354                 Int4 restoreLeave = enableLeave;
1355
1356                 Bool notAllFalse = SignMask(condition) != 0;
1357                 branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1358                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1359
1360                 enableIndex--;
1361                 enableLeave = restoreLeave;
1362         }
1363
1364         void PixelProgram::ELSE()
1365         {
1366                 ifDepth--;
1367
1368                 llvm::BasicBlock *falseBlock = ifFalseBlock[ifDepth];
1369                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1370
1371                 if(isConditionalIf[ifDepth])
1372                 {
1373                         Int4 condition = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
1374                         Bool notAllFalse = SignMask(condition) != 0;
1375
1376                         branch(notAllFalse, falseBlock, endBlock);
1377
1378                         enableStack[enableIndex] = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
1379                 }
1380                 else
1381                 {
1382                         Nucleus::createBr(endBlock);
1383                         Nucleus::setInsertBlock(falseBlock);
1384                 }
1385
1386                 ifFalseBlock[ifDepth] = endBlock;
1387
1388                 ifDepth++;
1389         }
1390
1391         void PixelProgram::ENDIF()
1392         {
1393                 ifDepth--;
1394
1395                 llvm::BasicBlock *endBlock = ifFalseBlock[ifDepth];
1396
1397                 Nucleus::createBr(endBlock);
1398                 Nucleus::setInsertBlock(endBlock);
1399
1400                 if(isConditionalIf[ifDepth])
1401                 {
1402                         breakDepth--;
1403                         enableIndex--;
1404                 }
1405         }
1406
1407         void PixelProgram::ENDLOOP()
1408         {
1409                 loopRepDepth--;
1410
1411                 aL[loopDepth] = aL[loopDepth] + increment[loopDepth];   // FIXME: +=
1412
1413                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1414                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1415
1416                 Nucleus::createBr(testBlock);
1417                 Nucleus::setInsertBlock(endBlock);
1418
1419                 loopDepth--;
1420                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1421         }
1422
1423         void PixelProgram::ENDREP()
1424         {
1425                 loopRepDepth--;
1426
1427                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1428                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1429
1430                 Nucleus::createBr(testBlock);
1431                 Nucleus::setInsertBlock(endBlock);
1432
1433                 loopDepth--;
1434                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1435         }
1436
1437         void PixelProgram::ENDWHILE()
1438         {
1439                 loopRepDepth--;
1440
1441                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1442                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1443
1444                 Nucleus::createBr(testBlock);
1445                 Nucleus::setInsertBlock(endBlock);
1446
1447                 enableIndex--;
1448                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1449                 whileTest = false;
1450         }
1451
1452         void PixelProgram::IF(const Src &src)
1453         {
1454                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1455                 {
1456                         IFb(src);
1457                 }
1458                 else if(src.type == Shader::PARAMETER_PREDICATE)
1459                 {
1460                         IFp(src);
1461                 }
1462                 else
1463                 {
1464                         Int4 condition = As<Int4>(fetchRegister(src).x);
1465                         IF(condition);
1466                 }
1467         }
1468
1469         void PixelProgram::IFb(const Src &boolRegister)
1470         {
1471                 ASSERT(ifDepth < 24 + 4);
1472
1473                 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0));   // FIXME
1474
1475                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1476                 {
1477                         condition = !condition;
1478                 }
1479
1480                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1481                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1482
1483                 branch(condition, trueBlock, falseBlock);
1484
1485                 isConditionalIf[ifDepth] = false;
1486                 ifFalseBlock[ifDepth] = falseBlock;
1487
1488                 ifDepth++;
1489         }
1490
1491         void PixelProgram::IFp(const Src &predicateRegister)
1492         {
1493                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1494
1495                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1496                 {
1497                         condition = ~condition;
1498                 }
1499
1500                 IF(condition);
1501         }
1502
1503         void PixelProgram::IFC(Vector4f &src0, Vector4f &src1, Control control)
1504         {
1505                 Int4 condition;
1506
1507                 switch(control)
1508                 {
1509                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
1510                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
1511                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
1512                 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
1513                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
1514                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
1515                 default:
1516                         ASSERT(false);
1517                 }
1518
1519                 IF(condition);
1520         }
1521
1522         void PixelProgram::IF(Int4 &condition)
1523         {
1524                 condition &= enableStack[enableIndex];
1525
1526                 enableIndex++;
1527                 enableStack[enableIndex] = condition;
1528
1529                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1530                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1531
1532                 Bool notAllFalse = SignMask(condition) != 0;
1533
1534                 branch(notAllFalse, trueBlock, falseBlock);
1535
1536                 isConditionalIf[ifDepth] = true;
1537                 ifFalseBlock[ifDepth] = falseBlock;
1538
1539                 ifDepth++;
1540                 breakDepth++;
1541         }
1542
1543         void PixelProgram::LABEL(int labelIndex)
1544         {
1545                 if(!labelBlock[labelIndex])
1546                 {
1547                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1548                 }
1549
1550                 Nucleus::setInsertBlock(labelBlock[labelIndex]);
1551                 currentLabel = labelIndex;
1552         }
1553
1554         void PixelProgram::LOOP(const Src &integerRegister)
1555         {
1556                 loopDepth++;
1557
1558                 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0]));
1559                 aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][1]));
1560                 increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][2]));
1561
1562                 //      If(increment[loopDepth] == 0)
1563                 //      {
1564                 //              increment[loopDepth] = 1;
1565                 //      }
1566
1567                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1568                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1569                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1570
1571                 loopRepTestBlock[loopRepDepth] = testBlock;
1572                 loopRepEndBlock[loopRepDepth] = endBlock;
1573
1574                 // FIXME: jump(testBlock)
1575                 Nucleus::createBr(testBlock);
1576                 Nucleus::setInsertBlock(testBlock);
1577
1578                 branch(iteration[loopDepth] > 0, loopBlock, endBlock);
1579                 Nucleus::setInsertBlock(loopBlock);
1580
1581                 iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1582
1583                 loopRepDepth++;
1584                 breakDepth = 0;
1585         }
1586
1587         void PixelProgram::REP(const Src &integerRegister)
1588         {
1589                 loopDepth++;
1590
1591                 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0]));
1592                 aL[loopDepth] = aL[loopDepth - 1];
1593
1594                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1595                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1596                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1597
1598                 loopRepTestBlock[loopRepDepth] = testBlock;
1599                 loopRepEndBlock[loopRepDepth] = endBlock;
1600
1601                 // FIXME: jump(testBlock)
1602                 Nucleus::createBr(testBlock);
1603                 Nucleus::setInsertBlock(testBlock);
1604
1605                 branch(iteration[loopDepth] > 0, loopBlock, endBlock);
1606                 Nucleus::setInsertBlock(loopBlock);
1607
1608                 iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1609
1610                 loopRepDepth++;
1611                 breakDepth = 0;
1612         }
1613
1614         void PixelProgram::WHILE(const Src &temporaryRegister)
1615         {
1616                 enableIndex++;
1617
1618                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1619                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1620                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1621
1622                 loopRepTestBlock[loopRepDepth] = testBlock;
1623                 loopRepEndBlock[loopRepDepth] = endBlock;
1624
1625                 Int4 restoreBreak = enableBreak;
1626                 Int4 restoreContinue = enableContinue;
1627
1628                 // FIXME: jump(testBlock)
1629                 Nucleus::createBr(testBlock);
1630                 Nucleus::setInsertBlock(testBlock);
1631                 enableContinue = restoreContinue;
1632
1633                 const Vector4f &src = fetchRegister(temporaryRegister);
1634                 Int4 condition = As<Int4>(src.x);
1635                 condition &= enableStack[enableIndex - 1];
1636                 enableStack[enableIndex] = condition;
1637
1638                 Bool notAllFalse = SignMask(condition) != 0;
1639                 branch(notAllFalse, loopBlock, endBlock);
1640
1641                 Nucleus::setInsertBlock(endBlock);
1642                 enableBreak = restoreBreak;
1643
1644                 Nucleus::setInsertBlock(loopBlock);
1645
1646                 loopRepDepth++;
1647                 breakDepth = 0;
1648         }
1649
1650         void PixelProgram::RET()
1651         {
1652                 if(currentLabel == -1)
1653                 {
1654                         returnBlock = Nucleus::createBasicBlock();
1655                         Nucleus::createBr(returnBlock);
1656                 }
1657                 else
1658                 {
1659                         llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
1660
1661                         if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
1662                         {
1663                                 // FIXME: Encapsulate
1664                                 UInt index = callStack[--stackIndex];
1665
1666                                 llvm::Value *value = index.loadValue();
1667                                 llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
1668
1669                                 for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
1670                                 {
1671                                         Nucleus::addSwitchCase(switchInst, i, callRetBlock[currentLabel][i]);
1672                                 }
1673                         }
1674                         else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
1675                         {
1676                                 Nucleus::createBr(callRetBlock[currentLabel][0]);
1677                         }
1678                         else   // Function isn't called
1679                         {
1680                                 Nucleus::createBr(unreachableBlock);
1681                         }
1682
1683                         Nucleus::setInsertBlock(unreachableBlock);
1684                         Nucleus::createUnreachable();
1685                 }
1686         }
1687
1688         void PixelProgram::LEAVE()
1689         {
1690                 enableLeave = enableLeave & ~enableStack[enableIndex];
1691
1692                 // FIXME: Return from function if all instances left
1693                 // FIXME: Use enableLeave in other control-flow constructs
1694         }
1695 }