OSDN Git Service

texelFetch implementation
[android-x86/external-swiftshader.git] / src / Shader / PixelProgram.cpp
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "PixelProgram.hpp"
16 #include "Primitive.hpp"
17 #include "Renderer.hpp"
18 #include "SamplerCore.hpp"
19
20 namespace sw
21 {
22         extern bool postBlendSRGB;   // Declared here, defined elsewhere; not referenced by setBuiltins
23         extern bool booleanFaceRegister;   // When set, setBuiltins stores the CmpNLT(area, 0) mask in vFace instead of the primitive area
24         extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates; when set, setBuiltins adds 0.5 to vPos.x/vPos.y
25         extern bool fullPixelPositionRegister;   // When set, setBuiltins also fills vPos.z (from z[0]) and vPos.w (from w)
26
27         void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w)
28         {
29                 if(shader->getVersion() >= 0x0300)
30                 {
31                         if(shader->vPosDeclared)
32                         {
33                                 if(!halfIntegerCoordinates)
34                                 {
35                                         vPos.x = Float4(Float(x)) + Float4(0, 1, 0, 1);
36                                         vPos.y = Float4(Float(y)) + Float4(0, 0, 1, 1);
37                                 }
38                                 else
39                                 {
40                                         vPos.x = Float4(Float(x)) + Float4(0.5f, 1.5f, 0.5f, 1.5f);
41                                         vPos.y = Float4(Float(y)) + Float4(0.5f, 0.5f, 1.5f, 1.5f);
42                                 }
43
44                                 if(fullPixelPositionRegister)
45                                 {
46                                         vPos.z = z[0]; // FIXME: Centroid?
47                                         vPos.w = w;    // FIXME: Centroid?
48                                 }
49                         }
50
51                         if(shader->vFaceDeclared)
52                         {
53                                 Float4 area = *Pointer<Float>(primitive + OFFSET(Primitive, area));
54                                 Float4 face = booleanFaceRegister ? Float4(As<Float4>(CmpNLT(area, Float4(0.0f)))) : area;
55
56                                 vFace.x = face;
57                                 vFace.y = face;
58                                 vFace.z = face;
59                                 vFace.w = face;
60                         }
61                 }
62         }
63
64         void PixelProgram::applyShader(Int cMask[4])
65         {
66                 enableIndex = 0;
67                 stackIndex = 0;
68
69                 if(shader->containsLeaveInstruction())
70                 {
71                         enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
72                 }
73
74                 for(int i = 0; i < RENDERTARGETS; i++)
75                 {
76                         if(state.targetFormat[i] != FORMAT_NULL)
77                         {
78                                 oC[i] = Vector4f(0.0f, 0.0f, 0.0f, 0.0f);
79                         }
80                 }
81
82                 // Create all call site return blocks up front
83                 for(size_t i = 0; i < shader->getLength(); i++)
84                 {
85                         const Shader::Instruction *instruction = shader->getInstruction(i);
86                         Shader::Opcode opcode = instruction->opcode;
87
88                         if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
89                         {
90                                 const Dst &dst = instruction->dst;
91
92                                 ASSERT(callRetBlock[dst.label].size() == dst.callSite);
93                                 callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
94                         }
95                 }
96
97                 bool broadcastColor0 = true;
98
99                 for(size_t i = 0; i < shader->getLength(); i++)
100                 {
101                         const Shader::Instruction *instruction = shader->getInstruction(i);
102                         Shader::Opcode opcode = instruction->opcode;
103
104                         if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
105                         {
106                                 continue;
107                         }
108
109                         const Dst &dst = instruction->dst;
110                         const Src &src0 = instruction->src[0];
111                         const Src &src1 = instruction->src[1];
112                         const Src &src2 = instruction->src[2];
113                         const Src &src3 = instruction->src[3];
114                         const Src &src4 = instruction->src[4];
115
116                         bool predicate = instruction->predicate;
117                         Control control = instruction->control;
118                         bool pp = dst.partialPrecision;
119                         bool project = instruction->project;
120                         bool bias = instruction->bias;
121
122                         Vector4f d;
123                         Vector4f s0;
124                         Vector4f s1;
125                         Vector4f s2;
126                         Vector4f s3;
127                         Vector4f s4;
128
129                         if(opcode == Shader::OPCODE_TEXKILL)   // Takes destination as input
130                         {
131                                 if(dst.type == Shader::PARAMETER_TEXTURE)
132                                 {
133                                         d.x = v[2 + dst.index].x;
134                                         d.y = v[2 + dst.index].y;
135                                         d.z = v[2 + dst.index].z;
136                                         d.w = v[2 + dst.index].w;
137                                 }
138                                 else
139                                 {
140                                         d = r[dst.index];
141                                 }
142                         }
143
144                         if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0);
145                         if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1);
146                         if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2);
147                         if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3);
148                         if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4);
149
150                         switch(opcode)
151                         {
152                         case Shader::OPCODE_PS_2_0:                                                    break;
153                         case Shader::OPCODE_PS_2_x:                                                    break;
154                         case Shader::OPCODE_PS_3_0:                                                    break;
155                         case Shader::OPCODE_DEF:                                                       break;
156                         case Shader::OPCODE_DCL:                                                       break;
157                         case Shader::OPCODE_NOP:                                                       break;
158                         case Shader::OPCODE_MOV:        mov(d, s0);                                    break;
159                         case Shader::OPCODE_NEG:        neg(d, s0);                                    break;
160                         case Shader::OPCODE_INEG:       ineg(d, s0);                                   break;
161                         case Shader::OPCODE_F2B:        f2b(d, s0);                                    break;
162                         case Shader::OPCODE_B2F:        b2f(d, s0);                                    break;
163                         case Shader::OPCODE_F2I:        f2i(d, s0);                                    break;
164                         case Shader::OPCODE_I2F:        i2f(d, s0);                                    break;
165                         case Shader::OPCODE_F2U:        f2u(d, s0);                                    break;
166                         case Shader::OPCODE_U2F:        u2f(d, s0);                                    break;
167                         case Shader::OPCODE_I2B:        i2b(d, s0);                                    break;
168                         case Shader::OPCODE_B2I:        b2i(d, s0);                                    break;
169                         case Shader::OPCODE_ADD:        add(d, s0, s1);                                break;
170                         case Shader::OPCODE_IADD:       iadd(d, s0, s1);                               break;
171                         case Shader::OPCODE_SUB:        sub(d, s0, s1);                                break;
172                         case Shader::OPCODE_ISUB:       isub(d, s0, s1);                               break;
173                         case Shader::OPCODE_MUL:        mul(d, s0, s1);                                break;
174                         case Shader::OPCODE_IMUL:       imul(d, s0, s1);                               break;
175                         case Shader::OPCODE_MAD:        mad(d, s0, s1, s2);                            break;
176                         case Shader::OPCODE_IMAD:       imad(d, s0, s1, s2);                           break;
177                         case Shader::OPCODE_DP1:        dp1(d, s0, s1);                                break;
178                         case Shader::OPCODE_DP2:        dp2(d, s0, s1);                                break;
179                         case Shader::OPCODE_DP2ADD:     dp2add(d, s0, s1, s2);                         break;
180                         case Shader::OPCODE_DP3:        dp3(d, s0, s1);                                break;
181                         case Shader::OPCODE_DP4:        dp4(d, s0, s1);                                break;
182                         case Shader::OPCODE_DET2:       det2(d, s0, s1);                               break;
183                         case Shader::OPCODE_DET3:       det3(d, s0, s1, s2);                           break;
184                         case Shader::OPCODE_DET4:       det4(d, s0, s1, s2, s3);                       break;
185                         case Shader::OPCODE_CMP0:       cmp0(d, s0, s1, s2);                           break;
186                         case Shader::OPCODE_ICMP:       icmp(d, s0, s1, control);                      break;
187                         case Shader::OPCODE_UCMP:       ucmp(d, s0, s1, control);                      break;
188                         case Shader::OPCODE_SELECT:     select(d, s0, s1, s2);                         break;
189                         case Shader::OPCODE_EXTRACT:    extract(d.x, s0, s1.x);                        break;
190                         case Shader::OPCODE_INSERT:     insert(d, s0, s1.x, s2.x);                     break;
191                         case Shader::OPCODE_FRC:        frc(d, s0);                                    break;
192                         case Shader::OPCODE_TRUNC:      trunc(d, s0);                                  break;
193                         case Shader::OPCODE_FLOOR:      floor(d, s0);                                  break;
194                         case Shader::OPCODE_ROUND:      round(d, s0);                                  break;
195                         case Shader::OPCODE_ROUNDEVEN:  roundEven(d, s0);                              break;
196                         case Shader::OPCODE_CEIL:       ceil(d, s0);                                   break;
197                         case Shader::OPCODE_EXP2X:      exp2x(d, s0, pp);                              break;
198                         case Shader::OPCODE_EXP2:       exp2(d, s0, pp);                               break;
199                         case Shader::OPCODE_LOG2X:      log2x(d, s0, pp);                              break;
200                         case Shader::OPCODE_LOG2:       log2(d, s0, pp);                               break;
201                         case Shader::OPCODE_EXP:        exp(d, s0, pp);                                break;
202                         case Shader::OPCODE_LOG:        log(d, s0, pp);                                break;
203                         case Shader::OPCODE_RCPX:       rcpx(d, s0, pp);                               break;
204                         case Shader::OPCODE_DIV:        div(d, s0, s1);                                break;
205                         case Shader::OPCODE_IDIV:       idiv(d, s0, s1);                               break;
206                         case Shader::OPCODE_UDIV:       udiv(d, s0, s1);                               break;
207                         case Shader::OPCODE_MOD:        mod(d, s0, s1);                                break;
208                         case Shader::OPCODE_IMOD:       imod(d, s0, s1);                               break;
209                         case Shader::OPCODE_UMOD:       umod(d, s0, s1);                               break;
210                         case Shader::OPCODE_SHL:        shl(d, s0, s1);                                break;
211                         case Shader::OPCODE_ISHR:       ishr(d, s0, s1);                               break;
212                         case Shader::OPCODE_USHR:       ushr(d, s0, s1);                               break;
213                         case Shader::OPCODE_RSQX:       rsqx(d, s0, pp);                               break;
214                         case Shader::OPCODE_SQRT:       sqrt(d, s0, pp);                               break;
215                         case Shader::OPCODE_RSQ:        rsq(d, s0, pp);                                break;
216                         case Shader::OPCODE_LEN2:       len2(d.x, s0, pp);                             break;
217                         case Shader::OPCODE_LEN3:       len3(d.x, s0, pp);                             break;
218                         case Shader::OPCODE_LEN4:       len4(d.x, s0, pp);                             break;
219                         case Shader::OPCODE_DIST1:      dist1(d.x, s0, s1, pp);                        break;
220                         case Shader::OPCODE_DIST2:      dist2(d.x, s0, s1, pp);                        break;
221                         case Shader::OPCODE_DIST3:      dist3(d.x, s0, s1, pp);                        break;
222                         case Shader::OPCODE_DIST4:      dist4(d.x, s0, s1, pp);                        break;
223                         case Shader::OPCODE_MIN:        min(d, s0, s1);                                break;
224                         case Shader::OPCODE_IMIN:       imin(d, s0, s1);                               break;
225                         case Shader::OPCODE_UMIN:       umin(d, s0, s1);                               break;
226                         case Shader::OPCODE_MAX:        max(d, s0, s1);                                break;
227                         case Shader::OPCODE_IMAX:       imax(d, s0, s1);                               break;
228                         case Shader::OPCODE_UMAX:       umax(d, s0, s1);                               break;
229                         case Shader::OPCODE_LRP:        lrp(d, s0, s1, s2);                            break;
230                         case Shader::OPCODE_STEP:       step(d, s0, s1);                               break;
231                         case Shader::OPCODE_SMOOTH:     smooth(d, s0, s1, s2);                         break;
232                         case Shader::OPCODE_FLOATBITSTOINT:
233                         case Shader::OPCODE_FLOATBITSTOUINT:
234                         case Shader::OPCODE_INTBITSTOFLOAT:
235                         case Shader::OPCODE_UINTBITSTOFLOAT: d = s0;                                   break;
236                         case Shader::OPCODE_PACKSNORM2x16:   packSnorm2x16(d, s0);                     break;
237                         case Shader::OPCODE_PACKUNORM2x16:   packUnorm2x16(d, s0);                     break;
238                         case Shader::OPCODE_PACKHALF2x16:    packHalf2x16(d, s0);                      break;
239                         case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0);                   break;
240                         case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0);                   break;
241                         case Shader::OPCODE_UNPACKHALF2x16:  unpackHalf2x16(d, s0);                    break;
242                         case Shader::OPCODE_POWX:       powx(d, s0, s1, pp);                           break;
243                         case Shader::OPCODE_POW:        pow(d, s0, s1, pp);                            break;
244                         case Shader::OPCODE_SGN:        sgn(d, s0);                                    break;
245                         case Shader::OPCODE_ISGN:       isgn(d, s0);                                   break;
246                         case Shader::OPCODE_CRS:        crs(d, s0, s1);                                break;
247                         case Shader::OPCODE_FORWARD1:   forward1(d, s0, s1, s2);                       break;
248                         case Shader::OPCODE_FORWARD2:   forward2(d, s0, s1, s2);                       break;
249                         case Shader::OPCODE_FORWARD3:   forward3(d, s0, s1, s2);                       break;
250                         case Shader::OPCODE_FORWARD4:   forward4(d, s0, s1, s2);                       break;
251                         case Shader::OPCODE_REFLECT1:   reflect1(d, s0, s1);                           break;
252                         case Shader::OPCODE_REFLECT2:   reflect2(d, s0, s1);                           break;
253                         case Shader::OPCODE_REFLECT3:   reflect3(d, s0, s1);                           break;
254                         case Shader::OPCODE_REFLECT4:   reflect4(d, s0, s1);                           break;
255                         case Shader::OPCODE_REFRACT1:   refract1(d, s0, s1, s2.x);                     break;
256                         case Shader::OPCODE_REFRACT2:   refract2(d, s0, s1, s2.x);                     break;
257                         case Shader::OPCODE_REFRACT3:   refract3(d, s0, s1, s2.x);                     break;
258                         case Shader::OPCODE_REFRACT4:   refract4(d, s0, s1, s2.x);                     break;
259                         case Shader::OPCODE_NRM2:       nrm2(d, s0, pp);                               break;
260                         case Shader::OPCODE_NRM3:       nrm3(d, s0, pp);                               break;
261                         case Shader::OPCODE_NRM4:       nrm4(d, s0, pp);                               break;
262                         case Shader::OPCODE_ABS:        abs(d, s0);                                    break;
263                         case Shader::OPCODE_IABS:       iabs(d, s0);                                   break;
264                         case Shader::OPCODE_SINCOS:     sincos(d, s0, pp);                             break;
265                         case Shader::OPCODE_COS:        cos(d, s0, pp);                                break;
266                         case Shader::OPCODE_SIN:        sin(d, s0, pp);                                break;
267                         case Shader::OPCODE_TAN:        tan(d, s0, pp);                                break;
268                         case Shader::OPCODE_ACOS:       acos(d, s0, pp);                               break;
269                         case Shader::OPCODE_ASIN:       asin(d, s0, pp);                               break;
270                         case Shader::OPCODE_ATAN:       atan(d, s0, pp);                               break;
271                         case Shader::OPCODE_ATAN2:      atan2(d, s0, s1, pp);                          break;
272                         case Shader::OPCODE_COSH:       cosh(d, s0, pp);                               break;
273                         case Shader::OPCODE_SINH:       sinh(d, s0, pp);                               break;
274                         case Shader::OPCODE_TANH:       tanh(d, s0, pp);                               break;
275                         case Shader::OPCODE_ACOSH:      acosh(d, s0, pp);                              break;
276                         case Shader::OPCODE_ASINH:      asinh(d, s0, pp);                              break;
277                         case Shader::OPCODE_ATANH:      atanh(d, s0, pp);                              break;
278                         case Shader::OPCODE_M4X4:       M4X4(d, s0, src1);                             break;
279                         case Shader::OPCODE_M4X3:       M4X3(d, s0, src1);                             break;
280                         case Shader::OPCODE_M3X4:       M3X4(d, s0, src1);                             break;
281                         case Shader::OPCODE_M3X3:       M3X3(d, s0, src1);                             break;
282                         case Shader::OPCODE_M3X2:       M3X2(d, s0, src1);                             break;
283                         case Shader::OPCODE_TEX:        TEXLD(d, s0, src1, project, bias);             break;
284                         case Shader::OPCODE_TEXLDD:     TEXLDD(d, s0, src1, s2, s3, project);          break;
285                         case Shader::OPCODE_TEXLDL:     TEXLDL(d, s0, src1, project);                  break;
286                         case Shader::OPCODE_TEXSIZE:    TEXSIZE(d, s0.x, src1);                        break;
287                         case Shader::OPCODE_TEXKILL:    TEXKILL(cMask, d, dst.mask);                   break;
288                         case Shader::OPCODE_TEXOFFSET:  TEXOFFSET(d, s0, src1, s2, project, bias);     break;
289                         case Shader::OPCODE_TEXLDLOFFSET: TEXLDL(d, s0, src1, s2, project, bias);      break;
290                         case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2);                   break;
291                         case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCH(d, s0, src1, s2, s3);         break;
292                         case Shader::OPCODE_TEXGRAD:    TEXGRAD(d, s0, src1, s2, s3);                  break;
293                         case Shader::OPCODE_TEXGRADOFFSET: TEXGRAD(d, s0, src1, s2, s3, s4);           break;
294                         case Shader::OPCODE_DISCARD:    DISCARD(cMask, instruction);                   break;
295                         case Shader::OPCODE_DFDX:       DFDX(d, s0);                                   break;
296                         case Shader::OPCODE_DFDY:       DFDY(d, s0);                                   break;
297                         case Shader::OPCODE_FWIDTH:     FWIDTH(d, s0);                                 break;
298                         case Shader::OPCODE_BREAK:      BREAK();                                       break;
299                         case Shader::OPCODE_BREAKC:     BREAKC(s0, s1, control);                       break;
300                         case Shader::OPCODE_BREAKP:     BREAKP(src0);                                  break;
301                         case Shader::OPCODE_CONTINUE:   CONTINUE();                                    break;
302                         case Shader::OPCODE_TEST:       TEST();                                        break;
303                         case Shader::OPCODE_CALL:       CALL(dst.label, dst.callSite);                 break;
304                         case Shader::OPCODE_CALLNZ:     CALLNZ(dst.label, dst.callSite, src0);         break;
305                         case Shader::OPCODE_ELSE:       ELSE();                                        break;
306                         case Shader::OPCODE_ENDIF:      ENDIF();                                       break;
307                         case Shader::OPCODE_ENDLOOP:    ENDLOOP();                                     break;
308                         case Shader::OPCODE_ENDREP:     ENDREP();                                      break;
309                         case Shader::OPCODE_ENDWHILE:   ENDWHILE();                                    break;
310                         case Shader::OPCODE_ENDSWITCH:  ENDSWITCH();                                   break;
311                         case Shader::OPCODE_IF:         IF(src0);                                      break;
312                         case Shader::OPCODE_IFC:        IFC(s0, s1, control);                          break;
313                         case Shader::OPCODE_LABEL:      LABEL(dst.index);                              break;
314                         case Shader::OPCODE_LOOP:       LOOP(src1);                                    break;
315                         case Shader::OPCODE_REP:        REP(src0);                                     break;
316                         case Shader::OPCODE_WHILE:      WHILE(src0);                                   break;
317                         case Shader::OPCODE_SWITCH:     SWITCH();                                      break;
318                         case Shader::OPCODE_RET:        RET();                                         break;
319                         case Shader::OPCODE_LEAVE:      LEAVE();                                       break;
320                         case Shader::OPCODE_CMP:        cmp(d, s0, s1, control);                       break;
321                         case Shader::OPCODE_ALL:        all(d.x, s0);                                  break;
322                         case Shader::OPCODE_ANY:        any(d.x, s0);                                  break;
323                         case Shader::OPCODE_NOT:        not(d, s0);                                    break;
324                         case Shader::OPCODE_OR:         or(d, s0, s1);                                 break;
325                         case Shader::OPCODE_XOR:        xor(d, s0, s1);                                break;
326                         case Shader::OPCODE_AND:        and(d, s0, s1);                                break;
327                         case Shader::OPCODE_EQ:         equal(d, s0, s1);                              break;
328                         case Shader::OPCODE_NE:         notEqual(d, s0, s1);                           break;
329                         case Shader::OPCODE_END:                                                       break;
330                         default:
331                                 ASSERT(false);
332                         }
333
334                         if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_TEXKILL && opcode != Shader::OPCODE_NOP)
335                         {
336                                 if(dst.integer)
337                                 {
338                                         switch(opcode)
339                                         {
340                                         case Shader::OPCODE_DIV:
341                                                 if(dst.x) d.x = Trunc(d.x);
342                                                 if(dst.y) d.y = Trunc(d.y);
343                                                 if(dst.z) d.z = Trunc(d.z);
344                                                 if(dst.w) d.w = Trunc(d.w);
345                                                 break;
346                                         default:
347                                                 break;   // No truncation to integer required when arguments are integer
348                                         }
349                                 }
350
351                                 if(dst.saturate)
352                                 {
353                                         if(dst.x) d.x = Max(d.x, Float4(0.0f));
354                                         if(dst.y) d.y = Max(d.y, Float4(0.0f));
355                                         if(dst.z) d.z = Max(d.z, Float4(0.0f));
356                                         if(dst.w) d.w = Max(d.w, Float4(0.0f));
357
358                                         if(dst.x) d.x = Min(d.x, Float4(1.0f));
359                                         if(dst.y) d.y = Min(d.y, Float4(1.0f));
360                                         if(dst.z) d.z = Min(d.z, Float4(1.0f));
361                                         if(dst.w) d.w = Min(d.w, Float4(1.0f));
362                                 }
363
364                                 if(instruction->isPredicated())
365                                 {
366                                         Vector4f pDst;   // FIXME: Rename
367
368                                         switch(dst.type)
369                                         {
370                                         case Shader::PARAMETER_TEMP:
371                                                 if(dst.rel.type == Shader::PARAMETER_VOID)
372                                                 {
373                                                         if(dst.x) pDst.x = r[dst.index].x;
374                                                         if(dst.y) pDst.y = r[dst.index].y;
375                                                         if(dst.z) pDst.z = r[dst.index].z;
376                                                         if(dst.w) pDst.w = r[dst.index].w;
377                                                 }
378                                                 else
379                                                 {
380                                                         Int a = relativeAddress(dst);
381
382                                                         if(dst.x) pDst.x = r[dst.index + a].x;
383                                                         if(dst.y) pDst.y = r[dst.index + a].y;
384                                                         if(dst.z) pDst.z = r[dst.index + a].z;
385                                                         if(dst.w) pDst.w = r[dst.index + a].w;
386                                                 }
387                                                 break;
388                                         case Shader::PARAMETER_COLOROUT:
389                                                 if(dst.rel.type == Shader::PARAMETER_VOID)
390                                                 {
391                                                         if(dst.x) pDst.x = oC[dst.index].x;
392                                                         if(dst.y) pDst.y = oC[dst.index].y;
393                                                         if(dst.z) pDst.z = oC[dst.index].z;
394                                                         if(dst.w) pDst.w = oC[dst.index].w;
395                                                 }
396                                                 else
397                                                 {
398                                                         Int a = relativeAddress(dst) + dst.index;
399
400                                                         if(dst.x) pDst.x = oC[a].x;
401                                                         if(dst.y) pDst.y = oC[a].y;
402                                                         if(dst.z) pDst.z = oC[a].z;
403                                                         if(dst.w) pDst.w = oC[a].w;
404                                                 }
405                                                 break;
406                                         case Shader::PARAMETER_PREDICATE:
407                                                 if(dst.x) pDst.x = p0.x;
408                                                 if(dst.y) pDst.y = p0.y;
409                                                 if(dst.z) pDst.z = p0.z;
410                                                 if(dst.w) pDst.w = p0.w;
411                                                 break;
412                                         case Shader::PARAMETER_DEPTHOUT:
413                                                 pDst.x = oDepth;
414                                                 break;
415                                         default:
416                                                 ASSERT(false);
417                                         }
418
419                                         Int4 enable = enableMask(instruction);
420
421                                         Int4 xEnable = enable;
422                                         Int4 yEnable = enable;
423                                         Int4 zEnable = enable;
424                                         Int4 wEnable = enable;
425
426                                         if(predicate)
427                                         {
428                                                 unsigned char pSwizzle = instruction->predicateSwizzle;
429
430                                                 Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03];
431                                                 Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03];
432                                                 Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03];
433                                                 Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03];
434
435                                                 if(!instruction->predicateNot)
436                                                 {
437                                                         if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
438                                                         if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
439                                                         if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
440                                                         if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
441                                                 }
442                                                 else
443                                                 {
444                                                         if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
445                                                         if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
446                                                         if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
447                                                         if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
448                                                 }
449                                         }
450
451                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
452                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
453                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
454                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
455
456                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
457                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
458                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
459                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
460                                 }
461
462                                 switch(dst.type)
463                                 {
464                                 case Shader::PARAMETER_TEMP:
465                                         if(dst.rel.type == Shader::PARAMETER_VOID)
466                                         {
467                                                 if(dst.x) r[dst.index].x = d.x;
468                                                 if(dst.y) r[dst.index].y = d.y;
469                                                 if(dst.z) r[dst.index].z = d.z;
470                                                 if(dst.w) r[dst.index].w = d.w;
471                                         }
472                                         else
473                                         {
474                                                 Int a = relativeAddress(dst);
475
476                                                 if(dst.x) r[dst.index + a].x = d.x;
477                                                 if(dst.y) r[dst.index + a].y = d.y;
478                                                 if(dst.z) r[dst.index + a].z = d.z;
479                                                 if(dst.w) r[dst.index + a].w = d.w;
480                                         }
481                                         break;
482                                 case Shader::PARAMETER_COLOROUT:
483                                         if(dst.rel.type == Shader::PARAMETER_VOID)
484                                         {
485                                                 broadcastColor0 = (dst.index == 0) && broadcastColor0;
486
487                                                 if(dst.x) { oC[dst.index].x = d.x; }
488                                                 if(dst.y) { oC[dst.index].y = d.y; }
489                                                 if(dst.z) { oC[dst.index].z = d.z; }
490                                                 if(dst.w) { oC[dst.index].w = d.w; }
491                                         }
492                                         else
493                                         {
494                                                 broadcastColor0 = false;
495                                                 Int a = relativeAddress(dst) + dst.index;
496
497                                                 if(dst.x) { oC[a].x = d.x; }
498                                                 if(dst.y) { oC[a].y = d.y; }
499                                                 if(dst.z) { oC[a].z = d.z; }
500                                                 if(dst.w) { oC[a].w = d.w; }
501                                         }
502                                         break;
503                                 case Shader::PARAMETER_PREDICATE:
504                                         if(dst.x) p0.x = d.x;
505                                         if(dst.y) p0.y = d.y;
506                                         if(dst.z) p0.z = d.z;
507                                         if(dst.w) p0.w = d.w;
508                                         break;
509                                 case Shader::PARAMETER_DEPTHOUT:
510                                         oDepth = d.x;
511                                         break;
512                                 default:
513                                         ASSERT(false);
514                                 }
515                         }
516                 }
517
518                 if(currentLabel != -1)
519                 {
520                         Nucleus::setInsertBlock(returnBlock);
521                 }
522
523                 if(broadcastColor0)
524                 {
525                         for(int i = 0; i < RENDERTARGETS; i++)
526                         {
527                                 c[i] = oC[0];
528                         }
529                 }
530                 else
531                 {
532                         for(int i = 0; i < RENDERTARGETS; i++)
533                         {
534                                 c[i] = oC[i];
535                         }
536                 }
537         }
538
539         Bool PixelProgram::alphaTest(Int cMask[4])
540         {
541                 clampColor(c);
542
543                 if(!state.alphaTestActive())
544                 {
545                         return true;
546                 }
547
548                 Int aMask;
549
550                 if(state.transparencyAntialiasing == TRANSPARENCY_NONE)
551                 {
552                         Short4 alpha = RoundShort4(c[0].w * Float4(0x1000));
553
554                         PixelRoutine::alphaTest(aMask, alpha);
555
556                         for(unsigned int q = 0; q < state.multiSample; q++)
557                         {
558                                 cMask[q] &= aMask;
559                         }
560                 }
561                 else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
562                 {
563                         alphaToCoverage(cMask, c[0].w);
564                 }
565                 else ASSERT(false);
566
567                 Int pass = cMask[0];
568
569                 for(unsigned int q = 1; q < state.multiSample; q++)
570                 {
571                         pass = pass | cMask[q];
572                 }
573
574                 return pass != 0x0;
575         }
576
577         void PixelProgram::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
578         {
579                 for(int index = 0; index < RENDERTARGETS; index++)
580                 {
581                         if(!state.colorWriteActive(index))
582                         {
583                                 continue;
584                         }
585
586                         if(!postBlendSRGB && state.writeSRGB && !isSRGB(index))
587                         {
588                                 c[index].x = linearToSRGB(c[index].x);
589                                 c[index].y = linearToSRGB(c[index].y);
590                                 c[index].z = linearToSRGB(c[index].z);
591                         }
592
593                         if(index == 0)
594                         {
595                                 fogBlend(c[index], fog);
596                         }
597
598                         switch(state.targetFormat[index])
599                         {
600                         case FORMAT_R5G6B5:
601                         case FORMAT_X8R8G8B8:
602                         case FORMAT_X8B8G8R8:
603                         case FORMAT_A8R8G8B8:
604                         case FORMAT_A8B8G8R8:
605                         case FORMAT_SRGB8_X8:
606                         case FORMAT_SRGB8_A8:
607                         case FORMAT_A8:
608                         case FORMAT_G16R16:
609                         case FORMAT_A16B16G16R16:
610                                 for(unsigned int q = 0; q < state.multiSample; q++)
611                                 {
612                                         Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
613                                         Vector4s color;
614
615                                         if(state.targetFormat[index] == FORMAT_R5G6B5)
616                                         {
617                                                 color.x = UShort4(c[index].x * Float4(0xFBFF), false);
618                                                 color.y = UShort4(c[index].y * Float4(0xFDFF), false);
619                                                 color.z = UShort4(c[index].z * Float4(0xFBFF), false);
620                                                 color.w = UShort4(c[index].w * Float4(0xFFFF), false);
621                                         }
622                                         else
623                                         {
624                                                 color.x = convertFixed16(c[index].x, false);
625                                                 color.y = convertFixed16(c[index].y, false);
626                                                 color.z = convertFixed16(c[index].z, false);
627                                                 color.w = convertFixed16(c[index].w, false);
628                                         }
629
630                                         if(state.multiSampleMask & (1 << q))
631                                         {
632                                                 alphaBlend(index, buffer, color, x);
633                                                 logicOperation(index, buffer, color, x);
634                                                 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
635                                         }
636                                 }
637                                 break;
638                         case FORMAT_R32F:
639                         case FORMAT_G32R32F:
640                         case FORMAT_X32B32G32R32F:
641                         case FORMAT_A32B32G32R32F:
642                         case FORMAT_R32I:
643                         case FORMAT_G32R32I:
644                         case FORMAT_A32B32G32R32I:
645                         case FORMAT_R32UI:
646                         case FORMAT_G32R32UI:
647                         case FORMAT_A32B32G32R32UI:
648                         case FORMAT_R16I:
649                         case FORMAT_G16R16I:
650                         case FORMAT_A16B16G16R16I:
651                         case FORMAT_R16UI:
652                         case FORMAT_G16R16UI:
653                         case FORMAT_A16B16G16R16UI:
654                         case FORMAT_R8I:
655                         case FORMAT_G8R8I:
656                         case FORMAT_A8B8G8R8I:
657                         case FORMAT_R8UI:
658                         case FORMAT_G8R8UI:
659                         case FORMAT_A8B8G8R8UI:
660                                 for(unsigned int q = 0; q < state.multiSample; q++)
661                                 {
662                                         Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
663                                         Vector4f color = c[index];
664
665                                         if(state.multiSampleMask & (1 << q))
666                                         {
667                                                 alphaBlend(index, buffer, color, x);
668                                                 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
669                                         }
670                                 }
671                                 break;
672                         default:
673                                 ASSERT(false);
674                         }
675                 }
676         }
677
678         void PixelProgram::sampleTexture(Vector4f &c, const Src &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerMethod method, unsigned int options)
679         {
680                 Vector4f tmp;
681
682                 if(sampler.type == Shader::PARAMETER_SAMPLER && sampler.rel.type == Shader::PARAMETER_VOID)
683                 {
684                         sampleTexture(tmp, sampler.index, u, v, w, q, dsx, dsy, offset, method, options);
685                 }
686                 else
687                 {
688                         Int index = As<Int>(Float(fetchRegister(sampler).x.x));
689
690                         for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
691                         {
692                                 if(shader->usesSampler(i))
693                                 {
694                                         If(index == i)
695                                         {
696                                                 sampleTexture(tmp, i, u, v, w, q, dsx, dsy, offset, method, options);
697                                                 // FIXME: When the sampler states are the same, we could use one sampler and just index the texture
698                                         }
699                                 }
700                         }
701                 }
702
703                 c.x = tmp[(sampler.swizzle >> 0) & 0x3];
704                 c.y = tmp[(sampler.swizzle >> 2) & 0x3];
705                 c.z = tmp[(sampler.swizzle >> 4) & 0x3];
706                 c.w = tmp[(sampler.swizzle >> 6) & 0x3];
707         }
708
709         void PixelProgram::sampleTexture(Vector4f &c, int samplerIndex, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerMethod method, unsigned int options)
710         {
711                 #if PERF_PROFILE
712                         Long texTime = Ticks();
713                 #endif
714
715                 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + samplerIndex * sizeof(Texture);
716
717                 if(!(options & Project))
718                 {
719                         sampler[samplerIndex]->sampleTexture(texture, c, u, v, w, q, dsx, dsy, offset, options, method);
720                 }
721                 else
722                 {
723                         Float4 rq = reciprocal(q);
724
725                         Float4 u_q = u * rq;
726                         Float4 v_q = v * rq;
727                         Float4 w_q = w * rq;
728
729                         sampler[samplerIndex]->sampleTexture(texture, c, u_q, v_q, w_q, q, dsx, dsy, offset, options, method);
730                 }
731
732                 #if PERF_PROFILE
733                         cycles[PERF_TEX] += Ticks() - texTime;
734                 #endif
735         }
736
737         void PixelProgram::clampColor(Vector4f oC[RENDERTARGETS])
738         {
739                 for(int index = 0; index < RENDERTARGETS; index++)
740                 {
741                         if(!state.colorWriteActive(index) && !(index == 0 && state.alphaTestActive()))
742                         {
743                                 continue;
744                         }
745
746                         switch(state.targetFormat[index])
747                         {
748                         case FORMAT_NULL:
749                                 break;
750                         case FORMAT_R5G6B5:
751                         case FORMAT_A8R8G8B8:
752                         case FORMAT_A8B8G8R8:
753                         case FORMAT_X8R8G8B8:
754                         case FORMAT_X8B8G8R8:
755                         case FORMAT_SRGB8_X8:
756                         case FORMAT_SRGB8_A8:
757                         case FORMAT_A8:
758                         case FORMAT_G16R16:
759                         case FORMAT_A16B16G16R16:
760                                 oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f));
761                                 oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f));
762                                 oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f));
763                                 oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f));
764                                 break;
765                         case FORMAT_R32F:
766                         case FORMAT_G32R32F:
767                         case FORMAT_X32B32G32R32F:
768                         case FORMAT_A32B32G32R32F:
769                         case FORMAT_R32I:
770                         case FORMAT_G32R32I:
771                         case FORMAT_A32B32G32R32I:
772                         case FORMAT_R32UI:
773                         case FORMAT_G32R32UI:
774                         case FORMAT_A32B32G32R32UI:
775                                 break;
776                         default:
777                                 ASSERT(false);
778                         }
779                 }
780         }
781
782         Int4 PixelProgram::enableMask(const Shader::Instruction *instruction)
783         {
784                 Int4 enable = instruction->analysisBranch ? Int4(enableStack[enableIndex]) : Int4(0xFFFFFFFF);
785
786                 if(!whileTest)
787                 {
788                         if(shader->containsBreakInstruction() && instruction->analysisBreak)
789                         {
790                                 enable &= enableBreak;
791                         }
792
793                         if(shader->containsContinueInstruction() && instruction->analysisContinue)
794                         {
795                                 enable &= enableContinue;
796                         }
797
798                         if(shader->containsLeaveInstruction() && instruction->analysisLeave)
799                         {
800                                 enable &= enableLeave;
801                         }
802                 }
803
804                 return enable;
805         }
806
807         Vector4f PixelProgram::fetchRegister(const Src &src, unsigned int offset)
808         {
809                 Vector4f reg;
810                 unsigned int i = src.index + offset;
811
812                 switch(src.type)
813                 {
814                 case Shader::PARAMETER_TEMP:
815                         if(src.rel.type == Shader::PARAMETER_VOID)
816                         {
817                                 reg = r[i];
818                         }
819                         else
820                         {
821                                 Int a = relativeAddress(src, src.bufferIndex);
822
823                                 reg = r[i + a];
824                         }
825                         break;
826                 case Shader::PARAMETER_INPUT:
827                         {
828                                 if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
829                                 {
830                                         reg = v[i];
831                                 }
832                                 else
833                                 {
834                                         Int a = relativeAddress(src, src.bufferIndex);
835
836                                         reg = v[i + a];
837                                 }
838                         }
839                         break;
840                 case Shader::PARAMETER_CONST:
841                         reg = readConstant(src, offset);
842                         break;
843                 case Shader::PARAMETER_TEXTURE:
844                         reg = v[2 + i];
845                         break;
846                 case Shader::PARAMETER_MISCTYPE:
847                         if(src.index == 0) reg = vPos;
848                         if(src.index == 1) reg = vFace;
849                         break;
850                 case Shader::PARAMETER_SAMPLER:
851                         if(src.rel.type == Shader::PARAMETER_VOID)
852                         {
853                                 reg.x = As<Float4>(Int4(i));
854                         }
855                         else if(src.rel.type == Shader::PARAMETER_TEMP)
856                         {
857                                 reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x));
858                         }
859                         return reg;
860                 case Shader::PARAMETER_PREDICATE:   return reg; // Dummy
861                 case Shader::PARAMETER_VOID:        return reg; // Dummy
862                 case Shader::PARAMETER_FLOAT4LITERAL:
863                         reg.x = Float4(src.value[0]);
864                         reg.y = Float4(src.value[1]);
865                         reg.z = Float4(src.value[2]);
866                         reg.w = Float4(src.value[3]);
867                         break;
868                 case Shader::PARAMETER_CONSTINT:    return reg; // Dummy
869                 case Shader::PARAMETER_CONSTBOOL:   return reg; // Dummy
870                 case Shader::PARAMETER_LOOP:        return reg; // Dummy
871                 case Shader::PARAMETER_COLOROUT:
872                         if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
873                         {
874                                 reg = oC[i];
875                         }
876                         else
877                         {
878                                 Int a = relativeAddress(src, src.bufferIndex);
879
880                                 reg = oC[i + a];
881                         }
882                         break;
883                 case Shader::PARAMETER_DEPTHOUT:
884                         reg.x = oDepth;
885                         break;
886                 default:
887                         ASSERT(false);
888                 }
889
890                 const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
891                 const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
892                 const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
893                 const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
894
895                 Vector4f mod;
896
897                 switch(src.modifier)
898                 {
899                 case Shader::MODIFIER_NONE:
900                         mod.x = x;
901                         mod.y = y;
902                         mod.z = z;
903                         mod.w = w;
904                         break;
905                 case Shader::MODIFIER_NEGATE:
906                         mod.x = -x;
907                         mod.y = -y;
908                         mod.z = -z;
909                         mod.w = -w;
910                         break;
911                 case Shader::MODIFIER_ABS:
912                         mod.x = Abs(x);
913                         mod.y = Abs(y);
914                         mod.z = Abs(z);
915                         mod.w = Abs(w);
916                         break;
917                 case Shader::MODIFIER_ABS_NEGATE:
918                         mod.x = -Abs(x);
919                         mod.y = -Abs(y);
920                         mod.z = -Abs(z);
921                         mod.w = -Abs(w);
922                         break;
923                 case Shader::MODIFIER_NOT:
924                         mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
925                         mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
926                         mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
927                         mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
928                         break;
929                 default:
930                         ASSERT(false);
931                 }
932
933                 return mod;
934         }
935
936         RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index)
937         {
938                 if(bufferIndex == -1)
939                 {
940                         return data + OFFSET(DrawData, ps.c[index]);
941                 }
942                 else
943                 {
944                         return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, ps.u[bufferIndex])) + index;
945                 }
946         }
947
948         RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index, Int& offset)
949         {
950                 return uniformAddress(bufferIndex, index) + offset * sizeof(float4);
951         }
952
953         Vector4f PixelProgram::readConstant(const Src &src, unsigned int offset)
954         {
955                 Vector4f c;
956                 unsigned int i = src.index + offset;
957
958                 if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
959                 {
960                         c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i));
961
962                         c.x = c.x.xxxx;
963                         c.y = c.y.yyyy;
964                         c.z = c.z.zzzz;
965                         c.w = c.w.wwww;
966
967                         if(shader->containsDefineInstruction())   // Constant may be known at compile time
968                         {
969                                 for(size_t j = 0; j < shader->getLength(); j++)
970                                 {
971                                         const Shader::Instruction &instruction = *shader->getInstruction(j);
972
973                                         if(instruction.opcode == Shader::OPCODE_DEF)
974                                         {
975                                                 if(instruction.dst.index == i)
976                                                 {
977                                                         c.x = Float4(instruction.src[0].value[0]);
978                                                         c.y = Float4(instruction.src[0].value[1]);
979                                                         c.z = Float4(instruction.src[0].value[2]);
980                                                         c.w = Float4(instruction.src[0].value[3]);
981
982                                                         break;
983                                                 }
984                                         }
985                                 }
986                         }
987                 }
988                 else if(src.rel.type == Shader::PARAMETER_LOOP)
989                 {
990                         Int loopCounter = aL[loopDepth];
991
992                         c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, loopCounter));
993
994                         c.x = c.x.xxxx;
995                         c.y = c.y.yyyy;
996                         c.z = c.z.zzzz;
997                         c.w = c.w.wwww;
998                 }
999                 else
1000                 {
1001                         Int a = relativeAddress(src, src.bufferIndex);
1002
1003                         c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));
1004
1005                         c.x = c.x.xxxx;
1006                         c.y = c.y.yyyy;
1007                         c.z = c.z.zzzz;
1008                         c.w = c.w.wwww;
1009                 }
1010
1011                 return c;
1012         }
1013
1014         Int PixelProgram::relativeAddress(const Shader::Parameter &var, int bufferIndex)
1015         {
1016                 ASSERT(var.rel.deterministic);
1017
1018                 if(var.rel.type == Shader::PARAMETER_TEMP)
1019                 {
1020                         return As<Int>(Extract(r[var.rel.index].x, 0)) * var.rel.scale;
1021                 }
1022                 else if(var.rel.type == Shader::PARAMETER_INPUT)
1023                 {
1024                         return As<Int>(Extract(v[var.rel.index].x, 0)) * var.rel.scale;
1025                 }
1026                 else if(var.rel.type == Shader::PARAMETER_OUTPUT)
1027                 {
1028                         return As<Int>(Extract(oC[var.rel.index].x, 0)) * var.rel.scale;
1029                 }
1030                 else if(var.rel.type == Shader::PARAMETER_CONST)
1031                 {
1032                         return *Pointer<Int>(uniformAddress(bufferIndex, var.rel.index)) * var.rel.scale;
1033                 }
1034                 else if(var.rel.type == Shader::PARAMETER_LOOP)
1035                 {
1036                         return aL[loopDepth];
1037                 }
1038                 else ASSERT(false);
1039
1040                 return 0;
1041         }
1042
1043         Float4 PixelProgram::linearToSRGB(const Float4 &x)   // Approximates x^(1.0/2.2)
1044         {
1045                 Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x));
1046                 Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f);
1047
1048                 return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f));
1049         }
1050
1051         void PixelProgram::M3X2(Vector4f &dst, Vector4f &src0, const Src &src1)
1052         {
1053                 Vector4f row0 = fetchRegister(src1, 0);
1054                 Vector4f row1 = fetchRegister(src1, 1);
1055
1056                 dst.x = dot3(src0, row0);
1057                 dst.y = dot3(src0, row1);
1058         }
1059
1060         void PixelProgram::M3X3(Vector4f &dst, Vector4f &src0, const Src &src1)
1061         {
1062                 Vector4f row0 = fetchRegister(src1, 0);
1063                 Vector4f row1 = fetchRegister(src1, 1);
1064                 Vector4f row2 = fetchRegister(src1, 2);
1065
1066                 dst.x = dot3(src0, row0);
1067                 dst.y = dot3(src0, row1);
1068                 dst.z = dot3(src0, row2);
1069         }
1070
1071         void PixelProgram::M3X4(Vector4f &dst, Vector4f &src0, const Src &src1)
1072         {
1073                 Vector4f row0 = fetchRegister(src1, 0);
1074                 Vector4f row1 = fetchRegister(src1, 1);
1075                 Vector4f row2 = fetchRegister(src1, 2);
1076                 Vector4f row3 = fetchRegister(src1, 3);
1077
1078                 dst.x = dot3(src0, row0);
1079                 dst.y = dot3(src0, row1);
1080                 dst.z = dot3(src0, row2);
1081                 dst.w = dot3(src0, row3);
1082         }
1083
1084         void PixelProgram::M4X3(Vector4f &dst, Vector4f &src0, const Src &src1)
1085         {
1086                 Vector4f row0 = fetchRegister(src1, 0);
1087                 Vector4f row1 = fetchRegister(src1, 1);
1088                 Vector4f row2 = fetchRegister(src1, 2);
1089
1090                 dst.x = dot4(src0, row0);
1091                 dst.y = dot4(src0, row1);
1092                 dst.z = dot4(src0, row2);
1093         }
1094
1095         void PixelProgram::M4X4(Vector4f &dst, Vector4f &src0, const Src &src1)
1096         {
1097                 Vector4f row0 = fetchRegister(src1, 0);
1098                 Vector4f row1 = fetchRegister(src1, 1);
1099                 Vector4f row2 = fetchRegister(src1, 2);
1100                 Vector4f row3 = fetchRegister(src1, 3);
1101
1102                 dst.x = dot4(src0, row0);
1103                 dst.y = dot4(src0, row1);
1104                 dst.z = dot4(src0, row2);
1105                 dst.w = dot4(src0, row3);
1106         }
1107
1108         void PixelProgram::TEXLD(Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias)
1109         {
1110                 sampleTexture(dst, src1, src0.x, src0.y, src0.z, src0.w, src0, src0, src0, bias ? Bias : Implicit, project ? Project : None);
1111         }
1112
1113         void PixelProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2, bool project, bool bias)
1114         {
1115                 sampleTexture(dst, src1, src0.x, src0.y, src0.z, src0.w, src0, src0, src2, bias ? Bias : Implicit, project ? (Project | Offset) : Offset);
1116         }
1117
1118         void PixelProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, bool project, bool bias)
1119         {
1120                 sampleTexture(dst, src1, src0.x, src0.y, src0.z, src0.w, src0, src0, offset, Lod, project ? (Project | Offset) : Offset);
1121         }
1122
1123         void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2)
1124         {
1125                 sampleTexture(dst, src1, src0.x, src0.y, src0.z, Float4(As<Int4>(src2.x)), src0, src0, src0, Lod, Fetch);
1126         }
1127
1128         void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &offset)
1129         {
1130                 sampleTexture(dst, src1, src0.x, src0.y, src0.z, Float4(As<Int4>(src2.x)), src0, src0, offset, Lod, Fetch | Offset);
1131         }
1132
1133         void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3)
1134         {
1135                 sampleTexture(dst, src1, src0.x, src0.y, src0.z, src0.w, src2, src3, src0, Grad, None);
1136         }
1137
1138         void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3, Vector4f &offset)
1139         {
1140                 sampleTexture(dst, src1, src0.x, src0.y, src0.z, src0.w, src2, src3, offset, Grad, Offset);
1141         }
1142
1143         void PixelProgram::TEXLDD(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2, Vector4f &src3, bool project)
1144         {
1145                 sampleTexture(dst, src1, src0.x, src0.y, src0.z, src0.w, src2, src3, src0, Grad, project ? Project : None);
1146         }
1147
1148         void PixelProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1, bool project)
1149         {
1150                 sampleTexture(dst, src1, src0.x, src0.y, src0.z, src0.w, src0, src0, src0, Lod, project ? Project : None);
1151         }
1152
1153         void PixelProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1)
1154         {
1155                 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + src1.index * sizeof(Texture);
1156                 sampler[src1.index]->textureSize(texture, dst, lod);
1157         }
1158
1159         void PixelProgram::TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask)
1160         {
1161                 Int kill = -1;
1162
1163                 if(mask & 0x1) kill &= SignMask(CmpNLT(src.x, Float4(0.0f)));
1164                 if(mask & 0x2) kill &= SignMask(CmpNLT(src.y, Float4(0.0f)));
1165                 if(mask & 0x4) kill &= SignMask(CmpNLT(src.z, Float4(0.0f)));
1166                 if(mask & 0x8) kill &= SignMask(CmpNLT(src.w, Float4(0.0f)));
1167
1168                 // FIXME: Dynamic branching affects TEXKILL?
1169                 //      if(shader->containsDynamicBranching())
1170                 //      {
1171                 //              kill = ~SignMask(enableMask());
1172                 //      }
1173
1174                 for(unsigned int q = 0; q < state.multiSample; q++)
1175                 {
1176                         cMask[q] &= kill;
1177                 }
1178
1179                 // FIXME: Branch to end of shader if all killed?
1180         }
1181
1182         void PixelProgram::DISCARD(Int cMask[4], const Shader::Instruction *instruction)
1183         {
1184                 Int kill = 0;
1185
1186                 if(shader->containsDynamicBranching())
1187                 {
1188                         kill = ~SignMask(enableMask(instruction));
1189                 }
1190
1191                 for(unsigned int q = 0; q < state.multiSample; q++)
1192                 {
1193                         cMask[q] &= kill;
1194                 }
1195
1196                 // FIXME: Branch to end of shader if all killed?
1197         }
1198
1199         void PixelProgram::DFDX(Vector4f &dst, Vector4f &src)
1200         {
1201                 dst.x = src.x.yyww - src.x.xxzz;
1202                 dst.y = src.y.yyww - src.y.xxzz;
1203                 dst.z = src.z.yyww - src.z.xxzz;
1204                 dst.w = src.w.yyww - src.w.xxzz;
1205         }
1206
1207         void PixelProgram::DFDY(Vector4f &dst, Vector4f &src)
1208         {
1209                 dst.x = src.x.zwzw - src.x.xyxy;
1210                 dst.y = src.y.zwzw - src.y.xyxy;
1211                 dst.z = src.z.zwzw - src.z.xyxy;
1212                 dst.w = src.w.zwzw - src.w.xyxy;
1213         }
1214
1215         void PixelProgram::FWIDTH(Vector4f &dst, Vector4f &src)
1216         {
1217                 // abs(dFdx(src)) + abs(dFdy(src));
1218                 dst.x = Abs(src.x.yyww - src.x.xxzz) + Abs(src.x.zwzw - src.x.xyxy);
1219                 dst.y = Abs(src.y.yyww - src.y.xxzz) + Abs(src.y.zwzw - src.y.xyxy);
1220                 dst.z = Abs(src.z.yyww - src.z.xxzz) + Abs(src.z.zwzw - src.z.xyxy);
1221                 dst.w = Abs(src.w.yyww - src.w.xxzz) + Abs(src.w.zwzw - src.w.xyxy);
1222         }
1223
1224         void PixelProgram::BREAK()
1225         {
1226                 llvm::BasicBlock *deadBlock = Nucleus::createBasicBlock();
1227                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
1228
1229                 if(breakDepth == 0)
1230                 {
1231                         enableIndex = enableIndex - breakDepth;
1232                         Nucleus::createBr(endBlock);
1233                 }
1234                 else
1235                 {
1236                         enableBreak = enableBreak & ~enableStack[enableIndex];
1237                         Bool allBreak = SignMask(enableBreak) == 0x0;
1238
1239                         enableIndex = enableIndex - breakDepth;
1240                         branch(allBreak, endBlock, deadBlock);
1241                 }
1242
1243                 Nucleus::setInsertBlock(deadBlock);
1244                 enableIndex = enableIndex + breakDepth;
1245         }
1246
1247         void PixelProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control)
1248         {
1249                 Int4 condition;
1250
1251                 switch(control)
1252                 {
1253                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
1254                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
1255                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
1256                 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
1257                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
1258                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
1259                 default:
1260                         ASSERT(false);
1261                 }
1262
1263                 BREAK(condition);
1264         }
1265
1266         void PixelProgram::BREAKP(const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
1267         {
1268                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1269
1270                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1271                 {
1272                         condition = ~condition;
1273                 }
1274
1275                 BREAK(condition);
1276         }
1277
1278         void PixelProgram::BREAK(Int4 &condition)
1279         {
1280                 condition &= enableStack[enableIndex];
1281
1282                 llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock();
1283                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
1284
1285                 enableBreak = enableBreak & ~condition;
1286                 Bool allBreak = SignMask(enableBreak) == 0x0;
1287
1288                 enableIndex = enableIndex - breakDepth;
1289                 branch(allBreak, endBlock, continueBlock);
1290
1291                 Nucleus::setInsertBlock(continueBlock);
1292                 enableIndex = enableIndex + breakDepth;
1293         }
1294
1295         void PixelProgram::CONTINUE()
1296         {
1297                 enableContinue = enableContinue & ~enableStack[enableIndex];
1298         }
1299
1300         void PixelProgram::TEST()
1301         {
1302                 whileTest = true;
1303         }
1304
1305         void PixelProgram::CALL(int labelIndex, int callSiteIndex)
1306         {
1307                 if(!labelBlock[labelIndex])
1308                 {
1309                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1310                 }
1311
1312                 if(callRetBlock[labelIndex].size() > 1)
1313                 {
1314                         callStack[stackIndex++] = UInt(callSiteIndex);
1315                 }
1316
1317                 Int4 restoreLeave = enableLeave;
1318
1319                 Nucleus::createBr(labelBlock[labelIndex]);
1320                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1321
1322                 enableLeave = restoreLeave;
1323         }
1324
1325         void PixelProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src)
1326         {
1327                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1328                 {
1329                         CALLNZb(labelIndex, callSiteIndex, src);
1330                 }
1331                 else if(src.type == Shader::PARAMETER_PREDICATE)
1332                 {
1333                         CALLNZp(labelIndex, callSiteIndex, src);
1334                 }
1335                 else ASSERT(false);
1336         }
1337
1338         void PixelProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister)
1339         {
1340                 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0));   // FIXME
1341
1342                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1343                 {
1344                         condition = !condition;
1345                 }
1346
1347                 if(!labelBlock[labelIndex])
1348                 {
1349                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1350                 }
1351
1352                 if(callRetBlock[labelIndex].size() > 1)
1353                 {
1354                         callStack[stackIndex++] = UInt(callSiteIndex);
1355                 }
1356
1357                 Int4 restoreLeave = enableLeave;
1358
1359                 branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1360                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1361
1362                 enableLeave = restoreLeave;
1363         }
1364
1365         void PixelProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister)
1366         {
1367                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1368
1369                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1370                 {
1371                         condition = ~condition;
1372                 }
1373
1374                 condition &= enableStack[enableIndex];
1375
1376                 if(!labelBlock[labelIndex])
1377                 {
1378                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1379                 }
1380
1381                 if(callRetBlock[labelIndex].size() > 1)
1382                 {
1383                         callStack[stackIndex++] = UInt(callSiteIndex);
1384                 }
1385
1386                 enableIndex++;
1387                 enableStack[enableIndex] = condition;
1388                 Int4 restoreLeave = enableLeave;
1389
1390                 Bool notAllFalse = SignMask(condition) != 0;
1391                 branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1392                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1393
1394                 enableIndex--;
1395                 enableLeave = restoreLeave;
1396         }
1397
1398         void PixelProgram::ELSE()
1399         {
1400                 ifDepth--;
1401
1402                 llvm::BasicBlock *falseBlock = ifFalseBlock[ifDepth];
1403                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1404
1405                 if(isConditionalIf[ifDepth])
1406                 {
1407                         Int4 condition = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
1408                         Bool notAllFalse = SignMask(condition) != 0;
1409
1410                         branch(notAllFalse, falseBlock, endBlock);
1411
1412                         enableStack[enableIndex] = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
1413                 }
1414                 else
1415                 {
1416                         Nucleus::createBr(endBlock);
1417                         Nucleus::setInsertBlock(falseBlock);
1418                 }
1419
1420                 ifFalseBlock[ifDepth] = endBlock;
1421
1422                 ifDepth++;
1423         }
1424
1425         void PixelProgram::ENDIF()
1426         {
1427                 ifDepth--;
1428
1429                 llvm::BasicBlock *endBlock = ifFalseBlock[ifDepth];
1430
1431                 Nucleus::createBr(endBlock);
1432                 Nucleus::setInsertBlock(endBlock);
1433
1434                 if(isConditionalIf[ifDepth])
1435                 {
1436                         breakDepth--;
1437                         enableIndex--;
1438                 }
1439         }
1440
1441         void PixelProgram::ENDLOOP()
1442         {
1443                 loopRepDepth--;
1444
1445                 aL[loopDepth] = aL[loopDepth] + increment[loopDepth];   // FIXME: +=
1446
1447                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1448                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1449
1450                 Nucleus::createBr(testBlock);
1451                 Nucleus::setInsertBlock(endBlock);
1452
1453                 loopDepth--;
1454                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1455         }
1456
1457         void PixelProgram::ENDREP()
1458         {
1459                 loopRepDepth--;
1460
1461                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1462                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1463
1464                 Nucleus::createBr(testBlock);
1465                 Nucleus::setInsertBlock(endBlock);
1466
1467                 loopDepth--;
1468                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1469         }
1470
1471         void PixelProgram::ENDWHILE()
1472         {
1473                 loopRepDepth--;
1474
1475                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1476                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1477
1478                 Nucleus::createBr(testBlock);
1479                 Nucleus::setInsertBlock(endBlock);
1480
1481                 enableIndex--;
1482                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1483                 whileTest = false;
1484         }
1485
1486         void PixelProgram::ENDSWITCH()
1487         {
1488                 loopRepDepth--;
1489
1490                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1491
1492                 Nucleus::createBr(loopRepEndBlock[loopRepDepth]);
1493                 Nucleus::setInsertBlock(endBlock);
1494
1495                 enableIndex--;
1496                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1497         }
1498
1499         void PixelProgram::IF(const Src &src)
1500         {
1501                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1502                 {
1503                         IFb(src);
1504                 }
1505                 else if(src.type == Shader::PARAMETER_PREDICATE)
1506                 {
1507                         IFp(src);
1508                 }
1509                 else
1510                 {
1511                         Int4 condition = As<Int4>(fetchRegister(src).x);
1512                         IF(condition);
1513                 }
1514         }
1515
1516         void PixelProgram::IFb(const Src &boolRegister)
1517         {
1518                 ASSERT(ifDepth < 24 + 4);
1519
1520                 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0));   // FIXME
1521
1522                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1523                 {
1524                         condition = !condition;
1525                 }
1526
1527                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1528                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1529
1530                 branch(condition, trueBlock, falseBlock);
1531
1532                 isConditionalIf[ifDepth] = false;
1533                 ifFalseBlock[ifDepth] = falseBlock;
1534
1535                 ifDepth++;
1536         }
1537
1538         void PixelProgram::IFp(const Src &predicateRegister)
1539         {
1540                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1541
1542                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1543                 {
1544                         condition = ~condition;
1545                 }
1546
1547                 IF(condition);
1548         }
1549
1550         void PixelProgram::IFC(Vector4f &src0, Vector4f &src1, Control control)
1551         {
1552                 Int4 condition;
1553
1554                 switch(control)
1555                 {
1556                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
1557                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
1558                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
1559                 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
1560                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
1561                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
1562                 default:
1563                         ASSERT(false);
1564                 }
1565
1566                 IF(condition);
1567         }
1568
1569         void PixelProgram::IF(Int4 &condition)
1570         {
1571                 condition &= enableStack[enableIndex];
1572
1573                 enableIndex++;
1574                 enableStack[enableIndex] = condition;
1575
1576                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1577                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1578
1579                 Bool notAllFalse = SignMask(condition) != 0;
1580
1581                 branch(notAllFalse, trueBlock, falseBlock);
1582
1583                 isConditionalIf[ifDepth] = true;
1584                 ifFalseBlock[ifDepth] = falseBlock;
1585
1586                 ifDepth++;
1587                 breakDepth++;
1588         }
1589
1590         void PixelProgram::LABEL(int labelIndex)
1591         {
1592                 if(!labelBlock[labelIndex])
1593                 {
1594                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1595                 }
1596
1597                 Nucleus::setInsertBlock(labelBlock[labelIndex]);
1598                 currentLabel = labelIndex;
1599         }
1600
1601         void PixelProgram::LOOP(const Src &integerRegister)
1602         {
1603                 loopDepth++;
1604
1605                 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0]));
1606                 aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][1]));
1607                 increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][2]));
1608
1609                 //      If(increment[loopDepth] == 0)
1610                 //      {
1611                 //              increment[loopDepth] = 1;
1612                 //      }
1613
1614                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1615                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1616                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1617
1618                 loopRepTestBlock[loopRepDepth] = testBlock;
1619                 loopRepEndBlock[loopRepDepth] = endBlock;
1620
1621                 // FIXME: jump(testBlock)
1622                 Nucleus::createBr(testBlock);
1623                 Nucleus::setInsertBlock(testBlock);
1624
1625                 branch(iteration[loopDepth] > 0, loopBlock, endBlock);
1626                 Nucleus::setInsertBlock(loopBlock);
1627
1628                 iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1629
1630                 loopRepDepth++;
1631                 breakDepth = 0;
1632         }
1633
1634         void PixelProgram::REP(const Src &integerRegister)
1635         {
1636                 loopDepth++;
1637
1638                 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0]));
1639                 aL[loopDepth] = aL[loopDepth - 1];
1640
1641                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1642                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1643                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1644
1645                 loopRepTestBlock[loopRepDepth] = testBlock;
1646                 loopRepEndBlock[loopRepDepth] = endBlock;
1647
1648                 // FIXME: jump(testBlock)
1649                 Nucleus::createBr(testBlock);
1650                 Nucleus::setInsertBlock(testBlock);
1651
1652                 branch(iteration[loopDepth] > 0, loopBlock, endBlock);
1653                 Nucleus::setInsertBlock(loopBlock);
1654
1655                 iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1656
1657                 loopRepDepth++;
1658                 breakDepth = 0;
1659         }
1660
1661         void PixelProgram::WHILE(const Src &temporaryRegister)
1662         {
1663                 enableIndex++;
1664
1665                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1666                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1667                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1668
1669                 loopRepTestBlock[loopRepDepth] = testBlock;
1670                 loopRepEndBlock[loopRepDepth] = endBlock;
1671
1672                 Int4 restoreBreak = enableBreak;
1673                 Int4 restoreContinue = enableContinue;
1674
1675                 // FIXME: jump(testBlock)
1676                 Nucleus::createBr(testBlock);
1677                 Nucleus::setInsertBlock(testBlock);
1678                 enableContinue = restoreContinue;
1679
1680                 const Vector4f &src = fetchRegister(temporaryRegister);
1681                 Int4 condition = As<Int4>(src.x);
1682                 condition &= enableStack[enableIndex - 1];
1683                 if(shader->containsLeaveInstruction()) condition &= enableLeave;
1684                 enableStack[enableIndex] = condition;
1685
1686                 Bool notAllFalse = SignMask(condition) != 0;
1687                 branch(notAllFalse, loopBlock, endBlock);
1688
1689                 Nucleus::setInsertBlock(endBlock);
1690                 enableBreak = restoreBreak;
1691
1692                 Nucleus::setInsertBlock(loopBlock);
1693
1694                 loopRepDepth++;
1695                 breakDepth = 0;
1696         }
1697
1698         void PixelProgram::SWITCH()
1699         {
1700                 enableIndex++;
1701                 enableStack[enableIndex] = Int4(0xFFFFFFFF);
1702
1703                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1704
1705                 loopRepTestBlock[loopRepDepth] = nullptr;
1706                 loopRepEndBlock[loopRepDepth] = endBlock;
1707
1708                 loopRepDepth++;
1709                 breakDepth = 0;
1710         }
1711
1712         void PixelProgram::RET()
1713         {
1714                 if(currentLabel == -1)
1715                 {
1716                         returnBlock = Nucleus::createBasicBlock();
1717                         Nucleus::createBr(returnBlock);
1718                 }
1719                 else
1720                 {
1721                         llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
1722
1723                         if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
1724                         {
1725                                 // FIXME: Encapsulate
1726                                 UInt index = callStack[--stackIndex];
1727
1728                                 llvm::Value *value = index.loadValue();
1729                                 llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
1730
1731                                 for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
1732                                 {
1733                                         Nucleus::addSwitchCase(switchInst, i, callRetBlock[currentLabel][i]);
1734                                 }
1735                         }
1736                         else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
1737                         {
1738                                 Nucleus::createBr(callRetBlock[currentLabel][0]);
1739                         }
1740                         else   // Function isn't called
1741                         {
1742                                 Nucleus::createBr(unreachableBlock);
1743                         }
1744
1745                         Nucleus::setInsertBlock(unreachableBlock);
1746                         Nucleus::createUnreachable();
1747                 }
1748         }
1749
1750         void PixelProgram::LEAVE()
1751         {
1752                 enableLeave = enableLeave & ~enableStack[enableIndex];
1753
1754                 // FIXME: Return from function if all instances left
1755                 // FIXME: Use enableLeave in other control-flow constructs
1756         }
1757 }