OSDN Git Service

Fix clamping depth output to [0, 1] range.
[android-x86/external-swiftshader.git] / src / Shader / PixelProgram.cpp
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "PixelProgram.hpp"
16
17 #include "SamplerCore.hpp"
18 #include "Renderer/Primitive.hpp"
19 #include "Renderer/Renderer.hpp"
20
21 namespace sw
22 {
23         extern bool postBlendSRGB;              // Defined in another translation unit; presumably selects sRGB conversion after blending - TODO confirm
24         extern bool booleanFaceRegister;        // When set, setBuiltins() stores a comparison result in vFace instead of the raw primitive area
25         extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
26         extern bool fullPixelPositionRegister;  // When set, setBuiltins() also fills vPos.z and vPos.w
27
28         void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w)   // Initializes the vPos and vFace built-in registers
29         {
                   // Parameters (all are only read by this function):
                   //   x, y - integer pixel coordinates; the per-lane offsets added below look like
                   //          a 2x2 pixel quad anchored at (x, y) - TODO confirm
                   //   z    - array of four depth vectors; only z[0] is used here
                   //   w    - copied to vPos.w when the full position register is enabled
                   //
                   // Nothing is written unless the shader reports a model of at least 0x0300
                   // (presumably shader model 3.0) and declares the corresponding register.
30                 if(shader->getShaderModel() >= 0x0300)
31                 {
32                         if(shader->isVPosDeclared())
33                         {
                                   // The second variant adds 0.5 to every lane of both coordinates
                                   // compared to the first one.
34                                 if(!halfIntegerCoordinates)
35                                 {
36                                         vPos.x = Float4(Float(x)) + Float4(0, 1, 0, 1);
37                                         vPos.y = Float4(Float(y)) + Float4(0, 0, 1, 1);
38                                 }
39                                 else
40                                 {
41                                         vPos.x = Float4(Float(x)) + Float4(0.5f, 1.5f, 0.5f, 1.5f);
42                                         vPos.y = Float4(Float(y)) + Float4(0.5f, 0.5f, 1.5f, 1.5f);
43                                 }
44
                                   // z and w are only exposed through vPos when explicitly enabled;
                                   // otherwise vPos.z and vPos.w are left untouched by this function.
45                                 if(fullPixelPositionRegister)
46                                 {
47                                         vPos.z = z[0]; // FIXME: Centroid?
48                                         vPos.w = w;    // FIXME: Centroid?
49                                 }
50                         }
51
52                         if(shader->isVFaceDeclared())
53                         {
                                   // A single Float is read from the Primitive's 'area' field and
                                   // converted to a Float4.
54                                 Float4 area = *Pointer<Float>(primitive + OFFSET(Primitive, area));
                                   // With booleanFaceRegister the register holds the CmpNLT(area, 0) result
                                   // reinterpreted as floats (presumably an all-ones mask where area >= 0 -
                                   // TODO confirm); otherwise it holds the area value itself.
55                                 Float4 face = booleanFaceRegister ? Float4(As<Float4>(CmpNLT(area, Float4(0.0f)))) : area;
56
                                   // The same value is replicated into all four components of vFace.
57                                 vFace.x = face;
58                                 vFace.y = face;
59                                 vFace.z = face;
60                                 vFace.w = face;
61                         }
62                 }
63         }
64
65         void PixelProgram::applyShader(Int cMask[4])
66         {
67                 enableIndex = 0;
68                 stackIndex = 0;
69
70                 if(shader->containsLeaveInstruction())
71                 {
72                         enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
73                 }
74
75                 for(int i = 0; i < RENDERTARGETS; i++)
76                 {
77                         if(state.targetFormat[i] != FORMAT_NULL)
78                         {
79                                 oC[i] = Vector4f(0.0f, 0.0f, 0.0f, 0.0f);
80                         }
81                 }
82
83                 // Create all call site return blocks up front
84                 for(size_t i = 0; i < shader->getLength(); i++)
85                 {
86                         const Shader::Instruction *instruction = shader->getInstruction(i);
87                         Shader::Opcode opcode = instruction->opcode;
88
89                         if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
90                         {
91                                 const Dst &dst = instruction->dst;
92
93                                 ASSERT(callRetBlock[dst.label].size() == dst.callSite);
94                                 callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
95                         }
96                 }
97
98                 bool broadcastColor0 = true;
99
100                 for(size_t i = 0; i < shader->getLength(); i++)
101                 {
102                         const Shader::Instruction *instruction = shader->getInstruction(i);
103                         Shader::Opcode opcode = instruction->opcode;
104
105                         if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
106                         {
107                                 continue;
108                         }
109
110                         const Dst &dst = instruction->dst;
111                         const Src &src0 = instruction->src[0];
112                         const Src &src1 = instruction->src[1];
113                         const Src &src2 = instruction->src[2];
114                         const Src &src3 = instruction->src[3];
115                         const Src &src4 = instruction->src[4];
116
117                         bool predicate = instruction->predicate;
118                         Control control = instruction->control;
119                         bool pp = dst.partialPrecision;
120                         bool project = instruction->project;
121                         bool bias = instruction->bias;
122
123                         Vector4f d;
124                         Vector4f s0;
125                         Vector4f s1;
126                         Vector4f s2;
127                         Vector4f s3;
128                         Vector4f s4;
129
130                         if(opcode == Shader::OPCODE_TEXKILL)   // Takes destination as input
131                         {
132                                 if(dst.type == Shader::PARAMETER_TEXTURE)
133                                 {
134                                         d.x = v[2 + dst.index].x;
135                                         d.y = v[2 + dst.index].y;
136                                         d.z = v[2 + dst.index].z;
137                                         d.w = v[2 + dst.index].w;
138                                 }
139                                 else
140                                 {
141                                         d = r[dst.index];
142                                 }
143                         }
144
145                         if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0);
146                         if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1);
147                         if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2);
148                         if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3);
149                         if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4);
150
151                         switch(opcode)
152                         {
153                         case Shader::OPCODE_PS_2_0:                                                    break;
154                         case Shader::OPCODE_PS_2_x:                                                    break;
155                         case Shader::OPCODE_PS_3_0:                                                    break;
156                         case Shader::OPCODE_DEF:                                                       break;
157                         case Shader::OPCODE_DCL:                                                       break;
158                         case Shader::OPCODE_NOP:                                                       break;
159                         case Shader::OPCODE_MOV:        mov(d, s0);                                    break;
160                         case Shader::OPCODE_NEG:        neg(d, s0);                                    break;
161                         case Shader::OPCODE_INEG:       ineg(d, s0);                                   break;
162                         case Shader::OPCODE_F2B:        f2b(d, s0);                                    break;
163                         case Shader::OPCODE_B2F:        b2f(d, s0);                                    break;
164                         case Shader::OPCODE_F2I:        f2i(d, s0);                                    break;
165                         case Shader::OPCODE_I2F:        i2f(d, s0);                                    break;
166                         case Shader::OPCODE_F2U:        f2u(d, s0);                                    break;
167                         case Shader::OPCODE_U2F:        u2f(d, s0);                                    break;
168                         case Shader::OPCODE_I2B:        i2b(d, s0);                                    break;
169                         case Shader::OPCODE_B2I:        b2i(d, s0);                                    break;
170                         case Shader::OPCODE_ADD:        add(d, s0, s1);                                break;
171                         case Shader::OPCODE_IADD:       iadd(d, s0, s1);                               break;
172                         case Shader::OPCODE_SUB:        sub(d, s0, s1);                                break;
173                         case Shader::OPCODE_ISUB:       isub(d, s0, s1);                               break;
174                         case Shader::OPCODE_MUL:        mul(d, s0, s1);                                break;
175                         case Shader::OPCODE_IMUL:       imul(d, s0, s1);                               break;
176                         case Shader::OPCODE_MAD:        mad(d, s0, s1, s2);                            break;
177                         case Shader::OPCODE_IMAD:       imad(d, s0, s1, s2);                           break;
178                         case Shader::OPCODE_DP1:        dp1(d, s0, s1);                                break;
179                         case Shader::OPCODE_DP2:        dp2(d, s0, s1);                                break;
180                         case Shader::OPCODE_DP2ADD:     dp2add(d, s0, s1, s2);                         break;
181                         case Shader::OPCODE_DP3:        dp3(d, s0, s1);                                break;
182                         case Shader::OPCODE_DP4:        dp4(d, s0, s1);                                break;
183                         case Shader::OPCODE_DET2:       det2(d, s0, s1);                               break;
184                         case Shader::OPCODE_DET3:       det3(d, s0, s1, s2);                           break;
185                         case Shader::OPCODE_DET4:       det4(d, s0, s1, s2, s3);                       break;
186                         case Shader::OPCODE_CMP0:       cmp0(d, s0, s1, s2);                           break;
187                         case Shader::OPCODE_ICMP:       icmp(d, s0, s1, control);                      break;
188                         case Shader::OPCODE_UCMP:       ucmp(d, s0, s1, control);                      break;
189                         case Shader::OPCODE_SELECT:     select(d, s0, s1, s2);                         break;
190                         case Shader::OPCODE_EXTRACT:    extract(d.x, s0, s1.x);                        break;
191                         case Shader::OPCODE_INSERT:     insert(d, s0, s1.x, s2.x);                     break;
192                         case Shader::OPCODE_FRC:        frc(d, s0);                                    break;
193                         case Shader::OPCODE_TRUNC:      trunc(d, s0);                                  break;
194                         case Shader::OPCODE_FLOOR:      floor(d, s0);                                  break;
195                         case Shader::OPCODE_ROUND:      round(d, s0);                                  break;
196                         case Shader::OPCODE_ROUNDEVEN:  roundEven(d, s0);                              break;
197                         case Shader::OPCODE_CEIL:       ceil(d, s0);                                   break;
198                         case Shader::OPCODE_EXP2X:      exp2x(d, s0, pp);                              break;
199                         case Shader::OPCODE_EXP2:       exp2(d, s0, pp);                               break;
200                         case Shader::OPCODE_LOG2X:      log2x(d, s0, pp);                              break;
201                         case Shader::OPCODE_LOG2:       log2(d, s0, pp);                               break;
202                         case Shader::OPCODE_EXP:        exp(d, s0, pp);                                break;
203                         case Shader::OPCODE_LOG:        log(d, s0, pp);                                break;
204                         case Shader::OPCODE_RCPX:       rcpx(d, s0, pp);                               break;
205                         case Shader::OPCODE_DIV:        div(d, s0, s1);                                break;
206                         case Shader::OPCODE_IDIV:       idiv(d, s0, s1);                               break;
207                         case Shader::OPCODE_UDIV:       udiv(d, s0, s1);                               break;
208                         case Shader::OPCODE_MOD:        mod(d, s0, s1);                                break;
209                         case Shader::OPCODE_IMOD:       imod(d, s0, s1);                               break;
210                         case Shader::OPCODE_UMOD:       umod(d, s0, s1);                               break;
211                         case Shader::OPCODE_SHL:        shl(d, s0, s1);                                break;
212                         case Shader::OPCODE_ISHR:       ishr(d, s0, s1);                               break;
213                         case Shader::OPCODE_USHR:       ushr(d, s0, s1);                               break;
214                         case Shader::OPCODE_RSQX:       rsqx(d, s0, pp);                               break;
215                         case Shader::OPCODE_SQRT:       sqrt(d, s0, pp);                               break;
216                         case Shader::OPCODE_RSQ:        rsq(d, s0, pp);                                break;
217                         case Shader::OPCODE_LEN2:       len2(d.x, s0, pp);                             break;
218                         case Shader::OPCODE_LEN3:       len3(d.x, s0, pp);                             break;
219                         case Shader::OPCODE_LEN4:       len4(d.x, s0, pp);                             break;
220                         case Shader::OPCODE_DIST1:      dist1(d.x, s0, s1, pp);                        break;
221                         case Shader::OPCODE_DIST2:      dist2(d.x, s0, s1, pp);                        break;
222                         case Shader::OPCODE_DIST3:      dist3(d.x, s0, s1, pp);                        break;
223                         case Shader::OPCODE_DIST4:      dist4(d.x, s0, s1, pp);                        break;
224                         case Shader::OPCODE_MIN:        min(d, s0, s1);                                break;
225                         case Shader::OPCODE_IMIN:       imin(d, s0, s1);                               break;
226                         case Shader::OPCODE_UMIN:       umin(d, s0, s1);                               break;
227                         case Shader::OPCODE_MAX:        max(d, s0, s1);                                break;
228                         case Shader::OPCODE_IMAX:       imax(d, s0, s1);                               break;
229                         case Shader::OPCODE_UMAX:       umax(d, s0, s1);                               break;
230                         case Shader::OPCODE_LRP:        lrp(d, s0, s1, s2);                            break;
231                         case Shader::OPCODE_STEP:       step(d, s0, s1);                               break;
232                         case Shader::OPCODE_SMOOTH:     smooth(d, s0, s1, s2);                         break;
233                         case Shader::OPCODE_ISINF:      isinf(d, s0);                                  break;
234                         case Shader::OPCODE_ISNAN:      isnan(d, s0);                                  break;
235                         case Shader::OPCODE_FLOATBITSTOINT:
236                         case Shader::OPCODE_FLOATBITSTOUINT:
237                         case Shader::OPCODE_INTBITSTOFLOAT:
238                         case Shader::OPCODE_UINTBITSTOFLOAT: d = s0;                                   break;
239                         case Shader::OPCODE_PACKSNORM2x16:   packSnorm2x16(d, s0);                     break;
240                         case Shader::OPCODE_PACKUNORM2x16:   packUnorm2x16(d, s0);                     break;
241                         case Shader::OPCODE_PACKHALF2x16:    packHalf2x16(d, s0);                      break;
242                         case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0);                   break;
243                         case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0);                   break;
244                         case Shader::OPCODE_UNPACKHALF2x16:  unpackHalf2x16(d, s0);                    break;
245                         case Shader::OPCODE_POWX:       powx(d, s0, s1, pp);                           break;
246                         case Shader::OPCODE_POW:        pow(d, s0, s1, pp);                            break;
247                         case Shader::OPCODE_SGN:        sgn(d, s0);                                    break;
248                         case Shader::OPCODE_ISGN:       isgn(d, s0);                                   break;
249                         case Shader::OPCODE_CRS:        crs(d, s0, s1);                                break;
250                         case Shader::OPCODE_FORWARD1:   forward1(d, s0, s1, s2);                       break;
251                         case Shader::OPCODE_FORWARD2:   forward2(d, s0, s1, s2);                       break;
252                         case Shader::OPCODE_FORWARD3:   forward3(d, s0, s1, s2);                       break;
253                         case Shader::OPCODE_FORWARD4:   forward4(d, s0, s1, s2);                       break;
254                         case Shader::OPCODE_REFLECT1:   reflect1(d, s0, s1);                           break;
255                         case Shader::OPCODE_REFLECT2:   reflect2(d, s0, s1);                           break;
256                         case Shader::OPCODE_REFLECT3:   reflect3(d, s0, s1);                           break;
257                         case Shader::OPCODE_REFLECT4:   reflect4(d, s0, s1);                           break;
258                         case Shader::OPCODE_REFRACT1:   refract1(d, s0, s1, s2.x);                     break;
259                         case Shader::OPCODE_REFRACT2:   refract2(d, s0, s1, s2.x);                     break;
260                         case Shader::OPCODE_REFRACT3:   refract3(d, s0, s1, s2.x);                     break;
261                         case Shader::OPCODE_REFRACT4:   refract4(d, s0, s1, s2.x);                     break;
262                         case Shader::OPCODE_NRM2:       nrm2(d, s0, pp);                               break;
263                         case Shader::OPCODE_NRM3:       nrm3(d, s0, pp);                               break;
264                         case Shader::OPCODE_NRM4:       nrm4(d, s0, pp);                               break;
265                         case Shader::OPCODE_ABS:        abs(d, s0);                                    break;
266                         case Shader::OPCODE_IABS:       iabs(d, s0);                                   break;
267                         case Shader::OPCODE_SINCOS:     sincos(d, s0, pp);                             break;
268                         case Shader::OPCODE_COS:        cos(d, s0, pp);                                break;
269                         case Shader::OPCODE_SIN:        sin(d, s0, pp);                                break;
270                         case Shader::OPCODE_TAN:        tan(d, s0, pp);                                break;
271                         case Shader::OPCODE_ACOS:       acos(d, s0, pp);                               break;
272                         case Shader::OPCODE_ASIN:       asin(d, s0, pp);                               break;
273                         case Shader::OPCODE_ATAN:       atan(d, s0, pp);                               break;
274                         case Shader::OPCODE_ATAN2:      atan2(d, s0, s1, pp);                          break;
275                         case Shader::OPCODE_COSH:       cosh(d, s0, pp);                               break;
276                         case Shader::OPCODE_SINH:       sinh(d, s0, pp);                               break;
277                         case Shader::OPCODE_TANH:       tanh(d, s0, pp);                               break;
278                         case Shader::OPCODE_ACOSH:      acosh(d, s0, pp);                              break;
279                         case Shader::OPCODE_ASINH:      asinh(d, s0, pp);                              break;
280                         case Shader::OPCODE_ATANH:      atanh(d, s0, pp);                              break;
281                         case Shader::OPCODE_M4X4:       M4X4(d, s0, src1);                             break;
282                         case Shader::OPCODE_M4X3:       M4X3(d, s0, src1);                             break;
283                         case Shader::OPCODE_M3X4:       M3X4(d, s0, src1);                             break;
284                         case Shader::OPCODE_M3X3:       M3X3(d, s0, src1);                             break;
285                         case Shader::OPCODE_M3X2:       M3X2(d, s0, src1);                             break;
286                         case Shader::OPCODE_TEX:        TEX(d, s0, src1, project, bias);               break;
287                         case Shader::OPCODE_TEXLDD:     TEXGRAD(d, s0, src1, s2, s3);                  break;
288                         case Shader::OPCODE_TEXLDL:     TEXLOD(d, s0, src1, s0.w);                     break;
289                         case Shader::OPCODE_TEXLOD:     TEXLOD(d, s0, src1, s2.x);                     break;
290                         case Shader::OPCODE_TEXSIZE:    TEXSIZE(d, s0.x, src1);                        break;
291                         case Shader::OPCODE_TEXKILL:    TEXKILL(cMask, d, dst.mask);                   break;
292                         case Shader::OPCODE_TEXOFFSET:  TEXOFFSET(d, s0, src1, s2);                    break;
293                         case Shader::OPCODE_TEXLODOFFSET: TEXLODOFFSET(d, s0, src1, s2, s3.x);         break;
294                         case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2.x);                 break;
295                         case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCHOFFSET(d, s0, src1, s2, s3.x); break;
296                         case Shader::OPCODE_TEXGRAD:    TEXGRAD(d, s0, src1, s2, s3);                  break;
297                         case Shader::OPCODE_TEXGRADOFFSET: TEXGRADOFFSET(d, s0, src1, s2, s3, s4);     break;
298                         case Shader::OPCODE_TEXBIAS:    TEXBIAS(d, s0, src1, s2.x);                    break;
299                         case Shader::OPCODE_TEXOFFSETBIAS: TEXOFFSETBIAS(d, s0, src1, s2, s3.x);       break;
300                         case Shader::OPCODE_DISCARD:    DISCARD(cMask, instruction);                   break;
301                         case Shader::OPCODE_DFDX:       DFDX(d, s0);                                   break;
302                         case Shader::OPCODE_DFDY:       DFDY(d, s0);                                   break;
303                         case Shader::OPCODE_FWIDTH:     FWIDTH(d, s0);                                 break;
304                         case Shader::OPCODE_BREAK:      BREAK();                                       break;
305                         case Shader::OPCODE_BREAKC:     BREAKC(s0, s1, control);                       break;
306                         case Shader::OPCODE_BREAKP:     BREAKP(src0);                                  break;
307                         case Shader::OPCODE_CONTINUE:   CONTINUE();                                    break;
308                         case Shader::OPCODE_TEST:       TEST();                                        break;
309                         case Shader::OPCODE_CALL:       CALL(dst.label, dst.callSite);                 break;
310                         case Shader::OPCODE_CALLNZ:     CALLNZ(dst.label, dst.callSite, src0);         break;
311                         case Shader::OPCODE_ELSE:       ELSE();                                        break;
312                         case Shader::OPCODE_ENDIF:      ENDIF();                                       break;
313                         case Shader::OPCODE_ENDLOOP:    ENDLOOP();                                     break;
314                         case Shader::OPCODE_ENDREP:     ENDREP();                                      break;
315                         case Shader::OPCODE_ENDWHILE:   ENDWHILE();                                    break;
316                         case Shader::OPCODE_ENDSWITCH:  ENDSWITCH();                                   break;
317                         case Shader::OPCODE_IF:         IF(src0);                                      break;
318                         case Shader::OPCODE_IFC:        IFC(s0, s1, control);                          break;
319                         case Shader::OPCODE_LABEL:      LABEL(dst.index);                              break;
320                         case Shader::OPCODE_LOOP:       LOOP(src1);                                    break;
321                         case Shader::OPCODE_REP:        REP(src0);                                     break;
322                         case Shader::OPCODE_WHILE:      WHILE(src0);                                   break;
323                         case Shader::OPCODE_SWITCH:     SWITCH();                                      break;
324                         case Shader::OPCODE_RET:        RET();                                         break;
325                         case Shader::OPCODE_LEAVE:      LEAVE();                                       break;
326                         case Shader::OPCODE_CMP:        cmp(d, s0, s1, control);                       break;
327                         case Shader::OPCODE_ALL:        all(d.x, s0);                                  break;
328                         case Shader::OPCODE_ANY:        any(d.x, s0);                                  break;
329                         case Shader::OPCODE_NOT:        bitwise_not(d, s0);                            break;
330                         case Shader::OPCODE_OR:         bitwise_or(d, s0, s1);                         break;
331                         case Shader::OPCODE_XOR:        bitwise_xor(d, s0, s1);                        break;
332                         case Shader::OPCODE_AND:        bitwise_and(d, s0, s1);                        break;
333                         case Shader::OPCODE_EQ:         equal(d, s0, s1);                              break;
334                         case Shader::OPCODE_NE:         notEqual(d, s0, s1);                           break;
335                         case Shader::OPCODE_END:                                                       break;
336                         default:
337                                 ASSERT(false);
338                         }
339
340                         if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_TEXKILL && opcode != Shader::OPCODE_NOP)
341                         {
342                                 if(dst.saturate)
343                                 {
344                                         if(dst.x) d.x = Max(d.x, Float4(0.0f));
345                                         if(dst.y) d.y = Max(d.y, Float4(0.0f));
346                                         if(dst.z) d.z = Max(d.z, Float4(0.0f));
347                                         if(dst.w) d.w = Max(d.w, Float4(0.0f));
348
349                                         if(dst.x) d.x = Min(d.x, Float4(1.0f));
350                                         if(dst.y) d.y = Min(d.y, Float4(1.0f));
351                                         if(dst.z) d.z = Min(d.z, Float4(1.0f));
352                                         if(dst.w) d.w = Min(d.w, Float4(1.0f));
353                                 }
354
355                                 if(instruction->isPredicated())
356                                 {
357                                         Vector4f pDst;   // FIXME: Rename
358
359                                         switch(dst.type)
360                                         {
361                                         case Shader::PARAMETER_TEMP:
362                                                 if(dst.rel.type == Shader::PARAMETER_VOID)
363                                                 {
364                                                         if(dst.x) pDst.x = r[dst.index].x;
365                                                         if(dst.y) pDst.y = r[dst.index].y;
366                                                         if(dst.z) pDst.z = r[dst.index].z;
367                                                         if(dst.w) pDst.w = r[dst.index].w;
368                                                 }
369                                                 else
370                                                 {
371                                                         Int a = relativeAddress(dst);
372
373                                                         if(dst.x) pDst.x = r[dst.index + a].x;
374                                                         if(dst.y) pDst.y = r[dst.index + a].y;
375                                                         if(dst.z) pDst.z = r[dst.index + a].z;
376                                                         if(dst.w) pDst.w = r[dst.index + a].w;
377                                                 }
378                                                 break;
379                                         case Shader::PARAMETER_COLOROUT:
380                                                 if(dst.rel.type == Shader::PARAMETER_VOID)
381                                                 {
382                                                         if(dst.x) pDst.x = oC[dst.index].x;
383                                                         if(dst.y) pDst.y = oC[dst.index].y;
384                                                         if(dst.z) pDst.z = oC[dst.index].z;
385                                                         if(dst.w) pDst.w = oC[dst.index].w;
386                                                 }
387                                                 else
388                                                 {
389                                                         Int a = relativeAddress(dst) + dst.index;
390
391                                                         if(dst.x) pDst.x = oC[a].x;
392                                                         if(dst.y) pDst.y = oC[a].y;
393                                                         if(dst.z) pDst.z = oC[a].z;
394                                                         if(dst.w) pDst.w = oC[a].w;
395                                                 }
396                                                 break;
397                                         case Shader::PARAMETER_PREDICATE:
398                                                 if(dst.x) pDst.x = p0.x;
399                                                 if(dst.y) pDst.y = p0.y;
400                                                 if(dst.z) pDst.z = p0.z;
401                                                 if(dst.w) pDst.w = p0.w;
402                                                 break;
403                                         case Shader::PARAMETER_DEPTHOUT:
404                                                 pDst.x = oDepth;
405                                                 break;
406                                         default:
407                                                 ASSERT(false);
408                                         }
409
410                                         Int4 enable = enableMask(instruction);
411
412                                         Int4 xEnable = enable;
413                                         Int4 yEnable = enable;
414                                         Int4 zEnable = enable;
415                                         Int4 wEnable = enable;
416
417                                         if(predicate)
418                                         {
419                                                 unsigned char pSwizzle = instruction->predicateSwizzle;
420
421                                                 Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03];
422                                                 Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03];
423                                                 Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03];
424                                                 Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03];
425
426                                                 if(!instruction->predicateNot)
427                                                 {
428                                                         if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
429                                                         if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
430                                                         if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
431                                                         if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
432                                                 }
433                                                 else
434                                                 {
435                                                         if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
436                                                         if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
437                                                         if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
438                                                         if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
439                                                 }
440                                         }
441
442                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
443                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
444                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
445                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
446
447                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
448                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
449                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
450                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
451                                 }
452
453                                 switch(dst.type)
454                                 {
455                                 case Shader::PARAMETER_TEMP:
456                                         if(dst.rel.type == Shader::PARAMETER_VOID)
457                                         {
458                                                 if(dst.x) r[dst.index].x = d.x;
459                                                 if(dst.y) r[dst.index].y = d.y;
460                                                 if(dst.z) r[dst.index].z = d.z;
461                                                 if(dst.w) r[dst.index].w = d.w;
462                                         }
463                                         else
464                                         {
465                                                 Int a = relativeAddress(dst);
466
467                                                 if(dst.x) r[dst.index + a].x = d.x;
468                                                 if(dst.y) r[dst.index + a].y = d.y;
469                                                 if(dst.z) r[dst.index + a].z = d.z;
470                                                 if(dst.w) r[dst.index + a].w = d.w;
471                                         }
472                                         break;
473                                 case Shader::PARAMETER_COLOROUT:
474                                         if(dst.rel.type == Shader::PARAMETER_VOID)
475                                         {
476                                                 broadcastColor0 = (dst.index == 0) && broadcastColor0;
477
478                                                 if(dst.x) { oC[dst.index].x = d.x; }
479                                                 if(dst.y) { oC[dst.index].y = d.y; }
480                                                 if(dst.z) { oC[dst.index].z = d.z; }
481                                                 if(dst.w) { oC[dst.index].w = d.w; }
482                                         }
483                                         else
484                                         {
485                                                 broadcastColor0 = false;
486                                                 Int a = relativeAddress(dst) + dst.index;
487
488                                                 if(dst.x) { oC[a].x = d.x; }
489                                                 if(dst.y) { oC[a].y = d.y; }
490                                                 if(dst.z) { oC[a].z = d.z; }
491                                                 if(dst.w) { oC[a].w = d.w; }
492                                         }
493                                         break;
494                                 case Shader::PARAMETER_PREDICATE:
495                                         if(dst.x) p0.x = d.x;
496                                         if(dst.y) p0.y = d.y;
497                                         if(dst.z) p0.z = d.z;
498                                         if(dst.w) p0.w = d.w;
499                                         break;
500                                 case Shader::PARAMETER_DEPTHOUT:
501                                         oDepth = d.x;
502                                         break;
503                                 default:
504                                         ASSERT(false);
505                                 }
506                         }
507                 }
508
509                 if(currentLabel != -1)
510                 {
511                         Nucleus::setInsertBlock(returnBlock);
512                 }
513
514                 if(broadcastColor0)
515                 {
516                         for(int i = 0; i < RENDERTARGETS; i++)
517                         {
518                                 c[i] = oC[0];
519                         }
520                 }
521                 else
522                 {
523                         for(int i = 0; i < RENDERTARGETS; i++)
524                         {
525                                 c[i] = oC[i];
526                         }
527                 }
528
529                 clampColor(c);
530
531                 if(state.depthOverride)
532                 {
533                         oDepth = Min(Max(oDepth, Float4(0.0f)), Float4(1.0f));
534                 }
535         }
536
537         Bool PixelProgram::alphaTest(Int cMask[4])
538         {
                    // Applies the alpha test (or alpha-to-coverage) using the alpha of
                    // color output 0 (c[0].w) and updates the coverage masks in cMask.
                    // Returns whether any of the state.multiSample coverage masks is
                    // still non-zero afterwards.

                    // Alpha testing disabled: cMask is left untouched and the test passes.
539                 if(!state.alphaTestActive())
540                 {
541                         return true;
542                 }
543
544                 Int aMask;
545
546                 if(state.transparencyAntialiasing == TRANSPARENCY_NONE)
547                 {
                            // Alpha is scaled by 0x1000 and rounded to 16-bit integers
                            // before being handed to the base-class alpha test.
548                         Short4 alpha = RoundShort4(c[0].w * Float4(0x1000));
549
                            // NOTE(review): presumably fills aMask with the pass mask - confirm in PixelRoutine.
550                         PixelRoutine::alphaTest(aMask, alpha);
551
                            // The same alpha mask is ANDed into every sample's coverage mask.
552                         for(unsigned int q = 0; q < state.multiSample; q++)
553                         {
554                                 cMask[q] &= aMask;
555                         }
556                 }
557                 else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
558                 {
559                         alphaToCoverage(cMask, c[0].w);
560                 }
561                 else ASSERT(false);   // No other transparency antialiasing mode is handled here
562
                    // OR the coverage masks of all samples together.
563                 Int pass = cMask[0];
564
565                 for(unsigned int q = 1; q < state.multiSample; q++)
566                 {
567                         pass = pass | cMask[q];
568                 }
569
570                 return pass != 0x0;
571         }
572
573         void PixelProgram::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
574         {
                    // Writes the shader's color results (c[]) to the render targets.
                    // For every render target with color writes active: optionally
                    // converts RGB to sRGB, blends fog into target 0, and then, per
                    // sample, blends and writes the color. The target format selects
                    // between a 16-bit integer color path (Vector4s) and a
                    // floating-point color path (Vector4f).
575                 for(int index = 0; index < RENDERTARGETS; index++)
576                 {
577                         if(!state.colorWriteActive(index))
578                         {
579                                 continue;
580                         }
581
                            // Only x, y and z are converted; w (alpha) is left as is.
582                         if(!postBlendSRGB && state.writeSRGB && !isSRGB(index))
583                         {
584                                 c[index].x = linearToSRGB(c[index].x);
585                                 c[index].y = linearToSRGB(c[index].y);
586                                 c[index].z = linearToSRGB(c[index].z);
587                         }
588
                            // Fog is only blended into render target 0.
589                         if(index == 0)
590                         {
591                                 fogBlend(c[index], fog);
592                         }
593
594                         switch(state.targetFormat[index])
595                         {
                            // Formats written through 16-bit-per-component Vector4s colors.
596                         case FORMAT_R5G6B5:
597                         case FORMAT_X8R8G8B8:
598                         case FORMAT_X8B8G8R8:
599                         case FORMAT_A8R8G8B8:
600                         case FORMAT_A8B8G8R8:
601                         case FORMAT_SRGB8_X8:
602                         case FORMAT_SRGB8_A8:
603                         case FORMAT_G8R8:
604                         case FORMAT_R8:
605                         case FORMAT_A8:
606                         case FORMAT_G16R16:
607                         case FORMAT_A16B16G16R16:
608                                 for(unsigned int q = 0; q < state.multiSample; q++)
609                                 {
                                            // Sample q's buffer: base pointer plus q times the
                                            // colorSliceB[index] value read from DrawData.
610                                         Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
611                                         Vector4s color;
612
613                                         if(state.targetFormat[index] == FORMAT_R5G6B5)
614                                         {
                                                    // NOTE(review): x/z use 0xFBFF and y uses 0xFDFF instead of 0xFFFF -
                                                    // presumably a rounding adjustment for the 5/6/5-bit channels; confirm.
615                                                 color.x = UShort4(c[index].x * Float4(0xFBFF), false);
616                                                 color.y = UShort4(c[index].y * Float4(0xFDFF), false);
617                                                 color.z = UShort4(c[index].z * Float4(0xFBFF), false);
618                                                 color.w = UShort4(c[index].w * Float4(0xFFFF), false);
619                                         }
620                                         else
621                                         {
622                                                 color.x = convertFixed16(c[index].x, false);
623                                                 color.y = convertFixed16(c[index].y, false);
624                                                 color.z = convertFixed16(c[index].z, false);
625                                                 color.w = convertFixed16(c[index].w, false);
626                                         }
627
                                            // Samples whose bit is clear in multiSampleMask are not written.
628                                         if(state.multiSampleMask & (1 << q))
629                                         {
630                                                 alphaBlend(index, buffer, color, x);
631                                                 logicOperation(index, buffer, color, x);
632                                                 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
633                                         }
634                                 }
635                                 break;
                            // Formats written through Vector4f colors.
636                         case FORMAT_R32F:
637                         case FORMAT_G32R32F:
638                         case FORMAT_X32B32G32R32F:
639                         case FORMAT_A32B32G32R32F:
640                         case FORMAT_X32B32G32R32F_UNSIGNED:
641                         case FORMAT_R32I:
642                         case FORMAT_G32R32I:
643                         case FORMAT_A32B32G32R32I:
644                         case FORMAT_R32UI:
645                         case FORMAT_G32R32UI:
646                         case FORMAT_A32B32G32R32UI:
647                         case FORMAT_R16I:
648                         case FORMAT_G16R16I:
649                         case FORMAT_A16B16G16R16I:
650                         case FORMAT_R16UI:
651                         case FORMAT_G16R16UI:
652                         case FORMAT_A16B16G16R16UI:
653                         case FORMAT_R8I:
654                         case FORMAT_G8R8I:
655                         case FORMAT_A8B8G8R8I:
656                         case FORMAT_R8UI:
657                         case FORMAT_G8R8UI:
658                         case FORMAT_A8B8G8R8UI:
659                                 for(unsigned int q = 0; q < state.multiSample; q++)
660                                 {
661                                         Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
662                                         Vector4f color = c[index];
663
                                            // Unlike the integer path above, logicOperation is not called here.
664                                         if(state.multiSampleMask & (1 << q))
665                                         {
666                                                 alphaBlend(index, buffer, color, x);
667                                                 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
668                                         }
669                                 }
670                                 break;
671                         default:
672                                 ASSERT(false);
673                         }
674                 }
675         }
676
677         Vector4f PixelProgram::sampleTexture(const Src &sampler, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
678         {
                    // Samples the texture selected by the 'sampler' source parameter and
                    // returns the result with sampler.swizzle applied. The remaining
                    // arguments are forwarded unchanged to the index-based overload.
679                 Vector4f tmp;
680
                    // Directly addressed sampler: the sampler index is sampler.index.
681                 if(sampler.type == Shader::PARAMETER_SAMPLER && sampler.rel.type == Shader::PARAMETER_VOID)
682                 {
683                         tmp = sampleTexture(sampler.index, uvwq, bias, dsx, dsy, offset, function);
684                 }
685                 else
686                 {
                            // The sampler index is taken from the first element of the fetched
                            // register's x component, with its bits reinterpreted as an integer.
687                         Int index = As<Int>(Float(fetchRegister(sampler).x.x));
688
                            // Sampling code is emitted for every sampler the shader uses, each
                            // guarded by a comparison of 'index' against that sampler number.
                            // NOTE(review): 'If' is not the C++ keyword; presumably a Reactor
                            // conditional evaluated by the generated routine - confirm.
689                         for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
690                         {
691                                 if(shader->usesSampler(i))
692                                 {
693                                         If(index == i)
694                                         {
695                                                 tmp = sampleTexture(i, uvwq, bias, dsx, dsy, offset, function);
696                                                 // FIXME: When the sampler states are the same, we could use one sampler and just index the texture
697                                         }
698                                 }
699                         }
700                 }
701
                    // Swizzle: each 2-bit field of sampler.swizzle selects which component
                    // of tmp becomes x, y, z and w of the result, respectively.
702                 Vector4f c;
703                 c.x = tmp[(sampler.swizzle >> 0) & 0x3];
704                 c.y = tmp[(sampler.swizzle >> 2) & 0x3];
705                 c.z = tmp[(sampler.swizzle >> 4) & 0x3];
706                 c.w = tmp[(sampler.swizzle >> 6) & 0x3];
707
708                 return c;
709         }
710
711         Vector4f PixelProgram::sampleTexture(int samplerIndex, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
712         {
                    // Samples the texture at 'samplerIndex' with a SamplerCore built from
                    // state.sampler[samplerIndex] and returns the sampled color.
                    // When PERF_PROFILE is enabled, the Ticks() difference across the
                    // sampling code is added to cycles[PERF_TEX].
713                 #if PERF_PROFILE
714                         Long texTime = Ticks();
715                 #endif
716
                    // Texture data address: DrawData::mipmap plus samplerIndex Texture-sized entries.
717                 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + samplerIndex * sizeof(Texture);
718                 Vector4f c = SamplerCore(constants, state.sampler[samplerIndex]).sampleTexture(texture, uvwq.x, uvwq.y, uvwq.z, uvwq.w, bias, dsx, dsy, offset, function);
719
720                 #if PERF_PROFILE
721                         cycles[PERF_TEX] += Ticks() - texTime;
722                 #endif
723
724                 return c;
725         }
726
727         void PixelProgram::clampColor(Vector4f oC[RENDERTARGETS])
728         {
                    // Clamps the colors in the given array according to each render
                    // target's format:
                    //  - the normalized formats in the first group are clamped to [0, 1];
                    //  - FORMAT_X32B32G32R32F_UNSIGNED is clamped to be non-negative;
                    //  - FORMAT_NULL and the remaining float/integer formats are left unchanged.
                    // Note that 'oC' here is the parameter (the caller in this file passes c).
729                 for(int index = 0; index < RENDERTARGETS; index++)
730                 {
                            // Targets without active color writes are skipped, except target 0
                            // while alpha testing is active (alphaTest reads c[0].w).
731                         if(!state.colorWriteActive(index) && !(index == 0 && state.alphaTestActive()))
732                         {
733                                 continue;
734                         }
735
736                         switch(state.targetFormat[index])
737                         {
738                         case FORMAT_NULL:
739                                 break;
740                         case FORMAT_R5G6B5:
741                         case FORMAT_A8R8G8B8:
742                         case FORMAT_A8B8G8R8:
743                         case FORMAT_X8R8G8B8:
744                         case FORMAT_X8B8G8R8:
745                         case FORMAT_SRGB8_X8:
746                         case FORMAT_SRGB8_A8:
747                         case FORMAT_G8R8:
748                         case FORMAT_R8:
749                         case FORMAT_A8:
750                         case FORMAT_G16R16:
751                         case FORMAT_A16B16G16R16:
                                    // Clamp every component to [0, 1]: Max against 0 first, then Min against 1.
752                                 oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f));
753                                 oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f));
754                                 oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f));
755                                 oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f));
756                                 break;
757                         case FORMAT_R32F:
758                         case FORMAT_G32R32F:
759                         case FORMAT_X32B32G32R32F:
760                         case FORMAT_A32B32G32R32F:
761                         case FORMAT_R32I:
762                         case FORMAT_G32R32I:
763                         case FORMAT_A32B32G32R32I:
764                         case FORMAT_R32UI:
765                         case FORMAT_G32R32UI:
766                         case FORMAT_A32B32G32R32UI:
767                         case FORMAT_R16I:
768                         case FORMAT_G16R16I:
769                         case FORMAT_A16B16G16R16I:
770                         case FORMAT_R16UI:
771                         case FORMAT_G16R16UI:
772                         case FORMAT_A16B16G16R16UI:
773                         case FORMAT_R8I:
774                         case FORMAT_G8R8I:
775                         case FORMAT_A8B8G8R8I:
776                         case FORMAT_R8UI:
777                         case FORMAT_G8R8UI:
778                         case FORMAT_A8B8G8R8UI:
                                    // No clamping for these formats.
779                                 break;
780                         case FORMAT_X32B32G32R32F_UNSIGNED:
                                    // Lower bound only: negative values become 0, no upper limit.
781                                 oC[index].x = Max(oC[index].x, Float4(0.0f));
782                                 oC[index].y = Max(oC[index].y, Float4(0.0f));
783                                 oC[index].z = Max(oC[index].z, Float4(0.0f));
784                                 oC[index].w = Max(oC[index].w, Float4(0.0f));
785                                 break;
786                         default:
787                                 ASSERT(false);
788                         }
789                 }
790         }
791
792         Int4 PixelProgram::enableMask(const Shader::Instruction *instruction)
793         {
                    // Builds the 4-wide write-enable mask for 'instruction'. Callers AND it
                    // with a result to decide which elements are actually written.
                    // The mask starts as the current enable-stack entry when the
                    // instruction is flagged analysisBranch, and as all ones otherwise.
794                 Int4 enable = instruction->analysisBranch ? Int4(enableStack[enableIndex]) : Int4(0xFFFFFFFF);
795
                    // While whileTest is set, the break/continue/leave masks are not applied.
796                 if(!whileTest)
797                 {
                            // Each mask is only applied when the shader contains that kind of
                            // instruction and this instruction carries the matching analysis flag.
798                         if(shader->containsBreakInstruction() && instruction->analysisBreak)
799                         {
800                                 enable &= enableBreak;
801                         }
802
803                         if(shader->containsContinueInstruction() && instruction->analysisContinue)
804                         {
805                                 enable &= enableContinue;
806                         }
807
808                         if(shader->containsLeaveInstruction() && instruction->analysisLeave)
809                         {
810                                 enable &= enableLeave;
811                         }
812                 }
813
814                 return enable;
815         }
816
817         Vector4f PixelProgram::fetchRegister(const Src &src, unsigned int offset)
818         {
                    // Reads the source operand described by 'src' (register index
                    // src.index + offset), then applies src.swizzle and src.modifier.
                    // Sampler operands and the "Dummy" parameter types return early,
                    // without swizzle or modifier being applied.
                    // Other code in this file calls this with a single argument, so
                    // 'offset' presumably has a default in the declaration (not visible here).
819                 Vector4f reg;
820                 unsigned int i = src.index + offset;
821
822                 switch(src.type)
823                 {
824                 case Shader::PARAMETER_TEMP:
825                         if(src.rel.type == Shader::PARAMETER_VOID)
826                         {
827                                 reg = r[i];
828                         }
829                         else
830                         {
831                                 Int a = relativeAddress(src, src.bufferIndex);
832
833                                 reg = r[i + a];
834                         }
835                         break;
836                 case Shader::PARAMETER_INPUT:
837                         {
838                                 if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
839                                 {
840                                         reg = v[i];
841                                 }
842                                 else
843                                 {
844                                         Int a = relativeAddress(src, src.bufferIndex);
845
846                                         reg = v[i + a];
847                                 }
848                         }
849                         break;
850                 case Shader::PARAMETER_CONST:
851                         reg = readConstant(src, offset);
852                         break;
853                 case Shader::PARAMETER_TEXTURE:
                            // Texture registers are read from the input array, starting at v[2].
854                         reg = v[2 + i];
855                         break;
856                 case Shader::PARAMETER_MISCTYPE:
                            // reg stays unassigned if src.index matches neither index.
857                         if(src.index == Shader::VPosIndex) reg = vPos;
858                         if(src.index == Shader::VFaceIndex) reg = vFace;
859                         break;
860                 case Shader::PARAMETER_SAMPLER:
                            // The sampler index is stored in reg.x as raw integer bits (optionally
                            // added to the integer bits of a temporary register's x component).
                            // reg.x is not assigned for any other relative type.
861                         if(src.rel.type == Shader::PARAMETER_VOID)
862                         {
863                                 reg.x = As<Float4>(Int4(i));
864                         }
865                         else if(src.rel.type == Shader::PARAMETER_TEMP)
866                         {
867                                 reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x));
868                         }
869                         return reg;
870                 case Shader::PARAMETER_PREDICATE:   return reg; // Dummy
871                 case Shader::PARAMETER_VOID:        return reg; // Dummy
872                 case Shader::PARAMETER_FLOAT4LITERAL:
                            // Each literal value is replicated into all four elements of its component.
873                         reg.x = Float4(src.value[0]);
874                         reg.y = Float4(src.value[1]);
875                         reg.z = Float4(src.value[2]);
876                         reg.w = Float4(src.value[3]);
877                         break;
878                 case Shader::PARAMETER_CONSTINT:    return reg; // Dummy
879                 case Shader::PARAMETER_CONSTBOOL:   return reg; // Dummy
880                 case Shader::PARAMETER_LOOP:        return reg; // Dummy
881                 case Shader::PARAMETER_COLOROUT:
882                         if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
883                         {
884                                 reg = oC[i];
885                         }
886                         else
887                         {
888                                 Int a = relativeAddress(src, src.bufferIndex);
889
890                                 reg = oC[i + a];
891                         }
892                         break;
893                 case Shader::PARAMETER_DEPTHOUT:
                            // Only the x component is assigned for the depth output.
894                         reg.x = oDepth;
895                         break;
896                 default:
897                         ASSERT(false);
898                 }
899
                    // Swizzle: each 2-bit field of src.swizzle selects the component of reg
                    // used for x, y, z and w, respectively.
900                 const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
901                 const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
902                 const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
903                 const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
904
905                 Vector4f mod;
906
                    // Apply the source modifier to the swizzled components.
907                 switch(src.modifier)
908                 {
909                 case Shader::MODIFIER_NONE:
910                         mod.x = x;
911                         mod.y = y;
912                         mod.z = z;
913                         mod.w = w;
914                         break;
915                 case Shader::MODIFIER_NEGATE:
916                         mod.x = -x;
917                         mod.y = -y;
918                         mod.z = -z;
919                         mod.w = -w;
920                         break;
921                 case Shader::MODIFIER_ABS:
922                         mod.x = Abs(x);
923                         mod.y = Abs(y);
924                         mod.z = Abs(z);
925                         mod.w = Abs(w);
926                         break;
927                 case Shader::MODIFIER_ABS_NEGATE:
928                         mod.x = -Abs(x);
929                         mod.y = -Abs(y);
930                         mod.z = -Abs(z);
931                         mod.w = -Abs(w);
932                         break;
933                 case Shader::MODIFIER_NOT:
                            // Bitwise complement: XOR of the raw bits with all ones.
934                         mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
935                         mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
936                         mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
937                         mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
938                         break;
939                 default:
940                         ASSERT(false);
941                 }
942
943                 return mod;
944         }
945
946         RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index)
947         {
                    // Returns the address of a pixel shader uniform.
                    // bufferIndex == -1 selects the ps.c[] array stored inside DrawData;
                    // any other value selects the buffer whose pointer is stored in
                    // ps.u[bufferIndex].
948                 if(bufferIndex == -1)
949                 {
950                         return data + OFFSET(DrawData, ps.c[index]);
951                 }
952                 else
953                 {
                            // NOTE(review): 'index' is added to the Byte pointer unscaled here, whereas
                            // the branch above addresses whole ps.c[] elements - confirm the intended units.
954                         return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, ps.u[bufferIndex])) + index;
955                 }
956         }
957
958         RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index, Int& offset)
959         {
                    // Address returned by the two-argument overload, advanced by
                    // 'offset' float4-sized elements (offset * sizeof(float4) bytes).
960                 return uniformAddress(bufferIndex, index) + offset * sizeof(float4);
961         }
962
963         Vector4f PixelProgram::readConstant(const Src &src, unsigned int offset)
964         {
                    // Reads constant register src.index + offset and returns it with each
                    // of its four components replicated across a whole Float4.
                    // Three addressing modes are handled, keyed on src.rel.type:
                    // not relative, relative to the loop counter, and any other relative
                    // type (resolved through relativeAddress).
965                 Vector4f c;
966                 unsigned int i = src.index + offset;
967
968                 if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
969                 {
                            // Load the whole float4 into every component, then broadcast one
                            // element per component.
970                         c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i));
971
972                         c.x = c.x.xxxx;
973                         c.y = c.y.yyyy;
974                         c.z = c.z.zzzz;
975                         c.w = c.w.wwww;
976
977                         if(shader->containsDefineInstruction())   // Constant may be known at compile time
978                         {
                                    // The first DEF instruction whose destination index equals i
                                    // replaces the loaded values with its literal values.
979                                 for(size_t j = 0; j < shader->getLength(); j++)
980                                 {
981                                         const Shader::Instruction &instruction = *shader->getInstruction(j);
982
983                                         if(instruction.opcode == Shader::OPCODE_DEF)
984                                         {
985                                                 if(instruction.dst.index == i)
986                                                 {
987                                                         c.x = Float4(instruction.src[0].value[0]);
988                                                         c.y = Float4(instruction.src[0].value[1]);
989                                                         c.z = Float4(instruction.src[0].value[2]);
990                                                         c.w = Float4(instruction.src[0].value[3]);
991
992                                                         break;
993                                                 }
994                                         }
995                                 }
996                         }
997                 }
998                 else if(src.rel.type == Shader::PARAMETER_LOOP)
999                 {
                            // Relative to the loop counter of the current loop depth.
1000                         Int loopCounter = aL[loopDepth];
1001
1002                         c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, loopCounter));
1003
1004                         c.x = c.x.xxxx;
1005                         c.y = c.y.yyyy;
1006                         c.z = c.z.zzzz;
1007                         c.w = c.w.wwww;
1008                 }
1009                 else
1010                 {
                             // Any other relative type: the offset comes from relativeAddress().
1011                         Int a = relativeAddress(src, src.bufferIndex);
1012
1013                         c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));
1014
1015                         c.x = c.x.xxxx;
1016                         c.y = c.y.yyyy;
1017                         c.z = c.z.zzzz;
1018                         c.w = c.w.wwww;
1019                 }
1020
1021                 return c;
1022         }
1023
1024         Int PixelProgram::relativeAddress(const Shader::Parameter &var, int bufferIndex)
1025         {
                     // Computes the integer offset used for relative addressing of 'var',
                     // based on var.rel.type. 'bufferIndex' is only used for the
                     // PARAMETER_CONST case. Other code in this file calls this with a single
                     // argument, so 'bufferIndex' presumably has a default in the declaration.
1026                 ASSERT(var.rel.deterministic);
1027
                     // For TEMP, INPUT and OUTPUT, element 0 of the x component of register
                     // var.rel.index is reinterpreted as an integer and multiplied by
                     // var.rel.scale.
1028                 if(var.rel.type == Shader::PARAMETER_TEMP)
1029                 {
1030                         return As<Int>(Extract(r[var.rel.index].x, 0)) * var.rel.scale;
1031                 }
1032                 else if(var.rel.type == Shader::PARAMETER_INPUT)
1033                 {
1034                         return As<Int>(Extract(v[var.rel.index].x, 0)) * var.rel.scale;
1035                 }
1036                 else if(var.rel.type == Shader::PARAMETER_OUTPUT)
1037                 {
1038                         return As<Int>(Extract(oC[var.rel.index].x, 0)) * var.rel.scale;
1039                 }
1040                 else if(var.rel.type == Shader::PARAMETER_CONST)
1041                 {
                             // An Int is read from the uniform's address and multiplied by var.rel.scale.
1042                         return *Pointer<Int>(uniformAddress(bufferIndex, var.rel.index)) * var.rel.scale;
1043                 }
1044                 else if(var.rel.type == Shader::PARAMETER_LOOP)
1045                 {
                             // The loop counter is returned as is; var.rel.scale is not applied.
1046                         return aL[loopDepth];
1047                 }
1048                 else ASSERT(false);
1049
1050                 return 0;
1051         }
1052
1053         Float4 PixelProgram::linearToSRGB(const Float4 &x)   // Approximates x^(1.0/2.2)
1054         {
1055                 Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x));
1056                 Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f);
1057
1058                 return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f));
1059         }
1060
1061         void PixelProgram::M3X2(Vector4f &dst, Vector4f &src0, const Src &src1)
1062         {
1063                 Vector4f row0 = fetchRegister(src1, 0);
1064                 Vector4f row1 = fetchRegister(src1, 1);
1065
1066                 dst.x = dot3(src0, row0);
1067                 dst.y = dot3(src0, row1);
1068         }
1069
1070         void PixelProgram::M3X3(Vector4f &dst, Vector4f &src0, const Src &src1)
1071         {
1072                 Vector4f row0 = fetchRegister(src1, 0);
1073                 Vector4f row1 = fetchRegister(src1, 1);
1074                 Vector4f row2 = fetchRegister(src1, 2);
1075
1076                 dst.x = dot3(src0, row0);
1077                 dst.y = dot3(src0, row1);
1078                 dst.z = dot3(src0, row2);
1079         }
1080
1081         void PixelProgram::M3X4(Vector4f &dst, Vector4f &src0, const Src &src1)
1082         {
1083                 Vector4f row0 = fetchRegister(src1, 0);
1084                 Vector4f row1 = fetchRegister(src1, 1);
1085                 Vector4f row2 = fetchRegister(src1, 2);
1086                 Vector4f row3 = fetchRegister(src1, 3);
1087
1088                 dst.x = dot3(src0, row0);
1089                 dst.y = dot3(src0, row1);
1090                 dst.z = dot3(src0, row2);
1091                 dst.w = dot3(src0, row3);
1092         }
1093
1094         void PixelProgram::M4X3(Vector4f &dst, Vector4f &src0, const Src &src1)
1095         {
1096                 Vector4f row0 = fetchRegister(src1, 0);
1097                 Vector4f row1 = fetchRegister(src1, 1);
1098                 Vector4f row2 = fetchRegister(src1, 2);
1099
1100                 dst.x = dot4(src0, row0);
1101                 dst.y = dot4(src0, row1);
1102                 dst.z = dot4(src0, row2);
1103         }
1104
1105         void PixelProgram::M4X4(Vector4f &dst, Vector4f &src0, const Src &src1)
1106         {
1107                 Vector4f row0 = fetchRegister(src1, 0);
1108                 Vector4f row1 = fetchRegister(src1, 1);
1109                 Vector4f row2 = fetchRegister(src1, 2);
1110                 Vector4f row3 = fetchRegister(src1, 3);
1111
1112                 dst.x = dot4(src0, row0);
1113                 dst.y = dot4(src0, row1);
1114                 dst.z = dot4(src0, row2);
1115                 dst.w = dot4(src0, row3);
1116         }
1117
1118         void PixelProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias)
1119         {
1120                 if(project)
1121                 {
1122                         Vector4f proj;
1123                         Float4 rw = reciprocal(src0.w);
1124                         proj.x = src0.x * rw;
1125                         proj.y = src0.y * rw;
1126                         proj.z = src0.z * rw;
1127
1128                         dst = sampleTexture(src1, proj, src0.x, (src0), (src0), (src0), Implicit);
1129                 }
1130                 else
1131                 {
1132                         dst = sampleTexture(src1, src0, src0.x, (src0), (src0), (src0), bias ? Bias : Implicit);
1133                 }
1134         }
1135
1136         void PixelProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset)
1137         {
1138                 dst = sampleTexture(src1, src0, (src0.x), (src0), (src0), offset, {Implicit, Offset});
1139         }
1140
1141         void PixelProgram::TEXLODOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &lod)
1142         {
1143                 dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Lod, Offset});
1144         }
1145
1146         void PixelProgram::TEXBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &bias)
1147         {
1148                 dst = sampleTexture(src1, src0, bias, (src0), (src0), (src0), Bias);
1149         }
1150
1151         void PixelProgram::TEXOFFSETBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &bias)
1152         {
1153                 dst = sampleTexture(src1, src0, bias, (src0), (src0), offset, {Bias, Offset});
1154         }
1155
1156         void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Float4 &lod)
1157         {
1158                 dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Fetch);
1159         }
1160
1161         void PixelProgram::TEXELFETCHOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset, Float4 &lod)
1162         {
1163                 dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Fetch, Offset});
1164         }
1165
1166         void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy)
1167         {
1168                 dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, (src0), Grad);
1169         }
1170
1171         void PixelProgram::TEXGRADOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy, Vector4f &offset)
1172         {
1173                 dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, offset, {Grad, Offset});
1174         }
1175
1176         void PixelProgram::TEXLOD(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &lod)
1177         {
1178                 dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Lod);
1179         }
1180
1181         void PixelProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1)
1182         {
1183                 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + src1.index * sizeof(Texture);
1184                 dst = SamplerCore::textureSize(texture, lod);
1185         }
1186
1187         void PixelProgram::TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask)
1188         {
1189                 Int kill = -1;
1190
1191                 if(mask & 0x1) kill &= SignMask(CmpNLT(src.x, Float4(0.0f)));
1192                 if(mask & 0x2) kill &= SignMask(CmpNLT(src.y, Float4(0.0f)));
1193                 if(mask & 0x4) kill &= SignMask(CmpNLT(src.z, Float4(0.0f)));
1194                 if(mask & 0x8) kill &= SignMask(CmpNLT(src.w, Float4(0.0f)));
1195
1196                 // FIXME: Dynamic branching affects TEXKILL?
1197                 //      if(shader->containsDynamicBranching())
1198                 //      {
1199                 //              kill = ~SignMask(enableMask());
1200                 //      }
1201
1202                 for(unsigned int q = 0; q < state.multiSample; q++)
1203                 {
1204                         cMask[q] &= kill;
1205                 }
1206
1207                 // FIXME: Branch to end of shader if all killed?
1208         }
1209
1210         void PixelProgram::DISCARD(Int cMask[4], const Shader::Instruction *instruction)
1211         {
1212                 Int kill = 0;
1213
1214                 if(shader->containsDynamicBranching())
1215                 {
1216                         kill = ~SignMask(enableMask(instruction));
1217                 }
1218
1219                 for(unsigned int q = 0; q < state.multiSample; q++)
1220                 {
1221                         cMask[q] &= kill;
1222                 }
1223
1224                 // FIXME: Branch to end of shader if all killed?
1225         }
1226
1227         void PixelProgram::DFDX(Vector4f &dst, Vector4f &src)
1228         {
1229                 dst.x = src.x.yyww - src.x.xxzz;
1230                 dst.y = src.y.yyww - src.y.xxzz;
1231                 dst.z = src.z.yyww - src.z.xxzz;
1232                 dst.w = src.w.yyww - src.w.xxzz;
1233         }
1234
1235         void PixelProgram::DFDY(Vector4f &dst, Vector4f &src)
1236         {
1237                 dst.x = src.x.zwzw - src.x.xyxy;
1238                 dst.y = src.y.zwzw - src.y.xyxy;
1239                 dst.z = src.z.zwzw - src.z.xyxy;
1240                 dst.w = src.w.zwzw - src.w.xyxy;
1241         }
1242
1243         void PixelProgram::FWIDTH(Vector4f &dst, Vector4f &src)
1244         {
1245                 // abs(dFdx(src)) + abs(dFdy(src));
1246                 dst.x = Abs(src.x.yyww - src.x.xxzz) + Abs(src.x.zwzw - src.x.xyxy);
1247                 dst.y = Abs(src.y.yyww - src.y.xxzz) + Abs(src.y.zwzw - src.y.xyxy);
1248                 dst.z = Abs(src.z.yyww - src.z.xxzz) + Abs(src.z.zwzw - src.z.xyxy);
1249                 dst.w = Abs(src.w.yyww - src.w.xxzz) + Abs(src.w.zwzw - src.w.xyxy);
1250         }
1251
1252         void PixelProgram::BREAK()
1253         {
1254                 BasicBlock *deadBlock = Nucleus::createBasicBlock();
1255                 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
1256
1257                 if(breakDepth == 0)
1258                 {
1259                         enableIndex = enableIndex - breakDepth;
1260                         Nucleus::createBr(endBlock);
1261                 }
1262                 else
1263                 {
1264                         enableBreak = enableBreak & ~enableStack[enableIndex];
1265                         Bool allBreak = SignMask(enableBreak) == 0x0;
1266
1267                         enableIndex = enableIndex - breakDepth;
1268                         branch(allBreak, endBlock, deadBlock);
1269                 }
1270
1271                 Nucleus::setInsertBlock(deadBlock);
1272                 enableIndex = enableIndex + breakDepth;
1273         }
1274
1275         void PixelProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control)
1276         {
1277                 Int4 condition;
1278
1279                 switch(control)
1280                 {
1281                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
1282                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
1283                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
1284                 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
1285                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
1286                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
1287                 default:
1288                         ASSERT(false);
1289                 }
1290
1291                 BREAK(condition);
1292         }
1293
1294         void PixelProgram::BREAKP(const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
1295         {
1296                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1297
1298                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1299                 {
1300                         condition = ~condition;
1301                 }
1302
1303                 BREAK(condition);
1304         }
1305
1306         void PixelProgram::BREAK(Int4 &condition)
1307         {
1308                 condition &= enableStack[enableIndex];
1309
1310                 BasicBlock *continueBlock = Nucleus::createBasicBlock();
1311                 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
1312
1313                 enableBreak = enableBreak & ~condition;
1314                 Bool allBreak = SignMask(enableBreak) == 0x0;
1315
1316                 enableIndex = enableIndex - breakDepth;
1317                 branch(allBreak, endBlock, continueBlock);
1318
1319                 Nucleus::setInsertBlock(continueBlock);
1320                 enableIndex = enableIndex + breakDepth;
1321         }
1322
1323         void PixelProgram::CONTINUE()
1324         {
1325                 enableContinue = enableContinue & ~enableStack[enableIndex];
1326         }
1327
1328         void PixelProgram::TEST()
1329         {
1330                 whileTest = true;
1331         }
1332
1333         void PixelProgram::CALL(int labelIndex, int callSiteIndex)
1334         {
1335                 if(!labelBlock[labelIndex])
1336                 {
1337                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1338                 }
1339
1340                 if(callRetBlock[labelIndex].size() > 1)
1341                 {
1342                         callStack[stackIndex++] = UInt(callSiteIndex);
1343                 }
1344
1345                 Int4 restoreLeave = enableLeave;
1346
1347                 Nucleus::createBr(labelBlock[labelIndex]);
1348                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1349
1350                 enableLeave = restoreLeave;
1351         }
1352
1353         void PixelProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src)
1354         {
1355                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1356                 {
1357                         CALLNZb(labelIndex, callSiteIndex, src);
1358                 }
1359                 else if(src.type == Shader::PARAMETER_PREDICATE)
1360                 {
1361                         CALLNZp(labelIndex, callSiteIndex, src);
1362                 }
1363                 else ASSERT(false);
1364         }
1365
1366         void PixelProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister)
1367         {
1368                 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0));   // FIXME
1369
1370                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1371                 {
1372                         condition = !condition;
1373                 }
1374
1375                 if(!labelBlock[labelIndex])
1376                 {
1377                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1378                 }
1379
1380                 if(callRetBlock[labelIndex].size() > 1)
1381                 {
1382                         callStack[stackIndex++] = UInt(callSiteIndex);
1383                 }
1384
1385                 Int4 restoreLeave = enableLeave;
1386
1387                 branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1388                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1389
1390                 enableLeave = restoreLeave;
1391         }
1392
1393         void PixelProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister)
1394         {
1395                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1396
1397                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1398                 {
1399                         condition = ~condition;
1400                 }
1401
1402                 condition &= enableStack[enableIndex];
1403
1404                 if(!labelBlock[labelIndex])
1405                 {
1406                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1407                 }
1408
1409                 if(callRetBlock[labelIndex].size() > 1)
1410                 {
1411                         callStack[stackIndex++] = UInt(callSiteIndex);
1412                 }
1413
1414                 enableIndex++;
1415                 enableStack[enableIndex] = condition;
1416                 Int4 restoreLeave = enableLeave;
1417
1418                 Bool notAllFalse = SignMask(condition) != 0;
1419                 branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1420                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1421
1422                 enableIndex--;
1423                 enableLeave = restoreLeave;
1424         }
1425
1426         void PixelProgram::ELSE()
1427         {
1428                 ifDepth--;
1429
1430                 BasicBlock *falseBlock = ifFalseBlock[ifDepth];
1431                 BasicBlock *endBlock = Nucleus::createBasicBlock();
1432
1433                 if(isConditionalIf[ifDepth])
1434                 {
1435                         Int4 condition = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
1436                         Bool notAllFalse = SignMask(condition) != 0;
1437
1438                         branch(notAllFalse, falseBlock, endBlock);
1439
1440                         enableStack[enableIndex] = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
1441                 }
1442                 else
1443                 {
1444                         Nucleus::createBr(endBlock);
1445                         Nucleus::setInsertBlock(falseBlock);
1446                 }
1447
1448                 ifFalseBlock[ifDepth] = endBlock;
1449
1450                 ifDepth++;
1451         }
1452
1453         void PixelProgram::ENDIF()
1454         {
1455                 ifDepth--;
1456
1457                 BasicBlock *endBlock = ifFalseBlock[ifDepth];
1458
1459                 Nucleus::createBr(endBlock);
1460                 Nucleus::setInsertBlock(endBlock);
1461
1462                 if(isConditionalIf[ifDepth])
1463                 {
1464                         breakDepth--;
1465                         enableIndex--;
1466                 }
1467         }
1468
1469         void PixelProgram::ENDLOOP()
1470         {
1471                 loopRepDepth--;
1472
1473                 aL[loopDepth] = aL[loopDepth] + increment[loopDepth];   // FIXME: +=
1474
1475                 BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1476                 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1477
1478                 Nucleus::createBr(testBlock);
1479                 Nucleus::setInsertBlock(endBlock);
1480
1481                 loopDepth--;
1482                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1483         }
1484
1485         void PixelProgram::ENDREP()
1486         {
1487                 loopRepDepth--;
1488
1489                 BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1490                 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1491
1492                 Nucleus::createBr(testBlock);
1493                 Nucleus::setInsertBlock(endBlock);
1494
1495                 loopDepth--;
1496                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1497         }
1498
1499         void PixelProgram::ENDWHILE()
1500         {
1501                 loopRepDepth--;
1502
1503                 BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1504                 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1505
1506                 Nucleus::createBr(testBlock);
1507                 Nucleus::setInsertBlock(endBlock);
1508
1509                 enableIndex--;
1510                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1511                 whileTest = false;
1512         }
1513
1514         void PixelProgram::ENDSWITCH()
1515         {
1516                 loopRepDepth--;
1517
1518                 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1519
1520                 Nucleus::createBr(loopRepEndBlock[loopRepDepth]);
1521                 Nucleus::setInsertBlock(endBlock);
1522
1523                 enableIndex--;
1524                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1525         }
1526
1527         void PixelProgram::IF(const Src &src)
1528         {
1529                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1530                 {
1531                         IFb(src);
1532                 }
1533                 else if(src.type == Shader::PARAMETER_PREDICATE)
1534                 {
1535                         IFp(src);
1536                 }
1537                 else
1538                 {
1539                         Int4 condition = As<Int4>(fetchRegister(src).x);
1540                         IF(condition);
1541                 }
1542         }
1543
1544         void PixelProgram::IFb(const Src &boolRegister)
1545         {
1546                 ASSERT(ifDepth < 24 + 4);
1547
1548                 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0));   // FIXME
1549
1550                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1551                 {
1552                         condition = !condition;
1553                 }
1554
1555                 BasicBlock *trueBlock = Nucleus::createBasicBlock();
1556                 BasicBlock *falseBlock = Nucleus::createBasicBlock();
1557
1558                 branch(condition, trueBlock, falseBlock);
1559
1560                 isConditionalIf[ifDepth] = false;
1561                 ifFalseBlock[ifDepth] = falseBlock;
1562
1563                 ifDepth++;
1564         }
1565
1566         void PixelProgram::IFp(const Src &predicateRegister)
1567         {
1568                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1569
1570                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1571                 {
1572                         condition = ~condition;
1573                 }
1574
1575                 IF(condition);
1576         }
1577
1578         void PixelProgram::IFC(Vector4f &src0, Vector4f &src1, Control control)
1579         {
1580                 Int4 condition;
1581
1582                 switch(control)
1583                 {
1584                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
1585                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
1586                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
1587                 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
1588                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
1589                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
1590                 default:
1591                         ASSERT(false);
1592                 }
1593
1594                 IF(condition);
1595         }
1596
1597         void PixelProgram::IF(Int4 &condition)
1598         {
1599                 condition &= enableStack[enableIndex];
1600
1601                 enableIndex++;
1602                 enableStack[enableIndex] = condition;
1603
1604                 BasicBlock *trueBlock = Nucleus::createBasicBlock();
1605                 BasicBlock *falseBlock = Nucleus::createBasicBlock();
1606
1607                 Bool notAllFalse = SignMask(condition) != 0;
1608
1609                 branch(notAllFalse, trueBlock, falseBlock);
1610
1611                 isConditionalIf[ifDepth] = true;
1612                 ifFalseBlock[ifDepth] = falseBlock;
1613
1614                 ifDepth++;
1615                 breakDepth++;
1616         }
1617
1618         void PixelProgram::LABEL(int labelIndex)
1619         {
1620                 if(!labelBlock[labelIndex])
1621                 {
1622                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1623                 }
1624
1625                 Nucleus::setInsertBlock(labelBlock[labelIndex]);
1626                 currentLabel = labelIndex;
1627         }
1628
1629         void PixelProgram::LOOP(const Src &integerRegister)
1630         {
1631                 loopDepth++;
1632
1633                 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0]));
1634                 aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][1]));
1635                 increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][2]));
1636
1637                 //      If(increment[loopDepth] == 0)
1638                 //      {
1639                 //              increment[loopDepth] = 1;
1640                 //      }
1641
1642                 BasicBlock *loopBlock = Nucleus::createBasicBlock();
1643                 BasicBlock *testBlock = Nucleus::createBasicBlock();
1644                 BasicBlock *endBlock = Nucleus::createBasicBlock();
1645
1646                 loopRepTestBlock[loopRepDepth] = testBlock;
1647                 loopRepEndBlock[loopRepDepth] = endBlock;
1648
1649                 // FIXME: jump(testBlock)
1650                 Nucleus::createBr(testBlock);
1651                 Nucleus::setInsertBlock(testBlock);
1652
1653                 branch(iteration[loopDepth] > 0, loopBlock, endBlock);
1654                 Nucleus::setInsertBlock(loopBlock);
1655
1656                 iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1657
1658                 loopRepDepth++;
1659                 breakDepth = 0;
1660         }
1661
1662         void PixelProgram::REP(const Src &integerRegister)
1663         {
1664                 loopDepth++;
1665
1666                 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0]));
1667                 aL[loopDepth] = aL[loopDepth - 1];
1668
1669                 BasicBlock *loopBlock = Nucleus::createBasicBlock();
1670                 BasicBlock *testBlock = Nucleus::createBasicBlock();
1671                 BasicBlock *endBlock = Nucleus::createBasicBlock();
1672
1673                 loopRepTestBlock[loopRepDepth] = testBlock;
1674                 loopRepEndBlock[loopRepDepth] = endBlock;
1675
1676                 // FIXME: jump(testBlock)
1677                 Nucleus::createBr(testBlock);
1678                 Nucleus::setInsertBlock(testBlock);
1679
1680                 branch(iteration[loopDepth] > 0, loopBlock, endBlock);
1681                 Nucleus::setInsertBlock(loopBlock);
1682
1683                 iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1684
1685                 loopRepDepth++;
1686                 breakDepth = 0;
1687         }
1688
1689         void PixelProgram::WHILE(const Src &temporaryRegister)
1690         {
1691                 enableIndex++;
1692
1693                 BasicBlock *loopBlock = Nucleus::createBasicBlock();
1694                 BasicBlock *testBlock = Nucleus::createBasicBlock();
1695                 BasicBlock *endBlock = Nucleus::createBasicBlock();
1696
1697                 loopRepTestBlock[loopRepDepth] = testBlock;
1698                 loopRepEndBlock[loopRepDepth] = endBlock;
1699
1700                 Int4 restoreBreak = enableBreak;
1701                 Int4 restoreContinue = enableContinue;
1702
1703                 // FIXME: jump(testBlock)
1704                 Nucleus::createBr(testBlock);
1705                 Nucleus::setInsertBlock(testBlock);
1706                 enableContinue = restoreContinue;
1707
1708                 const Vector4f &src = fetchRegister(temporaryRegister);
1709                 Int4 condition = As<Int4>(src.x);
1710                 condition &= enableStack[enableIndex - 1];
1711                 if(shader->containsLeaveInstruction()) condition &= enableLeave;
1712                 enableStack[enableIndex] = condition;
1713
1714                 Bool notAllFalse = SignMask(condition) != 0;
1715                 branch(notAllFalse, loopBlock, endBlock);
1716
1717                 Nucleus::setInsertBlock(endBlock);
1718                 enableBreak = restoreBreak;
1719
1720                 Nucleus::setInsertBlock(loopBlock);
1721
1722                 loopRepDepth++;
1723                 breakDepth = 0;
1724         }
1725
1726         void PixelProgram::SWITCH()
1727         {
1728                 enableIndex++;
1729                 enableStack[enableIndex] = Int4(0xFFFFFFFF);
1730
1731                 BasicBlock *endBlock = Nucleus::createBasicBlock();
1732
1733                 loopRepTestBlock[loopRepDepth] = nullptr;
1734                 loopRepEndBlock[loopRepDepth] = endBlock;
1735
1736                 loopRepDepth++;
1737                 breakDepth = 0;
1738         }
1739
1740         void PixelProgram::RET()
1741         {
1742                 if(currentLabel == -1)
1743                 {
1744                         returnBlock = Nucleus::createBasicBlock();
1745                         Nucleus::createBr(returnBlock);
1746                 }
1747                 else
1748                 {
1749                         BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
1750
1751                         if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
1752                         {
1753                                 // FIXME: Encapsulate
1754                                 UInt index = callStack[--stackIndex];
1755
1756                                 Value *value = index.loadValue();
1757                                 SwitchCases *switchCases = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
1758
1759                                 for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
1760                                 {
1761                                         Nucleus::addSwitchCase(switchCases, i, callRetBlock[currentLabel][i]);
1762                                 }
1763                         }
1764                         else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
1765                         {
1766                                 Nucleus::createBr(callRetBlock[currentLabel][0]);
1767                         }
1768                         else   // Function isn't called
1769                         {
1770                                 Nucleus::createBr(unreachableBlock);
1771                         }
1772
1773                         Nucleus::setInsertBlock(unreachableBlock);
1774                         Nucleus::createUnreachable();
1775                 }
1776         }
1777
1778         void PixelProgram::LEAVE()
1779         {
1780                 enableLeave = enableLeave & ~enableStack[enableIndex];
1781
1782                 // FIXME: Return from function if all instances left
1783                 // FIXME: Use enableLeave in other control-flow constructs
1784         }
1785 }