OSDN Git Service

Connecting the dots for some built-in functions
[android-x86/external-swiftshader.git] / src / Shader / VertexProgram.cpp
1 // SwiftShader Software Renderer
2 //
3 // Copyright(c) 2005-2013 TransGaming Inc.
4 //
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
10 //
11
12 #include "VertexProgram.hpp"
13
14 #include "Renderer.hpp"
15 #include "VertexShader.hpp"
16 #include "Vertex.hpp"
17 #include "Half.hpp"
18 #include "SamplerCore.hpp"
19 #include "Debug.hpp"
20
21 extern bool localShaderConstants;
22
23 namespace sw
24 {
25         VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader) : VertexRoutine(state, shader)
26         {
27                 ifDepth = 0;
28                 loopRepDepth = 0;
29                 breakDepth = 0;
30                 currentLabel = -1;
31                 whileTest = false;
32
33                 for(int i = 0; i < 2048; i++)
34                 {
35                         labelBlock[i] = 0;
36                 }
37         }
38
39         VertexProgram::~VertexProgram()
40         {
41                 for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
42                 {
43                         delete sampler[i];
44                 }
45         }
46
47         void VertexProgram::pipeline(Registers &r)
48         {
49                 for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
50                 {
51                         sampler[i] = new SamplerCore(r.constants, state.samplerState[i]);
52                 }
53
54                 if(!state.preTransformed)
55                 {
56                         program(r);
57                 }
58                 else
59                 {
60                         passThrough(r);
61                 }
62         }
63
64         void VertexProgram::program(Registers &r)
65         {
66         //      shader->print("VertexShader-%0.8X.txt", state.shaderID);
67
68                 unsigned short version = shader->getVersion();
69
70                 r.enableIndex = 0;
71                 r.stackIndex = 0;
72
73                 if(shader->containsLeaveInstruction())
74                 {
75                         r.enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
76                 }
77
78                 // Create all call site return blocks up front
79                 for(size_t i = 0; i < shader->getLength(); i++)
80                 {
81                         const Shader::Instruction *instruction = shader->getInstruction(i);
82                         Shader::Opcode opcode = instruction->opcode;
83
84                         if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
85                         {
86                                 const Dst &dst = instruction->dst;
87
88                                 ASSERT(callRetBlock[dst.label].size() == dst.callSite);
89                                 callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
90                         }
91                 }
92         
93                 for(size_t i = 0; i < shader->getLength(); i++)
94                 {
95                         const Shader::Instruction *instruction = shader->getInstruction(i);
96                         Shader::Opcode opcode = instruction->opcode;
97
98                         if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
99                         {
100                                 continue;
101                         }
102
103                         Dst dst = instruction->dst;
104                         Src src0 = instruction->src[0];
105                         Src src1 = instruction->src[1];
106                         Src src2 = instruction->src[2];
107
108                         bool predicate = instruction->predicate;
109                         int size = shader->size(opcode);
110                         Usage usage = instruction->usage;
111                         unsigned char usageIndex = instruction->usageIndex;
112                         Control control = instruction->control;
113                         bool integer = dst.type == Shader::PARAMETER_ADDR;
114                         bool pp = dst.partialPrecision;
115
116                         Vector4f d;
117                         Vector4f s0;
118                         Vector4f s1;
119                         Vector4f s2;
120
121                         if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegisterF(r, src0);
122                         if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegisterF(r, src1);
123                         if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegisterF(r, src2);
124
125                         switch(opcode)
126                         {
127                         case Shader::OPCODE_VS_1_0:                                                                             break;
128                         case Shader::OPCODE_VS_1_1:                                                                             break;
129                         case Shader::OPCODE_VS_2_0:                                                                             break;
130                         case Shader::OPCODE_VS_2_x:                                                                             break;
131                         case Shader::OPCODE_VS_2_sw:                                                                    break;
132                         case Shader::OPCODE_VS_3_0:                                                                             break;
133                         case Shader::OPCODE_VS_3_sw:                                                                    break;
134                         case Shader::OPCODE_DCL:                                                                                break;
135                         case Shader::OPCODE_DEF:                                                                                break;
136                         case Shader::OPCODE_DEFI:                                                                               break;
137                         case Shader::OPCODE_DEFB:                                                                               break;
138                         case Shader::OPCODE_NOP:                                                                                break;
139                         case Shader::OPCODE_ABS:                abs(d, s0);                                             break;
140                         case Shader::OPCODE_ADD:                add(d, s0, s1);                                 break;
141                         case Shader::OPCODE_CRS:                crs(d, s0, s1);                                 break;
142                         case Shader::OPCODE_FORWARD1:   forward1(d, s0, s1, s2);                break;
143                         case Shader::OPCODE_FORWARD2:   forward2(d, s0, s1, s2);                break;
144                         case Shader::OPCODE_FORWARD3:   forward3(d, s0, s1, s2);                break;
145                         case Shader::OPCODE_FORWARD4:   forward4(d, s0, s1, s2);                break;
146                         case Shader::OPCODE_REFLECT1:   reflect1(d, s0, s1);                    break;
147                         case Shader::OPCODE_REFLECT2:   reflect2(d, s0, s1);                    break;
148                         case Shader::OPCODE_REFLECT3:   reflect3(d, s0, s1);                    break;
149                         case Shader::OPCODE_REFLECT4:   reflect4(d, s0, s1);                    break;
150                         case Shader::OPCODE_REFRACT1:   refract1(d, s0, s1, s2.x);              break;
151                         case Shader::OPCODE_REFRACT2:   refract2(d, s0, s1, s2.x);              break;
152                         case Shader::OPCODE_REFRACT3:   refract3(d, s0, s1, s2.x);              break;
153                         case Shader::OPCODE_REFRACT4:   refract4(d, s0, s1, s2.x);              break;
154                         case Shader::OPCODE_DP1:                dp1(d, s0, s1);                                 break;
155                         case Shader::OPCODE_DP2:                dp2(d, s0, s1);                                 break;
156                         case Shader::OPCODE_DP3:                dp3(d, s0, s1);                                 break;
157                         case Shader::OPCODE_DP4:                dp4(d, s0, s1);                                 break;
158                         case Shader::OPCODE_ATT:                att(d, s0, s1);                                 break;
159                         case Shader::OPCODE_EXP2X:              exp2x(d, s0, pp);                               break;
160                         case Shader::OPCODE_EXP2:               exp2(d, s0, pp);                                break;
161                         case Shader::OPCODE_EXPP:               expp(d, s0, version);                   break;
162                         case Shader::OPCODE_EXP:                exp(d, s0, pp);                                 break;
163                         case Shader::OPCODE_FRC:                frc(d, s0);                                             break;
164                         case Shader::OPCODE_TRUNC:      trunc(d, s0);                   break;
165                         case Shader::OPCODE_FLOOR:      floor(d, s0);                   break;
166                         case Shader::OPCODE_ROUND:      round(d, s0);                   break;
167                         case Shader::OPCODE_CEIL:       ceil(d, s0);                    break;
168                         case Shader::OPCODE_LIT:                lit(d, s0);                                             break;
169                         case Shader::OPCODE_LOG2X:              log2x(d, s0, pp);                               break;
170                         case Shader::OPCODE_LOG2:               log2(d, s0, pp);                                break;
171                         case Shader::OPCODE_LOGP:               logp(d, s0, version);                   break;
172                         case Shader::OPCODE_LOG:                log(d, s0, pp);                                 break;
173                         case Shader::OPCODE_LRP:                lrp(d, s0, s1, s2);                             break;
174                         case Shader::OPCODE_STEP:               step(d, s0, s1);                                break;
175                         case Shader::OPCODE_SMOOTH:             smooth(d, s0, s1, s2);                  break;
176                         case Shader::OPCODE_M3X2:               M3X2(r, d, s0, src1);                   break;
177                         case Shader::OPCODE_M3X3:               M3X3(r, d, s0, src1);                   break;
178                         case Shader::OPCODE_M3X4:               M3X4(r, d, s0, src1);                   break;
179                         case Shader::OPCODE_M4X3:               M4X3(r, d, s0, src1);                   break;
180                         case Shader::OPCODE_M4X4:               M4X4(r, d, s0, src1);                   break;
181                         case Shader::OPCODE_MAD:                mad(d, s0, s1, s2);                             break;
182                         case Shader::OPCODE_MAX:                max(d, s0, s1);                                 break;
183                         case Shader::OPCODE_MIN:                min(d, s0, s1);                                 break;
184                         case Shader::OPCODE_MOV:                mov(d, s0, integer);                    break;
185                         case Shader::OPCODE_MOVA:               mov(d, s0);                                             break;
186                         case Shader::OPCODE_F2B:                f2b(d, s0);                                             break;
187                         case Shader::OPCODE_B2F:                b2f(d, s0);                                             break;
188                         case Shader::OPCODE_MUL:                mul(d, s0, s1);                                 break;
189                         case Shader::OPCODE_NRM2:               nrm2(d, s0, pp);                                break;
190                         case Shader::OPCODE_NRM3:               nrm3(d, s0, pp);                                break;
191                         case Shader::OPCODE_NRM4:               nrm4(d, s0, pp);                                break;
192                         case Shader::OPCODE_POWX:               powx(d, s0, s1, pp);                    break;
193                         case Shader::OPCODE_POW:                pow(d, s0, s1, pp);                             break;
194                         case Shader::OPCODE_RCPX:               rcpx(d, s0, pp);                                break;
195                         case Shader::OPCODE_DIV:                div(d, s0, s1);                                 break;
196                         case Shader::OPCODE_MOD:                mod(d, s0, s1);                                 break;
197                         case Shader::OPCODE_RSQX:               rsqx(d, s0, pp);                                break;
198                         case Shader::OPCODE_SQRT:               sqrt(d, s0, pp);                                break;
199                         case Shader::OPCODE_RSQ:                rsq(d, s0, pp);                                 break;
200                         case Shader::OPCODE_LEN2:               len2(d.x, s0, pp);                              break;
201                         case Shader::OPCODE_LEN3:               len3(d.x, s0, pp);                              break;
202                         case Shader::OPCODE_LEN4:               len4(d.x, s0, pp);                              break;
203                         case Shader::OPCODE_DIST1:              dist1(d.x, s0, s1, pp);                 break;
204                         case Shader::OPCODE_DIST2:              dist2(d.x, s0, s1, pp);                 break;
205                         case Shader::OPCODE_DIST3:              dist3(d.x, s0, s1, pp);                 break;
206                         case Shader::OPCODE_DIST4:              dist4(d.x, s0, s1, pp);                 break;
207                         case Shader::OPCODE_SGE:                step(d, s1, s0);                                break;
208                         case Shader::OPCODE_SGN:                sgn(d, s0);                                             break;
209                         case Shader::OPCODE_SINCOS:             sincos(d, s0, pp);                              break;
210                         case Shader::OPCODE_COS:                cos(d, s0, pp);                                 break;
211                         case Shader::OPCODE_SIN:                sin(d, s0, pp);                                 break;
212                         case Shader::OPCODE_TAN:                tan(d, s0);                                             break;
213                         case Shader::OPCODE_ACOS:               acos(d, s0);                                    break;
214                         case Shader::OPCODE_ASIN:               asin(d, s0);                                    break;
215                         case Shader::OPCODE_ATAN:               atan(d, s0);                                    break;
216                         case Shader::OPCODE_ATAN2:              atan2(d, s0, s1);                               break;
217                         case Shader::OPCODE_COSH:               cosh(d, s0, pp);                                break;
218                         case Shader::OPCODE_SINH:               sinh(d, s0, pp);                                break;
219                         case Shader::OPCODE_TANH:               tanh(d, s0, pp);                                break;
220                         case Shader::OPCODE_ACOSH:              acosh(d, s0, pp);                               break;
221                         case Shader::OPCODE_ASINH:              asinh(d, s0, pp);                               break;
222                         case Shader::OPCODE_ATANH:              atanh(d, s0, pp);                               break;
223                         case Shader::OPCODE_SLT:                slt(d, s0, s1);                                 break;
224                         case Shader::OPCODE_SUB:                sub(d, s0, s1);                                 break;
225                         case Shader::OPCODE_BREAK:              BREAK(r);                                               break;
226                         case Shader::OPCODE_BREAKC:             BREAKC(r, s0, s1, control);             break;
227                         case Shader::OPCODE_BREAKP:             BREAKP(r, src0);                                break;
228                         case Shader::OPCODE_CONTINUE:   CONTINUE(r);                                    break;
229                         case Shader::OPCODE_TEST:               TEST();                                                 break;
230                         case Shader::OPCODE_CALL:               CALL(r, dst.label, dst.callSite);         break;
231                         case Shader::OPCODE_CALLNZ:             CALLNZ(r, dst.label, dst.callSite, src0); break;
232                         case Shader::OPCODE_ELSE:               ELSE(r);                                                break;
233                         case Shader::OPCODE_ENDIF:              ENDIF(r);                                               break;
234                         case Shader::OPCODE_ENDLOOP:    ENDLOOP(r);                                             break;
235                         case Shader::OPCODE_ENDREP:             ENDREP(r);                                              break;
236                         case Shader::OPCODE_ENDWHILE:   ENDWHILE(r);                                    break;
237                         case Shader::OPCODE_IF:                 IF(r, src0);                                    break;
238                         case Shader::OPCODE_IFC:                IFC(r, s0, s1, control);                break;
239                         case Shader::OPCODE_LABEL:              LABEL(dst.index);                               break;
240                         case Shader::OPCODE_LOOP:               LOOP(r, src1);                                  break;
241                         case Shader::OPCODE_REP:                REP(r, src0);                                   break;
242                         case Shader::OPCODE_WHILE:              WHILE(r, src0);                                 break;
243                         case Shader::OPCODE_RET:                RET(r);                                                 break;
244                         case Shader::OPCODE_LEAVE:              LEAVE(r);                                               break;
245                         case Shader::OPCODE_CMP:                cmp(d, s0, s1, control);                break;
246                         case Shader::OPCODE_ICMP:               icmp(d, s0, s1, control);               break;
247                         case Shader::OPCODE_SELECT:             select(d, s0, s1, s2);                  break;
248                         case Shader::OPCODE_EXTRACT:    extract(d.x, s0, s1.x);                 break;
249                         case Shader::OPCODE_INSERT:             insert(d, s0, s1.x, s2.x);              break;
250                         case Shader::OPCODE_ALL:                all(d.x, s0);                                   break;
251                         case Shader::OPCODE_ANY:                any(d.x, s0);                                   break;
252                         case Shader::OPCODE_NOT:                not(d, s0);                                             break;
253                         case Shader::OPCODE_OR:                 or(d.x, s0.x, s1.x);                    break;
254                         case Shader::OPCODE_XOR:                xor(d.x, s0.x, s1.x);                   break;
255                         case Shader::OPCODE_AND:                and(d.x, s0.x, s1.x);                   break;
256                         case Shader::OPCODE_TEXLDL:             TEXLDL(r, d, s0, src1);                 break;
257                         case Shader::OPCODE_TEX:                TEX(r, d, s0, src1);                    break;
258                         case Shader::OPCODE_END:                                                                                break;
259                         default:
260                                 ASSERT(false);
261                         }
262
263                         if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_NOP)
264                         {
265                                 if(dst.integer)
266                                 {
267                                         switch(opcode)
268                                         {
269                                         case Shader::OPCODE_DIV:
270                                                 if(dst.x) d.x = Trunc(d.x);
271                                                 if(dst.y) d.y = Trunc(d.y);
272                                                 if(dst.z) d.z = Trunc(d.z);
273                                                 if(dst.w) d.w = Trunc(d.w);
274                                                 break;
275                                         default:
276                                                 break;   // No truncation to integer required when arguments are integer
277                                         }
278                                 }
279
280                                 if(dst.saturate)
281                                 {
282                                         if(dst.x) d.x = Max(d.x, Float4(0.0f));
283                                         if(dst.y) d.y = Max(d.y, Float4(0.0f));
284                                         if(dst.z) d.z = Max(d.z, Float4(0.0f));
285                                         if(dst.w) d.w = Max(d.w, Float4(0.0f));
286
287                                         if(dst.x) d.x = Min(d.x, Float4(1.0f));
288                                         if(dst.y) d.y = Min(d.y, Float4(1.0f));
289                                         if(dst.z) d.z = Min(d.z, Float4(1.0f));
290                                         if(dst.w) d.w = Min(d.w, Float4(1.0f));
291                                 }
292
293                                 if(instruction->isPredicated())
294                                 {
295                                         Vector4f pDst;   // FIXME: Rename
296
297                                         switch(dst.type)
298                                         {
299                                         case Shader::PARAMETER_VOID:                                                                                                                                            break;
300                                         case Shader::PARAMETER_TEMP:
301                                                 if(dst.rel.type == Shader::PARAMETER_VOID)
302                                                 {
303                                                         if(dst.x) pDst.x = r.r[dst.index].x;
304                                                         if(dst.y) pDst.y = r.r[dst.index].y;
305                                                         if(dst.z) pDst.z = r.r[dst.index].z;
306                                                         if(dst.w) pDst.w = r.r[dst.index].w;
307                                                 }
308                                                 else
309                                                 {
310                                                         Int a = relativeAddress(r, dst);
311
312                                                         if(dst.x) pDst.x = r.r[dst.index + a].x;
313                                                         if(dst.y) pDst.y = r.r[dst.index + a].y;
314                                                         if(dst.z) pDst.z = r.r[dst.index + a].z;
315                                                         if(dst.w) pDst.w = r.r[dst.index + a].w;
316                                                 }
317                                                 break;
318                                         case Shader::PARAMETER_ADDR:            pDst = r.a0;                                                                                                    break;
319                                         case Shader::PARAMETER_RASTOUT:
320                                                 switch(dst.index)
321                                                 {
322                                                 case 0:
323                                                         if(dst.x) pDst.x = r.o[Pos].x;
324                                                         if(dst.y) pDst.y = r.o[Pos].y;
325                                                         if(dst.z) pDst.z = r.o[Pos].z;
326                                                         if(dst.w) pDst.w = r.o[Pos].w;
327                                                         break;
328                                                 case 1:
329                                                         pDst.x = r.o[Fog].x;
330                                                         break;
331                                                 case 2:
332                                                         pDst.x = r.o[Pts].y;
333                                                         break;
334                                                 default:
335                                                         ASSERT(false);
336                                                 }
337                                                 break;
338                                         case Shader::PARAMETER_ATTROUT:
339                                                 if(dst.x) pDst.x = r.o[D0 + dst.index].x;
340                                                 if(dst.y) pDst.y = r.o[D0 + dst.index].y;
341                                                 if(dst.z) pDst.z = r.o[D0 + dst.index].z;
342                                                 if(dst.w) pDst.w = r.o[D0 + dst.index].w;
343                                                 break;
344                                         case Shader::PARAMETER_TEXCRDOUT:
345                                 //      case Shader::PARAMETER_OUTPUT:
346                                                 if(version < 0x0300)
347                                                 {
348                                                         if(dst.x) pDst.x = r.o[T0 + dst.index].x;
349                                                         if(dst.y) pDst.y = r.o[T0 + dst.index].y;
350                                                         if(dst.z) pDst.z = r.o[T0 + dst.index].z;
351                                                         if(dst.w) pDst.w = r.o[T0 + dst.index].w;
352                                                 }
353                                                 else
354                                                 {
355                                                         if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
356                                                         {
357                                                                 if(dst.x) pDst.x = r.o[dst.index].x;
358                                                                 if(dst.y) pDst.y = r.o[dst.index].y;
359                                                                 if(dst.z) pDst.z = r.o[dst.index].z;
360                                                                 if(dst.w) pDst.w = r.o[dst.index].w;
361                                                         }
362                                                         else if(dst.rel.type == Shader::PARAMETER_LOOP)
363                                                         {
364                                                                 Int aL = r.aL[r.loopDepth];
365
366                                                                 if(dst.x) pDst.x = r.o[dst.index + aL].x;
367                                                                 if(dst.y) pDst.y = r.o[dst.index + aL].y;
368                                                                 if(dst.z) pDst.z = r.o[dst.index + aL].z;
369                                                                 if(dst.w) pDst.w = r.o[dst.index + aL].w;
370                                                         }
371                                                         else
372                                                         {
373                                                                 Int a = relativeAddress(r, dst);
374
375                                                                 if(dst.x) pDst.x = r.o[dst.index + a].x;
376                                                                 if(dst.y) pDst.y = r.o[dst.index + a].y;
377                                                                 if(dst.z) pDst.z = r.o[dst.index + a].z;
378                                                                 if(dst.w) pDst.w = r.o[dst.index + a].w;
379                                                         }
380                                                 }
381                                                 break;
382                                         case Shader::PARAMETER_LABEL:                                                                                                                                           break;
383                                         case Shader::PARAMETER_PREDICATE:       pDst = r.p0;                                                                                                    break;
384                                         case Shader::PARAMETER_INPUT:                                                                                                                                           break;
385                                         default:
386                                                 ASSERT(false);
387                                         }
388
389                                         Int4 enable = enableMask(r, instruction);
390
391                                         Int4 xEnable = enable;
392                                         Int4 yEnable = enable;
393                                         Int4 zEnable = enable;
394                                         Int4 wEnable = enable;
395
396                                         if(predicate)
397                                         {
398                                                 unsigned char pSwizzle = instruction->predicateSwizzle;
399
400                                                 Float4 xPredicate = r.p0[(pSwizzle >> 0) & 0x03];
401                                                 Float4 yPredicate = r.p0[(pSwizzle >> 2) & 0x03];
402                                                 Float4 zPredicate = r.p0[(pSwizzle >> 4) & 0x03];
403                                                 Float4 wPredicate = r.p0[(pSwizzle >> 6) & 0x03];
404
405                                                 if(!instruction->predicateNot)
406                                                 {
407                                                         if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
408                                                         if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
409                                                         if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
410                                                         if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
411                                                 }
412                                                 else
413                                                 {
414                                                         if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
415                                                         if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
416                                                         if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
417                                                         if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
418                                                 }
419                                         }
420
421                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
422                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
423                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
424                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
425
426                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
427                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
428                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
429                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
430                                 }
431
432                                 switch(dst.type)
433                                 {
434                                 case Shader::PARAMETER_VOID:
435                                         break;
436                                 case Shader::PARAMETER_TEMP:
437                                         if(dst.rel.type == Shader::PARAMETER_VOID)
438                                         {
439                                                 if(dst.x) r.r[dst.index].x = d.x;
440                                                 if(dst.y) r.r[dst.index].y = d.y;
441                                                 if(dst.z) r.r[dst.index].z = d.z;
442                                                 if(dst.w) r.r[dst.index].w = d.w;
443                                         }
444                                         else
445                                         {
446                                                 Int a = relativeAddress(r, dst);
447
448                                                 if(dst.x) r.r[dst.index + a].x = d.x;
449                                                 if(dst.y) r.r[dst.index + a].y = d.y;
450                                                 if(dst.z) r.r[dst.index + a].z = d.z;
451                                                 if(dst.w) r.r[dst.index + a].w = d.w;
452                                         }
453                                         break;
454                                 case Shader::PARAMETER_ADDR:
455                                         if(dst.x) r.a0.x = d.x;
456                                         if(dst.y) r.a0.y = d.y;
457                                         if(dst.z) r.a0.z = d.z;
458                                         if(dst.w) r.a0.w = d.w;
459                                         break;
460                                 case Shader::PARAMETER_RASTOUT:
461                                         switch(dst.index)
462                                         {
463                                         case 0:
464                                                 if(dst.x) r.o[Pos].x = d.x;
465                                                 if(dst.y) r.o[Pos].y = d.y;
466                                                 if(dst.z) r.o[Pos].z = d.z;
467                                                 if(dst.w) r.o[Pos].w = d.w;
468                                                 break;
469                                         case 1:
470                                                 r.o[Fog].x = d.x;
471                                                 break;
472                                         case 2:         
473                                                 r.o[Pts].y = d.x;
474                                                 break;
475                                         default:        ASSERT(false);
476                                         }
477                                         break;
478                                 case Shader::PARAMETER_ATTROUT: 
479                                         if(dst.x) r.o[D0 + dst.index].x = d.x;
480                                         if(dst.y) r.o[D0 + dst.index].y = d.y;
481                                         if(dst.z) r.o[D0 + dst.index].z = d.z;
482                                         if(dst.w) r.o[D0 + dst.index].w = d.w;
483                                         break;
484                                 case Shader::PARAMETER_TEXCRDOUT:
485                         //      case Shader::PARAMETER_OUTPUT:
486                                         if(version < 0x0300)
487                                         {
488                                                 if(dst.x) r.o[T0 + dst.index].x = d.x;
489                                                 if(dst.y) r.o[T0 + dst.index].y = d.y;
490                                                 if(dst.z) r.o[T0 + dst.index].z = d.z;
491                                                 if(dst.w) r.o[T0 + dst.index].w = d.w;
492                                         }
493                                         else
494                                         {
495                                                 if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
496                                                 {
497                                                         if(dst.x) r.o[dst.index].x = d.x;
498                                                         if(dst.y) r.o[dst.index].y = d.y;
499                                                         if(dst.z) r.o[dst.index].z = d.z;
500                                                         if(dst.w) r.o[dst.index].w = d.w;
501                                                 }
502                                                 else if(dst.rel.type == Shader::PARAMETER_LOOP)
503                                                 {
504                                                         Int aL = r.aL[r.loopDepth];
505
506                                                         if(dst.x) r.o[dst.index + aL].x = d.x;
507                                                         if(dst.y) r.o[dst.index + aL].y = d.y;
508                                                         if(dst.z) r.o[dst.index + aL].z = d.z;
509                                                         if(dst.w) r.o[dst.index + aL].w = d.w;
510                                                 }
511                                                 else
512                                                 {
513                                                         Int a = relativeAddress(r, dst);
514
515                                                         if(dst.x) r.o[dst.index + a].x = d.x;
516                                                         if(dst.y) r.o[dst.index + a].y = d.y;
517                                                         if(dst.z) r.o[dst.index + a].z = d.z;
518                                                         if(dst.w) r.o[dst.index + a].w = d.w;
519                                                 }
520                                         }
521                                         break;
522                                 case Shader::PARAMETER_LABEL:                                                                                                                                           break;
523                                 case Shader::PARAMETER_PREDICATE:       r.p0 = d;                                                                                                               break;
524                                 case Shader::PARAMETER_INPUT:                                                                                                                                           break;
525                                 default:
526                                         ASSERT(false);
527                                 }
528                         }
529                 }
530
531                 if(currentLabel != -1)
532                 {
533                         Nucleus::setInsertBlock(returnBlock);
534                 }
535         }
536
537         void VertexProgram::passThrough(Registers &r)   // Copies input registers (r.v) to output registers (r.o) component by component, without executing shader instructions
538         {
539                 if(shader)   // A shader object exists: copy each output slot according to its declared usage
540                 {
541                         for(int i = 0; i < 12; i++)   // NOTE(review): 12 is presumably the number of output slots - confirm against the Shader declaration
542                         {
543                                 unsigned char usage = shader->output[i][0].usage;   // Only element [0] of the slot is consulted
544                                 unsigned char index = shader->output[i][0].index;   // Fetched but not referenced below
545
546                                 switch(usage)
547                                 {
548                                 case 0xFF:   // Presumably marks an undeclared output slot (TODO confirm); nothing is copied for it
549                                         continue;
550                                 case Shader::USAGE_PSIZE:
551                                         r.o[i].y = r.v[i].x;   // Point size goes to the y component, as in the shader-less path below
552                                         break;
553                                 case Shader::USAGE_TEXCOORD:   // All four components are copied
554                                         r.o[i].x = r.v[i].x;
555                                         r.o[i].y = r.v[i].y;
556                                         r.o[i].z = r.v[i].z;
557                                         r.o[i].w = r.v[i].w;
558                                         break;
559                                 case Shader::USAGE_POSITION:   // All four components are copied
560                                         r.o[i].x = r.v[i].x;
561                                         r.o[i].y = r.v[i].y;
562                                         r.o[i].z = r.v[i].z;
563                                         r.o[i].w = r.v[i].w;
564                                         break;
565                                 case Shader::USAGE_COLOR:   // All four components are copied
566                                         r.o[i].x = r.v[i].x;
567                                         r.o[i].y = r.v[i].y;
568                                         r.o[i].z = r.v[i].z;
569                                         r.o[i].w = r.v[i].w;
570                                         break;
571                                 case Shader::USAGE_FOG:
572                                         r.o[i].x = r.v[i].x;   // Only the x component is copied for fog
573                                         break;
574                                 default:
575                                         ASSERT(false);   // Any other usage value is unexpected here
576                                 }
577                         }
578                 }
579                 else   // No shader: copy a fixed set of position, two colors, eight texture coordinate sets and point size
580                 {
581                         r.o[Pos].x = r.v[PositionT].x;   // Position is taken from the PositionT input
582                         r.o[Pos].y = r.v[PositionT].y;
583                         r.o[Pos].z = r.v[PositionT].z;
584                         r.o[Pos].w = r.v[PositionT].w;
585
586                         for(int i = 0; i < 2; i++)   // Color0 + i -> D0 + i
587                         {
588                                 r.o[D0 + i].x = r.v[Color0 + i].x;
589                                 r.o[D0 + i].y = r.v[Color0 + i].y;
590                                 r.o[D0 + i].z = r.v[Color0 + i].z;
591                                 r.o[D0 + i].w = r.v[Color0 + i].w;
592                         }
593
594                         for(int i = 0; i < 8; i++)   // TexCoord0 + i -> T0 + i
595                         {
596                                 r.o[T0 + i].x = r.v[TexCoord0 + i].x;
597                                 r.o[T0 + i].y = r.v[TexCoord0 + i].y;
598                                 r.o[T0 + i].z = r.v[TexCoord0 + i].z;
599                                 r.o[T0 + i].w = r.v[TexCoord0 + i].w;
600                         }
601
602                         r.o[Pts].y = r.v[PointSize].x;   // Point size is stored in the y component of the Pts output
603                 }
604         }
605
606         Vector4f VertexProgram::fetchRegisterF(Registers &r, const Src &src, int offset)   // Reads source operand 'src' (register index advanced by 'offset') and returns it with the operand's swizzle and modifier applied
607         {
608                 int i = src.index + offset;   // Effective register index; the M3X2..M4X4 helpers pass the matrix row number as 'offset'
609
610                 Vector4f reg;
611
612                 switch(src.type)
613                 {
614                 case Shader::PARAMETER_TEMP:
615                         if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
616                         {
617                                 reg = r.r[i];
618                         }
619                         else
620                         {
621                                 reg = r.r[i + relativeAddress(r, src)];
622                         }
623                         break;
624                 case Shader::PARAMETER_CONST:
625                         reg = readConstant(r, src, offset);   // readConstant adds 'offset' to src.index itself and handles relative addressing
626                         break;
627                 case Shader::PARAMETER_INPUT:
628             if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
629                         {
630                                 reg = r.v[i];
631                         }
632                         else
633                         {
634                                 reg = r.v[i + relativeAddress(r, src)];
635                         }
636             break;
637                 case Shader::PARAMETER_VOID:                    return r.r[0];   // Dummy: temporary register 0 is returned as a placeholder, without swizzle or modifier
638                 case Shader::PARAMETER_FLOAT4LITERAL:   // Each literal value is converted to a Float4 for the corresponding component
639                         reg.x = Float4(src.value[0]);
640                         reg.y = Float4(src.value[1]);
641                         reg.z = Float4(src.value[2]);
642                         reg.w = Float4(src.value[3]);
643                         break;
644                 case Shader::PARAMETER_ADDR:                    reg = r.a0;             break;
645                 case Shader::PARAMETER_CONSTBOOL:               return r.r[0];   // Dummy
646                 case Shader::PARAMETER_CONSTINT:                return r.r[0];   // Dummy
647                 case Shader::PARAMETER_LOOP:                    return r.r[0];   // Dummy
648                 case Shader::PARAMETER_PREDICATE:               return r.r[0];   // Dummy
649                 case Shader::PARAMETER_SAMPLER:   // The sampler index is carried as integer bits in reg.x; returned early, so no swizzle or modifier is applied
650                         if(src.rel.type == Shader::PARAMETER_VOID)
651                         {
652                                 reg.x = As<Float4>(Int4(i));
653                         }
654                         else if(src.rel.type == Shader::PARAMETER_TEMP)   // NOTE(review): any other relative type leaves reg.x unassigned
655                         {
656                                 reg.x = As<Float4>(Int4(i) + RoundInt(r.r[src.rel.index].x));   // Base index plus RoundInt of the relative temporary's x component
657                         }
658                         return reg;
659                 case Shader::PARAMETER_OUTPUT:
660             if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
661                         {
662                                 reg = r.o[i];
663                         }
664                         else
665                         {
666                                 reg = r.o[i + relativeAddress(r, src)];
667                         }
668                         break;
669                 default:
670                         ASSERT(false);   // NOTE(review): if ASSERT does not stop execution, 'reg' is used below without having been assigned
671                 }
672
673                 const Float4 &x = reg[(src.swizzle >> 0) & 0x3];   // Swizzle: two bits per result component select which component of 'reg' to use
674                 const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
675                 const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
676                 const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
677
678                 Vector4f mod;
679
680                 switch(src.modifier)   // The modifier is applied to the swizzled components
681                 {
682                 case Shader::MODIFIER_NONE:
683                         mod.x = x;
684                         mod.y = y;
685                         mod.z = z;
686                         mod.w = w;
687                         break;
688                 case Shader::MODIFIER_NEGATE:
689                         mod.x = -x;
690                         mod.y = -y;
691                         mod.z = -z;
692                         mod.w = -w;
693                         break;
694                 case Shader::MODIFIER_ABS:
695                         mod.x = Abs(x);
696                         mod.y = Abs(y);
697                         mod.z = Abs(z);
698                         mod.w = Abs(w);
699                         break;
700                 case Shader::MODIFIER_ABS_NEGATE:
701                         mod.x = -Abs(x);
702                         mod.y = -Abs(y);
703                         mod.z = -Abs(z);
704                         mod.w = -Abs(w);
705                         break;
706                 case Shader::MODIFIER_NOT:   // Flips every bit of the value reinterpreted as integers (XOR with 0xFFFFFFFF)
707                         mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
708                         mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
709                         mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
710                         mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
711                         break;
712                 default:
713                         ASSERT(false);   // Unknown modifier; 'mod' is returned unassigned if ASSERT does not stop execution
714                 }
715
716                 return mod;
717         }
718
719         Vector4f VertexProgram::readConstant(Registers &r, const Src &src, int offset)   // Loads float constant register src.index + offset from DrawData vs.c, honoring the operand's relative addressing
720         {
721                 Vector4f c;
722
723                 int i = src.index + offset;   // Base constant register index
724
725                 if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
726                 {
727                         c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]));   // Load the whole register into all four vectors...
728
729                         c.x = c.x.xxxx;   // ...then keep a single component in each, via the xxxx/yyyy/zzzz/wwww swizzles
730                         c.y = c.y.yyyy;
731                         c.z = c.z.zzzz;
732                         c.w = c.w.wwww;
733
734                         if(localShaderConstants)   // Constant may be known at compile time
735                         {
736                                 for(size_t j = 0; j < shader->getLength(); j++)   // Scan the shader for a DEF instruction whose destination is constant i
737                                 {
738                                         const Shader::Instruction &instruction = *shader->getInstruction(j);
739
740                                         if(instruction.opcode == Shader::OPCODE_DEF)
741                                         {
742                                                 if(instruction.dst.index == i)
743                                                 {
744                                                         c.x = Float4(instruction.src[0].value[0]);   // Replace the loaded value with the literal from the DEF
745                                                         c.y = Float4(instruction.src[0].value[1]);
746                                                         c.z = Float4(instruction.src[0].value[2]);
747                                                         c.w = Float4(instruction.src[0].value[3]);
748
749                                                         break;   // The first matching DEF is used
750                                                 }
751                                         }
752                                 }
753                         }
754                 }
755                 else if(src.rel.type == Shader::PARAMETER_LOOP)   // Relative to the loop register
756                 {
757                         Int loopCounter = r.aL[r.loopDepth];   // aL value at the current loop depth
758
759                         c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]) + loopCounter * 16);   // 16 is the byte stride between registers - presumably sizeof(float4), TODO confirm
760
761                         c.x = c.x.xxxx;
762                         c.y = c.y.yyyy;
763                         c.z = c.z.zzzz;
764                         c.w = c.w.wwww;
765                 }
766                 else
767                 {
768                         if(src.rel.deterministic)   // A single scalar offset is enough (relativeAddress reads element 0 only)
769                         {
770                                 Int a = relativeAddress(r, src);
771                         
772                                 c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]) + a * 16);
773
774                                 c.x = c.x.xxxx;
775                                 c.y = c.y.yyyy;
776                                 c.z = c.z.zzzz;
777                                 c.w = c.w.wwww;
778                         }
779                         else   // The offset may differ per vector element, so four separate registers are fetched
780                         {
781                                 int component = src.rel.swizzle & 0x03;   // Low two swizzle bits select the component of the relative register
782                                 Float4 a;
783
784                                 switch(src.rel.type)
785                                 {
786                                 case Shader::PARAMETER_ADDR:   a = r.a0[component]; break;
787                                 case Shader::PARAMETER_TEMP:   a = r.r[src.rel.index][component]; break;
788                                 case Shader::PARAMETER_INPUT:  a = r.v[src.rel.index][component]; break;
789                                 case Shader::PARAMETER_OUTPUT: a = r.o[src.rel.index][component]; break;
790                                 case Shader::PARAMETER_CONST:  a = *Pointer<Float>(r.data + OFFSET(DrawData,vs.c[src.rel.index][component])); break;
791                                 default: ASSERT(false);   // 'a' stays unassigned for any other relative type
792                                 }
793
794                                 Int4 index = Int4(i) + RoundInt(a) * Int4(src.rel.scale);   // One register index per vector element
795
796                                 index = Min(As<UInt4>(index), UInt4(256));   // Clamp to constant register range, c[256] = {0, 0, 0, 0}; the unsigned compare also maps negative indices to 256
797                                 
798                                 Int index0 = Extract(index, 0);
799                                 Int index1 = Extract(index, 1);
800                                 Int index2 = Extract(index, 2);
801                                 Int index3 = Extract(index, 3);
802
803                                 c.x = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index0 * 16, 16);   // At this point each of c.x..c.w holds one complete register
804                                 c.y = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index1 * 16, 16);
805                                 c.z = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index2 * 16, 16);
806                                 c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index3 * 16, 16);
807
808                                 transpose4x4(c.x, c.y, c.z, c.w);   // Presumably regroups the four registers by component, so c.x holds the x values, etc. - confirm against transpose4x4
809                         }
810                 }
811
812                 return c;
813         }
814
815         Int VertexProgram::relativeAddress(Registers &r, const Shader::Parameter &var)   // Returns the scalar register offset for relatively addressed 'var': RoundInt(element 0 of the relative register's x) * rel.scale
816         {
817                 ASSERT(var.rel.deterministic);   // readConstant handles the non-deterministic case itself, fetching one register per element
818
819                 if(var.rel.type == Shader::PARAMETER_TEMP)   // NOTE(review): every branch reads .x; rel.swizzle is not consulted here
820                 {
821                         return RoundInt(Extract(r.r[var.rel.index].x, 0)) * var.rel.scale;
822                 }
823                 else if(var.rel.type == Shader::PARAMETER_INPUT)
824                 {
825                         return RoundInt(Extract(r.v[var.rel.index].x, 0)) * var.rel.scale;
826                 }
827                 else if(var.rel.type == Shader::PARAMETER_OUTPUT)
828                 {
829                         return RoundInt(Extract(r.o[var.rel.index].x, 0)) * var.rel.scale;
830                 }
831                 else if(var.rel.type == Shader::PARAMETER_CONST)
832                 {
833                         RValue<Float4> c = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[var.rel.index]));   // Element 0 of the loaded constant register is used below
834
835                         return RoundInt(Extract(c, 0)) * var.rel.scale;
836                 }
837                 else ASSERT(false);   // Other relative types (including PARAMETER_ADDR and PARAMETER_LOOP) are not handled here
838
839                 return 0;   // Fallback offset for an unhandled relative type
840         }
841
842         Int4 VertexProgram::enableMask(Registers &r, const Shader::Instruction *instruction)   // Builds the mask that gates this instruction's results, combining the branch, break, continue and leave masks
843         {
844                 Int4 enable = instruction->analysisBranch ? Int4(r.enableStack[r.enableIndex]) : Int4(0xFFFFFFFF);   // Top of the enable stack if the instruction is flagged analysisBranch, otherwise all bits set
845                 
846                 if(!whileTest)   // whileTest is set by TEST(); while it is set, the break/continue/leave masks are not applied
847                 {
848                         if(shader->containsBreakInstruction() && instruction->analysisBreak)   // Each mask is applied only if the shader contains such an instruction and this instruction is flagged for it
849                         {
850                                 enable &= r.enableBreak;
851                         }
852
853                         if(shader->containsContinueInstruction() && instruction->analysisContinue)
854                         {
855                                 enable &= r.enableContinue;
856                         }
857
858                         if(shader->containsLeaveInstruction() && instruction->analysisLeave)
859                         {
860                                 enable &= r.enableLeave;
861                         }
862                 }
863
864                 return enable;
865         }
866
867         void VertexProgram::M3X2(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)   // dst.xy = dot3 of src0 with two matrix rows read from src1; dst.z and dst.w are not written
868         {
869                 Vector4f row0 = fetchRegisterF(r, src1, 0);   // Rows are fetched at register offsets 0 and 1 from src1
870                 Vector4f row1 = fetchRegisterF(r, src1, 1);
871
872                 dst.x = dot3(src0, row0);
873                 dst.y = dot3(src0, row1);
874         }
875
876         void VertexProgram::M3X3(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)   // dst.xyz = dot3 of src0 with three matrix rows read from src1; dst.w is not written
877         {
878                 Vector4f row0 = fetchRegisterF(r, src1, 0);   // Rows are fetched at register offsets 0..2 from src1
879                 Vector4f row1 = fetchRegisterF(r, src1, 1);
880                 Vector4f row2 = fetchRegisterF(r, src1, 2);
881
882                 dst.x = dot3(src0, row0);
883                 dst.y = dot3(src0, row1);
884                 dst.z = dot3(src0, row2);
885         }
886
887         void VertexProgram::M3X4(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)   // dst.xyzw = dot3 of src0 with four matrix rows read from src1
888         {
889                 Vector4f row0 = fetchRegisterF(r, src1, 0);   // Rows are fetched at register offsets 0..3 from src1
890                 Vector4f row1 = fetchRegisterF(r, src1, 1);
891                 Vector4f row2 = fetchRegisterF(r, src1, 2);
892                 Vector4f row3 = fetchRegisterF(r, src1, 3);
893
894                 dst.x = dot3(src0, row0);   // Three-component dot products, even though four results are produced
895                 dst.y = dot3(src0, row1);
896                 dst.z = dot3(src0, row2);
897                 dst.w = dot3(src0, row3);
898         }
899
900         void VertexProgram::M4X3(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)   // dst.xyz = dot4 of src0 with three matrix rows read from src1; dst.w is not written
901         {
902                 Vector4f row0 = fetchRegisterF(r, src1, 0);   // Rows are fetched at register offsets 0..2 from src1
903                 Vector4f row1 = fetchRegisterF(r, src1, 1);
904                 Vector4f row2 = fetchRegisterF(r, src1, 2);
905
906                 dst.x = dot4(src0, row0);
907                 dst.y = dot4(src0, row1);
908                 dst.z = dot4(src0, row2);
909         }
910
911         void VertexProgram::M4X4(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)   // dst.xyzw = dot4 of src0 with four matrix rows read from src1
912         {
913                 Vector4f row0 = fetchRegisterF(r, src1, 0);   // Rows are fetched at register offsets 0..3 from src1
914                 Vector4f row1 = fetchRegisterF(r, src1, 1);
915                 Vector4f row2 = fetchRegisterF(r, src1, 2);
916                 Vector4f row3 = fetchRegisterF(r, src1, 3);
917
918                 dst.x = dot4(src0, row0);
919                 dst.y = dot4(src0, row1);
920                 dst.z = dot4(src0, row2);
921                 dst.w = dot4(src0, row3);
922         }
923
924         void VertexProgram::BREAK(Registers &r)   // Emits an unconditional break towards the end block recorded at loopRepEndBlock[loopRepDepth - 1]
925         {
926                 llvm::BasicBlock *deadBlock = Nucleus::createBasicBlock();   // Receives whatever is emitted after the break
927                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];   // Presumably the end of the innermost loop/rep - confirm where loopRepDepth is maintained
928
929                 if(breakDepth == 0)   // NOTE(review): presumably means the break is not nested inside a conditional - confirm where breakDepth is maintained
930                 {
931                         r.enableIndex = r.enableIndex - breakDepth;   // No-op in this branch, since breakDepth is 0
932                         Nucleus::createBr(endBlock);   // Unconditional jump to the end block
933                 }
934                 else
935                 {
936                         r.enableBreak = r.enableBreak & ~r.enableStack[r.enableIndex];   // Clear the break mask for the elements enabled at the top of the enable stack
937                         Bool allBreak = SignMask(r.enableBreak) == 0x0;   // True when no element of the break mask is still set
938
939                         r.enableIndex = r.enableIndex - breakDepth;   // Lower the enable index before the branch that may leave via endBlock...
940                         branch(allBreak, endBlock, deadBlock);
941                 }
942
943                 Nucleus::setInsertBlock(deadBlock);   // Subsequent code is emitted into deadBlock
944                 r.enableIndex = r.enableIndex + breakDepth;   // ...and raise it again for the code that follows the break
945         }
946
947         void VertexProgram::BREAKC(Registers &r, Vector4f &src0, Vector4f &src1, Control control)   // Conditional break: compares src0.x with src1.x as selected by 'control' and passes the result to BREAK(r, condition)
948         {
949                 Int4 condition;   // Comparison result, one mask per vector element
950
951                 switch(control)
952                 {
953                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x,  src1.x);   break;   // GT is expressed as not-less-or-equal
954                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);             break;
955                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x);    break;   // GE is expressed as not-less-than
956                 case Shader::CONTROL_LT: condition = CmpLT(src0.x,  src1.x);    break;
957                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x);    break;
958                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);             break;
959                 default:
960                         ASSERT(false);   // Unknown control value; 'condition' stays unassigned if ASSERT does not stop execution
961                 }
962
963                 BREAK(r, condition);
964         }
965
966         void VertexProgram::BREAKP(Registers &r, const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
967         {   // Conditional break driven by one component of predicate register p0
968                 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);   // Low two swizzle bits select the p0 component; its bits are used directly as the mask
969
970                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)   // Negated predicate: invert the mask
971                 {
972                         condition = ~condition;
973                 }
974
975                 BREAK(r, condition);
976         }
977
978         void VertexProgram::BREAK(Registers &r, Int4 &condition)   // Breaks the elements selected by 'condition'; note that 'condition' is modified in place
979         {
980                 condition &= r.enableStack[r.enableIndex];   // Restrict the condition to elements enabled at the top of the enable stack
981
982                 llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock();   // Where emission resumes when not every element has broken
983                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];   // Presumably the end of the innermost loop/rep - confirm where loopRepDepth is maintained
984
985                 r.enableBreak = r.enableBreak & ~condition;   // Clear the break mask for the elements that break
986                 Bool allBreak = SignMask(r.enableBreak) == 0x0;   // True when no element of the break mask is still set
987
988                 r.enableIndex = r.enableIndex - breakDepth;   // Lower the enable index before the branch that may leave via endBlock...
989                 branch(allBreak, endBlock, continueBlock);
990
991                 Nucleus::setInsertBlock(continueBlock);
992                 r.enableIndex = r.enableIndex + breakDepth;   // ...and raise it again for the code that follows
993         }
994
995         void VertexProgram::CONTINUE(Registers &r)   // Clears the continue mask for the elements enabled at the top of the enable stack; no branch is emitted here
996         {
997                 r.enableContinue = r.enableContinue & ~r.enableStack[r.enableIndex];   // enableMask() later ANDs this mask into instructions flagged analysisContinue
998         }
999
1000         void VertexProgram::TEST()   // Sets the whileTest flag, which makes enableMask() skip the break/continue/leave masks
1001         {
1002                 whileTest = true;   // Initialized to false in the constructor; NOTE(review): where it is cleared again is not visible in this part of the file
1003         }
1004
1005         void VertexProgram::CALL(Registers &r, int labelIndex, int callSiteIndex)   // Emits an unconditional jump to the block of label 'labelIndex' and resumes emission at the return block of call site 'callSiteIndex'
1006         {
1007                 if(!labelBlock[labelIndex])   // Create the label's basic block on first reference
1008                 {
1009                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1010                 }
1011
1012                 if(callRetBlock[labelIndex].size() > 1)   // The call site is only pushed when the label has more than one return block
1013                 {
1014                         r.callStack[r.stackIndex++] = UInt(callSiteIndex);
1015                 }
1016
1017                 Int4 restoreLeave = r.enableLeave;   // Saved so the leave mask can be reinstated once the call returns
1018
1019                 Nucleus::createBr(labelBlock[labelIndex]);
1020                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);   // Continue emitting at this call site's return block
1021
1022                 r.enableLeave = restoreLeave;
1023         }
1024
1025         void VertexProgram::CALLNZ(Registers &r, int labelIndex, int callSiteIndex, const Src &src)
1026         {
1027                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1028                 {
1029                         CALLNZb(r, labelIndex, callSiteIndex, src);
1030                 }
1031                 else if(src.type == Shader::PARAMETER_PREDICATE)
1032                 {
1033                         CALLNZp(r, labelIndex, callSiteIndex, src);
1034                 }
1035                 else ASSERT(false);
1036         }
1037
1038         void VertexProgram::CALLNZb(Registers &r, int labelIndex, int callSiteIndex, const Src &boolRegister)
1039         {
1040                 Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1041                 
1042                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1043                 {
1044                         condition = !condition; 
1045                 }
1046
1047                 if(!labelBlock[labelIndex])
1048                 {
1049                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1050                 }
1051
1052                 if(callRetBlock[labelIndex].size() > 1)
1053                 {
1054                         r.callStack[r.stackIndex++] = UInt(callSiteIndex);
1055                 }
1056
1057                 Int4 restoreLeave = r.enableLeave;
1058
1059                 branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1060                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1061
1062                 r.enableLeave = restoreLeave;
1063         }
1064
1065         void VertexProgram::CALLNZp(Registers &r, int labelIndex, int callSiteIndex, const Src &predicateRegister)
1066         {
1067                 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
1068
1069                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1070                 {
1071                         condition = ~condition;
1072                 }
1073
1074                 condition &= r.enableStack[r.enableIndex];
1075
1076                 if(!labelBlock[labelIndex])
1077                 {
1078                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1079                 }
1080
1081                 if(callRetBlock[labelIndex].size() > 1)
1082                 {
1083                         r.callStack[r.stackIndex++] = UInt(callSiteIndex);
1084                 }
1085
1086                 r.enableIndex++;
1087                 r.enableStack[r.enableIndex] = condition;
1088                 Int4 restoreLeave = r.enableLeave;
1089
1090                 Bool notAllFalse = SignMask(condition) != 0;
1091                 branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1092                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1093
1094                 r.enableIndex--;
1095                 r.enableLeave = restoreLeave;
1096         }
1097
1098         void VertexProgram::ELSE(Registers &r)
1099         {
1100                 ifDepth--;
1101
1102                 llvm::BasicBlock *falseBlock = ifFalseBlock[ifDepth];
1103                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1104
1105                 if(isConditionalIf[ifDepth])
1106                 {
1107                         Int4 condition = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];
1108                         Bool notAllFalse = SignMask(condition) != 0;
1109
1110                         branch(notAllFalse, falseBlock, endBlock);
1111
1112                         r.enableStack[r.enableIndex] = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];
1113                 }
1114                 else
1115                 {
1116                         Nucleus::createBr(endBlock);
1117                         Nucleus::setInsertBlock(falseBlock);
1118                 }
1119
1120                 ifFalseBlock[ifDepth] = endBlock;
1121
1122                 ifDepth++;
1123         }
1124
1125         void VertexProgram::ENDIF(Registers &r)
1126         {
1127                 ifDepth--;
1128
1129                 llvm::BasicBlock *endBlock = ifFalseBlock[ifDepth];
1130
1131                 Nucleus::createBr(endBlock);
1132                 Nucleus::setInsertBlock(endBlock);
1133
1134                 if(isConditionalIf[ifDepth])
1135                 {
1136                         breakDepth--;
1137                         r.enableIndex--;
1138                 }
1139         }
1140
1141         void VertexProgram::ENDLOOP(Registers &r)
1142         {
1143                 loopRepDepth--;
1144
1145                 r.aL[r.loopDepth] = r.aL[r.loopDepth] + r.increment[r.loopDepth];   // FIXME: +=
1146
1147                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1148                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1149
1150                 Nucleus::createBr(testBlock);
1151                 Nucleus::setInsertBlock(endBlock);
1152
1153                 r.loopDepth--;
1154                 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1155         }
1156
1157         void VertexProgram::ENDREP(Registers &r)
1158         {
1159                 loopRepDepth--;
1160
1161                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1162                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1163
1164                 Nucleus::createBr(testBlock);
1165                 Nucleus::setInsertBlock(endBlock);
1166
1167                 r.loopDepth--;
1168                 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1169         }
1170
1171         void VertexProgram::ENDWHILE(Registers &r)
1172         {
1173                 loopRepDepth--;
1174
1175                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1176                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1177
1178                 Nucleus::createBr(testBlock);
1179                 Nucleus::setInsertBlock(endBlock);
1180
1181                 r.enableIndex--;
1182                 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1183                 whileTest = false;
1184         }
1185
1186         void VertexProgram::IF(Registers &r, const Src &src)
1187         {
1188                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1189                 {
1190                         IFb(r, src);
1191                 }
1192                 else if(src.type == Shader::PARAMETER_PREDICATE)
1193                 {
1194                         IFp(r, src);
1195                 }
1196                 else
1197                 {
1198                         Int4 condition = As<Int4>(fetchRegisterF(r, src).x);
1199                         IF(r, condition);
1200                 }
1201         }
1202
1203         void VertexProgram::IFb(Registers &r, const Src &boolRegister)
1204         {
1205                 ASSERT(ifDepth < 24 + 4);
1206
1207                 Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1208
1209                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1210                 {
1211                         condition = !condition;
1212                 }
1213
1214                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1215                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1216
1217                 branch(condition, trueBlock, falseBlock);
1218
1219                 isConditionalIf[ifDepth] = false;
1220                 ifFalseBlock[ifDepth] = falseBlock;
1221
1222                 ifDepth++;
1223         }
1224
1225         void VertexProgram::IFp(Registers &r, const Src &predicateRegister)
1226         {
1227                 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
1228
1229                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1230                 {
1231                         condition = ~condition;
1232                 }
1233
1234                 IF(r, condition);
1235         }
1236
1237         void VertexProgram::IFC(Registers &r, Vector4f &src0, Vector4f &src1, Control control)
1238         {
1239                 Int4 condition;
1240
1241                 switch(control)
1242                 {
1243                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x,  src1.x);   break;
1244                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);             break;
1245                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x);    break;
1246                 case Shader::CONTROL_LT: condition = CmpLT(src0.x,  src1.x);    break;
1247                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x);    break;
1248                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);             break;
1249                 default:
1250                         ASSERT(false);
1251                 }
1252
1253                 IF(r, condition);
1254         }
1255
1256         void VertexProgram::IF(Registers &r, Int4 &condition)
1257         {
1258                 condition &= r.enableStack[r.enableIndex];
1259
1260                 r.enableIndex++;
1261                 r.enableStack[r.enableIndex] = condition;
1262
1263                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1264                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1265
1266                 Bool notAllFalse = SignMask(condition) != 0;
1267
1268                 branch(notAllFalse, trueBlock, falseBlock);
1269
1270                 isConditionalIf[ifDepth] = true;
1271                 ifFalseBlock[ifDepth] = falseBlock;
1272
1273                 ifDepth++;
1274                 breakDepth++;
1275         }
1276
1277         void VertexProgram::LABEL(int labelIndex)
1278         {
1279                 if(!labelBlock[labelIndex])
1280                 {
1281                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1282                 }
1283
1284                 Nucleus::setInsertBlock(labelBlock[labelIndex]);
1285                 currentLabel = labelIndex;
1286         }
1287
1288         void VertexProgram::LOOP(Registers &r, const Src &integerRegister)
1289         {
1290                 r.loopDepth++;
1291
1292                 r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
1293                 r.aL[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][1]));
1294                 r.increment[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][2]));
1295
1296                 // FIXME: Compiles to two instructions?
1297                 If(r.increment[r.loopDepth] == 0)
1298                 {
1299                         r.increment[r.loopDepth] = 1;
1300                 }
1301
1302                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1303                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1304                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1305
1306                 loopRepTestBlock[loopRepDepth] = testBlock;
1307                 loopRepEndBlock[loopRepDepth] = endBlock;
1308
1309                 // FIXME: jump(testBlock)
1310                 Nucleus::createBr(testBlock);
1311                 Nucleus::setInsertBlock(testBlock);
1312
1313                 branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
1314                 Nucleus::setInsertBlock(loopBlock);
1315
1316                 r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1;   // FIXME: --
1317                 
1318                 loopRepDepth++;
1319                 breakDepth = 0;
1320         }
1321
1322         void VertexProgram::REP(Registers &r, const Src &integerRegister)
1323         {
1324                 r.loopDepth++;
1325
1326                 r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
1327                 r.aL[r.loopDepth] = r.aL[r.loopDepth - 1];
1328
1329                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1330                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1331                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1332
1333                 loopRepTestBlock[loopRepDepth] = testBlock;
1334                 loopRepEndBlock[loopRepDepth] = endBlock;
1335
1336                 // FIXME: jump(testBlock)
1337                 Nucleus::createBr(testBlock);
1338                 Nucleus::setInsertBlock(testBlock);
1339
1340                 branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
1341                 Nucleus::setInsertBlock(loopBlock);
1342
1343                 r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1;   // FIXME: --
1344
1345                 loopRepDepth++;
1346                 breakDepth = 0;
1347         }
1348
1349         void VertexProgram::WHILE(Registers &r, const Src &temporaryRegister)
1350         {
1351                 r.enableIndex++;
1352
1353                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1354                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1355                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1356                 
1357                 loopRepTestBlock[loopRepDepth] = testBlock;
1358                 loopRepEndBlock[loopRepDepth] = endBlock;
1359
1360                 Int4 restoreBreak = r.enableBreak;
1361                 Int4 restoreContinue = r.enableContinue;
1362
1363                 // FIXME: jump(testBlock)
1364                 Nucleus::createBr(testBlock);
1365                 Nucleus::setInsertBlock(testBlock);
1366                 r.enableContinue = restoreContinue;
1367
1368                 const Vector4f &src = fetchRegisterF(r, temporaryRegister);
1369                 Int4 condition = As<Int4>(src.x);
1370                 condition &= r.enableStack[r.enableIndex - 1];
1371                 r.enableStack[r.enableIndex] = condition;
1372
1373                 Bool notAllFalse = SignMask(condition) != 0;
1374                 branch(notAllFalse, loopBlock, endBlock);
1375                 
1376                 Nucleus::setInsertBlock(endBlock);
1377                 r.enableBreak = restoreBreak;
1378                 
1379                 Nucleus::setInsertBlock(loopBlock);
1380
1381                 loopRepDepth++;
1382                 breakDepth = 0;
1383         }
1384
1385         void VertexProgram::RET(Registers &r)
1386         {
1387                 if(currentLabel == -1)
1388                 {
1389                         returnBlock = Nucleus::createBasicBlock();
1390                         Nucleus::createBr(returnBlock);
1391                 }
1392                 else
1393                 {
1394                         llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
1395
1396                         if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
1397                         {
1398                                 // FIXME: Encapsulate
1399                                 UInt index = r.callStack[--r.stackIndex];
1400  
1401                                 llvm::Value *value = index.loadValue();
1402                                 llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
1403
1404                                 for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
1405                                 {
1406                                         Nucleus::addSwitchCase(switchInst, i, callRetBlock[currentLabel][i]);
1407                                 }
1408                         }
1409                         else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
1410                         {
1411                                 Nucleus::createBr(callRetBlock[currentLabel][0]);
1412                         }
1413                         else   // Function isn't called
1414                         {
1415                                 Nucleus::createBr(unreachableBlock);
1416                         }
1417
1418                         Nucleus::setInsertBlock(unreachableBlock);
1419                         Nucleus::createUnreachable();
1420                 }
1421         }
1422
1423         void VertexProgram::LEAVE(Registers &r)
1424         {
1425                 r.enableLeave = r.enableLeave & ~r.enableStack[r.enableIndex];
1426
1427                 // FIXME: Return from function if all instances left
1428                 // FIXME: Use enableLeave in other control-flow constructs
1429         }
1430
1431         void VertexProgram::TEXLDL(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
1432         {
1433                 Vector4f tmp;
1434                 sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, src0.w);
1435
1436                 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
1437                 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
1438                 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
1439                 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
1440         }
1441
1442         void VertexProgram::TEX(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
1443         {
1444                 Float4 lod = Float4(0.0f);
1445                 Vector4f tmp;
1446                 sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, lod);
1447
1448                 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
1449                 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
1450                 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
1451                 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
1452         }
1453
1454         void VertexProgram::sampleTexture(Registers &r, Vector4f &c, const Src &s, Float4 &u, Float4 &v, Float4 &w, Float4 &q)
1455         {
1456                 if(s.type == Shader::PARAMETER_SAMPLER && s.rel.type == Shader::PARAMETER_VOID)
1457                 {
1458                         Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap[16]) + s.index * sizeof(Texture);
1459                         sampler[s.index]->sampleTexture(texture, c, u, v, w, q, r.a0, r.a0, false, false, true);        
1460                 }
1461                 else
1462                 {
1463                         Int index = As<Int>(Float(fetchRegisterF(r, s).x.x));
1464
1465                         for(int i = 0; i < 16; i++)
1466                         {
1467                                 if(shader->usesSampler(i))
1468                                 {
1469                                         If(index == i)
1470                                         {
1471                                                 Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap[16]) + i * sizeof(Texture);
1472                                                 sampler[i]->sampleTexture(texture, c, u, v, w, q, r.a0, r.a0, false, false, true);
1473                                                 // FIXME: When the sampler states are the same, we could use one sampler and just index the texture
1474                                         }
1475                                 }
1476                         }
1477                 }
1478         }
1479 }