OSDN Git Service

New integer related core functions
[android-x86/external-swiftshader.git] / src / Shader / VertexProgram.cpp
1 // SwiftShader Software Renderer
2 //
3 // Copyright(c) 2005-2013 TransGaming Inc.
4 //
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
10 //
11
12 #include "VertexProgram.hpp"
13
14 #include "Renderer.hpp"
15 #include "VertexShader.hpp"
16 #include "Vertex.hpp"
17 #include "Half.hpp"
18 #include "SamplerCore.hpp"
19 #include "Debug.hpp"
20
21 namespace sw
22 {
23         VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader) : VertexRoutine(state, shader)
24         {
25                 ifDepth = 0;
26                 loopRepDepth = 0;
27                 breakDepth = 0;
28                 currentLabel = -1;
29                 whileTest = false;
30
31                 for(int i = 0; i < 2048; i++)
32                 {
33                         labelBlock[i] = 0;
34                 }
35         }
36
37         VertexProgram::~VertexProgram()
38         {
39                 for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
40                 {
41                         delete sampler[i];
42                 }
43         }
44
45         void VertexProgram::pipeline(Registers &r)
46         {
47                 for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
48                 {
49                         sampler[i] = new SamplerCore(r.constants, state.samplerState[i]);
50                 }
51
52                 if(!state.preTransformed)
53                 {
54                         program(r);
55                 }
56                 else
57                 {
58                         passThrough(r);
59                 }
60         }
61
62         void VertexProgram::program(Registers &r)
63         {
64         //      shader->print("VertexShader-%0.8X.txt", state.shaderID);
65
66                 unsigned short version = shader->getVersion();
67
68                 r.enableIndex = 0;
69                 r.stackIndex = 0;
70
71                 if(shader->containsLeaveInstruction())
72                 {
73                         r.enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
74                 }
75
76                 // Create all call site return blocks up front
77                 for(size_t i = 0; i < shader->getLength(); i++)
78                 {
79                         const Shader::Instruction *instruction = shader->getInstruction(i);
80                         Shader::Opcode opcode = instruction->opcode;
81
82                         if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
83                         {
84                                 const Dst &dst = instruction->dst;
85
86                                 ASSERT(callRetBlock[dst.label].size() == dst.callSite);
87                                 callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
88                         }
89                 }
90         
91                 for(size_t i = 0; i < shader->getLength(); i++)
92                 {
93                         const Shader::Instruction *instruction = shader->getInstruction(i);
94                         Shader::Opcode opcode = instruction->opcode;
95
96                         if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
97                         {
98                                 continue;
99                         }
100
101                         Dst dst = instruction->dst;
102                         Src src0 = instruction->src[0];
103                         Src src1 = instruction->src[1];
104                         Src src2 = instruction->src[2];
105
106                         bool predicate = instruction->predicate;
107                         Control control = instruction->control;
108                         bool integer = dst.type == Shader::PARAMETER_ADDR;
109                         bool pp = dst.partialPrecision;
110
111                         Vector4f d;
112                         Vector4f s0;
113                         Vector4f s1;
114                         Vector4f s2;
115
116                         if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegisterF(r, src0);
117                         if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegisterF(r, src1);
118                         if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegisterF(r, src2);
119
120                         switch(opcode)
121                         {
122                         case Shader::OPCODE_VS_1_0:                                                                             break;
123                         case Shader::OPCODE_VS_1_1:                                                                             break;
124                         case Shader::OPCODE_VS_2_0:                                                                             break;
125                         case Shader::OPCODE_VS_2_x:                                                                             break;
126                         case Shader::OPCODE_VS_2_sw:                                                                    break;
127                         case Shader::OPCODE_VS_3_0:                                                                             break;
128                         case Shader::OPCODE_VS_3_sw:                                                                    break;
129                         case Shader::OPCODE_DCL:                                                                                break;
130                         case Shader::OPCODE_DEF:                                                                                break;
131                         case Shader::OPCODE_DEFI:                                                                               break;
132                         case Shader::OPCODE_DEFB:                                                                               break;
133                         case Shader::OPCODE_NOP:                                                                                break;
134                         case Shader::OPCODE_ABS:                abs(d, s0);                                             break;
135                         case Shader::OPCODE_ADD:                add(d, s0, s1);                                 break;
136                         case Shader::OPCODE_CRS:                crs(d, s0, s1);                                 break;
137                         case Shader::OPCODE_FORWARD1:   forward1(d, s0, s1, s2);                break;
138                         case Shader::OPCODE_FORWARD2:   forward2(d, s0, s1, s2);                break;
139                         case Shader::OPCODE_FORWARD3:   forward3(d, s0, s1, s2);                break;
140                         case Shader::OPCODE_FORWARD4:   forward4(d, s0, s1, s2);                break;
141                         case Shader::OPCODE_REFLECT1:   reflect1(d, s0, s1);                    break;
142                         case Shader::OPCODE_REFLECT2:   reflect2(d, s0, s1);                    break;
143                         case Shader::OPCODE_REFLECT3:   reflect3(d, s0, s1);                    break;
144                         case Shader::OPCODE_REFLECT4:   reflect4(d, s0, s1);                    break;
145                         case Shader::OPCODE_REFRACT1:   refract1(d, s0, s1, s2.x);              break;
146                         case Shader::OPCODE_REFRACT2:   refract2(d, s0, s1, s2.x);              break;
147                         case Shader::OPCODE_REFRACT3:   refract3(d, s0, s1, s2.x);              break;
148                         case Shader::OPCODE_REFRACT4:   refract4(d, s0, s1, s2.x);              break;
149                         case Shader::OPCODE_DP1:                dp1(d, s0, s1);                                 break;
150                         case Shader::OPCODE_DP2:                dp2(d, s0, s1);                                 break;
151                         case Shader::OPCODE_DP3:                dp3(d, s0, s1);                                 break;
152                         case Shader::OPCODE_DP4:                dp4(d, s0, s1);                                 break;
153                         case Shader::OPCODE_ATT:                att(d, s0, s1);                                 break;
154                         case Shader::OPCODE_EXP2X:              exp2x(d, s0, pp);                               break;
155                         case Shader::OPCODE_EXP2:               exp2(d, s0, pp);                                break;
156                         case Shader::OPCODE_EXPP:               expp(d, s0, version);                   break;
157                         case Shader::OPCODE_EXP:                exp(d, s0, pp);                                 break;
158                         case Shader::OPCODE_FRC:                frc(d, s0);                                             break;
159                         case Shader::OPCODE_TRUNC:      trunc(d, s0);                   break;
160                         case Shader::OPCODE_FLOOR:      floor(d, s0);                   break;
161                         case Shader::OPCODE_ROUND:      round(d, s0);                   break;
162                         case Shader::OPCODE_ROUNDEVEN:  roundEven(d, s0);               break;
163                         case Shader::OPCODE_CEIL:       ceil(d, s0);                    break;
164                         case Shader::OPCODE_LIT:                lit(d, s0);                                             break;
165                         case Shader::OPCODE_LOG2X:              log2x(d, s0, pp);                               break;
166                         case Shader::OPCODE_LOG2:               log2(d, s0, pp);                                break;
167                         case Shader::OPCODE_LOGP:               logp(d, s0, version);                   break;
168                         case Shader::OPCODE_LOG:                log(d, s0, pp);                                 break;
169                         case Shader::OPCODE_LRP:                lrp(d, s0, s1, s2);                             break;
170                         case Shader::OPCODE_STEP:               step(d, s0, s1);                                break;
171                         case Shader::OPCODE_SMOOTH:             smooth(d, s0, s1, s2);                  break;
172                         case Shader::OPCODE_M3X2:               M3X2(r, d, s0, src1);                   break;
173                         case Shader::OPCODE_M3X3:               M3X3(r, d, s0, src1);                   break;
174                         case Shader::OPCODE_M3X4:               M3X4(r, d, s0, src1);                   break;
175                         case Shader::OPCODE_M4X3:               M4X3(r, d, s0, src1);                   break;
176                         case Shader::OPCODE_M4X4:               M4X4(r, d, s0, src1);                   break;
177                         case Shader::OPCODE_MAD:                mad(d, s0, s1, s2);                             break;
178                         case Shader::OPCODE_MAX:                max(d, s0, s1);                                 break;
179                         case Shader::OPCODE_MIN:                min(d, s0, s1);                                 break;
180                         case Shader::OPCODE_MOV:                mov(d, s0, integer);                    break;
181                         case Shader::OPCODE_MOVA:               mov(d, s0);                                             break;
182                         case Shader::OPCODE_F2B:                f2b(d, s0);                                             break;
183                         case Shader::OPCODE_B2F:                b2f(d, s0);                                             break;
184                         case Shader::OPCODE_MUL:                mul(d, s0, s1);                                 break;
185                         case Shader::OPCODE_NRM2:               nrm2(d, s0, pp);                                break;
186                         case Shader::OPCODE_NRM3:               nrm3(d, s0, pp);                                break;
187                         case Shader::OPCODE_NRM4:               nrm4(d, s0, pp);                                break;
188                         case Shader::OPCODE_POWX:               powx(d, s0, s1, pp);                    break;
189                         case Shader::OPCODE_POW:                pow(d, s0, s1, pp);                             break;
190                         case Shader::OPCODE_RCPX:               rcpx(d, s0, pp);                                break;
191                         case Shader::OPCODE_DIV:                div(d, s0, s1);                                 break;
192                         case Shader::OPCODE_MOD:                mod(d, s0, s1);                                 break;
193                         case Shader::OPCODE_RSQX:               rsqx(d, s0, pp);                                break;
194                         case Shader::OPCODE_SQRT:               sqrt(d, s0, pp);                                break;
195                         case Shader::OPCODE_RSQ:                rsq(d, s0, pp);                                 break;
196                         case Shader::OPCODE_LEN2:               len2(d.x, s0, pp);                              break;
197                         case Shader::OPCODE_LEN3:               len3(d.x, s0, pp);                              break;
198                         case Shader::OPCODE_LEN4:               len4(d.x, s0, pp);                              break;
199                         case Shader::OPCODE_DIST1:              dist1(d.x, s0, s1, pp);                 break;
200                         case Shader::OPCODE_DIST2:              dist2(d.x, s0, s1, pp);                 break;
201                         case Shader::OPCODE_DIST3:              dist3(d.x, s0, s1, pp);                 break;
202                         case Shader::OPCODE_DIST4:              dist4(d.x, s0, s1, pp);                 break;
203                         case Shader::OPCODE_SGE:                step(d, s1, s0);                                break;
204                         case Shader::OPCODE_SGN:                sgn(d, s0);                                             break;
205                         case Shader::OPCODE_SINCOS:             sincos(d, s0, pp);                              break;
206                         case Shader::OPCODE_COS:                cos(d, s0, pp);                                 break;
207                         case Shader::OPCODE_SIN:                sin(d, s0, pp);                                 break;
208                         case Shader::OPCODE_TAN:                tan(d, s0);                                             break;
209                         case Shader::OPCODE_ACOS:               acos(d, s0);                                    break;
210                         case Shader::OPCODE_ASIN:               asin(d, s0);                                    break;
211                         case Shader::OPCODE_ATAN:               atan(d, s0);                                    break;
212                         case Shader::OPCODE_ATAN2:              atan2(d, s0, s1);                               break;
213                         case Shader::OPCODE_COSH:               cosh(d, s0, pp);                                break;
214                         case Shader::OPCODE_SINH:               sinh(d, s0, pp);                                break;
215                         case Shader::OPCODE_TANH:               tanh(d, s0, pp);                                break;
216                         case Shader::OPCODE_ACOSH:              acosh(d, s0, pp);                               break;
217                         case Shader::OPCODE_ASINH:              asinh(d, s0, pp);                               break;
218                         case Shader::OPCODE_ATANH:              atanh(d, s0, pp);                               break;
219                         case Shader::OPCODE_SLT:                slt(d, s0, s1);                                 break;
220                         case Shader::OPCODE_SUB:                sub(d, s0, s1);                                 break;
221                         case Shader::OPCODE_BREAK:              BREAK(r);                                               break;
222                         case Shader::OPCODE_BREAKC:             BREAKC(r, s0, s1, control);             break;
223                         case Shader::OPCODE_BREAKP:             BREAKP(r, src0);                                break;
224                         case Shader::OPCODE_CONTINUE:   CONTINUE(r);                                    break;
225                         case Shader::OPCODE_TEST:               TEST();                                                 break;
226                         case Shader::OPCODE_CALL:               CALL(r, dst.label, dst.callSite);         break;
227                         case Shader::OPCODE_CALLNZ:             CALLNZ(r, dst.label, dst.callSite, src0); break;
228                         case Shader::OPCODE_ELSE:               ELSE(r);                                                break;
229                         case Shader::OPCODE_ENDIF:              ENDIF(r);                                               break;
230                         case Shader::OPCODE_ENDLOOP:    ENDLOOP(r);                                             break;
231                         case Shader::OPCODE_ENDREP:             ENDREP(r);                                              break;
232                         case Shader::OPCODE_ENDWHILE:   ENDWHILE(r);                                    break;
233                         case Shader::OPCODE_IF:                 IF(r, src0);                                    break;
234                         case Shader::OPCODE_IFC:                IFC(r, s0, s1, control);                break;
235                         case Shader::OPCODE_LABEL:              LABEL(dst.index);                               break;
236                         case Shader::OPCODE_LOOP:               LOOP(r, src1);                                  break;
237                         case Shader::OPCODE_REP:                REP(r, src0);                                   break;
238                         case Shader::OPCODE_WHILE:              WHILE(r, src0);                                 break;
239                         case Shader::OPCODE_RET:                RET(r);                                                 break;
240                         case Shader::OPCODE_LEAVE:              LEAVE(r);                                               break;
241                         case Shader::OPCODE_CMP:                cmp(d, s0, s1, control);                break;
242                         case Shader::OPCODE_ICMP:               icmp(d, s0, s1, control);               break;
243                         case Shader::OPCODE_SELECT:             select(d, s0, s1, s2);                  break;
244                         case Shader::OPCODE_EXTRACT:    extract(d.x, s0, s1.x);                 break;
245                         case Shader::OPCODE_INSERT:             insert(d, s0, s1.x, s2.x);              break;
246                         case Shader::OPCODE_ALL:                all(d.x, s0);                                   break;
247                         case Shader::OPCODE_ANY:                any(d.x, s0);                                   break;
248                         case Shader::OPCODE_NOT:                not(d, s0);                                             break;
249                         case Shader::OPCODE_OR:                 or(d, s0, s1);                                  break;
250                         case Shader::OPCODE_XOR:                xor(d, s0, s1);                                 break;
251                         case Shader::OPCODE_AND:                and(d, s0, s1);                                 break;
252                         case Shader::OPCODE_TEXLDL:             TEXLDL(r, d, s0, src1);                 break;
253                         case Shader::OPCODE_TEX:                TEX(r, d, s0, src1);                    break;
254                         case Shader::OPCODE_END:                                                                                break;
255                         default:
256                                 ASSERT(false);
257                         }
258
259                         if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_NOP)
260                         {
261                                 if(dst.integer)
262                                 {
263                                         switch(opcode)
264                                         {
265                                         case Shader::OPCODE_DIV:
266                                                 if(dst.x) d.x = Trunc(d.x);
267                                                 if(dst.y) d.y = Trunc(d.y);
268                                                 if(dst.z) d.z = Trunc(d.z);
269                                                 if(dst.w) d.w = Trunc(d.w);
270                                                 break;
271                                         default:
272                                                 break;   // No truncation to integer required when arguments are integer
273                                         }
274                                 }
275
276                                 if(dst.saturate)
277                                 {
278                                         if(dst.x) d.x = Max(d.x, Float4(0.0f));
279                                         if(dst.y) d.y = Max(d.y, Float4(0.0f));
280                                         if(dst.z) d.z = Max(d.z, Float4(0.0f));
281                                         if(dst.w) d.w = Max(d.w, Float4(0.0f));
282
283                                         if(dst.x) d.x = Min(d.x, Float4(1.0f));
284                                         if(dst.y) d.y = Min(d.y, Float4(1.0f));
285                                         if(dst.z) d.z = Min(d.z, Float4(1.0f));
286                                         if(dst.w) d.w = Min(d.w, Float4(1.0f));
287                                 }
288
289                                 if(instruction->isPredicated())
290                                 {
291                                         Vector4f pDst;   // FIXME: Rename
292
293                                         switch(dst.type)
294                                         {
295                                         case Shader::PARAMETER_VOID:                                                                                                                                            break;
296                                         case Shader::PARAMETER_TEMP:
297                                                 if(dst.rel.type == Shader::PARAMETER_VOID)
298                                                 {
299                                                         if(dst.x) pDst.x = r.r[dst.index].x;
300                                                         if(dst.y) pDst.y = r.r[dst.index].y;
301                                                         if(dst.z) pDst.z = r.r[dst.index].z;
302                                                         if(dst.w) pDst.w = r.r[dst.index].w;
303                                                 }
304                                                 else
305                                                 {
306                                                         Int a = relativeAddress(r, dst);
307
308                                                         if(dst.x) pDst.x = r.r[dst.index + a].x;
309                                                         if(dst.y) pDst.y = r.r[dst.index + a].y;
310                                                         if(dst.z) pDst.z = r.r[dst.index + a].z;
311                                                         if(dst.w) pDst.w = r.r[dst.index + a].w;
312                                                 }
313                                                 break;
314                                         case Shader::PARAMETER_ADDR:            pDst = r.a0;                                                                                                    break;
315                                         case Shader::PARAMETER_RASTOUT:
316                                                 switch(dst.index)
317                                                 {
318                                                 case 0:
319                                                         if(dst.x) pDst.x = r.o[Pos].x;
320                                                         if(dst.y) pDst.y = r.o[Pos].y;
321                                                         if(dst.z) pDst.z = r.o[Pos].z;
322                                                         if(dst.w) pDst.w = r.o[Pos].w;
323                                                         break;
324                                                 case 1:
325                                                         pDst.x = r.o[Fog].x;
326                                                         break;
327                                                 case 2:
328                                                         pDst.x = r.o[Pts].y;
329                                                         break;
330                                                 default:
331                                                         ASSERT(false);
332                                                 }
333                                                 break;
334                                         case Shader::PARAMETER_ATTROUT:
335                                                 if(dst.x) pDst.x = r.o[D0 + dst.index].x;
336                                                 if(dst.y) pDst.y = r.o[D0 + dst.index].y;
337                                                 if(dst.z) pDst.z = r.o[D0 + dst.index].z;
338                                                 if(dst.w) pDst.w = r.o[D0 + dst.index].w;
339                                                 break;
340                                         case Shader::PARAMETER_TEXCRDOUT:
341                                 //      case Shader::PARAMETER_OUTPUT:
342                                                 if(version < 0x0300)
343                                                 {
344                                                         if(dst.x) pDst.x = r.o[T0 + dst.index].x;
345                                                         if(dst.y) pDst.y = r.o[T0 + dst.index].y;
346                                                         if(dst.z) pDst.z = r.o[T0 + dst.index].z;
347                                                         if(dst.w) pDst.w = r.o[T0 + dst.index].w;
348                                                 }
349                                                 else
350                                                 {
351                                                         if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
352                                                         {
353                                                                 if(dst.x) pDst.x = r.o[dst.index].x;
354                                                                 if(dst.y) pDst.y = r.o[dst.index].y;
355                                                                 if(dst.z) pDst.z = r.o[dst.index].z;
356                                                                 if(dst.w) pDst.w = r.o[dst.index].w;
357                                                         }
358                                                         else if(dst.rel.type == Shader::PARAMETER_LOOP)
359                                                         {
360                                                                 Int aL = r.aL[r.loopDepth];
361
362                                                                 if(dst.x) pDst.x = r.o[dst.index + aL].x;
363                                                                 if(dst.y) pDst.y = r.o[dst.index + aL].y;
364                                                                 if(dst.z) pDst.z = r.o[dst.index + aL].z;
365                                                                 if(dst.w) pDst.w = r.o[dst.index + aL].w;
366                                                         }
367                                                         else
368                                                         {
369                                                                 Int a = relativeAddress(r, dst);
370
371                                                                 if(dst.x) pDst.x = r.o[dst.index + a].x;
372                                                                 if(dst.y) pDst.y = r.o[dst.index + a].y;
373                                                                 if(dst.z) pDst.z = r.o[dst.index + a].z;
374                                                                 if(dst.w) pDst.w = r.o[dst.index + a].w;
375                                                         }
376                                                 }
377                                                 break;
378                                         case Shader::PARAMETER_LABEL:                                                                                                                                           break;
379                                         case Shader::PARAMETER_PREDICATE:       pDst = r.p0;                                                                                                    break;
380                                         case Shader::PARAMETER_INPUT:                                                                                                                                           break;
381                                         default:
382                                                 ASSERT(false);
383                                         }
384
385                                         Int4 enable = enableMask(r, instruction);
386
387                                         Int4 xEnable = enable;
388                                         Int4 yEnable = enable;
389                                         Int4 zEnable = enable;
390                                         Int4 wEnable = enable;
391
392                                         if(predicate)
393                                         {
394                                                 unsigned char pSwizzle = instruction->predicateSwizzle;
395
396                                                 Float4 xPredicate = r.p0[(pSwizzle >> 0) & 0x03];
397                                                 Float4 yPredicate = r.p0[(pSwizzle >> 2) & 0x03];
398                                                 Float4 zPredicate = r.p0[(pSwizzle >> 4) & 0x03];
399                                                 Float4 wPredicate = r.p0[(pSwizzle >> 6) & 0x03];
400
401                                                 if(!instruction->predicateNot)
402                                                 {
403                                                         if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
404                                                         if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
405                                                         if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
406                                                         if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
407                                                 }
408                                                 else
409                                                 {
410                                                         if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
411                                                         if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
412                                                         if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
413                                                         if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
414                                                 }
415                                         }
416
417                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
418                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
419                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
420                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
421
422                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
423                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
424                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
425                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
426                                 }
427
428                                 switch(dst.type)
429                                 {
430                                 case Shader::PARAMETER_VOID:
431                                         break;
432                                 case Shader::PARAMETER_TEMP:
433                                         if(dst.rel.type == Shader::PARAMETER_VOID)
434                                         {
435                                                 if(dst.x) r.r[dst.index].x = d.x;
436                                                 if(dst.y) r.r[dst.index].y = d.y;
437                                                 if(dst.z) r.r[dst.index].z = d.z;
438                                                 if(dst.w) r.r[dst.index].w = d.w;
439                                         }
440                                         else
441                                         {
442                                                 Int a = relativeAddress(r, dst);
443
444                                                 if(dst.x) r.r[dst.index + a].x = d.x;
445                                                 if(dst.y) r.r[dst.index + a].y = d.y;
446                                                 if(dst.z) r.r[dst.index + a].z = d.z;
447                                                 if(dst.w) r.r[dst.index + a].w = d.w;
448                                         }
449                                         break;
450                                 case Shader::PARAMETER_ADDR:
451                                         if(dst.x) r.a0.x = d.x;
452                                         if(dst.y) r.a0.y = d.y;
453                                         if(dst.z) r.a0.z = d.z;
454                                         if(dst.w) r.a0.w = d.w;
455                                         break;
456                                 case Shader::PARAMETER_RASTOUT:
457                                         switch(dst.index)
458                                         {
459                                         case 0:
460                                                 if(dst.x) r.o[Pos].x = d.x;
461                                                 if(dst.y) r.o[Pos].y = d.y;
462                                                 if(dst.z) r.o[Pos].z = d.z;
463                                                 if(dst.w) r.o[Pos].w = d.w;
464                                                 break;
465                                         case 1:
466                                                 r.o[Fog].x = d.x;
467                                                 break;
468                                         case 2:         
469                                                 r.o[Pts].y = d.x;
470                                                 break;
471                                         default:        ASSERT(false);
472                                         }
473                                         break;
474                                 case Shader::PARAMETER_ATTROUT: 
475                                         if(dst.x) r.o[D0 + dst.index].x = d.x;
476                                         if(dst.y) r.o[D0 + dst.index].y = d.y;
477                                         if(dst.z) r.o[D0 + dst.index].z = d.z;
478                                         if(dst.w) r.o[D0 + dst.index].w = d.w;
479                                         break;
480                                 case Shader::PARAMETER_TEXCRDOUT:
481                         //      case Shader::PARAMETER_OUTPUT:
482                                         if(version < 0x0300)
483                                         {
484                                                 if(dst.x) r.o[T0 + dst.index].x = d.x;
485                                                 if(dst.y) r.o[T0 + dst.index].y = d.y;
486                                                 if(dst.z) r.o[T0 + dst.index].z = d.z;
487                                                 if(dst.w) r.o[T0 + dst.index].w = d.w;
488                                         }
489                                         else
490                                         {
491                                                 if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
492                                                 {
493                                                         if(dst.x) r.o[dst.index].x = d.x;
494                                                         if(dst.y) r.o[dst.index].y = d.y;
495                                                         if(dst.z) r.o[dst.index].z = d.z;
496                                                         if(dst.w) r.o[dst.index].w = d.w;
497                                                 }
498                                                 else if(dst.rel.type == Shader::PARAMETER_LOOP)
499                                                 {
500                                                         Int aL = r.aL[r.loopDepth];
501
502                                                         if(dst.x) r.o[dst.index + aL].x = d.x;
503                                                         if(dst.y) r.o[dst.index + aL].y = d.y;
504                                                         if(dst.z) r.o[dst.index + aL].z = d.z;
505                                                         if(dst.w) r.o[dst.index + aL].w = d.w;
506                                                 }
507                                                 else
508                                                 {
509                                                         Int a = relativeAddress(r, dst);
510
511                                                         if(dst.x) r.o[dst.index + a].x = d.x;
512                                                         if(dst.y) r.o[dst.index + a].y = d.y;
513                                                         if(dst.z) r.o[dst.index + a].z = d.z;
514                                                         if(dst.w) r.o[dst.index + a].w = d.w;
515                                                 }
516                                         }
517                                         break;
518                                 case Shader::PARAMETER_LABEL:                                                                                                                                           break;
519                                 case Shader::PARAMETER_PREDICATE:       r.p0 = d;                                                                                                               break;
520                                 case Shader::PARAMETER_INPUT:                                                                                                                                           break;
521                                 default:
522                                         ASSERT(false);
523                                 }
524                         }
525                 }
526
527                 if(currentLabel != -1)
528                 {
529                         Nucleus::setInsertBlock(returnBlock);
530                 }
531         }
532
533         void VertexProgram::passThrough(Registers &r)
534         {
535                 if(shader)
536                 {
537                         for(int i = 0; i < 12; i++)
538                         {
539                                 unsigned char usage = shader->output[i][0].usage;
540
541                                 switch(usage)
542                                 {
543                                 case 0xFF:
544                                         continue;
545                                 case Shader::USAGE_PSIZE:
546                                         r.o[i].y = r.v[i].x;
547                                         break;
548                                 case Shader::USAGE_TEXCOORD:
549                                         r.o[i].x = r.v[i].x;
550                                         r.o[i].y = r.v[i].y;
551                                         r.o[i].z = r.v[i].z;
552                                         r.o[i].w = r.v[i].w;
553                                         break;
554                                 case Shader::USAGE_POSITION:
555                                         r.o[i].x = r.v[i].x;
556                                         r.o[i].y = r.v[i].y;
557                                         r.o[i].z = r.v[i].z;
558                                         r.o[i].w = r.v[i].w;
559                                         break;
560                                 case Shader::USAGE_COLOR:
561                                         r.o[i].x = r.v[i].x;
562                                         r.o[i].y = r.v[i].y;
563                                         r.o[i].z = r.v[i].z;
564                                         r.o[i].w = r.v[i].w;
565                                         break;
566                                 case Shader::USAGE_FOG:
567                                         r.o[i].x = r.v[i].x;
568                                         break;
569                                 default:
570                                         ASSERT(false);
571                                 }
572                         }
573                 }
574                 else
575                 {
576                         r.o[Pos].x = r.v[PositionT].x;
577                         r.o[Pos].y = r.v[PositionT].y;
578                         r.o[Pos].z = r.v[PositionT].z;
579                         r.o[Pos].w = r.v[PositionT].w;
580
581                         for(int i = 0; i < 2; i++)
582                         {
583                                 r.o[D0 + i].x = r.v[Color0 + i].x;
584                                 r.o[D0 + i].y = r.v[Color0 + i].y;
585                                 r.o[D0 + i].z = r.v[Color0 + i].z;
586                                 r.o[D0 + i].w = r.v[Color0 + i].w;
587                         }
588
589                         for(int i = 0; i < 8; i++)
590                         {
591                                 r.o[T0 + i].x = r.v[TexCoord0 + i].x;
592                                 r.o[T0 + i].y = r.v[TexCoord0 + i].y;
593                                 r.o[T0 + i].z = r.v[TexCoord0 + i].z;
594                                 r.o[T0 + i].w = r.v[TexCoord0 + i].w;
595                         }
596
597                         r.o[Pts].y = r.v[PointSize].x;
598                 }
599         }
600
601         Vector4f VertexProgram::fetchRegisterF(Registers &r, const Src &src, int offset)
602         {
603                 int i = src.index + offset;
604
605                 Vector4f reg;
606
607                 switch(src.type)
608                 {
609                 case Shader::PARAMETER_TEMP:
610                         if(src.rel.type == Shader::PARAMETER_VOID)
611                         {
612                                 reg = r.r[i];
613                         }
614                         else
615                         {
616                                 reg = r.r[i + relativeAddress(r, src)];
617                         }
618                         break;
619                 case Shader::PARAMETER_CONST:
620                         reg = readConstant(r, src, offset);
621                         break;
622                 case Shader::PARAMETER_INPUT:
623             if(src.rel.type == Shader::PARAMETER_VOID)
624                         {
625                                 reg = r.v[i];
626                         }
627                         else
628                         {
629                                 reg = r.v[i + relativeAddress(r, src)];
630                         }
631             break;
632                 case Shader::PARAMETER_VOID:                    return r.r[0];   // Dummy
633                 case Shader::PARAMETER_FLOAT4LITERAL:
634                         reg.x = Float4(src.value[0]);
635                         reg.y = Float4(src.value[1]);
636                         reg.z = Float4(src.value[2]);
637                         reg.w = Float4(src.value[3]);
638                         break;
639                 case Shader::PARAMETER_ADDR:                    reg = r.a0;             break;
640                 case Shader::PARAMETER_CONSTBOOL:               return r.r[0];   // Dummy
641                 case Shader::PARAMETER_CONSTINT:                return r.r[0];   // Dummy
642                 case Shader::PARAMETER_LOOP:                    return r.r[0];   // Dummy
643                 case Shader::PARAMETER_PREDICATE:               return r.r[0];   // Dummy
644                 case Shader::PARAMETER_SAMPLER:
645                         if(src.rel.type == Shader::PARAMETER_VOID)
646                         {
647                                 reg.x = As<Float4>(Int4(i));
648                         }
649                         else if(src.rel.type == Shader::PARAMETER_TEMP)
650                         {
651                                 reg.x = As<Float4>(Int4(i) + RoundInt(r.r[src.rel.index].x));
652                         }
653                         return reg;
654                 case Shader::PARAMETER_OUTPUT:
655             if(src.rel.type == Shader::PARAMETER_VOID)
656                         {
657                                 reg = r.o[i];
658                         }
659                         else
660                         {
661                                 reg = r.o[i + relativeAddress(r, src)];
662                         }
663                         break;
664                 case Shader::PARAMETER_MISCTYPE:
665                         reg.x = Float(r.instanceID);
666                         return reg;
667                 default:
668                         ASSERT(false);
669                 }
670
671                 const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
672                 const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
673                 const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
674                 const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
675
676                 Vector4f mod;
677
678                 switch(src.modifier)
679                 {
680                 case Shader::MODIFIER_NONE:
681                         mod.x = x;
682                         mod.y = y;
683                         mod.z = z;
684                         mod.w = w;
685                         break;
686                 case Shader::MODIFIER_NEGATE:
687                         mod.x = -x;
688                         mod.y = -y;
689                         mod.z = -z;
690                         mod.w = -w;
691                         break;
692                 case Shader::MODIFIER_ABS:
693                         mod.x = Abs(x);
694                         mod.y = Abs(y);
695                         mod.z = Abs(z);
696                         mod.w = Abs(w);
697                         break;
698                 case Shader::MODIFIER_ABS_NEGATE:
699                         mod.x = -Abs(x);
700                         mod.y = -Abs(y);
701                         mod.z = -Abs(z);
702                         mod.w = -Abs(w);
703                         break;
704                 case Shader::MODIFIER_NOT:
705                         mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
706                         mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
707                         mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
708                         mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
709                         break;
710                 default:
711                         ASSERT(false);
712                 }
713
714                 return mod;
715         }
716
717         Vector4f VertexProgram::readConstant(Registers &r, const Src &src, int offset)
718         {
719                 Vector4f c;
720
721                 int i = src.index + offset;
722
723                 if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
724                 {
725                         c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]));
726
727                         c.x = c.x.xxxx;
728                         c.y = c.y.yyyy;
729                         c.z = c.z.zzzz;
730                         c.w = c.w.wwww;
731
732                         if(shader->containsDefineInstruction())   // Constant may be known at compile time
733                         {
734                                 for(size_t j = 0; j < shader->getLength(); j++)
735                                 {
736                                         const Shader::Instruction &instruction = *shader->getInstruction(j);
737
738                                         if(instruction.opcode == Shader::OPCODE_DEF)
739                                         {
740                                                 if(instruction.dst.index == i)
741                                                 {
742                                                         c.x = Float4(instruction.src[0].value[0]);
743                                                         c.y = Float4(instruction.src[0].value[1]);
744                                                         c.z = Float4(instruction.src[0].value[2]);
745                                                         c.w = Float4(instruction.src[0].value[3]);
746
747                                                         break;
748                                                 }
749                                         }
750                                 }
751                         }
752                 }
753                 else if(src.rel.type == Shader::PARAMETER_LOOP)
754                 {
755                         Int loopCounter = r.aL[r.loopDepth];
756
757                         c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]) + loopCounter * 16);
758
759                         c.x = c.x.xxxx;
760                         c.y = c.y.yyyy;
761                         c.z = c.z.zzzz;
762                         c.w = c.w.wwww;
763                 }
764                 else
765                 {
766                         if(src.rel.deterministic)
767                         {
768                                 Int a = relativeAddress(r, src);
769                         
770                                 c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]) + a * 16);
771
772                                 c.x = c.x.xxxx;
773                                 c.y = c.y.yyyy;
774                                 c.z = c.z.zzzz;
775                                 c.w = c.w.wwww;
776                         }
777                         else
778                         {
779                                 int component = src.rel.swizzle & 0x03;
780                                 Float4 a;
781
782                                 switch(src.rel.type)
783                                 {
784                                 case Shader::PARAMETER_ADDR:   a = r.a0[component]; break;
785                                 case Shader::PARAMETER_TEMP:   a = r.r[src.rel.index][component]; break;
786                                 case Shader::PARAMETER_INPUT:  a = r.v[src.rel.index][component]; break;
787                                 case Shader::PARAMETER_OUTPUT: a = r.o[src.rel.index][component]; break;
788                                 case Shader::PARAMETER_CONST:  a = *Pointer<Float>(r.data + OFFSET(DrawData,vs.c[src.rel.index][component])); break;
789                                 default: ASSERT(false);
790                                 }
791
792                                 Int4 index = Int4(i) + RoundInt(a) * Int4(src.rel.scale);
793
794                                 index = Min(As<UInt4>(index), UInt4(256));   // Clamp to constant register range, c[256] = {0, 0, 0, 0}
795                                 
796                                 Int index0 = Extract(index, 0);
797                                 Int index1 = Extract(index, 1);
798                                 Int index2 = Extract(index, 2);
799                                 Int index3 = Extract(index, 3);
800
801                                 c.x = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index0 * 16, 16);
802                                 c.y = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index1 * 16, 16);
803                                 c.z = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index2 * 16, 16);
804                                 c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index3 * 16, 16);
805
806                                 transpose4x4(c.x, c.y, c.z, c.w);
807                         }
808                 }
809
810                 return c;
811         }
812
813         Int VertexProgram::relativeAddress(Registers &r, const Shader::Parameter &var)
814         {
815                 ASSERT(var.rel.deterministic);
816
817                 if(var.rel.type == Shader::PARAMETER_TEMP)
818                 {
819                         return RoundInt(Extract(r.r[var.rel.index].x, 0)) * var.rel.scale;
820                 }
821                 else if(var.rel.type == Shader::PARAMETER_INPUT)
822                 {
823                         return RoundInt(Extract(r.v[var.rel.index].x, 0)) * var.rel.scale;
824                 }
825                 else if(var.rel.type == Shader::PARAMETER_OUTPUT)
826                 {
827                         return RoundInt(Extract(r.o[var.rel.index].x, 0)) * var.rel.scale;
828                 }
829                 else if(var.rel.type == Shader::PARAMETER_CONST)
830                 {
831                         RValue<Float4> c = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[var.rel.index]));
832
833                         return RoundInt(Extract(c, 0)) * var.rel.scale;
834                 }
835                 else ASSERT(false);
836
837                 return 0;
838         }
839
840         Int4 VertexProgram::enableMask(Registers &r, const Shader::Instruction *instruction)
841         {
842                 Int4 enable = instruction->analysisBranch ? Int4(r.enableStack[r.enableIndex]) : Int4(0xFFFFFFFF);
843                 
844                 if(!whileTest)
845                 {
846                         if(shader->containsBreakInstruction() && instruction->analysisBreak)
847                         {
848                                 enable &= r.enableBreak;
849                         }
850
851                         if(shader->containsContinueInstruction() && instruction->analysisContinue)
852                         {
853                                 enable &= r.enableContinue;
854                         }
855
856                         if(shader->containsLeaveInstruction() && instruction->analysisLeave)
857                         {
858                                 enable &= r.enableLeave;
859                         }
860                 }
861
862                 return enable;
863         }
864
865         void VertexProgram::M3X2(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)
866         {
867                 Vector4f row0 = fetchRegisterF(r, src1, 0);
868                 Vector4f row1 = fetchRegisterF(r, src1, 1);
869
870                 dst.x = dot3(src0, row0);
871                 dst.y = dot3(src0, row1);
872         }
873
874         void VertexProgram::M3X3(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)
875         {
876                 Vector4f row0 = fetchRegisterF(r, src1, 0);
877                 Vector4f row1 = fetchRegisterF(r, src1, 1);
878                 Vector4f row2 = fetchRegisterF(r, src1, 2);
879
880                 dst.x = dot3(src0, row0);
881                 dst.y = dot3(src0, row1);
882                 dst.z = dot3(src0, row2);
883         }
884
885         void VertexProgram::M3X4(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)
886         {
887                 Vector4f row0 = fetchRegisterF(r, src1, 0);
888                 Vector4f row1 = fetchRegisterF(r, src1, 1);
889                 Vector4f row2 = fetchRegisterF(r, src1, 2);
890                 Vector4f row3 = fetchRegisterF(r, src1, 3);
891
892                 dst.x = dot3(src0, row0);
893                 dst.y = dot3(src0, row1);
894                 dst.z = dot3(src0, row2);
895                 dst.w = dot3(src0, row3);
896         }
897
898         void VertexProgram::M4X3(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)
899         {
900                 Vector4f row0 = fetchRegisterF(r, src1, 0);
901                 Vector4f row1 = fetchRegisterF(r, src1, 1);
902                 Vector4f row2 = fetchRegisterF(r, src1, 2);
903
904                 dst.x = dot4(src0, row0);
905                 dst.y = dot4(src0, row1);
906                 dst.z = dot4(src0, row2);
907         }
908
909         void VertexProgram::M4X4(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)
910         {
911                 Vector4f row0 = fetchRegisterF(r, src1, 0);
912                 Vector4f row1 = fetchRegisterF(r, src1, 1);
913                 Vector4f row2 = fetchRegisterF(r, src1, 2);
914                 Vector4f row3 = fetchRegisterF(r, src1, 3);
915
916                 dst.x = dot4(src0, row0);
917                 dst.y = dot4(src0, row1);
918                 dst.z = dot4(src0, row2);
919                 dst.w = dot4(src0, row3);
920         }
921
922         void VertexProgram::BREAK(Registers &r)
923         {
924                 llvm::BasicBlock *deadBlock = Nucleus::createBasicBlock();
925                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
926
927                 if(breakDepth == 0)
928                 {
929                         r.enableIndex = r.enableIndex - breakDepth;
930                         Nucleus::createBr(endBlock);
931                 }
932                 else
933                 {
934                         r.enableBreak = r.enableBreak & ~r.enableStack[r.enableIndex];
935                         Bool allBreak = SignMask(r.enableBreak) == 0x0;
936
937                         r.enableIndex = r.enableIndex - breakDepth;
938                         branch(allBreak, endBlock, deadBlock);
939                 }
940
941                 Nucleus::setInsertBlock(deadBlock);
942                 r.enableIndex = r.enableIndex + breakDepth;
943         }
944
945         void VertexProgram::BREAKC(Registers &r, Vector4f &src0, Vector4f &src1, Control control)
946         {
947                 Int4 condition;
948
949                 switch(control)
950                 {
951                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x,  src1.x);   break;
952                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);             break;
953                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x);    break;
954                 case Shader::CONTROL_LT: condition = CmpLT(src0.x,  src1.x);    break;
955                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x);    break;
956                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);             break;
957                 default:
958                         ASSERT(false);
959                 }
960
961                 BREAK(r, condition);
962         }
963
964         void VertexProgram::BREAKP(Registers &r, const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
965         {
966                 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
967
968                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
969                 {
970                         condition = ~condition;
971                 }
972
973                 BREAK(r, condition);
974         }
975
976         void VertexProgram::BREAK(Registers &r, Int4 &condition)
977         {
978                 condition &= r.enableStack[r.enableIndex];
979
980                 llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock();
981                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
982
983                 r.enableBreak = r.enableBreak & ~condition;
984                 Bool allBreak = SignMask(r.enableBreak) == 0x0;
985
986                 r.enableIndex = r.enableIndex - breakDepth;
987                 branch(allBreak, endBlock, continueBlock);
988
989                 Nucleus::setInsertBlock(continueBlock);
990                 r.enableIndex = r.enableIndex + breakDepth;
991         }
992
993         void VertexProgram::CONTINUE(Registers &r)
994         {
995                 r.enableContinue = r.enableContinue & ~r.enableStack[r.enableIndex];
996         }
997
998         void VertexProgram::TEST()
999         {
1000                 whileTest = true;
1001         }
1002
1003         void VertexProgram::CALL(Registers &r, int labelIndex, int callSiteIndex)
1004         {
1005                 if(!labelBlock[labelIndex])
1006                 {
1007                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1008                 }
1009
1010                 if(callRetBlock[labelIndex].size() > 1)
1011                 {
1012                         r.callStack[r.stackIndex++] = UInt(callSiteIndex);
1013                 }
1014
1015                 Int4 restoreLeave = r.enableLeave;
1016
1017                 Nucleus::createBr(labelBlock[labelIndex]);
1018                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1019
1020                 r.enableLeave = restoreLeave;
1021         }
1022
1023         void VertexProgram::CALLNZ(Registers &r, int labelIndex, int callSiteIndex, const Src &src)
1024         {
1025                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1026                 {
1027                         CALLNZb(r, labelIndex, callSiteIndex, src);
1028                 }
1029                 else if(src.type == Shader::PARAMETER_PREDICATE)
1030                 {
1031                         CALLNZp(r, labelIndex, callSiteIndex, src);
1032                 }
1033                 else ASSERT(false);
1034         }
1035
1036         void VertexProgram::CALLNZb(Registers &r, int labelIndex, int callSiteIndex, const Src &boolRegister)
1037         {
1038                 Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1039                 
1040                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1041                 {
1042                         condition = !condition; 
1043                 }
1044
1045                 if(!labelBlock[labelIndex])
1046                 {
1047                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1048                 }
1049
1050                 if(callRetBlock[labelIndex].size() > 1)
1051                 {
1052                         r.callStack[r.stackIndex++] = UInt(callSiteIndex);
1053                 }
1054
1055                 Int4 restoreLeave = r.enableLeave;
1056
1057                 branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1058                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1059
1060                 r.enableLeave = restoreLeave;
1061         }
1062
1063         void VertexProgram::CALLNZp(Registers &r, int labelIndex, int callSiteIndex, const Src &predicateRegister)
1064         {
1065                 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
1066
1067                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1068                 {
1069                         condition = ~condition;
1070                 }
1071
1072                 condition &= r.enableStack[r.enableIndex];
1073
1074                 if(!labelBlock[labelIndex])
1075                 {
1076                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1077                 }
1078
1079                 if(callRetBlock[labelIndex].size() > 1)
1080                 {
1081                         r.callStack[r.stackIndex++] = UInt(callSiteIndex);
1082                 }
1083
1084                 r.enableIndex++;
1085                 r.enableStack[r.enableIndex] = condition;
1086                 Int4 restoreLeave = r.enableLeave;
1087
1088                 Bool notAllFalse = SignMask(condition) != 0;
1089                 branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1090                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1091
1092                 r.enableIndex--;
1093                 r.enableLeave = restoreLeave;
1094         }
1095
1096         void VertexProgram::ELSE(Registers &r)
1097         {
1098                 ifDepth--;
1099
1100                 llvm::BasicBlock *falseBlock = ifFalseBlock[ifDepth];
1101                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1102
1103                 if(isConditionalIf[ifDepth])
1104                 {
1105                         Int4 condition = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];
1106                         Bool notAllFalse = SignMask(condition) != 0;
1107
1108                         branch(notAllFalse, falseBlock, endBlock);
1109
1110                         r.enableStack[r.enableIndex] = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];
1111                 }
1112                 else
1113                 {
1114                         Nucleus::createBr(endBlock);
1115                         Nucleus::setInsertBlock(falseBlock);
1116                 }
1117
1118                 ifFalseBlock[ifDepth] = endBlock;
1119
1120                 ifDepth++;
1121         }
1122
1123         void VertexProgram::ENDIF(Registers &r)
1124         {
1125                 ifDepth--;
1126
1127                 llvm::BasicBlock *endBlock = ifFalseBlock[ifDepth];
1128
1129                 Nucleus::createBr(endBlock);
1130                 Nucleus::setInsertBlock(endBlock);
1131
1132                 if(isConditionalIf[ifDepth])
1133                 {
1134                         breakDepth--;
1135                         r.enableIndex--;
1136                 }
1137         }
1138
1139         void VertexProgram::ENDLOOP(Registers &r)
1140         {
1141                 loopRepDepth--;
1142
1143                 r.aL[r.loopDepth] = r.aL[r.loopDepth] + r.increment[r.loopDepth];   // FIXME: +=
1144
1145                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1146                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1147
1148                 Nucleus::createBr(testBlock);
1149                 Nucleus::setInsertBlock(endBlock);
1150
1151                 r.loopDepth--;
1152                 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1153         }
1154
1155         void VertexProgram::ENDREP(Registers &r)
1156         {
1157                 loopRepDepth--;
1158
1159                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1160                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1161
1162                 Nucleus::createBr(testBlock);
1163                 Nucleus::setInsertBlock(endBlock);
1164
1165                 r.loopDepth--;
1166                 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1167         }
1168
1169         void VertexProgram::ENDWHILE(Registers &r)
1170         {
1171                 loopRepDepth--;
1172
1173                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1174                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1175
1176                 Nucleus::createBr(testBlock);
1177                 Nucleus::setInsertBlock(endBlock);
1178
1179                 r.enableIndex--;
1180                 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1181                 whileTest = false;
1182         }
1183
1184         void VertexProgram::IF(Registers &r, const Src &src)
1185         {
1186                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1187                 {
1188                         IFb(r, src);
1189                 }
1190                 else if(src.type == Shader::PARAMETER_PREDICATE)
1191                 {
1192                         IFp(r, src);
1193                 }
1194                 else
1195                 {
1196                         Int4 condition = As<Int4>(fetchRegisterF(r, src).x);
1197                         IF(r, condition);
1198                 }
1199         }
1200
1201         void VertexProgram::IFb(Registers &r, const Src &boolRegister)
1202         {
1203                 ASSERT(ifDepth < 24 + 4);
1204
1205                 Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1206
1207                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1208                 {
1209                         condition = !condition;
1210                 }
1211
1212                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1213                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1214
1215                 branch(condition, trueBlock, falseBlock);
1216
1217                 isConditionalIf[ifDepth] = false;
1218                 ifFalseBlock[ifDepth] = falseBlock;
1219
1220                 ifDepth++;
1221         }
1222
1223         void VertexProgram::IFp(Registers &r, const Src &predicateRegister)
1224         {
1225                 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
1226
1227                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1228                 {
1229                         condition = ~condition;
1230                 }
1231
1232                 IF(r, condition);
1233         }
1234
1235         void VertexProgram::IFC(Registers &r, Vector4f &src0, Vector4f &src1, Control control)
1236         {
1237                 Int4 condition;
1238
1239                 switch(control)
1240                 {
1241                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x,  src1.x);   break;
1242                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);             break;
1243                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x);    break;
1244                 case Shader::CONTROL_LT: condition = CmpLT(src0.x,  src1.x);    break;
1245                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x);    break;
1246                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);             break;
1247                 default:
1248                         ASSERT(false);
1249                 }
1250
1251                 IF(r, condition);
1252         }
1253
1254         void VertexProgram::IF(Registers &r, Int4 &condition)
1255         {
1256                 condition &= r.enableStack[r.enableIndex];
1257
1258                 r.enableIndex++;
1259                 r.enableStack[r.enableIndex] = condition;
1260
1261                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1262                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1263
1264                 Bool notAllFalse = SignMask(condition) != 0;
1265
1266                 branch(notAllFalse, trueBlock, falseBlock);
1267
1268                 isConditionalIf[ifDepth] = true;
1269                 ifFalseBlock[ifDepth] = falseBlock;
1270
1271                 ifDepth++;
1272                 breakDepth++;
1273         }
1274
1275         void VertexProgram::LABEL(int labelIndex)
1276         {
1277                 if(!labelBlock[labelIndex])
1278                 {
1279                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1280                 }
1281
1282                 Nucleus::setInsertBlock(labelBlock[labelIndex]);
1283                 currentLabel = labelIndex;
1284         }
1285
1286         void VertexProgram::LOOP(Registers &r, const Src &integerRegister)
1287         {
1288                 r.loopDepth++;
1289
1290                 r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
1291                 r.aL[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][1]));
1292                 r.increment[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][2]));
1293
1294                 // FIXME: Compiles to two instructions?
1295                 If(r.increment[r.loopDepth] == 0)
1296                 {
1297                         r.increment[r.loopDepth] = 1;
1298                 }
1299
1300                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1301                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1302                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1303
1304                 loopRepTestBlock[loopRepDepth] = testBlock;
1305                 loopRepEndBlock[loopRepDepth] = endBlock;
1306
1307                 // FIXME: jump(testBlock)
1308                 Nucleus::createBr(testBlock);
1309                 Nucleus::setInsertBlock(testBlock);
1310
1311                 branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
1312                 Nucleus::setInsertBlock(loopBlock);
1313
1314                 r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1;   // FIXME: --
1315                 
1316                 loopRepDepth++;
1317                 breakDepth = 0;
1318         }
1319
1320         void VertexProgram::REP(Registers &r, const Src &integerRegister)
1321         {
1322                 r.loopDepth++;
1323
1324                 r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
1325                 r.aL[r.loopDepth] = r.aL[r.loopDepth - 1];
1326
1327                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1328                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1329                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1330
1331                 loopRepTestBlock[loopRepDepth] = testBlock;
1332                 loopRepEndBlock[loopRepDepth] = endBlock;
1333
1334                 // FIXME: jump(testBlock)
1335                 Nucleus::createBr(testBlock);
1336                 Nucleus::setInsertBlock(testBlock);
1337
1338                 branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
1339                 Nucleus::setInsertBlock(loopBlock);
1340
1341                 r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1;   // FIXME: --
1342
1343                 loopRepDepth++;
1344                 breakDepth = 0;
1345         }
1346
1347         void VertexProgram::WHILE(Registers &r, const Src &temporaryRegister)
1348         {
1349                 r.enableIndex++;
1350
1351                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1352                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1353                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1354                 
1355                 loopRepTestBlock[loopRepDepth] = testBlock;
1356                 loopRepEndBlock[loopRepDepth] = endBlock;
1357
1358                 Int4 restoreBreak = r.enableBreak;
1359                 Int4 restoreContinue = r.enableContinue;
1360
1361                 // FIXME: jump(testBlock)
1362                 Nucleus::createBr(testBlock);
1363                 Nucleus::setInsertBlock(testBlock);
1364                 r.enableContinue = restoreContinue;
1365
1366                 const Vector4f &src = fetchRegisterF(r, temporaryRegister);
1367                 Int4 condition = As<Int4>(src.x);
1368                 condition &= r.enableStack[r.enableIndex - 1];
1369                 r.enableStack[r.enableIndex] = condition;
1370
1371                 Bool notAllFalse = SignMask(condition) != 0;
1372                 branch(notAllFalse, loopBlock, endBlock);
1373                 
1374                 Nucleus::setInsertBlock(endBlock);
1375                 r.enableBreak = restoreBreak;
1376                 
1377                 Nucleus::setInsertBlock(loopBlock);
1378
1379                 loopRepDepth++;
1380                 breakDepth = 0;
1381         }
1382
1383         void VertexProgram::RET(Registers &r)
1384         {
1385                 if(currentLabel == -1)
1386                 {
1387                         returnBlock = Nucleus::createBasicBlock();
1388                         Nucleus::createBr(returnBlock);
1389                 }
1390                 else
1391                 {
1392                         llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
1393
1394                         if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
1395                         {
1396                                 // FIXME: Encapsulate
1397                                 UInt index = r.callStack[--r.stackIndex];
1398  
1399                                 llvm::Value *value = index.loadValue();
1400                                 llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
1401
1402                                 for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
1403                                 {
1404                                         Nucleus::addSwitchCase(switchInst, i, callRetBlock[currentLabel][i]);
1405                                 }
1406                         }
1407                         else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
1408                         {
1409                                 Nucleus::createBr(callRetBlock[currentLabel][0]);
1410                         }
1411                         else   // Function isn't called
1412                         {
1413                                 Nucleus::createBr(unreachableBlock);
1414                         }
1415
1416                         Nucleus::setInsertBlock(unreachableBlock);
1417                         Nucleus::createUnreachable();
1418                 }
1419         }
1420
1421         void VertexProgram::LEAVE(Registers &r)
1422         {
1423                 r.enableLeave = r.enableLeave & ~r.enableStack[r.enableIndex];
1424
1425                 // FIXME: Return from function if all instances left
1426                 // FIXME: Use enableLeave in other control-flow constructs
1427         }
1428
1429         void VertexProgram::TEXLDL(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
1430         {
1431                 Vector4f tmp;
1432                 sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, src0.w);
1433
1434                 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
1435                 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
1436                 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
1437                 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
1438         }
1439
1440         void VertexProgram::TEX(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
1441         {
1442                 Float4 lod = Float4(0.0f);
1443                 Vector4f tmp;
1444                 sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, lod);
1445
1446                 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
1447                 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
1448                 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
1449                 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
1450         }
1451
1452         void VertexProgram::sampleTexture(Registers &r, Vector4f &c, const Src &s, Float4 &u, Float4 &v, Float4 &w, Float4 &q)
1453         {
1454                 if(s.type == Shader::PARAMETER_SAMPLER && s.rel.type == Shader::PARAMETER_VOID)
1455                 {
1456                         Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap[16]) + s.index * sizeof(Texture);
1457                         sampler[s.index]->sampleTexture(texture, c, u, v, w, q, r.a0, r.a0, false, false, true);        
1458                 }
1459                 else
1460                 {
1461                         Int index = As<Int>(Float(fetchRegisterF(r, s).x.x));
1462
1463                         for(int i = 0; i < 16; i++)
1464                         {
1465                                 if(shader->usesSampler(i))
1466                                 {
1467                                         If(index == i)
1468                                         {
1469                                                 Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap[16]) + i * sizeof(Texture);
1470                                                 sampler[i]->sampleTexture(texture, c, u, v, w, q, r.a0, r.a0, false, false, true);
1471                                                 // FIXME: When the sampler states are the same, we could use one sampler and just index the texture
1472                                         }
1473                                 }
1474                         }
1475                 }
1476         }
1477 }