OSDN Git Service

Update to June 11 2013 code drop.
[android-x86/external-swiftshader.git] / src / Shader / VertexProgram.cpp
1 // SwiftShader Software Renderer
2 //
3 // Copyright(c) 2005-2013 TransGaming Inc.
4 //
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
10 //
11
12 #include "VertexProgram.hpp"
13
14 #include "Renderer.hpp"
15 #include "VertexShader.hpp"
16 #include "Vertex.hpp"
17 #include "Half.hpp"
18 #include "SamplerCore.hpp"
19 #include "Debug.hpp"
20
21 extern bool localShaderConstants;
22
23 namespace sw
24 {
25         VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader) : VertexRoutine(state, shader)
26         {
27                 ifDepth = 0;
28                 loopRepDepth = 0;
29                 breakDepth = 0;
30                 currentLabel = -1;
31                 whileTest = false;
32
33                 for(int i = 0; i < 2048; i++)
34                 {
35                         labelBlock[i] = 0;
36                 }
37         }
38
39         VertexProgram::~VertexProgram()
40         {
41                 for(int i = 0; i < 4; i++)
42                 {
43                         delete sampler[i];
44                 }
45         }
46
47         void VertexProgram::pipeline(Registers &r)
48         {
49                 for(int i = 0; i < 4; i++)
50                 {
51                         sampler[i] = new SamplerCore(r.constants, state.samplerState[i]);
52                 }
53
54                 if(!state.preTransformed)
55                 {
56                         program(r);
57                 }
58                 else
59                 {
60                         passThrough(r);
61                 }
62         }
63
64         void VertexProgram::program(Registers &r)
65         {
66         //      shader->print("VertexShader-%0.8X.txt", state.shaderID);
67
68                 unsigned short version = shader->getVersion();
69
70                 r.enableIndex = 0;
71                 r.stackIndex = 0;
72
73                 // Create all call site return blocks up front
74                 for(int i = 0; i < shader->getLength(); i++)
75                 {
76                         const Shader::Instruction *instruction = shader->getInstruction(i);
77                         Shader::Opcode opcode = instruction->opcode;
78
79                         if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
80                         {
81                                 const Dst &dst = instruction->dst;
82
83                                 ASSERT(callRetBlock[dst.label].size() == dst.callSite);
84                                 callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
85                         }
86                 }
87         
88                 for(int i = 0; i < shader->getLength(); i++)
89                 {
90                         const Shader::Instruction *instruction = shader->getInstruction(i);
91                         Shader::Opcode opcode = instruction->opcode;
92
93                         if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
94                         {
95                                 continue;
96                         }
97
98                         Dst dst = instruction->dst;
99                         Src src0 = instruction->src[0];
100                         Src src1 = instruction->src[1];
101                         Src src2 = instruction->src[2];
102
103                         bool predicate = instruction->predicate;
104                         int size = shader->size(opcode);
105                         Usage usage = instruction->usage;
106                         unsigned char usageIndex = instruction->usageIndex;
107                         Control control = instruction->control;
108                         bool integer = dst.type == Shader::PARAMETER_ADDR;
109                         bool pp = dst.partialPrecision;
110
111                         Vector4f d;
112                         Vector4f s0;
113                         Vector4f s1;
114                         Vector4f s2;
115
116                         if(src0.type != Shader::PARAMETER_VOID) s0 = reg(r, src0);
117                         if(src1.type != Shader::PARAMETER_VOID) s1 = reg(r, src1);
118                         if(src2.type != Shader::PARAMETER_VOID) s2 = reg(r, src2);
119
120                         switch(opcode)
121                         {
122                         case Shader::OPCODE_VS_1_0:                                                                             break;
123                         case Shader::OPCODE_VS_1_1:                                                                             break;
124                         case Shader::OPCODE_VS_2_0:                                                                             break;
125                         case Shader::OPCODE_VS_2_x:                                                                             break;
126                         case Shader::OPCODE_VS_2_sw:                                                                    break;
127                         case Shader::OPCODE_VS_3_0:                                                                             break;
128                         case Shader::OPCODE_VS_3_sw:                                                                    break;
129                         case Shader::OPCODE_DCL:                                                                                break;
130                         case Shader::OPCODE_DEF:                                                                                break;
131                         case Shader::OPCODE_DEFI:                                                                               break;
132                         case Shader::OPCODE_DEFB:                                                                               break;
133                         case Shader::OPCODE_NOP:                                                                                break;
134                         case Shader::OPCODE_ABS:                abs(d, s0);                                             break;
135                         case Shader::OPCODE_ADD:                add(d, s0, s1);                                 break;
136                         case Shader::OPCODE_CRS:                crs(d, s0, s1);                                 break;
137                         case Shader::OPCODE_FORWARD1:   forward1(d, s0, s1, s2);                break;
138                         case Shader::OPCODE_FORWARD2:   forward2(d, s0, s1, s2);                break;
139                         case Shader::OPCODE_FORWARD3:   forward3(d, s0, s1, s2);                break;
140                         case Shader::OPCODE_FORWARD4:   forward4(d, s0, s1, s2);                break;
141                         case Shader::OPCODE_REFLECT1:   reflect1(d, s0, s1);                    break;
142                         case Shader::OPCODE_REFLECT2:   reflect2(d, s0, s1);                    break;
143                         case Shader::OPCODE_REFLECT3:   reflect3(d, s0, s1);                    break;
144                         case Shader::OPCODE_REFLECT4:   reflect4(d, s0, s1);                    break;
145                         case Shader::OPCODE_REFRACT1:   refract1(d, s0, s1, s2.x);              break;
146                         case Shader::OPCODE_REFRACT2:   refract2(d, s0, s1, s2.x);              break;
147                         case Shader::OPCODE_REFRACT3:   refract3(d, s0, s1, s2.x);              break;
148                         case Shader::OPCODE_REFRACT4:   refract4(d, s0, s1, s2.x);              break;
149                         case Shader::OPCODE_DP1:                dp1(d, s0, s1);                                 break;
150                         case Shader::OPCODE_DP2:                dp2(d, s0, s1);                                 break;
151                         case Shader::OPCODE_DP3:                dp3(d, s0, s1);                                 break;
152                         case Shader::OPCODE_DP4:                dp4(d, s0, s1);                                 break;
153                         case Shader::OPCODE_ATT:                att(d, s0, s1);                                 break;
154                         case Shader::OPCODE_EXP2X:              exp2x(d, s0, pp);                               break;
155                         case Shader::OPCODE_EXP2:               exp2(d, s0, pp);                                break;
156                         case Shader::OPCODE_EXPP:               expp(d, s0, version);                   break;
157                         case Shader::OPCODE_EXP:                exp(d, s0, pp);                                 break;
158                         case Shader::OPCODE_FRC:                frc(d, s0);                                             break;
159                         case Shader::OPCODE_TRUNC:      trunc(d, s0);                   break;
160                         case Shader::OPCODE_FLOOR:      floor(d, s0);                   break;
161                         case Shader::OPCODE_CEIL:       ceil(d, s0);                    break;
162                         case Shader::OPCODE_LIT:                lit(d, s0);                                             break;
163                         case Shader::OPCODE_LOG2X:              log2x(d, s0, pp);                               break;
164                         case Shader::OPCODE_LOG2:               log2(d, s0, pp);                                break;
165                         case Shader::OPCODE_LOGP:               logp(d, s0, version);                   break;
166                         case Shader::OPCODE_LOG:                log(d, s0, pp);                                 break;
167                         case Shader::OPCODE_LRP:                lrp(d, s0, s1, s2);                             break;
168                         case Shader::OPCODE_STEP:               step(d, s0, s1);                                break;
169                         case Shader::OPCODE_SMOOTH:             smooth(d, s0, s1, s2);                  break;
170                         case Shader::OPCODE_M3X2:               M3X2(r, d, s0, src1);                   break;
171                         case Shader::OPCODE_M3X3:               M3X3(r, d, s0, src1);                   break;
172                         case Shader::OPCODE_M3X4:               M3X4(r, d, s0, src1);                   break;
173                         case Shader::OPCODE_M4X3:               M4X3(r, d, s0, src1);                   break;
174                         case Shader::OPCODE_M4X4:               M4X4(r, d, s0, src1);                   break;
175                         case Shader::OPCODE_MAD:                mad(d, s0, s1, s2);                             break;
176                         case Shader::OPCODE_MAX:                max(d, s0, s1);                                 break;
177                         case Shader::OPCODE_MIN:                min(d, s0, s1);                                 break;
178                         case Shader::OPCODE_MOV:                mov(d, s0, integer);                    break;
179                         case Shader::OPCODE_MOVA:               mov(d, s0);                                             break;
180                         case Shader::OPCODE_F2B:                f2b(d, s0);                                             break;
181                         case Shader::OPCODE_B2F:                b2f(d, s0);                                             break;
182                         case Shader::OPCODE_MUL:                mul(d, s0, s1);                                 break;
183                         case Shader::OPCODE_NRM2:               nrm2(d, s0, pp);                                break;
184                         case Shader::OPCODE_NRM3:               nrm3(d, s0, pp);                                break;
185                         case Shader::OPCODE_NRM4:               nrm4(d, s0, pp);                                break;
186                         case Shader::OPCODE_POWX:               powx(d, s0, s1, pp);                    break;
187                         case Shader::OPCODE_POW:                pow(d, s0, s1, pp);                             break;
188                         case Shader::OPCODE_RCPX:               rcpx(d, s0, pp);                                break;
189                         case Shader::OPCODE_DIV:                div(d, s0, s1);                                 break;
190                         case Shader::OPCODE_MOD:                mod(d, s0, s1);                                 break;
191                         case Shader::OPCODE_RSQX:               rsqx(d, s0, pp);                                break;
192                         case Shader::OPCODE_SQRT:               sqrt(d, s0, pp);                                break;
193                         case Shader::OPCODE_RSQ:                rsq(d, s0, pp);                                 break;
194                         case Shader::OPCODE_LEN2:               len2(d.x, s0, pp);                              break;
195                         case Shader::OPCODE_LEN3:               len3(d.x, s0, pp);                              break;
196                         case Shader::OPCODE_LEN4:               len4(d.x, s0, pp);                              break;
197                         case Shader::OPCODE_DIST1:              dist1(d.x, s0, s1, pp);                 break;
198                         case Shader::OPCODE_DIST2:              dist2(d.x, s0, s1, pp);                 break;
199                         case Shader::OPCODE_DIST3:              dist3(d.x, s0, s1, pp);                 break;
200                         case Shader::OPCODE_DIST4:              dist4(d.x, s0, s1, pp);                 break;
201                         case Shader::OPCODE_SGE:                step(d, s1, s0);                                break;
202                         case Shader::OPCODE_SGN:                sgn(d, s0);                                             break;
203                         case Shader::OPCODE_SINCOS:             sincos(d, s0, pp);                              break;
204                         case Shader::OPCODE_COS:                cos(d, s0, pp);                                 break;
205                         case Shader::OPCODE_SIN:                sin(d, s0, pp);                                 break;
206                         case Shader::OPCODE_TAN:                tan(d, s0);                                             break;
207                         case Shader::OPCODE_ACOS:               acos(d, s0);                                    break;
208                         case Shader::OPCODE_ASIN:               asin(d, s0);                                    break;
209                         case Shader::OPCODE_ATAN:               atan(d, s0);                                    break;
210                         case Shader::OPCODE_ATAN2:              atan2(d, s0, s1);                               break;
211                         case Shader::OPCODE_SLT:                slt(d, s0, s1);                                 break;
212                         case Shader::OPCODE_SUB:                sub(d, s0, s1);                                 break;
213                         case Shader::OPCODE_BREAK:              BREAK(r);                                               break;
214                         case Shader::OPCODE_BREAKC:             BREAKC(r, s0, s1, control);             break;
215                         case Shader::OPCODE_BREAKP:             BREAKP(r, src0);                                break;
216                         case Shader::OPCODE_CONTINUE:   CONTINUE(r);                                    break;
217                         case Shader::OPCODE_TEST:               TEST();                                                 break;
218                         case Shader::OPCODE_CALL:               CALL(r, dst.label, dst.callSite);         break;
219                         case Shader::OPCODE_CALLNZ:             CALLNZ(r, dst.label, dst.callSite, src0); break;
220                         case Shader::OPCODE_ELSE:               ELSE(r);                                                break;
221                         case Shader::OPCODE_ENDIF:              ENDIF(r);                                               break;
222                         case Shader::OPCODE_ENDLOOP:    ENDLOOP(r);                                             break;
223                         case Shader::OPCODE_ENDREP:             ENDREP(r);                                              break;
224                         case Shader::OPCODE_ENDWHILE:   ENDWHILE(r);                                    break;
225                         case Shader::OPCODE_IF:                 IF(r, src0);                                    break;
226                         case Shader::OPCODE_IFC:                IFC(r, s0, s1, control);                break;
227                         case Shader::OPCODE_LABEL:              LABEL(dst.index);                               break;
228                         case Shader::OPCODE_LOOP:               LOOP(r, src1);                                  break;
229                         case Shader::OPCODE_REP:                REP(r, src0);                                   break;
230                         case Shader::OPCODE_WHILE:              WHILE(r, src0);                                 break;
231                         case Shader::OPCODE_RET:                RET(r);                                                 break;
232                         case Shader::OPCODE_LEAVE:              LEAVE(r);                                               break;
233                         case Shader::OPCODE_CMP:                cmp(d, s0, s1, control);                break;
234                         case Shader::OPCODE_ICMP:               icmp(d, s0, s1, control);               break;
235                         case Shader::OPCODE_SELECT:             select(d, s0, s1, s2);                  break;
236                         case Shader::OPCODE_EXTRACT:    extract(d.x, s0, s1.x);                 break;
237                         case Shader::OPCODE_INSERT:             insert(d, s0, s1.x, s2.x);              break;
238                         case Shader::OPCODE_ALL:                all(d.x, s0);                                   break;
239                         case Shader::OPCODE_ANY:                any(d.x, s0);                                   break;
240                         case Shader::OPCODE_NOT:                not(d, s0);                                             break;
241                         case Shader::OPCODE_OR:                 or(d.x, s0.x, s1.x);                    break;
242                         case Shader::OPCODE_XOR:                xor(d.x, s0.x, s1.x);                   break;
243                         case Shader::OPCODE_AND:                and(d.x, s0.x, s1.x);                   break;
244                         case Shader::OPCODE_TEXLDL:             TEXLDL(r, d, s0, src1);                 break;
245                         case Shader::OPCODE_TEX:                TEX(r, d, s0, src1);                    break;
246                         case Shader::OPCODE_END:                                                                                break;
247                         default:
248                                 ASSERT(false);
249                         }
250
251                         if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_NOP)
252                         {
253                                 if(dst.integer)
254                                 {
255                                         switch(opcode)
256                                         {
257                                         case Shader::OPCODE_DIV:
258                                                 if(dst.x) d.x = Trunc(d.x);
259                                                 if(dst.y) d.y = Trunc(d.y);
260                                                 if(dst.z) d.z = Trunc(d.z);
261                                                 if(dst.w) d.w = Trunc(d.w);
262                                                 break;
263                                         default:
264                                                 break;   // No truncation to integer required when arguments are integer
265                                         }
266                                 }
267
268                                 if(dst.saturate)
269                                 {
270                                         if(dst.x) d.x = Max(d.x, Float4(0.0f));
271                                         if(dst.y) d.y = Max(d.y, Float4(0.0f));
272                                         if(dst.z) d.z = Max(d.z, Float4(0.0f));
273                                         if(dst.w) d.w = Max(d.w, Float4(0.0f));
274
275                                         if(dst.x) d.x = Min(d.x, Float4(1.0f));
276                                         if(dst.y) d.y = Min(d.y, Float4(1.0f));
277                                         if(dst.z) d.z = Min(d.z, Float4(1.0f));
278                                         if(dst.w) d.w = Min(d.w, Float4(1.0f));
279                                 }
280
281                                 if(shader->containsDynamicBranching())
282                                 {
283                                         Vector4f pDst;   // FIXME: Rename
284
285                                         switch(dst.type)
286                                         {
287                                         case Shader::PARAMETER_VOID:                                                                                                                                            break;
288                                         case Shader::PARAMETER_TEMP:
289                                                 if(dst.rel.type == Shader::PARAMETER_VOID)
290                                                 {
291                                                         if(dst.x) pDst.x = r.r[dst.index].x;
292                                                         if(dst.y) pDst.y = r.r[dst.index].y;
293                                                         if(dst.z) pDst.z = r.r[dst.index].z;
294                                                         if(dst.w) pDst.w = r.r[dst.index].w;
295                                                 }
296                                                 else
297                                                 {
298                                                         Int a = relativeAddress(r, dst);
299
300                                                         if(dst.x) pDst.x = r.r[dst.index + a].x;
301                                                         if(dst.y) pDst.y = r.r[dst.index + a].y;
302                                                         if(dst.z) pDst.z = r.r[dst.index + a].z;
303                                                         if(dst.w) pDst.w = r.r[dst.index + a].w;
304                                                 }
305                                                 break;
306                                         case Shader::PARAMETER_ADDR:            pDst = r.a0;                                                                                                    break;
307                                         case Shader::PARAMETER_RASTOUT:
308                                                 switch(dst.index)
309                                                 {
310                                                 case 0:
311                                                         if(dst.x) pDst.x = r.o[Pos].x;
312                                                         if(dst.y) pDst.y = r.o[Pos].y;
313                                                         if(dst.z) pDst.z = r.o[Pos].z;
314                                                         if(dst.w) pDst.w = r.o[Pos].w;
315                                                         break;
316                                                 case 1:
317                                                         pDst.x = r.o[Fog].x;
318                                                         break;
319                                                 case 2:
320                                                         pDst.x = r.o[Pts].y;
321                                                         break;
322                                                 default:
323                                                         ASSERT(false);
324                                                 }
325                                                 break;
326                                         case Shader::PARAMETER_ATTROUT:
327                                                 if(dst.x) pDst.x = r.o[D0 + dst.index].x;
328                                                 if(dst.y) pDst.y = r.o[D0 + dst.index].y;
329                                                 if(dst.z) pDst.z = r.o[D0 + dst.index].z;
330                                                 if(dst.w) pDst.w = r.o[D0 + dst.index].w;
331                                                 break;
332                                         case Shader::PARAMETER_TEXCRDOUT:
333                                 //      case Shader::PARAMETER_OUTPUT:
334                                                 if(version < 0x0300)
335                                                 {
336                                                         if(dst.x) pDst.x = r.o[T0 + dst.index].x;
337                                                         if(dst.y) pDst.y = r.o[T0 + dst.index].y;
338                                                         if(dst.z) pDst.z = r.o[T0 + dst.index].z;
339                                                         if(dst.w) pDst.w = r.o[T0 + dst.index].w;
340                                                 }
341                                                 else
342                                                 {
343                                                         if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
344                                                         {
345                                                                 if(dst.x) pDst.x = r.o[dst.index].x;
346                                                                 if(dst.y) pDst.y = r.o[dst.index].y;
347                                                                 if(dst.z) pDst.z = r.o[dst.index].z;
348                                                                 if(dst.w) pDst.w = r.o[dst.index].w;
349                                                         }
350                                                         else if(dst.rel.type == Shader::PARAMETER_LOOP)
351                                                         {
352                                                                 Int aL = r.aL[r.loopDepth];
353
354                                                                 if(dst.x) pDst.x = r.o[dst.index + aL].x;
355                                                                 if(dst.y) pDst.y = r.o[dst.index + aL].y;
356                                                                 if(dst.z) pDst.z = r.o[dst.index + aL].z;
357                                                                 if(dst.w) pDst.w = r.o[dst.index + aL].w;
358                                                         }
359                                                         else
360                                                         {
361                                                                 Int a = relativeAddress(r, dst);
362
363                                                                 if(dst.x) pDst.x = r.o[dst.index + a].x;
364                                                                 if(dst.y) pDst.y = r.o[dst.index + a].y;
365                                                                 if(dst.z) pDst.z = r.o[dst.index + a].z;
366                                                                 if(dst.w) pDst.w = r.o[dst.index + a].w;
367                                                         }
368                                                 }
369                                                 break;
370                                         case Shader::PARAMETER_LABEL:                                                                                                                                           break;
371                                         case Shader::PARAMETER_PREDICATE:       pDst = r.p0;                                                                                                    break;
372                                         case Shader::PARAMETER_INPUT:                                                                                                                                           break;
373                                         default:
374                                                 ASSERT(false);
375                                         }
376
377                                         Int4 enable = enableMask(r, instruction);
378
379                                         Int4 xEnable = enable;
380                                         Int4 yEnable = enable;
381                                         Int4 zEnable = enable;
382                                         Int4 wEnable = enable;
383
384                                         if(predicate)
385                                         {
386                                                 unsigned char pSwizzle = instruction->predicateSwizzle;
387
388                                                 Float4 xPredicate = r.p0[(pSwizzle >> 0) & 0x03];
389                                                 Float4 yPredicate = r.p0[(pSwizzle >> 2) & 0x03];
390                                                 Float4 zPredicate = r.p0[(pSwizzle >> 4) & 0x03];
391                                                 Float4 wPredicate = r.p0[(pSwizzle >> 6) & 0x03];
392
393                                                 if(!instruction->predicateNot)
394                                                 {
395                                                         if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
396                                                         if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
397                                                         if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
398                                                         if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
399                                                 }
400                                                 else
401                                                 {
402                                                         if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
403                                                         if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
404                                                         if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
405                                                         if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
406                                                 }
407                                         }
408
409                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
410                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
411                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
412                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
413
414                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
415                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
416                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
417                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
418                                 }
419
420                                 switch(dst.type)
421                                 {
422                                 case Shader::PARAMETER_VOID:
423                                         break;
424                                 case Shader::PARAMETER_TEMP:
425                                         if(dst.rel.type == Shader::PARAMETER_VOID)
426                                         {
427                                                 if(dst.x) r.r[dst.index].x = d.x;
428                                                 if(dst.y) r.r[dst.index].y = d.y;
429                                                 if(dst.z) r.r[dst.index].z = d.z;
430                                                 if(dst.w) r.r[dst.index].w = d.w;
431                                         }
432                                         else
433                                         {
434                                                 Int a = relativeAddress(r, dst);
435
436                                                 if(dst.x) r.r[dst.index + a].x = d.x;
437                                                 if(dst.y) r.r[dst.index + a].y = d.y;
438                                                 if(dst.z) r.r[dst.index + a].z = d.z;
439                                                 if(dst.w) r.r[dst.index + a].w = d.w;
440                                         }
441                                         break;
442                                 case Shader::PARAMETER_ADDR:
443                                         if(dst.x) r.a0.x = d.x;
444                                         if(dst.y) r.a0.y = d.y;
445                                         if(dst.z) r.a0.z = d.z;
446                                         if(dst.w) r.a0.w = d.w;
447                                         break;
448                                 case Shader::PARAMETER_RASTOUT:
449                                         switch(dst.index)
450                                         {
451                                         case 0:
452                                                 if(dst.x) r.o[Pos].x = d.x;
453                                                 if(dst.y) r.o[Pos].y = d.y;
454                                                 if(dst.z) r.o[Pos].z = d.z;
455                                                 if(dst.w) r.o[Pos].w = d.w;
456                                                 break;
457                                         case 1:
458                                                 r.o[Fog].x = d.x;
459                                                 break;
460                                         case 2:         
461                                                 r.o[Pts].y = d.x;
462                                                 break;
463                                         default:        ASSERT(false);
464                                         }
465                                         break;
466                                 case Shader::PARAMETER_ATTROUT: 
467                                         if(dst.x) r.o[D0 + dst.index].x = d.x;
468                                         if(dst.y) r.o[D0 + dst.index].y = d.y;
469                                         if(dst.z) r.o[D0 + dst.index].z = d.z;
470                                         if(dst.w) r.o[D0 + dst.index].w = d.w;
471                                         break;
472                                 case Shader::PARAMETER_TEXCRDOUT:
473                         //      case Shader::PARAMETER_OUTPUT:
474                                         if(version < 0x0300)
475                                         {
476                                                 if(dst.x) r.o[T0 + dst.index].x = d.x;
477                                                 if(dst.y) r.o[T0 + dst.index].y = d.y;
478                                                 if(dst.z) r.o[T0 + dst.index].z = d.z;
479                                                 if(dst.w) r.o[T0 + dst.index].w = d.w;
480                                         }
481                                         else
482                                         {
483                                                 if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
484                                                 {
485                                                         if(dst.x) r.o[dst.index].x = d.x;
486                                                         if(dst.y) r.o[dst.index].y = d.y;
487                                                         if(dst.z) r.o[dst.index].z = d.z;
488                                                         if(dst.w) r.o[dst.index].w = d.w;
489                                                 }
490                                                 else if(dst.rel.type == Shader::PARAMETER_LOOP)
491                                                 {
492                                                         Int aL = r.aL[r.loopDepth];
493
494                                                         if(dst.x) r.o[dst.index + aL].x = d.x;
495                                                         if(dst.y) r.o[dst.index + aL].y = d.y;
496                                                         if(dst.z) r.o[dst.index + aL].z = d.z;
497                                                         if(dst.w) r.o[dst.index + aL].w = d.w;
498                                                 }
499                                                 else
500                                                 {
501                                                         Int a = relativeAddress(r, dst);
502
503                                                         if(dst.x) r.o[dst.index + a].x = d.x;
504                                                         if(dst.y) r.o[dst.index + a].y = d.y;
505                                                         if(dst.z) r.o[dst.index + a].z = d.z;
506                                                         if(dst.w) r.o[dst.index + a].w = d.w;
507                                                 }
508                                         }
509                                         break;
510                                 case Shader::PARAMETER_LABEL:                                                                                                                                           break;
511                                 case Shader::PARAMETER_PREDICATE:       r.p0 = d;                                                                                                               break;
512                                 case Shader::PARAMETER_INPUT:                                                                                                                                           break;
513                                 default:
514                                         ASSERT(false);
515                                 }
516                         }
517                 }
518
519                 if(currentLabel != -1)
520                 {
521                         Nucleus::setInsertBlock(returnBlock);
522                 }
523         }
524
525         void VertexProgram::passThrough(Registers &r)
526         {
527                 if(shader)
528                 {
529                         for(int i = 0; i < 12; i++)
530                         {
531                                 unsigned char usage = shader->output[i][0].usage;
532                                 unsigned char index = shader->output[i][0].index;
533
534                                 switch(usage)
535                                 {
536                                 case 0xFF:
537                                         continue;
538                                 case Shader::USAGE_PSIZE:
539                                         r.o[i].y = r.v[i].x;
540                                         break;
541                                 case Shader::USAGE_TEXCOORD:
542                                         r.o[i].x = r.v[i].x;
543                                         r.o[i].y = r.v[i].y;
544                                         r.o[i].z = r.v[i].z;
545                                         r.o[i].w = r.v[i].w;
546                                         break;
547                                 case Shader::USAGE_POSITION:
548                                         r.o[i].x = r.v[i].x;
549                                         r.o[i].y = r.v[i].y;
550                                         r.o[i].z = r.v[i].z;
551                                         r.o[i].w = r.v[i].w;
552                                         break;
553                                 case Shader::USAGE_COLOR:
554                                         r.o[i].x = r.v[i].x;
555                                         r.o[i].y = r.v[i].y;
556                                         r.o[i].z = r.v[i].z;
557                                         r.o[i].w = r.v[i].w;
558                                         break;
559                                 case Shader::USAGE_FOG:
560                                         r.o[i].x = r.v[i].x;
561                                         break;
562                                 default:
563                                         ASSERT(false);
564                                 }
565                         }
566                 }
567                 else
568                 {
569                         r.o[Pos].x = r.v[PositionT].x;
570                         r.o[Pos].y = r.v[PositionT].y;
571                         r.o[Pos].z = r.v[PositionT].z;
572                         r.o[Pos].w = r.v[PositionT].w;
573
574                         for(int i = 0; i < 2; i++)
575                         {
576                                 r.o[D0 + i].x = r.v[Color0 + i].x;
577                                 r.o[D0 + i].y = r.v[Color0 + i].y;
578                                 r.o[D0 + i].z = r.v[Color0 + i].z;
579                                 r.o[D0 + i].w = r.v[Color0 + i].w;
580                         }
581
582                         for(int i = 0; i < 8; i++)
583                         {
584                                 r.o[T0 + i].x = r.v[TexCoord0 + i].x;
585                                 r.o[T0 + i].y = r.v[TexCoord0 + i].y;
586                                 r.o[T0 + i].z = r.v[TexCoord0 + i].z;
587                                 r.o[T0 + i].w = r.v[TexCoord0 + i].w;
588                         }
589
590                         r.o[Pts].y = r.v[PointSize].x;
591                 }
592         }
593
594         Vector4f VertexProgram::reg(Registers &r, const Src &src, int offset)
595         {
596                 int i = src.index + offset;
597
598                 Vector4f reg;
599
600                 switch(src.type)
601                 {
602                 case Shader::PARAMETER_TEMP:
603                         if(src.rel.type == Shader::PARAMETER_VOID)
604                         {
605                                 reg = r.r[i];
606                         }
607                         else
608                         {
609                                 reg = r.r[i + relativeAddress(r, src)];
610                         }
611                         break;
612                 case Shader::PARAMETER_CONST:
613                         reg = readConstant(r, src, offset);
614                         break;
615                 case Shader::PARAMETER_INPUT:
616             if(src.rel.type == Shader::PARAMETER_VOID)
617                         {
618                                 reg = r.v[i];
619                         }
620                         else
621                         {
622                                 reg = r.v[i + relativeAddress(r, src)];
623                         }
624             break;
625                 case Shader::PARAMETER_VOID:                    return r.r[0];   // Dummy
626                 case Shader::PARAMETER_FLOAT4LITERAL:
627                         reg.x = Float4(src.value[0]);
628                         reg.y = Float4(src.value[1]);
629                         reg.z = Float4(src.value[2]);
630                         reg.w = Float4(src.value[3]);
631                         break;
632                 case Shader::PARAMETER_ADDR:                    reg = r.a0;             break;
633                 case Shader::PARAMETER_CONSTBOOL:               return r.r[0];   // Dummy
634                 case Shader::PARAMETER_CONSTINT:                return r.r[0];   // Dummy
635                 case Shader::PARAMETER_LOOP:                    return r.r[0];   // Dummy
636                 case Shader::PARAMETER_PREDICATE:               return r.r[0];   // Dummy
637                 case Shader::PARAMETER_SAMPLER:
638                         if(src.rel.type == Shader::PARAMETER_VOID)
639                         {
640                                 reg.x = As<Float4>(Int4(i));
641                         }
642                         else if(src.rel.type == Shader::PARAMETER_TEMP)
643                         {
644                                 reg.x = As<Float4>(Int4(i) + RoundInt(r.r[src.rel.index].x));
645                         }
646                         return reg;
647                 case Shader::PARAMETER_OUTPUT:
648             if(src.rel.type == Shader::PARAMETER_VOID)
649                         {
650                                 reg = r.o[i];
651                         }
652                         else
653                         {
654                                 reg = r.o[i + relativeAddress(r, src)];
655                         }
656                         break;
657                 default:
658                         ASSERT(false);
659                 }
660
661                 const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
662                 const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
663                 const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
664                 const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
665
666                 Vector4f mod;
667
668                 switch(src.modifier)
669                 {
670                 case Shader::MODIFIER_NONE:
671                         mod.x = x;
672                         mod.y = y;
673                         mod.z = z;
674                         mod.w = w;
675                         break;
676                 case Shader::MODIFIER_NEGATE:
677                         mod.x = -x;
678                         mod.y = -y;
679                         mod.z = -z;
680                         mod.w = -w;
681                         break;
682                 case Shader::MODIFIER_ABS:
683                         mod.x = Abs(x);
684                         mod.y = Abs(y);
685                         mod.z = Abs(z);
686                         mod.w = Abs(w);
687                         break;
688                 case Shader::MODIFIER_ABS_NEGATE:
689                         mod.x = -Abs(x);
690                         mod.y = -Abs(y);
691                         mod.z = -Abs(z);
692                         mod.w = -Abs(w);
693                         break;
694                 case Shader::MODIFIER_NOT:
695                         mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
696                         mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
697                         mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
698                         mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
699                         break;
700                 default:
701                         ASSERT(false);
702                 }
703
704                 return mod;
705         }
706
707         Vector4f VertexProgram::readConstant(Registers &r, const Src &src, int offset)
708         {
709                 Vector4f c;
710
711                 int i = src.index + offset;
712
713                 if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
714                 {
715                         c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]));
716
717                         c.x = c.x.xxxx;
718                         c.y = c.y.yyyy;
719                         c.z = c.z.zzzz;
720                         c.w = c.w.wwww;
721
722                         if(localShaderConstants)   // Constant may be known at compile time
723                         {
724                                 for(int j = 0; j < shader->getLength(); j++)
725                                 {
726                                         const Shader::Instruction &instruction = *shader->getInstruction(j);
727
728                                         if(instruction.opcode == Shader::OPCODE_DEF)
729                                         {
730                                                 if(instruction.dst.index == i)
731                                                 {
732                                                         c.x = Float4(instruction.src[0].value[0]);
733                                                         c.y = Float4(instruction.src[0].value[1]);
734                                                         c.z = Float4(instruction.src[0].value[2]);
735                                                         c.w = Float4(instruction.src[0].value[3]);
736
737                                                         break;
738                                                 }
739                                         }
740                                 }
741                         }
742                 }
743                 else if(src.rel.type == Shader::PARAMETER_LOOP)
744                 {
745                         Int loopCounter = r.aL[r.loopDepth];
746
747                         c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]) + loopCounter * 16);
748
749                         c.x = c.x.xxxx;
750                         c.y = c.y.yyyy;
751                         c.z = c.z.zzzz;
752                         c.w = c.w.wwww;
753                 }
754                 else
755                 {
756                         if(src.rel.deterministic)
757                         {
758                                 Int a = relativeAddress(r, src);
759                         
760                                 c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]) + a * 16);
761
762                                 c.x = c.x.xxxx;
763                                 c.y = c.y.yyyy;
764                                 c.z = c.z.zzzz;
765                                 c.w = c.w.wwww;
766                         }
767                         else
768                         {
769                                 int component = src.rel.swizzle & 0x03;
770                                 Float4 a;
771
772                                 switch(src.rel.type)
773                                 {
774                                 case Shader::PARAMETER_ADDR:   a = r.a0[component]; break;
775                                 case Shader::PARAMETER_TEMP:   a = r.r[src.rel.index][component]; break;
776                                 case Shader::PARAMETER_INPUT:  a = r.v[src.rel.index][component]; break;
777                                 case Shader::PARAMETER_OUTPUT: a = r.o[src.rel.index][component]; break;
778                                 case Shader::PARAMETER_CONST:  a = *Pointer<Float>(r.data + OFFSET(DrawData,vs.c[src.rel.index][component])); break;
779                                 default: ASSERT(false);
780                                 }
781
782                                 Int4 index = Int4(i) + RoundInt(a) * Int4(src.rel.scale);
783
784                                 index = Min(As<UInt4>(index), UInt4(256));   // Clamp to constant register range, c[256] = {0, 0, 0, 0}
785                                 
786                                 Int index0 = Extract(index, 0);
787                                 Int index1 = Extract(index, 1);
788                                 Int index2 = Extract(index, 2);
789                                 Int index3 = Extract(index, 3);
790
791                                 c.x = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index0 * 16, 16);
792                                 c.y = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index1 * 16, 16);
793                                 c.z = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index2 * 16, 16);
794                                 c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index3 * 16, 16);
795
796                                 transpose4x4(c.x, c.y, c.z, c.w);
797                         }
798                 }
799
800                 return c;
801         }
802
803         Int VertexProgram::relativeAddress(Registers &r, const Shader::Parameter &var)
804         {
805                 ASSERT(var.rel.deterministic);
806
807                 if(var.rel.type == Shader::PARAMETER_TEMP)
808                 {
809                         return RoundInt(Extract(r.r[var.rel.index].x, 0)) * var.rel.scale;
810                 }
811                 else if(var.rel.type == Shader::PARAMETER_INPUT)
812                 {
813                         return RoundInt(Extract(r.v[var.rel.index].x, 0)) * var.rel.scale;
814                 }
815                 else if(var.rel.type == Shader::PARAMETER_OUTPUT)
816                 {
817                         return RoundInt(Extract(r.o[var.rel.index].x, 0)) * var.rel.scale;
818                 }
819                 else if(var.rel.type == Shader::PARAMETER_CONST)
820                 {
821                         RValue<Float4> c = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[var.rel.index]));
822
823                         return RoundInt(Extract(c, 0)) * var.rel.scale;
824                 }
825                 else ASSERT(false);
826
827                 return 0;
828         }
829
830         Int4 VertexProgram::enableMask(Registers &r, const Shader::Instruction *instruction)
831         {
832                 Int4 enable = instruction->analysisBranch ? Int4(r.enableStack[r.enableIndex]) : Int4(0xFFFFFFFF);
833                 
834                 if(!whileTest)
835                 {
836                         if(shader->containsBreakInstruction() && instruction->analysisBreak)
837                         {
838                                 enable &= r.enableBreak;
839                         }
840
841                         if(shader->containsContinueInstruction() && instruction->analysisContinue)
842                         {
843                                 enable &= r.enableContinue;
844                         }
845
846                         if(shader->containsLeaveInstruction() && instruction->analysisLeave)
847                         {
848                                 enable &= r.enableLeave;
849                         }
850                 }
851
852                 return enable;
853         }
854
855         void VertexProgram::M3X2(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)
856         {
857                 Vector4f row0 = reg(r, src1, 0);
858                 Vector4f row1 = reg(r, src1, 1);
859
860                 dst.x = dot3(src0, row0);
861                 dst.y = dot3(src0, row1);
862         }
863
864         void VertexProgram::M3X3(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)
865         {
866                 Vector4f row0 = reg(r, src1, 0);
867                 Vector4f row1 = reg(r, src1, 1);
868                 Vector4f row2 = reg(r, src1, 2);
869
870                 dst.x = dot3(src0, row0);
871                 dst.y = dot3(src0, row1);
872                 dst.z = dot3(src0, row2);
873         }
874
875         void VertexProgram::M3X4(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)
876         {
877                 Vector4f row0 = reg(r, src1, 0);
878                 Vector4f row1 = reg(r, src1, 1);
879                 Vector4f row2 = reg(r, src1, 2);
880                 Vector4f row3 = reg(r, src1, 3);
881
882                 dst.x = dot3(src0, row0);
883                 dst.y = dot3(src0, row1);
884                 dst.z = dot3(src0, row2);
885                 dst.w = dot3(src0, row3);
886         }
887
888         void VertexProgram::M4X3(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)
889         {
890                 Vector4f row0 = reg(r, src1, 0);
891                 Vector4f row1 = reg(r, src1, 1);
892                 Vector4f row2 = reg(r, src1, 2);
893
894                 dst.x = dot4(src0, row0);
895                 dst.y = dot4(src0, row1);
896                 dst.z = dot4(src0, row2);
897         }
898
899         void VertexProgram::M4X4(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)
900         {
901                 Vector4f row0 = reg(r, src1, 0);
902                 Vector4f row1 = reg(r, src1, 1);
903                 Vector4f row2 = reg(r, src1, 2);
904                 Vector4f row3 = reg(r, src1, 3);
905
906                 dst.x = dot4(src0, row0);
907                 dst.y = dot4(src0, row1);
908                 dst.z = dot4(src0, row2);
909                 dst.w = dot4(src0, row3);
910         }
911
912         void VertexProgram::BREAK(Registers &r)
913         {
914                 llvm::BasicBlock *deadBlock = Nucleus::createBasicBlock();
915                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
916
917                 if(breakDepth == 0)
918                 {
919                         r.enableIndex = r.enableIndex - breakDepth;
920                         Nucleus::createBr(endBlock);
921                 }
922                 else
923                 {
924                         r.enableBreak = r.enableBreak & ~r.enableStack[r.enableIndex];
925                         Bool allBreak = SignMask(r.enableBreak) == 0x0;
926
927                         r.enableIndex = r.enableIndex - breakDepth;
928                         branch(allBreak, endBlock, deadBlock);
929                 }
930
931                 Nucleus::setInsertBlock(deadBlock);
932                 r.enableIndex = r.enableIndex + breakDepth;
933         }
934
935         void VertexProgram::BREAKC(Registers &r, Vector4f &src0, Vector4f &src1, Control control)
936         {
937                 Int4 condition;
938
939                 switch(control)
940                 {
941                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x,  src1.x);   break;
942                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);             break;
943                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x);    break;
944                 case Shader::CONTROL_LT: condition = CmpLT(src0.x,  src1.x);    break;
945                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x);    break;
946                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);             break;
947                 default:
948                         ASSERT(false);
949                 }
950
951                 BREAK(r, condition);
952         }
953
954         void VertexProgram::BREAKP(Registers &r, const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
955         {
956                 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
957
958                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
959                 {
960                         condition = ~condition;
961                 }
962
963                 BREAK(r, condition);
964         }
965
966         void VertexProgram::BREAK(Registers &r, Int4 &condition)
967         {
968                 condition &= r.enableStack[r.enableIndex];
969
970                 llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock();
971                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
972
973                 r.enableBreak = r.enableBreak & ~condition;
974                 Bool allBreak = SignMask(r.enableBreak) == 0x0;
975
976                 r.enableIndex = r.enableIndex - breakDepth;
977                 branch(allBreak, endBlock, continueBlock);
978
979                 Nucleus::setInsertBlock(continueBlock);
980                 r.enableIndex = r.enableIndex + breakDepth;
981         }
982
983         void VertexProgram::CONTINUE(Registers &r)
984         {
985                 r.enableContinue = r.enableContinue & ~r.enableStack[r.enableIndex];
986         }
987
988         void VertexProgram::TEST()
989         {
990                 whileTest = true;
991         }
992
993         void VertexProgram::CALL(Registers &r, int labelIndex, int callSiteIndex)
994         {
995                 if(!labelBlock[labelIndex])
996                 {
997                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
998                 }
999
1000                 if(callRetBlock[labelIndex].size() > 1)
1001                 {
1002                         r.callStack[r.stackIndex++] = UInt(callSiteIndex);
1003                 }
1004
1005                 Int4 restoreLeave = r.enableLeave;
1006
1007                 Nucleus::createBr(labelBlock[labelIndex]);
1008                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1009
1010                 r.enableLeave = restoreLeave;
1011         }
1012
1013         void VertexProgram::CALLNZ(Registers &r, int labelIndex, int callSiteIndex, const Src &src)
1014         {
1015                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1016                 {
1017                         CALLNZb(r, labelIndex, callSiteIndex, src);
1018                 }
1019                 else if(src.type == Shader::PARAMETER_PREDICATE)
1020                 {
1021                         CALLNZp(r, labelIndex, callSiteIndex, src);
1022                 }
1023                 else ASSERT(false);
1024         }
1025
1026         void VertexProgram::CALLNZb(Registers &r, int labelIndex, int callSiteIndex, const Src &boolRegister)
1027         {
1028                 Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1029                 
1030                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1031                 {
1032                         condition = !condition; 
1033                 }
1034
1035                 if(!labelBlock[labelIndex])
1036                 {
1037                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1038                 }
1039
1040                 if(callRetBlock[labelIndex].size() > 1)
1041                 {
1042                         r.callStack[r.stackIndex++] = UInt(callSiteIndex);
1043                 }
1044
1045                 Int4 restoreLeave = r.enableLeave;
1046
1047                 branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1048                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1049
1050                 r.enableLeave = restoreLeave;
1051         }
1052
1053         void VertexProgram::CALLNZp(Registers &r, int labelIndex, int callSiteIndex, const Src &predicateRegister)
1054         {
1055                 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
1056
1057                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1058                 {
1059                         condition = ~condition;
1060                 }
1061
1062                 condition &= r.enableStack[r.enableIndex];
1063
1064                 if(!labelBlock[labelIndex])
1065                 {
1066                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1067                 }
1068
1069                 if(callRetBlock[labelIndex].size() > 1)
1070                 {
1071                         r.callStack[r.stackIndex++] = UInt(callSiteIndex);
1072                 }
1073
1074                 r.enableIndex++;
1075                 r.enableStack[r.enableIndex] = condition;
1076                 Int4 restoreLeave = r.enableLeave;
1077
1078                 Bool notAllFalse = SignMask(condition) != 0;
1079                 branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1080                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1081
1082                 r.enableIndex--;
1083                 r.enableLeave = restoreLeave;
1084         }
1085
1086         void VertexProgram::ELSE(Registers &r)
1087         {
1088                 ifDepth--;
1089
1090                 llvm::BasicBlock *falseBlock = ifFalseBlock[ifDepth];
1091                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1092
1093                 if(isConditionalIf[ifDepth])
1094                 {
1095                         Int4 condition = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];
1096                         Bool notAllFalse = SignMask(condition) != 0;
1097
1098                         branch(notAllFalse, falseBlock, endBlock);
1099
1100                         r.enableStack[r.enableIndex] = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];
1101                 }
1102                 else
1103                 {
1104                         Nucleus::createBr(endBlock);
1105                         Nucleus::setInsertBlock(falseBlock);
1106                 }
1107
1108                 ifFalseBlock[ifDepth] = endBlock;
1109
1110                 ifDepth++;
1111         }
1112
1113         void VertexProgram::ENDIF(Registers &r)
1114         {
1115                 ifDepth--;
1116
1117                 llvm::BasicBlock *endBlock = ifFalseBlock[ifDepth];
1118
1119                 Nucleus::createBr(endBlock);
1120                 Nucleus::setInsertBlock(endBlock);
1121
1122                 if(isConditionalIf[ifDepth])
1123                 {
1124                         breakDepth--;
1125                         r.enableIndex--;
1126                 }
1127         }
1128
1129         void VertexProgram::ENDLOOP(Registers &r)
1130         {
1131                 loopRepDepth--;
1132
1133                 r.aL[r.loopDepth] = r.aL[r.loopDepth] + r.increment[r.loopDepth];   // FIXME: +=
1134
1135                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1136                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1137
1138                 Nucleus::createBr(testBlock);
1139                 Nucleus::setInsertBlock(endBlock);
1140
1141                 r.loopDepth--;
1142                 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1143         }
1144
1145         void VertexProgram::ENDREP(Registers &r)
1146         {
1147                 loopRepDepth--;
1148
1149                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1150                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1151
1152                 Nucleus::createBr(testBlock);
1153                 Nucleus::setInsertBlock(endBlock);
1154
1155                 r.loopDepth--;
1156                 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1157         }
1158
1159         void VertexProgram::ENDWHILE(Registers &r)
1160         {
1161                 loopRepDepth--;
1162
1163                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1164                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1165
1166                 Nucleus::createBr(testBlock);
1167                 Nucleus::setInsertBlock(endBlock);
1168
1169                 r.enableIndex--;
1170                 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1171                 whileTest = false;
1172         }
1173
1174         void VertexProgram::IF(Registers &r, const Src &src)
1175         {
1176                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1177                 {
1178                         IFb(r, src);
1179                 }
1180                 else if(src.type == Shader::PARAMETER_PREDICATE)
1181                 {
1182                         IFp(r, src);
1183                 }
1184                 else
1185                 {
1186                         Int4 condition = As<Int4>(reg(r, src).x);
1187                         IF(r, condition);
1188                 }
1189         }
1190
1191         void VertexProgram::IFb(Registers &r, const Src &boolRegister)
1192         {
1193                 ASSERT(ifDepth < 24 + 4);
1194
1195                 Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1196
1197                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1198                 {
1199                         condition = !condition;
1200                 }
1201
1202                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1203                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1204
1205                 branch(condition, trueBlock, falseBlock);
1206
1207                 isConditionalIf[ifDepth] = false;
1208                 ifFalseBlock[ifDepth] = falseBlock;
1209
1210                 ifDepth++;
1211         }
1212
1213         void VertexProgram::IFp(Registers &r, const Src &predicateRegister)
1214         {
1215                 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
1216
1217                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1218                 {
1219                         condition = ~condition;
1220                 }
1221
1222                 IF(r, condition);
1223         }
1224
1225         void VertexProgram::IFC(Registers &r, Vector4f &src0, Vector4f &src1, Control control)
1226         {
1227                 Int4 condition;
1228
1229                 switch(control)
1230                 {
1231                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x,  src1.x);   break;
1232                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);             break;
1233                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x);    break;
1234                 case Shader::CONTROL_LT: condition = CmpLT(src0.x,  src1.x);    break;
1235                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x);    break;
1236                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);             break;
1237                 default:
1238                         ASSERT(false);
1239                 }
1240
1241                 IF(r, condition);
1242         }
1243
1244         void VertexProgram::IF(Registers &r, Int4 &condition)
1245         {
1246                 condition &= r.enableStack[r.enableIndex];
1247
1248                 r.enableIndex++;
1249                 r.enableStack[r.enableIndex] = condition;
1250
1251                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1252                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1253
1254                 Bool notAllFalse = SignMask(condition) != 0;
1255
1256                 branch(notAllFalse, trueBlock, falseBlock);
1257
1258                 isConditionalIf[ifDepth] = true;
1259                 ifFalseBlock[ifDepth] = falseBlock;
1260
1261                 ifDepth++;
1262                 breakDepth++;
1263         }
1264
1265         void VertexProgram::LABEL(int labelIndex)
1266         {
1267                 if(!labelBlock[labelIndex])
1268                 {
1269                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1270                 }
1271
1272                 Nucleus::setInsertBlock(labelBlock[labelIndex]);
1273                 currentLabel = labelIndex;
1274         }
1275
1276         void VertexProgram::LOOP(Registers &r, const Src &integerRegister)
1277         {
1278                 r.loopDepth++;
1279
1280                 r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
1281                 r.aL[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][1]));
1282                 r.increment[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][2]));
1283
1284                 // FIXME: Compiles to two instructions?
1285                 If(r.increment[r.loopDepth] == 0)
1286                 {
1287                         r.increment[r.loopDepth] = 1;
1288                 }
1289
1290                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1291                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1292                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1293
1294                 loopRepTestBlock[loopRepDepth] = testBlock;
1295                 loopRepEndBlock[loopRepDepth] = endBlock;
1296
1297                 // FIXME: jump(testBlock)
1298                 Nucleus::createBr(testBlock);
1299                 Nucleus::setInsertBlock(testBlock);
1300
1301                 branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
1302                 Nucleus::setInsertBlock(loopBlock);
1303
1304                 r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1;   // FIXME: --
1305                 
1306                 loopRepDepth++;
1307                 breakDepth = 0;
1308         }
1309
1310         void VertexProgram::REP(Registers &r, const Src &integerRegister)
1311         {
1312                 r.loopDepth++;
1313
1314                 r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
1315                 r.aL[r.loopDepth] = r.aL[r.loopDepth - 1];
1316
1317                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1318                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1319                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1320
1321                 loopRepTestBlock[loopRepDepth] = testBlock;
1322                 loopRepEndBlock[loopRepDepth] = endBlock;
1323
1324                 // FIXME: jump(testBlock)
1325                 Nucleus::createBr(testBlock);
1326                 Nucleus::setInsertBlock(testBlock);
1327
1328                 branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
1329                 Nucleus::setInsertBlock(loopBlock);
1330
1331                 r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1;   // FIXME: --
1332
1333                 loopRepDepth++;
1334                 breakDepth = 0;
1335         }
1336
1337         void VertexProgram::WHILE(Registers &r, const Src &temporaryRegister)
1338         {
1339                 r.enableIndex++;
1340
1341                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1342                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1343                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1344                 
1345                 loopRepTestBlock[loopRepDepth] = testBlock;
1346                 loopRepEndBlock[loopRepDepth] = endBlock;
1347
1348                 Int4 restoreBreak = r.enableBreak;
1349                 Int4 restoreContinue = r.enableContinue;
1350
1351                 // FIXME: jump(testBlock)
1352                 Nucleus::createBr(testBlock);
1353                 Nucleus::setInsertBlock(testBlock);
1354                 r.enableContinue = restoreContinue;
1355
1356                 const Vector4f &src = reg(r, temporaryRegister);
1357                 Int4 condition = As<Int4>(src.x);
1358                 condition &= r.enableStack[r.enableIndex - 1];
1359                 r.enableStack[r.enableIndex] = condition;
1360
1361                 Bool notAllFalse = SignMask(condition) != 0;
1362                 branch(notAllFalse, loopBlock, endBlock);
1363                 
1364                 Nucleus::setInsertBlock(endBlock);
1365                 r.enableBreak = restoreBreak;
1366                 
1367                 Nucleus::setInsertBlock(loopBlock);
1368
1369                 loopRepDepth++;
1370                 breakDepth = 0;
1371         }
1372
1373         void VertexProgram::RET(Registers &r)
1374         {
1375                 if(currentLabel == -1)
1376                 {
1377                         returnBlock = Nucleus::createBasicBlock();
1378                         Nucleus::createBr(returnBlock);
1379                 }
1380                 else
1381                 {
1382                         llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
1383
1384                         if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
1385                         {
1386                                 // FIXME: Encapsulate
1387                                 UInt index = r.callStack[--r.stackIndex];
1388  
1389                                 llvm::Value *value = index.loadValue();
1390                                 llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
1391
1392                                 for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
1393                                 {
1394                                         Nucleus::addSwitchCase(switchInst, i, callRetBlock[currentLabel][i]);
1395                                 }
1396                         }
1397                         else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
1398                         {
1399                                 Nucleus::createBr(callRetBlock[currentLabel][0]);
1400                         }
1401                         else   // Function isn't called
1402                         {
1403                                 Nucleus::createBr(unreachableBlock);
1404                         }
1405
1406                         Nucleus::setInsertBlock(unreachableBlock);
1407                         Nucleus::createUnreachable();
1408                 }
1409         }
1410
1411         void VertexProgram::LEAVE(Registers &r)
1412         {
1413                 r.enableLeave = r.enableLeave & ~r.enableStack[r.enableIndex];
1414
1415                 // FIXME: Return from function if all instances left
1416                 // FIXME: Use enableLeave in other control-flow constructs
1417         }
1418
1419         void VertexProgram::TEXLDL(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
1420         {
1421                 Vector4f tmp;
1422                 sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, src0.w);
1423
1424                 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
1425                 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
1426                 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
1427                 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
1428         }
1429
1430         void VertexProgram::TEX(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
1431         {
1432                 Float4 lod = Float4(0.0f);
1433                 Vector4f tmp;
1434                 sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, lod);
1435
1436                 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
1437                 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
1438                 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
1439                 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
1440         }
1441
1442         void VertexProgram::sampleTexture(Registers &r, Vector4f &c, const Src &s, Float4 &u, Float4 &v, Float4 &w, Float4 &q)
1443         {
1444                 if(s.type == Shader::PARAMETER_SAMPLER && s.rel.type == Shader::PARAMETER_VOID)
1445                 {
1446                         Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap[16]) + s.index * sizeof(Texture);
1447                         sampler[s.index]->sampleTexture(texture, c, u, v, w, q, r.a0, r.a0, false, false, true);        
1448                 }
1449                 else
1450                 {
1451                         Int index = As<Int>(Float(reg(r, s).x.x));
1452
1453                         for(int i = 0; i < 16; i++)
1454                         {
1455                                 if(shader->usesSampler(i))
1456                                 {
1457                                         If(index == i)
1458                                         {
1459                                                 Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap[16]) + i * sizeof(Texture);
1460                                                 sampler[i]->sampleTexture(texture, c, u, v, w, q, r.a0, r.a0, false, false, true);
1461                                                 // FIXME: When the sampler states are the same, we could use one sampler and just index the texture
1462                                         }
1463                                 }
1464                         }
1465                 }
1466         }
1467 }