OSDN Git Service

Passing uniform buffers to the vertex/pixel programs
[android-x86/external-swiftshader.git] / src / Shader / VertexProgram.cpp
1 // SwiftShader Software Renderer
2 //
3 // Copyright(c) 2005-2013 TransGaming Inc.
4 //
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
10 //
11
12 #include "VertexProgram.hpp"
13
14 #include "Renderer.hpp"
15 #include "VertexShader.hpp"
16 #include "Vertex.hpp"
17 #include "Half.hpp"
18 #include "SamplerCore.hpp"
19 #include "Debug.hpp"
20
21 namespace sw
22 {
23         VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader)   // Constructor: resets the control-flow counters, the label table and the enable masks.
24                 : VertexRoutine(state, shader), shader(shader), r(shader->dynamicallyIndexedTemporaries)   // NOTE(review): 'shader' is dereferenced here without a null check.
25         {
26                 ifDepth = 0;   // Nesting/depth counters start at zero.
27                 loopRepDepth = 0;
28                 breakDepth = 0;
29                 currentLabel = -1;   // Presumably "no label seen yet" -- TODO confirm against LABEL().
30                 whileTest = false;
31                 // Clear the first 2048 labelBlock entries (presumably the array's full size -- TODO confirm in the header).
32                 for(int i = 0; i < 2048; i++)
33                 {
34                         labelBlock[i] = 0;
35                 }
36
37                 loopDepth = -1;
38                 enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);   // First enable-stack entry: all bits set in all four components.
39                 // The break/continue masks are only written when the shader contains the matching instruction.
40                 if(shader && shader->containsBreakInstruction())
41                 {
42                         enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
43                 }
44
45                 if(shader && shader->containsContinueInstruction())
46                 {
47                         enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
48                 }
49                 // NOTE(review): 'shader' is null-guarded in the two checks above but not below or in the initializer list -- confirm whether a null shader is ever legal.
50                 if(shader->instanceIdDeclared)
51                 {
52                         instanceID = *Pointer<Int>(data + OFFSET(DrawData,instanceID));   // Read the instanceID field of DrawData through 'data'.
53                 }
54         }
55
56         VertexProgram::~VertexProgram()   // Destructor: deletes every entry of sampler[].
57         {
58                 for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
59                 {
60                         delete sampler[i];   // NOTE(review): entries are assigned in pipeline() and are not nulled by the constructor in this file -- confirm they are initialized elsewhere if pipeline() may not have run.
61                 }
62         }
63
64         void VertexProgram::pipeline()   // Allocates the samplers, then calls program() or passThrough() depending on state.preTransformed.
65         {
66                 for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
67                 {
68                         sampler[i] = new SamplerCore(constants, state.samplerState[i]);   // One SamplerCore per unit, built from that unit's sampler state; deleted in ~VertexProgram().
69                 }
70                 // NOTE(review): a second call to pipeline() would overwrite the entries above without deleting them -- confirm it is called at most once.
71                 if(!state.preTransformed)
72                 {
73                         program();   // Vertices are not pre-transformed: take the program() path.
74                 }
75                 else
76                 {
77                         passThrough();   // Pre-transformed vertices bypass program().
78                 }
79         }
80
81         void VertexProgram::program()
82         {
83         //      shader->print("VertexShader-%0.8X.txt", state.shaderID);
84
85                 unsigned short version = shader->getVersion();
86
87                 enableIndex = 0;
88                 stackIndex = 0;
89
90                 if(shader->containsLeaveInstruction())
91                 {
92                         enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
93                 }
94
95                 // Create all call site return blocks up front
96                 for(size_t i = 0; i < shader->getLength(); i++)
97                 {
98                         const Shader::Instruction *instruction = shader->getInstruction(i);
99                         Shader::Opcode opcode = instruction->opcode;
100
101                         if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
102                         {
103                                 const Dst &dst = instruction->dst;
104
105                                 ASSERT(callRetBlock[dst.label].size() == dst.callSite);
106                                 callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
107                         }
108                 }
109
110                 for(size_t i = 0; i < shader->getLength(); i++)
111                 {
112                         const Shader::Instruction *instruction = shader->getInstruction(i);
113                         Shader::Opcode opcode = instruction->opcode;
114
115                         if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
116                         {
117                                 continue;
118                         }
119
120                         Dst dst = instruction->dst;
121                         Src src0 = instruction->src[0];
122                         Src src1 = instruction->src[1];
123                         Src src2 = instruction->src[2];
124                         Src src3 = instruction->src[3];
125                         Src src4 = instruction->src[4];
126
127                         bool predicate = instruction->predicate;
128                         Control control = instruction->control;
129                         bool integer = dst.type == Shader::PARAMETER_ADDR;
130                         bool pp = dst.partialPrecision;
131
132                         Vector4f d;
133                         Vector4f s0;
134                         Vector4f s1;
135                         Vector4f s2;
136                         Vector4f s3;
137                         Vector4f s4;
138
139                         if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegisterF(src0);
140                         if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegisterF(src1);
141                         if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegisterF(src2);
142                         if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegisterF(src3);
143                         if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegisterF(src4);
144
145                         switch(opcode)
146                         {
147                         case Shader::OPCODE_VS_1_0:                                     break;
148                         case Shader::OPCODE_VS_1_1:                                     break;
149                         case Shader::OPCODE_VS_2_0:                                     break;
150                         case Shader::OPCODE_VS_2_x:                                     break;
151                         case Shader::OPCODE_VS_2_sw:                                    break;
152                         case Shader::OPCODE_VS_3_0:                                     break;
153                         case Shader::OPCODE_VS_3_sw:                                    break;
154                         case Shader::OPCODE_DCL:                                        break;
155                         case Shader::OPCODE_DEF:                                        break;
156                         case Shader::OPCODE_DEFI:                                       break;
157                         case Shader::OPCODE_DEFB:                                       break;
158                         case Shader::OPCODE_NOP:                                        break;
159                         case Shader::OPCODE_ABS:        abs(d, s0);                     break;
160                         case Shader::OPCODE_ADD:        add(d, s0, s1);                 break;
161                         case Shader::OPCODE_IADD:       iadd(d, s0, s1);                break;
162                         case Shader::OPCODE_CRS:        crs(d, s0, s1);                 break;
163                         case Shader::OPCODE_FORWARD1:   forward1(d, s0, s1, s2);        break;
164                         case Shader::OPCODE_FORWARD2:   forward2(d, s0, s1, s2);        break;
165                         case Shader::OPCODE_FORWARD3:   forward3(d, s0, s1, s2);        break;
166                         case Shader::OPCODE_FORWARD4:   forward4(d, s0, s1, s2);        break;
167                         case Shader::OPCODE_REFLECT1:   reflect1(d, s0, s1);            break;
168                         case Shader::OPCODE_REFLECT2:   reflect2(d, s0, s1);            break;
169                         case Shader::OPCODE_REFLECT3:   reflect3(d, s0, s1);            break;
170                         case Shader::OPCODE_REFLECT4:   reflect4(d, s0, s1);            break;
171                         case Shader::OPCODE_REFRACT1:   refract1(d, s0, s1, s2.x);      break;
172                         case Shader::OPCODE_REFRACT2:   refract2(d, s0, s1, s2.x);      break;
173                         case Shader::OPCODE_REFRACT3:   refract3(d, s0, s1, s2.x);      break;
174                         case Shader::OPCODE_REFRACT4:   refract4(d, s0, s1, s2.x);      break;
175                         case Shader::OPCODE_DP1:        dp1(d, s0, s1);                 break;
176                         case Shader::OPCODE_DP2:        dp2(d, s0, s1);                 break;
177                         case Shader::OPCODE_DP3:        dp3(d, s0, s1);                 break;
178                         case Shader::OPCODE_DP4:        dp4(d, s0, s1);                 break;
179                         case Shader::OPCODE_DET2:       det2(d, s0, s1);                break;
180                         case Shader::OPCODE_DET3:       det3(d, s0, s1, s2);            break;
181                         case Shader::OPCODE_DET4:       det4(d, s0, s1, s2, s3);        break;
182                         case Shader::OPCODE_ATT:        att(d, s0, s1);                 break;
183                         case Shader::OPCODE_EXP2X:      exp2x(d, s0, pp);               break;
184                         case Shader::OPCODE_EXP2:       exp2(d, s0, pp);                break;
185                         case Shader::OPCODE_EXPP:       expp(d, s0, version);           break;
186                         case Shader::OPCODE_EXP:        exp(d, s0, pp);                 break;
187                         case Shader::OPCODE_FRC:        frc(d, s0);                     break;
188                         case Shader::OPCODE_TRUNC:      trunc(d, s0);                   break;
189                         case Shader::OPCODE_FLOOR:      floor(d, s0);                   break;
190                         case Shader::OPCODE_ROUND:      round(d, s0);                   break;
191                         case Shader::OPCODE_ROUNDEVEN:  roundEven(d, s0);               break;
192                         case Shader::OPCODE_CEIL:       ceil(d, s0);                    break;
193                         case Shader::OPCODE_LIT:        lit(d, s0);                     break;
194                         case Shader::OPCODE_LOG2X:      log2x(d, s0, pp);               break;
195                         case Shader::OPCODE_LOG2:       log2(d, s0, pp);                break;
196                         case Shader::OPCODE_LOGP:       logp(d, s0, version);           break;
197                         case Shader::OPCODE_LOG:        log(d, s0, pp);                 break;
198                         case Shader::OPCODE_LRP:        lrp(d, s0, s1, s2);             break;
199                         case Shader::OPCODE_STEP:       step(d, s0, s1);                break;
200                         case Shader::OPCODE_SMOOTH:     smooth(d, s0, s1, s2);          break;
201                         case Shader::OPCODE_FLOATBITSTOINT:
202                         case Shader::OPCODE_FLOATBITSTOUINT:
203                         case Shader::OPCODE_INTBITSTOFLOAT:
204                         case Shader::OPCODE_UINTBITSTOFLOAT: d = s0;                    break;
205                         case Shader::OPCODE_M3X2:       M3X2(d, s0, src1);              break;
206                         case Shader::OPCODE_M3X3:       M3X3(d, s0, src1);              break;
207                         case Shader::OPCODE_M3X4:       M3X4(d, s0, src1);              break;
208                         case Shader::OPCODE_M4X3:       M4X3(d, s0, src1);              break;
209                         case Shader::OPCODE_M4X4:       M4X4(d, s0, src1);              break;
210                         case Shader::OPCODE_MAD:        mad(d, s0, s1, s2);             break;
211                         case Shader::OPCODE_IMAD:       imad(d, s0, s1, s2);            break;
212                         case Shader::OPCODE_MAX:        max(d, s0, s1);                 break;
213                         case Shader::OPCODE_IMAX:       imax(d, s0, s1);                break;
214                         case Shader::OPCODE_UMAX:       umax(d, s0, s1);                break;
215                         case Shader::OPCODE_MIN:        min(d, s0, s1);                 break;
216                         case Shader::OPCODE_IMIN:       imin(d, s0, s1);                break;
217                         case Shader::OPCODE_UMIN:       umin(d, s0, s1);                break;
218                         case Shader::OPCODE_MOV:        mov(d, s0, integer);            break;
219                         case Shader::OPCODE_MOVA:       mov(d, s0, true);               break;
220                         case Shader::OPCODE_NEG:        neg(d, s0);                     break;
221                         case Shader::OPCODE_INEG:       ineg(d, s0);                    break;
222                         case Shader::OPCODE_F2B:        f2b(d, s0);                     break;
223                         case Shader::OPCODE_B2F:        b2f(d, s0);                     break;
224                         case Shader::OPCODE_F2I:        f2i(d, s0);                     break;
225                         case Shader::OPCODE_I2F:        i2f(d, s0);                     break;
226                         case Shader::OPCODE_F2U:        f2u(d, s0);                     break;
227                         case Shader::OPCODE_U2F:        u2f(d, s0);                     break;
228                         case Shader::OPCODE_I2B:        i2b(d, s0);                     break;
229                         case Shader::OPCODE_B2I:        b2i(d, s0);                     break;
230                         case Shader::OPCODE_MUL:        mul(d, s0, s1);                 break;
231                         case Shader::OPCODE_IMUL:       imul(d, s0, s1);                break;
232                         case Shader::OPCODE_NRM2:       nrm2(d, s0, pp);                break;
233                         case Shader::OPCODE_NRM3:       nrm3(d, s0, pp);                break;
234                         case Shader::OPCODE_NRM4:       nrm4(d, s0, pp);                break;
235                         case Shader::OPCODE_POWX:       powx(d, s0, s1, pp);            break;
236                         case Shader::OPCODE_POW:        pow(d, s0, s1, pp);             break;
237                         case Shader::OPCODE_RCPX:       rcpx(d, s0, pp);                break;
238                         case Shader::OPCODE_DIV:        div(d, s0, s1);                 break;
239                         case Shader::OPCODE_IDIV:       idiv(d, s0, s1);                break;
240                         case Shader::OPCODE_UDIV:       udiv(d, s0, s1);                break;
241                         case Shader::OPCODE_MOD:        mod(d, s0, s1);                 break;
242                         case Shader::OPCODE_IMOD:       imod(d, s0, s1);                break;
243                         case Shader::OPCODE_UMOD:       umod(d, s0, s1);                break;
244                         case Shader::OPCODE_SHL:        shl(d, s0, s1);                 break;
245                         case Shader::OPCODE_ISHR:       ishr(d, s0, s1);                break;
246                         case Shader::OPCODE_USHR:       ushr(d, s0, s1);                break;
247                         case Shader::OPCODE_RSQX:       rsqx(d, s0, pp);                break;
248                         case Shader::OPCODE_SQRT:       sqrt(d, s0, pp);                break;
249                         case Shader::OPCODE_RSQ:        rsq(d, s0, pp);                 break;
250                         case Shader::OPCODE_LEN2:       len2(d.x, s0, pp);              break;
251                         case Shader::OPCODE_LEN3:       len3(d.x, s0, pp);              break;
252                         case Shader::OPCODE_LEN4:       len4(d.x, s0, pp);              break;
253                         case Shader::OPCODE_DIST1:      dist1(d.x, s0, s1, pp);         break;
254                         case Shader::OPCODE_DIST2:      dist2(d.x, s0, s1, pp);         break;
255                         case Shader::OPCODE_DIST3:      dist3(d.x, s0, s1, pp);         break;
256                         case Shader::OPCODE_DIST4:      dist4(d.x, s0, s1, pp);         break;
257                         case Shader::OPCODE_SGE:        step(d, s1, s0);                break;
258                         case Shader::OPCODE_SGN:        sgn(d, s0);                     break;
259                         case Shader::OPCODE_SINCOS:     sincos(d, s0, pp);              break;
260                         case Shader::OPCODE_COS:        cos(d, s0, pp);                 break;
261                         case Shader::OPCODE_SIN:        sin(d, s0, pp);                 break;
262                         case Shader::OPCODE_TAN:        tan(d, s0);                     break;
263                         case Shader::OPCODE_ACOS:       acos(d, s0);                    break;
264                         case Shader::OPCODE_ASIN:       asin(d, s0);                    break;
265                         case Shader::OPCODE_ATAN:       atan(d, s0);                    break;
266                         case Shader::OPCODE_ATAN2:      atan2(d, s0, s1);               break;
267                         case Shader::OPCODE_COSH:       cosh(d, s0, pp);                break;
268                         case Shader::OPCODE_SINH:       sinh(d, s0, pp);                break;
269                         case Shader::OPCODE_TANH:       tanh(d, s0, pp);                break;
270                         case Shader::OPCODE_ACOSH:      acosh(d, s0, pp);               break;
271                         case Shader::OPCODE_ASINH:      asinh(d, s0, pp);               break;
272                         case Shader::OPCODE_ATANH:      atanh(d, s0, pp);               break;
273                         case Shader::OPCODE_SLT:        slt(d, s0, s1);                 break;
274                         case Shader::OPCODE_SUB:        sub(d, s0, s1);                 break;
275                         case Shader::OPCODE_ISUB:       isub(d, s0, s1);                break;
276                         case Shader::OPCODE_BREAK:      BREAK();                        break;
277                         case Shader::OPCODE_BREAKC:     BREAKC(s0, s1, control);        break;
278                         case Shader::OPCODE_BREAKP:     BREAKP(src0);                   break;
279                         case Shader::OPCODE_CONTINUE:   CONTINUE();                     break;
280                         case Shader::OPCODE_TEST:       TEST();                         break;
281                         case Shader::OPCODE_CALL:       CALL(dst.label, dst.callSite);  break;
282                         case Shader::OPCODE_CALLNZ:     CALLNZ(dst.label, dst.callSite, src0); break;
283                         case Shader::OPCODE_ELSE:       ELSE();                         break;
284                         case Shader::OPCODE_ENDIF:      ENDIF();                        break;
285                         case Shader::OPCODE_ENDLOOP:    ENDLOOP();                      break;
286                         case Shader::OPCODE_ENDREP:     ENDREP();                       break;
287                         case Shader::OPCODE_ENDWHILE:   ENDWHILE();                     break;
288                         case Shader::OPCODE_IF:         IF(src0);                       break;
289                         case Shader::OPCODE_IFC:        IFC(s0, s1, control);           break;
290                         case Shader::OPCODE_LABEL:      LABEL(dst.index);               break;
291                         case Shader::OPCODE_LOOP:       LOOP(src1);                     break;
292                         case Shader::OPCODE_REP:        REP(src0);                      break;
293                         case Shader::OPCODE_WHILE:      WHILE(src0);                    break;
294                         case Shader::OPCODE_RET:        RET();                          break;
295                         case Shader::OPCODE_LEAVE:      LEAVE();                        break;
296                         case Shader::OPCODE_CMP:        cmp(d, s0, s1, control);        break;
297                         case Shader::OPCODE_ICMP:       icmp(d, s0, s1, control);       break;
298                         case Shader::OPCODE_UCMP:       ucmp(d, s0, s1, control);       break;
299                         case Shader::OPCODE_SELECT:     select(d, s0, s1, s2);          break;
300                         case Shader::OPCODE_EXTRACT:    extract(d.x, s0, s1.x);         break;
301                         case Shader::OPCODE_INSERT:     insert(d, s0, s1.x, s2.x);      break;
302                         case Shader::OPCODE_ALL:        all(d.x, s0);                   break;
303                         case Shader::OPCODE_ANY:        any(d.x, s0);                   break;
304                         case Shader::OPCODE_NOT:        not(d, s0);                     break;
305                         case Shader::OPCODE_OR:         or(d, s0, s1);                  break;
306                         case Shader::OPCODE_XOR:        xor(d, s0, s1);                 break;
307                         case Shader::OPCODE_AND:        and(d, s0, s1);                 break;
308                         case Shader::OPCODE_EQ:         equal(d, s0, s1);               break;
309                         case Shader::OPCODE_NE:         notEqual(d, s0, s1);            break;
310                         case Shader::OPCODE_TEXLDL:     TEXLDL(d, s0, src1);            break;
311                         case Shader::OPCODE_TEX:        TEX(d, s0, src1);               break;
312                         case Shader::OPCODE_TEXOFFSET:  TEXOFFSET(d, s0, src1, s2, s3); break;
313                         case Shader::OPCODE_TEXLDLOFFSET: TEXLDL(d, s0, src1, s2);      break;
314                         case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2);    break;
315                         case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCH(d, s0, src1, s2, s3); break;
316                         case Shader::OPCODE_TEXGRAD:    TEXGRAD(d, s0, src1, s2, s3);   break;
317                         case Shader::OPCODE_TEXGRADOFFSET: TEXGRAD(d, s0, src1, s2, s3, s4); break;
318                         case Shader::OPCODE_TEXSIZE:    TEXSIZE(d, s0.x, src1);         break;
319                         case Shader::OPCODE_END:                                        break;
320                         default:
321                                 ASSERT(false);
322                         }
323
324                         if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_NOP)
325                         {
326                                 if(dst.integer)
327                                 {
328                                         switch(opcode)
329                                         {
330                                         case Shader::OPCODE_DIV:
331                                                 if(dst.x) d.x = Trunc(d.x);
332                                                 if(dst.y) d.y = Trunc(d.y);
333                                                 if(dst.z) d.z = Trunc(d.z);
334                                                 if(dst.w) d.w = Trunc(d.w);
335                                                 break;
336                                         default:
337                                                 break;   // No truncation to integer required when arguments are integer
338                                         }
339                                 }
340
341                                 if(dst.saturate)
342                                 {
343                                         if(dst.x) d.x = Max(d.x, Float4(0.0f));
344                                         if(dst.y) d.y = Max(d.y, Float4(0.0f));
345                                         if(dst.z) d.z = Max(d.z, Float4(0.0f));
346                                         if(dst.w) d.w = Max(d.w, Float4(0.0f));
347
348                                         if(dst.x) d.x = Min(d.x, Float4(1.0f));
349                                         if(dst.y) d.y = Min(d.y, Float4(1.0f));
350                                         if(dst.z) d.z = Min(d.z, Float4(1.0f));
351                                         if(dst.w) d.w = Min(d.w, Float4(1.0f));
352                                 }
353
354                                 if(instruction->isPredicated())
355                                 {
356                                         Vector4f pDst;   // FIXME: Rename
357
358                                         switch(dst.type)
359                                         {
360                                         case Shader::PARAMETER_VOID: break;
361                                         case Shader::PARAMETER_TEMP:
362                                                 if(dst.rel.type == Shader::PARAMETER_VOID)
363                                                 {
364                                                         if(dst.x) pDst.x = r[dst.index].x;
365                                                         if(dst.y) pDst.y = r[dst.index].y;
366                                                         if(dst.z) pDst.z = r[dst.index].z;
367                                                         if(dst.w) pDst.w = r[dst.index].w;
368                                                 }
369                                                 else
370                                                 {
371                                                         Int a = relativeAddress(dst);
372
373                                                         if(dst.x) pDst.x = r[dst.index + a].x;
374                                                         if(dst.y) pDst.y = r[dst.index + a].y;
375                                                         if(dst.z) pDst.z = r[dst.index + a].z;
376                                                         if(dst.w) pDst.w = r[dst.index + a].w;
377                                                 }
378                                                 break;
379                                         case Shader::PARAMETER_ADDR: pDst = a0; break;
380                                         case Shader::PARAMETER_RASTOUT:
381                                                 switch(dst.index)
382                                                 {
383                                                 case 0:
384                                                         if(dst.x) pDst.x = o[Pos].x;
385                                                         if(dst.y) pDst.y = o[Pos].y;
386                                                         if(dst.z) pDst.z = o[Pos].z;
387                                                         if(dst.w) pDst.w = o[Pos].w;
388                                                         break;
389                                                 case 1:
390                                                         pDst.x = o[Fog].x;
391                                                         break;
392                                                 case 2:
393                                                         pDst.x = o[Pts].y;
394                                                         break;
395                                                 default:
396                                                         ASSERT(false);
397                                                 }
398                                                 break;
399                                         case Shader::PARAMETER_ATTROUT:
400                                                 if(dst.x) pDst.x = o[D0 + dst.index].x;
401                                                 if(dst.y) pDst.y = o[D0 + dst.index].y;
402                                                 if(dst.z) pDst.z = o[D0 + dst.index].z;
403                                                 if(dst.w) pDst.w = o[D0 + dst.index].w;
404                                                 break;
405                                         case Shader::PARAMETER_TEXCRDOUT:
406                                 //      case Shader::PARAMETER_OUTPUT:
407                                                 if(version < 0x0300)
408                                                 {
409                                                         if(dst.x) pDst.x = o[T0 + dst.index].x;
410                                                         if(dst.y) pDst.y = o[T0 + dst.index].y;
411                                                         if(dst.z) pDst.z = o[T0 + dst.index].z;
412                                                         if(dst.w) pDst.w = o[T0 + dst.index].w;
413                                                 }
414                                                 else
415                                                 {
416                                                         if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
417                                                         {
418                                                                 if(dst.x) pDst.x = o[dst.index].x;
419                                                                 if(dst.y) pDst.y = o[dst.index].y;
420                                                                 if(dst.z) pDst.z = o[dst.index].z;
421                                                                 if(dst.w) pDst.w = o[dst.index].w;
422                                                         }
423                                                         else
424                                                         {
425                                                                 Int a = relativeAddress(dst);
426
427                                                                 if(dst.x) pDst.x = o[dst.index + a].x;
428                                                                 if(dst.y) pDst.y = o[dst.index + a].y;
429                                                                 if(dst.z) pDst.z = o[dst.index + a].z;
430                                                                 if(dst.w) pDst.w = o[dst.index + a].w;
431                                                         }
432                                                 }
433                                                 break;
434                                         case Shader::PARAMETER_LABEL:                break;
435                                         case Shader::PARAMETER_PREDICATE: pDst = p0; break;
436                                         case Shader::PARAMETER_INPUT:                break;
437                                         default:
438                                                 ASSERT(false);
439                                         }
440
441                                         Int4 enable = enableMask(instruction);
442
443                                         Int4 xEnable = enable;
444                                         Int4 yEnable = enable;
445                                         Int4 zEnable = enable;
446                                         Int4 wEnable = enable;
447
448                                         if(predicate)
449                                         {
450                                                 unsigned char pSwizzle = instruction->predicateSwizzle;
451
452                                                 Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03];
453                                                 Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03];
454                                                 Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03];
455                                                 Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03];
456
457                                                 if(!instruction->predicateNot)
458                                                 {
459                                                         if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
460                                                         if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
461                                                         if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
462                                                         if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
463                                                 }
464                                                 else
465                                                 {
466                                                         if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
467                                                         if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
468                                                         if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
469                                                         if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
470                                                 }
471                                         }
472
473                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
474                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
475                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
476                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
477
478                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
479                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
480                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
481                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
482                                 }
483
484                                 switch(dst.type)
485                                 {
486                                 case Shader::PARAMETER_VOID:
487                                         break;
488                                 case Shader::PARAMETER_TEMP:
489                                         if(dst.rel.type == Shader::PARAMETER_VOID)
490                                         {
491                                                 if(dst.x) r[dst.index].x = d.x;
492                                                 if(dst.y) r[dst.index].y = d.y;
493                                                 if(dst.z) r[dst.index].z = d.z;
494                                                 if(dst.w) r[dst.index].w = d.w;
495                                         }
496                                         else
497                                         {
498                                                 Int a = relativeAddress(dst);
499
500                                                 if(dst.x) r[dst.index + a].x = d.x;
501                                                 if(dst.y) r[dst.index + a].y = d.y;
502                                                 if(dst.z) r[dst.index + a].z = d.z;
503                                                 if(dst.w) r[dst.index + a].w = d.w;
504                                         }
505                                         break;
506                                 case Shader::PARAMETER_ADDR:
507                                         if(dst.x) a0.x = d.x;
508                                         if(dst.y) a0.y = d.y;
509                                         if(dst.z) a0.z = d.z;
510                                         if(dst.w) a0.w = d.w;
511                                         break;
512                                 case Shader::PARAMETER_RASTOUT:
513                                         switch(dst.index)
514                                         {
515                                         case 0:
516                                                 if(dst.x) o[Pos].x = d.x;
517                                                 if(dst.y) o[Pos].y = d.y;
518                                                 if(dst.z) o[Pos].z = d.z;
519                                                 if(dst.w) o[Pos].w = d.w;
520                                                 break;
521                                         case 1:
522                                                 o[Fog].x = d.x;
523                                                 break;
524                                         case 2:
525                                                 o[Pts].y = d.x;
526                                                 break;
527                                         default:        ASSERT(false);
528                                         }
529                                         break;
530                                 case Shader::PARAMETER_ATTROUT:
531                                         if(dst.x) o[D0 + dst.index].x = d.x;
532                                         if(dst.y) o[D0 + dst.index].y = d.y;
533                                         if(dst.z) o[D0 + dst.index].z = d.z;
534                                         if(dst.w) o[D0 + dst.index].w = d.w;
535                                         break;
536                                 case Shader::PARAMETER_TEXCRDOUT:
537                         //      case Shader::PARAMETER_OUTPUT:
538                                         if(version < 0x0300)
539                                         {
540                                                 if(dst.x) o[T0 + dst.index].x = d.x;
541                                                 if(dst.y) o[T0 + dst.index].y = d.y;
542                                                 if(dst.z) o[T0 + dst.index].z = d.z;
543                                                 if(dst.w) o[T0 + dst.index].w = d.w;
544                                         }
545                                         else
546                                         {
547                                                 if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
548                                                 {
549                                                         if(dst.x) o[dst.index].x = d.x;
550                                                         if(dst.y) o[dst.index].y = d.y;
551                                                         if(dst.z) o[dst.index].z = d.z;
552                                                         if(dst.w) o[dst.index].w = d.w;
553                                                 }
554                                                 else
555                                                 {
556                                                         Int a = relativeAddress(dst);
557
558                                                         if(dst.x) o[dst.index + a].x = d.x;
559                                                         if(dst.y) o[dst.index + a].y = d.y;
560                                                         if(dst.z) o[dst.index + a].z = d.z;
561                                                         if(dst.w) o[dst.index + a].w = d.w;
562                                                 }
563                                         }
564                                         break;
565                                 case Shader::PARAMETER_LABEL:             break;
566                                 case Shader::PARAMETER_PREDICATE: p0 = d; break;
567                                 case Shader::PARAMETER_INPUT:             break;
568                                 default:
569                                         ASSERT(false);
570                                 }
571                         }
572                 }
573
574                 if(currentLabel != -1)
575                 {
576                         Nucleus::setInsertBlock(returnBlock);
577                 }
578         }
579
580         void VertexProgram::passThrough()
581         {
582                 if(shader)
583                 {
584                         for(int i = 0; i < 12; i++)
585                         {
586                                 unsigned char usage = shader->output[i][0].usage;
587
588                                 switch(usage)
589                                 {
590                                 case 0xFF:
591                                         continue;
592                                 case Shader::USAGE_PSIZE:
593                                         o[i].y = v[i].x;
594                                         break;
595                                 case Shader::USAGE_TEXCOORD:
596                                         o[i].x = v[i].x;
597                                         o[i].y = v[i].y;
598                                         o[i].z = v[i].z;
599                                         o[i].w = v[i].w;
600                                         break;
601                                 case Shader::USAGE_POSITION:
602                                         o[i].x = v[i].x;
603                                         o[i].y = v[i].y;
604                                         o[i].z = v[i].z;
605                                         o[i].w = v[i].w;
606                                         break;
607                                 case Shader::USAGE_COLOR:
608                                         o[i].x = v[i].x;
609                                         o[i].y = v[i].y;
610                                         o[i].z = v[i].z;
611                                         o[i].w = v[i].w;
612                                         break;
613                                 case Shader::USAGE_FOG:
614                                         o[i].x = v[i].x;
615                                         break;
616                                 default:
617                                         ASSERT(false);
618                                 }
619                         }
620                 }
621                 else
622                 {
623                         o[Pos].x = v[PositionT].x;
624                         o[Pos].y = v[PositionT].y;
625                         o[Pos].z = v[PositionT].z;
626                         o[Pos].w = v[PositionT].w;
627
628                         for(int i = 0; i < 2; i++)
629                         {
630                                 o[D0 + i].x = v[Color0 + i].x;
631                                 o[D0 + i].y = v[Color0 + i].y;
632                                 o[D0 + i].z = v[Color0 + i].z;
633                                 o[D0 + i].w = v[Color0 + i].w;
634                         }
635
636                         for(int i = 0; i < 8; i++)
637                         {
638                                 o[T0 + i].x = v[TexCoord0 + i].x;
639                                 o[T0 + i].y = v[TexCoord0 + i].y;
640                                 o[T0 + i].z = v[TexCoord0 + i].z;
641                                 o[T0 + i].w = v[TexCoord0 + i].w;
642                         }
643
644                         o[Pts].y = v[PointSize].x;
645                 }
646         }
647
648         Vector4f VertexProgram::fetchRegisterF(const Src &src, unsigned int offset)
649         {
650                 Vector4f reg;
651                 unsigned int i = src.index + offset;
652
653                 switch(src.type)
654                 {
655                 case Shader::PARAMETER_TEMP:
656                         if(src.rel.type == Shader::PARAMETER_VOID)
657                         {
658                                 reg = r[i];
659                         }
660                         else
661                         {
662                                 reg = r[i + relativeAddress(src, src.bufferIndex)];
663                         }
664                         break;
665                 case Shader::PARAMETER_CONST:
666                         reg = readConstant(src, offset);
667                         break;
668                 case Shader::PARAMETER_INPUT:
669             if(src.rel.type == Shader::PARAMETER_VOID)
670                         {
671                                 reg = v[i];
672                         }
673                         else
674                         {
675                                 reg = v[i + relativeAddress(src, src.bufferIndex)];
676                         }
677             break;
678                 case Shader::PARAMETER_VOID: return r[0];   // Dummy
679                 case Shader::PARAMETER_FLOAT4LITERAL:
680                         reg.x = Float4(src.value[0]);
681                         reg.y = Float4(src.value[1]);
682                         reg.z = Float4(src.value[2]);
683                         reg.w = Float4(src.value[3]);
684                         break;
685                 case Shader::PARAMETER_ADDR:      reg = a0; break;
686                 case Shader::PARAMETER_CONSTBOOL: return r[0];   // Dummy
687                 case Shader::PARAMETER_CONSTINT:  return r[0];   // Dummy
688                 case Shader::PARAMETER_LOOP:      return r[0];   // Dummy
689                 case Shader::PARAMETER_PREDICATE: return r[0];   // Dummy
690                 case Shader::PARAMETER_SAMPLER:
691                         if(src.rel.type == Shader::PARAMETER_VOID)
692                         {
693                                 reg.x = As<Float4>(Int4(i));
694                         }
695                         else if(src.rel.type == Shader::PARAMETER_TEMP)
696                         {
697                                 reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x));
698                         }
699                         return reg;
700                 case Shader::PARAMETER_OUTPUT:
701             if(src.rel.type == Shader::PARAMETER_VOID)
702                         {
703                                 reg = o[i];
704                         }
705                         else
706                         {
707                                 reg = o[i + relativeAddress(src, src.bufferIndex)];
708                         }
709                         break;
710                 case Shader::PARAMETER_MISCTYPE:
711                         reg.x = As<Float>(Int(instanceID));
712                         return reg;
713                 default:
714                         ASSERT(false);
715                 }
716
717                 const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
718                 const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
719                 const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
720                 const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
721
722                 Vector4f mod;
723
724                 switch(src.modifier)
725                 {
726                 case Shader::MODIFIER_NONE:
727                         mod.x = x;
728                         mod.y = y;
729                         mod.z = z;
730                         mod.w = w;
731                         break;
732                 case Shader::MODIFIER_NEGATE:
733                         mod.x = -x;
734                         mod.y = -y;
735                         mod.z = -z;
736                         mod.w = -w;
737                         break;
738                 case Shader::MODIFIER_ABS:
739                         mod.x = Abs(x);
740                         mod.y = Abs(y);
741                         mod.z = Abs(z);
742                         mod.w = Abs(w);
743                         break;
744                 case Shader::MODIFIER_ABS_NEGATE:
745                         mod.x = -Abs(x);
746                         mod.y = -Abs(y);
747                         mod.z = -Abs(z);
748                         mod.w = -Abs(w);
749                         break;
750                 case Shader::MODIFIER_NOT:
751                         mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
752                         mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
753                         mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
754                         mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
755                         break;
756                 default:
757                         ASSERT(false);
758                 }
759
760                 return mod;
761         }
762
763         RValue<Pointer<Byte>> VertexProgram::uniformAddress(int bufferIndex, unsigned int index)
764         {
765                 if(bufferIndex == -1)
766                 {
767                         return data + OFFSET(DrawData, vs.c[index]);
768                 }
769                 else
770                 {
771                         return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, vs.u[bufferIndex])) + index;
772                 }
773         }
774
775         RValue<Pointer<Byte>> VertexProgram::uniformAddress(int bufferIndex, unsigned int index, Int& offset)
776         {
777                 return uniformAddress(bufferIndex, index) + offset * sizeof(float4);
778         }
779
780         Vector4f VertexProgram::readConstant(const Src &src, unsigned int offset)
781         {
782                 Vector4f c;
783                 unsigned int i = src.index + offset;
784
785                 if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
786                 {
787                         c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i));
788
789                         c.x = c.x.xxxx;
790                         c.y = c.y.yyyy;
791                         c.z = c.z.zzzz;
792                         c.w = c.w.wwww;
793
794                         if(shader->containsDefineInstruction())   // Constant may be known at compile time
795                         {
796                                 for(size_t j = 0; j < shader->getLength(); j++)
797                                 {
798                                         const Shader::Instruction &instruction = *shader->getInstruction(j);
799
800                                         if(instruction.opcode == Shader::OPCODE_DEF)
801                                         {
802                                                 if(instruction.dst.index == i)
803                                                 {
804                                                         c.x = Float4(instruction.src[0].value[0]);
805                                                         c.y = Float4(instruction.src[0].value[1]);
806                                                         c.z = Float4(instruction.src[0].value[2]);
807                                                         c.w = Float4(instruction.src[0].value[3]);
808
809                                                         break;
810                                                 }
811                                         }
812                                 }
813                         }
814                 }
815                 else if(src.rel.type == Shader::PARAMETER_LOOP)
816                 {
817                         Int loopCounter = aL[loopDepth];
818
819                         c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, loopCounter));
820
821                         c.x = c.x.xxxx;
822                         c.y = c.y.yyyy;
823                         c.z = c.z.zzzz;
824                         c.w = c.w.wwww;
825                 }
826                 else
827                 {
828                         if(src.rel.deterministic)
829                         {
830                                 Int a = relativeAddress(src, src.bufferIndex);
831                         
832                                 c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));
833
834                                 c.x = c.y = c.z = c.w = *Pointer<Float4>(data + OFFSET(DrawData,vs.c[i]) + a * 16);
835
836                                 c.x = c.x.xxxx;
837                                 c.y = c.y.yyyy;
838                                 c.z = c.z.zzzz;
839                                 c.w = c.w.wwww;
840                         }
841                         else
842                         {
843                                 int component = src.rel.swizzle & 0x03;
844                                 Float4 a;
845
846                                 switch(src.rel.type)
847                                 {
848                                 case Shader::PARAMETER_ADDR:   a = a0[component]; break;
849                                 case Shader::PARAMETER_TEMP:   a = r[src.rel.index][component]; break;
850                                 case Shader::PARAMETER_INPUT:  a = v[src.rel.index][component]; break;
851                                 case Shader::PARAMETER_OUTPUT: a = o[src.rel.index][component]; break;
852                                 case Shader::PARAMETER_CONST:  a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break;
853                                 default: ASSERT(false);
854                                 }
855
856                                 Int4 index = Int4(i) + As<Int4>(a) * Int4(src.rel.scale);
857
858                                 index = Min(As<UInt4>(index), UInt4(VERTEX_UNIFORM_VECTORS));   // Clamp to constant register range, c[VERTEX_UNIFORM_VECTORS] = {0, 0, 0, 0}
859
860                                 Int index0 = Extract(index, 0);
861                                 Int index1 = Extract(index, 1);
862                                 Int index2 = Extract(index, 2);
863                                 Int index3 = Extract(index, 3);
864
865                                 c.x = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index0), 16);
866                                 c.y = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index1), 16);
867                                 c.z = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index2), 16);
868                                 c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index3), 16);
869
870                                 transpose4x4(c.x, c.y, c.z, c.w);
871                         }
872                 }
873
874                 return c;
875         }
876
877         Int VertexProgram::relativeAddress(const Shader::Parameter &var, int bufferIndex)
878         {
879                 ASSERT(var.rel.deterministic);
880
881                 if(var.rel.type == Shader::PARAMETER_TEMP)
882                 {
883                         return As<Int>(Extract(r[var.rel.index].x, 0)) * var.rel.scale;
884                 }
885                 else if(var.rel.type == Shader::PARAMETER_INPUT)
886                 {
887                         return As<Int>(Extract(v[var.rel.index].x, 0)) * var.rel.scale;
888                 }
889                 else if(var.rel.type == Shader::PARAMETER_OUTPUT)
890                 {
891                         return As<Int>(Extract(o[var.rel.index].x, 0)) * var.rel.scale;
892                 }
893                 else if(var.rel.type == Shader::PARAMETER_CONST)
894                 {
895                         RValue<Int4> c = *Pointer<Int4>(uniformAddress(bufferIndex, var.rel.index));
896
897                         return Extract(c, 0) * var.rel.scale;
898                 }
899                 else if(var.rel.type == Shader::PARAMETER_LOOP)
900                 {
901                         return aL[loopDepth];
902                 }
903                 else ASSERT(false);
904
905                 return 0;
906         }
907
908         Int4 VertexProgram::enableMask(const Shader::Instruction *instruction)
909         {
910                 Int4 enable = instruction->analysisBranch ? Int4(enableStack[enableIndex]) : Int4(0xFFFFFFFF);
911
912                 if(!whileTest)
913                 {
914                         if(shader->containsBreakInstruction() && instruction->analysisBreak)
915                         {
916                                 enable &= enableBreak;
917                         }
918
919                         if(shader->containsContinueInstruction() && instruction->analysisContinue)
920                         {
921                                 enable &= enableContinue;
922                         }
923
924                         if(shader->containsLeaveInstruction() && instruction->analysisLeave)
925                         {
926                                 enable &= enableLeave;
927                         }
928                 }
929
930                 return enable;
931         }
932
933         void VertexProgram::M3X2(Vector4f &dst, Vector4f &src0, Src &src1)
934         {
935                 Vector4f row0 = fetchRegisterF(src1, 0);
936                 Vector4f row1 = fetchRegisterF(src1, 1);
937
938                 dst.x = dot3(src0, row0);
939                 dst.y = dot3(src0, row1);
940         }
941
942         void VertexProgram::M3X3(Vector4f &dst, Vector4f &src0, Src &src1)
943         {
944                 Vector4f row0 = fetchRegisterF(src1, 0);
945                 Vector4f row1 = fetchRegisterF(src1, 1);
946                 Vector4f row2 = fetchRegisterF(src1, 2);
947
948                 dst.x = dot3(src0, row0);
949                 dst.y = dot3(src0, row1);
950                 dst.z = dot3(src0, row2);
951         }
952
953         void VertexProgram::M3X4(Vector4f &dst, Vector4f &src0, Src &src1)
954         {
955                 Vector4f row0 = fetchRegisterF(src1, 0);
956                 Vector4f row1 = fetchRegisterF(src1, 1);
957                 Vector4f row2 = fetchRegisterF(src1, 2);
958                 Vector4f row3 = fetchRegisterF(src1, 3);
959
960                 dst.x = dot3(src0, row0);
961                 dst.y = dot3(src0, row1);
962                 dst.z = dot3(src0, row2);
963                 dst.w = dot3(src0, row3);
964         }
965
966         void VertexProgram::M4X3(Vector4f &dst, Vector4f &src0, Src &src1)
967         {
968                 Vector4f row0 = fetchRegisterF(src1, 0);
969                 Vector4f row1 = fetchRegisterF(src1, 1);
970                 Vector4f row2 = fetchRegisterF(src1, 2);
971
972                 dst.x = dot4(src0, row0);
973                 dst.y = dot4(src0, row1);
974                 dst.z = dot4(src0, row2);
975         }
976
977         void VertexProgram::M4X4(Vector4f &dst, Vector4f &src0, Src &src1)
978         {
979                 Vector4f row0 = fetchRegisterF(src1, 0);
980                 Vector4f row1 = fetchRegisterF(src1, 1);
981                 Vector4f row2 = fetchRegisterF(src1, 2);
982                 Vector4f row3 = fetchRegisterF(src1, 3);
983
984                 dst.x = dot4(src0, row0);
985                 dst.y = dot4(src0, row1);
986                 dst.z = dot4(src0, row2);
987                 dst.w = dot4(src0, row3);
988         }
989
990         void VertexProgram::BREAK()
991         {
992                 llvm::BasicBlock *deadBlock = Nucleus::createBasicBlock();
993                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
994
995                 if(breakDepth == 0)
996                 {
997                         enableIndex = enableIndex - breakDepth;
998                         Nucleus::createBr(endBlock);
999                 }
1000                 else
1001                 {
1002                         enableBreak = enableBreak & ~enableStack[enableIndex];
1003                         Bool allBreak = SignMask(enableBreak) == 0x0;
1004
1005                         enableIndex = enableIndex - breakDepth;
1006                         branch(allBreak, endBlock, deadBlock);
1007                 }
1008
1009                 Nucleus::setInsertBlock(deadBlock);
1010                 enableIndex = enableIndex + breakDepth;
1011         }
1012
1013         void VertexProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control)
1014         {
1015                 Int4 condition;
1016
1017                 switch(control)
1018                 {
1019                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x,  src1.x);   break;
1020                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);             break;
1021                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x);    break;
1022                 case Shader::CONTROL_LT: condition = CmpLT(src0.x,  src1.x);    break;
1023                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x);    break;
1024                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);             break;
1025                 default:
1026                         ASSERT(false);
1027                 }
1028
1029                 BREAK(condition);
1030         }
1031
1032         void VertexProgram::BREAKP(const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
1033         {
1034                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1035
1036                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1037                 {
1038                         condition = ~condition;
1039                 }
1040
1041                 BREAK(condition);
1042         }
1043
1044         void VertexProgram::BREAK(Int4 &condition)
1045         {
1046                 condition &= enableStack[enableIndex];
1047
1048                 llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock();
1049                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
1050
1051                 enableBreak = enableBreak & ~condition;
1052                 Bool allBreak = SignMask(enableBreak) == 0x0;
1053
1054                 enableIndex = enableIndex - breakDepth;
1055                 branch(allBreak, endBlock, continueBlock);
1056
1057                 Nucleus::setInsertBlock(continueBlock);
1058                 enableIndex = enableIndex + breakDepth;
1059         }
1060
1061         void VertexProgram::CONTINUE()
1062         {
1063                 enableContinue = enableContinue & ~enableStack[enableIndex];
1064         }
1065
1066         void VertexProgram::TEST()
1067         {
1068                 whileTest = true;
1069         }
1070
1071         void VertexProgram::CALL(int labelIndex, int callSiteIndex)
1072         {
1073                 if(!labelBlock[labelIndex])
1074                 {
1075                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1076                 }
1077
1078                 if(callRetBlock[labelIndex].size() > 1)
1079                 {
1080                         callStack[stackIndex++] = UInt(callSiteIndex);
1081                 }
1082
1083                 Int4 restoreLeave = enableLeave;
1084
1085                 Nucleus::createBr(labelBlock[labelIndex]);
1086                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1087
1088                 enableLeave = restoreLeave;
1089         }
1090
1091         void VertexProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src)   // Conditional call: forwards to the emitter matching the condition operand's register type
1092         {
1093                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1094                 {
1095                         CALLNZb(labelIndex, callSiteIndex, src);
1096                 }
1097                 else if(src.type == Shader::PARAMETER_PREDICATE)
1098                 {
1099                         CALLNZp(labelIndex, callSiteIndex, src);
1100                 }
1101                 else ASSERT(false);   // Only constant-bool and predicate operands are handled
1102         }
1103
1104         void VertexProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister)   // Conditional call on a boolean register: the byte at DrawData::vs.b[index] is compared against zero
1105         {
1106                 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1107
1108                 if(boolRegister.modifier == Shader::MODIFIER_NOT)   // The operand may carry a logical-not modifier
1109                 {
1110                         condition = !condition;
1111                 }
1112
1113                 if(!labelBlock[labelIndex])   // The label's block is created on first reference
1114                 {
1115                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1116                 }
1117
1118                 if(callRetBlock[labelIndex].size() > 1)   // With several return blocks, record the call site so RET can select the right one
1119                 {
1120                         callStack[stackIndex++] = UInt(callSiteIndex);
1121                 }
1122
1123                 Int4 restoreLeave = enableLeave;   // Saved here and written back once emission continues in the return block
1124
1125                 branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);   // Enter the subroutine if the condition holds, otherwise go straight to the return block
1126                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1127
1128                 enableLeave = restoreLeave;
1129         }
1130
1131         void VertexProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister)   // Conditional call on a component of predicate register p0, masked by the current enable-stack entry
1132         {
1133                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);   // The low two swizzle bits select the predicate component
1134
1135                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1136                 {
1137                         condition = ~condition;
1138                 }
1139
1140                 condition &= enableStack[enableIndex];
1141
1142                 if(!labelBlock[labelIndex])   // The label's block is created on first reference
1143                 {
1144                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1145                 }
1146
1147                 if(callRetBlock[labelIndex].size() > 1)   // With several return blocks, record the call site so RET can select the right one
1148                 {
1149                         callStack[stackIndex++] = UInt(callSiteIndex);
1150                 }
1151                 // Push the masked condition as a new enable-stack entry; it is popped again after the return block is selected
1152                 enableIndex++;
1153                 enableStack[enableIndex] = condition;
1154                 Int4 restoreLeave = enableLeave;
1155
1156                 Bool notAllFalse = SignMask(condition) != 0;   // Enter the subroutine only if SignMask reports something set in the condition
1157                 branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1158                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1159
1160                 enableIndex--;
1161                 enableLeave = restoreLeave;
1162         }
1163
1164         void VertexProgram::ELSE()   // Ends the 'true' part of the innermost open IF and starts its 'false' part
1165         {
1166                 ifDepth--;   // Step back to the open IF's slot; incremented again at the end of this function
1167
1168                 llvm::BasicBlock *falseBlock = ifFalseBlock[ifDepth];
1169                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1170
1171                 if(isConditionalIf[ifDepth])   // True when the IF was opened by IF(Int4&), i.e. it owns an enable-stack entry
1172                 {
1173                         Int4 condition = ~enableStack[enableIndex] & enableStack[enableIndex - 1];   // Bits set in the parent entry but not in the IF's own entry
1174                         Bool notAllFalse = SignMask(condition) != 0;
1175
1176                         branch(notAllFalse, falseBlock, endBlock);   // NOTE(review): no setInsertBlock(falseBlock) follows; presumably branch() moves the insert point to its first target - confirm
1177
1178                         enableStack[enableIndex] = ~enableStack[enableIndex] & enableStack[enableIndex - 1];   // The entry now holds the else-side mask
1179                 }
1180                 else   // IF opened by IFb: plain branch, no enable-stack entry involved
1181                 {
1182                         Nucleus::createBr(endBlock);
1183                         Nucleus::setInsertBlock(falseBlock);
1184                 }
1185
1186                 ifFalseBlock[ifDepth] = endBlock;   // ENDIF reads this slot as the block to continue in
1187
1188                 ifDepth++;
1189         }
1190
1191         void VertexProgram::ENDIF()   // Closes the innermost open IF: jumps to, and continues in, the block stored in its ifFalseBlock slot
1192         {
1193                 ifDepth--;
1194
1195                 llvm::BasicBlock *endBlock = ifFalseBlock[ifDepth];   // Either the false block from IF/IFb or the end block installed by ELSE
1196
1197                 Nucleus::createBr(endBlock);
1198                 Nucleus::setInsertBlock(endBlock);
1199
1200                 if(isConditionalIf[ifDepth])   // Undo the breakDepth++ and enableIndex++ performed by IF(Int4&)
1201                 {
1202                         breakDepth--;
1203                         enableIndex--;
1204                 }
1205         }
1206
1207         void VertexProgram::ENDLOOP()   // Closes the loop opened by LOOP: advances aL, jumps back to the test block and continues in the end block
1208         {
1209                 loopRepDepth--;   // Back to the slot filled by the matching LOOP
1210
1211                 aL[loopDepth] = aL[loopDepth] + increment[loopDepth];   // FIXME: +=
1212
1213                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1214                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1215
1216                 Nucleus::createBr(testBlock);
1217                 Nucleus::setInsertBlock(endBlock);
1218
1219                 loopDepth--;
1220                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);   // Emitted in the end block: the break mask is set back to all ones after the loop
1221         }
1222
1223         void VertexProgram::ENDREP()   // Closes the loop opened by REP: jumps back to the test block and continues in the end block
1224         {
1225                 loopRepDepth--;   // Back to the slot filled by the matching REP
1226
1227                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1228                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1229
1230                 Nucleus::createBr(testBlock);
1231                 Nucleus::setInsertBlock(endBlock);
1232
1233                 loopDepth--;
1234                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);   // Emitted in the end block: the break mask is set back to all ones after the loop
1235         }
1236
1237         void VertexProgram::ENDWHILE()   // Closes the loop opened by WHILE: jumps back to the test block and continues in the end block
1238         {
1239                 loopRepDepth--;   // Back to the slot filled by the matching WHILE
1240
1241                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1242                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1243
1244                 Nucleus::createBr(testBlock);
1245                 Nucleus::setInsertBlock(endBlock);
1246
1247                 enableIndex--;   // Drops the enable-stack entry that WHILE pushed
1248                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1249                 whileTest = false;   // Clears the flag raised by TEST
1250         }
1251
1252         void VertexProgram::IF(const Src &src)   // Opens an IF whose condition comes from a source operand; the emitter is chosen by the operand's register type
1253         {
1254                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1255                 {
1256                         IFb(src);
1257                 }
1258                 else if(src.type == Shader::PARAMETER_PREDICATE)
1259                 {
1260                         IFp(src);
1261                 }
1262                 else   // Any other register type: the x component of the fetched register is used as the mask
1263                 {
1264                         Int4 condition = As<Int4>(fetchRegisterF(src).x);
1265                         IF(condition);
1266                 }
1267         }
1268
1269         void VertexProgram::IFb(const Src &boolRegister)   // Opens an IF on a boolean register: a single branch, without touching the enable stack
1270         {
1271                 ASSERT(ifDepth < 24 + 4);
1272
1273                 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1274
1275                 if(boolRegister.modifier == Shader::MODIFIER_NOT)   // The operand may carry a logical-not modifier
1276                 {
1277                         condition = !condition;
1278                 }
1279
1280                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1281                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1282
1283                 branch(condition, trueBlock, falseBlock);
1284
1285                 isConditionalIf[ifDepth] = false;   // Tells ELSE/ENDIF that no enable-stack entry belongs to this IF
1286                 ifFalseBlock[ifDepth] = falseBlock;
1287
1288                 ifDepth++;
1289         }
1290
1291         void VertexProgram::IFp(const Src &predicateRegister)   // Opens an IF on a component of predicate register p0 by delegating to IF(Int4&)
1292         {
1293                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);   // The low two swizzle bits select the predicate component
1294
1295                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1296                 {
1297                         condition = ~condition;
1298                 }
1299
1300                 IF(condition);
1301         }
1302
1303         void VertexProgram::IFC(Vector4f &src0, Vector4f &src1, Control control)   // Opens an IF on a comparison of the two operands' x components
1304         {
1305                 Int4 condition;
1306                 // Only the x components are compared; GT and GE are expressed through CmpNLE and CmpNLT
1307                 switch(control)
1308                 {
1309                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x,  src1.x);   break;
1310                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);             break;
1311                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x);    break;
1312                 case Shader::CONTROL_LT: condition = CmpLT(src0.x,  src1.x);    break;
1313                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x);    break;
1314                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);             break;
1315                 default:
1316                         ASSERT(false);   // No value is assigned to 'condition' on this path
1317                 }
1318
1319                 IF(condition);
1320         }
1321
1322         void VertexProgram::IF(Int4 &condition)   // Opens a masked IF: pushes a new enable-stack entry; 'condition' is modified in place
1323         {
1324                 condition &= enableStack[enableIndex];   // Keep only the bits that are also set in the current enable-stack entry
1325
1326                 enableIndex++;
1327                 enableStack[enableIndex] = condition;
1328
1329                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1330                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1331
1332                 Bool notAllFalse = SignMask(condition) != 0;   // The true block is entered only if SignMask reports something set in the mask
1333
1334                 branch(notAllFalse, trueBlock, falseBlock);
1335
1336                 isConditionalIf[ifDepth] = true;   // Tells ELSE/ENDIF that this IF owns an enable-stack entry
1337                 ifFalseBlock[ifDepth] = falseBlock;
1338
1339                 ifDepth++;
1340                 breakDepth++;   // BREAK subtracts breakDepth from enableIndex around its branch
1341         }
1342
1343         void VertexProgram::LABEL(int labelIndex)   // Starts emitting the subroutine for 'labelIndex' and makes it the current label
1344         {
1345                 if(!labelBlock[labelIndex])   // The block may already exist if a CALL/CALLNZ referenced the label earlier
1346                 {
1347                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1348                 }
1349
1350                 Nucleus::setInsertBlock(labelBlock[labelIndex]);
1351                 currentLabel = labelIndex;   // RET uses currentLabel to find the return blocks of this subroutine
1352         }
1353
1354         void VertexProgram::LOOP(const Src &integerRegister)   // Opens a counted loop driven by DrawData::vs.i[index]: [0] iteration count, [1] initial aL, [2] aL increment
1355         {
1356                 loopDepth++;
1357
1358                 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
1359                 aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][1]));
1360                 increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][2]));
1361
1362                 // FIXME: Compiles to two instructions?
1363                 If(increment[loopDepth] == 0)   // An increment of zero read from the register is replaced by 1
1364                 {
1365                         increment[loopDepth] = 1;
1366                 }
1367
1368                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1369                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1370                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1371
1372                 loopRepTestBlock[loopRepDepth] = testBlock;   // ENDLOOP branches back to this block
1373                 loopRepEndBlock[loopRepDepth] = endBlock;   // ENDLOOP continues here; BREAK branches here
1374
1375                 // FIXME: jump(testBlock)
1376                 Nucleus::createBr(testBlock);
1377                 Nucleus::setInsertBlock(testBlock);
1378
1379                 branch(iteration[loopDepth] > 0, loopBlock, endBlock);   // The body runs while the remaining count is positive
1380                 Nucleus::setInsertBlock(loopBlock);
1381
1382                 iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1383
1384                 loopRepDepth++;
1385                 breakDepth = 0;   // Masked IFs are counted afresh inside the new loop
1386         }
1387
1388         void VertexProgram::REP(const Src &integerRegister)   // Opens a repeat loop whose iteration count is read from DrawData::vs.i[index][0]
1389         {
1390                 loopDepth++;
1391
1392                 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
1393                 aL[loopDepth] = aL[loopDepth - 1];   // Copies aL from the enclosing level. NOTE(review): loopDepth starts at -1, so an outermost REP indexes aL[-1] here - confirm aL tolerates that
1394
1395                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1396                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1397                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1398
1399                 loopRepTestBlock[loopRepDepth] = testBlock;   // ENDREP branches back to this block
1400                 loopRepEndBlock[loopRepDepth] = endBlock;   // ENDREP continues here; BREAK branches here
1401
1402                 // FIXME: jump(testBlock)
1403                 Nucleus::createBr(testBlock);
1404                 Nucleus::setInsertBlock(testBlock);
1405
1406                 branch(iteration[loopDepth] > 0, loopBlock, endBlock);   // The body runs while the remaining count is positive
1407                 Nucleus::setInsertBlock(loopBlock);
1408
1409                 iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1410
1411                 loopRepDepth++;
1412                 breakDepth = 0;   // Masked IFs are counted afresh inside the new loop
1413         }
1414
1415         void VertexProgram::WHILE(const Src &temporaryRegister)   // Opens a masked while-loop whose condition is the x component of 'temporaryRegister', re-read in the test block
1416         {
1417                 enableIndex++;   // Reserves an enable-stack entry for the loop; ENDWHILE releases it
1418
1419                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1420                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1421                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1422
1423                 loopRepTestBlock[loopRepDepth] = testBlock;   // ENDWHILE branches back to this block
1424                 loopRepEndBlock[loopRepDepth] = endBlock;   // ENDWHILE continues here; BREAK branches here
1425                 // Masks as they are on entry to the loop; written back in the test block and the end block below
1426                 Int4 restoreBreak = enableBreak;
1427                 Int4 restoreContinue = enableContinue;
1428
1429                 // FIXME: jump(testBlock)
1430                 Nucleus::createBr(testBlock);
1431                 Nucleus::setInsertBlock(testBlock);
1432                 enableContinue = restoreContinue;   // Emitted at the head of the test block, which is also the target of ENDWHILE's back edge
1433
1434                 const Vector4f &src = fetchRegisterF(temporaryRegister);
1435                 Int4 condition = As<Int4>(src.x);
1436                 condition &= enableStack[enableIndex - 1];   // Mask with the entry that was current before the loop
1437                 enableStack[enableIndex] = condition;
1438
1439                 Bool notAllFalse = SignMask(condition) != 0;
1440                 branch(notAllFalse, loopBlock, endBlock);
1441
1442                 Nucleus::setInsertBlock(endBlock);   // Switch to the end block just to emit the enableBreak restore there
1443                 enableBreak = restoreBreak;
1444
1445                 Nucleus::setInsertBlock(loopBlock);   // The loop body is emitted from here on
1446
1447                 loopRepDepth++;
1448                 breakDepth = 0;   // Masked IFs are counted afresh inside the new loop
1449         }
1450
1451         void VertexProgram::RET()   // Emits a return: from the main program when no label is current, otherwise from the current subroutine
1452         {
1453                 if(currentLabel == -1)   // -1 is the constructor's initial value, i.e. LABEL has not been emitted yet
1454                 {
1455                         returnBlock = Nucleus::createBasicBlock();
1456                         Nucleus::createBr(returnBlock);
1457                 }
1458                 else
1459                 {
1460                         llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock();   // Switch default / fallback target; terminated with 'unreachable' below
1461
1462                         if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
1463                         {
1464                                 // FIXME: Encapsulate
1465                                 UInt index = callStack[--stackIndex];
1466
1467                                 llvm::Value *value = index.loadValue();
1468                                 llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
1469                                 // One switch case per return block: case i leads to callRetBlock[currentLabel][i]
1470                                 for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
1471                                 {
1472                                         Nucleus::addSwitchCase(switchInst, i, callRetBlock[currentLabel][i]);
1473                                 }
1474                         }
1475                         else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
1476                         {
1477                                 Nucleus::createBr(callRetBlock[currentLabel][0]);
1478                         }
1479                         else   // Function isn't called
1480                         {
1481                                 Nucleus::createBr(unreachableBlock);
1482                         }
1483
1484                         Nucleus::setInsertBlock(unreachableBlock);
1485                         Nucleus::createUnreachable();
1486                 }
1487         }
1488
1489         void VertexProgram::LEAVE()   // Masked leave: no branch is emitted here, only the leave mask is updated
1490         {
1491                 enableLeave = enableLeave & ~enableStack[enableIndex];   // Clear from enableLeave every bit set in the current enable-stack entry
1492
1493                 // FIXME: Return from function if all instances left
1494                 // FIXME: Use enableLeave in other control-flow constructs
1495         }
1496
1497         void VertexProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1)   // Samples the texture named by src1 at src0.xyz, passing src0.w as the fourth argument, then swizzles into dst
1498         {
1499                 Vector4f tmp;
1500                 sampleTexture(tmp, src1, src0.x, src0.y, src0.z, src0.w);
1501                 // Each 2-bit field of src1.swizzle selects which sampled component goes to dst.x, .y, .z and .w
1502                 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
1503                 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
1504                 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
1505                 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
1506         }
1507
1508         void VertexProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1)   // Samples the texture named by src1 at src0.xyz with a constant zero as the fourth argument, then swizzles into dst
1509         {
1510                 Float4 lod = Float4(0.0f);   // Takes the place of the src0.w argument that TEXLDL passes
1511                 Vector4f tmp;
1512                 sampleTexture(tmp, src1, src0.x, src0.y, src0.z, lod);
1513                 // Each 2-bit field of src1.swizzle selects which sampled component goes to dst.x, .y, .z and .w
1514                 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
1515                 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
1516                 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
1517                 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
1518         }
1519
1520         void VertexProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3)   // Stub: emits no code and leaves dst untouched
1521         {
1522                 UNIMPLEMENTED();
1523         }
1524
1525         void VertexProgram::TEXLDL(Vector4f &dst, Vector4f &src, const Src&, Vector4f &offset)   // Stub for the overload taking an offset: emits no code and leaves dst untouched
1526         {
1527                 UNIMPLEMENTED();
1528         }
1529
1530         void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2)   // Stub: emits no code and leaves dst untouched
1531         {
1532                 UNIMPLEMENTED();
1533         }
1534
1535         void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &offset)   // Stub for the overload taking an offset: emits no code and leaves dst untouched
1536         {
1537                 UNIMPLEMENTED();
1538         }
1539
1540         void VertexProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3)   // Stub: emits no code and leaves dst untouched
1541         {
1542                 UNIMPLEMENTED();
1543         }
1544
1545         void VertexProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3, Vector4f &offset)   // Stub for the overload taking an offset: emits no code and leaves dst untouched
1546         {
1547                 UNIMPLEMENTED();
1548         }
1549
1550         void VertexProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1)   // Writes mipmap width/height/depth into dst.x/y/z for each of the four 'lod' components; dst.w is not written
1551         {
1552                 Pointer<Byte> textureMipmap = data + OFFSET(DrawData, mipmap[16]) + src1.index * sizeof(Texture) + OFFSET(Texture, mipmap);   // NOTE(review): textures are indexed from DrawData::mipmap[16]; presumably the first vertex-stage slot - confirm
1553                 for(int i = 0; i < 4; ++i)   // Plain C++ loop: the body is emitted four times, once per component
1554                 {
1555                         Pointer<Byte> mipmap = textureMipmap + (As<Int>(Extract(lod, i)) + Int(1)) * sizeof(Mipmap);   // Mipmap entry at index (lod component read through As<Int>) + 1. NOTE(review): reason for the +1 is not visible here - confirm
1556                         dst.x = Insert(dst.x, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, width)))), i);   // The Short fields are widened to Int and handed over through As<Float>; presumably a bit reinterpretation, not a numeric conversion - confirm
1557                         dst.y = Insert(dst.y, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, height)))), i);
1558                         dst.z = Insert(dst.z, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)))), i);
1559                 }
1560         }
1561
1562         void VertexProgram::sampleTexture(Vector4f &c, const Src &s, Float4 &u, Float4 &v, Float4 &w, Float4 &q)   // Samples into 'c' from the sampler named by 's', either fixed at emit time or selected at run time
1563         {
1564                 if(s.type == Shader::PARAMETER_SAMPLER && s.rel.type == Shader::PARAMETER_VOID)   // Sampler register without a relative-address part: the index is known now
1565                 {
1566                         Pointer<Byte> texture = data + OFFSET(DrawData,mipmap[16]) + s.index * sizeof(Texture);   // NOTE(review): textures are indexed from DrawData::mipmap[16]; presumably the first vertex-stage slot - confirm
1567                         sampler[s.index]->sampleTexture(texture, c, u, v, w, q, a0, a0, false, false, true);   // NOTE(review): meaning of the trailing (a0, a0, false, false, true) arguments is defined by the sampler class, not visible here
1568                 }
1569                 else   // Otherwise the sampler index is taken at run time from the x component of the fetched register
1570                 {
1571                         Int index = As<Int>(Float(fetchRegisterF(s).x.x));
1572                         // Emit one guarded sampling sequence for every sampler the shader uses; the one whose number equals 'index' executes
1573                         for(int i = 0; i < 16; i++)
1574                         {
1575                                 if(shader->usesSampler(i))
1576                                 {
1577                                         If(index == i)
1578                                         {
1579                                                 Pointer<Byte> texture = data + OFFSET(DrawData,mipmap[16]) + i * sizeof(Texture);
1580                                                 sampler[i]->sampleTexture(texture, c, u, v, w, q, a0, a0, false, false, true);
1581                                                 // FIXME: When the sampler states are the same, we could use one sampler and just index the texture
1582                                         }
1583                                 }
1584                         }
1585                 }
1586         }
1587 }