OSDN Git Service

TextureSize implementation
[android-x86/external-swiftshader.git] / src / Shader / VertexProgram.cpp
1 // SwiftShader Software Renderer
2 //
3 // Copyright(c) 2005-2013 TransGaming Inc.
4 //
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
10 //
11
12 #include "VertexProgram.hpp"
13
14 #include "Renderer.hpp"
15 #include "VertexShader.hpp"
16 #include "Vertex.hpp"
17 #include "Half.hpp"
18 #include "SamplerCore.hpp"
19 #include "Debug.hpp"
20
21 namespace sw
22 {
23         VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader) : VertexRoutine(state, shader)
24         {
25                 ifDepth = 0;
26                 loopRepDepth = 0;
27                 breakDepth = 0;
28                 currentLabel = -1;
29                 whileTest = false;
30
31                 for(int i = 0; i < 2048; i++)
32                 {
33                         labelBlock[i] = 0;
34                 }
35         }
36
37         VertexProgram::~VertexProgram()
38         {
39                 for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
40                 {
41                         delete sampler[i];
42                 }
43         }
44
45         void VertexProgram::pipeline(Registers &r)
46         {
47                 for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
48                 {
49                         sampler[i] = new SamplerCore(r.constants, state.samplerState[i]);
50                 }
51
52                 if(!state.preTransformed)
53                 {
54                         program(r);
55                 }
56                 else
57                 {
58                         passThrough(r);
59                 }
60         }
61
62         void VertexProgram::program(Registers &r)
63         {
64         //      shader->print("VertexShader-%0.8X.txt", state.shaderID);
65
66                 unsigned short version = shader->getVersion();
67
68                 r.enableIndex = 0;
69                 r.stackIndex = 0;
70
71                 if(shader->containsLeaveInstruction())
72                 {
73                         r.enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
74                 }
75
76                 // Create all call site return blocks up front
77                 for(size_t i = 0; i < shader->getLength(); i++)
78                 {
79                         const Shader::Instruction *instruction = shader->getInstruction(i);
80                         Shader::Opcode opcode = instruction->opcode;
81
82                         if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
83                         {
84                                 const Dst &dst = instruction->dst;
85
86                                 ASSERT(callRetBlock[dst.label].size() == dst.callSite);
87                                 callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
88                         }
89                 }
90         
91                 for(size_t i = 0; i < shader->getLength(); i++)
92                 {
93                         const Shader::Instruction *instruction = shader->getInstruction(i);
94                         Shader::Opcode opcode = instruction->opcode;
95
96                         if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
97                         {
98                                 continue;
99                         }
100
101                         Dst dst = instruction->dst;
102                         Src src0 = instruction->src[0];
103                         Src src1 = instruction->src[1];
104                         Src src2 = instruction->src[2];
105                         Src src3 = instruction->src[3];
106
107                         bool predicate = instruction->predicate;
108                         Control control = instruction->control;
109                         bool integer = dst.type == Shader::PARAMETER_ADDR;
110                         bool pp = dst.partialPrecision;
111
112                         Vector4f d;
113                         Vector4f s0;
114                         Vector4f s1;
115                         Vector4f s2;
116                         Vector4f s3;
117
118                         if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegisterF(r, src0);
119                         if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegisterF(r, src1);
120                         if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegisterF(r, src2);
121                         if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegisterF(r, src3);
122
123                         switch(opcode)
124                         {
125                         case Shader::OPCODE_VS_1_0:                                                                             break;
126                         case Shader::OPCODE_VS_1_1:                                                                             break;
127                         case Shader::OPCODE_VS_2_0:                                                                             break;
128                         case Shader::OPCODE_VS_2_x:                                                                             break;
129                         case Shader::OPCODE_VS_2_sw:                                                                    break;
130                         case Shader::OPCODE_VS_3_0:                                                                             break;
131                         case Shader::OPCODE_VS_3_sw:                                                                    break;
132                         case Shader::OPCODE_DCL:                                                                                break;
133                         case Shader::OPCODE_DEF:                                                                                break;
134                         case Shader::OPCODE_DEFI:                                                                               break;
135                         case Shader::OPCODE_DEFB:                                                                               break;
136                         case Shader::OPCODE_NOP:                                                                                break;
137                         case Shader::OPCODE_ABS:                abs(d, s0);                                             break;
138                         case Shader::OPCODE_ADD:                add(d, s0, s1);                                 break;
139                         case Shader::OPCODE_IADD:       iadd(d, s0, s1);                break;
140                         case Shader::OPCODE_CRS:                crs(d, s0, s1);                                 break;
141                         case Shader::OPCODE_FORWARD1:   forward1(d, s0, s1, s2);                break;
142                         case Shader::OPCODE_FORWARD2:   forward2(d, s0, s1, s2);                break;
143                         case Shader::OPCODE_FORWARD3:   forward3(d, s0, s1, s2);                break;
144                         case Shader::OPCODE_FORWARD4:   forward4(d, s0, s1, s2);                break;
145                         case Shader::OPCODE_REFLECT1:   reflect1(d, s0, s1);                    break;
146                         case Shader::OPCODE_REFLECT2:   reflect2(d, s0, s1);                    break;
147                         case Shader::OPCODE_REFLECT3:   reflect3(d, s0, s1);                    break;
148                         case Shader::OPCODE_REFLECT4:   reflect4(d, s0, s1);                    break;
149                         case Shader::OPCODE_REFRACT1:   refract1(d, s0, s1, s2.x);              break;
150                         case Shader::OPCODE_REFRACT2:   refract2(d, s0, s1, s2.x);              break;
151                         case Shader::OPCODE_REFRACT3:   refract3(d, s0, s1, s2.x);              break;
152                         case Shader::OPCODE_REFRACT4:   refract4(d, s0, s1, s2.x);              break;
153                         case Shader::OPCODE_DP1:                dp1(d, s0, s1);                                 break;
154                         case Shader::OPCODE_DP2:                dp2(d, s0, s1);                                 break;
155                         case Shader::OPCODE_DP3:                dp3(d, s0, s1);                                 break;
156                         case Shader::OPCODE_DP4:                dp4(d, s0, s1);                                 break;
157                         case Shader::OPCODE_DET2:       det2(d, s0, s1);                break;
158                         case Shader::OPCODE_DET3:       det3(d, s0, s1, s2);            break;
159                         case Shader::OPCODE_DET4:       det4(d, s0, s1, s2, s3);        break;
160                         case Shader::OPCODE_ATT:                att(d, s0, s1);                                 break;
161                         case Shader::OPCODE_EXP2X:              exp2x(d, s0, pp);                               break;
162                         case Shader::OPCODE_EXP2:               exp2(d, s0, pp);                                break;
163                         case Shader::OPCODE_EXPP:               expp(d, s0, version);                   break;
164                         case Shader::OPCODE_EXP:                exp(d, s0, pp);                                 break;
165                         case Shader::OPCODE_FRC:                frc(d, s0);                                             break;
166                         case Shader::OPCODE_TRUNC:      trunc(d, s0);                   break;
167                         case Shader::OPCODE_FLOOR:      floor(d, s0);                   break;
168                         case Shader::OPCODE_ROUND:      round(d, s0);                   break;
169                         case Shader::OPCODE_ROUNDEVEN:  roundEven(d, s0);               break;
170                         case Shader::OPCODE_CEIL:       ceil(d, s0);                    break;
171                         case Shader::OPCODE_LIT:                lit(d, s0);                                             break;
172                         case Shader::OPCODE_LOG2X:              log2x(d, s0, pp);                               break;
173                         case Shader::OPCODE_LOG2:               log2(d, s0, pp);                                break;
174                         case Shader::OPCODE_LOGP:               logp(d, s0, version);                   break;
175                         case Shader::OPCODE_LOG:                log(d, s0, pp);                                 break;
176                         case Shader::OPCODE_LRP:                lrp(d, s0, s1, s2);                             break;
177                         case Shader::OPCODE_STEP:               step(d, s0, s1);                                break;
178                         case Shader::OPCODE_SMOOTH:             smooth(d, s0, s1, s2);                  break;
179                         case Shader::OPCODE_FLOATBITSTOINT:
180                         case Shader::OPCODE_FLOATBITSTOUINT:
181                         case Shader::OPCODE_INTBITSTOFLOAT:
182                         case Shader::OPCODE_UINTBITSTOFLOAT: d = s0;                    break;
183                         case Shader::OPCODE_M3X2:               M3X2(r, d, s0, src1);                   break;
184                         case Shader::OPCODE_M3X3:               M3X3(r, d, s0, src1);                   break;
185                         case Shader::OPCODE_M3X4:               M3X4(r, d, s0, src1);                   break;
186                         case Shader::OPCODE_M4X3:               M4X3(r, d, s0, src1);                   break;
187                         case Shader::OPCODE_M4X4:               M4X4(r, d, s0, src1);                   break;
188                         case Shader::OPCODE_MAD:                mad(d, s0, s1, s2);                             break;
189                         case Shader::OPCODE_IMAD:               imad(d, s0, s1, s2);                    break;
190                         case Shader::OPCODE_MAX:                max(d, s0, s1);                                 break;
191                         case Shader::OPCODE_IMAX:       imax(d, s0, s1);                break;
192                         case Shader::OPCODE_UMAX:       umax(d, s0, s1);                break;
193                         case Shader::OPCODE_MIN:                min(d, s0, s1);                                 break;
194                         case Shader::OPCODE_IMIN:       imin(d, s0, s1);                break;
195                         case Shader::OPCODE_UMIN:       umin(d, s0, s1);                break;
196                         case Shader::OPCODE_MOV:                mov(d, s0, integer);                    break;
197                         case Shader::OPCODE_MOVA:       mov(d, s0, true);               break;
198                         case Shader::OPCODE_NEG:        neg(d, s0);                     break;
199                         case Shader::OPCODE_INEG:       ineg(d, s0);                    break;
200                         case Shader::OPCODE_F2B:                f2b(d, s0);                                             break;
201                         case Shader::OPCODE_B2F:                b2f(d, s0);                                             break;
202                         case Shader::OPCODE_F2I:        f2i(d, s0);                     break;
203                         case Shader::OPCODE_I2F:        i2f(d, s0);                     break;
204                         case Shader::OPCODE_F2U:        f2u(d, s0);                     break;
205                         case Shader::OPCODE_U2F:        u2f(d, s0);                     break;
206                         case Shader::OPCODE_I2B:        i2b(d, s0);                     break;
207                         case Shader::OPCODE_B2I:        b2i(d, s0);                     break;
208                         case Shader::OPCODE_U2B:        u2b(d, s0);                     break;
209                         case Shader::OPCODE_B2U:        b2u(d, s0);                     break;
210                         case Shader::OPCODE_MUL:                mul(d, s0, s1);                                 break;
211                         case Shader::OPCODE_IMUL:       imul(d, s0, s1);                break;
212                         case Shader::OPCODE_NRM2:               nrm2(d, s0, pp);                                break;
213                         case Shader::OPCODE_NRM3:               nrm3(d, s0, pp);                                break;
214                         case Shader::OPCODE_NRM4:               nrm4(d, s0, pp);                                break;
215                         case Shader::OPCODE_POWX:               powx(d, s0, s1, pp);                    break;
216                         case Shader::OPCODE_POW:                pow(d, s0, s1, pp);                             break;
217                         case Shader::OPCODE_RCPX:               rcpx(d, s0, pp);                                break;
218                         case Shader::OPCODE_DIV:                div(d, s0, s1);                                 break;
219                         case Shader::OPCODE_IDIV:       idiv(d, s0, s1);                break;
220                         case Shader::OPCODE_UDIV:       udiv(d, s0, s1);                break;
221                         case Shader::OPCODE_MOD:                mod(d, s0, s1);                                 break;
222                         case Shader::OPCODE_IMOD:       imod(d, s0, s1);                break;
223                         case Shader::OPCODE_UMOD:       umod(d, s0, s1);                break;
224                         case Shader::OPCODE_SHL:        shl(d, s0, s1);                 break;
225                         case Shader::OPCODE_ISHR:       ishr(d, s0, s1);                 break;
226                         case Shader::OPCODE_USHR:       ushr(d, s0, s1);                 break;
227                         case Shader::OPCODE_RSQX:               rsqx(d, s0, pp);                                break;
228                         case Shader::OPCODE_SQRT:               sqrt(d, s0, pp);                                break;
229                         case Shader::OPCODE_RSQ:                rsq(d, s0, pp);                                 break;
230                         case Shader::OPCODE_LEN2:               len2(d.x, s0, pp);                              break;
231                         case Shader::OPCODE_LEN3:               len3(d.x, s0, pp);                              break;
232                         case Shader::OPCODE_LEN4:               len4(d.x, s0, pp);                              break;
233                         case Shader::OPCODE_DIST1:              dist1(d.x, s0, s1, pp);                 break;
234                         case Shader::OPCODE_DIST2:              dist2(d.x, s0, s1, pp);                 break;
235                         case Shader::OPCODE_DIST3:              dist3(d.x, s0, s1, pp);                 break;
236                         case Shader::OPCODE_DIST4:              dist4(d.x, s0, s1, pp);                 break;
237                         case Shader::OPCODE_SGE:                step(d, s1, s0);                                break;
238                         case Shader::OPCODE_SGN:                sgn(d, s0);                                             break;
239                         case Shader::OPCODE_SINCOS:             sincos(d, s0, pp);                              break;
240                         case Shader::OPCODE_COS:                cos(d, s0, pp);                                 break;
241                         case Shader::OPCODE_SIN:                sin(d, s0, pp);                                 break;
242                         case Shader::OPCODE_TAN:                tan(d, s0);                                             break;
243                         case Shader::OPCODE_ACOS:               acos(d, s0);                                    break;
244                         case Shader::OPCODE_ASIN:               asin(d, s0);                                    break;
245                         case Shader::OPCODE_ATAN:               atan(d, s0);                                    break;
246                         case Shader::OPCODE_ATAN2:              atan2(d, s0, s1);                               break;
247                         case Shader::OPCODE_COSH:               cosh(d, s0, pp);                                break;
248                         case Shader::OPCODE_SINH:               sinh(d, s0, pp);                                break;
249                         case Shader::OPCODE_TANH:               tanh(d, s0, pp);                                break;
250                         case Shader::OPCODE_ACOSH:              acosh(d, s0, pp);                               break;
251                         case Shader::OPCODE_ASINH:              asinh(d, s0, pp);                               break;
252                         case Shader::OPCODE_ATANH:              atanh(d, s0, pp);                               break;
253                         case Shader::OPCODE_SLT:                slt(d, s0, s1);                                 break;
254                         case Shader::OPCODE_SUB:                sub(d, s0, s1);                                 break;
255                         case Shader::OPCODE_ISUB:       isub(d, s0, s1);                break;
256                         case Shader::OPCODE_BREAK:              BREAK(r);                                               break;
257                         case Shader::OPCODE_BREAKC:             BREAKC(r, s0, s1, control);             break;
258                         case Shader::OPCODE_BREAKP:             BREAKP(r, src0);                                break;
259                         case Shader::OPCODE_CONTINUE:   CONTINUE(r);                                    break;
260                         case Shader::OPCODE_TEST:               TEST();                                                 break;
261                         case Shader::OPCODE_CALL:               CALL(r, dst.label, dst.callSite);         break;
262                         case Shader::OPCODE_CALLNZ:             CALLNZ(r, dst.label, dst.callSite, src0); break;
263                         case Shader::OPCODE_ELSE:               ELSE(r);                                                break;
264                         case Shader::OPCODE_ENDIF:              ENDIF(r);                                               break;
265                         case Shader::OPCODE_ENDLOOP:    ENDLOOP(r);                                             break;
266                         case Shader::OPCODE_ENDREP:             ENDREP(r);                                              break;
267                         case Shader::OPCODE_ENDWHILE:   ENDWHILE(r);                                    break;
268                         case Shader::OPCODE_IF:                 IF(r, src0);                                    break;
269                         case Shader::OPCODE_IFC:                IFC(r, s0, s1, control);                break;
270                         case Shader::OPCODE_LABEL:              LABEL(dst.index);                               break;
271                         case Shader::OPCODE_LOOP:               LOOP(r, src1);                                  break;
272                         case Shader::OPCODE_REP:                REP(r, src0);                                   break;
273                         case Shader::OPCODE_WHILE:              WHILE(r, src0);                                 break;
274                         case Shader::OPCODE_RET:                RET(r);                                                 break;
275                         case Shader::OPCODE_LEAVE:              LEAVE(r);                                               break;
276                         case Shader::OPCODE_CMP:                cmp(d, s0, s1, control);                break;
277                         case Shader::OPCODE_ICMP:               icmp(d, s0, s1, control);               break;
278                         case Shader::OPCODE_UCMP:       ucmp(d, s0, s1, control);       break;
279                         case Shader::OPCODE_SELECT:             select(d, s0, s1, s2);                  break;
280                         case Shader::OPCODE_EXTRACT:    extract(d.x, s0, s1.x);                 break;
281                         case Shader::OPCODE_INSERT:             insert(d, s0, s1.x, s2.x);              break;
282                         case Shader::OPCODE_ALL:                all(d.x, s0);                                   break;
283                         case Shader::OPCODE_ANY:                any(d.x, s0);                                   break;
284                         case Shader::OPCODE_NOT:                not(d, s0);                                             break;
285                         case Shader::OPCODE_OR:         or(d, s0, s1);                  break;
286                         case Shader::OPCODE_XOR:        xor(d, s0, s1);                 break;
287                         case Shader::OPCODE_AND:        and(d, s0, s1);                 break;
288                         case Shader::OPCODE_EQ:         equal(d, s0, s1);               break;
289                         case Shader::OPCODE_NE:         notEqual(d, s0, s1);            break;
290                         case Shader::OPCODE_TEXLDL:             TEXLDL(r, d, s0, src1);                 break;
291                         case Shader::OPCODE_TEX:                TEX(r, d, s0, src1);                    break;
292                         case Shader::OPCODE_TEXSIZE:    TEXSIZE(r, d, s0.x, src1);              break;
293                         case Shader::OPCODE_END:                                                                                break;
294                         default:
295                                 ASSERT(false);
296                         }
297
298                         if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_NOP)
299                         {
300                                 if(dst.integer)
301                                 {
302                                         switch(opcode)
303                                         {
304                                         case Shader::OPCODE_DIV:
305                                                 if(dst.x) d.x = Trunc(d.x);
306                                                 if(dst.y) d.y = Trunc(d.y);
307                                                 if(dst.z) d.z = Trunc(d.z);
308                                                 if(dst.w) d.w = Trunc(d.w);
309                                                 break;
310                                         default:
311                                                 break;   // No truncation to integer required when arguments are integer
312                                         }
313                                 }
314
315                                 if(dst.saturate)
316                                 {
317                                         if(dst.x) d.x = Max(d.x, Float4(0.0f));
318                                         if(dst.y) d.y = Max(d.y, Float4(0.0f));
319                                         if(dst.z) d.z = Max(d.z, Float4(0.0f));
320                                         if(dst.w) d.w = Max(d.w, Float4(0.0f));
321
322                                         if(dst.x) d.x = Min(d.x, Float4(1.0f));
323                                         if(dst.y) d.y = Min(d.y, Float4(1.0f));
324                                         if(dst.z) d.z = Min(d.z, Float4(1.0f));
325                                         if(dst.w) d.w = Min(d.w, Float4(1.0f));
326                                 }
327
328                                 if(instruction->isPredicated())
329                                 {
330                                         Vector4f pDst;   // FIXME: Rename
331
332                                         switch(dst.type)
333                                         {
334                                         case Shader::PARAMETER_VOID:                                                                                                                                            break;
335                                         case Shader::PARAMETER_TEMP:
336                                                 if(dst.rel.type == Shader::PARAMETER_VOID)
337                                                 {
338                                                         if(dst.x) pDst.x = r.r[dst.index].x;
339                                                         if(dst.y) pDst.y = r.r[dst.index].y;
340                                                         if(dst.z) pDst.z = r.r[dst.index].z;
341                                                         if(dst.w) pDst.w = r.r[dst.index].w;
342                                                 }
343                                                 else
344                                                 {
345                                                         Int a = relativeAddress(r, dst);
346
347                                                         if(dst.x) pDst.x = r.r[dst.index + a].x;
348                                                         if(dst.y) pDst.y = r.r[dst.index + a].y;
349                                                         if(dst.z) pDst.z = r.r[dst.index + a].z;
350                                                         if(dst.w) pDst.w = r.r[dst.index + a].w;
351                                                 }
352                                                 break;
353                                         case Shader::PARAMETER_ADDR:            pDst = r.a0;                                                                                                    break;
354                                         case Shader::PARAMETER_RASTOUT:
355                                                 switch(dst.index)
356                                                 {
357                                                 case 0:
358                                                         if(dst.x) pDst.x = r.o[Pos].x;
359                                                         if(dst.y) pDst.y = r.o[Pos].y;
360                                                         if(dst.z) pDst.z = r.o[Pos].z;
361                                                         if(dst.w) pDst.w = r.o[Pos].w;
362                                                         break;
363                                                 case 1:
364                                                         pDst.x = r.o[Fog].x;
365                                                         break;
366                                                 case 2:
367                                                         pDst.x = r.o[Pts].y;
368                                                         break;
369                                                 default:
370                                                         ASSERT(false);
371                                                 }
372                                                 break;
373                                         case Shader::PARAMETER_ATTROUT:
374                                                 if(dst.x) pDst.x = r.o[D0 + dst.index].x;
375                                                 if(dst.y) pDst.y = r.o[D0 + dst.index].y;
376                                                 if(dst.z) pDst.z = r.o[D0 + dst.index].z;
377                                                 if(dst.w) pDst.w = r.o[D0 + dst.index].w;
378                                                 break;
379                                         case Shader::PARAMETER_TEXCRDOUT:
380                                 //      case Shader::PARAMETER_OUTPUT:
381                                                 if(version < 0x0300)
382                                                 {
383                                                         if(dst.x) pDst.x = r.o[T0 + dst.index].x;
384                                                         if(dst.y) pDst.y = r.o[T0 + dst.index].y;
385                                                         if(dst.z) pDst.z = r.o[T0 + dst.index].z;
386                                                         if(dst.w) pDst.w = r.o[T0 + dst.index].w;
387                                                 }
388                                                 else
389                                                 {
390                                                         if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
391                                                         {
392                                                                 if(dst.x) pDst.x = r.o[dst.index].x;
393                                                                 if(dst.y) pDst.y = r.o[dst.index].y;
394                                                                 if(dst.z) pDst.z = r.o[dst.index].z;
395                                                                 if(dst.w) pDst.w = r.o[dst.index].w;
396                                                         }
397                                                         else if(dst.rel.type == Shader::PARAMETER_LOOP)
398                                                         {
399                                                                 Int aL = r.aL[r.loopDepth];
400
401                                                                 if(dst.x) pDst.x = r.o[dst.index + aL].x;
402                                                                 if(dst.y) pDst.y = r.o[dst.index + aL].y;
403                                                                 if(dst.z) pDst.z = r.o[dst.index + aL].z;
404                                                                 if(dst.w) pDst.w = r.o[dst.index + aL].w;
405                                                         }
406                                                         else
407                                                         {
408                                                                 Int a = relativeAddress(r, dst);
409
410                                                                 if(dst.x) pDst.x = r.o[dst.index + a].x;
411                                                                 if(dst.y) pDst.y = r.o[dst.index + a].y;
412                                                                 if(dst.z) pDst.z = r.o[dst.index + a].z;
413                                                                 if(dst.w) pDst.w = r.o[dst.index + a].w;
414                                                         }
415                                                 }
416                                                 break;
417                                         case Shader::PARAMETER_LABEL:                                                                                                                                           break;
418                                         case Shader::PARAMETER_PREDICATE:       pDst = r.p0;                                                                                                    break;
419                                         case Shader::PARAMETER_INPUT:                                                                                                                                           break;
420                                         default:
421                                                 ASSERT(false);
422                                         }
423
424                                         Int4 enable = enableMask(r, instruction);
425
426                                         Int4 xEnable = enable;
427                                         Int4 yEnable = enable;
428                                         Int4 zEnable = enable;
429                                         Int4 wEnable = enable;
430
431                                         if(predicate)
432                                         {
433                                                 unsigned char pSwizzle = instruction->predicateSwizzle;
434
435                                                 Float4 xPredicate = r.p0[(pSwizzle >> 0) & 0x03];
436                                                 Float4 yPredicate = r.p0[(pSwizzle >> 2) & 0x03];
437                                                 Float4 zPredicate = r.p0[(pSwizzle >> 4) & 0x03];
438                                                 Float4 wPredicate = r.p0[(pSwizzle >> 6) & 0x03];
439
440                                                 if(!instruction->predicateNot)
441                                                 {
442                                                         if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
443                                                         if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
444                                                         if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
445                                                         if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
446                                                 }
447                                                 else
448                                                 {
449                                                         if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
450                                                         if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
451                                                         if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
452                                                         if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
453                                                 }
454                                         }
455
456                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
457                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
458                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
459                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
460
461                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
462                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
463                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
464                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
465                                 }
466
467                                 switch(dst.type)
468                                 {
469                                 case Shader::PARAMETER_VOID:
470                                         break;
471                                 case Shader::PARAMETER_TEMP:
472                                         if(dst.rel.type == Shader::PARAMETER_VOID)
473                                         {
474                                                 if(dst.x) r.r[dst.index].x = d.x;
475                                                 if(dst.y) r.r[dst.index].y = d.y;
476                                                 if(dst.z) r.r[dst.index].z = d.z;
477                                                 if(dst.w) r.r[dst.index].w = d.w;
478                                         }
479                                         else
480                                         {
481                                                 Int a = relativeAddress(r, dst);
482
483                                                 if(dst.x) r.r[dst.index + a].x = d.x;
484                                                 if(dst.y) r.r[dst.index + a].y = d.y;
485                                                 if(dst.z) r.r[dst.index + a].z = d.z;
486                                                 if(dst.w) r.r[dst.index + a].w = d.w;
487                                         }
488                                         break;
489                                 case Shader::PARAMETER_ADDR:
490                                         if(dst.x) r.a0.x = d.x;
491                                         if(dst.y) r.a0.y = d.y;
492                                         if(dst.z) r.a0.z = d.z;
493                                         if(dst.w) r.a0.w = d.w;
494                                         break;
495                                 case Shader::PARAMETER_RASTOUT:
496                                         switch(dst.index)
497                                         {
498                                         case 0:
499                                                 if(dst.x) r.o[Pos].x = d.x;
500                                                 if(dst.y) r.o[Pos].y = d.y;
501                                                 if(dst.z) r.o[Pos].z = d.z;
502                                                 if(dst.w) r.o[Pos].w = d.w;
503                                                 break;
504                                         case 1:
505                                                 r.o[Fog].x = d.x;
506                                                 break;
507                                         case 2:         
508                                                 r.o[Pts].y = d.x;
509                                                 break;
510                                         default:        ASSERT(false);
511                                         }
512                                         break;
513                                 case Shader::PARAMETER_ATTROUT: 
514                                         if(dst.x) r.o[D0 + dst.index].x = d.x;
515                                         if(dst.y) r.o[D0 + dst.index].y = d.y;
516                                         if(dst.z) r.o[D0 + dst.index].z = d.z;
517                                         if(dst.w) r.o[D0 + dst.index].w = d.w;
518                                         break;
519                                 case Shader::PARAMETER_TEXCRDOUT:
520                         //      case Shader::PARAMETER_OUTPUT:
521                                         if(version < 0x0300)
522                                         {
523                                                 if(dst.x) r.o[T0 + dst.index].x = d.x;
524                                                 if(dst.y) r.o[T0 + dst.index].y = d.y;
525                                                 if(dst.z) r.o[T0 + dst.index].z = d.z;
526                                                 if(dst.w) r.o[T0 + dst.index].w = d.w;
527                                         }
528                                         else
529                                         {
530                                                 if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
531                                                 {
532                                                         if(dst.x) r.o[dst.index].x = d.x;
533                                                         if(dst.y) r.o[dst.index].y = d.y;
534                                                         if(dst.z) r.o[dst.index].z = d.z;
535                                                         if(dst.w) r.o[dst.index].w = d.w;
536                                                 }
537                                                 else if(dst.rel.type == Shader::PARAMETER_LOOP)
538                                                 {
539                                                         Int aL = r.aL[r.loopDepth];
540
541                                                         if(dst.x) r.o[dst.index + aL].x = d.x;
542                                                         if(dst.y) r.o[dst.index + aL].y = d.y;
543                                                         if(dst.z) r.o[dst.index + aL].z = d.z;
544                                                         if(dst.w) r.o[dst.index + aL].w = d.w;
545                                                 }
546                                                 else
547                                                 {
548                                                         Int a = relativeAddress(r, dst);
549
550                                                         if(dst.x) r.o[dst.index + a].x = d.x;
551                                                         if(dst.y) r.o[dst.index + a].y = d.y;
552                                                         if(dst.z) r.o[dst.index + a].z = d.z;
553                                                         if(dst.w) r.o[dst.index + a].w = d.w;
554                                                 }
555                                         }
556                                         break;
557                                 case Shader::PARAMETER_LABEL:                                                                                                                                           break;
558                                 case Shader::PARAMETER_PREDICATE:       r.p0 = d;                                                                                                               break;
559                                 case Shader::PARAMETER_INPUT:                                                                                                                                           break;
560                                 default:
561                                         ASSERT(false);
562                                 }
563                         }
564                 }
565
566                 if(currentLabel != -1)
567                 {
568                         Nucleus::setInsertBlock(returnBlock);
569                 }
570         }
571
572         void VertexProgram::passThrough(Registers &r)
573         {
574                 if(shader)
575                 {
576                         for(int i = 0; i < 12; i++)
577                         {
578                                 unsigned char usage = shader->output[i][0].usage;
579
580                                 switch(usage)
581                                 {
582                                 case 0xFF:
583                                         continue;
584                                 case Shader::USAGE_PSIZE:
585                                         r.o[i].y = r.v[i].x;
586                                         break;
587                                 case Shader::USAGE_TEXCOORD:
588                                         r.o[i].x = r.v[i].x;
589                                         r.o[i].y = r.v[i].y;
590                                         r.o[i].z = r.v[i].z;
591                                         r.o[i].w = r.v[i].w;
592                                         break;
593                                 case Shader::USAGE_POSITION:
594                                         r.o[i].x = r.v[i].x;
595                                         r.o[i].y = r.v[i].y;
596                                         r.o[i].z = r.v[i].z;
597                                         r.o[i].w = r.v[i].w;
598                                         break;
599                                 case Shader::USAGE_COLOR:
600                                         r.o[i].x = r.v[i].x;
601                                         r.o[i].y = r.v[i].y;
602                                         r.o[i].z = r.v[i].z;
603                                         r.o[i].w = r.v[i].w;
604                                         break;
605                                 case Shader::USAGE_FOG:
606                                         r.o[i].x = r.v[i].x;
607                                         break;
608                                 default:
609                                         ASSERT(false);
610                                 }
611                         }
612                 }
613                 else
614                 {
615                         r.o[Pos].x = r.v[PositionT].x;
616                         r.o[Pos].y = r.v[PositionT].y;
617                         r.o[Pos].z = r.v[PositionT].z;
618                         r.o[Pos].w = r.v[PositionT].w;
619
620                         for(int i = 0; i < 2; i++)
621                         {
622                                 r.o[D0 + i].x = r.v[Color0 + i].x;
623                                 r.o[D0 + i].y = r.v[Color0 + i].y;
624                                 r.o[D0 + i].z = r.v[Color0 + i].z;
625                                 r.o[D0 + i].w = r.v[Color0 + i].w;
626                         }
627
628                         for(int i = 0; i < 8; i++)
629                         {
630                                 r.o[T0 + i].x = r.v[TexCoord0 + i].x;
631                                 r.o[T0 + i].y = r.v[TexCoord0 + i].y;
632                                 r.o[T0 + i].z = r.v[TexCoord0 + i].z;
633                                 r.o[T0 + i].w = r.v[TexCoord0 + i].w;
634                         }
635
636                         r.o[Pts].y = r.v[PointSize].x;
637                 }
638         }
639
640         Vector4f VertexProgram::fetchRegisterF(Registers &r, const Src &src, int offset)
641         {
642                 int i = src.index + offset;
643
644                 Vector4f reg;
645
646                 switch(src.type)
647                 {
648                 case Shader::PARAMETER_TEMP:
649                         if(src.rel.type == Shader::PARAMETER_VOID)
650                         {
651                                 reg = r.r[i];
652                         }
653                         else
654                         {
655                                 reg = r.r[i + relativeAddress(r, src)];
656                         }
657                         break;
658                 case Shader::PARAMETER_CONST:
659                         reg = readConstant(r, src, offset);
660                         break;
661                 case Shader::PARAMETER_INPUT:
662             if(src.rel.type == Shader::PARAMETER_VOID)
663                         {
664                                 reg = r.v[i];
665                         }
666                         else
667                         {
668                                 reg = r.v[i + relativeAddress(r, src)];
669                         }
670             break;
671                 case Shader::PARAMETER_VOID:                    return r.r[0];   // Dummy
672                 case Shader::PARAMETER_FLOAT4LITERAL:
673                         reg.x = Float4(src.value[0]);
674                         reg.y = Float4(src.value[1]);
675                         reg.z = Float4(src.value[2]);
676                         reg.w = Float4(src.value[3]);
677                         break;
678                 case Shader::PARAMETER_ADDR:                    reg = r.a0;             break;
679                 case Shader::PARAMETER_CONSTBOOL:               return r.r[0];   // Dummy
680                 case Shader::PARAMETER_CONSTINT:                return r.r[0];   // Dummy
681                 case Shader::PARAMETER_LOOP:                    return r.r[0];   // Dummy
682                 case Shader::PARAMETER_PREDICATE:               return r.r[0];   // Dummy
683                 case Shader::PARAMETER_SAMPLER:
684                         if(src.rel.type == Shader::PARAMETER_VOID)
685                         {
686                                 reg.x = As<Float4>(Int4(i));
687                         }
688                         else if(src.rel.type == Shader::PARAMETER_TEMP)
689                         {
690                                 reg.x = As<Float4>(Int4(i) + As<Int4>(r.r[src.rel.index].x));
691                         }
692                         return reg;
693                 case Shader::PARAMETER_OUTPUT:
694             if(src.rel.type == Shader::PARAMETER_VOID)
695                         {
696                                 reg = r.o[i];
697                         }
698                         else
699                         {
700                                 reg = r.o[i + relativeAddress(r, src)];
701                         }
702                         break;
703                 case Shader::PARAMETER_MISCTYPE:
704                         reg.x = As<Float>(Int(r.instanceID));
705                         return reg;
706                 default:
707                         ASSERT(false);
708                 }
709
710                 const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
711                 const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
712                 const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
713                 const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
714
715                 Vector4f mod;
716
717                 switch(src.modifier)
718                 {
719                 case Shader::MODIFIER_NONE:
720                         mod.x = x;
721                         mod.y = y;
722                         mod.z = z;
723                         mod.w = w;
724                         break;
725                 case Shader::MODIFIER_NEGATE:
726                         mod.x = -x;
727                         mod.y = -y;
728                         mod.z = -z;
729                         mod.w = -w;
730                         break;
731                 case Shader::MODIFIER_ABS:
732                         mod.x = Abs(x);
733                         mod.y = Abs(y);
734                         mod.z = Abs(z);
735                         mod.w = Abs(w);
736                         break;
737                 case Shader::MODIFIER_ABS_NEGATE:
738                         mod.x = -Abs(x);
739                         mod.y = -Abs(y);
740                         mod.z = -Abs(z);
741                         mod.w = -Abs(w);
742                         break;
743                 case Shader::MODIFIER_NOT:
744                         mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
745                         mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
746                         mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
747                         mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
748                         break;
749                 default:
750                         ASSERT(false);
751                 }
752
753                 return mod;
754         }
755
756         Vector4f VertexProgram::readConstant(Registers &r, const Src &src, int offset)
757         {
758                 Vector4f c;
759
760                 int i = src.index + offset;
761
762                 if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
763                 {
764                         c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]));
765
766                         c.x = c.x.xxxx;
767                         c.y = c.y.yyyy;
768                         c.z = c.z.zzzz;
769                         c.w = c.w.wwww;
770
771                         if(shader->containsDefineInstruction())   // Constant may be known at compile time
772                         {
773                                 for(size_t j = 0; j < shader->getLength(); j++)
774                                 {
775                                         const Shader::Instruction &instruction = *shader->getInstruction(j);
776
777                                         if(instruction.opcode == Shader::OPCODE_DEF)
778                                         {
779                                                 if(instruction.dst.index == i)
780                                                 {
781                                                         c.x = Float4(instruction.src[0].value[0]);
782                                                         c.y = Float4(instruction.src[0].value[1]);
783                                                         c.z = Float4(instruction.src[0].value[2]);
784                                                         c.w = Float4(instruction.src[0].value[3]);
785
786                                                         break;
787                                                 }
788                                         }
789                                 }
790                         }
791                 }
792                 else if(src.rel.type == Shader::PARAMETER_LOOP)
793                 {
794                         Int loopCounter = r.aL[r.loopDepth];
795
796                         c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]) + loopCounter * 16);
797
798                         c.x = c.x.xxxx;
799                         c.y = c.y.yyyy;
800                         c.z = c.z.zzzz;
801                         c.w = c.w.wwww;
802                 }
803                 else
804                 {
805                         if(src.rel.deterministic)
806                         {
807                                 Int a = relativeAddress(r, src);
808                         
809                                 c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]) + a * 16);
810
811                                 c.x = c.x.xxxx;
812                                 c.y = c.y.yyyy;
813                                 c.z = c.z.zzzz;
814                                 c.w = c.w.wwww;
815                         }
816                         else
817                         {
818                                 int component = src.rel.swizzle & 0x03;
819                                 Float4 a;
820
821                                 switch(src.rel.type)
822                                 {
823                                 case Shader::PARAMETER_ADDR:   a = r.a0[component]; break;
824                                 case Shader::PARAMETER_TEMP:   a = r.r[src.rel.index][component]; break;
825                                 case Shader::PARAMETER_INPUT:  a = r.v[src.rel.index][component]; break;
826                                 case Shader::PARAMETER_OUTPUT: a = r.o[src.rel.index][component]; break;
827                                 case Shader::PARAMETER_CONST:  a = *Pointer<Float>(r.data + OFFSET(DrawData,vs.c[src.rel.index][component])); break;
828                                 default: ASSERT(false);
829                                 }
830
831                                 Int4 index = Int4(i) + As<Int4>(a) * Int4(src.rel.scale);
832
833                                 index = Min(As<UInt4>(index), UInt4(256));   // Clamp to constant register range, c[256] = {0, 0, 0, 0}
834                                 
835                                 Int index0 = Extract(index, 0);
836                                 Int index1 = Extract(index, 1);
837                                 Int index2 = Extract(index, 2);
838                                 Int index3 = Extract(index, 3);
839
840                                 c.x = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index0 * 16, 16);
841                                 c.y = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index1 * 16, 16);
842                                 c.z = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index2 * 16, 16);
843                                 c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index3 * 16, 16);
844
845                                 transpose4x4(c.x, c.y, c.z, c.w);
846                         }
847                 }
848
849                 return c;
850         }
851
852         Int VertexProgram::relativeAddress(Registers &r, const Shader::Parameter &var)
853         {
854                 ASSERT(var.rel.deterministic);
855
856                 if(var.rel.type == Shader::PARAMETER_TEMP)
857                 {
858                         return As<Int>(Extract(r.r[var.rel.index].x, 0)) * var.rel.scale;
859                 }
860                 else if(var.rel.type == Shader::PARAMETER_INPUT)
861                 {
862                         return As<Int>(Extract(r.v[var.rel.index].x, 0)) * var.rel.scale;
863                 }
864                 else if(var.rel.type == Shader::PARAMETER_OUTPUT)
865                 {
866                         return As<Int>(Extract(r.o[var.rel.index].x, 0)) * var.rel.scale;
867                 }
868                 else if(var.rel.type == Shader::PARAMETER_CONST)
869                 {
870                         RValue<Int4> c = *Pointer<Int4>(r.data + OFFSET(DrawData, vs.c[var.rel.index]));
871
872                         return Extract(c, 0) * var.rel.scale;
873                 }
874                 else ASSERT(false);
875
876                 return 0;
877         }
878
879         Int4 VertexProgram::enableMask(Registers &r, const Shader::Instruction *instruction)
880         {
881                 Int4 enable = instruction->analysisBranch ? Int4(r.enableStack[r.enableIndex]) : Int4(0xFFFFFFFF);
882                 
883                 if(!whileTest)
884                 {
885                         if(shader->containsBreakInstruction() && instruction->analysisBreak)
886                         {
887                                 enable &= r.enableBreak;
888                         }
889
890                         if(shader->containsContinueInstruction() && instruction->analysisContinue)
891                         {
892                                 enable &= r.enableContinue;
893                         }
894
895                         if(shader->containsLeaveInstruction() && instruction->analysisLeave)
896                         {
897                                 enable &= r.enableLeave;
898                         }
899                 }
900
901                 return enable;
902         }
903
904         void VertexProgram::M3X2(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)
905         {
906                 Vector4f row0 = fetchRegisterF(r, src1, 0);
907                 Vector4f row1 = fetchRegisterF(r, src1, 1);
908
909                 dst.x = dot3(src0, row0);
910                 dst.y = dot3(src0, row1);
911         }
912
913         void VertexProgram::M3X3(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)
914         {
915                 Vector4f row0 = fetchRegisterF(r, src1, 0);
916                 Vector4f row1 = fetchRegisterF(r, src1, 1);
917                 Vector4f row2 = fetchRegisterF(r, src1, 2);
918
919                 dst.x = dot3(src0, row0);
920                 dst.y = dot3(src0, row1);
921                 dst.z = dot3(src0, row2);
922         }
923
924         void VertexProgram::M3X4(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)
925         {
926                 Vector4f row0 = fetchRegisterF(r, src1, 0);
927                 Vector4f row1 = fetchRegisterF(r, src1, 1);
928                 Vector4f row2 = fetchRegisterF(r, src1, 2);
929                 Vector4f row3 = fetchRegisterF(r, src1, 3);
930
931                 dst.x = dot3(src0, row0);
932                 dst.y = dot3(src0, row1);
933                 dst.z = dot3(src0, row2);
934                 dst.w = dot3(src0, row3);
935         }
936
937         void VertexProgram::M4X3(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)
938         {
939                 Vector4f row0 = fetchRegisterF(r, src1, 0);
940                 Vector4f row1 = fetchRegisterF(r, src1, 1);
941                 Vector4f row2 = fetchRegisterF(r, src1, 2);
942
943                 dst.x = dot4(src0, row0);
944                 dst.y = dot4(src0, row1);
945                 dst.z = dot4(src0, row2);
946         }
947
948         void VertexProgram::M4X4(Registers &r, Vector4f &dst, Vector4f &src0, Src &src1)
949         {
950                 Vector4f row0 = fetchRegisterF(r, src1, 0);
951                 Vector4f row1 = fetchRegisterF(r, src1, 1);
952                 Vector4f row2 = fetchRegisterF(r, src1, 2);
953                 Vector4f row3 = fetchRegisterF(r, src1, 3);
954
955                 dst.x = dot4(src0, row0);
956                 dst.y = dot4(src0, row1);
957                 dst.z = dot4(src0, row2);
958                 dst.w = dot4(src0, row3);
959         }
960
961         void VertexProgram::BREAK(Registers &r)
962         {
963                 llvm::BasicBlock *deadBlock = Nucleus::createBasicBlock();
964                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
965
966                 if(breakDepth == 0)
967                 {
968                         r.enableIndex = r.enableIndex - breakDepth;
969                         Nucleus::createBr(endBlock);
970                 }
971                 else
972                 {
973                         r.enableBreak = r.enableBreak & ~r.enableStack[r.enableIndex];
974                         Bool allBreak = SignMask(r.enableBreak) == 0x0;
975
976                         r.enableIndex = r.enableIndex - breakDepth;
977                         branch(allBreak, endBlock, deadBlock);
978                 }
979
980                 Nucleus::setInsertBlock(deadBlock);
981                 r.enableIndex = r.enableIndex + breakDepth;
982         }
983
984         void VertexProgram::BREAKC(Registers &r, Vector4f &src0, Vector4f &src1, Control control)
985         {
986                 Int4 condition;
987
988                 switch(control)
989                 {
990                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x,  src1.x);   break;
991                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);             break;
992                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x);    break;
993                 case Shader::CONTROL_LT: condition = CmpLT(src0.x,  src1.x);    break;
994                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x);    break;
995                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);             break;
996                 default:
997                         ASSERT(false);
998                 }
999
1000                 BREAK(r, condition);
1001         }
1002
1003         void VertexProgram::BREAKP(Registers &r, const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
1004         {
1005                 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
1006
1007                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1008                 {
1009                         condition = ~condition;
1010                 }
1011
1012                 BREAK(r, condition);
1013         }
1014
1015         void VertexProgram::BREAK(Registers &r, Int4 &condition)
1016         {
1017                 condition &= r.enableStack[r.enableIndex];
1018
1019                 llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock();
1020                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
1021
1022                 r.enableBreak = r.enableBreak & ~condition;
1023                 Bool allBreak = SignMask(r.enableBreak) == 0x0;
1024
1025                 r.enableIndex = r.enableIndex - breakDepth;
1026                 branch(allBreak, endBlock, continueBlock);
1027
1028                 Nucleus::setInsertBlock(continueBlock);
1029                 r.enableIndex = r.enableIndex + breakDepth;
1030         }
1031
1032         void VertexProgram::CONTINUE(Registers &r)   // Removes the currently enabled lanes from the enableContinue mask
1033         {
1034                 r.enableContinue = r.enableContinue & ~r.enableStack[r.enableIndex];   // Only the mask is updated; no branch is emitted here
1035         }
1036
1037         void VertexProgram::TEST()   // Notes that a TEST instruction was encountered
1038         {
1039                 whileTest = true;   // Generation-time flag; ENDWHILE sets it back to false
1040         }
1041
1042         void VertexProgram::CALL(Registers &r, int labelIndex, int callSiteIndex)
1043         {
1044                 if(!labelBlock[labelIndex])
1045                 {
1046                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1047                 }
1048
1049                 if(callRetBlock[labelIndex].size() > 1)
1050                 {
1051                         r.callStack[r.stackIndex++] = UInt(callSiteIndex);
1052                 }
1053
1054                 Int4 restoreLeave = r.enableLeave;
1055
1056                 Nucleus::createBr(labelBlock[labelIndex]);
1057                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1058
1059                 r.enableLeave = restoreLeave;
1060         }
1061
1062         void VertexProgram::CALLNZ(Registers &r, int labelIndex, int callSiteIndex, const Src &src)
1063         {
1064                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1065                 {
1066                         CALLNZb(r, labelIndex, callSiteIndex, src);
1067                 }
1068                 else if(src.type == Shader::PARAMETER_PREDICATE)
1069                 {
1070                         CALLNZp(r, labelIndex, callSiteIndex, src);
1071                 }
1072                 else ASSERT(false);
1073         }
1074
1075         void VertexProgram::CALLNZb(Registers &r, int labelIndex, int callSiteIndex, const Src &boolRegister)
1076         {
1077                 Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1078                 
1079                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1080                 {
1081                         condition = !condition; 
1082                 }
1083
1084                 if(!labelBlock[labelIndex])
1085                 {
1086                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1087                 }
1088
1089                 if(callRetBlock[labelIndex].size() > 1)
1090                 {
1091                         r.callStack[r.stackIndex++] = UInt(callSiteIndex);
1092                 }
1093
1094                 Int4 restoreLeave = r.enableLeave;
1095
1096                 branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1097                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1098
1099                 r.enableLeave = restoreLeave;
1100         }
1101
1102         void VertexProgram::CALLNZp(Registers &r, int labelIndex, int callSiteIndex, const Src &predicateRegister)
1103         {
1104                 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
1105
1106                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1107                 {
1108                         condition = ~condition;
1109                 }
1110
1111                 condition &= r.enableStack[r.enableIndex];
1112
1113                 if(!labelBlock[labelIndex])
1114                 {
1115                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1116                 }
1117
1118                 if(callRetBlock[labelIndex].size() > 1)
1119                 {
1120                         r.callStack[r.stackIndex++] = UInt(callSiteIndex);
1121                 }
1122
1123                 r.enableIndex++;
1124                 r.enableStack[r.enableIndex] = condition;
1125                 Int4 restoreLeave = r.enableLeave;
1126
1127                 Bool notAllFalse = SignMask(condition) != 0;
1128                 branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1129                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1130
1131                 r.enableIndex--;
1132                 r.enableLeave = restoreLeave;
1133         }
1134
1135         void VertexProgram::ELSE(Registers &r)   // Switches emission from the 'if' part to the 'else' part of the innermost open IF
1136         {
1137                 ifDepth--;   // Step back to the slot of the IF being processed (restored at the end)
1138
1139                 llvm::BasicBlock *falseBlock = ifFalseBlock[ifDepth];
1140                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1141
1142                 if(isConditionalIf[ifDepth])
1143                 {
1144                         Int4 condition = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];   // Lanes enabled one level down that are not enabled at the top level
1145                         Bool notAllFalse = SignMask(condition) != 0;
1146                         // Go to endBlock instead of the else part when no lane needs it
1147                         branch(notAllFalse, falseBlock, endBlock);
1148                         // The top of the enable stack is replaced by the else-part mask (same expression as above)
1149                         r.enableStack[r.enableIndex] = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];
1150                 }
1151                 else
1152                 {
1153                         Nucleus::createBr(endBlock);   // IF on a boolean constant (see IFb): the 'if' part jumps over the else part
1154                         Nucleus::setInsertBlock(falseBlock);
1155                 }
1156
1157                 ifFalseBlock[ifDepth] = endBlock;   // ENDIF reads this slot to find the block that follows the IF
1158
1159                 ifDepth++;
1160         }
1161
1162         void VertexProgram::ENDIF(Registers &r)   // Closes the innermost open IF
1163         {
1164                 ifDepth--;
1165                 // The slot holds the IF's false block, or the end block stored by ELSE
1166                 llvm::BasicBlock *endBlock = ifFalseBlock[ifDepth];
1167                 // Terminate the current block with a jump and continue emitting in endBlock
1168                 Nucleus::createBr(endBlock);
1169                 Nucleus::setInsertBlock(endBlock);
1170                 // A lane-masked IF pushed an enable-stack level and raised breakDepth; undo both
1171                 if(isConditionalIf[ifDepth])
1172                 {
1173                         breakDepth--;
1174                         r.enableIndex--;
1175                 }
1176         }
1177
1178         void VertexProgram::ENDLOOP(Registers &r)   // Closes the loop body opened by LOOP
1179         {
1180                 loopRepDepth--;
1181                 // Advance this loop's aL entry by the increment read in LOOP
1182                 r.aL[r.loopDepth] = r.aL[r.loopDepth] + r.increment[r.loopDepth];   // FIXME: +=
1183
1184                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1185                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1186                 // Jump back to the iteration test; code after the loop is emitted into endBlock
1187                 Nucleus::createBr(testBlock);
1188                 Nucleus::setInsertBlock(endBlock);
1189
1190                 r.loopDepth--;
1191                 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);   // All lanes of the break mask are set again after the loop
1192         }
1193
1194         void VertexProgram::ENDREP(Registers &r)   // Closes the loop body opened by REP
1195         {
1196                 loopRepDepth--;
1197                 // Blocks recorded by REP for this nesting level
1198                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1199                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1200                 // Jump back to the iteration test; code after the loop is emitted into endBlock
1201                 Nucleus::createBr(testBlock);
1202                 Nucleus::setInsertBlock(endBlock);
1203
1204                 r.loopDepth--;
1205                 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);   // All lanes of the break mask are set again after the loop
1206         }
1207
1208         void VertexProgram::ENDWHILE(Registers &r)   // Closes the loop body opened by WHILE
1209         {
1210                 loopRepDepth--;
1211                 // Blocks recorded by WHILE for this nesting level
1212                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1213                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1214                 // Jump back to the condition test; code after the loop is emitted into endBlock
1215                 Nucleus::createBr(testBlock);
1216                 Nucleus::setInsertBlock(endBlock);
1217
1218                 r.enableIndex--;   // Drop the enable-stack level that WHILE pushed
1219                 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);   // All lanes of the break mask are set again after the loop
1220                 whileTest = false;   // Clears the generation-time flag raised by TEST
1221         }
1222
1223         void VertexProgram::IF(Registers &r, const Src &src)
1224         {
1225                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1226                 {
1227                         IFb(r, src);
1228                 }
1229                 else if(src.type == Shader::PARAMETER_PREDICATE)
1230                 {
1231                         IFp(r, src);
1232                 }
1233                 else
1234                 {
1235                         Int4 condition = As<Int4>(fetchRegisterF(r, src).x);
1236                         IF(r, condition);
1237                 }
1238         }
1239
1240         void VertexProgram::IFb(Registers &r, const Src &boolRegister)
1241         {
1242                 ASSERT(ifDepth < 24 + 4);
1243
1244                 Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1245
1246                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1247                 {
1248                         condition = !condition;
1249                 }
1250
1251                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1252                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1253
1254                 branch(condition, trueBlock, falseBlock);
1255
1256                 isConditionalIf[ifDepth] = false;
1257                 ifFalseBlock[ifDepth] = falseBlock;
1258
1259                 ifDepth++;
1260         }
1261
1262         void VertexProgram::IFp(Registers &r, const Src &predicateRegister)
1263         {
1264                 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
1265
1266                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1267                 {
1268                         condition = ~condition;
1269                 }
1270
1271                 IF(r, condition);
1272         }
1273
1274         void VertexProgram::IFC(Registers &r, Vector4f &src0, Vector4f &src1, Control control)
1275         {
1276                 Int4 condition;
1277
1278                 switch(control)
1279                 {
1280                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x,  src1.x);   break;
1281                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);             break;
1282                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x);    break;
1283                 case Shader::CONTROL_LT: condition = CmpLT(src0.x,  src1.x);    break;
1284                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x);    break;
1285                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);             break;
1286                 default:
1287                         ASSERT(false);
1288                 }
1289
1290                 IF(r, condition);
1291         }
1292
1293         void VertexProgram::IF(Registers &r, Int4 &condition)
1294         {
1295                 condition &= r.enableStack[r.enableIndex];
1296
1297                 r.enableIndex++;
1298                 r.enableStack[r.enableIndex] = condition;
1299
1300                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1301                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1302
1303                 Bool notAllFalse = SignMask(condition) != 0;
1304
1305                 branch(notAllFalse, trueBlock, falseBlock);
1306
1307                 isConditionalIf[ifDepth] = true;
1308                 ifFalseBlock[ifDepth] = falseBlock;
1309
1310                 ifDepth++;
1311                 breakDepth++;
1312         }
1313
1314         void VertexProgram::LABEL(int labelIndex)   // Starts emitting the subroutine identified by labelIndex
1315         {
1316                 if(!labelBlock[labelIndex])   // The block may already exist if a CALL/CALLNZ referenced this label first
1317                 {
1318                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1319                 }
1320                 // Following instructions go into the label's block; RET uses currentLabel to find the call sites
1321                 Nucleus::setInsertBlock(labelBlock[labelIndex]);
1322                 currentLabel = labelIndex;
1323         }
1324
1325         void VertexProgram::LOOP(Registers &r, const Src &integerRegister)   // Opens a counted loop configured by an integer constant register
1326         {
1327                 r.loopDepth++;
1328                 // Components 0, 1 and 2 of the integer constant give the iteration count, the initial aL value and the aL increment
1329                 r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
1330                 r.aL[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][1]));
1331                 r.increment[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][2]));
1332
1333                 // FIXME: Compiles to two instructions?
1334                 If(r.increment[r.loopDepth] == 0)   // A zero increment is replaced by 1
1335                 {
1336                         r.increment[r.loopDepth] = 1;
1337                 }
1338
1339                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1340                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1341                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1342                 // Recorded so that BREAK and ENDLOOP can find this loop's blocks
1343                 loopRepTestBlock[loopRepDepth] = testBlock;
1344                 loopRepEndBlock[loopRepDepth] = endBlock;
1345
1346                 // FIXME: jump(testBlock)
1347                 Nucleus::createBr(testBlock);
1348                 Nucleus::setInsertBlock(testBlock);
1349                 // The test block enters the body while iterations remain and otherwise leaves through endBlock
1350                 branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
1351                 Nucleus::setInsertBlock(loopBlock);
1352                 // The remaining-iterations counter is decremented at the top of the body
1353                 r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1;   // FIXME: --
1354                 // A new loop level starts with no lane-masked IFs between it and a BREAK
1355                 loopRepDepth++;
1356                 breakDepth = 0;
1357         }
1358
1359         void VertexProgram::REP(Registers &r, const Src &integerRegister)   // Opens a repeat block whose count comes from an integer constant register
1360         {
1361                 r.loopDepth++;
1362                 // Only component 0 (the iteration count) is read; aL is carried over from the enclosing level
1363                 r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
1364                 r.aL[r.loopDepth] = r.aL[r.loopDepth - 1];
1365
1366                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1367                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1368                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1369                 // Recorded so that BREAK and ENDREP can find this loop's blocks
1370                 loopRepTestBlock[loopRepDepth] = testBlock;
1371                 loopRepEndBlock[loopRepDepth] = endBlock;
1372
1373                 // FIXME: jump(testBlock)
1374                 Nucleus::createBr(testBlock);
1375                 Nucleus::setInsertBlock(testBlock);
1376                 // The test block enters the body while iterations remain and otherwise leaves through endBlock
1377                 branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
1378                 Nucleus::setInsertBlock(loopBlock);
1379                 // The remaining-iterations counter is decremented at the top of the body
1380                 r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1;   // FIXME: --
1381                 // A new loop level starts with no lane-masked IFs between it and a BREAK
1382                 loopRepDepth++;
1383                 breakDepth = 0;
1384         }
1385
1386         void VertexProgram::WHILE(Registers &r, const Src &temporaryRegister)   // Opens a loop that runs while temporaryRegister.x is set for at least one enabled lane
1387         {
1388                 r.enableIndex++;   // The loop gets its own enable-stack level (dropped again by ENDWHILE)
1389
1390                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1391                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1392                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1393                 // Recorded so that BREAK and ENDWHILE can find this loop's blocks
1394                 loopRepTestBlock[loopRepDepth] = testBlock;
1395                 loopRepEndBlock[loopRepDepth] = endBlock;
1396
1397                 Int4 restoreBreak = r.enableBreak;
1398                 Int4 restoreContinue = r.enableContinue;
1399
1400                 // FIXME: jump(testBlock)
1401                 Nucleus::createBr(testBlock);
1402                 Nucleus::setInsertBlock(testBlock);
1403                 r.enableContinue = restoreContinue;   // Each pass through the test puts the continue mask back to its value at loop entry
1404
1405                 const Vector4f &src = fetchRegisterF(r, temporaryRegister);
1406                 Int4 condition = As<Int4>(src.x);
1407                 condition &= r.enableStack[r.enableIndex - 1];   // Restrict to the lanes enabled outside the loop
1408                 r.enableStack[r.enableIndex] = condition;
1409
1410                 Bool notAllFalse = SignMask(condition) != 0;
1411                 branch(notAllFalse, loopBlock, endBlock);
1412                 // On the exit path the break mask gets back the value it had before the loop
1413                 Nucleus::setInsertBlock(endBlock);
1414                 r.enableBreak = restoreBreak;
1415                 // Body emission continues in loopBlock; ENDWHILE later moves the insert point to endBlock again
1416                 Nucleus::setInsertBlock(loopBlock);
1417
1418                 loopRepDepth++;
1419                 breakDepth = 0;
1420         }
1421
1422         void VertexProgram::RET(Registers &r)   // Returns from the main program or from the subroutine currently being emitted
1423         {
1424                 if(currentLabel == -1)   // Still at the constructor's initial value: no LABEL seen yet, so this is the main program
1425                 {
1426                         returnBlock = Nucleus::createBasicBlock();
1427                         Nucleus::createBr(returnBlock);   // The insert block itself is not changed here
1428                 }
1429                 else
1430                 {
1431                         llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
1432
1433                         if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
1434                         {
1435                                 // FIXME: Encapsulate
1436                                 UInt index = r.callStack[--r.stackIndex];
1437                                 // Dispatch on the popped call-site index; unreachableBlock is the switch default
1438                                 llvm::Value *value = index.loadValue();
1439                                 llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
1440
1441                                 for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
1442                                 {
1443                                         Nucleus::addSwitchCase(switchInst, i, callRetBlock[currentLabel][i]);
1444                                 }
1445                         }
1446                         else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
1447                         {
1448                                 Nucleus::createBr(callRetBlock[currentLabel][0]);
1449                         }
1450                         else   // Function isn't called
1451                         {
1452                                 Nucleus::createBr(unreachableBlock);
1453                         }
1454                         // Anything emitted after the return lands in a block that ends in 'unreachable'
1455                         Nucleus::setInsertBlock(unreachableBlock);
1456                         Nucleus::createUnreachable();
1457                 }
1458         }
1459
1460         void VertexProgram::LEAVE(Registers &r)   // Removes the currently enabled lanes from the enableLeave mask
1461         {
1462                 r.enableLeave = r.enableLeave & ~r.enableStack[r.enableIndex];   // Only the mask is updated; no branch is emitted
1463
1464                 // FIXME: Return from function if all instances left
1465                 // FIXME: Use enableLeave in other control-flow constructs
1466         }
1467
1468         void VertexProgram::TEXLDL(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
1469         {
1470                 Vector4f tmp;
1471                 sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, src0.w);
1472
1473                 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
1474                 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
1475                 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
1476                 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
1477         }
1478
1479         void VertexProgram::TEX(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
1480         {
1481                 Float4 lod = Float4(0.0f);
1482                 Vector4f tmp;
1483                 sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, lod);
1484
1485                 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
1486                 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
1487                 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
1488                 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
1489         }
1490
1491         void VertexProgram::TEXSIZE(Registers &r, Vector4f &dst, Float4 &lod, const Src &src1)   // Writes the width, height and depth of a mipmap level to dst.x, dst.y and dst.z (dst.w is not written)
1492         {
1493                 Pointer<Byte> textureMipmap = r.data + OFFSET(DrawData, mipmap[16]) + src1.index * sizeof(Texture) + OFFSET(Texture, mipmap);   // Mipmap array of texture src1.index, counted from DrawData::mipmap[16]
1494                 for(int i = 0; i < 4; ++i)   // One iteration per lane of lod
1495                 {
1496                         Pointer<Byte> mipmap = textureMipmap + (As<Int>(Extract(lod, i)) + Int(1)) * sizeof(Mipmap);   // Lane i of lod is taken as an Int via As<> (presumably a bit reinterpretation); NOTE(review): confirm the + 1 level offset
1497                         dst.x = Insert(dst.x, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, width)))), i);   // Short field widened to Int, then placed in lane i through As<Float>
1498                         dst.y = Insert(dst.y, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, height)))), i);
1499                         dst.z = Insert(dst.z, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)))), i);
1500                 }
1501         }
1502
1503         void VertexProgram::sampleTexture(Registers &r, Vector4f &c, const Src &s, Float4 &u, Float4 &v, Float4 &w, Float4 &q)   // Samples the texture selected by s at (u, v, w, q) into c; TEX passes a zero lod as q
1504         {
1505                 if(s.type == Shader::PARAMETER_SAMPLER && s.rel.type == Shader::PARAMETER_VOID)   // Sampler register without relative addressing: the index is known while generating code
1506                 {
1507                         Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap[16]) + s.index * sizeof(Texture);   // Texture s.index, counted from DrawData::mipmap[16]
1508                         sampler[s.index]->sampleTexture(texture, c, u, v, w, q, r.a0, r.a0, false, false, true);        // r.a0 is passed for both vector arguments after q
1509                 }
1510                 else
1511                 {
1512                         Int index = As<Int>(Float(fetchRegisterF(r, s).x.x));   // Sampler index read from the register's x component when the routine runs
1513                         // Emit one guarded sampling sequence for every sampler the shader uses
1514                         for(int i = 0; i < 16; i++)
1515                         {
1516                                 if(shader->usesSampler(i))
1517                                 {
1518                                         If(index == i)   // Test in the generated routine, unlike the if above
1519                                         {
1520                                                 Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap[16]) + i * sizeof(Texture);
1521                                                 sampler[i]->sampleTexture(texture, c, u, v, w, q, r.a0, r.a0, false, false, true);
1522                                                 // FIXME: When the sampler states are the same, we could use one sampler and just index the texture
1523                                         }
1524                                 }
1525                         }
1526                 }
1527         }
1528 }