OSDN Git Service

gl_VertexID implementation
[android-x86/external-swiftshader.git] / src / Shader / VertexProgram.cpp
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "VertexProgram.hpp"
16
17 #include "Renderer.hpp"
18 #include "VertexShader.hpp"
19 #include "Vertex.hpp"
20 #include "Half.hpp"
21 #include "SamplerCore.hpp"
22 #include "Debug.hpp"
23
24 namespace sw
25 {
26         VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader)
27                 : VertexRoutine(state, shader), shader(shader), r(shader->dynamicallyIndexedTemporaries)
28         {
29                 ifDepth = 0;
30                 loopRepDepth = 0;
31                 breakDepth = 0;
32                 currentLabel = -1;
33                 whileTest = false;
34
35                 for(int i = 0; i < 2048; i++)
36                 {
37                         labelBlock[i] = 0;
38                 }
39
40                 loopDepth = -1;
41                 enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
42
43                 if(shader && shader->containsBreakInstruction())
44                 {
45                         enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
46                 }
47
48                 if(shader && shader->containsContinueInstruction())
49                 {
50                         enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
51                 }
52
53                 if(shader->isInstanceIdDeclared())
54                 {
55                         instanceID = *Pointer<Int>(data + OFFSET(DrawData,instanceID));
56                 }
57         }
58
59         VertexProgram::~VertexProgram()
60         {
61                 for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
62                 {
63                         delete sampler[i];
64                 }
65         }
66
67         void VertexProgram::pipeline(UInt& index)
68         {
69                 for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
70                 {
71                         sampler[i] = new SamplerCore(constants, state.samplerState[i]);
72                 }
73
74                 if(!state.preTransformed)
75                 {
76                         program(index);
77                 }
78                 else
79                 {
80                         passThrough();
81                 }
82         }
83
84         void VertexProgram::program(UInt& index)
85         {
86         //      shader->print("VertexShader-%0.8X.txt", state.shaderID);
87
88                 unsigned short version = shader->getVersion();
89
90                 enableIndex = 0;
91                 stackIndex = 0;
92
93                 if(shader->containsLeaveInstruction())
94                 {
95                         enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
96                 }
97
98                 if(shader->isVertexIdDeclared())
99                 {
100                         if(state.textureSampling)
101                         {
102                                 vertexID = Int4(index);
103                         }
104                         else
105                         {
106                                 vertexID = Insert(vertexID, As<Int>(index), 0);
107                                 vertexID = Insert(vertexID, As<Int>(index + 1), 1);
108                                 vertexID = Insert(vertexID, As<Int>(index + 2), 2);
109                                 vertexID = Insert(vertexID, As<Int>(index + 3), 3);
110                         }
111                 }
112
113                 // Create all call site return blocks up front
114                 for(size_t i = 0; i < shader->getLength(); i++)
115                 {
116                         const Shader::Instruction *instruction = shader->getInstruction(i);
117                         Shader::Opcode opcode = instruction->opcode;
118
119                         if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
120                         {
121                                 const Dst &dst = instruction->dst;
122
123                                 ASSERT(callRetBlock[dst.label].size() == dst.callSite);
124                                 callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
125                         }
126                 }
127
128                 for(size_t i = 0; i < shader->getLength(); i++)
129                 {
130                         const Shader::Instruction *instruction = shader->getInstruction(i);
131                         Shader::Opcode opcode = instruction->opcode;
132
133                         if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
134                         {
135                                 continue;
136                         }
137
138                         Dst dst = instruction->dst;
139                         Src src0 = instruction->src[0];
140                         Src src1 = instruction->src[1];
141                         Src src2 = instruction->src[2];
142                         Src src3 = instruction->src[3];
143                         Src src4 = instruction->src[4];
144
145                         bool predicate = instruction->predicate;
146                         Control control = instruction->control;
147                         bool integer = dst.type == Shader::PARAMETER_ADDR;
148                         bool pp = dst.partialPrecision;
149
150                         Vector4f d;
151                         Vector4f s0;
152                         Vector4f s1;
153                         Vector4f s2;
154                         Vector4f s3;
155                         Vector4f s4;
156
157                         if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0);
158                         if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1);
159                         if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2);
160                         if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3);
161                         if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4);
162
163                         switch(opcode)
164                         {
165                         case Shader::OPCODE_VS_1_0:                                     break;
166                         case Shader::OPCODE_VS_1_1:                                     break;
167                         case Shader::OPCODE_VS_2_0:                                     break;
168                         case Shader::OPCODE_VS_2_x:                                     break;
169                         case Shader::OPCODE_VS_2_sw:                                    break;
170                         case Shader::OPCODE_VS_3_0:                                     break;
171                         case Shader::OPCODE_VS_3_sw:                                    break;
172                         case Shader::OPCODE_DCL:                                        break;
173                         case Shader::OPCODE_DEF:                                        break;
174                         case Shader::OPCODE_DEFI:                                       break;
175                         case Shader::OPCODE_DEFB:                                       break;
176                         case Shader::OPCODE_NOP:                                        break;
177                         case Shader::OPCODE_ABS:        abs(d, s0);                     break;
178                         case Shader::OPCODE_IABS:       iabs(d, s0);                    break;
179                         case Shader::OPCODE_ADD:        add(d, s0, s1);                 break;
180                         case Shader::OPCODE_IADD:       iadd(d, s0, s1);                break;
181                         case Shader::OPCODE_CRS:        crs(d, s0, s1);                 break;
182                         case Shader::OPCODE_FORWARD1:   forward1(d, s0, s1, s2);        break;
183                         case Shader::OPCODE_FORWARD2:   forward2(d, s0, s1, s2);        break;
184                         case Shader::OPCODE_FORWARD3:   forward3(d, s0, s1, s2);        break;
185                         case Shader::OPCODE_FORWARD4:   forward4(d, s0, s1, s2);        break;
186                         case Shader::OPCODE_REFLECT1:   reflect1(d, s0, s1);            break;
187                         case Shader::OPCODE_REFLECT2:   reflect2(d, s0, s1);            break;
188                         case Shader::OPCODE_REFLECT3:   reflect3(d, s0, s1);            break;
189                         case Shader::OPCODE_REFLECT4:   reflect4(d, s0, s1);            break;
190                         case Shader::OPCODE_REFRACT1:   refract1(d, s0, s1, s2.x);      break;
191                         case Shader::OPCODE_REFRACT2:   refract2(d, s0, s1, s2.x);      break;
192                         case Shader::OPCODE_REFRACT3:   refract3(d, s0, s1, s2.x);      break;
193                         case Shader::OPCODE_REFRACT4:   refract4(d, s0, s1, s2.x);      break;
194                         case Shader::OPCODE_DP1:        dp1(d, s0, s1);                 break;
195                         case Shader::OPCODE_DP2:        dp2(d, s0, s1);                 break;
196                         case Shader::OPCODE_DP3:        dp3(d, s0, s1);                 break;
197                         case Shader::OPCODE_DP4:        dp4(d, s0, s1);                 break;
198                         case Shader::OPCODE_DET2:       det2(d, s0, s1);                break;
199                         case Shader::OPCODE_DET3:       det3(d, s0, s1, s2);            break;
200                         case Shader::OPCODE_DET4:       det4(d, s0, s1, s2, s3);        break;
201                         case Shader::OPCODE_ATT:        att(d, s0, s1);                 break;
202                         case Shader::OPCODE_EXP2X:      exp2x(d, s0, pp);               break;
203                         case Shader::OPCODE_EXP2:       exp2(d, s0, pp);                break;
204                         case Shader::OPCODE_EXPP:       expp(d, s0, version);           break;
205                         case Shader::OPCODE_EXP:        exp(d, s0, pp);                 break;
206                         case Shader::OPCODE_FRC:        frc(d, s0);                     break;
207                         case Shader::OPCODE_TRUNC:      trunc(d, s0);                   break;
208                         case Shader::OPCODE_FLOOR:      floor(d, s0);                   break;
209                         case Shader::OPCODE_ROUND:      round(d, s0);                   break;
210                         case Shader::OPCODE_ROUNDEVEN:  roundEven(d, s0);               break;
211                         case Shader::OPCODE_CEIL:       ceil(d, s0);                    break;
212                         case Shader::OPCODE_LIT:        lit(d, s0);                     break;
213                         case Shader::OPCODE_LOG2X:      log2x(d, s0, pp);               break;
214                         case Shader::OPCODE_LOG2:       log2(d, s0, pp);                break;
215                         case Shader::OPCODE_LOGP:       logp(d, s0, version);           break;
216                         case Shader::OPCODE_LOG:        log(d, s0, pp);                 break;
217                         case Shader::OPCODE_LRP:        lrp(d, s0, s1, s2);             break;
218                         case Shader::OPCODE_STEP:       step(d, s0, s1);                break;
219                         case Shader::OPCODE_SMOOTH:     smooth(d, s0, s1, s2);          break;
220                         case Shader::OPCODE_FLOATBITSTOINT:
221                         case Shader::OPCODE_FLOATBITSTOUINT:
222                         case Shader::OPCODE_INTBITSTOFLOAT:
223                         case Shader::OPCODE_UINTBITSTOFLOAT: d = s0;                    break;
224                         case Shader::OPCODE_PACKSNORM2x16:   packSnorm2x16(d, s0);      break;
225                         case Shader::OPCODE_PACKUNORM2x16:   packUnorm2x16(d, s0);      break;
226                         case Shader::OPCODE_PACKHALF2x16:    packHalf2x16(d, s0);       break;
227                         case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0);    break;
228                         case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0);    break;
229                         case Shader::OPCODE_UNPACKHALF2x16:  unpackHalf2x16(d, s0);     break;
230                         case Shader::OPCODE_M3X2:       M3X2(d, s0, src1);              break;
231                         case Shader::OPCODE_M3X3:       M3X3(d, s0, src1);              break;
232                         case Shader::OPCODE_M3X4:       M3X4(d, s0, src1);              break;
233                         case Shader::OPCODE_M4X3:       M4X3(d, s0, src1);              break;
234                         case Shader::OPCODE_M4X4:       M4X4(d, s0, src1);              break;
235                         case Shader::OPCODE_MAD:        mad(d, s0, s1, s2);             break;
236                         case Shader::OPCODE_IMAD:       imad(d, s0, s1, s2);            break;
237                         case Shader::OPCODE_MAX:        max(d, s0, s1);                 break;
238                         case Shader::OPCODE_IMAX:       imax(d, s0, s1);                break;
239                         case Shader::OPCODE_UMAX:       umax(d, s0, s1);                break;
240                         case Shader::OPCODE_MIN:        min(d, s0, s1);                 break;
241                         case Shader::OPCODE_IMIN:       imin(d, s0, s1);                break;
242                         case Shader::OPCODE_UMIN:       umin(d, s0, s1);                break;
243                         case Shader::OPCODE_MOV:        mov(d, s0, integer);            break;
244                         case Shader::OPCODE_MOVA:       mov(d, s0, true);               break;
245                         case Shader::OPCODE_NEG:        neg(d, s0);                     break;
246                         case Shader::OPCODE_INEG:       ineg(d, s0);                    break;
247                         case Shader::OPCODE_F2B:        f2b(d, s0);                     break;
248                         case Shader::OPCODE_B2F:        b2f(d, s0);                     break;
249                         case Shader::OPCODE_F2I:        f2i(d, s0);                     break;
250                         case Shader::OPCODE_I2F:        i2f(d, s0);                     break;
251                         case Shader::OPCODE_F2U:        f2u(d, s0);                     break;
252                         case Shader::OPCODE_U2F:        u2f(d, s0);                     break;
253                         case Shader::OPCODE_I2B:        i2b(d, s0);                     break;
254                         case Shader::OPCODE_B2I:        b2i(d, s0);                     break;
255                         case Shader::OPCODE_MUL:        mul(d, s0, s1);                 break;
256                         case Shader::OPCODE_IMUL:       imul(d, s0, s1);                break;
257                         case Shader::OPCODE_NRM2:       nrm2(d, s0, pp);                break;
258                         case Shader::OPCODE_NRM3:       nrm3(d, s0, pp);                break;
259                         case Shader::OPCODE_NRM4:       nrm4(d, s0, pp);                break;
260                         case Shader::OPCODE_POWX:       powx(d, s0, s1, pp);            break;
261                         case Shader::OPCODE_POW:        pow(d, s0, s1, pp);             break;
262                         case Shader::OPCODE_RCPX:       rcpx(d, s0, pp);                break;
263                         case Shader::OPCODE_DIV:        div(d, s0, s1);                 break;
264                         case Shader::OPCODE_IDIV:       idiv(d, s0, s1);                break;
265                         case Shader::OPCODE_UDIV:       udiv(d, s0, s1);                break;
266                         case Shader::OPCODE_MOD:        mod(d, s0, s1);                 break;
267                         case Shader::OPCODE_IMOD:       imod(d, s0, s1);                break;
268                         case Shader::OPCODE_UMOD:       umod(d, s0, s1);                break;
269                         case Shader::OPCODE_SHL:        shl(d, s0, s1);                 break;
270                         case Shader::OPCODE_ISHR:       ishr(d, s0, s1);                break;
271                         case Shader::OPCODE_USHR:       ushr(d, s0, s1);                break;
272                         case Shader::OPCODE_RSQX:       rsqx(d, s0, pp);                break;
273                         case Shader::OPCODE_SQRT:       sqrt(d, s0, pp);                break;
274                         case Shader::OPCODE_RSQ:        rsq(d, s0, pp);                 break;
275                         case Shader::OPCODE_LEN2:       len2(d.x, s0, pp);              break;
276                         case Shader::OPCODE_LEN3:       len3(d.x, s0, pp);              break;
277                         case Shader::OPCODE_LEN4:       len4(d.x, s0, pp);              break;
278                         case Shader::OPCODE_DIST1:      dist1(d.x, s0, s1, pp);         break;
279                         case Shader::OPCODE_DIST2:      dist2(d.x, s0, s1, pp);         break;
280                         case Shader::OPCODE_DIST3:      dist3(d.x, s0, s1, pp);         break;
281                         case Shader::OPCODE_DIST4:      dist4(d.x, s0, s1, pp);         break;
282                         case Shader::OPCODE_SGE:        step(d, s1, s0);                break;
283                         case Shader::OPCODE_SGN:        sgn(d, s0);                     break;
284                         case Shader::OPCODE_ISGN:       isgn(d, s0);                    break;
285                         case Shader::OPCODE_SINCOS:     sincos(d, s0, pp);              break;
286                         case Shader::OPCODE_COS:        cos(d, s0, pp);                 break;
287                         case Shader::OPCODE_SIN:        sin(d, s0, pp);                 break;
288                         case Shader::OPCODE_TAN:        tan(d, s0);                     break;
289                         case Shader::OPCODE_ACOS:       acos(d, s0);                    break;
290                         case Shader::OPCODE_ASIN:       asin(d, s0);                    break;
291                         case Shader::OPCODE_ATAN:       atan(d, s0);                    break;
292                         case Shader::OPCODE_ATAN2:      atan2(d, s0, s1);               break;
293                         case Shader::OPCODE_COSH:       cosh(d, s0, pp);                break;
294                         case Shader::OPCODE_SINH:       sinh(d, s0, pp);                break;
295                         case Shader::OPCODE_TANH:       tanh(d, s0, pp);                break;
296                         case Shader::OPCODE_ACOSH:      acosh(d, s0, pp);               break;
297                         case Shader::OPCODE_ASINH:      asinh(d, s0, pp);               break;
298                         case Shader::OPCODE_ATANH:      atanh(d, s0, pp);               break;
299                         case Shader::OPCODE_SLT:        slt(d, s0, s1);                 break;
300                         case Shader::OPCODE_SUB:        sub(d, s0, s1);                 break;
301                         case Shader::OPCODE_ISUB:       isub(d, s0, s1);                break;
302                         case Shader::OPCODE_BREAK:      BREAK();                        break;
303                         case Shader::OPCODE_BREAKC:     BREAKC(s0, s1, control);        break;
304                         case Shader::OPCODE_BREAKP:     BREAKP(src0);                   break;
305                         case Shader::OPCODE_CONTINUE:   CONTINUE();                     break;
306                         case Shader::OPCODE_TEST:       TEST();                         break;
307                         case Shader::OPCODE_CALL:       CALL(dst.label, dst.callSite);  break;
308                         case Shader::OPCODE_CALLNZ:     CALLNZ(dst.label, dst.callSite, src0); break;
309                         case Shader::OPCODE_ELSE:       ELSE();                         break;
310                         case Shader::OPCODE_ENDIF:      ENDIF();                        break;
311                         case Shader::OPCODE_ENDLOOP:    ENDLOOP();                      break;
312                         case Shader::OPCODE_ENDREP:     ENDREP();                       break;
313                         case Shader::OPCODE_ENDWHILE:   ENDWHILE();                     break;
314                         case Shader::OPCODE_ENDSWITCH:  ENDSWITCH();                    break;
315                         case Shader::OPCODE_IF:         IF(src0);                       break;
316                         case Shader::OPCODE_IFC:        IFC(s0, s1, control);           break;
317                         case Shader::OPCODE_LABEL:      LABEL(dst.index);               break;
318                         case Shader::OPCODE_LOOP:       LOOP(src1);                     break;
319                         case Shader::OPCODE_REP:        REP(src0);                      break;
320                         case Shader::OPCODE_WHILE:      WHILE(src0);                    break;
321                         case Shader::OPCODE_SWITCH:     SWITCH();                       break;
322                         case Shader::OPCODE_RET:        RET();                          break;
323                         case Shader::OPCODE_LEAVE:      LEAVE();                        break;
324                         case Shader::OPCODE_CMP:        cmp(d, s0, s1, control);        break;
325                         case Shader::OPCODE_ICMP:       icmp(d, s0, s1, control);       break;
326                         case Shader::OPCODE_UCMP:       ucmp(d, s0, s1, control);       break;
327                         case Shader::OPCODE_SELECT:     select(d, s0, s1, s2);          break;
328                         case Shader::OPCODE_EXTRACT:    extract(d.x, s0, s1.x);         break;
329                         case Shader::OPCODE_INSERT:     insert(d, s0, s1.x, s2.x);      break;
330                         case Shader::OPCODE_ALL:        all(d.x, s0);                   break;
331                         case Shader::OPCODE_ANY:        any(d.x, s0);                   break;
332                         case Shader::OPCODE_NOT:        bitwise_not(d, s0);             break;
333                         case Shader::OPCODE_OR:         bitwise_or(d, s0, s1);          break;
334                         case Shader::OPCODE_XOR:        bitwise_xor(d, s0, s1);         break;
335                         case Shader::OPCODE_AND:        bitwise_and(d, s0, s1);         break;
336                         case Shader::OPCODE_EQ:         equal(d, s0, s1);               break;
337                         case Shader::OPCODE_NE:         notEqual(d, s0, s1);            break;
338                         case Shader::OPCODE_TEXLDL:     TEXLDL(d, s0, src1);            break;
339                         case Shader::OPCODE_TEX:        TEX(d, s0, src1);               break;
340                         case Shader::OPCODE_TEXOFFSET:  TEXOFFSET(d, s0, src1, s2);     break;
341                         case Shader::OPCODE_TEXLDLOFFSET: TEXLDL(d, s0, src1, s2);      break;
342                         case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1);        break;
343                         case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCH(d, s0, src1, s2); break;
344                         case Shader::OPCODE_TEXGRAD:    TEXGRAD(d, s0, src1, s2, s3);   break;
345                         case Shader::OPCODE_TEXGRADOFFSET: TEXGRAD(d, s0, src1, s2, s3, s4); break;
346                         case Shader::OPCODE_TEXSIZE:    TEXSIZE(d, s0.x, src1);         break;
347                         case Shader::OPCODE_END:                                        break;
348                         default:
349                                 ASSERT(false);
350                         }
351
352                         if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_NOP)
353                         {
354                                 if(dst.integer)
355                                 {
356                                         switch(opcode)
357                                         {
358                                         case Shader::OPCODE_DIV:
359                                                 if(dst.x) d.x = Trunc(d.x);
360                                                 if(dst.y) d.y = Trunc(d.y);
361                                                 if(dst.z) d.z = Trunc(d.z);
362                                                 if(dst.w) d.w = Trunc(d.w);
363                                                 break;
364                                         default:
365                                                 break;   // No truncation to integer required when arguments are integer
366                                         }
367                                 }
368
369                                 if(dst.saturate)
370                                 {
371                                         if(dst.x) d.x = Max(d.x, Float4(0.0f));
372                                         if(dst.y) d.y = Max(d.y, Float4(0.0f));
373                                         if(dst.z) d.z = Max(d.z, Float4(0.0f));
374                                         if(dst.w) d.w = Max(d.w, Float4(0.0f));
375
376                                         if(dst.x) d.x = Min(d.x, Float4(1.0f));
377                                         if(dst.y) d.y = Min(d.y, Float4(1.0f));
378                                         if(dst.z) d.z = Min(d.z, Float4(1.0f));
379                                         if(dst.w) d.w = Min(d.w, Float4(1.0f));
380                                 }
381
382                                 if(instruction->isPredicated())
383                                 {
384                                         Vector4f pDst;   // FIXME: Rename
385
386                                         switch(dst.type)
387                                         {
388                                         case Shader::PARAMETER_VOID: break;
389                                         case Shader::PARAMETER_TEMP:
390                                                 if(dst.rel.type == Shader::PARAMETER_VOID)
391                                                 {
392                                                         if(dst.x) pDst.x = r[dst.index].x;
393                                                         if(dst.y) pDst.y = r[dst.index].y;
394                                                         if(dst.z) pDst.z = r[dst.index].z;
395                                                         if(dst.w) pDst.w = r[dst.index].w;
396                                                 }
397                                                 else
398                                                 {
399                                                         Int a = relativeAddress(dst);
400
401                                                         if(dst.x) pDst.x = r[dst.index + a].x;
402                                                         if(dst.y) pDst.y = r[dst.index + a].y;
403                                                         if(dst.z) pDst.z = r[dst.index + a].z;
404                                                         if(dst.w) pDst.w = r[dst.index + a].w;
405                                                 }
406                                                 break;
407                                         case Shader::PARAMETER_ADDR: pDst = a0; break;
408                                         case Shader::PARAMETER_RASTOUT:
409                                                 switch(dst.index)
410                                                 {
411                                                 case 0:
412                                                         if(dst.x) pDst.x = o[Pos].x;
413                                                         if(dst.y) pDst.y = o[Pos].y;
414                                                         if(dst.z) pDst.z = o[Pos].z;
415                                                         if(dst.w) pDst.w = o[Pos].w;
416                                                         break;
417                                                 case 1:
418                                                         pDst.x = o[Fog].x;
419                                                         break;
420                                                 case 2:
421                                                         pDst.x = o[Pts].y;
422                                                         break;
423                                                 default:
424                                                         ASSERT(false);
425                                                 }
426                                                 break;
427                                         case Shader::PARAMETER_ATTROUT:
428                                                 if(dst.x) pDst.x = o[C0 + dst.index].x;
429                                                 if(dst.y) pDst.y = o[C0 + dst.index].y;
430                                                 if(dst.z) pDst.z = o[C0 + dst.index].z;
431                                                 if(dst.w) pDst.w = o[C0 + dst.index].w;
432                                                 break;
433                                         case Shader::PARAMETER_TEXCRDOUT:
434                                 //      case Shader::PARAMETER_OUTPUT:
435                                                 if(version < 0x0300)
436                                                 {
437                                                         if(dst.x) pDst.x = o[T0 + dst.index].x;
438                                                         if(dst.y) pDst.y = o[T0 + dst.index].y;
439                                                         if(dst.z) pDst.z = o[T0 + dst.index].z;
440                                                         if(dst.w) pDst.w = o[T0 + dst.index].w;
441                                                 }
442                                                 else
443                                                 {
444                                                         if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
445                                                         {
446                                                                 if(dst.x) pDst.x = o[dst.index].x;
447                                                                 if(dst.y) pDst.y = o[dst.index].y;
448                                                                 if(dst.z) pDst.z = o[dst.index].z;
449                                                                 if(dst.w) pDst.w = o[dst.index].w;
450                                                         }
451                                                         else
452                                                         {
453                                                                 Int a = relativeAddress(dst);
454
455                                                                 if(dst.x) pDst.x = o[dst.index + a].x;
456                                                                 if(dst.y) pDst.y = o[dst.index + a].y;
457                                                                 if(dst.z) pDst.z = o[dst.index + a].z;
458                                                                 if(dst.w) pDst.w = o[dst.index + a].w;
459                                                         }
460                                                 }
461                                                 break;
462                                         case Shader::PARAMETER_LABEL:                break;
463                                         case Shader::PARAMETER_PREDICATE: pDst = p0; break;
464                                         case Shader::PARAMETER_INPUT:                break;
465                                         default:
466                                                 ASSERT(false);
467                                         }
468
469                                         Int4 enable = enableMask(instruction);
470
471                                         Int4 xEnable = enable;
472                                         Int4 yEnable = enable;
473                                         Int4 zEnable = enable;
474                                         Int4 wEnable = enable;
475
476                                         if(predicate)
477                                         {
478                                                 unsigned char pSwizzle = instruction->predicateSwizzle;
479
480                                                 Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03];
481                                                 Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03];
482                                                 Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03];
483                                                 Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03];
484
485                                                 if(!instruction->predicateNot)
486                                                 {
487                                                         if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
488                                                         if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
489                                                         if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
490                                                         if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
491                                                 }
492                                                 else
493                                                 {
494                                                         if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
495                                                         if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
496                                                         if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
497                                                         if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
498                                                 }
499                                         }
500
501                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
502                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
503                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
504                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
505
506                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
507                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
508                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
509                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
510                                 }
511
512                                 switch(dst.type)
513                                 {
514                                 case Shader::PARAMETER_VOID:
515                                         break;
516                                 case Shader::PARAMETER_TEMP:
517                                         if(dst.rel.type == Shader::PARAMETER_VOID)
518                                         {
519                                                 if(dst.x) r[dst.index].x = d.x;
520                                                 if(dst.y) r[dst.index].y = d.y;
521                                                 if(dst.z) r[dst.index].z = d.z;
522                                                 if(dst.w) r[dst.index].w = d.w;
523                                         }
524                                         else
525                                         {
526                                                 Int a = relativeAddress(dst);
527
528                                                 if(dst.x) r[dst.index + a].x = d.x;
529                                                 if(dst.y) r[dst.index + a].y = d.y;
530                                                 if(dst.z) r[dst.index + a].z = d.z;
531                                                 if(dst.w) r[dst.index + a].w = d.w;
532                                         }
533                                         break;
534                                 case Shader::PARAMETER_ADDR:
535                                         if(dst.x) a0.x = d.x;
536                                         if(dst.y) a0.y = d.y;
537                                         if(dst.z) a0.z = d.z;
538                                         if(dst.w) a0.w = d.w;
539                                         break;
540                                 case Shader::PARAMETER_RASTOUT:
541                                         switch(dst.index)
542                                         {
543                                         case 0:
544                                                 if(dst.x) o[Pos].x = d.x;
545                                                 if(dst.y) o[Pos].y = d.y;
546                                                 if(dst.z) o[Pos].z = d.z;
547                                                 if(dst.w) o[Pos].w = d.w;
548                                                 break;
549                                         case 1:
550                                                 o[Fog].x = d.x;
551                                                 break;
552                                         case 2:
553                                                 o[Pts].y = d.x;
554                                                 break;
555                                         default:        ASSERT(false);
556                                         }
557                                         break;
558                                 case Shader::PARAMETER_ATTROUT:
559                                         if(dst.x) o[C0 + dst.index].x = d.x;
560                                         if(dst.y) o[C0 + dst.index].y = d.y;
561                                         if(dst.z) o[C0 + dst.index].z = d.z;
562                                         if(dst.w) o[C0 + dst.index].w = d.w;
563                                         break;
564                                 case Shader::PARAMETER_TEXCRDOUT:
565                         //      case Shader::PARAMETER_OUTPUT:
566                                         if(version < 0x0300)
567                                         {
568                                                 if(dst.x) o[T0 + dst.index].x = d.x;
569                                                 if(dst.y) o[T0 + dst.index].y = d.y;
570                                                 if(dst.z) o[T0 + dst.index].z = d.z;
571                                                 if(dst.w) o[T0 + dst.index].w = d.w;
572                                         }
573                                         else
574                                         {
575                                                 if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
576                                                 {
577                                                         if(dst.x) o[dst.index].x = d.x;
578                                                         if(dst.y) o[dst.index].y = d.y;
579                                                         if(dst.z) o[dst.index].z = d.z;
580                                                         if(dst.w) o[dst.index].w = d.w;
581                                                 }
582                                                 else
583                                                 {
584                                                         Int a = relativeAddress(dst);
585
586                                                         if(dst.x) o[dst.index + a].x = d.x;
587                                                         if(dst.y) o[dst.index + a].y = d.y;
588                                                         if(dst.z) o[dst.index + a].z = d.z;
589                                                         if(dst.w) o[dst.index + a].w = d.w;
590                                                 }
591                                         }
592                                         break;
593                                 case Shader::PARAMETER_LABEL:             break;
594                                 case Shader::PARAMETER_PREDICATE: p0 = d; break;
595                                 case Shader::PARAMETER_INPUT:             break;
596                                 default:
597                                         ASSERT(false);
598                                 }
599                         }
600                 }
601
602                 if(currentLabel != -1)
603                 {
604                         Nucleus::setInsertBlock(returnBlock);
605                 }
606         }
607
608         void VertexProgram::passThrough()
609         {
610                 if(shader)
611                 {
612                         for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
613                         {
614                                 unsigned char usage = shader->getOutput(i, 0).usage;
615
616                                 switch(usage)
617                                 {
618                                 case 0xFF:
619                                         continue;
620                                 case Shader::USAGE_PSIZE:
621                                         o[i].y = v[i].x;
622                                         break;
623                                 case Shader::USAGE_TEXCOORD:
624                                         o[i].x = v[i].x;
625                                         o[i].y = v[i].y;
626                                         o[i].z = v[i].z;
627                                         o[i].w = v[i].w;
628                                         break;
629                                 case Shader::USAGE_POSITION:
630                                         o[i].x = v[i].x;
631                                         o[i].y = v[i].y;
632                                         o[i].z = v[i].z;
633                                         o[i].w = v[i].w;
634                                         break;
635                                 case Shader::USAGE_COLOR:
636                                         o[i].x = v[i].x;
637                                         o[i].y = v[i].y;
638                                         o[i].z = v[i].z;
639                                         o[i].w = v[i].w;
640                                         break;
641                                 case Shader::USAGE_FOG:
642                                         o[i].x = v[i].x;
643                                         break;
644                                 default:
645                                         ASSERT(false);
646                                 }
647                         }
648                 }
649                 else
650                 {
651                         o[Pos].x = v[PositionT].x;
652                         o[Pos].y = v[PositionT].y;
653                         o[Pos].z = v[PositionT].z;
654                         o[Pos].w = v[PositionT].w;
655
656                         for(int i = 0; i < 2; i++)
657                         {
658                                 o[C0 + i].x = v[Color0 + i].x;
659                                 o[C0 + i].y = v[Color0 + i].y;
660                                 o[C0 + i].z = v[Color0 + i].z;
661                                 o[C0 + i].w = v[Color0 + i].w;
662                         }
663
664                         for(int i = 0; i < 8; i++)
665                         {
666                                 o[T0 + i].x = v[TexCoord0 + i].x;
667                                 o[T0 + i].y = v[TexCoord0 + i].y;
668                                 o[T0 + i].z = v[TexCoord0 + i].z;
669                                 o[T0 + i].w = v[TexCoord0 + i].w;
670                         }
671
672                         o[Pts].y = v[PointSize].x;
673                 }
674         }
675
676         Vector4f VertexProgram::fetchRegister(const Src &src, unsigned int offset)
677         {
678                 Vector4f reg;
679                 unsigned int i = src.index + offset;
680
681                 switch(src.type)
682                 {
683                 case Shader::PARAMETER_TEMP:
684                         if(src.rel.type == Shader::PARAMETER_VOID)
685                         {
686                                 reg = r[i];
687                         }
688                         else
689                         {
690                                 reg = r[i + relativeAddress(src, src.bufferIndex)];
691                         }
692                         break;
693                 case Shader::PARAMETER_CONST:
694                         reg = readConstant(src, offset);
695                         break;
696                 case Shader::PARAMETER_INPUT:
697                         if(src.rel.type == Shader::PARAMETER_VOID)
698                         {
699                                 reg = v[i];
700                         }
701                         else
702                         {
703                                 reg = v[i + relativeAddress(src, src.bufferIndex)];
704                         }
705                         break;
706                 case Shader::PARAMETER_VOID: return r[0];   // Dummy
707                 case Shader::PARAMETER_FLOAT4LITERAL:
708                         reg.x = Float4(src.value[0]);
709                         reg.y = Float4(src.value[1]);
710                         reg.z = Float4(src.value[2]);
711                         reg.w = Float4(src.value[3]);
712                         break;
713                 case Shader::PARAMETER_ADDR:      reg = a0; break;
714                 case Shader::PARAMETER_CONSTBOOL: return r[0];   // Dummy
715                 case Shader::PARAMETER_CONSTINT:  return r[0];   // Dummy
716                 case Shader::PARAMETER_LOOP:      return r[0];   // Dummy
717                 case Shader::PARAMETER_PREDICATE: return r[0];   // Dummy
718                 case Shader::PARAMETER_SAMPLER:
719                         if(src.rel.type == Shader::PARAMETER_VOID)
720                         {
721                                 reg.x = As<Float4>(Int4(i));
722                         }
723                         else if(src.rel.type == Shader::PARAMETER_TEMP)
724                         {
725                                 reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x));
726                         }
727                         return reg;
728                 case Shader::PARAMETER_OUTPUT:
729                         if(src.rel.type == Shader::PARAMETER_VOID)
730                         {
731                                 reg = o[i];
732                         }
733                         else
734                         {
735                                 reg = o[i + relativeAddress(src, src.bufferIndex)];
736                         }
737                         break;
738                 case Shader::PARAMETER_MISCTYPE:
739                         if(src.index == Shader::InstanceIDIndex)
740                         {
741                                 reg.x = As<Float>(instanceID);
742                         }
743                         else if(src.index == Shader::VertexIDIndex)
744                         {
745                                 reg.x = As<Float4>(vertexID);
746                         }
747                         else ASSERT(false);
748                         return reg;
749                 default:
750                         ASSERT(false);
751                 }
752
753                 const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
754                 const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
755                 const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
756                 const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
757
758                 Vector4f mod;
759
760                 switch(src.modifier)
761                 {
762                 case Shader::MODIFIER_NONE:
763                         mod.x = x;
764                         mod.y = y;
765                         mod.z = z;
766                         mod.w = w;
767                         break;
768                 case Shader::MODIFIER_NEGATE:
769                         mod.x = -x;
770                         mod.y = -y;
771                         mod.z = -z;
772                         mod.w = -w;
773                         break;
774                 case Shader::MODIFIER_ABS:
775                         mod.x = Abs(x);
776                         mod.y = Abs(y);
777                         mod.z = Abs(z);
778                         mod.w = Abs(w);
779                         break;
780                 case Shader::MODIFIER_ABS_NEGATE:
781                         mod.x = -Abs(x);
782                         mod.y = -Abs(y);
783                         mod.z = -Abs(z);
784                         mod.w = -Abs(w);
785                         break;
786                 case Shader::MODIFIER_NOT:
787                         mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
788                         mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
789                         mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
790                         mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
791                         break;
792                 default:
793                         ASSERT(false);
794                 }
795
796                 return mod;
797         }
798
799         RValue<Pointer<Byte>> VertexProgram::uniformAddress(int bufferIndex, unsigned int index)
800         {
801                 if(bufferIndex == -1)
802                 {
803                         return data + OFFSET(DrawData, vs.c[index]);
804                 }
805                 else
806                 {
807                         return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, vs.u[bufferIndex])) + index;
808                 }
809         }
810
811         RValue<Pointer<Byte>> VertexProgram::uniformAddress(int bufferIndex, unsigned int index, Int& offset)
812         {
813                 return uniformAddress(bufferIndex, index) + offset * sizeof(float4);
814         }
815
816         Vector4f VertexProgram::readConstant(const Src &src, unsigned int offset)
817         {
818                 Vector4f c;
819                 unsigned int i = src.index + offset;
820
821                 if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
822                 {
823                         c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i));
824
825                         c.x = c.x.xxxx;
826                         c.y = c.y.yyyy;
827                         c.z = c.z.zzzz;
828                         c.w = c.w.wwww;
829
830                         if(shader->containsDefineInstruction())   // Constant may be known at compile time
831                         {
832                                 for(size_t j = 0; j < shader->getLength(); j++)
833                                 {
834                                         const Shader::Instruction &instruction = *shader->getInstruction(j);
835
836                                         if(instruction.opcode == Shader::OPCODE_DEF)
837                                         {
838                                                 if(instruction.dst.index == i)
839                                                 {
840                                                         c.x = Float4(instruction.src[0].value[0]);
841                                                         c.y = Float4(instruction.src[0].value[1]);
842                                                         c.z = Float4(instruction.src[0].value[2]);
843                                                         c.w = Float4(instruction.src[0].value[3]);
844
845                                                         break;
846                                                 }
847                                         }
848                                 }
849                         }
850                 }
851                 else if(src.rel.type == Shader::PARAMETER_LOOP)
852                 {
853                         Int loopCounter = aL[loopDepth];
854
855                         c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, loopCounter));
856
857                         c.x = c.x.xxxx;
858                         c.y = c.y.yyyy;
859                         c.z = c.z.zzzz;
860                         c.w = c.w.wwww;
861                 }
862                 else
863                 {
864                         if(src.rel.deterministic)
865                         {
866                                 Int a = relativeAddress(src, src.bufferIndex);
867
868                                 c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));
869
870                                 c.x = c.x.xxxx;
871                                 c.y = c.y.yyyy;
872                                 c.z = c.z.zzzz;
873                                 c.w = c.w.wwww;
874                         }
875                         else
876                         {
877                                 int component = src.rel.swizzle & 0x03;
878                                 Float4 a;
879
880                                 switch(src.rel.type)
881                                 {
882                                 case Shader::PARAMETER_ADDR:     a = a0[component]; break;
883                                 case Shader::PARAMETER_TEMP:     a = r[src.rel.index][component]; break;
884                                 case Shader::PARAMETER_INPUT:    a = v[src.rel.index][component]; break;
885                                 case Shader::PARAMETER_OUTPUT:   a = o[src.rel.index][component]; break;
886                                 case Shader::PARAMETER_CONST:    a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break;
887                                 case Shader::PARAMETER_MISCTYPE:
888                                         if(src.rel.index == Shader::InstanceIDIndex)
889                                         {
890                                                 a = As<Float4>(Int4(instanceID)); break;
891                                         }
892                                         else if(src.rel.index == Shader::VertexIDIndex)
893                                         {
894                                                 a = As<Float4>(vertexID); break;
895                                         }
896                                         else ASSERT(false);
897                                         break;
898                                 default: ASSERT(false);
899                                 }
900
901                                 Int4 index = Int4(i) + As<Int4>(a) * Int4(src.rel.scale);
902
903                                 index = Min(As<UInt4>(index), UInt4(VERTEX_UNIFORM_VECTORS));   // Clamp to constant register range, c[VERTEX_UNIFORM_VECTORS] = {0, 0, 0, 0}
904
905                                 Int index0 = Extract(index, 0);
906                                 Int index1 = Extract(index, 1);
907                                 Int index2 = Extract(index, 2);
908                                 Int index3 = Extract(index, 3);
909
910                                 c.x = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index0), 16);
911                                 c.y = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index1), 16);
912                                 c.z = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index2), 16);
913                                 c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index3), 16);
914
915                                 transpose4x4(c.x, c.y, c.z, c.w);
916                         }
917                 }
918
919                 return c;
920         }
921
922         Int VertexProgram::relativeAddress(const Shader::Parameter &var, int bufferIndex)
923         {
924                 ASSERT(var.rel.deterministic);
925
926                 if(var.rel.type == Shader::PARAMETER_TEMP)
927                 {
928                         return As<Int>(Extract(r[var.rel.index].x, 0)) * var.rel.scale;
929                 }
930                 else if(var.rel.type == Shader::PARAMETER_INPUT)
931                 {
932                         return As<Int>(Extract(v[var.rel.index].x, 0)) * var.rel.scale;
933                 }
934                 else if(var.rel.type == Shader::PARAMETER_OUTPUT)
935                 {
936                         return As<Int>(Extract(o[var.rel.index].x, 0)) * var.rel.scale;
937                 }
938                 else if(var.rel.type == Shader::PARAMETER_CONST)
939                 {
940                         return *Pointer<Int>(uniformAddress(bufferIndex, var.rel.index)) * var.rel.scale;
941                 }
942                 else if(var.rel.type == Shader::PARAMETER_LOOP)
943                 {
944                         return aL[loopDepth];
945                 }
946                 else ASSERT(false);
947
948                 return 0;
949         }
950
951         Int4 VertexProgram::enableMask(const Shader::Instruction *instruction)
952         {
953                 Int4 enable = instruction->analysisBranch ? Int4(enableStack[enableIndex]) : Int4(0xFFFFFFFF);
954
955                 if(!whileTest)
956                 {
957                         if(shader->containsBreakInstruction() && instruction->analysisBreak)
958                         {
959                                 enable &= enableBreak;
960                         }
961
962                         if(shader->containsContinueInstruction() && instruction->analysisContinue)
963                         {
964                                 enable &= enableContinue;
965                         }
966
967                         if(shader->containsLeaveInstruction() && instruction->analysisLeave)
968                         {
969                                 enable &= enableLeave;
970                         }
971                 }
972
973                 return enable;
974         }
975
976         void VertexProgram::M3X2(Vector4f &dst, Vector4f &src0, Src &src1)
977         {
978                 Vector4f row0 = fetchRegister(src1, 0);
979                 Vector4f row1 = fetchRegister(src1, 1);
980
981                 dst.x = dot3(src0, row0);
982                 dst.y = dot3(src0, row1);
983         }
984
985         void VertexProgram::M3X3(Vector4f &dst, Vector4f &src0, Src &src1)
986         {
987                 Vector4f row0 = fetchRegister(src1, 0);
988                 Vector4f row1 = fetchRegister(src1, 1);
989                 Vector4f row2 = fetchRegister(src1, 2);
990
991                 dst.x = dot3(src0, row0);
992                 dst.y = dot3(src0, row1);
993                 dst.z = dot3(src0, row2);
994         }
995
996         void VertexProgram::M3X4(Vector4f &dst, Vector4f &src0, Src &src1)
997         {
998                 Vector4f row0 = fetchRegister(src1, 0);
999                 Vector4f row1 = fetchRegister(src1, 1);
1000                 Vector4f row2 = fetchRegister(src1, 2);
1001                 Vector4f row3 = fetchRegister(src1, 3);
1002
1003                 dst.x = dot3(src0, row0);
1004                 dst.y = dot3(src0, row1);
1005                 dst.z = dot3(src0, row2);
1006                 dst.w = dot3(src0, row3);
1007         }
1008
1009         void VertexProgram::M4X3(Vector4f &dst, Vector4f &src0, Src &src1)
1010         {
1011                 Vector4f row0 = fetchRegister(src1, 0);
1012                 Vector4f row1 = fetchRegister(src1, 1);
1013                 Vector4f row2 = fetchRegister(src1, 2);
1014
1015                 dst.x = dot4(src0, row0);
1016                 dst.y = dot4(src0, row1);
1017                 dst.z = dot4(src0, row2);
1018         }
1019
1020         void VertexProgram::M4X4(Vector4f &dst, Vector4f &src0, Src &src1)
1021         {
1022                 Vector4f row0 = fetchRegister(src1, 0);
1023                 Vector4f row1 = fetchRegister(src1, 1);
1024                 Vector4f row2 = fetchRegister(src1, 2);
1025                 Vector4f row3 = fetchRegister(src1, 3);
1026
1027                 dst.x = dot4(src0, row0);
1028                 dst.y = dot4(src0, row1);
1029                 dst.z = dot4(src0, row2);
1030                 dst.w = dot4(src0, row3);
1031         }
1032
1033         void VertexProgram::BREAK()
1034         {
1035                 BasicBlock *deadBlock = Nucleus::createBasicBlock();
1036                 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
1037
1038                 if(breakDepth == 0)
1039                 {
1040                         enableIndex = enableIndex - breakDepth;
1041                         Nucleus::createBr(endBlock);
1042                 }
1043                 else
1044                 {
1045                         enableBreak = enableBreak & ~enableStack[enableIndex];
1046                         Bool allBreak = SignMask(enableBreak) == 0x0;
1047
1048                         enableIndex = enableIndex - breakDepth;
1049                         branch(allBreak, endBlock, deadBlock);
1050                 }
1051
1052                 Nucleus::setInsertBlock(deadBlock);
1053                 enableIndex = enableIndex + breakDepth;
1054         }
1055
1056         void VertexProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control)   // Conditional break: compares src0.x with src1.x as selected by 'control', then calls BREAK(Int4 &)
1057         {
1058                 Int4 condition;   // Assigned by exactly one of the cases below
1059
1060                 switch(control)   // Only the x components of the two sources take part in the comparison
1061                 {
1062                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x,  src1.x);   break;
1063                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);             break;
1064                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x);    break;
1065                 case Shader::CONTROL_LT: condition = CmpLT(src0.x,  src1.x);    break;
1066                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x);    break;
1067                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);             break;
1068                 default:   // Any other control value is treated as a programming error; condition is not assigned on this path
1069                         ASSERT(false);
1070                 }
1071
1072                 BREAK(condition);   // BREAK(Int4 &) additionally masks the condition with the current enable mask
1073         }
1074
1075         void VertexProgram::BREAKP(const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
1076         {   // Conditional break driven by one component of the predicate register p0
1077                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);   // The low two bits of the swizzle pick the p0 component used as the condition
1078
1079                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1080                 {
1081                         condition = ~condition;   // Bitwise inversion of the whole mask
1082                 }
1083
1084                 BREAK(condition);
1085         }
1086
1087         void VertexProgram::BREAK(Int4 &condition)   // Masked break shared by BREAKC and BREAKP; note that 'condition' is modified in place
1088         {
1089                 condition &= enableStack[enableIndex];   // Only components enabled at the current nesting level can break
1090
1091                 BasicBlock *continueBlock = Nucleus::createBasicBlock();   // Where emission continues when the loop is not left
1092                 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];   // End block of the innermost LOOP/REP/WHILE/SWITCH
1093
1094                 enableBreak = enableBreak & ~condition;   // Clear enableBreak for the components that break
1095                 Bool allBreak = SignMask(enableBreak) == 0x0;   // Presumably true when no component remains enabled in enableBreak - confirm SignMask semantics
1096
1097                 enableIndex = enableIndex - breakDepth;   // Step enableIndex back by breakDepth before the branch towards endBlock
1098                 branch(allBreak, endBlock, continueBlock);
1099
1100                 Nucleus::setInsertBlock(continueBlock);
1101                 enableIndex = enableIndex + breakDepth;   // Undo the adjustment for the code that follows the break
1102         }
1103
1104         void VertexProgram::CONTINUE()   // Mask-only continue: no branch is emitted here
1105         {
1106                 enableContinue = enableContinue & ~enableStack[enableIndex];   // Clear enableContinue for the components enabled at the current nesting level
1107         }
1108
1109         void VertexProgram::TEST()   // Raises the whileTest flag; ENDWHILE clears it again
1110         {
1111                 whileTest = true;
1112         }
1113
1114         void VertexProgram::CALL(int labelIndex, int callSiteIndex)   // Unconditional call of the subroutine at labelIndex; emission resumes in this call site's return block
1115         {
1116                 if(!labelBlock[labelIndex])   // The label's block is created on first reference (LABEL does the same)
1117                 {
1118                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1119                 }
1120
1121                 if(callRetBlock[labelIndex].size() > 1)   // With several return blocks, RET needs the call site index to pick one
1122                 {
1123                         callStack[stackIndex++] = UInt(callSiteIndex);   // Popped by RET under the same size() > 1 condition
1124                 }
1125
1126                 Int4 restoreLeave = enableLeave;   // Saved before the jump, reinstated in the return block below
1127
1128                 Nucleus::createBr(labelBlock[labelIndex]);
1129                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);   // Continue emitting at the block RET jumps back to
1130
1131                 enableLeave = restoreLeave;   // Changes LEAVE made to enableLeave inside the callee do not persist past the call
1132         }
1133
1134         void VertexProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src)   // Conditional call: dispatches on the type of the condition register
1135         {
1136                 if(src.type == Shader::PARAMETER_CONSTBOOL)   // Constant boolean register
1137                 {
1138                         CALLNZb(labelIndex, callSiteIndex, src);
1139                 }
1140                 else if(src.type == Shader::PARAMETER_PREDICATE)   // Predicate register
1141                 {
1142                         CALLNZp(labelIndex, callSiteIndex, src);
1143                 }
1144                 else ASSERT(false);   // No other register type is handled
1145         }
1146
1147         void VertexProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister)   // Calls the subroutine when the selected constant boolean (after the optional NOT) is true
1148         {
1149                 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1150
1151                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1152                 {
1153                         condition = !condition;
1154                 }
1155
1156                 if(!labelBlock[labelIndex])   // The label's block is created on first reference
1157                 {
1158                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1159                 }
1160
1161                 if(callRetBlock[labelIndex].size() > 1)   // With several return blocks, RET needs the call site index to pick one
1162                 {
1163                         callStack[stackIndex++] = UInt(callSiteIndex);   // Emitted before the branch, i.e. also on the path that skips the call
1164                 }
1165
1166                 Int4 restoreLeave = enableLeave;   // Saved before the branch, reinstated in the return block below
1167
1168                 branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);   // True: enter the subroutine; false: go straight to the return block
1169                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1170
1171                 enableLeave = restoreLeave;
1172         }
1173
1174         void VertexProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister)   // Calls the subroutine with the predicate as enable mask, unless the mask tests as all-false
1175         {
1176                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);   // The low two bits of the swizzle pick the p0 component used as the condition
1177
1178                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1179                 {
1180                         condition = ~condition;
1181                 }
1182
1183                 condition &= enableStack[enableIndex];   // Restrict to the components enabled at the current nesting level
1184
1185                 if(!labelBlock[labelIndex])   // The label's block is created on first reference
1186                 {
1187                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1188                 }
1189
1190                 if(callRetBlock[labelIndex].size() > 1)   // With several return blocks, RET needs the call site index to pick one
1191                 {
1192                         callStack[stackIndex++] = UInt(callSiteIndex);
1193                 }
1194
1195                 enableIndex++;   // Push the call's mask so it is the current enable mask while the callee runs
1196                 enableStack[enableIndex] = condition;
1197                 Int4 restoreLeave = enableLeave;   // Saved before the branch, reinstated in the return block below
1198
1199                 Bool notAllFalse = SignMask(condition) != 0;   // Presumably true when at least one component of the mask is set - confirm SignMask semantics
1200                 branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1201                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1202
1203                 enableIndex--;   // Pop the call's mask again in the return block
1204                 enableLeave = restoreLeave;
1205         }
1206
1207         void VertexProgram::ELSE()   // Switches from the 'if' part to the 'else' part of the innermost open if
1208         {
1209                 ifDepth--;   // Temporarily step down to the slot of the innermost open if; restored at the end
1210
1211                 BasicBlock *falseBlock = ifFalseBlock[ifDepth];   // Block recorded by IFb / IF(Int4 &) for the false path
1212                 BasicBlock *endBlock = Nucleus::createBasicBlock();
1213
1214                 if(isConditionalIf[ifDepth])   // Set by IF(Int4 &): the if is handled through enable masks
1215                 {
1216                         Int4 condition = ~enableStack[enableIndex] & enableStack[enableIndex - 1];   // Components enabled in the parent level but not in the 'if' part
1217                         Bool notAllFalse = SignMask(condition) != 0;   // Presumably true when at least one such component exists - confirm SignMask semantics
1218
1219                         branch(notAllFalse, falseBlock, endBlock);   // Skip the 'else' part entirely when nothing would execute it
1220
1221                         enableStack[enableIndex] = ~enableStack[enableIndex] & enableStack[enableIndex - 1];   // Same expression as 'condition' above, now stored as the current enable mask
1222                 }
1223                 else   // Cleared by IFb: a plain branch without masking
1224                 {
1225                         Nucleus::createBr(endBlock);   // The 'if' part jumps over the 'else' part
1226                         Nucleus::setInsertBlock(falseBlock);
1227                 }
1228
1229                 ifFalseBlock[ifDepth] = endBlock;   // ENDIF reads this slot and will now terminate at endBlock
1230
1231                 ifDepth++;
1232         }
1233
1234         void VertexProgram::ENDIF()   // Closes the innermost open if
1235         {
1236                 ifDepth--;
1237
1238                 BasicBlock *endBlock = ifFalseBlock[ifDepth];   // Either the false block from IFb / IF(Int4 &) or the end block installed by ELSE
1239
1240                 Nucleus::createBr(endBlock);
1241                 Nucleus::setInsertBlock(endBlock);
1242
1243                 if(isConditionalIf[ifDepth])   // Undo what IF(Int4 &) did: one breakDepth level and one enable stack level
1244                 {
1245                         breakDepth--;
1246                         enableIndex--;
1247                 }
1248         }
1249
1250         void VertexProgram::ENDLOOP()   // Closes a LOOP: advances aL, jumps back to the test block and continues emission in the end block
1251         {
1252                 loopRepDepth--;   // Back to the slot filled by the matching LOOP
1253
1254                 aL[loopDepth] = aL[loopDepth] + increment[loopDepth];   // FIXME: +=
1255
1256                 BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1257                 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1258
1259                 Nucleus::createBr(testBlock);   // Back edge to the iteration test emitted by LOOP
1260                 Nucleus::setInsertBlock(endBlock);
1261
1262                 loopDepth--;   // Pairs with the loopDepth++ in LOOP
1263                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);   // Reset the break mask to all ones after the loop
1264         }
1265
1266         void VertexProgram::ENDREP()   // Closes a REP: jumps back to the test block and continues emission in the end block
1267         {
1268                 loopRepDepth--;   // Back to the slot filled by the matching REP
1269
1270                 BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1271                 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1272
1273                 Nucleus::createBr(testBlock);   // Back edge to the iteration test emitted by REP
1274                 Nucleus::setInsertBlock(endBlock);
1275
1276                 loopDepth--;   // Pairs with the loopDepth++ in REP
1277                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);   // Reset the break mask to all ones after the loop
1278         }
1279
1280         void VertexProgram::ENDWHILE()   // Closes a WHILE: jumps back to the test block and continues emission in the end block
1281         {
1282                 loopRepDepth--;   // Back to the slot filled by the matching WHILE
1283
1284                 BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1285                 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1286
1287                 Nucleus::createBr(testBlock);   // Back edge to the condition test emitted by WHILE
1288                 Nucleus::setInsertBlock(endBlock);
1289
1290                 enableIndex--;   // Pairs with the enableIndex++ in WHILE
1291                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);   // Reset the break mask to all ones after the loop
1292                 whileTest = false;   // Clears the flag raised by TEST
1293         }
1294
1295         void VertexProgram::ENDSWITCH()
1296         {
1297                 loopRepDepth--;
1298
1299                 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1300
1301                 Nucleus::createBr(loopRepEndBlock[loopRepDepth]);
1302                 Nucleus::setInsertBlock(endBlock);
1303
1304                 enableIndex--;
1305                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1306         }
1307
1308         void VertexProgram::IF(const Src &src)   // Opens an if: dispatches on the type of the condition register
1309         {
1310                 if(src.type == Shader::PARAMETER_CONSTBOOL)   // Constant boolean register: plain branch, see IFb
1311                 {
1312                         IFb(src);
1313                 }
1314                 else if(src.type == Shader::PARAMETER_PREDICATE)   // Predicate register: masked if, see IFp
1315                 {
1316                         IFp(src);
1317                 }
1318                 else   // Any other register: its x component, reinterpreted as an integer mask, is the condition
1319                 {
1320                         Int4 condition = As<Int4>(fetchRegister(src).x);
1321                         IF(condition);
1322                 }
1323         }
1324
1325         void VertexProgram::IFb(const Src &boolRegister)   // Opens an if on a constant boolean register, emitted as a plain branch without enable masks
1326         {
1327                 ASSERT(ifDepth < 24 + 4);   // NOTE(review): presumably the capacity of ifFalseBlock / isConditionalIf - confirm against the declarations
1328
1329                 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1330
1331                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1332                 {
1333                         condition = !condition;
1334                 }
1335
1336                 BasicBlock *trueBlock = Nucleus::createBasicBlock();
1337                 BasicBlock *falseBlock = Nucleus::createBasicBlock();
1338
1339                 branch(condition, trueBlock, falseBlock);
1340
1341                 isConditionalIf[ifDepth] = false;   // Tells ELSE / ENDIF that no enable stack level was pushed for this if
1342                 ifFalseBlock[ifDepth] = falseBlock;   // Picked up by ELSE or ENDIF
1343
1344                 ifDepth++;
1345         }
1346
1347         void VertexProgram::IFp(const Src &predicateRegister)   // Opens a masked if on one component of the predicate register p0
1348         {
1349                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);   // The low two bits of the swizzle pick the p0 component used as the condition
1350
1351                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1352                 {
1353                         condition = ~condition;   // Bitwise inversion of the whole mask
1354                 }
1355
1356                 IF(condition);
1357         }
1358
1359         void VertexProgram::IFC(Vector4f &src0, Vector4f &src1, Control control)   // Opens a masked if on a comparison of src0.x with src1.x as selected by 'control'
1360         {
1361                 Int4 condition;   // Assigned by exactly one of the cases below
1362
1363                 switch(control)   // Only the x components of the two sources take part in the comparison
1364                 {
1365                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x,  src1.x);   break;
1366                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);             break;
1367                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x);    break;
1368                 case Shader::CONTROL_LT: condition = CmpLT(src0.x,  src1.x);    break;
1369                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x);    break;
1370                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);             break;
1371                 default:   // Any other control value is treated as a programming error; condition is not assigned on this path
1372                         ASSERT(false);
1373                 }
1374
1375                 IF(condition);
1376         }
1377
1378         void VertexProgram::IF(Int4 &condition)   // Opens a masked if; note that 'condition' is modified in place. NOTE(review): unlike IFb there is no ASSERT on ifDepth here
1379         {
1380                 condition &= enableStack[enableIndex];   // Only components enabled in the enclosing level can take the 'if' part
1381
1382                 enableIndex++;   // Push the if's mask as the new current enable mask; popped by ENDIF
1383                 enableStack[enableIndex] = condition;
1384
1385                 BasicBlock *trueBlock = Nucleus::createBasicBlock();
1386                 BasicBlock *falseBlock = Nucleus::createBasicBlock();
1387
1388                 Bool notAllFalse = SignMask(condition) != 0;   // Presumably true when at least one component of the mask is set - confirm SignMask semantics
1389
1390                 branch(notAllFalse, trueBlock, falseBlock);   // Skip the 'if' part entirely when nothing would execute it
1391
1392                 isConditionalIf[ifDepth] = true;   // Tells ELSE / ENDIF that this if uses the enable stack
1393                 ifFalseBlock[ifDepth] = falseBlock;   // Picked up by ELSE or ENDIF
1394
1395                 ifDepth++;
1396                 breakDepth++;   // Counts masked ifs opened since the enclosing LOOP/REP/WHILE/SWITCH; used by BREAK
1397         }
1398
1399         void VertexProgram::LABEL(int labelIndex)   // Starts emitting the subroutine identified by labelIndex
1400         {
1401                 if(!labelBlock[labelIndex])   // The block may already exist if a CALL / CALLNZ referenced the label earlier
1402                 {
1403                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1404                 }
1405
1406                 Nucleus::setInsertBlock(labelBlock[labelIndex]);
1407                 currentLabel = labelIndex;   // RET uses this to find the return blocks of the subroutine being emitted
1408         }
1409
1410         void VertexProgram::LOOP(const Src &integerRegister)   // Opens a counted loop whose count, initial aL and aL increment come from an integer constant register
1411         {
1412                 loopDepth++;   // Paired with the loopDepth-- in ENDLOOP
1413
1414                 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][0]));   // Element 0: iteration count
1415                 aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][1]));   // Element 1: initial value of aL
1416                 increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][2]));   // Element 2: value added to aL by ENDLOOP
1417
1418                 // FIXME: Compiles to two instructions?
1419                 If(increment[loopDepth] == 0)   // A zero increment is replaced by 1
1420                 {
1421                         increment[loopDepth] = 1;
1422                 }
1423
1424                 BasicBlock *loopBlock = Nucleus::createBasicBlock();
1425                 BasicBlock *testBlock = Nucleus::createBasicBlock();
1426                 BasicBlock *endBlock = Nucleus::createBasicBlock();
1427
1428                 loopRepTestBlock[loopRepDepth] = testBlock;   // Read back by ENDLOOP for the back edge
1429                 loopRepEndBlock[loopRepDepth] = endBlock;   // Read back by ENDLOOP and by BREAK
1430
1431                 // FIXME: jump(testBlock)
1432                 Nucleus::createBr(testBlock);
1433                 Nucleus::setInsertBlock(testBlock);
1434
1435                 branch(iteration[loopDepth] > 0, loopBlock, endBlock);   // Run the body while iterations remain
1436                 Nucleus::setInsertBlock(loopBlock);
1437
1438                 iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1439
1440                 loopRepDepth++;
1441                 breakDepth = 0;   // No masked if has been opened inside this loop yet
1442         }
1443
1444         void VertexProgram::REP(const Src &integerRegister)   // Opens a counted repeat block whose count comes from element 0 of an integer constant register
1445         {
1446                 loopDepth++;   // Paired with the loopDepth-- in ENDREP
1447
1448                 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][0]));   // Element 0: iteration count
1449                 aL[loopDepth] = aL[loopDepth - 1];   // Carry over the enclosing level's aL. NOTE(review): the constructor sets loopDepth to -1, so an outermost REP reads aL[-1] - confirm
1450
1451                 BasicBlock *loopBlock = Nucleus::createBasicBlock();
1452                 BasicBlock *testBlock = Nucleus::createBasicBlock();
1453                 BasicBlock *endBlock = Nucleus::createBasicBlock();
1454
1455                 loopRepTestBlock[loopRepDepth] = testBlock;   // Read back by ENDREP for the back edge
1456                 loopRepEndBlock[loopRepDepth] = endBlock;   // Read back by ENDREP and by BREAK
1457
1458                 // FIXME: jump(testBlock)
1459                 Nucleus::createBr(testBlock);
1460                 Nucleus::setInsertBlock(testBlock);
1461
1462                 branch(iteration[loopDepth] > 0, loopBlock, endBlock);   // Run the body while iterations remain
1463                 Nucleus::setInsertBlock(loopBlock);
1464
1465                 iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1466
1467                 loopRepDepth++;
1468                 breakDepth = 0;   // No masked if has been opened inside this loop yet
1469         }
1470
1471         void VertexProgram::WHILE(const Src &temporaryRegister)   // Opens a masked while loop whose condition is the x component of temporaryRegister
1472         {
1473                 enableIndex++;   // Reserve an enable stack level for the loop mask; popped by ENDWHILE
1474
1475                 BasicBlock *loopBlock = Nucleus::createBasicBlock();
1476                 BasicBlock *testBlock = Nucleus::createBasicBlock();
1477                 BasicBlock *endBlock = Nucleus::createBasicBlock();
1478
1479                 loopRepTestBlock[loopRepDepth] = testBlock;   // Read back by ENDWHILE for the back edge
1480                 loopRepEndBlock[loopRepDepth] = endBlock;   // Read back by ENDWHILE and by BREAK
1481
1482                 Int4 restoreBreak = enableBreak;   // Values on entry to the loop, reinstated below
1483                 Int4 restoreContinue = enableContinue;
1484
1485                 // FIXME: jump(testBlock)
1486                 Nucleus::createBr(testBlock);
1487                 Nucleus::setInsertBlock(testBlock);
1488                 enableContinue = restoreContinue;   // Emitted in the test block, so it is reached again through ENDWHILE's back edge
1489
1490                 const Vector4f &src = fetchRegister(temporaryRegister);   // Fetched inside the test block
1491                 Int4 condition = As<Int4>(src.x);   // The x component, reinterpreted as an integer mask
1492                 condition &= enableStack[enableIndex - 1];   // Limit to the components enabled in the enclosing level
1493                 if(shader->containsLeaveInstruction()) condition &= enableLeave;   // Also drop components cleared by LEAVE, when the shader uses it
1494                 enableStack[enableIndex] = condition;   // The loop body runs under this mask
1495
1496                 Bool notAllFalse = SignMask(condition) != 0;   // Presumably true when at least one component of the mask is set - confirm SignMask semantics
1497                 branch(notAllFalse, loopBlock, endBlock);
1498
1499                 Nucleus::setInsertBlock(endBlock);
1500                 enableBreak = restoreBreak;   // Emitted into the end block: reinstates the break mask saved on entry
1501
1502                 Nucleus::setInsertBlock(loopBlock);   // The loop body is emitted from here on
1503
1504                 loopRepDepth++;
1505                 breakDepth = 0;   // No masked if has been opened inside this loop yet
1506         }
1507
1508         void VertexProgram::SWITCH()   // Opens a switch: pushes an all-ones enable mask and registers an end block for BREAK / ENDSWITCH
1509         {
1510                 enableIndex++;   // Popped by ENDSWITCH
1511                 enableStack[enableIndex] = Int4(0xFFFFFFFF);
1512
1513                 BasicBlock *endBlock = Nucleus::createBasicBlock();
1514
1515                 loopRepTestBlock[loopRepDepth] = nullptr;   // A switch has no test block; ENDSWITCH only reads the end block
1516                 loopRepEndBlock[loopRepDepth] = endBlock;
1517
1518                 loopRepDepth++;
1519                 breakDepth = 0;   // No masked if has been opened inside this switch yet
1520         }
1521
1522         void VertexProgram::RET()   // Return from the main program (currentLabel == -1) or from the subroutine currently being emitted
1523         {
1524                 if(currentLabel == -1)   // -1 is the constructor's initial value: no LABEL has been emitted yet
1525                 {
1526                         returnBlock = Nucleus::createBasicBlock();
1527                         Nucleus::createBr(returnBlock);   // Note that the insert block is not changed here
1528                 }
1529                 else
1530                 {
1531                         BasicBlock *unreachableBlock = Nucleus::createBasicBlock();   // Default target of the switch and landing place for an uncalled function
1532
1533                         if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
1534                         {
1535                                 // FIXME: Encapsulate
1536                                 UInt index = callStack[--stackIndex];   // Call site index pushed by CALL / CALLNZb / CALLNZp
1537
1538                                 Value *value = index.loadValue();
1539                                 SwitchCases *switchCases = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
1540
1541                                 for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)   // One case per call site, keyed by the call site index
1542                                 {
1543                                         Nucleus::addSwitchCase(switchCases, i, callRetBlock[currentLabel][i]);
1544                                 }
1545                         }
1546                         else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
1547                         {
1548                                 Nucleus::createBr(callRetBlock[currentLabel][0]);
1549                         }
1550                         else   // Function isn't called
1551                         {
1552                                 Nucleus::createBr(unreachableBlock);
1553                         }
1554
1555                         Nucleus::setInsertBlock(unreachableBlock);
1556                         Nucleus::createUnreachable();   // Marks the block as unreachable
1557                 }
1558         }
1559
1560         void VertexProgram::LEAVE()   // Mask-only leave: no branch is emitted here
1561         {
1562                 enableLeave = enableLeave & ~enableStack[enableIndex];   // Clear enableLeave for the components enabled at the current nesting level
1563
1564                 // FIXME: Return from function if all instances left
1565                 // FIXME: Use enableLeave in other control-flow constructs
1566         }
1567
1568         void VertexProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1)   // Samples sampler src1 at coordinates src0 with the Lod function; src0.w is passed through unchanged
1569         {
1570                 sampleTexture(dst, src1, src0, a0, a0, src0, Lod);   // a0 fills the dsx/dsy parameters and src0 the offset parameter - presumably placeholders that the Lod function ignores
1571         }
1572
1573         void VertexProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1)   // Like the three-argument TEXLDL, but with src0.w forced to zero first
1574         {
1575                 src0.w = Float(0.0f);   // Overwrites the caller's src0.w; sampleTexture receives it as uvwq.w
1576                 sampleTexture(dst, src1, src0, a0, a0, src0, Lod);   // a0 fills the dsx/dsy parameters and src0 the offset parameter - presumably placeholders that the Lod function ignores
1577         }
1578
1579         void VertexProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2)   // TEX with a texel offset taken from src2
1580         {
1581                 src0.w = Float(0.0f);   // Overwrites the caller's src0.w; sampleTexture receives it as uvwq.w
1582                 sampleTexture(dst, src1, src0, a0, a0, src2, {Lod, Offset});   // a0 fills the dsx/dsy parameters - presumably placeholders for this function
1583         }
1584
1585         void VertexProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset)   // TEXLDL with a texel offset; src0.w is passed through unchanged
1586         {
1587                 sampleTexture(dst, src1, src0, a0, a0, offset, {Lod, Offset});   // a0 fills the dsx/dsy parameters - presumably placeholders for this function
1588         }
1589
1590         void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1)   // Samples sampler src1 at src0 with the Fetch function
1591         {
1592                 sampleTexture(dst, src1, src0, src0, src0, src0, Fetch);   // src0 also fills the dsx, dsy and offset parameters - presumably placeholders for Fetch
1593         }
1594
1595         void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset)   // TEXELFETCH with a texel offset
1596         {
1597                 sampleTexture(dst, src1, src0, src0, src0, offset, {Fetch, Offset});   // src0 also fills the dsx and dsy parameters - presumably placeholders for Fetch
1598         }
1599
1600         void VertexProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3)   // Samples sampler src1 at src0 with the Grad function, passing src2 as dsx and src3 as dsy
1601         {
1602                 sampleTexture(dst, src1, src0, src2, src3, src0, Grad);   // src0 also fills the offset parameter - presumably a placeholder without the Offset option
1603         }
1604
1605         void VertexProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3, Vector4f &offset)   // TEXGRAD with a texel offset
1606         {
1607                 sampleTexture(dst, src1, src0, src2, src3, offset, {Grad, Offset});   // src2 / src3 are passed as dsx / dsy
1608         }
1609
1610         void VertexProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1)
1611         {
1612                 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap[16]) + src1.index * sizeof(Texture);
1613                 sampler[src1.index]->textureSize(texture, dst, lod);
1614         }
1615
1616         void VertexProgram::sampleTexture(Vector4f &c, const Src &s, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)   // Common back end of the TEX* instructions: samples into a temporary, then writes it to c through s.swizzle
1617         {
1618                 Vector4f tmp;   // Unswizzled sample result
1619
1620                 if(s.type == Shader::PARAMETER_SAMPLER && s.rel.type == Shader::PARAMETER_VOID)   // Sampler register without relative addressing: the index is known while generating code
1621                 {
1622                         Pointer<Byte> texture = data + OFFSET(DrawData, mipmap[TEXTURE_IMAGE_UNITS]) + s.index * sizeof(Texture);   // Vertex textures start at mipmap[TEXTURE_IMAGE_UNITS]
1623                         sampler[s.index]->sampleTexture(texture, tmp, uvwq.x, uvwq.y, uvwq.z, uvwq.w, dsx, dsy, offset, function);
1624                 }
1625                 else   // Index only known at run time: emit one guarded sampling sequence per sampler the shader uses
1626                 {
1627                         Int index = As<Int>(Float(fetchRegister(s).x.x));   // First element of the fetched x component, reinterpreted (not converted) as an integer
1628
1629                         for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
1630                         {
1631                                 if(shader->usesSampler(i))   // Samplers the shader does not use get no code at all
1632                                 {
1633                                         If(index == i)   // Run-time test selecting the matching sampler
1634                                         {
1635                                                 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap[TEXTURE_IMAGE_UNITS]) + i * sizeof(Texture);
1636                                                 sampler[i]->sampleTexture(texture, tmp, uvwq.x, uvwq.y, uvwq.z, uvwq.w, dsx, dsy, offset, function);
1637                                                 // FIXME: When the sampler states are the same, we could use one sampler and just index the texture
1638                                         }
1639                                 }
1640                         }
1641                 }
1642
1643                 c.x = tmp[(s.swizzle >> 0) & 0x3];   // Two bits of s.swizzle per destination component select the source component of tmp
1644                 c.y = tmp[(s.swizzle >> 2) & 0x3];
1645                 c.z = tmp[(s.swizzle >> 4) & 0x3];
1646                 c.w = tmp[(s.swizzle >> 6) & 0x3];
1647         }
1648 }