OSDN Git Service

Work around Subzero constant folding limitation.
[android-x86/external-swiftshader.git] / src / Shader / VertexProgram.cpp
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "VertexProgram.hpp"
16
17 #include "VertexShader.hpp"
18 #include "SamplerCore.hpp"
19 #include "Renderer/Renderer.hpp"
20 #include "Renderer/Vertex.hpp"
21 #include "Common/Half.hpp"
22 #include "Common/Debug.hpp"
23
24 namespace sw
25 {
26         VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader)
27                 : VertexRoutine(state, shader), shader(shader), r(shader->indirectAddressableTemporaries)
28         {
29                 for(int i = 0; i < 2048; i++)
30                 {
31                         labelBlock[i] = 0;
32                 }
33
34                 loopDepth = -1;
35                 enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
36
37                 if(shader->containsBreakInstruction())
38                 {
39                         enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
40                 }
41
42                 if(shader->containsContinueInstruction())
43                 {
44                         enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
45                 }
46
47                 if(shader->isInstanceIdDeclared())
48                 {
49                         instanceID = *Pointer<Int>(data + OFFSET(DrawData,instanceID));
50                 }
51         }
52
53         VertexProgram::~VertexProgram()
54         {
55         }
56
57         void VertexProgram::pipeline(UInt &index)
58         {
59                 if(!state.preTransformed)
60                 {
61                         program(index);
62                 }
63                 else
64                 {
65                         passThrough();
66                 }
67         }
68
69         void VertexProgram::program(UInt &index)
70         {
71         //      shader->print("VertexShader-%0.8X.txt", state.shaderID);
72
73                 unsigned short shaderModel = shader->getShaderModel();
74
75                 enableIndex = 0;
76                 stackIndex = 0;
77
78                 if(shader->containsLeaveInstruction())
79                 {
80                         enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
81                 }
82
83                 if(shader->isVertexIdDeclared())
84                 {
85                         if(state.textureSampling)
86                         {
87                                 vertexID = Int4(index);
88                         }
89                         else
90                         {
91                                 vertexID = Insert(vertexID, As<Int>(index), 0);
92                                 vertexID = Insert(vertexID, As<Int>(index + 1), 1);
93                                 vertexID = Insert(vertexID, As<Int>(index + 2), 2);
94                                 vertexID = Insert(vertexID, As<Int>(index + 3), 3);
95                         }
96                 }
97
98                 // Create all call site return blocks up front
99                 for(size_t i = 0; i < shader->getLength(); i++)
100                 {
101                         const Shader::Instruction *instruction = shader->getInstruction(i);
102                         Shader::Opcode opcode = instruction->opcode;
103
104                         if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
105                         {
106                                 const Dst &dst = instruction->dst;
107
108                                 ASSERT(callRetBlock[dst.label].size() == dst.callSite);
109                                 callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
110                         }
111                 }
112
113                 for(size_t i = 0; i < shader->getLength(); i++)
114                 {
115                         const Shader::Instruction *instruction = shader->getInstruction(i);
116                         Shader::Opcode opcode = instruction->opcode;
117
118                         if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
119                         {
120                                 continue;
121                         }
122
123                         Dst dst = instruction->dst;
124                         Src src0 = instruction->src[0];
125                         Src src1 = instruction->src[1];
126                         Src src2 = instruction->src[2];
127                         Src src3 = instruction->src[3];
128                         Src src4 = instruction->src[4];
129
130                         bool predicate = instruction->predicate;
131                         Control control = instruction->control;
132                         bool integer = dst.type == Shader::PARAMETER_ADDR;
133                         bool pp = dst.partialPrecision;
134
135                         Vector4f d;
136                         Vector4f s0;
137                         Vector4f s1;
138                         Vector4f s2;
139                         Vector4f s3;
140                         Vector4f s4;
141
142                         if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0);
143                         if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1);
144                         if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2);
145                         if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3);
146                         if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4);
147
148                         switch(opcode)
149                         {
150                         case Shader::OPCODE_VS_1_0:                                     break;
151                         case Shader::OPCODE_VS_1_1:                                     break;
152                         case Shader::OPCODE_VS_2_0:                                     break;
153                         case Shader::OPCODE_VS_2_x:                                     break;
154                         case Shader::OPCODE_VS_2_sw:                                    break;
155                         case Shader::OPCODE_VS_3_0:                                     break;
156                         case Shader::OPCODE_VS_3_sw:                                    break;
157                         case Shader::OPCODE_DCL:                                        break;
158                         case Shader::OPCODE_DEF:                                        break;
159                         case Shader::OPCODE_DEFI:                                       break;
160                         case Shader::OPCODE_DEFB:                                       break;
161                         case Shader::OPCODE_NOP:                                        break;
162                         case Shader::OPCODE_ABS:        abs(d, s0);                     break;
163                         case Shader::OPCODE_IABS:       iabs(d, s0);                    break;
164                         case Shader::OPCODE_ADD:        add(d, s0, s1);                 break;
165                         case Shader::OPCODE_IADD:       iadd(d, s0, s1);                break;
166                         case Shader::OPCODE_CRS:        crs(d, s0, s1);                 break;
167                         case Shader::OPCODE_FORWARD1:   forward1(d, s0, s1, s2);        break;
168                         case Shader::OPCODE_FORWARD2:   forward2(d, s0, s1, s2);        break;
169                         case Shader::OPCODE_FORWARD3:   forward3(d, s0, s1, s2);        break;
170                         case Shader::OPCODE_FORWARD4:   forward4(d, s0, s1, s2);        break;
171                         case Shader::OPCODE_REFLECT1:   reflect1(d, s0, s1);            break;
172                         case Shader::OPCODE_REFLECT2:   reflect2(d, s0, s1);            break;
173                         case Shader::OPCODE_REFLECT3:   reflect3(d, s0, s1);            break;
174                         case Shader::OPCODE_REFLECT4:   reflect4(d, s0, s1);            break;
175                         case Shader::OPCODE_REFRACT1:   refract1(d, s0, s1, s2.x);      break;
176                         case Shader::OPCODE_REFRACT2:   refract2(d, s0, s1, s2.x);      break;
177                         case Shader::OPCODE_REFRACT3:   refract3(d, s0, s1, s2.x);      break;
178                         case Shader::OPCODE_REFRACT4:   refract4(d, s0, s1, s2.x);      break;
179                         case Shader::OPCODE_DP1:        dp1(d, s0, s1);                 break;
180                         case Shader::OPCODE_DP2:        dp2(d, s0, s1);                 break;
181                         case Shader::OPCODE_DP3:        dp3(d, s0, s1);                 break;
182                         case Shader::OPCODE_DP4:        dp4(d, s0, s1);                 break;
183                         case Shader::OPCODE_DET2:       det2(d, s0, s1);                break;
184                         case Shader::OPCODE_DET3:       det3(d, s0, s1, s2);            break;
185                         case Shader::OPCODE_DET4:       det4(d, s0, s1, s2, s3);        break;
186                         case Shader::OPCODE_ATT:        att(d, s0, s1);                 break;
187                         case Shader::OPCODE_EXP2X:      exp2x(d, s0, pp);               break;
188                         case Shader::OPCODE_EXP2:       exp2(d, s0, pp);                break;
189                         case Shader::OPCODE_EXPP:       expp(d, s0, shaderModel);       break;
190                         case Shader::OPCODE_EXP:        exp(d, s0, pp);                 break;
191                         case Shader::OPCODE_FRC:        frc(d, s0);                     break;
192                         case Shader::OPCODE_TRUNC:      trunc(d, s0);                   break;
193                         case Shader::OPCODE_FLOOR:      floor(d, s0);                   break;
194                         case Shader::OPCODE_ROUND:      round(d, s0);                   break;
195                         case Shader::OPCODE_ROUNDEVEN:  roundEven(d, s0);               break;
196                         case Shader::OPCODE_CEIL:       ceil(d, s0);                    break;
197                         case Shader::OPCODE_LIT:        lit(d, s0);                     break;
198                         case Shader::OPCODE_LOG2X:      log2x(d, s0, pp);               break;
199                         case Shader::OPCODE_LOG2:       log2(d, s0, pp);                break;
200                         case Shader::OPCODE_LOGP:       logp(d, s0, shaderModel);       break;
201                         case Shader::OPCODE_LOG:        log(d, s0, pp);                 break;
202                         case Shader::OPCODE_LRP:        lrp(d, s0, s1, s2);             break;
203                         case Shader::OPCODE_STEP:       step(d, s0, s1);                break;
204                         case Shader::OPCODE_SMOOTH:     smooth(d, s0, s1, s2);          break;
205                         case Shader::OPCODE_ISINF:      isinf(d, s0);                   break;
206                         case Shader::OPCODE_ISNAN:      isnan(d, s0);                   break;
207                         case Shader::OPCODE_FLOATBITSTOINT:
208                         case Shader::OPCODE_FLOATBITSTOUINT:
209                         case Shader::OPCODE_INTBITSTOFLOAT:
210                         case Shader::OPCODE_UINTBITSTOFLOAT: d = s0;                    break;
211                         case Shader::OPCODE_PACKSNORM2x16:   packSnorm2x16(d, s0);      break;
212                         case Shader::OPCODE_PACKUNORM2x16:   packUnorm2x16(d, s0);      break;
213                         case Shader::OPCODE_PACKHALF2x16:    packHalf2x16(d, s0);       break;
214                         case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0);    break;
215                         case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0);    break;
216                         case Shader::OPCODE_UNPACKHALF2x16:  unpackHalf2x16(d, s0);     break;
217                         case Shader::OPCODE_M3X2:       M3X2(d, s0, src1);              break;
218                         case Shader::OPCODE_M3X3:       M3X3(d, s0, src1);              break;
219                         case Shader::OPCODE_M3X4:       M3X4(d, s0, src1);              break;
220                         case Shader::OPCODE_M4X3:       M4X3(d, s0, src1);              break;
221                         case Shader::OPCODE_M4X4:       M4X4(d, s0, src1);              break;
222                         case Shader::OPCODE_MAD:        mad(d, s0, s1, s2);             break;
223                         case Shader::OPCODE_IMAD:       imad(d, s0, s1, s2);            break;
224                         case Shader::OPCODE_MAX:        max(d, s0, s1);                 break;
225                         case Shader::OPCODE_IMAX:       imax(d, s0, s1);                break;
226                         case Shader::OPCODE_UMAX:       umax(d, s0, s1);                break;
227                         case Shader::OPCODE_MIN:        min(d, s0, s1);                 break;
228                         case Shader::OPCODE_IMIN:       imin(d, s0, s1);                break;
229                         case Shader::OPCODE_UMIN:       umin(d, s0, s1);                break;
230                         case Shader::OPCODE_MOV:        mov(d, s0, integer);            break;
231                         case Shader::OPCODE_MOVA:       mov(d, s0, true);               break;
232                         case Shader::OPCODE_NEG:        neg(d, s0);                     break;
233                         case Shader::OPCODE_INEG:       ineg(d, s0);                    break;
234                         case Shader::OPCODE_F2B:        f2b(d, s0);                     break;
235                         case Shader::OPCODE_B2F:        b2f(d, s0);                     break;
236                         case Shader::OPCODE_F2I:        f2i(d, s0);                     break;
237                         case Shader::OPCODE_I2F:        i2f(d, s0);                     break;
238                         case Shader::OPCODE_F2U:        f2u(d, s0);                     break;
239                         case Shader::OPCODE_U2F:        u2f(d, s0);                     break;
240                         case Shader::OPCODE_I2B:        i2b(d, s0);                     break;
241                         case Shader::OPCODE_B2I:        b2i(d, s0);                     break;
242                         case Shader::OPCODE_MUL:        mul(d, s0, s1);                 break;
243                         case Shader::OPCODE_IMUL:       imul(d, s0, s1);                break;
244                         case Shader::OPCODE_NRM2:       nrm2(d, s0, pp);                break;
245                         case Shader::OPCODE_NRM3:       nrm3(d, s0, pp);                break;
246                         case Shader::OPCODE_NRM4:       nrm4(d, s0, pp);                break;
247                         case Shader::OPCODE_POWX:       powx(d, s0, s1, pp);            break;
248                         case Shader::OPCODE_POW:        pow(d, s0, s1, pp);             break;
249                         case Shader::OPCODE_RCPX:       rcpx(d, s0, pp);                break;
250                         case Shader::OPCODE_DIV:        div(d, s0, s1);                 break;
251                         case Shader::OPCODE_IDIV:       idiv(d, s0, s1);                break;
252                         case Shader::OPCODE_UDIV:       udiv(d, s0, s1);                break;
253                         case Shader::OPCODE_MOD:        mod(d, s0, s1);                 break;
254                         case Shader::OPCODE_IMOD:       imod(d, s0, s1);                break;
255                         case Shader::OPCODE_UMOD:       umod(d, s0, s1);                break;
256                         case Shader::OPCODE_SHL:        shl(d, s0, s1);                 break;
257                         case Shader::OPCODE_ISHR:       ishr(d, s0, s1);                break;
258                         case Shader::OPCODE_USHR:       ushr(d, s0, s1);                break;
259                         case Shader::OPCODE_RSQX:       rsqx(d, s0, pp);                break;
260                         case Shader::OPCODE_SQRT:       sqrt(d, s0, pp);                break;
261                         case Shader::OPCODE_RSQ:        rsq(d, s0, pp);                 break;
262                         case Shader::OPCODE_LEN2:       len2(d.x, s0, pp);              break;
263                         case Shader::OPCODE_LEN3:       len3(d.x, s0, pp);              break;
264                         case Shader::OPCODE_LEN4:       len4(d.x, s0, pp);              break;
265                         case Shader::OPCODE_DIST1:      dist1(d.x, s0, s1, pp);         break;
266                         case Shader::OPCODE_DIST2:      dist2(d.x, s0, s1, pp);         break;
267                         case Shader::OPCODE_DIST3:      dist3(d.x, s0, s1, pp);         break;
268                         case Shader::OPCODE_DIST4:      dist4(d.x, s0, s1, pp);         break;
269                         case Shader::OPCODE_SGE:        step(d, s1, s0);                break;
270                         case Shader::OPCODE_SGN:        sgn(d, s0);                     break;
271                         case Shader::OPCODE_ISGN:       isgn(d, s0);                    break;
272                         case Shader::OPCODE_SINCOS:     sincos(d, s0, pp);              break;
273                         case Shader::OPCODE_COS:        cos(d, s0, pp);                 break;
274                         case Shader::OPCODE_SIN:        sin(d, s0, pp);                 break;
275                         case Shader::OPCODE_TAN:        tan(d, s0);                     break;
276                         case Shader::OPCODE_ACOS:       acos(d, s0);                    break;
277                         case Shader::OPCODE_ASIN:       asin(d, s0);                    break;
278                         case Shader::OPCODE_ATAN:       atan(d, s0);                    break;
279                         case Shader::OPCODE_ATAN2:      atan2(d, s0, s1);               break;
280                         case Shader::OPCODE_COSH:       cosh(d, s0, pp);                break;
281                         case Shader::OPCODE_SINH:       sinh(d, s0, pp);                break;
282                         case Shader::OPCODE_TANH:       tanh(d, s0, pp);                break;
283                         case Shader::OPCODE_ACOSH:      acosh(d, s0, pp);               break;
284                         case Shader::OPCODE_ASINH:      asinh(d, s0, pp);               break;
285                         case Shader::OPCODE_ATANH:      atanh(d, s0, pp);               break;
286                         case Shader::OPCODE_SLT:        slt(d, s0, s1);                 break;
287                         case Shader::OPCODE_SUB:        sub(d, s0, s1);                 break;
288                         case Shader::OPCODE_ISUB:       isub(d, s0, s1);                break;
289                         case Shader::OPCODE_BREAK:      BREAK();                        break;
290                         case Shader::OPCODE_BREAKC:     BREAKC(s0, s1, control);        break;
291                         case Shader::OPCODE_BREAKP:     BREAKP(src0);                   break;
292                         case Shader::OPCODE_CONTINUE:   CONTINUE();                     break;
293                         case Shader::OPCODE_TEST:       TEST();                         break;
294                         case Shader::OPCODE_SCALAR:     SCALAR();                       break;
295                         case Shader::OPCODE_CALL:       CALL(dst.label, dst.callSite);  break;
296                         case Shader::OPCODE_CALLNZ:     CALLNZ(dst.label, dst.callSite, src0); break;
297                         case Shader::OPCODE_ELSE:       ELSE();                         break;
298                         case Shader::OPCODE_ENDIF:      ENDIF();                        break;
299                         case Shader::OPCODE_ENDLOOP:    ENDLOOP();                      break;
300                         case Shader::OPCODE_ENDREP:     ENDREP();                       break;
301                         case Shader::OPCODE_ENDWHILE:   ENDWHILE();                     break;
302                         case Shader::OPCODE_ENDSWITCH:  ENDSWITCH();                    break;
303                         case Shader::OPCODE_IF:         IF(src0);                       break;
304                         case Shader::OPCODE_IFC:        IFC(s0, s1, control);           break;
305                         case Shader::OPCODE_LABEL:      LABEL(dst.index);               break;
306                         case Shader::OPCODE_LOOP:       LOOP(src1);                     break;
307                         case Shader::OPCODE_REP:        REP(src0);                      break;
308                         case Shader::OPCODE_WHILE:      WHILE(src0);                    break;
309                         case Shader::OPCODE_SWITCH:     SWITCH();                       break;
310                         case Shader::OPCODE_RET:        RET();                          break;
311                         case Shader::OPCODE_LEAVE:      LEAVE();                        break;
312                         case Shader::OPCODE_CMP:        cmp(d, s0, s1, control);        break;
313                         case Shader::OPCODE_ICMP:       icmp(d, s0, s1, control);       break;
314                         case Shader::OPCODE_UCMP:       ucmp(d, s0, s1, control);       break;
315                         case Shader::OPCODE_SELECT:     select(d, s0, s1, s2);          break;
316                         case Shader::OPCODE_EXTRACT:    extract(d.x, s0, s1.x);         break;
317                         case Shader::OPCODE_INSERT:     insert(d, s0, s1.x, s2.x);      break;
318                         case Shader::OPCODE_ALL:        all(d.x, s0);                   break;
319                         case Shader::OPCODE_ANY:        any(d.x, s0);                   break;
320                         case Shader::OPCODE_NOT:        bitwise_not(d, s0);             break;
321                         case Shader::OPCODE_OR:         bitwise_or(d, s0, s1);          break;
322                         case Shader::OPCODE_XOR:        bitwise_xor(d, s0, s1);         break;
323                         case Shader::OPCODE_AND:        bitwise_and(d, s0, s1);         break;
324                         case Shader::OPCODE_EQ:         equal(d, s0, s1);               break;
325                         case Shader::OPCODE_NE:         notEqual(d, s0, s1);            break;
326                         case Shader::OPCODE_TEXLDL:     TEXLOD(d, s0, src1, s0.w);      break;
327                         case Shader::OPCODE_TEXLOD:     TEXLOD(d, s0, src1, s2.x);      break;
328                         case Shader::OPCODE_TEX:        TEX(d, s0, src1);               break;
329                         case Shader::OPCODE_TEXOFFSET:  TEXOFFSET(d, s0, src1, s2);     break;
330                         case Shader::OPCODE_TEXLODOFFSET: TEXLODOFFSET(d, s0, src1, s2, s3.x); break;
331                         case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2.x);  break;
332                         case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCHOFFSET(d, s0, src1, s2, s3.x); break;
333                         case Shader::OPCODE_TEXGRAD:    TEXGRAD(d, s0, src1, s2, s3);   break;
334                         case Shader::OPCODE_TEXGRADOFFSET: TEXGRADOFFSET(d, s0, src1, s2, s3, s4); break;
335                         case Shader::OPCODE_TEXSIZE:    TEXSIZE(d, s0.x, src1);         break;
336                         case Shader::OPCODE_END:                                        break;
337                         default:
338                                 ASSERT(false);
339                         }
340
341                         if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_NOP)
342                         {
343                                 if(dst.saturate)
344                                 {
345                                         if(dst.x) d.x = Max(d.x, Float4(0.0f));
346                                         if(dst.y) d.y = Max(d.y, Float4(0.0f));
347                                         if(dst.z) d.z = Max(d.z, Float4(0.0f));
348                                         if(dst.w) d.w = Max(d.w, Float4(0.0f));
349
350                                         if(dst.x) d.x = Min(d.x, Float4(1.0f));
351                                         if(dst.y) d.y = Min(d.y, Float4(1.0f));
352                                         if(dst.z) d.z = Min(d.z, Float4(1.0f));
353                                         if(dst.w) d.w = Min(d.w, Float4(1.0f));
354                                 }
355
356                                 if(instruction->isPredicated())
357                                 {
358                                         Vector4f pDst;   // FIXME: Rename
359
360                                         switch(dst.type)
361                                         {
362                                         case Shader::PARAMETER_VOID: break;
363                                         case Shader::PARAMETER_TEMP:
364                                                 if(dst.rel.type == Shader::PARAMETER_VOID)
365                                                 {
366                                                         if(dst.x) pDst.x = r[dst.index].x;
367                                                         if(dst.y) pDst.y = r[dst.index].y;
368                                                         if(dst.z) pDst.z = r[dst.index].z;
369                                                         if(dst.w) pDst.w = r[dst.index].w;
370                                                 }
371                                                 else if(!dst.rel.dynamic)
372                                                 {
373                                                         Int a = dst.index + relativeAddress(dst.rel);
374
375                                                         if(dst.x) pDst.x = r[a].x;
376                                                         if(dst.y) pDst.y = r[a].y;
377                                                         if(dst.z) pDst.z = r[a].z;
378                                                         if(dst.w) pDst.w = r[a].w;
379                                                 }
380                                                 else
381                                                 {
382                                                         Int4 a = dst.index + dynamicAddress(dst.rel);
383
384                                                         if(dst.x) pDst.x = r[a].x;
385                                                         if(dst.y) pDst.y = r[a].y;
386                                                         if(dst.z) pDst.z = r[a].z;
387                                                         if(dst.w) pDst.w = r[a].w;
388                                                 }
389                                                 break;
390                                         case Shader::PARAMETER_ADDR: pDst = a0; break;
391                                         case Shader::PARAMETER_RASTOUT:
392                                                 switch(dst.index)
393                                                 {
394                                                 case 0:
395                                                         if(dst.x) pDst.x = o[Pos].x;
396                                                         if(dst.y) pDst.y = o[Pos].y;
397                                                         if(dst.z) pDst.z = o[Pos].z;
398                                                         if(dst.w) pDst.w = o[Pos].w;
399                                                         break;
400                                                 case 1:
401                                                         pDst.x = o[Fog].x;
402                                                         break;
403                                                 case 2:
404                                                         pDst.x = o[Pts].y;
405                                                         break;
406                                                 default:
407                                                         ASSERT(false);
408                                                 }
409                                                 break;
410                                         case Shader::PARAMETER_ATTROUT:
411                                                 if(dst.x) pDst.x = o[C0 + dst.index].x;
412                                                 if(dst.y) pDst.y = o[C0 + dst.index].y;
413                                                 if(dst.z) pDst.z = o[C0 + dst.index].z;
414                                                 if(dst.w) pDst.w = o[C0 + dst.index].w;
415                                                 break;
416                                         case Shader::PARAMETER_TEXCRDOUT:
417                                 //      case Shader::PARAMETER_OUTPUT:
418                                                 if(shaderModel < 0x0300)
419                                                 {
420                                                         if(dst.x) pDst.x = o[T0 + dst.index].x;
421                                                         if(dst.y) pDst.y = o[T0 + dst.index].y;
422                                                         if(dst.z) pDst.z = o[T0 + dst.index].z;
423                                                         if(dst.w) pDst.w = o[T0 + dst.index].w;
424                                                 }
425                                                 else if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
426                                                 {
427                                                         if(dst.x) pDst.x = o[dst.index].x;
428                                                         if(dst.y) pDst.y = o[dst.index].y;
429                                                         if(dst.z) pDst.z = o[dst.index].z;
430                                                         if(dst.w) pDst.w = o[dst.index].w;
431                                                 }
432                                                 else if(!dst.rel.dynamic)
433                                                 {
434                                                         Int a = dst.index + relativeAddress(dst.rel);
435
436                                                         if(dst.x) pDst.x = o[a].x;
437                                                         if(dst.y) pDst.y = o[a].y;
438                                                         if(dst.z) pDst.z = o[a].z;
439                                                         if(dst.w) pDst.w = o[a].w;
440                                                 }
441                                                 else
442                                                 {
443                                                         Int4 a = dst.index + dynamicAddress(dst.rel);
444
445                                                         if(dst.x) pDst.x = o[a].x;
446                                                         if(dst.y) pDst.y = o[a].y;
447                                                         if(dst.z) pDst.z = o[a].z;
448                                                         if(dst.w) pDst.w = o[a].w;
449                                                 }
450                                                 break;
451                                         case Shader::PARAMETER_LABEL:                break;
452                                         case Shader::PARAMETER_PREDICATE: pDst = p0; break;
453                                         case Shader::PARAMETER_INPUT:                break;
454                                         default:
455                                                 ASSERT(false);
456                                         }
457
458                                         Int4 enable = enableMask(instruction);
459
460                                         Int4 xEnable = enable;
461                                         Int4 yEnable = enable;
462                                         Int4 zEnable = enable;
463                                         Int4 wEnable = enable;
464
465                                         if(predicate)
466                                         {
467                                                 unsigned char pSwizzle = instruction->predicateSwizzle;
468
469                                                 Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03];
470                                                 Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03];
471                                                 Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03];
472                                                 Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03];
473
474                                                 if(!instruction->predicateNot)
475                                                 {
476                                                         if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
477                                                         if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
478                                                         if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
479                                                         if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
480                                                 }
481                                                 else
482                                                 {
483                                                         if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
484                                                         if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
485                                                         if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
486                                                         if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
487                                                 }
488                                         }
489
490                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
491                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
492                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
493                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
494
495                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
496                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
497                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
498                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
499                                 }
500
501                                 switch(dst.type)
502                                 {
503                                 case Shader::PARAMETER_VOID:
504                                         break;
505                                 case Shader::PARAMETER_TEMP:
506                                         if(dst.rel.type == Shader::PARAMETER_VOID)
507                                         {
508                                                 if(dst.x) r[dst.index].x = d.x;
509                                                 if(dst.y) r[dst.index].y = d.y;
510                                                 if(dst.z) r[dst.index].z = d.z;
511                                                 if(dst.w) r[dst.index].w = d.w;
512                                         }
513                                         else if(!dst.rel.dynamic)
514                                         {
515                                                 Int a = dst.index + relativeAddress(dst.rel);
516
517                                                 if(dst.x) r[a].x = d.x;
518                                                 if(dst.y) r[a].y = d.y;
519                                                 if(dst.z) r[a].z = d.z;
520                                                 if(dst.w) r[a].w = d.w;
521                                         }
522                                         else
523                                         {
524                                                 Int4 a = dst.index + dynamicAddress(dst.rel);
525
526                                                 if(dst.x) r.scatter_x(a, d.x);
527                                                 if(dst.y) r.scatter_y(a, d.y);
528                                                 if(dst.z) r.scatter_z(a, d.z);
529                                                 if(dst.w) r.scatter_w(a, d.w);
530                                         }
531                                         break;
532                                 case Shader::PARAMETER_ADDR:
533                                         if(dst.x) a0.x = d.x;
534                                         if(dst.y) a0.y = d.y;
535                                         if(dst.z) a0.z = d.z;
536                                         if(dst.w) a0.w = d.w;
537                                         break;
538                                 case Shader::PARAMETER_RASTOUT:
539                                         switch(dst.index)
540                                         {
541                                         case 0:
542                                                 if(dst.x) o[Pos].x = d.x;
543                                                 if(dst.y) o[Pos].y = d.y;
544                                                 if(dst.z) o[Pos].z = d.z;
545                                                 if(dst.w) o[Pos].w = d.w;
546                                                 break;
547                                         case 1:
548                                                 o[Fog].x = d.x;
549                                                 break;
550                                         case 2:
551                                                 o[Pts].y = d.x;
552                                                 break;
553                                         default:        ASSERT(false);
554                                         }
555                                         break;
556                                 case Shader::PARAMETER_ATTROUT:
557                                         if(dst.x) o[C0 + dst.index].x = d.x;
558                                         if(dst.y) o[C0 + dst.index].y = d.y;
559                                         if(dst.z) o[C0 + dst.index].z = d.z;
560                                         if(dst.w) o[C0 + dst.index].w = d.w;
561                                         break;
562                                 case Shader::PARAMETER_TEXCRDOUT:
563                         //      case Shader::PARAMETER_OUTPUT:
564                                         if(shaderModel < 0x0300)
565                                         {
566                                                 if(dst.x) o[T0 + dst.index].x = d.x;
567                                                 if(dst.y) o[T0 + dst.index].y = d.y;
568                                                 if(dst.z) o[T0 + dst.index].z = d.z;
569                                                 if(dst.w) o[T0 + dst.index].w = d.w;
570                                         }
571                                         else if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
572                                         {
573                                                 if(dst.x) o[dst.index].x = d.x;
574                                                 if(dst.y) o[dst.index].y = d.y;
575                                                 if(dst.z) o[dst.index].z = d.z;
576                                                 if(dst.w) o[dst.index].w = d.w;
577                                         }
578                                         else if(!dst.rel.dynamic)
579                                         {
580                                                 Int a = dst.index + relativeAddress(dst.rel);
581
582                                                 if(dst.x) o[a].x = d.x;
583                                                 if(dst.y) o[a].y = d.y;
584                                                 if(dst.z) o[a].z = d.z;
585                                                 if(dst.w) o[a].w = d.w;
586                                         }
587                                         else
588                                         {
589                                                 Int4 a = dst.index + dynamicAddress(dst.rel);
590
591                                                 if(dst.x) o.scatter_x(a, d.x);
592                                                 if(dst.y) o.scatter_y(a, d.y);
593                                                 if(dst.z) o.scatter_z(a, d.z);
594                                                 if(dst.w) o.scatter_w(a, d.w);
595                                         }
596                                         break;
597                                 case Shader::PARAMETER_LABEL:             break;
598                                 case Shader::PARAMETER_PREDICATE: p0 = d; break;
599                                 case Shader::PARAMETER_INPUT:             break;
600                                 default:
601                                         ASSERT(false);
602                                 }
603                         }
604                 }
605
606                 if(currentLabel != -1)
607                 {
608                         Nucleus::setInsertBlock(returnBlock);
609                 }
610         }
611
612         void VertexProgram::passThrough()
613         {
614                 if(shader)
615                 {
616                         for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
617                         {
618                                 unsigned char usage = shader->getOutput(i, 0).usage;
619
620                                 switch(usage)
621                                 {
622                                 case 0xFF:
623                                         continue;
624                                 case Shader::USAGE_PSIZE:
625                                         o[i].y = v[i].x;
626                                         break;
627                                 case Shader::USAGE_TEXCOORD:
628                                         o[i].x = v[i].x;
629                                         o[i].y = v[i].y;
630                                         o[i].z = v[i].z;
631                                         o[i].w = v[i].w;
632                                         break;
633                                 case Shader::USAGE_POSITION:
634                                         o[i].x = v[i].x;
635                                         o[i].y = v[i].y;
636                                         o[i].z = v[i].z;
637                                         o[i].w = v[i].w;
638                                         break;
639                                 case Shader::USAGE_COLOR:
640                                         o[i].x = v[i].x;
641                                         o[i].y = v[i].y;
642                                         o[i].z = v[i].z;
643                                         o[i].w = v[i].w;
644                                         break;
645                                 case Shader::USAGE_FOG:
646                                         o[i].x = v[i].x;
647                                         break;
648                                 default:
649                                         ASSERT(false);
650                                 }
651                         }
652                 }
653                 else
654                 {
655                         o[Pos].x = v[PositionT].x;
656                         o[Pos].y = v[PositionT].y;
657                         o[Pos].z = v[PositionT].z;
658                         o[Pos].w = v[PositionT].w;
659
660                         for(int i = 0; i < 2; i++)
661                         {
662                                 o[C0 + i].x = v[Color0 + i].x;
663                                 o[C0 + i].y = v[Color0 + i].y;
664                                 o[C0 + i].z = v[Color0 + i].z;
665                                 o[C0 + i].w = v[Color0 + i].w;
666                         }
667
668                         for(int i = 0; i < 8; i++)
669                         {
670                                 o[T0 + i].x = v[TexCoord0 + i].x;
671                                 o[T0 + i].y = v[TexCoord0 + i].y;
672                                 o[T0 + i].z = v[TexCoord0 + i].z;
673                                 o[T0 + i].w = v[TexCoord0 + i].w;
674                         }
675
676                         o[Pts].y = v[PointSize].x;
677                 }
678         }
679
680         Vector4f VertexProgram::fetchRegister(const Src &src, unsigned int offset)
681         {
682                 Vector4f reg;
683                 unsigned int i = src.index + offset;
684
685                 switch(src.type)
686                 {
687                 case Shader::PARAMETER_TEMP:
688                         if(src.rel.type == Shader::PARAMETER_VOID)
689                         {
690                                 reg = r[i];
691                         }
692                         else if(!src.rel.dynamic)
693                         {
694                                 reg = r[i + relativeAddress(src.rel, src.bufferIndex)];
695                         }
696                         else
697                         {
698                                 reg = r[i + dynamicAddress(src.rel)];
699                         }
700                         break;
701                 case Shader::PARAMETER_CONST:
702                         reg = readConstant(src, offset);
703                         break;
704                 case Shader::PARAMETER_INPUT:
705                         if(src.rel.type == Shader::PARAMETER_VOID)
706                         {
707                                 reg = v[i];
708                         }
709                         else if(!src.rel.dynamic)
710                         {
711                                 reg = v[i + relativeAddress(src.rel, src.bufferIndex)];
712                         }
713                         else
714                         {
715                                 reg = v[i + dynamicAddress(src.rel)];
716                         }
717                         break;
718                 case Shader::PARAMETER_VOID: return r[0];   // Dummy
719                 case Shader::PARAMETER_FLOAT4LITERAL:
720                         reg.x = Float4(src.value[0]);
721                         reg.y = Float4(src.value[1]);
722                         reg.z = Float4(src.value[2]);
723                         reg.w = Float4(src.value[3]);
724                         break;
725                 case Shader::PARAMETER_ADDR:      reg = a0; break;
726                 case Shader::PARAMETER_CONSTBOOL: return r[0];   // Dummy
727                 case Shader::PARAMETER_CONSTINT:  return r[0];   // Dummy
728                 case Shader::PARAMETER_LOOP:      return r[0];   // Dummy
729                 case Shader::PARAMETER_PREDICATE: return r[0];   // Dummy
730                 case Shader::PARAMETER_SAMPLER:
731                         if(src.rel.type == Shader::PARAMETER_VOID)
732                         {
733                                 reg.x = As<Float4>(Int4(i));
734                         }
735                         else if(src.rel.type == Shader::PARAMETER_TEMP)
736                         {
737                                 reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x));
738                         }
739                         return reg;
740                 case Shader::PARAMETER_OUTPUT:
741                         if(src.rel.type == Shader::PARAMETER_VOID)
742                         {
743                                 reg = o[i];
744                         }
745                         else if(!src.rel.dynamic)
746                         {
747                                 reg = o[i + relativeAddress(src.rel, src.bufferIndex)];
748                         }
749                         else
750                         {
751                                 reg = o[i + dynamicAddress(src.rel)];
752                         }
753                         break;
754                 case Shader::PARAMETER_MISCTYPE:
755                         if(src.index == Shader::InstanceIDIndex)
756                         {
757                                 reg.x = As<Float>(instanceID);
758                         }
759                         else if(src.index == Shader::VertexIDIndex)
760                         {
761                                 reg.x = As<Float4>(vertexID);
762                         }
763                         else ASSERT(false);
764                         return reg;
765                 default:
766                         ASSERT(false);
767                 }
768
769                 const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
770                 const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
771                 const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
772                 const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
773
774                 Vector4f mod;
775
776                 switch(src.modifier)
777                 {
778                 case Shader::MODIFIER_NONE:
779                         mod.x = x;
780                         mod.y = y;
781                         mod.z = z;
782                         mod.w = w;
783                         break;
784                 case Shader::MODIFIER_NEGATE:
785                         mod.x = -x;
786                         mod.y = -y;
787                         mod.z = -z;
788                         mod.w = -w;
789                         break;
790                 case Shader::MODIFIER_ABS:
791                         mod.x = Abs(x);
792                         mod.y = Abs(y);
793                         mod.z = Abs(z);
794                         mod.w = Abs(w);
795                         break;
796                 case Shader::MODIFIER_ABS_NEGATE:
797                         mod.x = -Abs(x);
798                         mod.y = -Abs(y);
799                         mod.z = -Abs(z);
800                         mod.w = -Abs(w);
801                         break;
802                 case Shader::MODIFIER_NOT:
803                         mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
804                         mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
805                         mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
806                         mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
807                         break;
808                 default:
809                         ASSERT(false);
810                 }
811
812                 return mod;
813         }
814
815         RValue<Pointer<Byte>> VertexProgram::uniformAddress(int bufferIndex, unsigned int index)
816         {
817                 if(bufferIndex == -1)
818                 {
819                         return data + OFFSET(DrawData, vs.c[index]);
820                 }
821                 else
822                 {
823                         return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, vs.u[bufferIndex])) + index;
824                 }
825         }
826
827         RValue<Pointer<Byte>> VertexProgram::uniformAddress(int bufferIndex, unsigned int index, Int &offset)
828         {
829                 return uniformAddress(bufferIndex, index) + offset * sizeof(float4);
830         }
831
832         Vector4f VertexProgram::readConstant(const Src &src, unsigned int offset)
833         {
834                 Vector4f c;
835                 unsigned int i = src.index + offset;
836
837                 if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
838                 {
839                         c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i));
840
841                         c.x = c.x.xxxx;
842                         c.y = c.y.yyyy;
843                         c.z = c.z.zzzz;
844                         c.w = c.w.wwww;
845
846                         if(shader->containsDefineInstruction())   // Constant may be known at compile time
847                         {
848                                 for(size_t j = 0; j < shader->getLength(); j++)
849                                 {
850                                         const Shader::Instruction &instruction = *shader->getInstruction(j);
851
852                                         if(instruction.opcode == Shader::OPCODE_DEF)
853                                         {
854                                                 if(instruction.dst.index == i)
855                                                 {
856                                                         c.x = Float4(instruction.src[0].value[0]);
857                                                         c.y = Float4(instruction.src[0].value[1]);
858                                                         c.z = Float4(instruction.src[0].value[2]);
859                                                         c.w = Float4(instruction.src[0].value[3]);
860
861                                                         break;
862                                                 }
863                                         }
864                                 }
865                         }
866                 }
867                 else if(!src.rel.dynamic || src.rel.type == Shader::PARAMETER_LOOP)
868                 {
869                         Int a = relativeAddress(src.rel, src.bufferIndex);
870
871                         c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));
872
873                         c.x = c.x.xxxx;
874                         c.y = c.y.yyyy;
875                         c.z = c.z.zzzz;
876                         c.w = c.w.wwww;
877                 }
878                 else
879                 {
880                         int component = src.rel.swizzle & 0x03;
881                         Float4 a;
882
883                         switch(src.rel.type)
884                         {
885                         case Shader::PARAMETER_ADDR:     a = a0[component]; break;
886                         case Shader::PARAMETER_TEMP:     a = r[src.rel.index][component]; break;
887                         case Shader::PARAMETER_INPUT:    a = v[src.rel.index][component]; break;
888                         case Shader::PARAMETER_OUTPUT:   a = o[src.rel.index][component]; break;
889                         case Shader::PARAMETER_CONST:    a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break;
890                         case Shader::PARAMETER_MISCTYPE:
891                                 switch(src.rel.index)
892                                 {
893                                 case Shader::InstanceIDIndex: a = As<Float4>(Int4(instanceID)); break;
894                                 case Shader::VertexIDIndex:   a = As<Float4>(vertexID);         break;
895                                 default: ASSERT(false);
896                                 }
897                                 break;
898                         default: ASSERT(false);
899                         }
900
901                         Int4 index = Int4(i) + As<Int4>(a) * Int4(src.rel.scale);
902
903                         index = Min(As<UInt4>(index), UInt4(VERTEX_UNIFORM_VECTORS));   // Clamp to constant register range, c[VERTEX_UNIFORM_VECTORS] = {0, 0, 0, 0}
904
905                         Int index0 = Extract(index, 0);
906                         Int index1 = Extract(index, 1);
907                         Int index2 = Extract(index, 2);
908                         Int index3 = Extract(index, 3);
909
910                         c.x = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index0), 16);
911                         c.y = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index1), 16);
912                         c.z = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index2), 16);
913                         c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index3), 16);
914
915                         transpose4x4(c.x, c.y, c.z, c.w);
916                 }
917
918                 return c;
919         }
920
921         Int VertexProgram::relativeAddress(const Shader::Relative &rel, int bufferIndex)
922         {
923                 ASSERT(!rel.dynamic);
924
925                 if(rel.type == Shader::PARAMETER_TEMP)
926                 {
927                         return As<Int>(Extract(r[rel.index].x, 0)) * rel.scale;
928                 }
929                 else if(rel.type == Shader::PARAMETER_INPUT)
930                 {
931                         return As<Int>(Extract(v[rel.index].x, 0)) * rel.scale;
932                 }
933                 else if(rel.type == Shader::PARAMETER_OUTPUT)
934                 {
935                         return As<Int>(Extract(o[rel.index].x, 0)) * rel.scale;
936                 }
937                 else if(rel.type == Shader::PARAMETER_CONST)
938                 {
939                         return *Pointer<Int>(uniformAddress(bufferIndex, rel.index)) * rel.scale;
940                 }
941                 else if(rel.type == Shader::PARAMETER_LOOP)
942                 {
943                         return aL[loopDepth];
944                 }
945                 else ASSERT(false);
946
947                 return 0;
948         }
949
950         Int4 VertexProgram::dynamicAddress(const Shader::Relative &rel)
951         {
952                 int component = rel.swizzle & 0x03;
953                 Float4 a;
954
955                 switch(rel.type)
956                 {
957                 case Shader::PARAMETER_ADDR:     a = a0[component]; break;
958                 case Shader::PARAMETER_TEMP:     a = r[rel.index][component]; break;
959                 case Shader::PARAMETER_INPUT:    a = v[rel.index][component]; break;
960                 case Shader::PARAMETER_OUTPUT:   a = o[rel.index][component]; break;
961                 case Shader::PARAMETER_MISCTYPE:
962                         switch(rel.index)
963                         {
964                         case Shader::InstanceIDIndex: a = As<Float>(instanceID); break;
965                         case Shader::VertexIDIndex:   a = As<Float4>(vertexID);  break;
966                         default: ASSERT(false);
967                         }
968                         break;
969                 default: ASSERT(false);
970                 }
971
972                 return As<Int4>(a) * Int4(rel.scale);
973         }
974
975         Int4 VertexProgram::enableMask(const Shader::Instruction *instruction)
976         {
977                 if(scalar)
978                 {
979                         return Int4(0xFFFFFFFF);
980                 }
981
982                 Int4 enable = instruction->analysisBranch ? Int4(enableStack[enableIndex]) : Int4(0xFFFFFFFF);
983
984                 if(shader->containsBreakInstruction() && instruction->analysisBreak)
985                 {
986                         enable &= enableBreak;
987                 }
988
989                 if(shader->containsContinueInstruction() && instruction->analysisContinue)
990                 {
991                         enable &= enableContinue;
992                 }
993
994                 if(shader->containsLeaveInstruction() && instruction->analysisLeave)
995                 {
996                         enable &= enableLeave;
997                 }
998
999                 return enable;
1000         }
1001
1002         void VertexProgram::M3X2(Vector4f &dst, Vector4f &src0, Src &src1)
1003         {
1004                 Vector4f row0 = fetchRegister(src1, 0);
1005                 Vector4f row1 = fetchRegister(src1, 1);
1006
1007                 dst.x = dot3(src0, row0);
1008                 dst.y = dot3(src0, row1);
1009         }
1010
1011         void VertexProgram::M3X3(Vector4f &dst, Vector4f &src0, Src &src1)
1012         {
1013                 Vector4f row0 = fetchRegister(src1, 0);
1014                 Vector4f row1 = fetchRegister(src1, 1);
1015                 Vector4f row2 = fetchRegister(src1, 2);
1016
1017                 dst.x = dot3(src0, row0);
1018                 dst.y = dot3(src0, row1);
1019                 dst.z = dot3(src0, row2);
1020         }
1021
1022         void VertexProgram::M3X4(Vector4f &dst, Vector4f &src0, Src &src1)
1023         {
1024                 Vector4f row0 = fetchRegister(src1, 0);
1025                 Vector4f row1 = fetchRegister(src1, 1);
1026                 Vector4f row2 = fetchRegister(src1, 2);
1027                 Vector4f row3 = fetchRegister(src1, 3);
1028
1029                 dst.x = dot3(src0, row0);
1030                 dst.y = dot3(src0, row1);
1031                 dst.z = dot3(src0, row2);
1032                 dst.w = dot3(src0, row3);
1033         }
1034
1035         void VertexProgram::M4X3(Vector4f &dst, Vector4f &src0, Src &src1)
1036         {
1037                 Vector4f row0 = fetchRegister(src1, 0);
1038                 Vector4f row1 = fetchRegister(src1, 1);
1039                 Vector4f row2 = fetchRegister(src1, 2);
1040
1041                 dst.x = dot4(src0, row0);
1042                 dst.y = dot4(src0, row1);
1043                 dst.z = dot4(src0, row2);
1044         }
1045
1046         void VertexProgram::M4X4(Vector4f &dst, Vector4f &src0, Src &src1)
1047         {
1048                 Vector4f row0 = fetchRegister(src1, 0);
1049                 Vector4f row1 = fetchRegister(src1, 1);
1050                 Vector4f row2 = fetchRegister(src1, 2);
1051                 Vector4f row3 = fetchRegister(src1, 3);
1052
1053                 dst.x = dot4(src0, row0);
1054                 dst.y = dot4(src0, row1);
1055                 dst.z = dot4(src0, row2);
1056                 dst.w = dot4(src0, row3);
1057         }
1058
1059         void VertexProgram::BREAK()
1060         {
1061                 enableBreak = enableBreak & ~enableStack[enableIndex];
1062         }
1063
1064         void VertexProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control)
1065         {
1066                 Int4 condition;
1067
1068                 switch(control)
1069                 {
1070                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
1071                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
1072                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
1073                 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
1074                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
1075                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
1076                 default:
1077                         ASSERT(false);
1078                 }
1079
1080                 BREAK(condition);
1081         }
1082
1083         void VertexProgram::BREAKP(const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
1084         {
1085                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1086
1087                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1088                 {
1089                         condition = ~condition;
1090                 }
1091
1092                 BREAK(condition);
1093         }
1094
1095         void VertexProgram::BREAK(Int4 &condition)
1096         {
1097                 condition &= enableStack[enableIndex];
1098
1099                 enableBreak = enableBreak & ~condition;
1100         }
1101
1102         void VertexProgram::CONTINUE()
1103         {
1104                 enableContinue = enableContinue & ~enableStack[enableIndex];
1105         }
1106
1107         void VertexProgram::TEST()
1108         {
1109                 enableContinue = restoreContinue.back();
1110                 restoreContinue.pop_back();
1111         }
1112
1113         void VertexProgram::SCALAR()
1114         {
1115                 scalar = true;
1116         }
1117
1118         void VertexProgram::CALL(int labelIndex, int callSiteIndex)
1119         {
1120                 if(!labelBlock[labelIndex])
1121                 {
1122                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1123                 }
1124
1125                 if(callRetBlock[labelIndex].size() > 1)
1126                 {
1127                         callStack[stackIndex++] = UInt(callSiteIndex);
1128                 }
1129
1130                 Int4 restoreLeave = enableLeave;
1131
1132                 Nucleus::createBr(labelBlock[labelIndex]);
1133                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1134
1135                 enableLeave = restoreLeave;
1136         }
1137
1138         void VertexProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src)
1139         {
1140                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1141                 {
1142                         CALLNZb(labelIndex, callSiteIndex, src);
1143                 }
1144                 else if(src.type == Shader::PARAMETER_PREDICATE)
1145                 {
1146                         CALLNZp(labelIndex, callSiteIndex, src);
1147                 }
1148                 else ASSERT(false);
1149         }
1150
1151         void VertexProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister)
1152         {
1153                 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1154
1155                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1156                 {
1157                         condition = !condition;
1158                 }
1159
1160                 if(!labelBlock[labelIndex])
1161                 {
1162                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1163                 }
1164
1165                 if(callRetBlock[labelIndex].size() > 1)
1166                 {
1167                         callStack[stackIndex++] = UInt(callSiteIndex);
1168                 }
1169
1170                 Int4 restoreLeave = enableLeave;
1171
1172                 branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1173                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1174
1175                 enableLeave = restoreLeave;
1176         }
1177
1178         void VertexProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister)
1179         {
1180                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1181
1182                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1183                 {
1184                         condition = ~condition;
1185                 }
1186
1187                 condition &= enableStack[enableIndex];
1188
1189                 if(!labelBlock[labelIndex])
1190                 {
1191                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1192                 }
1193
1194                 if(callRetBlock[labelIndex].size() > 1)
1195                 {
1196                         callStack[stackIndex++] = UInt(callSiteIndex);
1197                 }
1198
1199                 enableIndex++;
1200                 enableStack[enableIndex] = condition;
1201                 Int4 restoreLeave = enableLeave;
1202
1203                 Bool notAllFalse = SignMask(condition) != 0;
1204                 branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1205                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1206
1207                 enableIndex--;
1208                 enableLeave = restoreLeave;
1209         }
1210
1211         void VertexProgram::ELSE()
1212         {
1213                 ifDepth--;
1214
1215                 BasicBlock *falseBlock = ifFalseBlock[ifDepth];
1216                 BasicBlock *endBlock = Nucleus::createBasicBlock();
1217
1218                 if(isConditionalIf[ifDepth])
1219                 {
1220                         Int4 condition = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
1221                         Bool notAllFalse = SignMask(condition) != 0;
1222
1223                         branch(notAllFalse, falseBlock, endBlock);
1224
1225                         enableStack[enableIndex] = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
1226                 }
1227                 else
1228                 {
1229                         Nucleus::createBr(endBlock);
1230                         Nucleus::setInsertBlock(falseBlock);
1231                 }
1232
1233                 ifFalseBlock[ifDepth] = endBlock;
1234
1235                 ifDepth++;
1236         }
1237
1238         void VertexProgram::ENDIF()
1239         {
1240                 ifDepth--;
1241
1242                 BasicBlock *endBlock = ifFalseBlock[ifDepth];
1243
1244                 Nucleus::createBr(endBlock);
1245                 Nucleus::setInsertBlock(endBlock);
1246
1247                 if(isConditionalIf[ifDepth])
1248                 {
1249                         enableIndex--;
1250                 }
1251         }
1252
1253         void VertexProgram::ENDLOOP()
1254         {
1255                 loopRepDepth--;
1256
1257                 aL[loopDepth] = aL[loopDepth] + increment[loopDepth];   // FIXME: +=
1258
1259                 BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1260                 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1261
1262                 Nucleus::createBr(testBlock);
1263                 Nucleus::setInsertBlock(endBlock);
1264
1265                 loopDepth--;
1266                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1267         }
1268
1269         void VertexProgram::ENDREP()
1270         {
1271                 loopRepDepth--;
1272
1273                 BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1274                 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1275
1276                 Nucleus::createBr(testBlock);
1277                 Nucleus::setInsertBlock(endBlock);
1278
1279                 loopDepth--;
1280                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1281         }
1282
1283         void VertexProgram::ENDWHILE()
1284         {
1285                 loopRepDepth--;
1286
1287                 BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1288                 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1289
1290                 Nucleus::createBr(testBlock);
1291                 Nucleus::setInsertBlock(endBlock);
1292
1293                 enableIndex--;
1294                 scalar = false;
1295         }
1296
1297         void VertexProgram::ENDSWITCH()
1298         {
1299                 loopRepDepth--;
1300
1301                 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1302
1303                 Nucleus::createBr(endBlock);
1304                 Nucleus::setInsertBlock(endBlock);
1305         }
1306
1307         void VertexProgram::IF(const Src &src)
1308         {
1309                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1310                 {
1311                         IFb(src);
1312                 }
1313                 else if(src.type == Shader::PARAMETER_PREDICATE)
1314                 {
1315                         IFp(src);
1316                 }
1317                 else
1318                 {
1319                         Int4 condition = As<Int4>(fetchRegister(src).x);
1320                         IF(condition);
1321                 }
1322         }
1323
1324         void VertexProgram::IFb(const Src &boolRegister)
1325         {
1326                 ASSERT(ifDepth < 24 + 4);
1327
1328                 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1329
1330                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1331                 {
1332                         condition = !condition;
1333                 }
1334
1335                 BasicBlock *trueBlock = Nucleus::createBasicBlock();
1336                 BasicBlock *falseBlock = Nucleus::createBasicBlock();
1337
1338                 branch(condition, trueBlock, falseBlock);
1339
1340                 isConditionalIf[ifDepth] = false;
1341                 ifFalseBlock[ifDepth] = falseBlock;
1342
1343                 ifDepth++;
1344         }
1345
1346         void VertexProgram::IFp(const Src &predicateRegister)
1347         {
1348                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1349
1350                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1351                 {
1352                         condition = ~condition;
1353                 }
1354
1355                 IF(condition);
1356         }
1357
1358         void VertexProgram::IFC(Vector4f &src0, Vector4f &src1, Control control)
1359         {
1360                 Int4 condition;
1361
1362                 switch(control)
1363                 {
1364                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
1365                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);  break;
1366                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
1367                 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x);  break;
1368                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
1369                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);  break;
1370                 default:
1371                         ASSERT(false);
1372                 }
1373
1374                 IF(condition);
1375         }
1376
1377         void VertexProgram::IF(Int4 &condition)
1378         {
1379                 condition &= enableStack[enableIndex];
1380
1381                 enableIndex++;
1382                 enableStack[enableIndex] = condition;
1383
1384                 BasicBlock *trueBlock = Nucleus::createBasicBlock();
1385                 BasicBlock *falseBlock = Nucleus::createBasicBlock();
1386
1387                 Bool notAllFalse = SignMask(condition) != 0;
1388
1389                 branch(notAllFalse, trueBlock, falseBlock);
1390
1391                 isConditionalIf[ifDepth] = true;
1392                 ifFalseBlock[ifDepth] = falseBlock;
1393
1394                 ifDepth++;
1395         }
1396
1397         void VertexProgram::LABEL(int labelIndex)
1398         {
1399                 if(!labelBlock[labelIndex])
1400                 {
1401                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1402                 }
1403
1404                 Nucleus::setInsertBlock(labelBlock[labelIndex]);
1405                 currentLabel = labelIndex;
1406         }
1407
1408         void VertexProgram::LOOP(const Src &integerRegister)
1409         {
1410                 loopDepth++;
1411
1412                 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
1413                 aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][1]));
1414                 increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][2]));
1415
1416                 // FIXME: Compiles to two instructions?
1417                 If(increment[loopDepth] == 0)
1418                 {
1419                         increment[loopDepth] = 1;
1420                 }
1421
1422                 BasicBlock *loopBlock = Nucleus::createBasicBlock();
1423                 BasicBlock *testBlock = Nucleus::createBasicBlock();
1424                 BasicBlock *endBlock = Nucleus::createBasicBlock();
1425
1426                 loopRepTestBlock[loopRepDepth] = testBlock;
1427                 loopRepEndBlock[loopRepDepth] = endBlock;
1428
1429                 // FIXME: jump(testBlock)
1430                 Nucleus::createBr(testBlock);
1431                 Nucleus::setInsertBlock(testBlock);
1432
1433                 branch(iteration[loopDepth] > 0, loopBlock, endBlock);
1434                 Nucleus::setInsertBlock(loopBlock);
1435
1436                 iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1437
1438                 loopRepDepth++;
1439         }
1440
1441         void VertexProgram::REP(const Src &integerRegister)
1442         {
1443                 loopDepth++;
1444
1445                 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
1446                 aL[loopDepth] = aL[loopDepth - 1];
1447
1448                 BasicBlock *loopBlock = Nucleus::createBasicBlock();
1449                 BasicBlock *testBlock = Nucleus::createBasicBlock();
1450                 BasicBlock *endBlock = Nucleus::createBasicBlock();
1451
1452                 loopRepTestBlock[loopRepDepth] = testBlock;
1453                 loopRepEndBlock[loopRepDepth] = endBlock;
1454
1455                 // FIXME: jump(testBlock)
1456                 Nucleus::createBr(testBlock);
1457                 Nucleus::setInsertBlock(testBlock);
1458
1459                 branch(iteration[loopDepth] > 0, loopBlock, endBlock);
1460                 Nucleus::setInsertBlock(loopBlock);
1461
1462                 iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1463
1464                 loopRepDepth++;
1465         }
1466
1467         void VertexProgram::WHILE(const Src &temporaryRegister)
1468         {
1469                 enableIndex++;
1470
1471                 BasicBlock *loopBlock = Nucleus::createBasicBlock();
1472                 BasicBlock *testBlock = Nucleus::createBasicBlock();
1473                 BasicBlock *endBlock = Nucleus::createBasicBlock();
1474
1475                 loopRepTestBlock[loopRepDepth] = testBlock;
1476                 loopRepEndBlock[loopRepDepth] = endBlock;
1477
1478                 Int4 restoreBreak = enableBreak;
1479                 restoreContinue.push_back(enableContinue);
1480
1481                 // TODO: jump(testBlock)
1482                 Nucleus::createBr(testBlock);
1483                 Nucleus::setInsertBlock(testBlock);
1484
1485                 const Vector4f &src = fetchRegister(temporaryRegister);
1486                 Int4 condition = As<Int4>(src.x);
1487                 condition &= enableStack[enableIndex - 1];
1488                 if(shader->containsLeaveInstruction()) condition &= enableLeave;
1489                 if(shader->containsBreakInstruction()) condition &= enableBreak;
1490                 enableStack[enableIndex] = condition;
1491
1492                 Bool notAllFalse = SignMask(condition) != 0;
1493                 branch(notAllFalse, loopBlock, endBlock);
1494
1495                 Nucleus::setInsertBlock(endBlock);
1496                 enableBreak = restoreBreak;
1497
1498                 Nucleus::setInsertBlock(loopBlock);
1499
1500                 loopRepDepth++;
1501                 scalar = false;
1502         }
1503
1504         void VertexProgram::SWITCH()
1505         {
1506                 BasicBlock *endBlock = Nucleus::createBasicBlock();
1507
1508                 loopRepTestBlock[loopRepDepth] = nullptr;
1509                 loopRepEndBlock[loopRepDepth] = endBlock;
1510
1511                 Int4 restoreBreak = enableBreak;
1512
1513                 BasicBlock *currentBlock = Nucleus::getInsertBlock();
1514
1515                 Nucleus::setInsertBlock(endBlock);
1516                 enableBreak = restoreBreak;
1517
1518                 Nucleus::setInsertBlock(currentBlock);
1519
1520                 loopRepDepth++;
1521         }
1522
1523         void VertexProgram::RET()
1524         {
1525                 if(currentLabel == -1)
1526                 {
1527                         returnBlock = Nucleus::createBasicBlock();
1528                         Nucleus::createBr(returnBlock);
1529                 }
1530                 else
1531                 {
1532                         BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
1533
1534                         if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
1535                         {
1536                                 // FIXME: Encapsulate
1537                                 UInt index = callStack[--stackIndex];
1538
1539                                 Value *value = index.loadValue();
1540                                 SwitchCases *switchCases = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
1541
1542                                 for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
1543                                 {
1544                                         Nucleus::addSwitchCase(switchCases, i, callRetBlock[currentLabel][i]);
1545                                 }
1546                         }
1547                         else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
1548                         {
1549                                 Nucleus::createBr(callRetBlock[currentLabel][0]);
1550                         }
1551                         else   // Function isn't called
1552                         {
1553                                 Nucleus::createBr(unreachableBlock);
1554                         }
1555
1556                         Nucleus::setInsertBlock(unreachableBlock);
1557                         Nucleus::createUnreachable();
1558                 }
1559         }
1560
1561         void VertexProgram::LEAVE()
1562         {
1563                 enableLeave = enableLeave & ~enableStack[enableIndex];
1564
1565                 // FIXME: Return from function if all instances left
1566                 // FIXME: Use enableLeave in other control-flow constructs
1567         }
1568
1569         void VertexProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1)
1570         {
1571                 dst = sampleTexture(src1, src0, (src0.x), (src0), (src0), (src0), Base);
1572         }
1573
1574         void VertexProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset)
1575         {
1576                 dst = sampleTexture(src1, src0, (src0.x), (src0), (src0), offset, {Base, Offset});
1577         }
1578
1579         void VertexProgram::TEXLOD(Vector4f &dst, Vector4f &src0, const Src& src1, Float4 &lod)
1580         {
1581                 dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Lod);
1582         }
1583
1584         void VertexProgram::TEXLODOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset, Float4 &lod)
1585         {
1586                 dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Lod, Offset});
1587         }
1588
1589         void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Float4 &lod)
1590         {
1591                 dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Fetch);
1592         }
1593
1594         void VertexProgram::TEXELFETCHOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset, Float4 &lod)
1595         {
1596                 dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Fetch, Offset});
1597         }
1598
1599         void VertexProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy)
1600         {
1601                 dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, src0, Grad);
1602         }
1603
1604         void VertexProgram::TEXGRADOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy, Vector4f &offset)
1605         {
1606                 dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, offset, {Grad, Offset});
1607         }
1608
1609         void VertexProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1)
1610         {
1611                 bool uniformSampler = (src1.type == Shader::PARAMETER_SAMPLER && src1.rel.type == Shader::PARAMETER_VOID);
1612                 Int offset = uniformSampler ? src1.index * sizeof(Texture) : As<Int>(Float(fetchRegister(src1).x.x)) * sizeof(Texture);
1613                 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap[TEXTURE_IMAGE_UNITS]) + offset;
1614
1615                 dst = SamplerCore::textureSize(texture, lod);
1616         }
1617
1618         Vector4f VertexProgram::sampleTexture(const Src &s, Vector4f &uvwq, Float4 &lod, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
1619         {
1620                 Vector4f tmp;
1621
1622                 if(s.type == Shader::PARAMETER_SAMPLER && s.rel.type == Shader::PARAMETER_VOID)
1623                 {
1624                         tmp = sampleTexture(s.index, uvwq, lod, dsx, dsy, offset, function);
1625                 }
1626                 else
1627                 {
1628                         Int index = As<Int>(Float(fetchRegister(s).x.x));
1629
1630                         for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
1631                         {
1632                                 if(shader->usesSampler(i))
1633                                 {
1634                                         If(index == i)
1635                                         {
1636                                                 tmp = sampleTexture(i, uvwq, lod, dsx, dsy, offset, function);
1637                                                 // FIXME: When the sampler states are the same, we could use one sampler and just index the texture
1638                                         }
1639                                 }
1640                         }
1641                 }
1642
1643                 Vector4f c;
1644                 c.x = tmp[(s.swizzle >> 0) & 0x3];
1645                 c.y = tmp[(s.swizzle >> 2) & 0x3];
1646                 c.z = tmp[(s.swizzle >> 4) & 0x3];
1647                 c.w = tmp[(s.swizzle >> 6) & 0x3];
1648
1649                 return c;
1650         }
1651
1652         Vector4f VertexProgram::sampleTexture(int sampler, Vector4f &uvwq, Float4 &lod, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
1653         {
1654                 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap[TEXTURE_IMAGE_UNITS]) + sampler * sizeof(Texture);
1655                 return SamplerCore(constants, state.sampler[sampler]).sampleTexture(texture, uvwq.x, uvwq.y, uvwq.z, uvwq.w, lod, dsx, dsy, offset, function);
1656         }
1657 }