OSDN Git Service

Make the number of vertex outputs configurable.
[android-x86/external-swiftshader.git] / src / Shader / VertexProgram.cpp
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "VertexProgram.hpp"
16
17 #include "Renderer.hpp"
18 #include "VertexShader.hpp"
19 #include "Vertex.hpp"
20 #include "Half.hpp"
21 #include "SamplerCore.hpp"
22 #include "Debug.hpp"
23
24 namespace sw
25 {
26         VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader)
27                 : VertexRoutine(state, shader), shader(shader), r(shader->dynamicallyIndexedTemporaries)
28         {
29                 ifDepth = 0;
30                 loopRepDepth = 0;
31                 breakDepth = 0;
32                 currentLabel = -1;
33                 whileTest = false;
34
35                 for(int i = 0; i < 2048; i++)
36                 {
37                         labelBlock[i] = 0;
38                 }
39
40                 loopDepth = -1;
41                 enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
42
43                 if(shader && shader->containsBreakInstruction())
44                 {
45                         enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
46                 }
47
48                 if(shader && shader->containsContinueInstruction())
49                 {
50                         enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
51                 }
52
53                 if(shader->instanceIdDeclared)
54                 {
55                         instanceID = *Pointer<Int>(data + OFFSET(DrawData,instanceID));
56                 }
57         }
58
59         VertexProgram::~VertexProgram()
60         {
61                 for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
62                 {
63                         delete sampler[i];
64                 }
65         }
66
67         void VertexProgram::pipeline()
68         {
69                 for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
70                 {
71                         sampler[i] = new SamplerCore(constants, state.samplerState[i]);
72                 }
73
74                 if(!state.preTransformed)
75                 {
76                         program();
77                 }
78                 else
79                 {
80                         passThrough();
81                 }
82         }
83
84         void VertexProgram::program()
85         {
86         //      shader->print("VertexShader-%0.8X.txt", state.shaderID);
87
88                 unsigned short version = shader->getVersion();
89
90                 enableIndex = 0;
91                 stackIndex = 0;
92
93                 if(shader->containsLeaveInstruction())
94                 {
95                         enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
96                 }
97
98                 // Create all call site return blocks up front
99                 for(size_t i = 0; i < shader->getLength(); i++)
100                 {
101                         const Shader::Instruction *instruction = shader->getInstruction(i);
102                         Shader::Opcode opcode = instruction->opcode;
103
104                         if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
105                         {
106                                 const Dst &dst = instruction->dst;
107
108                                 ASSERT(callRetBlock[dst.label].size() == dst.callSite);
109                                 callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
110                         }
111                 }
112
113                 for(size_t i = 0; i < shader->getLength(); i++)
114                 {
115                         const Shader::Instruction *instruction = shader->getInstruction(i);
116                         Shader::Opcode opcode = instruction->opcode;
117
118                         if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
119                         {
120                                 continue;
121                         }
122
123                         Dst dst = instruction->dst;
124                         Src src0 = instruction->src[0];
125                         Src src1 = instruction->src[1];
126                         Src src2 = instruction->src[2];
127                         Src src3 = instruction->src[3];
128                         Src src4 = instruction->src[4];
129
130                         bool predicate = instruction->predicate;
131                         Control control = instruction->control;
132                         bool integer = dst.type == Shader::PARAMETER_ADDR;
133                         bool pp = dst.partialPrecision;
134
135                         Vector4f d;
136                         Vector4f s0;
137                         Vector4f s1;
138                         Vector4f s2;
139                         Vector4f s3;
140                         Vector4f s4;
141
142                         if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0);
143                         if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1);
144                         if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2);
145                         if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3);
146                         if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4);
147
148                         switch(opcode)
149                         {
150                         case Shader::OPCODE_VS_1_0:                                     break;
151                         case Shader::OPCODE_VS_1_1:                                     break;
152                         case Shader::OPCODE_VS_2_0:                                     break;
153                         case Shader::OPCODE_VS_2_x:                                     break;
154                         case Shader::OPCODE_VS_2_sw:                                    break;
155                         case Shader::OPCODE_VS_3_0:                                     break;
156                         case Shader::OPCODE_VS_3_sw:                                    break;
157                         case Shader::OPCODE_DCL:                                        break;
158                         case Shader::OPCODE_DEF:                                        break;
159                         case Shader::OPCODE_DEFI:                                       break;
160                         case Shader::OPCODE_DEFB:                                       break;
161                         case Shader::OPCODE_NOP:                                        break;
162                         case Shader::OPCODE_ABS:        abs(d, s0);                     break;
163                         case Shader::OPCODE_IABS:       iabs(d, s0);                    break;
164                         case Shader::OPCODE_ADD:        add(d, s0, s1);                 break;
165                         case Shader::OPCODE_IADD:       iadd(d, s0, s1);                break;
166                         case Shader::OPCODE_CRS:        crs(d, s0, s1);                 break;
167                         case Shader::OPCODE_FORWARD1:   forward1(d, s0, s1, s2);        break;
168                         case Shader::OPCODE_FORWARD2:   forward2(d, s0, s1, s2);        break;
169                         case Shader::OPCODE_FORWARD3:   forward3(d, s0, s1, s2);        break;
170                         case Shader::OPCODE_FORWARD4:   forward4(d, s0, s1, s2);        break;
171                         case Shader::OPCODE_REFLECT1:   reflect1(d, s0, s1);            break;
172                         case Shader::OPCODE_REFLECT2:   reflect2(d, s0, s1);            break;
173                         case Shader::OPCODE_REFLECT3:   reflect3(d, s0, s1);            break;
174                         case Shader::OPCODE_REFLECT4:   reflect4(d, s0, s1);            break;
175                         case Shader::OPCODE_REFRACT1:   refract1(d, s0, s1, s2.x);      break;
176                         case Shader::OPCODE_REFRACT2:   refract2(d, s0, s1, s2.x);      break;
177                         case Shader::OPCODE_REFRACT3:   refract3(d, s0, s1, s2.x);      break;
178                         case Shader::OPCODE_REFRACT4:   refract4(d, s0, s1, s2.x);      break;
179                         case Shader::OPCODE_DP1:        dp1(d, s0, s1);                 break;
180                         case Shader::OPCODE_DP2:        dp2(d, s0, s1);                 break;
181                         case Shader::OPCODE_DP3:        dp3(d, s0, s1);                 break;
182                         case Shader::OPCODE_DP4:        dp4(d, s0, s1);                 break;
183                         case Shader::OPCODE_DET2:       det2(d, s0, s1);                break;
184                         case Shader::OPCODE_DET3:       det3(d, s0, s1, s2);            break;
185                         case Shader::OPCODE_DET4:       det4(d, s0, s1, s2, s3);        break;
186                         case Shader::OPCODE_ATT:        att(d, s0, s1);                 break;
187                         case Shader::OPCODE_EXP2X:      exp2x(d, s0, pp);               break;
188                         case Shader::OPCODE_EXP2:       exp2(d, s0, pp);                break;
189                         case Shader::OPCODE_EXPP:       expp(d, s0, version);           break;
190                         case Shader::OPCODE_EXP:        exp(d, s0, pp);                 break;
191                         case Shader::OPCODE_FRC:        frc(d, s0);                     break;
192                         case Shader::OPCODE_TRUNC:      trunc(d, s0);                   break;
193                         case Shader::OPCODE_FLOOR:      floor(d, s0);                   break;
194                         case Shader::OPCODE_ROUND:      round(d, s0);                   break;
195                         case Shader::OPCODE_ROUNDEVEN:  roundEven(d, s0);               break;
196                         case Shader::OPCODE_CEIL:       ceil(d, s0);                    break;
197                         case Shader::OPCODE_LIT:        lit(d, s0);                     break;
198                         case Shader::OPCODE_LOG2X:      log2x(d, s0, pp);               break;
199                         case Shader::OPCODE_LOG2:       log2(d, s0, pp);                break;
200                         case Shader::OPCODE_LOGP:       logp(d, s0, version);           break;
201                         case Shader::OPCODE_LOG:        log(d, s0, pp);                 break;
202                         case Shader::OPCODE_LRP:        lrp(d, s0, s1, s2);             break;
203                         case Shader::OPCODE_STEP:       step(d, s0, s1);                break;
204                         case Shader::OPCODE_SMOOTH:     smooth(d, s0, s1, s2);          break;
205                         case Shader::OPCODE_FLOATBITSTOINT:
206                         case Shader::OPCODE_FLOATBITSTOUINT:
207                         case Shader::OPCODE_INTBITSTOFLOAT:
208                         case Shader::OPCODE_UINTBITSTOFLOAT: d = s0;                    break;
209                         case Shader::OPCODE_PACKSNORM2x16:   packSnorm2x16(d, s0);      break;
210                         case Shader::OPCODE_PACKUNORM2x16:   packUnorm2x16(d, s0);      break;
211                         case Shader::OPCODE_PACKHALF2x16:    packHalf2x16(d, s0);       break;
212                         case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0);    break;
213                         case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0);    break;
214                         case Shader::OPCODE_UNPACKHALF2x16:  unpackHalf2x16(d, s0);     break;
215                         case Shader::OPCODE_M3X2:       M3X2(d, s0, src1);              break;
216                         case Shader::OPCODE_M3X3:       M3X3(d, s0, src1);              break;
217                         case Shader::OPCODE_M3X4:       M3X4(d, s0, src1);              break;
218                         case Shader::OPCODE_M4X3:       M4X3(d, s0, src1);              break;
219                         case Shader::OPCODE_M4X4:       M4X4(d, s0, src1);              break;
220                         case Shader::OPCODE_MAD:        mad(d, s0, s1, s2);             break;
221                         case Shader::OPCODE_IMAD:       imad(d, s0, s1, s2);            break;
222                         case Shader::OPCODE_MAX:        max(d, s0, s1);                 break;
223                         case Shader::OPCODE_IMAX:       imax(d, s0, s1);                break;
224                         case Shader::OPCODE_UMAX:       umax(d, s0, s1);                break;
225                         case Shader::OPCODE_MIN:        min(d, s0, s1);                 break;
226                         case Shader::OPCODE_IMIN:       imin(d, s0, s1);                break;
227                         case Shader::OPCODE_UMIN:       umin(d, s0, s1);                break;
228                         case Shader::OPCODE_MOV:        mov(d, s0, integer);            break;
229                         case Shader::OPCODE_MOVA:       mov(d, s0, true);               break;
230                         case Shader::OPCODE_NEG:        neg(d, s0);                     break;
231                         case Shader::OPCODE_INEG:       ineg(d, s0);                    break;
232                         case Shader::OPCODE_F2B:        f2b(d, s0);                     break;
233                         case Shader::OPCODE_B2F:        b2f(d, s0);                     break;
234                         case Shader::OPCODE_F2I:        f2i(d, s0);                     break;
235                         case Shader::OPCODE_I2F:        i2f(d, s0);                     break;
236                         case Shader::OPCODE_F2U:        f2u(d, s0);                     break;
237                         case Shader::OPCODE_U2F:        u2f(d, s0);                     break;
238                         case Shader::OPCODE_I2B:        i2b(d, s0);                     break;
239                         case Shader::OPCODE_B2I:        b2i(d, s0);                     break;
240                         case Shader::OPCODE_MUL:        mul(d, s0, s1);                 break;
241                         case Shader::OPCODE_IMUL:       imul(d, s0, s1);                break;
242                         case Shader::OPCODE_NRM2:       nrm2(d, s0, pp);                break;
243                         case Shader::OPCODE_NRM3:       nrm3(d, s0, pp);                break;
244                         case Shader::OPCODE_NRM4:       nrm4(d, s0, pp);                break;
245                         case Shader::OPCODE_POWX:       powx(d, s0, s1, pp);            break;
246                         case Shader::OPCODE_POW:        pow(d, s0, s1, pp);             break;
247                         case Shader::OPCODE_RCPX:       rcpx(d, s0, pp);                break;
248                         case Shader::OPCODE_DIV:        div(d, s0, s1);                 break;
249                         case Shader::OPCODE_IDIV:       idiv(d, s0, s1);                break;
250                         case Shader::OPCODE_UDIV:       udiv(d, s0, s1);                break;
251                         case Shader::OPCODE_MOD:        mod(d, s0, s1);                 break;
252                         case Shader::OPCODE_IMOD:       imod(d, s0, s1);                break;
253                         case Shader::OPCODE_UMOD:       umod(d, s0, s1);                break;
254                         case Shader::OPCODE_SHL:        shl(d, s0, s1);                 break;
255                         case Shader::OPCODE_ISHR:       ishr(d, s0, s1);                break;
256                         case Shader::OPCODE_USHR:       ushr(d, s0, s1);                break;
257                         case Shader::OPCODE_RSQX:       rsqx(d, s0, pp);                break;
258                         case Shader::OPCODE_SQRT:       sqrt(d, s0, pp);                break;
259                         case Shader::OPCODE_RSQ:        rsq(d, s0, pp);                 break;
260                         case Shader::OPCODE_LEN2:       len2(d.x, s0, pp);              break;
261                         case Shader::OPCODE_LEN3:       len3(d.x, s0, pp);              break;
262                         case Shader::OPCODE_LEN4:       len4(d.x, s0, pp);              break;
263                         case Shader::OPCODE_DIST1:      dist1(d.x, s0, s1, pp);         break;
264                         case Shader::OPCODE_DIST2:      dist2(d.x, s0, s1, pp);         break;
265                         case Shader::OPCODE_DIST3:      dist3(d.x, s0, s1, pp);         break;
266                         case Shader::OPCODE_DIST4:      dist4(d.x, s0, s1, pp);         break;
267                         case Shader::OPCODE_SGE:        step(d, s1, s0);                break;
268                         case Shader::OPCODE_SGN:        sgn(d, s0);                     break;
269                         case Shader::OPCODE_ISGN:       isgn(d, s0);                    break;
270                         case Shader::OPCODE_SINCOS:     sincos(d, s0, pp);              break;
271                         case Shader::OPCODE_COS:        cos(d, s0, pp);                 break;
272                         case Shader::OPCODE_SIN:        sin(d, s0, pp);                 break;
273                         case Shader::OPCODE_TAN:        tan(d, s0);                     break;
274                         case Shader::OPCODE_ACOS:       acos(d, s0);                    break;
275                         case Shader::OPCODE_ASIN:       asin(d, s0);                    break;
276                         case Shader::OPCODE_ATAN:       atan(d, s0);                    break;
277                         case Shader::OPCODE_ATAN2:      atan2(d, s0, s1);               break;
278                         case Shader::OPCODE_COSH:       cosh(d, s0, pp);                break;
279                         case Shader::OPCODE_SINH:       sinh(d, s0, pp);                break;
280                         case Shader::OPCODE_TANH:       tanh(d, s0, pp);                break;
281                         case Shader::OPCODE_ACOSH:      acosh(d, s0, pp);               break;
282                         case Shader::OPCODE_ASINH:      asinh(d, s0, pp);               break;
283                         case Shader::OPCODE_ATANH:      atanh(d, s0, pp);               break;
284                         case Shader::OPCODE_SLT:        slt(d, s0, s1);                 break;
285                         case Shader::OPCODE_SUB:        sub(d, s0, s1);                 break;
286                         case Shader::OPCODE_ISUB:       isub(d, s0, s1);                break;
287                         case Shader::OPCODE_BREAK:      BREAK();                        break;
288                         case Shader::OPCODE_BREAKC:     BREAKC(s0, s1, control);        break;
289                         case Shader::OPCODE_BREAKP:     BREAKP(src0);                   break;
290                         case Shader::OPCODE_CONTINUE:   CONTINUE();                     break;
291                         case Shader::OPCODE_TEST:       TEST();                         break;
292                         case Shader::OPCODE_CALL:       CALL(dst.label, dst.callSite);  break;
293                         case Shader::OPCODE_CALLNZ:     CALLNZ(dst.label, dst.callSite, src0); break;
294                         case Shader::OPCODE_ELSE:       ELSE();                         break;
295                         case Shader::OPCODE_ENDIF:      ENDIF();                        break;
296                         case Shader::OPCODE_ENDLOOP:    ENDLOOP();                      break;
297                         case Shader::OPCODE_ENDREP:     ENDREP();                       break;
298                         case Shader::OPCODE_ENDWHILE:   ENDWHILE();                     break;
299                         case Shader::OPCODE_ENDSWITCH:  ENDSWITCH();                    break;
300                         case Shader::OPCODE_IF:         IF(src0);                       break;
301                         case Shader::OPCODE_IFC:        IFC(s0, s1, control);           break;
302                         case Shader::OPCODE_LABEL:      LABEL(dst.index);               break;
303                         case Shader::OPCODE_LOOP:       LOOP(src1);                     break;
304                         case Shader::OPCODE_REP:        REP(src0);                      break;
305                         case Shader::OPCODE_WHILE:      WHILE(src0);                    break;
306                         case Shader::OPCODE_SWITCH:     SWITCH();                       break;
307                         case Shader::OPCODE_RET:        RET();                          break;
308                         case Shader::OPCODE_LEAVE:      LEAVE();                        break;
309                         case Shader::OPCODE_CMP:        cmp(d, s0, s1, control);        break;
310                         case Shader::OPCODE_ICMP:       icmp(d, s0, s1, control);       break;
311                         case Shader::OPCODE_UCMP:       ucmp(d, s0, s1, control);       break;
312                         case Shader::OPCODE_SELECT:     select(d, s0, s1, s2);          break;
313                         case Shader::OPCODE_EXTRACT:    extract(d.x, s0, s1.x);         break;
314                         case Shader::OPCODE_INSERT:     insert(d, s0, s1.x, s2.x);      break;
315                         case Shader::OPCODE_ALL:        all(d.x, s0);                   break;
316                         case Shader::OPCODE_ANY:        any(d.x, s0);                   break;
317                         case Shader::OPCODE_NOT:        not(d, s0);                     break;
318                         case Shader::OPCODE_OR:         or(d, s0, s1);                  break;
319                         case Shader::OPCODE_XOR:        xor(d, s0, s1);                 break;
320                         case Shader::OPCODE_AND:        and(d, s0, s1);                 break;
321                         case Shader::OPCODE_EQ:         equal(d, s0, s1);               break;
322                         case Shader::OPCODE_NE:         notEqual(d, s0, s1);            break;
323                         case Shader::OPCODE_TEXLDL:     TEXLDL(d, s0, src1);            break;
324                         case Shader::OPCODE_TEX:        TEX(d, s0, src1);               break;
325                         case Shader::OPCODE_TEXOFFSET:  TEXOFFSET(d, s0, src1, s2, s3); break;
326                         case Shader::OPCODE_TEXLDLOFFSET: TEXLDL(d, s0, src1, s2);      break;
327                         case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2);    break;
328                         case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCH(d, s0, src1, s2, s3); break;
329                         case Shader::OPCODE_TEXGRAD:    TEXGRAD(d, s0, src1, s2, s3);   break;
330                         case Shader::OPCODE_TEXGRADOFFSET: TEXGRAD(d, s0, src1, s2, s3, s4); break;
331                         case Shader::OPCODE_TEXSIZE:    TEXSIZE(d, s0.x, src1);         break;
332                         case Shader::OPCODE_END:                                        break;
333                         default:
334                                 ASSERT(false);
335                         }
336
337                         if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_NOP)
338                         {
339                                 if(dst.integer)
340                                 {
341                                         switch(opcode)
342                                         {
343                                         case Shader::OPCODE_DIV:
344                                                 if(dst.x) d.x = Trunc(d.x);
345                                                 if(dst.y) d.y = Trunc(d.y);
346                                                 if(dst.z) d.z = Trunc(d.z);
347                                                 if(dst.w) d.w = Trunc(d.w);
348                                                 break;
349                                         default:
350                                                 break;   // No truncation to integer required when arguments are integer
351                                         }
352                                 }
353
354                                 if(dst.saturate)
355                                 {
356                                         if(dst.x) d.x = Max(d.x, Float4(0.0f));
357                                         if(dst.y) d.y = Max(d.y, Float4(0.0f));
358                                         if(dst.z) d.z = Max(d.z, Float4(0.0f));
359                                         if(dst.w) d.w = Max(d.w, Float4(0.0f));
360
361                                         if(dst.x) d.x = Min(d.x, Float4(1.0f));
362                                         if(dst.y) d.y = Min(d.y, Float4(1.0f));
363                                         if(dst.z) d.z = Min(d.z, Float4(1.0f));
364                                         if(dst.w) d.w = Min(d.w, Float4(1.0f));
365                                 }
366
367                                 if(instruction->isPredicated())
368                                 {
369                                         Vector4f pDst;   // FIXME: Rename
370
371                                         switch(dst.type)
372                                         {
373                                         case Shader::PARAMETER_VOID: break;
374                                         case Shader::PARAMETER_TEMP:
375                                                 if(dst.rel.type == Shader::PARAMETER_VOID)
376                                                 {
377                                                         if(dst.x) pDst.x = r[dst.index].x;
378                                                         if(dst.y) pDst.y = r[dst.index].y;
379                                                         if(dst.z) pDst.z = r[dst.index].z;
380                                                         if(dst.w) pDst.w = r[dst.index].w;
381                                                 }
382                                                 else
383                                                 {
384                                                         Int a = relativeAddress(dst);
385
386                                                         if(dst.x) pDst.x = r[dst.index + a].x;
387                                                         if(dst.y) pDst.y = r[dst.index + a].y;
388                                                         if(dst.z) pDst.z = r[dst.index + a].z;
389                                                         if(dst.w) pDst.w = r[dst.index + a].w;
390                                                 }
391                                                 break;
392                                         case Shader::PARAMETER_ADDR: pDst = a0; break;
393                                         case Shader::PARAMETER_RASTOUT:
394                                                 switch(dst.index)
395                                                 {
396                                                 case 0:
397                                                         if(dst.x) pDst.x = o[Pos].x;
398                                                         if(dst.y) pDst.y = o[Pos].y;
399                                                         if(dst.z) pDst.z = o[Pos].z;
400                                                         if(dst.w) pDst.w = o[Pos].w;
401                                                         break;
402                                                 case 1:
403                                                         pDst.x = o[Fog].x;
404                                                         break;
405                                                 case 2:
406                                                         pDst.x = o[Pts].y;
407                                                         break;
408                                                 default:
409                                                         ASSERT(false);
410                                                 }
411                                                 break;
412                                         case Shader::PARAMETER_ATTROUT:
413                                                 if(dst.x) pDst.x = o[C0 + dst.index].x;
414                                                 if(dst.y) pDst.y = o[C0 + dst.index].y;
415                                                 if(dst.z) pDst.z = o[C0 + dst.index].z;
416                                                 if(dst.w) pDst.w = o[C0 + dst.index].w;
417                                                 break;
418                                         case Shader::PARAMETER_TEXCRDOUT:
419                                 //      case Shader::PARAMETER_OUTPUT:
420                                                 if(version < 0x0300)
421                                                 {
422                                                         if(dst.x) pDst.x = o[T0 + dst.index].x;
423                                                         if(dst.y) pDst.y = o[T0 + dst.index].y;
424                                                         if(dst.z) pDst.z = o[T0 + dst.index].z;
425                                                         if(dst.w) pDst.w = o[T0 + dst.index].w;
426                                                 }
427                                                 else
428                                                 {
429                                                         if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
430                                                         {
431                                                                 if(dst.x) pDst.x = o[dst.index].x;
432                                                                 if(dst.y) pDst.y = o[dst.index].y;
433                                                                 if(dst.z) pDst.z = o[dst.index].z;
434                                                                 if(dst.w) pDst.w = o[dst.index].w;
435                                                         }
436                                                         else
437                                                         {
438                                                                 Int a = relativeAddress(dst);
439
440                                                                 if(dst.x) pDst.x = o[dst.index + a].x;
441                                                                 if(dst.y) pDst.y = o[dst.index + a].y;
442                                                                 if(dst.z) pDst.z = o[dst.index + a].z;
443                                                                 if(dst.w) pDst.w = o[dst.index + a].w;
444                                                         }
445                                                 }
446                                                 break;
447                                         case Shader::PARAMETER_LABEL:                break;
448                                         case Shader::PARAMETER_PREDICATE: pDst = p0; break;
449                                         case Shader::PARAMETER_INPUT:                break;
450                                         default:
451                                                 ASSERT(false);
452                                         }
453
454                                         Int4 enable = enableMask(instruction);
455
456                                         Int4 xEnable = enable;
457                                         Int4 yEnable = enable;
458                                         Int4 zEnable = enable;
459                                         Int4 wEnable = enable;
460
461                                         if(predicate)
462                                         {
463                                                 unsigned char pSwizzle = instruction->predicateSwizzle;
464
465                                                 Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03];
466                                                 Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03];
467                                                 Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03];
468                                                 Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03];
469
470                                                 if(!instruction->predicateNot)
471                                                 {
472                                                         if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
473                                                         if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
474                                                         if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
475                                                         if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
476                                                 }
477                                                 else
478                                                 {
479                                                         if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
480                                                         if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
481                                                         if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
482                                                         if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
483                                                 }
484                                         }
485
486                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
487                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
488                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
489                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
490
491                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
492                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
493                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
494                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
495                                 }
496
497                                 switch(dst.type)
498                                 {
499                                 case Shader::PARAMETER_VOID:
500                                         break;
501                                 case Shader::PARAMETER_TEMP:
502                                         if(dst.rel.type == Shader::PARAMETER_VOID)
503                                         {
504                                                 if(dst.x) r[dst.index].x = d.x;
505                                                 if(dst.y) r[dst.index].y = d.y;
506                                                 if(dst.z) r[dst.index].z = d.z;
507                                                 if(dst.w) r[dst.index].w = d.w;
508                                         }
509                                         else
510                                         {
511                                                 Int a = relativeAddress(dst);
512
513                                                 if(dst.x) r[dst.index + a].x = d.x;
514                                                 if(dst.y) r[dst.index + a].y = d.y;
515                                                 if(dst.z) r[dst.index + a].z = d.z;
516                                                 if(dst.w) r[dst.index + a].w = d.w;
517                                         }
518                                         break;
519                                 case Shader::PARAMETER_ADDR:
520                                         if(dst.x) a0.x = d.x;
521                                         if(dst.y) a0.y = d.y;
522                                         if(dst.z) a0.z = d.z;
523                                         if(dst.w) a0.w = d.w;
524                                         break;
525                                 case Shader::PARAMETER_RASTOUT:
526                                         switch(dst.index)
527                                         {
528                                         case 0:
529                                                 if(dst.x) o[Pos].x = d.x;
530                                                 if(dst.y) o[Pos].y = d.y;
531                                                 if(dst.z) o[Pos].z = d.z;
532                                                 if(dst.w) o[Pos].w = d.w;
533                                                 break;
534                                         case 1:
535                                                 o[Fog].x = d.x;
536                                                 break;
537                                         case 2:
538                                                 o[Pts].y = d.x;
539                                                 break;
540                                         default:        ASSERT(false);
541                                         }
542                                         break;
543                                 case Shader::PARAMETER_ATTROUT:
544                                         if(dst.x) o[C0 + dst.index].x = d.x;
545                                         if(dst.y) o[C0 + dst.index].y = d.y;
546                                         if(dst.z) o[C0 + dst.index].z = d.z;
547                                         if(dst.w) o[C0 + dst.index].w = d.w;
548                                         break;
549                                 case Shader::PARAMETER_TEXCRDOUT:
550                         //      case Shader::PARAMETER_OUTPUT:
551                                         if(version < 0x0300)
552                                         {
553                                                 if(dst.x) o[T0 + dst.index].x = d.x;
554                                                 if(dst.y) o[T0 + dst.index].y = d.y;
555                                                 if(dst.z) o[T0 + dst.index].z = d.z;
556                                                 if(dst.w) o[T0 + dst.index].w = d.w;
557                                         }
558                                         else
559                                         {
560                                                 if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
561                                                 {
562                                                         if(dst.x) o[dst.index].x = d.x;
563                                                         if(dst.y) o[dst.index].y = d.y;
564                                                         if(dst.z) o[dst.index].z = d.z;
565                                                         if(dst.w) o[dst.index].w = d.w;
566                                                 }
567                                                 else
568                                                 {
569                                                         Int a = relativeAddress(dst);
570
571                                                         if(dst.x) o[dst.index + a].x = d.x;
572                                                         if(dst.y) o[dst.index + a].y = d.y;
573                                                         if(dst.z) o[dst.index + a].z = d.z;
574                                                         if(dst.w) o[dst.index + a].w = d.w;
575                                                 }
576                                         }
577                                         break;
578                                 case Shader::PARAMETER_LABEL:             break;
579                                 case Shader::PARAMETER_PREDICATE: p0 = d; break;
580                                 case Shader::PARAMETER_INPUT:             break;
581                                 default:
582                                         ASSERT(false);
583                                 }
584                         }
585                 }
586
587                 if(currentLabel != -1)
588                 {
589                         Nucleus::setInsertBlock(returnBlock);
590                 }
591         }
592
593         void VertexProgram::passThrough()
594         {
595                 if(shader)
596                 {
597                         for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
598                         {
599                                 unsigned char usage = shader->output[i][0].usage;
600
601                                 switch(usage)
602                                 {
603                                 case 0xFF:
604                                         continue;
605                                 case Shader::USAGE_PSIZE:
606                                         o[i].y = v[i].x;
607                                         break;
608                                 case Shader::USAGE_TEXCOORD:
609                                         o[i].x = v[i].x;
610                                         o[i].y = v[i].y;
611                                         o[i].z = v[i].z;
612                                         o[i].w = v[i].w;
613                                         break;
614                                 case Shader::USAGE_POSITION:
615                                         o[i].x = v[i].x;
616                                         o[i].y = v[i].y;
617                                         o[i].z = v[i].z;
618                                         o[i].w = v[i].w;
619                                         break;
620                                 case Shader::USAGE_COLOR:
621                                         o[i].x = v[i].x;
622                                         o[i].y = v[i].y;
623                                         o[i].z = v[i].z;
624                                         o[i].w = v[i].w;
625                                         break;
626                                 case Shader::USAGE_FOG:
627                                         o[i].x = v[i].x;
628                                         break;
629                                 default:
630                                         ASSERT(false);
631                                 }
632                         }
633                 }
634                 else
635                 {
636                         o[Pos].x = v[PositionT].x;
637                         o[Pos].y = v[PositionT].y;
638                         o[Pos].z = v[PositionT].z;
639                         o[Pos].w = v[PositionT].w;
640
641                         for(int i = 0; i < 2; i++)
642                         {
643                                 o[C0 + i].x = v[Color0 + i].x;
644                                 o[C0 + i].y = v[Color0 + i].y;
645                                 o[C0 + i].z = v[Color0 + i].z;
646                                 o[C0 + i].w = v[Color0 + i].w;
647                         }
648
649                         for(int i = 0; i < 8; i++)
650                         {
651                                 o[T0 + i].x = v[TexCoord0 + i].x;
652                                 o[T0 + i].y = v[TexCoord0 + i].y;
653                                 o[T0 + i].z = v[TexCoord0 + i].z;
654                                 o[T0 + i].w = v[TexCoord0 + i].w;
655                         }
656
657                         o[Pts].y = v[PointSize].x;
658                 }
659         }
660
661         Vector4f VertexProgram::fetchRegister(const Src &src, unsigned int offset)
662         {
663                 Vector4f reg;
664                 unsigned int i = src.index + offset;
665
666                 switch(src.type)
667                 {
668                 case Shader::PARAMETER_TEMP:
669                         if(src.rel.type == Shader::PARAMETER_VOID)
670                         {
671                                 reg = r[i];
672                         }
673                         else
674                         {
675                                 reg = r[i + relativeAddress(src, src.bufferIndex)];
676                         }
677                         break;
678                 case Shader::PARAMETER_CONST:
679                         reg = readConstant(src, offset);
680                         break;
681                 case Shader::PARAMETER_INPUT:
682                         if(src.rel.type == Shader::PARAMETER_VOID)
683                         {
684                                 reg = v[i];
685                         }
686                         else
687                         {
688                                 reg = v[i + relativeAddress(src, src.bufferIndex)];
689                         }
690                         break;
691                 case Shader::PARAMETER_VOID: return r[0];   // Dummy
692                 case Shader::PARAMETER_FLOAT4LITERAL:
693                         reg.x = Float4(src.value[0]);
694                         reg.y = Float4(src.value[1]);
695                         reg.z = Float4(src.value[2]);
696                         reg.w = Float4(src.value[3]);
697                         break;
698                 case Shader::PARAMETER_ADDR:      reg = a0; break;
699                 case Shader::PARAMETER_CONSTBOOL: return r[0];   // Dummy
700                 case Shader::PARAMETER_CONSTINT:  return r[0];   // Dummy
701                 case Shader::PARAMETER_LOOP:      return r[0];   // Dummy
702                 case Shader::PARAMETER_PREDICATE: return r[0];   // Dummy
703                 case Shader::PARAMETER_SAMPLER:
704                         if(src.rel.type == Shader::PARAMETER_VOID)
705                         {
706                                 reg.x = As<Float4>(Int4(i));
707                         }
708                         else if(src.rel.type == Shader::PARAMETER_TEMP)
709                         {
710                                 reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x));
711                         }
712                         return reg;
713                 case Shader::PARAMETER_OUTPUT:
714                         if(src.rel.type == Shader::PARAMETER_VOID)
715                         {
716                                 reg = o[i];
717                         }
718                         else
719                         {
720                                 reg = o[i + relativeAddress(src, src.bufferIndex)];
721                         }
722                         break;
723                 case Shader::PARAMETER_MISCTYPE:
724                         reg.x = As<Float>(Int(instanceID));
725                         return reg;
726                 default:
727                         ASSERT(false);
728                 }
729
730                 const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
731                 const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
732                 const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
733                 const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
734
735                 Vector4f mod;
736
737                 switch(src.modifier)
738                 {
739                 case Shader::MODIFIER_NONE:
740                         mod.x = x;
741                         mod.y = y;
742                         mod.z = z;
743                         mod.w = w;
744                         break;
745                 case Shader::MODIFIER_NEGATE:
746                         mod.x = -x;
747                         mod.y = -y;
748                         mod.z = -z;
749                         mod.w = -w;
750                         break;
751                 case Shader::MODIFIER_ABS:
752                         mod.x = Abs(x);
753                         mod.y = Abs(y);
754                         mod.z = Abs(z);
755                         mod.w = Abs(w);
756                         break;
757                 case Shader::MODIFIER_ABS_NEGATE:
758                         mod.x = -Abs(x);
759                         mod.y = -Abs(y);
760                         mod.z = -Abs(z);
761                         mod.w = -Abs(w);
762                         break;
763                 case Shader::MODIFIER_NOT:
764                         mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
765                         mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
766                         mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
767                         mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
768                         break;
769                 default:
770                         ASSERT(false);
771                 }
772
773                 return mod;
774         }
775
776         RValue<Pointer<Byte>> VertexProgram::uniformAddress(int bufferIndex, unsigned int index)
777         {
778                 if(bufferIndex == -1)
779                 {
780                         return data + OFFSET(DrawData, vs.c[index]);
781                 }
782                 else
783                 {
784                         return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, vs.u[bufferIndex])) + index;
785                 }
786         }
787
788         RValue<Pointer<Byte>> VertexProgram::uniformAddress(int bufferIndex, unsigned int index, Int& offset)
789         {
790                 return uniformAddress(bufferIndex, index) + offset * sizeof(float4);
791         }
792
793         Vector4f VertexProgram::readConstant(const Src &src, unsigned int offset)
794         {
795                 Vector4f c;
796                 unsigned int i = src.index + offset;
797
798                 if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
799                 {
800                         c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i));
801
802                         c.x = c.x.xxxx;
803                         c.y = c.y.yyyy;
804                         c.z = c.z.zzzz;
805                         c.w = c.w.wwww;
806
807                         if(shader->containsDefineInstruction())   // Constant may be known at compile time
808                         {
809                                 for(size_t j = 0; j < shader->getLength(); j++)
810                                 {
811                                         const Shader::Instruction &instruction = *shader->getInstruction(j);
812
813                                         if(instruction.opcode == Shader::OPCODE_DEF)
814                                         {
815                                                 if(instruction.dst.index == i)
816                                                 {
817                                                         c.x = Float4(instruction.src[0].value[0]);
818                                                         c.y = Float4(instruction.src[0].value[1]);
819                                                         c.z = Float4(instruction.src[0].value[2]);
820                                                         c.w = Float4(instruction.src[0].value[3]);
821
822                                                         break;
823                                                 }
824                                         }
825                                 }
826                         }
827                 }
828                 else if(src.rel.type == Shader::PARAMETER_LOOP)
829                 {
830                         Int loopCounter = aL[loopDepth];
831
832                         c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, loopCounter));
833
834                         c.x = c.x.xxxx;
835                         c.y = c.y.yyyy;
836                         c.z = c.z.zzzz;
837                         c.w = c.w.wwww;
838                 }
839                 else
840                 {
841                         if(src.rel.deterministic)
842                         {
843                                 Int a = relativeAddress(src, src.bufferIndex);
844
845                                 c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));
846
847                                 c.x = c.x.xxxx;
848                                 c.y = c.y.yyyy;
849                                 c.z = c.z.zzzz;
850                                 c.w = c.w.wwww;
851                         }
852                         else
853                         {
854                                 int component = src.rel.swizzle & 0x03;
855                                 Float4 a;
856
857                                 switch(src.rel.type)
858                                 {
859                                 case Shader::PARAMETER_ADDR:   a = a0[component]; break;
860                                 case Shader::PARAMETER_TEMP:   a = r[src.rel.index][component]; break;
861                                 case Shader::PARAMETER_INPUT:  a = v[src.rel.index][component]; break;
862                                 case Shader::PARAMETER_OUTPUT: a = o[src.rel.index][component]; break;
863                                 case Shader::PARAMETER_CONST:  a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break;
864                                 default: ASSERT(false);
865                                 }
866
867                                 Int4 index = Int4(i) + As<Int4>(a) * Int4(src.rel.scale);
868
869                                 index = Min(As<UInt4>(index), UInt4(VERTEX_UNIFORM_VECTORS));   // Clamp to constant register range, c[VERTEX_UNIFORM_VECTORS] = {0, 0, 0, 0}
870
871                                 Int index0 = Extract(index, 0);
872                                 Int index1 = Extract(index, 1);
873                                 Int index2 = Extract(index, 2);
874                                 Int index3 = Extract(index, 3);
875
876                                 c.x = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index0), 16);
877                                 c.y = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index1), 16);
878                                 c.z = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index2), 16);
879                                 c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index3), 16);
880
881                                 transpose4x4(c.x, c.y, c.z, c.w);
882                         }
883                 }
884
885                 return c;
886         }
887
888         Int VertexProgram::relativeAddress(const Shader::Parameter &var, int bufferIndex)
889         {
890                 ASSERT(var.rel.deterministic);
891
892                 if(var.rel.type == Shader::PARAMETER_TEMP)
893                 {
894                         return As<Int>(Extract(r[var.rel.index].x, 0)) * var.rel.scale;
895                 }
896                 else if(var.rel.type == Shader::PARAMETER_INPUT)
897                 {
898                         return As<Int>(Extract(v[var.rel.index].x, 0)) * var.rel.scale;
899                 }
900                 else if(var.rel.type == Shader::PARAMETER_OUTPUT)
901                 {
902                         return As<Int>(Extract(o[var.rel.index].x, 0)) * var.rel.scale;
903                 }
904                 else if(var.rel.type == Shader::PARAMETER_CONST)
905                 {
906                         return *Pointer<Int>(uniformAddress(bufferIndex, var.rel.index)) * var.rel.scale;
907                 }
908                 else if(var.rel.type == Shader::PARAMETER_LOOP)
909                 {
910                         return aL[loopDepth];
911                 }
912                 else ASSERT(false);
913
914                 return 0;
915         }
916
917         Int4 VertexProgram::enableMask(const Shader::Instruction *instruction)
918         {
919                 Int4 enable = instruction->analysisBranch ? Int4(enableStack[enableIndex]) : Int4(0xFFFFFFFF);
920
921                 if(!whileTest)
922                 {
923                         if(shader->containsBreakInstruction() && instruction->analysisBreak)
924                         {
925                                 enable &= enableBreak;
926                         }
927
928                         if(shader->containsContinueInstruction() && instruction->analysisContinue)
929                         {
930                                 enable &= enableContinue;
931                         }
932
933                         if(shader->containsLeaveInstruction() && instruction->analysisLeave)
934                         {
935                                 enable &= enableLeave;
936                         }
937                 }
938
939                 return enable;
940         }
941
942         void VertexProgram::M3X2(Vector4f &dst, Vector4f &src0, Src &src1)
943         {
944                 Vector4f row0 = fetchRegister(src1, 0);
945                 Vector4f row1 = fetchRegister(src1, 1);
946
947                 dst.x = dot3(src0, row0);
948                 dst.y = dot3(src0, row1);
949         }
950
951         void VertexProgram::M3X3(Vector4f &dst, Vector4f &src0, Src &src1)
952         {
953                 Vector4f row0 = fetchRegister(src1, 0);
954                 Vector4f row1 = fetchRegister(src1, 1);
955                 Vector4f row2 = fetchRegister(src1, 2);
956
957                 dst.x = dot3(src0, row0);
958                 dst.y = dot3(src0, row1);
959                 dst.z = dot3(src0, row2);
960         }
961
962         void VertexProgram::M3X4(Vector4f &dst, Vector4f &src0, Src &src1)
963         {
964                 Vector4f row0 = fetchRegister(src1, 0);
965                 Vector4f row1 = fetchRegister(src1, 1);
966                 Vector4f row2 = fetchRegister(src1, 2);
967                 Vector4f row3 = fetchRegister(src1, 3);
968
969                 dst.x = dot3(src0, row0);
970                 dst.y = dot3(src0, row1);
971                 dst.z = dot3(src0, row2);
972                 dst.w = dot3(src0, row3);
973         }
974
975         void VertexProgram::M4X3(Vector4f &dst, Vector4f &src0, Src &src1)
976         {
977                 Vector4f row0 = fetchRegister(src1, 0);
978                 Vector4f row1 = fetchRegister(src1, 1);
979                 Vector4f row2 = fetchRegister(src1, 2);
980
981                 dst.x = dot4(src0, row0);
982                 dst.y = dot4(src0, row1);
983                 dst.z = dot4(src0, row2);
984         }
985
986         void VertexProgram::M4X4(Vector4f &dst, Vector4f &src0, Src &src1)
987         {
988                 Vector4f row0 = fetchRegister(src1, 0);
989                 Vector4f row1 = fetchRegister(src1, 1);
990                 Vector4f row2 = fetchRegister(src1, 2);
991                 Vector4f row3 = fetchRegister(src1, 3);
992
993                 dst.x = dot4(src0, row0);
994                 dst.y = dot4(src0, row1);
995                 dst.z = dot4(src0, row2);
996                 dst.w = dot4(src0, row3);
997         }
998
999         void VertexProgram::BREAK()
1000         {
1001                 llvm::BasicBlock *deadBlock = Nucleus::createBasicBlock();
1002                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
1003
1004                 if(breakDepth == 0)
1005                 {
1006                         enableIndex = enableIndex - breakDepth;
1007                         Nucleus::createBr(endBlock);
1008                 }
1009                 else
1010                 {
1011                         enableBreak = enableBreak & ~enableStack[enableIndex];
1012                         Bool allBreak = SignMask(enableBreak) == 0x0;
1013
1014                         enableIndex = enableIndex - breakDepth;
1015                         branch(allBreak, endBlock, deadBlock);
1016                 }
1017
1018                 Nucleus::setInsertBlock(deadBlock);
1019                 enableIndex = enableIndex + breakDepth;
1020         }
1021
1022         void VertexProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control)
1023         {
1024                 Int4 condition;
1025
1026                 switch(control)
1027                 {
1028                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x,  src1.x);   break;
1029                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);             break;
1030                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x);    break;
1031                 case Shader::CONTROL_LT: condition = CmpLT(src0.x,  src1.x);    break;
1032                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x);    break;
1033                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);             break;
1034                 default:
1035                         ASSERT(false);
1036                 }
1037
1038                 BREAK(condition);
1039         }
1040
1041         void VertexProgram::BREAKP(const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
1042         {
1043                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1044
1045                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1046                 {
1047                         condition = ~condition;
1048                 }
1049
1050                 BREAK(condition);
1051         }
1052
1053         void VertexProgram::BREAK(Int4 &condition)
1054         {
1055                 condition &= enableStack[enableIndex];
1056
1057                 llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock();
1058                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
1059
1060                 enableBreak = enableBreak & ~condition;
1061                 Bool allBreak = SignMask(enableBreak) == 0x0;
1062
1063                 enableIndex = enableIndex - breakDepth;
1064                 branch(allBreak, endBlock, continueBlock);
1065
1066                 Nucleus::setInsertBlock(continueBlock);
1067                 enableIndex = enableIndex + breakDepth;
1068         }
1069
1070         void VertexProgram::CONTINUE()   // Emits a masked continue
1071         {
1072                 enableContinue = enableContinue & ~enableStack[enableIndex];   // Clear enableContinue for every component enabled at the current level
1073         }
1074
1075         void VertexProgram::TEST()   // Marks that a while-loop test is being emitted
1076         {
1077                 whileTest = true;   // Reset to false by the constructor and by ENDWHILE
1078         }
1079
1080         void VertexProgram::CALL(int labelIndex, int callSiteIndex)   // Unconditional call: branch to the label's block, then continue in this call site's return block
1081         {
1082                 if(!labelBlock[labelIndex])   // The label's block is created on first reference (LABEL does the same)
1083                 {
1084                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1085                 }
1086
1087                 if(callRetBlock[labelIndex].size() > 1)   // Several return blocks: push the call site index so RET can switch on it
1088                 {
1089                         callStack[stackIndex++] = UInt(callSiteIndex);
1090                 }
1091
1092                 Int4 restoreLeave = enableLeave;   // Saved before the branch into the callee...
1093
1094                 Nucleus::createBr(labelBlock[labelIndex]);
1095                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);   // Emission resumes in this call site's return block
1096
1097                 enableLeave = restoreLeave;   // ...and reassigned in the return block
1098         }
1099
1100         void VertexProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src)   // Conditional call; dispatches on the type of the condition source
1101         {
1102                 if(src.type == Shader::PARAMETER_CONSTBOOL)   // Boolean constant register
1103                 {
1104                         CALLNZb(labelIndex, callSiteIndex, src);
1105                 }
1106                 else if(src.type == Shader::PARAMETER_PREDICATE)   // Predicate register
1107                 {
1108                         CALLNZp(labelIndex, callSiteIndex, src);
1109                 }
1110                 else ASSERT(false);   // No other source type is handled
1111         }
1112
1113         void VertexProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister)   // Call taken when the vs.b[] entry of the draw data is non-zero
1114         {
1115                 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1116
1117                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1118                 {
1119                         condition = !condition;   // Negated source: call when the boolean is zero instead
1120                 }
1121
1122                 if(!labelBlock[labelIndex])   // The label's block is created on first reference
1123                 {
1124                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1125                 }
1126
1127                 if(callRetBlock[labelIndex].size() > 1)   // Several return blocks: push the call site index so RET can switch on it
1128                 {
1129                         callStack[stackIndex++] = UInt(callSiteIndex);   // NOTE(review): emitted before the branch, so it also runs when the call is not taken - confirm this is intended
1130                 }
1131
1132                 Int4 restoreLeave = enableLeave;   // Saved before the branch into the callee...
1133
1134                 branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);   // Not taken: go directly to the return block
1135                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1136
1137                 enableLeave = restoreLeave;   // ...and reassigned in the return block
1138         }
1139
1140         void VertexProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister)   // Call taken when any component of the masked predicate is set
1141         {
1142                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);   // The low two swizzle bits pick the p0 component
1143
1144                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1145                 {
1146                         condition = ~condition;   // Negated predicate: invert the mask
1147                 }
1148
1149                 condition &= enableStack[enableIndex];   // Only components enabled at the current level may take the call
1150
1151                 if(!labelBlock[labelIndex])   // The label's block is created on first reference
1152                 {
1153                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1154                 }
1155
1156                 if(callRetBlock[labelIndex].size() > 1)   // Several return blocks: push the call site index so RET can switch on it
1157                 {
1158                         callStack[stackIndex++] = UInt(callSiteIndex);
1159                 }
1160
1161                 enableIndex++;   // Open a new enable level holding the call condition
1162                 enableStack[enableIndex] = condition;
1163                 Int4 restoreLeave = enableLeave;   // Saved before the branch into the callee...
1164
1165                 Bool notAllFalse = SignMask(condition) != 0;   // Skip the call when SignMask(condition) is zero
1166                 branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1167                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1168
1169                 enableIndex--;   // Close the enable level opened above
1170                 enableLeave = restoreLeave;   // ...and reassigned in the return block
1171         }
1172
1173         void VertexProgram::ELSE()   // Closes the true part of the current IF and opens its false part
1174         {
1175                 ifDepth--;   // Step back to the entry written by the matching IF
1176
1177                 llvm::BasicBlock *falseBlock = ifFalseBlock[ifDepth];
1178                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();   // Stored below so that ENDIF continues in it
1179
1180                 if(isConditionalIf[ifDepth])   // Set by IF(Int4 &): the IF works with an enable mask
1181                 {
1182                         Int4 condition = ~enableStack[enableIndex] & enableStack[enableIndex - 1];   // Enabled in the enclosing level but not in the IF part
1183                         Bool notAllFalse = SignMask(condition) != 0;
1184
1185                         branch(notAllFalse, falseBlock, endBlock);   // Skip the ELSE part when SignMask(condition) is zero
1186
1187                         enableStack[enableIndex] = ~enableStack[enableIndex] & enableStack[enableIndex - 1];   // The same mask becomes the enable mask of the ELSE part
1188                 }
1189                 else   // Cleared by IFb: plain branch without a mask
1190                 {
1191                         Nucleus::createBr(endBlock);   // End of the true part jumps over the false part
1192                         Nucleus::setInsertBlock(falseBlock);
1193                 }
1194
1195                 ifFalseBlock[ifDepth] = endBlock;   // ENDIF reads this entry as its end block
1196
1197                 ifDepth++;   // The IF is still open
1198         }
1199
1200         void VertexProgram::ENDIF()   // Terminates the innermost open IF / ELSE construct
1201         {
1202                 ifDepth--;
1203
1204                 llvm::BasicBlock *endBlock = ifFalseBlock[ifDepth];   // The IF's false block, or the end block stored by ELSE
1205
1206                 Nucleus::createBr(endBlock);
1207                 Nucleus::setInsertBlock(endBlock);
1208
1209                 if(isConditionalIf[ifDepth])   // Undo the enableIndex++ and breakDepth++ done by IF(Int4 &)
1210                 {
1211                         breakDepth--;
1212                         enableIndex--;
1213                 }
1214         }
1215
1216         void VertexProgram::ENDLOOP()   // Closes the innermost LOOP: advances aL, jumps back to the test and continues in the end block
1217         {
1218                 loopRepDepth--;   // Back to the entry written by the matching LOOP
1219
1220                 aL[loopDepth] = aL[loopDepth] + increment[loopDepth];   // FIXME: +=
1221
1222                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1223                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1224
1225                 Nucleus::createBr(testBlock);   // Back edge to the iteration test
1226                 Nucleus::setInsertBlock(endBlock);   // Code after the loop is emitted here
1227
1228                 loopDepth--;
1229                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);   // Reset enableBreak to all bits set after the loop
1230         }
1231
1232         void VertexProgram::ENDREP()   // Closes the innermost REP: jumps back to the test and continues in the end block
1233         {
1234                 loopRepDepth--;   // Back to the entry written by the matching REP
1235
1236                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1237                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1238
1239                 Nucleus::createBr(testBlock);   // Back edge to the iteration test
1240                 Nucleus::setInsertBlock(endBlock);   // Code after the loop is emitted here
1241
1242                 loopDepth--;
1243                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);   // Reset enableBreak to all bits set after the loop
1244         }
1245
1246         void VertexProgram::ENDWHILE()   // Closes the innermost WHILE: jumps back to the test and continues in the end block
1247         {
1248                 loopRepDepth--;   // Back to the entry written by the matching WHILE
1249
1250                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1251                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1252
1253                 Nucleus::createBr(testBlock);   // Back edge to the condition test
1254                 Nucleus::setInsertBlock(endBlock);   // Code after the loop is emitted here
1255
1256                 enableIndex--;   // Close the enable level opened by WHILE
1257                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);   // Reset enableBreak to all bits set after the loop
1258                 whileTest = false;   // Clears the flag set by TEST
1259         }
1260
1261         void VertexProgram::ENDSWITCH()   // Closes the innermost SWITCH and continues in its end block
1262         {
1263                 loopRepDepth--;   // Back to the entry written by the matching SWITCH
1264
1265                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1266
1267                 Nucleus::createBr(loopRepEndBlock[loopRepDepth]);   // Same block as endBlock
1268                 Nucleus::setInsertBlock(endBlock);
1269
1270                 enableIndex--;   // Close the enable level opened by SWITCH
1271                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);   // Reset enableBreak to all bits set after the switch
1272         }
1273
1274         void VertexProgram::IF(const Src &src)   // Opens an IF; dispatches on the type of the condition source
1275         {
1276                 if(src.type == Shader::PARAMETER_CONSTBOOL)   // Boolean constant register
1277                 {
1278                         IFb(src);
1279                 }
1280                 else if(src.type == Shader::PARAMETER_PREDICATE)   // Predicate register
1281                 {
1282                         IFp(src);
1283                 }
1284                 else   // Any other source: the bits of the fetched x component are used as the mask
1285                 {
1286                         Int4 condition = As<Int4>(fetchRegister(src).x);
1287                         IF(condition);
1288                 }
1289         }
1290
1291         void VertexProgram::IFb(const Src &boolRegister)   // Opens an IF on a vs.b[] boolean of the draw data; no enable mask is involved
1292         {
1293                 ASSERT(ifDepth < 24 + 4);   // Guards the index used for isConditionalIf / ifFalseBlock below
1294
1295                 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1296
1297                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1298                 {
1299                         condition = !condition;   // Negated source: take the true part when the boolean is zero
1300                 }
1301
1302                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1303                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1304
1305                 branch(condition, trueBlock, falseBlock);
1306
1307                 isConditionalIf[ifDepth] = false;   // Tells ELSE / ENDIF not to touch the enable stack
1308                 ifFalseBlock[ifDepth] = falseBlock;   // Picked up by ELSE or ENDIF
1309
1310                 ifDepth++;
1311         }
1312
1313         void VertexProgram::IFp(const Src &predicateRegister)   // Opens an IF whose mask is a component of predicate register p0
1314         {
1315                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);   // The low two swizzle bits pick the p0 component
1316
1317                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1318                 {
1319                         condition = ~condition;   // Negated predicate: invert the mask
1320                 }
1321
1322                 IF(condition);   // The masked IF itself is emitted by IF(Int4 &)
1323         }
1324
1325         void VertexProgram::IFC(Vector4f &src0, Vector4f &src1, Control control)   // Opens an IF on a comparison of the x components of src0 and src1
1326         {
1327                 Int4 condition;
1328
1329                 switch(control)   // Select the comparison that produces the mask
1330                 {
1331                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x,  src1.x);   break;
1332                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);             break;
1333                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x);    break;
1334                 case Shader::CONTROL_LT: condition = CmpLT(src0.x,  src1.x);    break;
1335                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x);    break;
1336                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);             break;
1337                 default:
1338                         ASSERT(false);   // Unexpected control value; 'condition' is not assigned on this path
1339                 }
1340
1341                 IF(condition);   // The masked IF itself is emitted by IF(Int4 &)
1342         }
1343
1344         void VertexProgram::IF(Int4 &condition)   // Opens a masked IF; note that 'condition' is modified in place
1345         {
1346                 condition &= enableStack[enableIndex];   // Only components enabled at the current level can enter the IF
1347
1348                 enableIndex++;   // Open a new enable level holding the IF mask (closed by ENDIF)
1349                 enableStack[enableIndex] = condition;
1350
1351                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1352                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1353
1354                 Bool notAllFalse = SignMask(condition) != 0;   // Skip the true part when SignMask(condition) is zero
1355
1356                 branch(notAllFalse, trueBlock, falseBlock);
1357
1358                 isConditionalIf[ifDepth] = true;   // Tells ELSE / ENDIF that an enable level belongs to this IF
1359                 ifFalseBlock[ifDepth] = falseBlock;   // Picked up by ELSE or ENDIF
1360
1361                 ifDepth++;
1362                 breakDepth++;   // BREAK steps enableIndex back by this amount around its branch
1363         }
1364
1365         void VertexProgram::LABEL(int labelIndex)   // Starts emitting the code of label 'labelIndex'
1366         {
1367                 if(!labelBlock[labelIndex])   // The block may already exist if a CALL referenced the label first
1368                 {
1369                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1370                 }
1371
1372                 Nucleus::setInsertBlock(labelBlock[labelIndex]);
1373                 currentLabel = labelIndex;   // RET uses this to find the label's return blocks
1374         }
1375
1376         void VertexProgram::LOOP(const Src &integerRegister)   // Opens a counted loop controlled by a vs.i[] entry of the draw data
1377         {
1378                 loopDepth++;
1379
1380                 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][0]));   // Element 0: iteration count
1381                 aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][1]));   // Element 1: initial value of aL
1382                 increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][2]));   // Element 2: amount ENDLOOP adds to aL
1383
1384                 // FIXME: Compiles to two instructions?
1385                 If(increment[loopDepth] == 0)
1386                 {
1387                         increment[loopDepth] = 1;   // A zero increment is replaced by 1
1388                 }
1389
1390                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1391                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1392                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1393
1394                 loopRepTestBlock[loopRepDepth] = testBlock;   // Read back by ENDLOOP
1395                 loopRepEndBlock[loopRepDepth] = endBlock;   // Read back by ENDLOOP and BREAK
1396
1397                 // FIXME: jump(testBlock)
1398                 Nucleus::createBr(testBlock);
1399                 Nucleus::setInsertBlock(testBlock);
1400
1401                 branch(iteration[loopDepth] > 0, loopBlock, endBlock);   // Enter the body while iterations remain, otherwise go to the end block
1402                 Nucleus::setInsertBlock(loopBlock);
1403
1404                 iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1405
1406                 loopRepDepth++;
1407                 breakDepth = 0;   // No masked IF has been opened inside this loop yet
1408         }
1409
1410         void VertexProgram::REP(const Src &integerRegister)   // Opens a repeat loop whose count is element 0 of a vs.i[] entry of the draw data
1411         {
1412                 loopDepth++;
1413
1414                 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][0]));   // Element 0: iteration count
1415                 aL[loopDepth] = aL[loopDepth - 1];   // aL is copied from the enclosing level. NOTE(review): the constructor starts loopDepth at -1, so an outermost REP reads aL[-1] - confirm aL tolerates that
1416
1417                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1418                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1419                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1420
1421                 loopRepTestBlock[loopRepDepth] = testBlock;   // Read back by ENDREP
1422                 loopRepEndBlock[loopRepDepth] = endBlock;   // Read back by ENDREP and BREAK
1423
1424                 // FIXME: jump(testBlock)
1425                 Nucleus::createBr(testBlock);
1426                 Nucleus::setInsertBlock(testBlock);
1427
1428                 branch(iteration[loopDepth] > 0, loopBlock, endBlock);   // Enter the body while iterations remain, otherwise go to the end block
1429                 Nucleus::setInsertBlock(loopBlock);
1430
1431                 iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1432
1433                 loopRepDepth++;
1434                 breakDepth = 0;   // No masked IF has been opened inside this loop yet
1435         }
1436
1437         void VertexProgram::WHILE(const Src &temporaryRegister)   // Opens a while loop whose mask is the x component of the given register
1438         {
1439                 enableIndex++;   // New enable level for the loop (closed by ENDWHILE)
1440
1441                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1442                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1443                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1444
1445                 loopRepTestBlock[loopRepDepth] = testBlock;   // Read back by ENDWHILE
1446                 loopRepEndBlock[loopRepDepth] = endBlock;   // Read back by ENDWHILE and BREAK
1447
1448                 Int4 restoreBreak = enableBreak;   // Values on loop entry, reassigned in the end and test blocks below
1449                 Int4 restoreContinue = enableContinue;
1450
1451                 // FIXME: jump(testBlock)
1452                 Nucleus::createBr(testBlock);
1453                 Nucleus::setInsertBlock(testBlock);
1454                 enableContinue = restoreContinue;   // Emitted at the top of the test block, i.e. before every evaluation of the condition
1455
1456                 const Vector4f &src = fetchRegister(temporaryRegister);
1457                 Int4 condition = As<Int4>(src.x);
1458                 condition &= enableStack[enableIndex - 1];   // Mask by the enclosing enable level
1459                 if(shader->containsLeaveInstruction()) condition &= enableLeave;   // Only emitted when the shader contains a LEAVE
1460                 enableStack[enableIndex] = condition;   // Enable mask for the loop body
1461
1462                 Bool notAllFalse = SignMask(condition) != 0;   // Leave the loop when SignMask(condition) is zero
1463                 branch(notAllFalse, loopBlock, endBlock);
1464
1465                 Nucleus::setInsertBlock(endBlock);   // Emit the enableBreak restore into the end block...
1466                 enableBreak = restoreBreak;
1467
1468                 Nucleus::setInsertBlock(loopBlock);   // ...then continue with the loop body
1469
1470                 loopRepDepth++;
1471                 breakDepth = 0;   // No masked IF has been opened inside this loop yet
1472         }
1473
1474         void VertexProgram::SWITCH()   // Opens a switch construct so that BREAK can target its end block
1475         {
1476                 enableIndex++;   // New enable level for the switch (closed by ENDSWITCH)
1477                 enableStack[enableIndex] = Int4(0xFFFFFFFF);   // Initialised from the constant 0xFFFFFFFF
1478
1479                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1480
1481                 loopRepTestBlock[loopRepDepth] = nullptr;   // A switch has no test block
1482                 loopRepEndBlock[loopRepDepth] = endBlock;   // Read back by ENDSWITCH and BREAK
1483
1484                 loopRepDepth++;
1485                 breakDepth = 0;   // No masked IF has been opened inside this switch yet
1486         }
1487
1488         void VertexProgram::RET()   // Emits the return of the main program or of the function under the current label
1489         {
1490                 if(currentLabel == -1)   // Still the constructor's initial value: no LABEL has been emitted yet
1491                 {
1492                         returnBlock = Nucleus::createBasicBlock();
1493                         Nucleus::createBr(returnBlock);   // Note: the insert block is not changed here
1494                 }
1495                 else
1496                 {
1497                         llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock();   // Passed to createSwitch below (presumably as its default target) and used as the insert block after the return
1498
1499                         if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
1500                         {
1501                                 // FIXME: Encapsulate
1502                                 UInt index = callStack[--stackIndex];
1503
1504                                 llvm::Value *value = index.loadValue();
1505                                 llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
1506
1507                                 for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)   // One case per call site, keyed by the index pushed in CALL / CALLNZ
1508                                 {
1509                                         Nucleus::addSwitchCase(switchInst, i, callRetBlock[currentLabel][i]);
1510                                 }
1511                         }
1512                         else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
1513                         {
1514                                 Nucleus::createBr(callRetBlock[currentLabel][0]);
1515                         }
1516                         else   // Function isn't called
1517                         {
1518                                 Nucleus::createBr(unreachableBlock);
1519                         }
1520
1521                         Nucleus::setInsertBlock(unreachableBlock);
1522                         Nucleus::createUnreachable();
1523                 }
1524         }
1525
1526         void VertexProgram::LEAVE()   // Emits a masked leave
1527         {
1528                 enableLeave = enableLeave & ~enableStack[enableIndex];   // Clear enableLeave for every component enabled at the current level
1529
1530                 // FIXME: Return from function if all instances left
1531                 // FIXME: Use enableLeave in other control-flow constructs
1532         }
1533
1534         void VertexProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1)   // Samples sampler 'src1' at src0 using the Lod method
1535         {
1536                 sampleTexture(dst, src1, src0.x, src0.y, src0.z, src0.w, Lod);   // src0.w is forwarded as the 'q' argument
1537         }
1538
1539         void VertexProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1)   // Samples sampler 'src1' at src0.xyz using the Lod method with a zero 'q' argument
1540         {
1541                 Float4 lod0 = Float4(0.0f);   // Passed in place of src0.w (compare TEXLDL)
1542                 sampleTexture(dst, src1, src0.x, src0.y, src0.z, lod0, Lod);
1543         }
1544
1545         void VertexProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3)   // Stub: no arguments are used
1546         {
1547                 UNIMPLEMENTED();   // Not implemented for vertex programs yet
1548         }
1549
1550         void VertexProgram::TEXLDL(Vector4f &dst, Vector4f &src, const Src&, Vector4f &offset)   // Stub for the overload taking an offset: no arguments are used
1551         {
1552                 UNIMPLEMENTED();   // Not implemented for vertex programs yet
1553         }
1554
1555         void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2)   // Stub: no arguments are used
1556         {
1557                 UNIMPLEMENTED();   // Not implemented for vertex programs yet
1558         }
1559
1560         void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &offset)   // Stub for the overload taking an offset: no arguments are used
1561         {
1562                 UNIMPLEMENTED();   // Not implemented for vertex programs yet
1563         }
1564
1565         void VertexProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3)   // Stub: no arguments are used
1566         {
1567                 UNIMPLEMENTED();   // Not implemented for vertex programs yet
1568         }
1569
1570         void VertexProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3, Vector4f &offset)   // Stub for the overload taking an offset: no arguments are used
1571         {
1572                 UNIMPLEMENTED();   // Not implemented for vertex programs yet
1573         }
1574
1575         void VertexProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1)
1576         {
1577                 Pointer<Byte> textureMipmap = data + OFFSET(DrawData, mipmap[16]) + src1.index * sizeof(Texture) + OFFSET(Texture, mipmap);
1578                 for(int i = 0; i < 4; ++i)
1579                 {
1580                         Pointer<Byte> mipmap = textureMipmap + (As<Int>(Extract(lod, i)) + Int(1)) * sizeof(Mipmap);
1581                         dst.x = Insert(dst.x, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, width)))), i);
1582                         dst.y = Insert(dst.y, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, height)))), i);
1583                         dst.z = Insert(dst.z, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)))), i);
1584                 }
1585         }
1586
1587         void VertexProgram::sampleTexture(Vector4f &c, const Src &s, Float4 &u, Float4 &v, Float4 &w, Float4 &q, SamplerMethod method)   // Samples the texture selected by 's' and writes the result to 'c' with s.swizzle applied
1588         {
1589                 Vector4f tmp;   // Unswizzled sample result
1590
1591                 if(s.type == Shader::PARAMETER_SAMPLER && s.rel.type == Shader::PARAMETER_VOID)   // Sampler named directly, without relative addressing
1592                 {
1593                         Pointer<Byte> texture = data + OFFSET(DrawData,mipmap[TEXTURE_IMAGE_UNITS]) + s.index * sizeof(Texture);   // Texture entries used here start at mipmap[TEXTURE_IMAGE_UNITS]
1594                         sampler[s.index]->sampleTexture(texture, tmp, u, v, w, q, a0, a0, method);
1595                 }
1596                 else   // Otherwise the sampler index is read from a register
1597                 {
1598                         Int index = As<Int>(Float(fetchRegister(s).x.x));   // Bits of the first element of the fetched x component, reinterpreted as an integer
1599
1600                         for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)   // Emit a guarded sampling sequence for each sampler the shader uses
1601                         {
1602                                 if(shader->usesSampler(i))
1603                                 {
1604                                         If(index == i)   // NOTE(review): 'tmp' is not written when no used sampler matches 'index' - confirm callers cannot hit that case
1605                                         {
1606                                                 Pointer<Byte> texture = data + OFFSET(DrawData,mipmap[TEXTURE_IMAGE_UNITS]) + i * sizeof(Texture);
1607                                                 sampler[i]->sampleTexture(texture, tmp, u, v, w, q, a0, a0, method);
1608                                                 // FIXME: When the sampler states are the same, we could use one sampler and just index the texture
1609                                         }
1610                                 }
1611                         }
1612                 }
1613
1614                 c.x = tmp[(s.swizzle >> 0) & 0x3];   // Two swizzle bits per destination component select the source component
1615                 c.y = tmp[(s.swizzle >> 2) & 0x3];
1616                 c.z = tmp[(s.swizzle >> 4) & 0x3];
1617                 c.w = tmp[(s.swizzle >> 6) & 0x3];
1618         }
1619 }