OSDN Git Service

Apply the Apache 2.0 license.
[android-x86/external-swiftshader.git] / src / Shader / VertexProgram.cpp
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "VertexProgram.hpp"
16
17 #include "Renderer.hpp"
18 #include "VertexShader.hpp"
19 #include "Vertex.hpp"
20 #include "Half.hpp"
21 #include "SamplerCore.hpp"
22 #include "Debug.hpp"
23
24 namespace sw
25 {
26         VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader)
27                 : VertexRoutine(state, shader), shader(shader), r(shader->dynamicallyIndexedTemporaries)
28         {
29                 ifDepth = 0;
30                 loopRepDepth = 0;
31                 breakDepth = 0;
32                 currentLabel = -1;
33                 whileTest = false;
34
35                 for(int i = 0; i < 2048; i++)
36                 {
37                         labelBlock[i] = 0;
38                 }
39
40                 loopDepth = -1;
41                 enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
42
43                 if(shader && shader->containsBreakInstruction())
44                 {
45                         enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
46                 }
47
48                 if(shader && shader->containsContinueInstruction())
49                 {
50                         enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
51                 }
52
53                 if(shader->instanceIdDeclared)
54                 {
55                         instanceID = *Pointer<Int>(data + OFFSET(DrawData,instanceID));
56                 }
57         }
58
59         VertexProgram::~VertexProgram()
60         {
61                 for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
62                 {
63                         delete sampler[i];
64                 }
65         }
66
67         void VertexProgram::pipeline()
68         {
69                 for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
70                 {
71                         sampler[i] = new SamplerCore(constants, state.samplerState[i]);
72                 }
73
74                 if(!state.preTransformed)
75                 {
76                         program();
77                 }
78                 else
79                 {
80                         passThrough();
81                 }
82         }
83
84         void VertexProgram::program()
85         {
86         //      shader->print("VertexShader-%0.8X.txt", state.shaderID);
87
88                 unsigned short version = shader->getVersion();
89
90                 enableIndex = 0;
91                 stackIndex = 0;
92
93                 if(shader->containsLeaveInstruction())
94                 {
95                         enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
96                 }
97
98                 // Create all call site return blocks up front
99                 for(size_t i = 0; i < shader->getLength(); i++)
100                 {
101                         const Shader::Instruction *instruction = shader->getInstruction(i);
102                         Shader::Opcode opcode = instruction->opcode;
103
104                         if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
105                         {
106                                 const Dst &dst = instruction->dst;
107
108                                 ASSERT(callRetBlock[dst.label].size() == dst.callSite);
109                                 callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
110                         }
111                 }
112
113                 for(size_t i = 0; i < shader->getLength(); i++)
114                 {
115                         const Shader::Instruction *instruction = shader->getInstruction(i);
116                         Shader::Opcode opcode = instruction->opcode;
117
118                         if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
119                         {
120                                 continue;
121                         }
122
123                         Dst dst = instruction->dst;
124                         Src src0 = instruction->src[0];
125                         Src src1 = instruction->src[1];
126                         Src src2 = instruction->src[2];
127                         Src src3 = instruction->src[3];
128                         Src src4 = instruction->src[4];
129
130                         bool predicate = instruction->predicate;
131                         Control control = instruction->control;
132                         bool integer = dst.type == Shader::PARAMETER_ADDR;
133                         bool pp = dst.partialPrecision;
134
135                         Vector4f d;
136                         Vector4f s0;
137                         Vector4f s1;
138                         Vector4f s2;
139                         Vector4f s3;
140                         Vector4f s4;
141
142                         if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0);
143                         if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1);
144                         if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2);
145                         if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3);
146                         if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4);
147
148                         switch(opcode)
149                         {
150                         case Shader::OPCODE_VS_1_0:                                     break;
151                         case Shader::OPCODE_VS_1_1:                                     break;
152                         case Shader::OPCODE_VS_2_0:                                     break;
153                         case Shader::OPCODE_VS_2_x:                                     break;
154                         case Shader::OPCODE_VS_2_sw:                                    break;
155                         case Shader::OPCODE_VS_3_0:                                     break;
156                         case Shader::OPCODE_VS_3_sw:                                    break;
157                         case Shader::OPCODE_DCL:                                        break;
158                         case Shader::OPCODE_DEF:                                        break;
159                         case Shader::OPCODE_DEFI:                                       break;
160                         case Shader::OPCODE_DEFB:                                       break;
161                         case Shader::OPCODE_NOP:                                        break;
162                         case Shader::OPCODE_ABS:        abs(d, s0);                     break;
163                         case Shader::OPCODE_IABS:       iabs(d, s0);                    break;
164                         case Shader::OPCODE_ADD:        add(d, s0, s1);                 break;
165                         case Shader::OPCODE_IADD:       iadd(d, s0, s1);                break;
166                         case Shader::OPCODE_CRS:        crs(d, s0, s1);                 break;
167                         case Shader::OPCODE_FORWARD1:   forward1(d, s0, s1, s2);        break;
168                         case Shader::OPCODE_FORWARD2:   forward2(d, s0, s1, s2);        break;
169                         case Shader::OPCODE_FORWARD3:   forward3(d, s0, s1, s2);        break;
170                         case Shader::OPCODE_FORWARD4:   forward4(d, s0, s1, s2);        break;
171                         case Shader::OPCODE_REFLECT1:   reflect1(d, s0, s1);            break;
172                         case Shader::OPCODE_REFLECT2:   reflect2(d, s0, s1);            break;
173                         case Shader::OPCODE_REFLECT3:   reflect3(d, s0, s1);            break;
174                         case Shader::OPCODE_REFLECT4:   reflect4(d, s0, s1);            break;
175                         case Shader::OPCODE_REFRACT1:   refract1(d, s0, s1, s2.x);      break;
176                         case Shader::OPCODE_REFRACT2:   refract2(d, s0, s1, s2.x);      break;
177                         case Shader::OPCODE_REFRACT3:   refract3(d, s0, s1, s2.x);      break;
178                         case Shader::OPCODE_REFRACT4:   refract4(d, s0, s1, s2.x);      break;
179                         case Shader::OPCODE_DP1:        dp1(d, s0, s1);                 break;
180                         case Shader::OPCODE_DP2:        dp2(d, s0, s1);                 break;
181                         case Shader::OPCODE_DP3:        dp3(d, s0, s1);                 break;
182                         case Shader::OPCODE_DP4:        dp4(d, s0, s1);                 break;
183                         case Shader::OPCODE_DET2:       det2(d, s0, s1);                break;
184                         case Shader::OPCODE_DET3:       det3(d, s0, s1, s2);            break;
185                         case Shader::OPCODE_DET4:       det4(d, s0, s1, s2, s3);        break;
186                         case Shader::OPCODE_ATT:        att(d, s0, s1);                 break;
187                         case Shader::OPCODE_EXP2X:      exp2x(d, s0, pp);               break;
188                         case Shader::OPCODE_EXP2:       exp2(d, s0, pp);                break;
189                         case Shader::OPCODE_EXPP:       expp(d, s0, version);           break;
190                         case Shader::OPCODE_EXP:        exp(d, s0, pp);                 break;
191                         case Shader::OPCODE_FRC:        frc(d, s0);                     break;
192                         case Shader::OPCODE_TRUNC:      trunc(d, s0);                   break;
193                         case Shader::OPCODE_FLOOR:      floor(d, s0);                   break;
194                         case Shader::OPCODE_ROUND:      round(d, s0);                   break;
195                         case Shader::OPCODE_ROUNDEVEN:  roundEven(d, s0);               break;
196                         case Shader::OPCODE_CEIL:       ceil(d, s0);                    break;
197                         case Shader::OPCODE_LIT:        lit(d, s0);                     break;
198                         case Shader::OPCODE_LOG2X:      log2x(d, s0, pp);               break;
199                         case Shader::OPCODE_LOG2:       log2(d, s0, pp);                break;
200                         case Shader::OPCODE_LOGP:       logp(d, s0, version);           break;
201                         case Shader::OPCODE_LOG:        log(d, s0, pp);                 break;
202                         case Shader::OPCODE_LRP:        lrp(d, s0, s1, s2);             break;
203                         case Shader::OPCODE_STEP:       step(d, s0, s1);                break;
204                         case Shader::OPCODE_SMOOTH:     smooth(d, s0, s1, s2);          break;
205                         case Shader::OPCODE_FLOATBITSTOINT:
206                         case Shader::OPCODE_FLOATBITSTOUINT:
207                         case Shader::OPCODE_INTBITSTOFLOAT:
208                         case Shader::OPCODE_UINTBITSTOFLOAT: d = s0;                    break;
209                         case Shader::OPCODE_PACKSNORM2x16:   packSnorm2x16(d, s0);      break;
210                         case Shader::OPCODE_PACKUNORM2x16:   packUnorm2x16(d, s0);      break;
211                         case Shader::OPCODE_PACKHALF2x16:    packHalf2x16(d, s0);       break;
212                         case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0);    break;
213                         case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0);    break;
214                         case Shader::OPCODE_UNPACKHALF2x16:  unpackHalf2x16(d, s0);     break;
215                         case Shader::OPCODE_M3X2:       M3X2(d, s0, src1);              break;
216                         case Shader::OPCODE_M3X3:       M3X3(d, s0, src1);              break;
217                         case Shader::OPCODE_M3X4:       M3X4(d, s0, src1);              break;
218                         case Shader::OPCODE_M4X3:       M4X3(d, s0, src1);              break;
219                         case Shader::OPCODE_M4X4:       M4X4(d, s0, src1);              break;
220                         case Shader::OPCODE_MAD:        mad(d, s0, s1, s2);             break;
221                         case Shader::OPCODE_IMAD:       imad(d, s0, s1, s2);            break;
222                         case Shader::OPCODE_MAX:        max(d, s0, s1);                 break;
223                         case Shader::OPCODE_IMAX:       imax(d, s0, s1);                break;
224                         case Shader::OPCODE_UMAX:       umax(d, s0, s1);                break;
225                         case Shader::OPCODE_MIN:        min(d, s0, s1);                 break;
226                         case Shader::OPCODE_IMIN:       imin(d, s0, s1);                break;
227                         case Shader::OPCODE_UMIN:       umin(d, s0, s1);                break;
228                         case Shader::OPCODE_MOV:        mov(d, s0, integer);            break;
229                         case Shader::OPCODE_MOVA:       mov(d, s0, true);               break;
230                         case Shader::OPCODE_NEG:        neg(d, s0);                     break;
231                         case Shader::OPCODE_INEG:       ineg(d, s0);                    break;
232                         case Shader::OPCODE_F2B:        f2b(d, s0);                     break;
233                         case Shader::OPCODE_B2F:        b2f(d, s0);                     break;
234                         case Shader::OPCODE_F2I:        f2i(d, s0);                     break;
235                         case Shader::OPCODE_I2F:        i2f(d, s0);                     break;
236                         case Shader::OPCODE_F2U:        f2u(d, s0);                     break;
237                         case Shader::OPCODE_U2F:        u2f(d, s0);                     break;
238                         case Shader::OPCODE_I2B:        i2b(d, s0);                     break;
239                         case Shader::OPCODE_B2I:        b2i(d, s0);                     break;
240                         case Shader::OPCODE_MUL:        mul(d, s0, s1);                 break;
241                         case Shader::OPCODE_IMUL:       imul(d, s0, s1);                break;
242                         case Shader::OPCODE_NRM2:       nrm2(d, s0, pp);                break;
243                         case Shader::OPCODE_NRM3:       nrm3(d, s0, pp);                break;
244                         case Shader::OPCODE_NRM4:       nrm4(d, s0, pp);                break;
245                         case Shader::OPCODE_POWX:       powx(d, s0, s1, pp);            break;
246                         case Shader::OPCODE_POW:        pow(d, s0, s1, pp);             break;
247                         case Shader::OPCODE_RCPX:       rcpx(d, s0, pp);                break;
248                         case Shader::OPCODE_DIV:        div(d, s0, s1);                 break;
249                         case Shader::OPCODE_IDIV:       idiv(d, s0, s1);                break;
250                         case Shader::OPCODE_UDIV:       udiv(d, s0, s1);                break;
251                         case Shader::OPCODE_MOD:        mod(d, s0, s1);                 break;
252                         case Shader::OPCODE_IMOD:       imod(d, s0, s1);                break;
253                         case Shader::OPCODE_UMOD:       umod(d, s0, s1);                break;
254                         case Shader::OPCODE_SHL:        shl(d, s0, s1);                 break;
255                         case Shader::OPCODE_ISHR:       ishr(d, s0, s1);                break;
256                         case Shader::OPCODE_USHR:       ushr(d, s0, s1);                break;
257                         case Shader::OPCODE_RSQX:       rsqx(d, s0, pp);                break;
258                         case Shader::OPCODE_SQRT:       sqrt(d, s0, pp);                break;
259                         case Shader::OPCODE_RSQ:        rsq(d, s0, pp);                 break;
260                         case Shader::OPCODE_LEN2:       len2(d.x, s0, pp);              break;
261                         case Shader::OPCODE_LEN3:       len3(d.x, s0, pp);              break;
262                         case Shader::OPCODE_LEN4:       len4(d.x, s0, pp);              break;
263                         case Shader::OPCODE_DIST1:      dist1(d.x, s0, s1, pp);         break;
264                         case Shader::OPCODE_DIST2:      dist2(d.x, s0, s1, pp);         break;
265                         case Shader::OPCODE_DIST3:      dist3(d.x, s0, s1, pp);         break;
266                         case Shader::OPCODE_DIST4:      dist4(d.x, s0, s1, pp);         break;
267                         case Shader::OPCODE_SGE:        step(d, s1, s0);                break;
268                         case Shader::OPCODE_SGN:        sgn(d, s0);                     break;
269                         case Shader::OPCODE_ISGN:       isgn(d, s0);                    break;
270                         case Shader::OPCODE_SINCOS:     sincos(d, s0, pp);              break;
271                         case Shader::OPCODE_COS:        cos(d, s0, pp);                 break;
272                         case Shader::OPCODE_SIN:        sin(d, s0, pp);                 break;
273                         case Shader::OPCODE_TAN:        tan(d, s0);                     break;
274                         case Shader::OPCODE_ACOS:       acos(d, s0);                    break;
275                         case Shader::OPCODE_ASIN:       asin(d, s0);                    break;
276                         case Shader::OPCODE_ATAN:       atan(d, s0);                    break;
277                         case Shader::OPCODE_ATAN2:      atan2(d, s0, s1);               break;
278                         case Shader::OPCODE_COSH:       cosh(d, s0, pp);                break;
279                         case Shader::OPCODE_SINH:       sinh(d, s0, pp);                break;
280                         case Shader::OPCODE_TANH:       tanh(d, s0, pp);                break;
281                         case Shader::OPCODE_ACOSH:      acosh(d, s0, pp);               break;
282                         case Shader::OPCODE_ASINH:      asinh(d, s0, pp);               break;
283                         case Shader::OPCODE_ATANH:      atanh(d, s0, pp);               break;
284                         case Shader::OPCODE_SLT:        slt(d, s0, s1);                 break;
285                         case Shader::OPCODE_SUB:        sub(d, s0, s1);                 break;
286                         case Shader::OPCODE_ISUB:       isub(d, s0, s1);                break;
287                         case Shader::OPCODE_BREAK:      BREAK();                        break;
288                         case Shader::OPCODE_BREAKC:     BREAKC(s0, s1, control);        break;
289                         case Shader::OPCODE_BREAKP:     BREAKP(src0);                   break;
290                         case Shader::OPCODE_CONTINUE:   CONTINUE();                     break;
291                         case Shader::OPCODE_TEST:       TEST();                         break;
292                         case Shader::OPCODE_CALL:       CALL(dst.label, dst.callSite);  break;
293                         case Shader::OPCODE_CALLNZ:     CALLNZ(dst.label, dst.callSite, src0); break;
294                         case Shader::OPCODE_ELSE:       ELSE();                         break;
295                         case Shader::OPCODE_ENDIF:      ENDIF();                        break;
296                         case Shader::OPCODE_ENDLOOP:    ENDLOOP();                      break;
297                         case Shader::OPCODE_ENDREP:     ENDREP();                       break;
298                         case Shader::OPCODE_ENDWHILE:   ENDWHILE();                     break;
299                         case Shader::OPCODE_IF:         IF(src0);                       break;
300                         case Shader::OPCODE_IFC:        IFC(s0, s1, control);           break;
301                         case Shader::OPCODE_LABEL:      LABEL(dst.index);               break;
302                         case Shader::OPCODE_LOOP:       LOOP(src1);                     break;
303                         case Shader::OPCODE_REP:        REP(src0);                      break;
304                         case Shader::OPCODE_WHILE:      WHILE(src0);                    break;
305                         case Shader::OPCODE_RET:        RET();                          break;
306                         case Shader::OPCODE_LEAVE:      LEAVE();                        break;
307                         case Shader::OPCODE_CMP:        cmp(d, s0, s1, control);        break;
308                         case Shader::OPCODE_ICMP:       icmp(d, s0, s1, control);       break;
309                         case Shader::OPCODE_UCMP:       ucmp(d, s0, s1, control);       break;
310                         case Shader::OPCODE_SELECT:     select(d, s0, s1, s2);          break;
311                         case Shader::OPCODE_EXTRACT:    extract(d.x, s0, s1.x);         break;
312                         case Shader::OPCODE_INSERT:     insert(d, s0, s1.x, s2.x);      break;
313                         case Shader::OPCODE_ALL:        all(d.x, s0);                   break;
314                         case Shader::OPCODE_ANY:        any(d.x, s0);                   break;
315                         case Shader::OPCODE_NOT:        not(d, s0);                     break;
316                         case Shader::OPCODE_OR:         or(d, s0, s1);                  break;
317                         case Shader::OPCODE_XOR:        xor(d, s0, s1);                 break;
318                         case Shader::OPCODE_AND:        and(d, s0, s1);                 break;
319                         case Shader::OPCODE_EQ:         equal(d, s0, s1);               break;
320                         case Shader::OPCODE_NE:         notEqual(d, s0, s1);            break;
321                         case Shader::OPCODE_TEXLDL:     TEXLDL(d, s0, src1);            break;
322                         case Shader::OPCODE_TEX:        TEX(d, s0, src1);               break;
323                         case Shader::OPCODE_TEXOFFSET:  TEXOFFSET(d, s0, src1, s2, s3); break;
324                         case Shader::OPCODE_TEXLDLOFFSET: TEXLDL(d, s0, src1, s2);      break;
325                         case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2);    break;
326                         case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCH(d, s0, src1, s2, s3); break;
327                         case Shader::OPCODE_TEXGRAD:    TEXGRAD(d, s0, src1, s2, s3);   break;
328                         case Shader::OPCODE_TEXGRADOFFSET: TEXGRAD(d, s0, src1, s2, s3, s4); break;
329                         case Shader::OPCODE_TEXSIZE:    TEXSIZE(d, s0.x, src1);         break;
330                         case Shader::OPCODE_END:                                        break;
331                         default:
332                                 ASSERT(false);
333                         }
334
335                         if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_NOP)
336                         {
337                                 if(dst.integer)
338                                 {
339                                         switch(opcode)
340                                         {
341                                         case Shader::OPCODE_DIV:
342                                                 if(dst.x) d.x = Trunc(d.x);
343                                                 if(dst.y) d.y = Trunc(d.y);
344                                                 if(dst.z) d.z = Trunc(d.z);
345                                                 if(dst.w) d.w = Trunc(d.w);
346                                                 break;
347                                         default:
348                                                 break;   // No truncation to integer required when arguments are integer
349                                         }
350                                 }
351
352                                 if(dst.saturate)
353                                 {
354                                         if(dst.x) d.x = Max(d.x, Float4(0.0f));
355                                         if(dst.y) d.y = Max(d.y, Float4(0.0f));
356                                         if(dst.z) d.z = Max(d.z, Float4(0.0f));
357                                         if(dst.w) d.w = Max(d.w, Float4(0.0f));
358
359                                         if(dst.x) d.x = Min(d.x, Float4(1.0f));
360                                         if(dst.y) d.y = Min(d.y, Float4(1.0f));
361                                         if(dst.z) d.z = Min(d.z, Float4(1.0f));
362                                         if(dst.w) d.w = Min(d.w, Float4(1.0f));
363                                 }
364
365                                 if(instruction->isPredicated())
366                                 {
367                                         Vector4f pDst;   // FIXME: Rename
368
369                                         switch(dst.type)
370                                         {
371                                         case Shader::PARAMETER_VOID: break;
372                                         case Shader::PARAMETER_TEMP:
373                                                 if(dst.rel.type == Shader::PARAMETER_VOID)
374                                                 {
375                                                         if(dst.x) pDst.x = r[dst.index].x;
376                                                         if(dst.y) pDst.y = r[dst.index].y;
377                                                         if(dst.z) pDst.z = r[dst.index].z;
378                                                         if(dst.w) pDst.w = r[dst.index].w;
379                                                 }
380                                                 else
381                                                 {
382                                                         Int a = relativeAddress(dst);
383
384                                                         if(dst.x) pDst.x = r[dst.index + a].x;
385                                                         if(dst.y) pDst.y = r[dst.index + a].y;
386                                                         if(dst.z) pDst.z = r[dst.index + a].z;
387                                                         if(dst.w) pDst.w = r[dst.index + a].w;
388                                                 }
389                                                 break;
390                                         case Shader::PARAMETER_ADDR: pDst = a0; break;
391                                         case Shader::PARAMETER_RASTOUT:
392                                                 switch(dst.index)
393                                                 {
394                                                 case 0:
395                                                         if(dst.x) pDst.x = o[Pos].x;
396                                                         if(dst.y) pDst.y = o[Pos].y;
397                                                         if(dst.z) pDst.z = o[Pos].z;
398                                                         if(dst.w) pDst.w = o[Pos].w;
399                                                         break;
400                                                 case 1:
401                                                         pDst.x = o[Fog].x;
402                                                         break;
403                                                 case 2:
404                                                         pDst.x = o[Pts].y;
405                                                         break;
406                                                 default:
407                                                         ASSERT(false);
408                                                 }
409                                                 break;
410                                         case Shader::PARAMETER_ATTROUT:
411                                                 if(dst.x) pDst.x = o[D0 + dst.index].x;
412                                                 if(dst.y) pDst.y = o[D0 + dst.index].y;
413                                                 if(dst.z) pDst.z = o[D0 + dst.index].z;
414                                                 if(dst.w) pDst.w = o[D0 + dst.index].w;
415                                                 break;
416                                         case Shader::PARAMETER_TEXCRDOUT:
417                                 //      case Shader::PARAMETER_OUTPUT:
418                                                 if(version < 0x0300)
419                                                 {
420                                                         if(dst.x) pDst.x = o[T0 + dst.index].x;
421                                                         if(dst.y) pDst.y = o[T0 + dst.index].y;
422                                                         if(dst.z) pDst.z = o[T0 + dst.index].z;
423                                                         if(dst.w) pDst.w = o[T0 + dst.index].w;
424                                                 }
425                                                 else
426                                                 {
427                                                         if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
428                                                         {
429                                                                 if(dst.x) pDst.x = o[dst.index].x;
430                                                                 if(dst.y) pDst.y = o[dst.index].y;
431                                                                 if(dst.z) pDst.z = o[dst.index].z;
432                                                                 if(dst.w) pDst.w = o[dst.index].w;
433                                                         }
434                                                         else
435                                                         {
436                                                                 Int a = relativeAddress(dst);
437
438                                                                 if(dst.x) pDst.x = o[dst.index + a].x;
439                                                                 if(dst.y) pDst.y = o[dst.index + a].y;
440                                                                 if(dst.z) pDst.z = o[dst.index + a].z;
441                                                                 if(dst.w) pDst.w = o[dst.index + a].w;
442                                                         }
443                                                 }
444                                                 break;
445                                         case Shader::PARAMETER_LABEL:                break;
446                                         case Shader::PARAMETER_PREDICATE: pDst = p0; break;
447                                         case Shader::PARAMETER_INPUT:                break;
448                                         default:
449                                                 ASSERT(false);
450                                         }
451
452                                         Int4 enable = enableMask(instruction);
453
454                                         Int4 xEnable = enable;
455                                         Int4 yEnable = enable;
456                                         Int4 zEnable = enable;
457                                         Int4 wEnable = enable;
458
459                                         if(predicate)
460                                         {
461                                                 unsigned char pSwizzle = instruction->predicateSwizzle;
462
463                                                 Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03];
464                                                 Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03];
465                                                 Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03];
466                                                 Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03];
467
468                                                 if(!instruction->predicateNot)
469                                                 {
470                                                         if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
471                                                         if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
472                                                         if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
473                                                         if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
474                                                 }
475                                                 else
476                                                 {
477                                                         if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
478                                                         if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
479                                                         if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
480                                                         if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
481                                                 }
482                                         }
483
484                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
485                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
486                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
487                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
488
489                                         if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
490                                         if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
491                                         if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
492                                         if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
493                                 }
494
495                                 switch(dst.type)
496                                 {
497                                 case Shader::PARAMETER_VOID:
498                                         break;
499                                 case Shader::PARAMETER_TEMP:
500                                         if(dst.rel.type == Shader::PARAMETER_VOID)
501                                         {
502                                                 if(dst.x) r[dst.index].x = d.x;
503                                                 if(dst.y) r[dst.index].y = d.y;
504                                                 if(dst.z) r[dst.index].z = d.z;
505                                                 if(dst.w) r[dst.index].w = d.w;
506                                         }
507                                         else
508                                         {
509                                                 Int a = relativeAddress(dst);
510
511                                                 if(dst.x) r[dst.index + a].x = d.x;
512                                                 if(dst.y) r[dst.index + a].y = d.y;
513                                                 if(dst.z) r[dst.index + a].z = d.z;
514                                                 if(dst.w) r[dst.index + a].w = d.w;
515                                         }
516                                         break;
517                                 case Shader::PARAMETER_ADDR:
518                                         if(dst.x) a0.x = d.x;
519                                         if(dst.y) a0.y = d.y;
520                                         if(dst.z) a0.z = d.z;
521                                         if(dst.w) a0.w = d.w;
522                                         break;
523                                 case Shader::PARAMETER_RASTOUT:
524                                         switch(dst.index)
525                                         {
526                                         case 0:
527                                                 if(dst.x) o[Pos].x = d.x;
528                                                 if(dst.y) o[Pos].y = d.y;
529                                                 if(dst.z) o[Pos].z = d.z;
530                                                 if(dst.w) o[Pos].w = d.w;
531                                                 break;
532                                         case 1:
533                                                 o[Fog].x = d.x;
534                                                 break;
535                                         case 2:
536                                                 o[Pts].y = d.x;
537                                                 break;
538                                         default:        ASSERT(false);
539                                         }
540                                         break;
541                                 case Shader::PARAMETER_ATTROUT:
542                                         if(dst.x) o[D0 + dst.index].x = d.x;
543                                         if(dst.y) o[D0 + dst.index].y = d.y;
544                                         if(dst.z) o[D0 + dst.index].z = d.z;
545                                         if(dst.w) o[D0 + dst.index].w = d.w;
546                                         break;
547                                 case Shader::PARAMETER_TEXCRDOUT:
548                         //      case Shader::PARAMETER_OUTPUT:
549                                         if(version < 0x0300)
550                                         {
551                                                 if(dst.x) o[T0 + dst.index].x = d.x;
552                                                 if(dst.y) o[T0 + dst.index].y = d.y;
553                                                 if(dst.z) o[T0 + dst.index].z = d.z;
554                                                 if(dst.w) o[T0 + dst.index].w = d.w;
555                                         }
556                                         else
557                                         {
558                                                 if(dst.rel.type == Shader::PARAMETER_VOID)   // Not relative
559                                                 {
560                                                         if(dst.x) o[dst.index].x = d.x;
561                                                         if(dst.y) o[dst.index].y = d.y;
562                                                         if(dst.z) o[dst.index].z = d.z;
563                                                         if(dst.w) o[dst.index].w = d.w;
564                                                 }
565                                                 else
566                                                 {
567                                                         Int a = relativeAddress(dst);
568
569                                                         if(dst.x) o[dst.index + a].x = d.x;
570                                                         if(dst.y) o[dst.index + a].y = d.y;
571                                                         if(dst.z) o[dst.index + a].z = d.z;
572                                                         if(dst.w) o[dst.index + a].w = d.w;
573                                                 }
574                                         }
575                                         break;
576                                 case Shader::PARAMETER_LABEL:             break;
577                                 case Shader::PARAMETER_PREDICATE: p0 = d; break;
578                                 case Shader::PARAMETER_INPUT:             break;
579                                 default:
580                                         ASSERT(false);
581                                 }
582                         }
583                 }
584
585                 if(currentLabel != -1)
586                 {
587                         Nucleus::setInsertBlock(returnBlock);
588                 }
589         }
590
591         void VertexProgram::passThrough()
592         {
593                 if(shader)
594                 {
595                         for(int i = 0; i < 12; i++)
596                         {
597                                 unsigned char usage = shader->output[i][0].usage;
598
599                                 switch(usage)
600                                 {
601                                 case 0xFF:
602                                         continue;
603                                 case Shader::USAGE_PSIZE:
604                                         o[i].y = v[i].x;
605                                         break;
606                                 case Shader::USAGE_TEXCOORD:
607                                         o[i].x = v[i].x;
608                                         o[i].y = v[i].y;
609                                         o[i].z = v[i].z;
610                                         o[i].w = v[i].w;
611                                         break;
612                                 case Shader::USAGE_POSITION:
613                                         o[i].x = v[i].x;
614                                         o[i].y = v[i].y;
615                                         o[i].z = v[i].z;
616                                         o[i].w = v[i].w;
617                                         break;
618                                 case Shader::USAGE_COLOR:
619                                         o[i].x = v[i].x;
620                                         o[i].y = v[i].y;
621                                         o[i].z = v[i].z;
622                                         o[i].w = v[i].w;
623                                         break;
624                                 case Shader::USAGE_FOG:
625                                         o[i].x = v[i].x;
626                                         break;
627                                 default:
628                                         ASSERT(false);
629                                 }
630                         }
631                 }
632                 else
633                 {
634                         o[Pos].x = v[PositionT].x;
635                         o[Pos].y = v[PositionT].y;
636                         o[Pos].z = v[PositionT].z;
637                         o[Pos].w = v[PositionT].w;
638
639                         for(int i = 0; i < 2; i++)
640                         {
641                                 o[D0 + i].x = v[Color0 + i].x;
642                                 o[D0 + i].y = v[Color0 + i].y;
643                                 o[D0 + i].z = v[Color0 + i].z;
644                                 o[D0 + i].w = v[Color0 + i].w;
645                         }
646
647                         for(int i = 0; i < 8; i++)
648                         {
649                                 o[T0 + i].x = v[TexCoord0 + i].x;
650                                 o[T0 + i].y = v[TexCoord0 + i].y;
651                                 o[T0 + i].z = v[TexCoord0 + i].z;
652                                 o[T0 + i].w = v[TexCoord0 + i].w;
653                         }
654
655                         o[Pts].y = v[PointSize].x;
656                 }
657         }
658
659         Vector4f VertexProgram::fetchRegister(const Src &src, unsigned int offset)
660         {
661                 Vector4f reg;
662                 unsigned int i = src.index + offset;
663
664                 switch(src.type)
665                 {
666                 case Shader::PARAMETER_TEMP:
667                         if(src.rel.type == Shader::PARAMETER_VOID)
668                         {
669                                 reg = r[i];
670                         }
671                         else
672                         {
673                                 reg = r[i + relativeAddress(src, src.bufferIndex)];
674                         }
675                         break;
676                 case Shader::PARAMETER_CONST:
677                         reg = readConstant(src, offset);
678                         break;
679                 case Shader::PARAMETER_INPUT:
680                         if(src.rel.type == Shader::PARAMETER_VOID)
681                         {
682                                 reg = v[i];
683                         }
684                         else
685                         {
686                                 reg = v[i + relativeAddress(src, src.bufferIndex)];
687                         }
688                         break;
689                 case Shader::PARAMETER_VOID: return r[0];   // Dummy
690                 case Shader::PARAMETER_FLOAT4LITERAL:
691                         reg.x = Float4(src.value[0]);
692                         reg.y = Float4(src.value[1]);
693                         reg.z = Float4(src.value[2]);
694                         reg.w = Float4(src.value[3]);
695                         break;
696                 case Shader::PARAMETER_ADDR:      reg = a0; break;
697                 case Shader::PARAMETER_CONSTBOOL: return r[0];   // Dummy
698                 case Shader::PARAMETER_CONSTINT:  return r[0];   // Dummy
699                 case Shader::PARAMETER_LOOP:      return r[0];   // Dummy
700                 case Shader::PARAMETER_PREDICATE: return r[0];   // Dummy
701                 case Shader::PARAMETER_SAMPLER:
702                         if(src.rel.type == Shader::PARAMETER_VOID)
703                         {
704                                 reg.x = As<Float4>(Int4(i));
705                         }
706                         else if(src.rel.type == Shader::PARAMETER_TEMP)
707                         {
708                                 reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x));
709                         }
710                         return reg;
711                 case Shader::PARAMETER_OUTPUT:
712                         if(src.rel.type == Shader::PARAMETER_VOID)
713                         {
714                                 reg = o[i];
715                         }
716                         else
717                         {
718                                 reg = o[i + relativeAddress(src, src.bufferIndex)];
719                         }
720                         break;
721                 case Shader::PARAMETER_MISCTYPE:
722                         reg.x = As<Float>(Int(instanceID));
723                         return reg;
724                 default:
725                         ASSERT(false);
726                 }
727
728                 const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
729                 const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
730                 const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
731                 const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
732
733                 Vector4f mod;
734
735                 switch(src.modifier)
736                 {
737                 case Shader::MODIFIER_NONE:
738                         mod.x = x;
739                         mod.y = y;
740                         mod.z = z;
741                         mod.w = w;
742                         break;
743                 case Shader::MODIFIER_NEGATE:
744                         mod.x = -x;
745                         mod.y = -y;
746                         mod.z = -z;
747                         mod.w = -w;
748                         break;
749                 case Shader::MODIFIER_ABS:
750                         mod.x = Abs(x);
751                         mod.y = Abs(y);
752                         mod.z = Abs(z);
753                         mod.w = Abs(w);
754                         break;
755                 case Shader::MODIFIER_ABS_NEGATE:
756                         mod.x = -Abs(x);
757                         mod.y = -Abs(y);
758                         mod.z = -Abs(z);
759                         mod.w = -Abs(w);
760                         break;
761                 case Shader::MODIFIER_NOT:
762                         mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
763                         mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
764                         mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
765                         mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
766                         break;
767                 default:
768                         ASSERT(false);
769                 }
770
771                 return mod;
772         }
773
774         RValue<Pointer<Byte>> VertexProgram::uniformAddress(int bufferIndex, unsigned int index)
775         {
776                 if(bufferIndex == -1)
777                 {
778                         return data + OFFSET(DrawData, vs.c[index]);
779                 }
780                 else
781                 {
782                         return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, vs.u[bufferIndex])) + index;
783                 }
784         }
785
786         RValue<Pointer<Byte>> VertexProgram::uniformAddress(int bufferIndex, unsigned int index, Int& offset)
787         {
788                 return uniformAddress(bufferIndex, index) + offset * sizeof(float4);
789         }
790
791         Vector4f VertexProgram::readConstant(const Src &src, unsigned int offset)
792         {
793                 Vector4f c;
794                 unsigned int i = src.index + offset;
795
796                 if(src.rel.type == Shader::PARAMETER_VOID)   // Not relative
797                 {
798                         c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i));
799
800                         c.x = c.x.xxxx;
801                         c.y = c.y.yyyy;
802                         c.z = c.z.zzzz;
803                         c.w = c.w.wwww;
804
805                         if(shader->containsDefineInstruction())   // Constant may be known at compile time
806                         {
807                                 for(size_t j = 0; j < shader->getLength(); j++)
808                                 {
809                                         const Shader::Instruction &instruction = *shader->getInstruction(j);
810
811                                         if(instruction.opcode == Shader::OPCODE_DEF)
812                                         {
813                                                 if(instruction.dst.index == i)
814                                                 {
815                                                         c.x = Float4(instruction.src[0].value[0]);
816                                                         c.y = Float4(instruction.src[0].value[1]);
817                                                         c.z = Float4(instruction.src[0].value[2]);
818                                                         c.w = Float4(instruction.src[0].value[3]);
819
820                                                         break;
821                                                 }
822                                         }
823                                 }
824                         }
825                 }
826                 else if(src.rel.type == Shader::PARAMETER_LOOP)
827                 {
828                         Int loopCounter = aL[loopDepth];
829
830                         c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, loopCounter));
831
832                         c.x = c.x.xxxx;
833                         c.y = c.y.yyyy;
834                         c.z = c.z.zzzz;
835                         c.w = c.w.wwww;
836                 }
837                 else
838                 {
839                         if(src.rel.deterministic)
840                         {
841                                 Int a = relativeAddress(src, src.bufferIndex);
842
843                                 c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));
844
845                                 c.x = c.x.xxxx;
846                                 c.y = c.y.yyyy;
847                                 c.z = c.z.zzzz;
848                                 c.w = c.w.wwww;
849                         }
850                         else
851                         {
852                                 int component = src.rel.swizzle & 0x03;
853                                 Float4 a;
854
855                                 switch(src.rel.type)
856                                 {
857                                 case Shader::PARAMETER_ADDR:   a = a0[component]; break;
858                                 case Shader::PARAMETER_TEMP:   a = r[src.rel.index][component]; break;
859                                 case Shader::PARAMETER_INPUT:  a = v[src.rel.index][component]; break;
860                                 case Shader::PARAMETER_OUTPUT: a = o[src.rel.index][component]; break;
861                                 case Shader::PARAMETER_CONST:  a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break;
862                                 default: ASSERT(false);
863                                 }
864
865                                 Int4 index = Int4(i) + As<Int4>(a) * Int4(src.rel.scale);
866
867                                 index = Min(As<UInt4>(index), UInt4(VERTEX_UNIFORM_VECTORS));   // Clamp to constant register range, c[VERTEX_UNIFORM_VECTORS] = {0, 0, 0, 0}
868
869                                 Int index0 = Extract(index, 0);
870                                 Int index1 = Extract(index, 1);
871                                 Int index2 = Extract(index, 2);
872                                 Int index3 = Extract(index, 3);
873
874                                 c.x = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index0), 16);
875                                 c.y = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index1), 16);
876                                 c.z = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index2), 16);
877                                 c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index3), 16);
878
879                                 transpose4x4(c.x, c.y, c.z, c.w);
880                         }
881                 }
882
883                 return c;
884         }
885
886         Int VertexProgram::relativeAddress(const Shader::Parameter &var, int bufferIndex)
887         {
888                 ASSERT(var.rel.deterministic);
889
890                 if(var.rel.type == Shader::PARAMETER_TEMP)
891                 {
892                         return As<Int>(Extract(r[var.rel.index].x, 0)) * var.rel.scale;
893                 }
894                 else if(var.rel.type == Shader::PARAMETER_INPUT)
895                 {
896                         return As<Int>(Extract(v[var.rel.index].x, 0)) * var.rel.scale;
897                 }
898                 else if(var.rel.type == Shader::PARAMETER_OUTPUT)
899                 {
900                         return As<Int>(Extract(o[var.rel.index].x, 0)) * var.rel.scale;
901                 }
902                 else if(var.rel.type == Shader::PARAMETER_CONST)
903                 {
904                         return *Pointer<Int>(uniformAddress(bufferIndex, var.rel.index)) * var.rel.scale;
905                 }
906                 else if(var.rel.type == Shader::PARAMETER_LOOP)
907                 {
908                         return aL[loopDepth];
909                 }
910                 else ASSERT(false);
911
912                 return 0;
913         }
914
915         Int4 VertexProgram::enableMask(const Shader::Instruction *instruction)
916         {
917                 Int4 enable = instruction->analysisBranch ? Int4(enableStack[enableIndex]) : Int4(0xFFFFFFFF);
918
919                 if(!whileTest)
920                 {
921                         if(shader->containsBreakInstruction() && instruction->analysisBreak)
922                         {
923                                 enable &= enableBreak;
924                         }
925
926                         if(shader->containsContinueInstruction() && instruction->analysisContinue)
927                         {
928                                 enable &= enableContinue;
929                         }
930
931                         if(shader->containsLeaveInstruction() && instruction->analysisLeave)
932                         {
933                                 enable &= enableLeave;
934                         }
935                 }
936
937                 return enable;
938         }
939
940         void VertexProgram::M3X2(Vector4f &dst, Vector4f &src0, Src &src1)
941         {
942                 Vector4f row0 = fetchRegister(src1, 0);
943                 Vector4f row1 = fetchRegister(src1, 1);
944
945                 dst.x = dot3(src0, row0);
946                 dst.y = dot3(src0, row1);
947         }
948
949         void VertexProgram::M3X3(Vector4f &dst, Vector4f &src0, Src &src1)
950         {
951                 Vector4f row0 = fetchRegister(src1, 0);
952                 Vector4f row1 = fetchRegister(src1, 1);
953                 Vector4f row2 = fetchRegister(src1, 2);
954
955                 dst.x = dot3(src0, row0);
956                 dst.y = dot3(src0, row1);
957                 dst.z = dot3(src0, row2);
958         }
959
960         void VertexProgram::M3X4(Vector4f &dst, Vector4f &src0, Src &src1)
961         {
962                 Vector4f row0 = fetchRegister(src1, 0);
963                 Vector4f row1 = fetchRegister(src1, 1);
964                 Vector4f row2 = fetchRegister(src1, 2);
965                 Vector4f row3 = fetchRegister(src1, 3);
966
967                 dst.x = dot3(src0, row0);
968                 dst.y = dot3(src0, row1);
969                 dst.z = dot3(src0, row2);
970                 dst.w = dot3(src0, row3);
971         }
972
973         void VertexProgram::M4X3(Vector4f &dst, Vector4f &src0, Src &src1)
974         {
975                 Vector4f row0 = fetchRegister(src1, 0);
976                 Vector4f row1 = fetchRegister(src1, 1);
977                 Vector4f row2 = fetchRegister(src1, 2);
978
979                 dst.x = dot4(src0, row0);
980                 dst.y = dot4(src0, row1);
981                 dst.z = dot4(src0, row2);
982         }
983
984         void VertexProgram::M4X4(Vector4f &dst, Vector4f &src0, Src &src1)
985         {
986                 Vector4f row0 = fetchRegister(src1, 0);
987                 Vector4f row1 = fetchRegister(src1, 1);
988                 Vector4f row2 = fetchRegister(src1, 2);
989                 Vector4f row3 = fetchRegister(src1, 3);
990
991                 dst.x = dot4(src0, row0);
992                 dst.y = dot4(src0, row1);
993                 dst.z = dot4(src0, row2);
994                 dst.w = dot4(src0, row3);
995         }
996
997         void VertexProgram::BREAK()
998         {
999                 llvm::BasicBlock *deadBlock = Nucleus::createBasicBlock();
1000                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
1001
1002                 if(breakDepth == 0)
1003                 {
1004                         enableIndex = enableIndex - breakDepth;
1005                         Nucleus::createBr(endBlock);
1006                 }
1007                 else
1008                 {
1009                         enableBreak = enableBreak & ~enableStack[enableIndex];
1010                         Bool allBreak = SignMask(enableBreak) == 0x0;
1011
1012                         enableIndex = enableIndex - breakDepth;
1013                         branch(allBreak, endBlock, deadBlock);
1014                 }
1015
1016                 Nucleus::setInsertBlock(deadBlock);
1017                 enableIndex = enableIndex + breakDepth;
1018         }
1019
1020         void VertexProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control)
1021         {
1022                 Int4 condition;
1023
1024                 switch(control)
1025                 {
1026                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x,  src1.x);   break;
1027                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);             break;
1028                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x);    break;
1029                 case Shader::CONTROL_LT: condition = CmpLT(src0.x,  src1.x);    break;
1030                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x);    break;
1031                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);             break;
1032                 default:
1033                         ASSERT(false);
1034                 }
1035
1036                 BREAK(condition);
1037         }
1038
1039         void VertexProgram::BREAKP(const Src &predicateRegister)   // FIXME: Factor out parts common with BREAKC
1040         {
1041                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1042
1043                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1044                 {
1045                         condition = ~condition;
1046                 }
1047
1048                 BREAK(condition);
1049         }
1050
1051         void VertexProgram::BREAK(Int4 &condition)
1052         {
1053                 condition &= enableStack[enableIndex];
1054
1055                 llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock();
1056                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
1057
1058                 enableBreak = enableBreak & ~condition;
1059                 Bool allBreak = SignMask(enableBreak) == 0x0;
1060
1061                 enableIndex = enableIndex - breakDepth;
1062                 branch(allBreak, endBlock, continueBlock);
1063
1064                 Nucleus::setInsertBlock(continueBlock);
1065                 enableIndex = enableIndex + breakDepth;
1066         }
1067
1068         void VertexProgram::CONTINUE()
1069         {
1070                 enableContinue = enableContinue & ~enableStack[enableIndex];
1071         }
1072
1073         void VertexProgram::TEST()
1074         {
1075                 whileTest = true;
1076         }
1077
1078         void VertexProgram::CALL(int labelIndex, int callSiteIndex)
1079         {
1080                 if(!labelBlock[labelIndex])
1081                 {
1082                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1083                 }
1084
1085                 if(callRetBlock[labelIndex].size() > 1)
1086                 {
1087                         callStack[stackIndex++] = UInt(callSiteIndex);
1088                 }
1089
1090                 Int4 restoreLeave = enableLeave;
1091
1092                 Nucleus::createBr(labelBlock[labelIndex]);
1093                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1094
1095                 enableLeave = restoreLeave;
1096         }
1097
1098         void VertexProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src)
1099         {
1100                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1101                 {
1102                         CALLNZb(labelIndex, callSiteIndex, src);
1103                 }
1104                 else if(src.type == Shader::PARAMETER_PREDICATE)
1105                 {
1106                         CALLNZp(labelIndex, callSiteIndex, src);
1107                 }
1108                 else ASSERT(false);
1109         }
1110
1111         void VertexProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister)
1112         {
1113                 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1114
1115                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1116                 {
1117                         condition = !condition;
1118                 }
1119
1120                 if(!labelBlock[labelIndex])
1121                 {
1122                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1123                 }
1124
1125                 if(callRetBlock[labelIndex].size() > 1)
1126                 {
1127                         callStack[stackIndex++] = UInt(callSiteIndex);
1128                 }
1129
1130                 Int4 restoreLeave = enableLeave;
1131
1132                 branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1133                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1134
1135                 enableLeave = restoreLeave;
1136         }
1137
1138         void VertexProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister)
1139         {
1140                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1141
1142                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1143                 {
1144                         condition = ~condition;
1145                 }
1146
1147                 condition &= enableStack[enableIndex];
1148
1149                 if(!labelBlock[labelIndex])
1150                 {
1151                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1152                 }
1153
1154                 if(callRetBlock[labelIndex].size() > 1)
1155                 {
1156                         callStack[stackIndex++] = UInt(callSiteIndex);
1157                 }
1158
1159                 enableIndex++;
1160                 enableStack[enableIndex] = condition;
1161                 Int4 restoreLeave = enableLeave;
1162
1163                 Bool notAllFalse = SignMask(condition) != 0;
1164                 branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
1165                 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
1166
1167                 enableIndex--;
1168                 enableLeave = restoreLeave;
1169         }
1170
1171         void VertexProgram::ELSE()
1172         {
1173                 ifDepth--;
1174
1175                 llvm::BasicBlock *falseBlock = ifFalseBlock[ifDepth];
1176                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1177
1178                 if(isConditionalIf[ifDepth])
1179                 {
1180                         Int4 condition = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
1181                         Bool notAllFalse = SignMask(condition) != 0;
1182
1183                         branch(notAllFalse, falseBlock, endBlock);
1184
1185                         enableStack[enableIndex] = ~enableStack[enableIndex] & enableStack[enableIndex - 1];
1186                 }
1187                 else
1188                 {
1189                         Nucleus::createBr(endBlock);
1190                         Nucleus::setInsertBlock(falseBlock);
1191                 }
1192
1193                 ifFalseBlock[ifDepth] = endBlock;
1194
1195                 ifDepth++;
1196         }
1197
1198         void VertexProgram::ENDIF()
1199         {
1200                 ifDepth--;
1201
1202                 llvm::BasicBlock *endBlock = ifFalseBlock[ifDepth];
1203
1204                 Nucleus::createBr(endBlock);
1205                 Nucleus::setInsertBlock(endBlock);
1206
1207                 if(isConditionalIf[ifDepth])
1208                 {
1209                         breakDepth--;
1210                         enableIndex--;
1211                 }
1212         }
1213
1214         void VertexProgram::ENDLOOP()
1215         {
1216                 loopRepDepth--;
1217
1218                 aL[loopDepth] = aL[loopDepth] + increment[loopDepth];   // FIXME: +=
1219
1220                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1221                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1222
1223                 Nucleus::createBr(testBlock);
1224                 Nucleus::setInsertBlock(endBlock);
1225
1226                 loopDepth--;
1227                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1228         }
1229
1230         void VertexProgram::ENDREP()
1231         {
1232                 loopRepDepth--;
1233
1234                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1235                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1236
1237                 Nucleus::createBr(testBlock);
1238                 Nucleus::setInsertBlock(endBlock);
1239
1240                 loopDepth--;
1241                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1242         }
1243
1244         void VertexProgram::ENDWHILE()
1245         {
1246                 loopRepDepth--;
1247
1248                 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
1249                 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
1250
1251                 Nucleus::createBr(testBlock);
1252                 Nucleus::setInsertBlock(endBlock);
1253
1254                 enableIndex--;
1255                 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
1256                 whileTest = false;
1257         }
1258
1259         void VertexProgram::IF(const Src &src)
1260         {
1261                 if(src.type == Shader::PARAMETER_CONSTBOOL)
1262                 {
1263                         IFb(src);
1264                 }
1265                 else if(src.type == Shader::PARAMETER_PREDICATE)
1266                 {
1267                         IFp(src);
1268                 }
1269                 else
1270                 {
1271                         Int4 condition = As<Int4>(fetchRegister(src).x);
1272                         IF(condition);
1273                 }
1274         }
1275
1276         void VertexProgram::IFb(const Src &boolRegister)
1277         {
1278                 ASSERT(ifDepth < 24 + 4);
1279
1280                 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0));   // FIXME
1281
1282                 if(boolRegister.modifier == Shader::MODIFIER_NOT)
1283                 {
1284                         condition = !condition;
1285                 }
1286
1287                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1288                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1289
1290                 branch(condition, trueBlock, falseBlock);
1291
1292                 isConditionalIf[ifDepth] = false;
1293                 ifFalseBlock[ifDepth] = falseBlock;
1294
1295                 ifDepth++;
1296         }
1297
1298         void VertexProgram::IFp(const Src &predicateRegister)
1299         {
1300                 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]);
1301
1302                 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
1303                 {
1304                         condition = ~condition;
1305                 }
1306
1307                 IF(condition);
1308         }
1309
1310         void VertexProgram::IFC(Vector4f &src0, Vector4f &src1, Control control)
1311         {
1312                 Int4 condition;
1313
1314                 switch(control)
1315                 {
1316                 case Shader::CONTROL_GT: condition = CmpNLE(src0.x,  src1.x);   break;
1317                 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x);             break;
1318                 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x);    break;
1319                 case Shader::CONTROL_LT: condition = CmpLT(src0.x,  src1.x);    break;
1320                 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x);    break;
1321                 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x);             break;
1322                 default:
1323                         ASSERT(false);
1324                 }
1325
1326                 IF(condition);
1327         }
1328
1329         void VertexProgram::IF(Int4 &condition)
1330         {
1331                 condition &= enableStack[enableIndex];
1332
1333                 enableIndex++;
1334                 enableStack[enableIndex] = condition;
1335
1336                 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1337                 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1338
1339                 Bool notAllFalse = SignMask(condition) != 0;
1340
1341                 branch(notAllFalse, trueBlock, falseBlock);
1342
1343                 isConditionalIf[ifDepth] = true;
1344                 ifFalseBlock[ifDepth] = falseBlock;
1345
1346                 ifDepth++;
1347                 breakDepth++;
1348         }
1349
1350         void VertexProgram::LABEL(int labelIndex)
1351         {
1352                 if(!labelBlock[labelIndex])
1353                 {
1354                         labelBlock[labelIndex] = Nucleus::createBasicBlock();
1355                 }
1356
1357                 Nucleus::setInsertBlock(labelBlock[labelIndex]);
1358                 currentLabel = labelIndex;
1359         }
1360
1361         void VertexProgram::LOOP(const Src &integerRegister)
1362         {
1363                 loopDepth++;
1364
1365                 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
1366                 aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][1]));
1367                 increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][2]));
1368
1369                 // FIXME: Compiles to two instructions?
1370                 If(increment[loopDepth] == 0)
1371                 {
1372                         increment[loopDepth] = 1;
1373                 }
1374
1375                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1376                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1377                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1378
1379                 loopRepTestBlock[loopRepDepth] = testBlock;
1380                 loopRepEndBlock[loopRepDepth] = endBlock;
1381
1382                 // FIXME: jump(testBlock)
1383                 Nucleus::createBr(testBlock);
1384                 Nucleus::setInsertBlock(testBlock);
1385
1386                 branch(iteration[loopDepth] > 0, loopBlock, endBlock);
1387                 Nucleus::setInsertBlock(loopBlock);
1388
1389                 iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1390
1391                 loopRepDepth++;
1392                 breakDepth = 0;
1393         }
1394
1395         void VertexProgram::REP(const Src &integerRegister)
1396         {
1397                 loopDepth++;
1398
1399                 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
1400                 aL[loopDepth] = aL[loopDepth - 1];
1401
1402                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1403                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1404                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1405
1406                 loopRepTestBlock[loopRepDepth] = testBlock;
1407                 loopRepEndBlock[loopRepDepth] = endBlock;
1408
1409                 // FIXME: jump(testBlock)
1410                 Nucleus::createBr(testBlock);
1411                 Nucleus::setInsertBlock(testBlock);
1412
1413                 branch(iteration[loopDepth] > 0, loopBlock, endBlock);
1414                 Nucleus::setInsertBlock(loopBlock);
1415
1416                 iteration[loopDepth] = iteration[loopDepth] - 1;   // FIXME: --
1417
1418                 loopRepDepth++;
1419                 breakDepth = 0;
1420         }
1421
1422         void VertexProgram::WHILE(const Src &temporaryRegister)
1423         {
1424                 enableIndex++;
1425
1426                 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1427                 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1428                 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1429
1430                 loopRepTestBlock[loopRepDepth] = testBlock;
1431                 loopRepEndBlock[loopRepDepth] = endBlock;
1432
1433                 Int4 restoreBreak = enableBreak;
1434                 Int4 restoreContinue = enableContinue;
1435
1436                 // FIXME: jump(testBlock)
1437                 Nucleus::createBr(testBlock);
1438                 Nucleus::setInsertBlock(testBlock);
1439                 enableContinue = restoreContinue;
1440
1441                 const Vector4f &src = fetchRegister(temporaryRegister);
1442                 Int4 condition = As<Int4>(src.x);
1443                 condition &= enableStack[enableIndex - 1];
1444                 if(shader->containsLeaveInstruction()) condition &= enableLeave;
1445                 enableStack[enableIndex] = condition;
1446
1447                 Bool notAllFalse = SignMask(condition) != 0;
1448                 branch(notAllFalse, loopBlock, endBlock);
1449
1450                 Nucleus::setInsertBlock(endBlock);
1451                 enableBreak = restoreBreak;
1452
1453                 Nucleus::setInsertBlock(loopBlock);
1454
1455                 loopRepDepth++;
1456                 breakDepth = 0;
1457         }
1458
1459         void VertexProgram::RET()
1460         {
1461                 if(currentLabel == -1)
1462                 {
1463                         returnBlock = Nucleus::createBasicBlock();
1464                         Nucleus::createBr(returnBlock);
1465                 }
1466                 else
1467                 {
1468                         llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
1469
1470                         if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
1471                         {
1472                                 // FIXME: Encapsulate
1473                                 UInt index = callStack[--stackIndex];
1474
1475                                 llvm::Value *value = index.loadValue();
1476                                 llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
1477
1478                                 for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
1479                                 {
1480                                         Nucleus::addSwitchCase(switchInst, i, callRetBlock[currentLabel][i]);
1481                                 }
1482                         }
1483                         else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
1484                         {
1485                                 Nucleus::createBr(callRetBlock[currentLabel][0]);
1486                         }
1487                         else   // Function isn't called
1488                         {
1489                                 Nucleus::createBr(unreachableBlock);
1490                         }
1491
1492                         Nucleus::setInsertBlock(unreachableBlock);
1493                         Nucleus::createUnreachable();
1494                 }
1495         }
1496
1497         void VertexProgram::LEAVE()
1498         {
1499                 enableLeave = enableLeave & ~enableStack[enableIndex];
1500
1501                 // FIXME: Return from function if all instances left
1502                 // FIXME: Use enableLeave in other control-flow constructs
1503         }
1504
1505         void VertexProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1)
1506         {
1507                 sampleTexture(dst, src1, src0.x, src0.y, src0.z, src0.w, Lod);
1508         }
1509
1510         void VertexProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1)
1511         {
1512                 Float4 lod0 = Float4(0.0f);
1513                 sampleTexture(dst, src1, src0.x, src0.y, src0.z, lod0, Lod);
1514         }
1515
1516         void VertexProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3)
1517         {
1518                 UNIMPLEMENTED();
1519         }
1520
1521         void VertexProgram::TEXLDL(Vector4f &dst, Vector4f &src, const Src&, Vector4f &offset)
1522         {
1523                 UNIMPLEMENTED();
1524         }
1525
1526         void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2)
1527         {
1528                 UNIMPLEMENTED();
1529         }
1530
1531         void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &offset)
1532         {
1533                 UNIMPLEMENTED();
1534         }
1535
1536         void VertexProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3)
1537         {
1538                 UNIMPLEMENTED();
1539         }
1540
1541         void VertexProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3, Vector4f &offset)
1542         {
1543                 UNIMPLEMENTED();
1544         }
1545
1546         void VertexProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1)
1547         {
1548                 Pointer<Byte> textureMipmap = data + OFFSET(DrawData, mipmap[16]) + src1.index * sizeof(Texture) + OFFSET(Texture, mipmap);
1549                 for(int i = 0; i < 4; ++i)
1550                 {
1551                         Pointer<Byte> mipmap = textureMipmap + (As<Int>(Extract(lod, i)) + Int(1)) * sizeof(Mipmap);
1552                         dst.x = Insert(dst.x, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, width)))), i);
1553                         dst.y = Insert(dst.y, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, height)))), i);
1554                         dst.z = Insert(dst.z, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)))), i);
1555                 }
1556         }
1557
1558         void VertexProgram::sampleTexture(Vector4f &c, const Src &s, Float4 &u, Float4 &v, Float4 &w, Float4 &q, SamplerMethod method)
1559         {
1560                 Vector4f tmp;
1561
1562                 if(s.type == Shader::PARAMETER_SAMPLER && s.rel.type == Shader::PARAMETER_VOID)
1563                 {
1564                         Pointer<Byte> texture = data + OFFSET(DrawData,mipmap[TEXTURE_IMAGE_UNITS]) + s.index * sizeof(Texture);
1565                         sampler[s.index]->sampleTexture(texture, tmp, u, v, w, q, a0, a0, method);
1566                 }
1567                 else
1568                 {
1569                         Int index = As<Int>(Float(fetchRegister(s).x.x));
1570
1571                         for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
1572                         {
1573                                 if(shader->usesSampler(i))
1574                                 {
1575                                         If(index == i)
1576                                         {
1577                                                 Pointer<Byte> texture = data + OFFSET(DrawData,mipmap[TEXTURE_IMAGE_UNITS]) + i * sizeof(Texture);
1578                                                 sampler[i]->sampleTexture(texture, tmp, u, v, w, q, a0, a0, method);
1579                                                 // FIXME: When the sampler states are the same, we could use one sampler and just index the texture
1580                                         }
1581                                 }
1582                         }
1583                 }
1584
1585                 c.x = tmp[(s.swizzle >> 0) & 0x3];
1586                 c.y = tmp[(s.swizzle >> 2) & 0x3];
1587                 c.z = tmp[(s.swizzle >> 4) & 0x3];
1588                 c.w = tmp[(s.swizzle >> 6) & 0x3];
1589         }
1590 }