OSDN Git Service

Matrix determinant and inverse implementation
[android-x86/external-swiftshader.git] / src / Shader / Shader.cpp
1 // SwiftShader Software Renderer
2 //
3 // Copyright(c) 2005-2013 TransGaming Inc.
4 //
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
10 //
11
12 #include "Shader.hpp"
13
14 #include "VertexShader.hpp"
15 #include "PixelShader.hpp"
16 #include "Math.hpp"
17 #include "Debug.hpp"
18
19 #include <set>
20 #include <fstream>
21 #include <sstream>
22 #include <stdarg.h>
23
24 namespace sw
25 {
26         volatile int Shader::serialCounter = 1;   // Definition of the static counter; it starts at 1. NOTE(review): presumably the source of per-shader serial IDs - confirm against the Shader constructor.
27
28         Shader::Opcode Shader::OPCODE_DP(int i)
29         {
30                 switch(i)
31                 {
32                 default: ASSERT(false);
33                 case 1: return OPCODE_DP1;
34                 case 2: return OPCODE_DP2;
35                 case 3: return OPCODE_DP3;
36                 case 4: return OPCODE_DP4;
37                 }
38         }
39
40         Shader::Opcode Shader::OPCODE_LEN(int i)
41         {
42                 switch(i)
43                 {
44                 default: ASSERT(false);
45                 case 1: return OPCODE_ABS;
46                 case 2: return OPCODE_LEN2;
47                 case 3: return OPCODE_LEN3;
48                 case 4: return OPCODE_LEN4;
49                 }
50         }
51
52         Shader::Opcode Shader::OPCODE_DIST(int i)
53         {
54                 switch(i)
55                 {
56                 default: ASSERT(false);
57                 case 1: return OPCODE_DIST1;
58                 case 2: return OPCODE_DIST2;
59                 case 3: return OPCODE_DIST3;
60                 case 4: return OPCODE_DIST4;
61                 }
62         }
63
64         Shader::Opcode Shader::OPCODE_NRM(int i)        
65         {
66                 switch(i)
67                 {
68                 default: ASSERT(false);
69                 case 1: return OPCODE_SGN;
70                 case 2: return OPCODE_NRM2;
71                 case 3: return OPCODE_NRM3;
72                 case 4: return OPCODE_NRM4;
73                 }
74         }
75
76         Shader::Opcode Shader::OPCODE_FORWARD(int i)
77         {
78                 switch(i)
79                 {
80                 default: ASSERT(false);
81                 case 1: return OPCODE_FORWARD1;
82                 case 2: return OPCODE_FORWARD2;
83                 case 3: return OPCODE_FORWARD3;
84                 case 4: return OPCODE_FORWARD4;
85                 }
86         }
87
88         Shader::Opcode Shader::OPCODE_REFLECT(int i)
89         {
90                 switch(i)
91                 {
92                 default: ASSERT(false);
93                 case 1: return OPCODE_REFLECT1;
94                 case 2: return OPCODE_REFLECT2;
95                 case 3: return OPCODE_REFLECT3;
96                 case 4: return OPCODE_REFLECT4;
97                 }
98         }
99
100         Shader::Opcode Shader::OPCODE_REFRACT(int i)
101         {
102                 switch(i)
103                 {
104                 default: ASSERT(false);
105                 case 1: return OPCODE_REFRACT1;
106                 case 2: return OPCODE_REFRACT2;
107                 case 3: return OPCODE_REFRACT3;
108                 case 4: return OPCODE_REFRACT4;
109                 }
110         }
111
112         Shader::Instruction::Instruction(Opcode opcode) : opcode(opcode), analysis(0)
113         {
114                 control = CONTROL_RESERVED0;
115
116                 predicate = false;
117                 predicateNot = false;
118                 predicateSwizzle = 0xE4;
119                 
120                 coissue = false;
121                 samplerType = SAMPLER_UNKNOWN;
122                 usage = USAGE_POSITION;
123                 usageIndex = 0;
124         }
125
126         Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) : analysis(0)
127         {
128                 parseOperationToken(*token++, majorVersion);
129
130                 samplerType = SAMPLER_UNKNOWN;
131                 usage = USAGE_POSITION;
132                 usageIndex = 0;
133
134                 if(opcode == OPCODE_IF ||
135                    opcode == OPCODE_IFC ||
136                    opcode == OPCODE_LOOP ||
137                    opcode == OPCODE_REP ||
138                    opcode == OPCODE_BREAKC ||
139                    opcode == OPCODE_BREAKP)   // No destination operand
140                 {
141                         if(size > 0) parseSourceToken(0, token++, majorVersion);
142                         if(size > 1) parseSourceToken(1, token++, majorVersion);
143                         if(size > 2) parseSourceToken(2, token++, majorVersion);
144                         if(size > 3) ASSERT(false);
145                 }
146                 else if(opcode == OPCODE_DCL)
147                 {
148                         parseDeclarationToken(*token++);
149                         parseDestinationToken(token++, majorVersion);
150                 }
151                 else
152                 {
153                         if(size > 0)
154                         {
155                                 parseDestinationToken(token, majorVersion);
156
157                                 if(dst.rel.type != PARAMETER_VOID && majorVersion >= 3)
158                                 {
159                                         token++;
160                                         size--;
161                                 }
162                                 
163                                 token++;
164                                 size--;
165                         }
166
167                         if(predicate)
168                         {
169                                 ASSERT(size != 0);
170
171                                 predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT;
172                                 predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16);
173                                 
174                                 token++;
175                                 size--;
176                         }
177
178                         for(int i = 0; size > 0; i++)
179                         {
180                                 parseSourceToken(i, token, majorVersion);
181
182                                 token++;
183                                 size--;
184
185                                 if(src[i].rel.type != PARAMETER_VOID && majorVersion >= 2)
186                                 {
187                                         token++;
188                                         size--;
189                                 }
190                         }
191                 }
192         }
193
194         Shader::Instruction::~Instruction()   // Destructor with an empty body
195         {
196         }
197
198         std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const
199         {
200                 std::string instructionString;
201                 
202                 if(opcode != OPCODE_DCL)
203                 {
204                         instructionString += coissue ? "+ " : "";
205                         
206                         if(predicate)
207                         {
208                                 instructionString += predicateNot ? "(!p0" : "(p0";
209                                 instructionString += swizzleString(PARAMETER_PREDICATE, predicateSwizzle);
210                                 instructionString += ") ";
211                         }
212
213                         instructionString += operationString(version) + controlString() + dst.shiftString() + dst.modifierString();
214
215                         if(dst.type != PARAMETER_VOID)
216                         {
217                                 instructionString += " " + dst.string(shaderType, version) +
218                                                            dst.relativeString() +
219                                                            dst.maskString(); 
220                         }
221
222                         for(int i = 0; i < 4; i++)
223                         {
224                                 if(src[i].type != PARAMETER_VOID)
225                                 {
226                                         instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " ";
227                                         instructionString += src[i].preModifierString() +
228                                                                                  src[i].string(shaderType, version) +
229                                                                                  src[i].relativeString() + 
230                                                                                  src[i].postModifierString() + 
231                                                                                  src[i].swizzleString();
232                                 }
233                         }
234                 }
235                 else   // DCL
236                 {
237                         instructionString += "dcl";
238
239                         if(dst.type == PARAMETER_SAMPLER)
240                         {
241                                 switch(samplerType)
242                                 {
243                                 case SAMPLER_UNKNOWN: instructionString += " ";        break;
244                                 case SAMPLER_1D:      instructionString += "_1d ";     break;
245                                 case SAMPLER_2D:      instructionString += "_2d ";     break;
246                                 case SAMPLER_CUBE:    instructionString += "_cube ";   break;
247                                 case SAMPLER_VOLUME:  instructionString += "_volume "; break;
248                                 default:
249                                         ASSERT(false);
250                                 }
251
252                                 instructionString += dst.string(shaderType, version);
253                         }
254                         else if(dst.type == PARAMETER_INPUT ||
255                                     dst.type == PARAMETER_OUTPUT ||
256                                     dst.type == PARAMETER_TEXTURE)
257                         {
258                                 if(version >= 0x0300)
259                                 {
260                                         switch(usage)
261                                         {
262                                         case USAGE_POSITION:     instructionString += "_position";     break;
263                                         case USAGE_BLENDWEIGHT:  instructionString += "_blendweight";  break;
264                                         case USAGE_BLENDINDICES: instructionString += "_blendindices"; break;
265                                         case USAGE_NORMAL:       instructionString += "_normal";       break;
266                                         case USAGE_PSIZE:        instructionString += "_psize";        break;
267                                         case USAGE_TEXCOORD:     instructionString += "_texcoord";     break;
268                                         case USAGE_TANGENT:      instructionString += "_tangent";      break;
269                                         case USAGE_BINORMAL:     instructionString += "_binormal";     break;
270                                         case USAGE_TESSFACTOR:   instructionString += "_tessfactor";   break;
271                                         case USAGE_POSITIONT:    instructionString += "_positiont";    break;
272                                         case USAGE_COLOR:        instructionString += "_color";        break;
273                                         case USAGE_FOG:          instructionString += "_fog";          break;
274                                         case USAGE_DEPTH:        instructionString += "_depth";        break;
275                                         case USAGE_SAMPLE:       instructionString += "_sample";       break;
276                                         default:
277                                                 ASSERT(false);
278                                         }
279
280                                         if(usageIndex > 0)
281                                         {
282                                                 std::ostringstream buffer;
283
284                                                 buffer << (int)usageIndex;
285
286                                                 instructionString += buffer.str();
287                                         }
288                                 }
289                                 else ASSERT(dst.type != PARAMETER_OUTPUT);
290
291                                 instructionString += " ";
292
293                                 instructionString += dst.string(shaderType, version);
294                                 instructionString += dst.maskString();
295                         }
296                         else if(dst.type == PARAMETER_MISCTYPE)   // vPos and vFace
297                         {
298                                 instructionString += " ";
299
300                                 instructionString += dst.string(shaderType, version);
301                         }
302                         else ASSERT(false);
303                 }
304
305                 return instructionString;
306         }
307
308         std::string Shader::DestinationParameter::modifierString() const
309         {
310                 if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
311                 {
312                         return "";
313                 }
314
315                 std::string modifierString;
316
317                 if(integer)
318                 {
319                         modifierString += "_int";
320                 }
321
322                 if(saturate)
323                 {
324                         modifierString += "_sat";
325                 }
326
327                 if(partialPrecision)
328                 {
329                         modifierString += "_pp";
330                 }
331
332                 if(centroid)
333                 {
334                         modifierString += "_centroid";
335                 }
336
337                 return modifierString;
338         }
339
340         std::string Shader::DestinationParameter::shiftString() const
341         {
342                 if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
343                 {
344                         return "";
345                 }
346
347                 switch(shift)
348                 {
349                 case 0:         return "";
350                 case 1:         return "_x2";
351                 case 2:         return "_x4"; 
352                 case 3:         return "_x8";
353                 case -1:        return "_d2";
354                 case -2:        return "_d4"; 
355                 case -3:        return "_d8";
356                 default:
357                         return "";
358                 //      ASSERT(false);   // FIXME
359                 }
360         }
361
362         std::string Shader::DestinationParameter::maskString() const
363         {
364                 if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
365                 {
366                         return "";
367                 }
368
369                 switch(mask)
370                 {
371                 case 0x0:       return "";
372                 case 0x1:       return ".x";
373                 case 0x2:       return ".y";
374                 case 0x3:       return ".xy";
375                 case 0x4:       return ".z";
376                 case 0x5:       return ".xz";
377                 case 0x6:       return ".yz";
378                 case 0x7:       return ".xyz";
379                 case 0x8:       return ".w";
380                 case 0x9:       return ".xw";
381                 case 0xA:       return ".yw";
382                 case 0xB:       return ".xyw";
383                 case 0xC:       return ".zw";
384                 case 0xD:       return ".xzw";
385                 case 0xE:       return ".yzw";
386                 case 0xF:       return "";
387                 default:
388                         ASSERT(false);
389                 }
390
391                 return "";
392         }
393
394         std::string Shader::SourceParameter::preModifierString() const
395         {
396                 if(type == PARAMETER_VOID)
397                 {
398                         return "";
399                 }
400
401                 switch(modifier)
402                 {
403                 case MODIFIER_NONE:                     return "";
404                 case MODIFIER_NEGATE:           return "-";
405                 case MODIFIER_BIAS:                     return "";
406                 case MODIFIER_BIAS_NEGATE:      return "-";
407                 case MODIFIER_SIGN:                     return "";
408                 case MODIFIER_SIGN_NEGATE:      return "-";
409                 case MODIFIER_COMPLEMENT:       return "1-";
410                 case MODIFIER_X2:                       return "";
411                 case MODIFIER_X2_NEGATE:        return "-";
412                 case MODIFIER_DZ:                       return "";
413                 case MODIFIER_DW:                       return "";
414                 case MODIFIER_ABS:                      return "";
415                 case MODIFIER_ABS_NEGATE:       return "-";
416                 case MODIFIER_NOT:                      return "!";
417                 default:
418                         ASSERT(false);
419                 }
420
421                 return "";
422         }
423
424         std::string Shader::Parameter::relativeString() const
425         {
426                 if(type == PARAMETER_CONST || type == PARAMETER_INPUT || type == PARAMETER_OUTPUT || type == PARAMETER_TEMP)
427                 {
428                         if(rel.type == PARAMETER_VOID)
429                         {
430                                 return "";
431                         }
432                         else if(rel.type == PARAMETER_ADDR)
433                         {
434                                 switch(rel.swizzle & 0x03)
435                                 {
436                                 case 0: return "[a0.x]";
437                                 case 1: return "[a0.y]";
438                                 case 2: return "[a0.z]";
439                                 case 3: return "[a0.w]";
440                                 }
441                         }
442                         else if(rel.type == PARAMETER_TEMP)
443                         {
444                                 std::ostringstream buffer;
445                                 buffer << rel.index;
446
447                                 switch(rel.swizzle & 0x03)
448                                 {
449                                 case 0: return "[r" + buffer.str() + ".x]";
450                                 case 1: return "[r" + buffer.str() + ".y]";
451                                 case 2: return "[r" + buffer.str() + ".z]";
452                                 case 3: return "[r" + buffer.str() + ".w]";
453                                 }
454                         }
455                         else if(rel.type == PARAMETER_LOOP)
456                         {
457                                 return "[aL]";
458                         }
459                         else ASSERT(false);
460                 }
461
462                 return "";
463         }
464
465         std::string Shader::SourceParameter::postModifierString() const
466         {
467                 if(type == PARAMETER_VOID)
468                 {
469                         return "";
470                 }
471
472                 switch(modifier)
473                 {
474                 case MODIFIER_NONE:                     return "";
475                 case MODIFIER_NEGATE:           return "";
476                 case MODIFIER_BIAS:                     return "_bias";
477                 case MODIFIER_BIAS_NEGATE:      return "_bias";
478                 case MODIFIER_SIGN:                     return "_bx2";
479                 case MODIFIER_SIGN_NEGATE:      return "_bx2";
480                 case MODIFIER_COMPLEMENT:       return "";
481                 case MODIFIER_X2:                       return "_x2";
482                 case MODIFIER_X2_NEGATE:        return "_x2";
483                 case MODIFIER_DZ:                       return "_dz";
484                 case MODIFIER_DW:                       return "_dw";
485                 case MODIFIER_ABS:                      return "_abs";
486                 case MODIFIER_ABS_NEGATE:       return "_abs";
487                 case MODIFIER_NOT:                      return "";
488                 default:
489                         ASSERT(false);
490                 }
491
492                 return "";
493         }
494
495         std::string Shader::SourceParameter::swizzleString() const
496         {
497                 return Instruction::swizzleString(type, swizzle);
498         }
499
500         void Shader::Instruction::parseOperationToken(unsigned long token, unsigned char majorVersion)
501         {
502                 if((token & 0xFFFF0000) == 0xFFFF0000 || (token & 0xFFFF0000) == 0xFFFE0000)   // Version token
503                 {
504                         opcode = (Opcode)token;
505
506                         control = CONTROL_RESERVED0;
507                         predicate = false;
508                         coissue = false;
509                 }
510                 else
511                 {
512                         opcode = (Opcode)(token & 0x0000FFFF);
513                         control = (Control)((token & 0x00FF0000) >> 16);
514
515                         int size = (token & 0x0F000000) >> 24;
516
517                         predicate = (token & 0x10000000) != 0x00000000;
518                         coissue = (token & 0x40000000) != 0x00000000;
519
520                         if(majorVersion < 2)
521                         {
522                                 if(size != 0)
523                                 {
524                                         ASSERT(false);   // Reserved
525                                 }
526                         }
527
528                         if(majorVersion < 2)
529                         {
530                                 if(predicate)
531                                 {
532                                         ASSERT(false);
533                                 }
534                         }
535
536                         if((token & 0x20000000) != 0x00000000)
537                         {
538                                 ASSERT(false);   // Reserved
539                         }
540
541                         if(majorVersion >= 2)
542                         {
543                                 if(coissue)
544                                 {
545                                         ASSERT(false);   // Reserved
546                                 }
547                         }
548
549                         if((token & 0x80000000) != 0x00000000)
550                         {
551                                 ASSERT(false);
552                         }
553                 }
554         }
555
556         void Shader::Instruction::parseDeclarationToken(unsigned long token)
557         {
558                 samplerType = (SamplerType)((token & 0x78000000) >> 27);
559                 usage = (Usage)(token & 0x0000001F);
560                 usageIndex = (unsigned char)((token & 0x000F0000) >> 16);
561         }
562
563         void Shader::Instruction::parseDestinationToken(const unsigned long *token, unsigned char majorVersion)
564         {
565                 dst.index = (unsigned short)(token[0] & 0x000007FF);
566                 dst.type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
567
568                 // TODO: Check type and index range
569
570                 bool relative = (token[0] & 0x00002000) != 0x00000000;
571                 dst.rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
572                 dst.rel.swizzle = 0x00;
573                 dst.rel.scale = 1;
574
575                 if(relative && majorVersion >= 3)
576                 {
577                         dst.rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
578                         dst.rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
579                 }
580                 else if(relative) ASSERT(false);   // Reserved
581
582                 if((token[0] & 0x0000C000) != 0x00000000)
583                 {
584                         ASSERT(false);   // Reserved
585                 }
586
587                 dst.mask = (unsigned char)((token[0] & 0x000F0000) >> 16);
588                 dst.saturate = (token[0] & 0x00100000) != 0;
589                 dst.partialPrecision = (token[0] & 0x00200000) != 0;
590                 dst.centroid = (token[0] & 0x00400000) != 0;
591                 dst.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4;
592
593                 if(majorVersion >= 2)
594                 {
595                         if(dst.shift)
596                         {
597                                 ASSERT(false);   // Reserved
598                         }
599                 }
600
601                 if((token[0] & 0x80000000) != 0x80000000)
602                 {
603                         ASSERT(false);
604                 }
605         }
606
607         void Shader::Instruction::parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion)
608         {
609                 // Defaults
610                 src[i].index = 0;
611                 src[i].type = PARAMETER_VOID;
612                 src[i].modifier = MODIFIER_NONE;
613                 src[i].swizzle = 0xE4;
614                 src[i].rel.type = PARAMETER_VOID;
615                 src[i].rel.swizzle = 0x00;
616                 src[i].rel.scale = 1;
617                 
618                 switch(opcode)
619                 {
620                 case OPCODE_DEF:
621                         src[0].type = PARAMETER_FLOAT4LITERAL;
622                         src[0].value[i] = *(float*)token;
623                         break;
624                 case OPCODE_DEFB:
625                         src[0].type = PARAMETER_BOOL1LITERAL;
626                         src[0].boolean[0] = *(int*)token;
627                         break;
628                 case OPCODE_DEFI:
629                         src[0].type = PARAMETER_INT4LITERAL;
630                         src[0].integer[i] = *(int*)token;
631                         break;
632                 default:
633                         src[i].index = (unsigned short)(token[0] & 0x000007FF);
634                         src[i].type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
635
636                         // FIXME: Check type and index range
637
638                         bool relative = (token[0] & 0x00002000) != 0x00000000;
639                         src[i].rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
640
641                         if((token[0] & 0x0000C000) != 0x00000000)
642                         {
643                                 if(opcode != OPCODE_DEF &&
644                                    opcode != OPCODE_DEFI &&
645                                    opcode != OPCODE_DEFB)
646                                 {
647                                         ASSERT(false);
648                                 }
649                         }
650
651                         src[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16);
652                         src[i].modifier = (Modifier)((token[0] & 0x0F000000) >> 24);
653
654                         if((token[0] & 0x80000000) != 0x80000000)
655                         {
656                                 if(opcode != OPCODE_DEF &&
657                                    opcode != OPCODE_DEFI &&
658                                    opcode != OPCODE_DEFB)
659                                 {
660                                         ASSERT(false);
661                                 }
662                         }
663
664                         if(relative && majorVersion >= 2)
665                         {
666                                 src[i].rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
667                                 src[i].rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
668                         }
669                 }
670         }
671
672         std::string Shader::Instruction::swizzleString(ParameterType type, unsigned char swizzle)
673         {
674                 if(type == PARAMETER_VOID || type == PARAMETER_LABEL || swizzle == 0xE4)
675                 {
676                         return "";
677                 }
678
679                 int x = (swizzle & 0x03) >> 0;
680                 int y = (swizzle & 0x0C) >> 2;
681                 int z = (swizzle & 0x30) >> 4;
682                 int w = (swizzle & 0xC0) >> 6;
683
684                 std::string swizzleString = ".";
685
686                 switch(x)
687                 {
688                 case 0: swizzleString += "x"; break;
689                 case 1: swizzleString += "y"; break;
690                 case 2: swizzleString += "z"; break;
691                 case 3: swizzleString += "w"; break;
692                 }
693
694                 if(!(x == y && y == z && z == w))
695                 {
696                         switch(y)
697                         {
698                         case 0: swizzleString += "x"; break;
699                         case 1: swizzleString += "y"; break;
700                         case 2: swizzleString += "z"; break;
701                         case 3: swizzleString += "w"; break;
702                         }
703
704                         if(!(y == z && z == w))
705                         {
706                                 switch(z)
707                                 {
708                                 case 0: swizzleString += "x"; break;
709                                 case 1: swizzleString += "y"; break;
710                                 case 2: swizzleString += "z"; break;
711                                 case 3: swizzleString += "w"; break;
712                                 }
713
714                                 if(!(z == w))
715                                 {
716                                         switch(w)
717                                         {
718                                         case 0: swizzleString += "x"; break;
719                                         case 1: swizzleString += "y"; break;
720                                         case 2: swizzleString += "z"; break;
721                                         case 3: swizzleString += "w"; break;
722                                         }
723                                 }
724                         }
725                 }
726
727                 return swizzleString;
728         }
729
730         std::string Shader::Instruction::operationString(unsigned short version) const
731         {
732                 switch(opcode)
733                 {
734                 case OPCODE_NULL:                       return "null";
735                 case OPCODE_NOP:                        return "nop";
736                 case OPCODE_MOV:                        return "mov";
737                 case OPCODE_ADD:                        return "add";
738                 case OPCODE_IADD:                       return "iadd";
739                 case OPCODE_SUB:                        return "sub";
740                 case OPCODE_ISUB:                       return "isub";
741                 case OPCODE_MAD:                        return "mad";
742                 case OPCODE_IMAD:                       return "imad";
743                 case OPCODE_MUL:                        return "mul";
744                 case OPCODE_IMUL:                       return "imul";
745                 case OPCODE_RCPX:                       return "rcpx";
746                 case OPCODE_DIV:                        return "div";
747                 case OPCODE_IDIV:                       return "idiv";
748                 case OPCODE_UDIV:                       return "udiv";
749                 case OPCODE_MOD:                        return "mod";
750                 case OPCODE_IMOD:                       return "imod";
751                 case OPCODE_UMOD:                       return "umod";
752                 case OPCODE_SHL:                        return "shl";
753                 case OPCODE_ISHR:                       return "ishr";
754                 case OPCODE_USHR:                       return "ushr";
755                 case OPCODE_RSQX:                       return "rsqx";
756                 case OPCODE_SQRT:                       return "sqrt";
757                 case OPCODE_RSQ:                        return "rsq";
758                 case OPCODE_LEN2:                       return "len2";
759                 case OPCODE_LEN3:                       return "len3";
760                 case OPCODE_LEN4:                       return "len4";
761                 case OPCODE_DIST1:                      return "dist1";
762                 case OPCODE_DIST2:                      return "dist2";
763                 case OPCODE_DIST3:                      return "dist3";
764                 case OPCODE_DIST4:                      return "dist4";
765                 case OPCODE_DP3:                        return "dp3";
766                 case OPCODE_DP4:                        return "dp4";
767                 case OPCODE_DET2:                       return "det2";
768                 case OPCODE_DET3:                       return "det3";
769                 case OPCODE_DET4:                       return "det4";
770                 case OPCODE_MIN:                        return "min";
771                 case OPCODE_IMIN:                       return "imin";
772                 case OPCODE_UMIN:                       return "umin";
773                 case OPCODE_MAX:                        return "max";
774                 case OPCODE_IMAX:                       return "imax";
775                 case OPCODE_UMAX:                       return "umax";
776                 case OPCODE_SLT:                        return "slt";
777                 case OPCODE_SGE:                        return "sge";
778                 case OPCODE_EXP2X:                      return "exp2x";
779                 case OPCODE_LOG2X:                      return "log2x";
780                 case OPCODE_LIT:                        return "lit";
781                 case OPCODE_ATT:                        return "att";
782                 case OPCODE_LRP:                        return "lrp";
783                 case OPCODE_STEP:                       return "step";
784                 case OPCODE_SMOOTH:                     return "smooth";
785                 case OPCODE_FLOATBITSTOINT:      return "floatBitsToInt";
786                 case OPCODE_FLOATBITSTOUINT: return "floatBitsToUInt";
787                 case OPCODE_INTBITSTOFLOAT:      return "intBitsToFloat";
788                 case OPCODE_UINTBITSTOFLOAT: return "uintBitsToFloat";
789                 case OPCODE_PACKSNORM2x16:       return "packSnorm2x16";
790                 case OPCODE_PACKUNORM2x16:       return "packUnorm2x16";
791                 case OPCODE_PACKHALF2x16:        return "packHalf2x16";
792                 case OPCODE_UNPACKSNORM2x16: return "unpackSnorm2x16";
793                 case OPCODE_UNPACKUNORM2x16: return "unpackUnorm2x16";
794                 case OPCODE_UNPACKHALF2x16:      return "unpackHalf2x16";
795                 case OPCODE_FRC:                        return "frc";
796                 case OPCODE_M4X4:                       return "m4x4";
797                 case OPCODE_M4X3:                       return "m4x3";
798                 case OPCODE_M3X4:                       return "m3x4";
799                 case OPCODE_M3X3:                       return "m3x3";
800                 case OPCODE_M3X2:                       return "m3x2";
801                 case OPCODE_CALL:                       return "call";
802                 case OPCODE_CALLNZ:                     return "callnz";
803                 case OPCODE_LOOP:                       return "loop";
804                 case OPCODE_RET:                        return "ret";
805                 case OPCODE_ENDLOOP:            return "endloop";
806                 case OPCODE_LABEL:                      return "label";
807                 case OPCODE_DCL:                        return "dcl";
808                 case OPCODE_POWX:                       return "powx";
809                 case OPCODE_CRS:                        return "crs";
810                 case OPCODE_SGN:                        return "sgn";
811                 case OPCODE_ABS:                        return "abs";
812                 case OPCODE_NRM2:                       return "nrm2";
813                 case OPCODE_NRM3:                       return "nrm3";
814                 case OPCODE_NRM4:                       return "nrm4";
815                 case OPCODE_SINCOS:                     return "sincos";
816                 case OPCODE_REP:                        return "rep";
817                 case OPCODE_ENDREP:                     return "endrep";
818                 case OPCODE_IF:                         return "if";
819                 case OPCODE_IFC:                        return "ifc";
820                 case OPCODE_ELSE:                       return "else";
821                 case OPCODE_ENDIF:                      return "endif";
822                 case OPCODE_BREAK:                      return "break";
823                 case OPCODE_BREAKC:                     return "breakc";
824                 case OPCODE_MOVA:                       return "mova";
825                 case OPCODE_DEFB:                       return "defb";
826                 case OPCODE_DEFI:                       return "defi";
827                 case OPCODE_TEXCOORD:           return "texcoord";
828                 case OPCODE_TEXKILL:            return "texkill";
829                 case OPCODE_DISCARD:            return "discard";
830                 case OPCODE_TEX:
831                         if(version < 0x0104)    return "tex";
832                         else                                    return "texld";
833                 case OPCODE_TEXBEM:                     return "texbem";
834                 case OPCODE_TEXBEML:            return "texbeml";
835                 case OPCODE_TEXREG2AR:          return "texreg2ar";
836                 case OPCODE_TEXREG2GB:          return "texreg2gb";
837                 case OPCODE_TEXM3X2PAD:         return "texm3x2pad";
838                 case OPCODE_TEXM3X2TEX:         return "texm3x2tex";
839                 case OPCODE_TEXM3X3PAD:         return "texm3x3pad";
840                 case OPCODE_TEXM3X3TEX:         return "texm3x3tex";
841                 case OPCODE_RESERVED0:          return "reserved0";
842                 case OPCODE_TEXM3X3SPEC:        return "texm3x3spec";
843                 case OPCODE_TEXM3X3VSPEC:       return "texm3x3vspec";
844                 case OPCODE_EXPP:                       return "expp";
845                 case OPCODE_LOGP:                       return "logp";
846                 case OPCODE_CND:                        return "cnd";
847                 case OPCODE_DEF:                        return "def";
848                 case OPCODE_TEXREG2RGB:         return "texreg2rgb";
849                 case OPCODE_TEXDP3TEX:          return "texdp3tex";
850                 case OPCODE_TEXM3X2DEPTH:       return "texm3x2depth";
851                 case OPCODE_TEXDP3:                     return "texdp3";
852                 case OPCODE_TEXM3X3:            return "texm3x3";
853                 case OPCODE_TEXDEPTH:           return "texdepth";
854                 case OPCODE_CMP0:                       return "cmp0";
855                 case OPCODE_ICMP:                       return "icmp";
856                 case OPCODE_UCMP:                       return "ucmp";
857                 case OPCODE_SELECT:                     return "select";
858                 case OPCODE_EXTRACT:            return "extract";
859                 case OPCODE_INSERT:                     return "insert";
860                 case OPCODE_BEM:                        return "bem";
861                 case OPCODE_DP2ADD:                     return "dp2add";
862                 case OPCODE_DFDX:                       return "dFdx";
863                 case OPCODE_DFDY:                       return "dFdy";
864                 case OPCODE_FWIDTH:                     return "fwidth";
865                 case OPCODE_TEXLDD:                     return "texldd";
866                 case OPCODE_CMP:                        return "cmp";
867                 case OPCODE_TEXLDL:                     return "texldl";
868                 case OPCODE_BREAKP:                     return "breakp";
869                 case OPCODE_PHASE:                      return "phase";
870                 case OPCODE_COMMENT:            return "comment";
871                 case OPCODE_END:                        return "end";
872                 case OPCODE_PS_1_0:                     return "ps_1_0";
873                 case OPCODE_PS_1_1:                     return "ps_1_1";
874                 case OPCODE_PS_1_2:                     return "ps_1_2";
875                 case OPCODE_PS_1_3:                     return "ps_1_3";
876                 case OPCODE_PS_1_4:                     return "ps_1_4";
877                 case OPCODE_PS_2_0:                     return "ps_2_0";
878                 case OPCODE_PS_2_x:                     return "ps_2_x";
879                 case OPCODE_PS_3_0:                     return "ps_3_0";
880                 case OPCODE_VS_1_0:                     return "vs_1_0";
881                 case OPCODE_VS_1_1:                     return "vs_1_1";
882                 case OPCODE_VS_2_0:                     return "vs_2_0";
883                 case OPCODE_VS_2_x:                     return "vs_2_x";
884                 case OPCODE_VS_2_sw:            return "vs_2_sw";
885                 case OPCODE_VS_3_0:                     return "vs_3_0";
886                 case OPCODE_VS_3_sw:            return "vs_3_sw";
887                 case OPCODE_WHILE:          return "while";
888                 case OPCODE_ENDWHILE:       return "endwhile";
889                 case OPCODE_COS:            return "cos";
890                 case OPCODE_SIN:            return "sin";
891                 case OPCODE_TAN:            return "tan";
892                 case OPCODE_ACOS:           return "acos";
893                 case OPCODE_ASIN:           return "asin";
894                 case OPCODE_ATAN:           return "atan";
895                 case OPCODE_ATAN2:          return "atan2";
896                 case OPCODE_COSH:           return "cosh";
897                 case OPCODE_SINH:           return "sinh";
898                 case OPCODE_TANH:           return "tanh";
899                 case OPCODE_ACOSH:          return "acosh";
900                 case OPCODE_ASINH:          return "asinh";
901                 case OPCODE_ATANH:          return "atanh";
902                 case OPCODE_DP1:            return "dp1";
903                 case OPCODE_DP2:            return "dp2";
904                 case OPCODE_TRUNC:          return "trunc";
905                 case OPCODE_FLOOR:          return "floor";
906                 case OPCODE_ROUND:          return "round";
907                 case OPCODE_ROUNDEVEN:      return "roundEven";
908                 case OPCODE_CEIL:           return "ceil";
909                 case OPCODE_EXP2:           return "exp2";
910                 case OPCODE_LOG2:           return "log2";
911                 case OPCODE_EXP:            return "exp";
912                 case OPCODE_LOG:            return "log";
913                 case OPCODE_POW:            return "pow";
914                 case OPCODE_F2B:            return "f2b";
915                 case OPCODE_B2F:            return "b2f";
916                 case OPCODE_F2I:            return "f2i";
917                 case OPCODE_I2F:            return "i2f";
918                 case OPCODE_F2U:            return "f2u";
919                 case OPCODE_U2F:            return "u2f";
920                 case OPCODE_B2I:            return "b2i";
921                 case OPCODE_I2B:            return "i2b";
922                 case OPCODE_B2U:            return "b2u";
923                 case OPCODE_U2B:            return "u2b";
924                 case OPCODE_ALL:            return "all";
925                 case OPCODE_ANY:            return "any";
926                 case OPCODE_NEG:            return "neg";
927                 case OPCODE_INEG:           return "ineg";
928                 case OPCODE_ISNAN:          return "isnan";
929                 case OPCODE_ISINF:          return "isinf";
930                 case OPCODE_NOT:            return "not";
931                 case OPCODE_OR:             return "or";
932                 case OPCODE_XOR:            return "xor";
933                 case OPCODE_AND:            return "and";
934                 case OPCODE_EQ:             return "eq";
935                 case OPCODE_NE:             return "neq";
936                 case OPCODE_FORWARD1:       return "forward1";
937                 case OPCODE_FORWARD2:       return "forward2";
938                 case OPCODE_FORWARD3:       return "forward3";
939                 case OPCODE_FORWARD4:       return "forward4";
940                 case OPCODE_REFLECT1:       return "reflect1";
941                 case OPCODE_REFLECT2:       return "reflect2";
942                 case OPCODE_REFLECT3:       return "reflect3";
943                 case OPCODE_REFLECT4:       return "reflect4";
944                 case OPCODE_REFRACT1:       return "refract1";
945                 case OPCODE_REFRACT2:       return "refract2";
946                 case OPCODE_REFRACT3:       return "refract3";
947                 case OPCODE_REFRACT4:       return "refract4";
948                 case OPCODE_LEAVE:          return "leave";
949                 case OPCODE_CONTINUE:       return "continue";
950                 case OPCODE_TEST:           return "test";
951                 default:
952                         ASSERT(false);
953                 }
954
955                 return "<unknown>";
956         }
957
958         std::string Shader::Instruction::controlString() const
959         {
960                 if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_CMP)
961                 {
962                         if(project) return "p";
963
964                         if(bias) return "b";
965
966                         // FIXME: LOD
967                 }
968
969                 switch(control)
970                 {
971                 case 1: return "_gt";
972                 case 2: return "_eq";
973                 case 3: return "_ge";
974                 case 4: return "_lt";
975                 case 5: return "_ne";
976                 case 6: return "_le";
977                 default:
978                         return "";
979                 //      ASSERT(false);   // FIXME
980                 }
981         }
982
983         std::string Shader::Parameter::string(ShaderType shaderType, unsigned short version) const
984         {
985                 std::ostringstream buffer;
986
987                 if(type == PARAMETER_FLOAT4LITERAL)
988                 {
989                         buffer << '{' << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << '}';
990
991                         return buffer.str();
992                 }
993                 else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE)
994                 {
995                         buffer << index;
996                         
997                         return typeString(shaderType, version) + buffer.str();
998                 }
999                 else
1000                 {
1001                         return typeString(shaderType, version);
1002                 }
1003         }
1004
1005         std::string Shader::Parameter::typeString(ShaderType shaderType, unsigned short version) const
1006         {
1007                 switch(type)
1008                 {
1009                 case PARAMETER_TEMP:                    return "r";
1010                 case PARAMETER_INPUT:                   return "v";
1011                 case PARAMETER_CONST:                   return "c";
1012                 case PARAMETER_TEXTURE:
1013         //      case PARAMETER_ADDR:
1014                         if(shaderType == SHADER_PIXEL)  return "t";
1015                         else                                                    return "a0";
1016                 case PARAMETER_RASTOUT:
1017                         if(index == 0)              return "oPos";
1018                         else if(index == 1)         return "oFog";
1019                         else if(index == 2)         return "oPts";
1020                         else                        ASSERT(false);
1021                 case PARAMETER_ATTROUT:                 return "oD";
1022                 case PARAMETER_TEXCRDOUT:
1023         //      case PARAMETER_OUTPUT:                  return "";
1024                         if(version < 0x0300)            return "oT";
1025                         else                                            return "o";
1026                 case PARAMETER_CONSTINT:                return "i";
1027                 case PARAMETER_COLOROUT:                return "oC";
1028                 case PARAMETER_DEPTHOUT:                return "oDepth";
1029                 case PARAMETER_SAMPLER:                 return "s";
1030         //      case PARAMETER_CONST2:                  return "";
1031         //      case PARAMETER_CONST3:                  return "";
1032         //      case PARAMETER_CONST4:                  return "";
1033                 case PARAMETER_CONSTBOOL:               return "b";
1034                 case PARAMETER_LOOP:                    return "aL";
1035         //      case PARAMETER_TEMPFLOAT16:             return "";
1036                 case PARAMETER_MISCTYPE:
1037                         if(index == 0)                          return "vPos";
1038                         else if(index == 1)                     return "vFace";
1039                         else                                            ASSERT(false);
1040                 case PARAMETER_LABEL:                   return "l";
1041                 case PARAMETER_PREDICATE:               return "p0";
1042                 case PARAMETER_FLOAT4LITERAL:   return "";
1043                 case PARAMETER_BOOL1LITERAL:    return "";
1044                 case PARAMETER_INT4LITERAL:             return "";
1045         //      case PARAMETER_VOID:                    return "";
1046                 default:
1047                         ASSERT(false);
1048                 }
1049
1050                 return "";
1051         }
1052
1053         bool Shader::Instruction::isBranch() const
1054         {
1055                 return opcode == OPCODE_IF || opcode == OPCODE_IFC;
1056         }
1057         
1058         bool Shader::Instruction::isCall() const
1059         {
1060                 return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ;
1061         }
1062
1063         bool Shader::Instruction::isBreak() const
1064         {
1065                 return opcode == OPCODE_BREAK || opcode == OPCODE_BREAKC || opcode == OPCODE_BREAKP;
1066         }
1067
1068         bool Shader::Instruction::isLoop() const
1069         {
1070                 return opcode == OPCODE_LOOP || opcode == OPCODE_REP || opcode == OPCODE_WHILE;
1071         }
1072
1073         bool Shader::Instruction::isEndLoop() const
1074         {
1075                 return opcode == OPCODE_ENDLOOP || opcode == OPCODE_ENDREP || opcode == OPCODE_ENDWHILE;
1076         }
1077
1078         bool Shader::Instruction::isPredicated() const
1079         {
1080                 return predicate ||
1081                        analysisBranch ||
1082                        analysisBreak ||
1083                        analysisContinue ||
1084                        analysisLeave;
1085         }
1086
1087         Shader::Shader() : serialID(serialCounter++)
1088         {
1089                 usedSamplers = 0;
1090         }
1091
1092         Shader::~Shader()
1093         {
1094                 for(unsigned int i = 0; i < instruction.size(); i++)
1095                 {
1096                         delete instruction[i];
1097                         instruction[i] = 0;
1098                 }
1099         }
1100
1101         void Shader::parse(const unsigned long *token)
1102         {
1103                 minorVersion = (unsigned char)(token[0] & 0x000000FF);
1104                 majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8);
1105                 shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16);
1106
1107                 int length;
1108
1109                 if(shaderType == SHADER_VERTEX)
1110                 {
1111                         length = VertexShader::validate(token);
1112                 }
1113                 else if(shaderType == SHADER_PIXEL)
1114                 {
1115                         length = PixelShader::validate(token);
1116                 }
1117                 else ASSERT(false);
1118
1119                 ASSERT(length != 0);
1120                 instruction.resize(length);
1121
1122                 for(int i = 0; i < length; i++)
1123                 {
1124                         while((*token & 0x0000FFFF) == 0x0000FFFE)   // Comment token
1125                         {
1126                                 int length = (*token & 0x7FFF0000) >> 16;
1127
1128                                 token += length + 1;
1129                         }
1130
1131                         int tokenCount = size(*token);
1132
1133                         instruction[i] = new Instruction(token, tokenCount, majorVersion);
1134
1135                         token += 1 + tokenCount;
1136                 }
1137         }
1138         
1139         int Shader::size(unsigned long opcode) const
1140         {
1141                 return size(opcode, version);
1142         }
1143
1144         int Shader::size(unsigned long opcode, unsigned short version)
1145         {
1146                 if(version > 0x0300)
1147                 {
1148                         ASSERT(false);
1149                 }
1150
1151                 static const char size[] =
1152                 {
1153                         0,   // NOP = 0
1154                         2,   // MOV
1155                         3,   // ADD
1156                         3,   // SUB
1157                         4,   // MAD
1158                         3,   // MUL
1159                         2,   // RCP
1160                         2,   // RSQ
1161                         3,   // DP3
1162                         3,   // DP4
1163                         3,   // MIN
1164                         3,   // MAX
1165                         3,   // SLT
1166                         3,   // SGE
1167                         2,   // EXP
1168                         2,   // LOG
1169                         2,   // LIT
1170                         3,   // DST
1171                         4,   // LRP
1172                         2,   // FRC
1173                         3,   // M4x4
1174                         3,   // M4x3
1175                         3,   // M3x4
1176                         3,   // M3x3
1177                         3,   // M3x2
1178                         1,   // CALL
1179                         2,   // CALLNZ
1180                         2,   // LOOP
1181                         0,   // RET
1182                         0,   // ENDLOOP
1183                         1,   // LABEL
1184                         2,   // DCL
1185                         3,   // POW
1186                         3,   // CRS
1187                         4,   // SGN
1188                         2,   // ABS
1189                         2,   // NRM
1190                         4,   // SINCOS
1191                         1,   // REP
1192                         0,   // ENDREP
1193                         1,   // IF
1194                         2,   // IFC
1195                         0,   // ELSE
1196                         0,   // ENDIF
1197                         0,   // BREAK
1198                         2,   // BREAKC
1199                         2,   // MOVA
1200                         2,   // DEFB
1201                         5,   // DEFI
1202                         -1,  // 49
1203                         -1,  // 50
1204                         -1,  // 51
1205                         -1,  // 52
1206                         -1,  // 53
1207                         -1,  // 54
1208                         -1,  // 55
1209                         -1,  // 56
1210                         -1,  // 57
1211                         -1,  // 58
1212                         -1,  // 59
1213                         -1,  // 60
1214                         -1,  // 61
1215                         -1,  // 62
1216                         -1,  // 63
1217                         1,   // TEXCOORD = 64
1218                         1,   // TEXKILL
1219                         1,   // TEX
1220                         2,   // TEXBEM
1221                         2,   // TEXBEML
1222                         2,   // TEXREG2AR
1223                         2,   // TEXREG2GB
1224                         2,   // TEXM3x2PAD
1225                         2,   // TEXM3x2TEX
1226                         2,   // TEXM3x3PAD
1227                         2,   // TEXM3x3TEX
1228                         -1,  // RESERVED0
1229                         3,   // TEXM3x3SPEC
1230                         2,   // TEXM3x3VSPEC
1231                         2,   // EXPP
1232                         2,   // LOGP
1233                         4,   // CND
1234                         5,   // DEF
1235                         2,   // TEXREG2RGB
1236                         2,   // TEXDP3TEX
1237                         2,   // TEXM3x2DEPTH
1238                         2,   // TEXDP3
1239                         2,   // TEXM3x3
1240                         1,   // TEXDEPTH
1241                         4,   // CMP
1242                         3,   // BEM
1243                         4,   // DP2ADD
1244                         2,   // DSX
1245                         2,   // DSY
1246                         5,   // TEXLDD
1247                         3,   // SETP
1248                         3,   // TEXLDL
1249                         2,   // BREAKP
1250                         -1,  // 97
1251                         -1,  // 98
1252                         -1,  // 99
1253                         -1,  // 100
1254                         -1,  // 101
1255                         -1,  // 102
1256                         -1,  // 103
1257                         -1,  // 104
1258                         -1,  // 105
1259                         -1,  // 106
1260                         -1,  // 107
1261                         -1,  // 108
1262                         -1,  // 109
1263                         -1,  // 110
1264                         -1,  // 111
1265                         -1,  // 112
1266                 };
1267
1268                 int length = 0;
1269
1270                 if((opcode & 0x0000FFFF) == OPCODE_COMMENT)
1271                 {
1272                         return (opcode & 0x7FFF0000) >> 16;
1273                 }
1274
1275                 if(opcode != OPCODE_PS_1_0 &&
1276                    opcode != OPCODE_PS_1_1 &&
1277                    opcode != OPCODE_PS_1_2 &&
1278                    opcode != OPCODE_PS_1_3 &&
1279                    opcode != OPCODE_PS_1_4 &&
1280                    opcode != OPCODE_PS_2_0 &&
1281                    opcode != OPCODE_PS_2_x &&
1282                    opcode != OPCODE_PS_3_0 &&
1283                    opcode != OPCODE_VS_1_0 &&
1284                    opcode != OPCODE_VS_1_1 &&
1285                    opcode != OPCODE_VS_2_0 &&
1286                    opcode != OPCODE_VS_2_x &&
1287                    opcode != OPCODE_VS_2_sw &&
1288                    opcode != OPCODE_VS_3_0 &&
1289                    opcode != OPCODE_VS_3_sw &&
1290                    opcode != OPCODE_PHASE &&
1291                    opcode != OPCODE_END)
1292                 {
1293                         if(version >= 0x0200)
1294                         {
1295                                 length = (opcode & 0x0F000000) >> 24;
1296                         }
1297                         else
1298                         {
1299                                 length = size[opcode & 0x0000FFFF];
1300                         }
1301                 }
1302
1303                 if(length < 0)
1304                 {
1305                         ASSERT(false);
1306                 }
1307
1308                 if(version == 0x0104)
1309                 {
1310                         switch(opcode & 0x0000FFFF)
1311                         {
1312                         case OPCODE_TEX:
1313                                 length += 1;
1314                                 break;
1315                         case OPCODE_TEXCOORD:
1316                                 length += 1;
1317                                 break;
1318                         default:
1319                                 break;
1320                         }
1321                 }
1322
1323                 return length;
1324         }
1325
1326         bool Shader::maskContainsComponent(int mask, int component)
1327         {
1328                 return (mask & (1 << component)) != 0;
1329         }
1330
1331         bool Shader::swizzleContainsComponent(int swizzle, int component)
1332         {
1333                 if((swizzle & 0x03) >> 0 == component) return true;
1334                 if((swizzle & 0x0C) >> 2 == component) return true;
1335                 if((swizzle & 0x30) >> 4 == component) return true;
1336                 if((swizzle & 0xC0) >> 6 == component) return true;
1337
1338                 return false;
1339         }
1340
1341         bool Shader::swizzleContainsComponentMasked(int swizzle, int component, int mask)
1342         {
1343                 if(mask & 0x1) if((swizzle & 0x03) >> 0 == component) return true;
1344                 if(mask & 0x2) if((swizzle & 0x0C) >> 2 == component) return true;
1345                 if(mask & 0x4) if((swizzle & 0x30) >> 4 == component) return true;
1346                 if(mask & 0x8) if((swizzle & 0xC0) >> 6 == component) return true;
1347
1348                 return false;
1349         }
1350
1351         bool Shader::containsDynamicBranching() const
1352         {
1353                 return dynamicBranching;
1354         }
1355
1356         bool Shader::containsBreakInstruction() const
1357         {
1358                 return containsBreak;
1359         }
1360
1361         bool Shader::containsContinueInstruction() const
1362         {
1363                 return containsContinue;
1364         }
1365
1366         bool Shader::containsLeaveInstruction() const
1367         {
1368                 return containsLeave;
1369         }
1370
1371         bool Shader::containsDefineInstruction() const
1372         {
1373                 return containsDefine;
1374         }
1375
1376         bool Shader::usesSampler(int index) const
1377         {
1378                 return (usedSamplers & (1 << index)) != 0;
1379         }
1380
1381         int Shader::getSerialID() const
1382         {
1383                 return serialID;
1384         }
1385
1386         size_t Shader::getLength() const
1387         {
1388                 return instruction.size();
1389         }
1390
1391         Shader::ShaderType Shader::getShaderType() const
1392         {
1393                 return shaderType;
1394         }
1395
1396         unsigned short Shader::getVersion() const
1397         {
1398                 return version;
1399         }
1400
1401         void Shader::print(const char *fileName, ...) const
1402         {
1403                 char fullName[1024 + 1];
1404
1405                 va_list vararg;
1406                 va_start(vararg, fileName);
1407                 vsnprintf(fullName, 1024, fileName, vararg);
1408                 va_end(vararg);
1409
1410                 std::ofstream file(fullName, std::ofstream::out);
1411
1412                 for(unsigned int i = 0; i < instruction.size(); i++)
1413                 {
1414                         file << instruction[i]->string(shaderType, version) << std::endl;
1415                 }
1416         }
1417
1418         void Shader::printInstruction(int index, const char *fileName) const
1419         {
1420                 std::ofstream file(fileName, std::ofstream::out | std::ofstream::app);
1421
1422                 file << instruction[index]->string(shaderType, version) << std::endl;
1423         }
1424
1425         void Shader::append(Instruction *instruction)
1426         {
1427                 this->instruction.push_back(instruction);
1428         }
1429
1430         void Shader::declareSampler(int i)
1431         {
1432                 usedSamplers |= 1 << i;
1433         }
1434
1435         const Shader::Instruction *Shader::getInstruction(unsigned int i) const
1436         {
1437                 ASSERT(i < instruction.size());
1438
1439                 return instruction[i];
1440         }
1441
1442         void Shader::optimize()
1443         {
1444                 optimizeLeave();
1445                 optimizeCall();
1446                 removeNull();
1447         }
1448
1449         void Shader::optimizeLeave()
1450         {
1451                 // A return (leave) right before the end of a function or the shader can be removed
1452                 for(unsigned int i = 0; i < instruction.size(); i++)
1453                 {
1454                         if(instruction[i]->opcode == OPCODE_LEAVE)
1455                         {
1456                                 if(i == instruction.size() - 1 || instruction[i + 1]->opcode == OPCODE_RET)
1457                                 {
1458                                         instruction[i]->opcode = OPCODE_NULL;
1459                                 }
1460                         }
1461                 }
1462         }
1463
1464         void Shader::optimizeCall()
1465         {
1466                 // Eliminate uncalled functions
1467                 std::set<int> calledFunctions;
1468                 bool rescan = true;
1469
1470                 while(rescan)
1471                 {
1472                         calledFunctions.clear();
1473                         rescan = false;
1474
1475                         for(unsigned int i = 0; i < instruction.size(); i++)
1476                         {
1477                                 if(instruction[i]->isCall())
1478                                 {
1479                                         calledFunctions.insert(instruction[i]->dst.label);
1480                                 }
1481                         }
1482
1483                         if(!calledFunctions.empty())
1484                         {
1485                                 for(unsigned int i = 0; i < instruction.size(); i++)
1486                                 {
1487                                         if(instruction[i]->opcode == OPCODE_LABEL)
1488                                         {
1489                                                 if(calledFunctions.find(instruction[i]->dst.label) == calledFunctions.end())
1490                                                 {
1491                                                         for( ; i < instruction.size(); i++)
1492                                                         {
1493                                                                 Opcode oldOpcode = instruction[i]->opcode;
1494                                                                 instruction[i]->opcode = OPCODE_NULL;
1495
1496                                                                 if(oldOpcode == OPCODE_RET)
1497                                                                 {
1498                                                                         rescan = true;
1499                                                                         break;
1500                                                                 }
1501                                                         }
1502                                                 }
1503                                         }
1504                                 }
1505                         }
1506                 }
1507
1508                 // Optimize the entry call
1509                 if(instruction.size() >= 2 && instruction[0]->opcode == OPCODE_CALL && instruction[1]->opcode == OPCODE_RET)
1510                 {
1511                         if(calledFunctions.size() == 1)
1512                         {
1513                                 instruction[0]->opcode = OPCODE_NULL;
1514                                 instruction[1]->opcode = OPCODE_NULL;
1515
1516                                 for(size_t i = 2; i < instruction.size(); i++)
1517                                 {
1518                                         if(instruction[i]->opcode == OPCODE_LABEL || instruction[i]->opcode == OPCODE_RET)
1519                                         {
1520                                                 instruction[i]->opcode = OPCODE_NULL;
1521                                         }
1522                                 }
1523                         }
1524                 }
1525         }
1526
1527         void Shader::removeNull()
1528         {
1529                 size_t size = 0;
1530                 for(size_t i = 0; i < instruction.size(); i++)
1531                 {
1532                         if(instruction[i]->opcode != OPCODE_NULL)
1533                         {
1534                                 instruction[size] = instruction[i];
1535                                 size++;
1536                         }
1537                         else
1538                         {
1539                                 delete instruction[i];
1540                         }
1541                 }
1542
1543                 instruction.resize(size);
1544         }
1545
1546         void Shader::analyzeDirtyConstants()
1547         {
1548                 dirtyConstantsF = 0;
1549                 dirtyConstantsI = 0;
1550                 dirtyConstantsB = 0;
1551
1552                 for(unsigned int i = 0; i < instruction.size(); i++)
1553                 {
1554                         switch(instruction[i]->opcode)
1555                         {
1556                         case OPCODE_DEF:
1557                                 if(instruction[i]->dst.index + 1 > dirtyConstantsF)
1558                                 {
1559                                         dirtyConstantsF = instruction[i]->dst.index + 1;
1560                                 }
1561                                 break;
1562                         case OPCODE_DEFI:
1563                                 if(instruction[i]->dst.index + 1 > dirtyConstantsI)
1564                                 {
1565                                         dirtyConstantsI = instruction[i]->dst.index + 1;
1566                                 }
1567                                 break;
1568                         case OPCODE_DEFB:
1569                                 if(instruction[i]->dst.index + 1 > dirtyConstantsB)
1570                                 {
1571                                         dirtyConstantsB = instruction[i]->dst.index + 1;
1572                                 }
1573                                 break;
1574                         }
1575                 }
1576         }
1577
1578         void Shader::analyzeDynamicBranching()
1579         {
1580                 dynamicBranching = false;
1581                 containsLeave = false;
1582                 containsBreak = false;
1583                 containsContinue = false;
1584                 containsDefine = false;
1585
1586                 // Determine global presence of branching instructions
1587                 for(unsigned int i = 0; i < instruction.size(); i++)
1588                 {
1589                         switch(instruction[i]->opcode)
1590                         {
1591                         case OPCODE_CALLNZ:
1592                         case OPCODE_IF:
1593                         case OPCODE_IFC:
1594                         case OPCODE_BREAK:
1595                         case OPCODE_BREAKC:
1596                         case OPCODE_CMP:
1597                         case OPCODE_BREAKP:
1598                         case OPCODE_LEAVE:
1599                         case OPCODE_CONTINUE:
1600                                 if(instruction[i]->src[0].type != PARAMETER_CONSTBOOL)
1601                                 {
1602                                         dynamicBranching = true;
1603                                 }
1604
1605                                 if(instruction[i]->opcode == OPCODE_LEAVE)
1606                                 {
1607                                         containsLeave = true;
1608                                 }
1609                                 
1610                                 if(instruction[i]->isBreak())
1611                                 {
1612                                         containsBreak = true;
1613                                 }
1614
1615                                 if(instruction[i]->opcode == OPCODE_CONTINUE)
1616                                 {
1617                                         containsContinue = true;
1618                                 }
1619                         case OPCODE_DEF:
1620                         case OPCODE_DEFB:
1621                         case OPCODE_DEFI:
1622                                 containsDefine = true;
1623                         }
1624                 }
1625
1626                 // Conservatively determine which instructions are affected by dynamic branching
1627                 int branchDepth = 0;
1628                 int breakDepth = 0;
1629                 int continueDepth = 0;
1630                 bool leaveReturn = false;
1631
1632                 for(unsigned int i = 0; i < instruction.size(); i++)
1633                 {
1634                         // If statements
1635                         if(instruction[i]->isBranch())
1636                         {
1637                                 branchDepth++;
1638                         }
1639                         else if(instruction[i]->opcode == OPCODE_ENDIF)
1640                         {
1641                                 branchDepth--;
1642                         }
1643
1644                         if(branchDepth > 0)
1645                         {
1646                                 instruction[i]->analysisBranch = true;
1647
1648                                 if(instruction[i]->isCall())
1649                                 {
1650                                         markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1651                                 }
1652                         }
1653
1654                         // Break statemement
1655                         if(instruction[i]->isBreak())
1656                         {
1657                                 breakDepth++;
1658                         }
1659
1660                         if(breakDepth > 0)
1661                         {
1662                                 if(instruction[i]->isLoop())   // Nested loop, don't make the end of it disable the break execution mask
1663                                 {
1664                                         breakDepth++;
1665                                 }
1666                                 else if(instruction[i]->isEndLoop())
1667                                 {
1668                                         breakDepth--;
1669                                 }
1670
1671                                 instruction[i]->analysisBreak = true;
1672
1673                                 if(instruction[i]->isCall())
1674                                 {
1675                                         markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1676                                 }
1677                         }
1678
1679                         // Continue statement
1680                         if(instruction[i]->opcode == OPCODE_CONTINUE)
1681                         {
1682                                 continueDepth++;
1683                         }
1684
1685                         if(continueDepth > 0)
1686                         {
1687                                 if(instruction[i]->isLoop())   // Nested loop, don't make the end of it disable the break execution mask
1688                                 {
1689                                         continueDepth++;
1690                                 }
1691                                 else if(instruction[i]->isEndLoop())
1692                                 {
1693                                         continueDepth--;
1694                                 }
1695
1696                                 instruction[i]->analysisContinue = true;
1697
1698                                 if(instruction[i]->isCall())
1699                                 {
1700                                         markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_CONTINUE);
1701                                 }
1702                         }
1703
1704                         // Return (leave) statement
1705                         if(instruction[i]->opcode == OPCODE_LEAVE)
1706                         {
1707                                 leaveReturn = true;
1708                         }
1709                         else if(instruction[i]->opcode == OPCODE_RET)   // End of the function
1710                         {
1711                                 leaveReturn = false;
1712                         }
1713
1714                         if(leaveReturn)
1715                         {
1716                                 instruction[i]->analysisLeave = true;
1717
1718                                 if(instruction[i]->isCall())
1719                                 {
1720                                         markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_LEAVE);
1721                                 }
1722                         }
1723                 }
1724         }
1725
1726         void Shader::markFunctionAnalysis(int functionLabel, Analysis flag)
1727         {
1728                 bool marker = false;
1729                 for(unsigned int i = 0; i < instruction.size(); i++)
1730                 {
1731                         if(!marker)
1732                         {
1733                                 if(instruction[i]->opcode == OPCODE_LABEL && instruction[i]->dst.label == functionLabel)
1734                                 {
1735                                         marker = true;
1736                                 }
1737                         }
1738                         else
1739                         {
1740                                 if(instruction[i]->opcode == OPCODE_RET)
1741                                 {
1742                                         break;
1743                                 }
1744                                 else if(instruction[i]->isCall())
1745                                 {
1746                                         markFunctionAnalysis(instruction[i]->dst.label, flag);
1747                                 }
1748
1749                                 instruction[i]->analysis |= flag;
1750                         }
1751                 }
1752         }
1753
1754         void Shader::analyzeSamplers()
1755         {
1756                 for(unsigned int i = 0; i < instruction.size(); i++)
1757                 {
1758                         switch(instruction[i]->opcode)
1759                         {
1760                         case OPCODE_TEX:
1761                         case OPCODE_TEXBEM:
1762                         case OPCODE_TEXBEML:
1763                         case OPCODE_TEXREG2AR:
1764                         case OPCODE_TEXREG2GB:
1765                         case OPCODE_TEXM3X2TEX:
1766                         case OPCODE_TEXM3X3TEX:
1767                         case OPCODE_TEXM3X3SPEC:
1768                         case OPCODE_TEXM3X3VSPEC:
1769                         case OPCODE_TEXREG2RGB:
1770                         case OPCODE_TEXDP3TEX:
1771                         case OPCODE_TEXM3X2DEPTH:
1772                         case OPCODE_TEXLDD:
1773                         case OPCODE_TEXLDL:
1774                                 {
1775                                         Parameter &dst = instruction[i]->dst;
1776                                         Parameter &src1 = instruction[i]->src[1];
1777
1778                                         if(majorVersion >= 2)
1779                                         {
1780                                                 usedSamplers |= 1 << src1.index;
1781                                         }
1782                                         else
1783                                         {
1784                                                 usedSamplers |= 1 << dst.index;
1785                                         }
1786                                 }
1787                                 break;
1788                         }
1789                 }
1790         }
1791
1792         // Assigns a unique index to each call instruction, on a per label basis.
1793         // This is used to know what basic block to return to.
1794         void Shader::analyzeCallSites()
1795         {
1796                 int callSiteIndex[2048] = {0};
1797
1798                 for(unsigned int i = 0; i < instruction.size(); i++)
1799                 {
1800                         if(instruction[i]->opcode == OPCODE_CALL || instruction[i]->opcode == OPCODE_CALLNZ)
1801                         {
1802                                 int label = instruction[i]->dst.label;
1803
1804                                 instruction[i]->dst.callSite = callSiteIndex[label]++;
1805                         }
1806                 }
1807         }
1808
1809         void Shader::analyzeDynamicIndexing()
1810         {
1811                 dynamicallyIndexedTemporaries = false;
1812                 dynamicallyIndexedInput = false;
1813                 dynamicallyIndexedOutput = false;
1814
1815                 for(unsigned int i = 0; i < instruction.size(); i++)
1816                 {
1817                         if(instruction[i]->dst.rel.type == PARAMETER_ADDR ||
1818                            instruction[i]->dst.rel.type == PARAMETER_LOOP ||
1819                            instruction[i]->dst.rel.type == PARAMETER_TEMP ||
1820                            instruction[i]->dst.rel.type == PARAMETER_CONST)
1821                         {
1822                                 switch(instruction[i]->dst.type)
1823                                 {
1824                                 case PARAMETER_TEMP:   dynamicallyIndexedTemporaries = true; break;
1825                                 case PARAMETER_INPUT:  dynamicallyIndexedInput = true;       break;
1826                                 case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true;      break;
1827                                 }
1828                         }
1829
1830                         for(int j = 0; j < 3; j++)
1831                         {
1832                                 if(instruction[i]->src[j].rel.type == PARAMETER_ADDR ||
1833                                    instruction[i]->src[j].rel.type == PARAMETER_LOOP ||
1834                                    instruction[i]->src[j].rel.type == PARAMETER_TEMP ||
1835                                    instruction[i]->src[j].rel.type == PARAMETER_CONST)
1836                                 {
1837                                         switch(instruction[i]->src[j].type)
1838                                         {
1839                                         case PARAMETER_TEMP:   dynamicallyIndexedTemporaries = true; break;
1840                                         case PARAMETER_INPUT:  dynamicallyIndexedInput = true;       break;
1841                                         case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true;      break;
1842                                         }
1843                                 }
1844                         }
1845                 }
1846         }
1847 }