OSDN Git Service

Fixed a narrowing error caused by implicitly converting -1 to unsigned char.
[android-x86/external-swiftshader.git] / src / Shader / Shader.cpp
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "Shader.hpp"
16
17 #include "VertexShader.hpp"
18 #include "PixelShader.hpp"
19 #include "Math.hpp"
20 #include "Debug.hpp"
21
22 #include <set>
23 #include <fstream>
24 #include <sstream>
25 #include <stdarg.h>
26
27 namespace sw
28 {
29         volatile int Shader::serialCounter = 1;   // Definition of the static counter; starts at 1. NOTE(review): presumably the source of per-shader serial numbers -- confirm at its use sites.
30
31         Shader::Opcode Shader::OPCODE_DP(int i)
32         {
33                 switch(i)
34                 {
35                 default: ASSERT(false);
36                 case 1: return OPCODE_DP1;
37                 case 2: return OPCODE_DP2;
38                 case 3: return OPCODE_DP3;
39                 case 4: return OPCODE_DP4;
40                 }
41         }
42
43         Shader::Opcode Shader::OPCODE_LEN(int i)
44         {
45                 switch(i)
46                 {
47                 default: ASSERT(false);
48                 case 1: return OPCODE_ABS;
49                 case 2: return OPCODE_LEN2;
50                 case 3: return OPCODE_LEN3;
51                 case 4: return OPCODE_LEN4;
52                 }
53         }
54
55         Shader::Opcode Shader::OPCODE_DIST(int i)
56         {
57                 switch(i)
58                 {
59                 default: ASSERT(false);
60                 case 1: return OPCODE_DIST1;
61                 case 2: return OPCODE_DIST2;
62                 case 3: return OPCODE_DIST3;
63                 case 4: return OPCODE_DIST4;
64                 }
65         }
66
67         Shader::Opcode Shader::OPCODE_NRM(int i)
68         {
69                 switch(i)
70                 {
71                 default: ASSERT(false);
72                 case 1: return OPCODE_SGN;
73                 case 2: return OPCODE_NRM2;
74                 case 3: return OPCODE_NRM3;
75                 case 4: return OPCODE_NRM4;
76                 }
77         }
78
79         Shader::Opcode Shader::OPCODE_FORWARD(int i)
80         {
81                 switch(i)
82                 {
83                 default: ASSERT(false);
84                 case 1: return OPCODE_FORWARD1;
85                 case 2: return OPCODE_FORWARD2;
86                 case 3: return OPCODE_FORWARD3;
87                 case 4: return OPCODE_FORWARD4;
88                 }
89         }
90
91         Shader::Opcode Shader::OPCODE_REFLECT(int i)
92         {
93                 switch(i)
94                 {
95                 default: ASSERT(false);
96                 case 1: return OPCODE_REFLECT1;
97                 case 2: return OPCODE_REFLECT2;
98                 case 3: return OPCODE_REFLECT3;
99                 case 4: return OPCODE_REFLECT4;
100                 }
101         }
102
103         Shader::Opcode Shader::OPCODE_REFRACT(int i)
104         {
105                 switch(i)
106                 {
107                 default: ASSERT(false);
108                 case 1: return OPCODE_REFRACT1;
109                 case 2: return OPCODE_REFRACT2;
110                 case 3: return OPCODE_REFRACT3;
111                 case 4: return OPCODE_REFRACT4;
112                 }
113         }
114
115         Shader::Instruction::Instruction(Opcode opcode) : opcode(opcode), analysis(0)
116         {
117                 control = CONTROL_RESERVED0;
118
119                 predicate = false;
120                 predicateNot = false;
121                 predicateSwizzle = 0xE4;
122
123                 coissue = false;
124                 samplerType = SAMPLER_UNKNOWN;
125                 usage = USAGE_POSITION;
126                 usageIndex = 0;
127         }
128
129         Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) : analysis(0)
130         {
131                 parseOperationToken(*token++, majorVersion);
132
133                 samplerType = SAMPLER_UNKNOWN;
134                 usage = USAGE_POSITION;
135                 usageIndex = 0;
136
137                 if(opcode == OPCODE_IF ||
138                    opcode == OPCODE_IFC ||
139                    opcode == OPCODE_LOOP ||
140                    opcode == OPCODE_REP ||
141                    opcode == OPCODE_BREAKC ||
142                    opcode == OPCODE_BREAKP)   // No destination operand
143                 {
144                         if(size > 0) parseSourceToken(0, token++, majorVersion);
145                         if(size > 1) parseSourceToken(1, token++, majorVersion);
146                         if(size > 2) parseSourceToken(2, token++, majorVersion);
147                         if(size > 3) ASSERT(false);
148                 }
149                 else if(opcode == OPCODE_DCL)
150                 {
151                         parseDeclarationToken(*token++);
152                         parseDestinationToken(token++, majorVersion);
153                 }
154                 else
155                 {
156                         if(size > 0)
157                         {
158                                 parseDestinationToken(token, majorVersion);
159
160                                 if(dst.rel.type != PARAMETER_VOID && majorVersion >= 3)
161                                 {
162                                         token++;
163                                         size--;
164                                 }
165
166                                 token++;
167                                 size--;
168                         }
169
170                         if(predicate)
171                         {
172                                 ASSERT(size != 0);
173
174                                 predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT;
175                                 predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16);
176
177                                 token++;
178                                 size--;
179                         }
180
181                         for(int i = 0; size > 0; i++)
182                         {
183                                 parseSourceToken(i, token, majorVersion);
184
185                                 token++;
186                                 size--;
187
188                                 if(src[i].rel.type != PARAMETER_VOID && majorVersion >= 2)
189                                 {
190                                         token++;
191                                         size--;
192                                 }
193                         }
194                 }
195         }
196
197         Shader::Instruction::~Instruction()   // Empty body: no explicit cleanup is performed here
198         {
199         }
200
201         std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const
202         {
203                 std::string instructionString;
204
205                 if(opcode != OPCODE_DCL)
206                 {
207                         instructionString += coissue ? "+ " : "";
208
209                         if(predicate)
210                         {
211                                 instructionString += predicateNot ? "(!p0" : "(p0";
212                                 instructionString += swizzleString(PARAMETER_PREDICATE, predicateSwizzle);
213                                 instructionString += ") ";
214                         }
215
216                         instructionString += operationString(version) + controlString() + dst.shiftString() + dst.modifierString();
217
218                         if(dst.type != PARAMETER_VOID)
219                         {
220                                 instructionString += " " + dst.string(shaderType, version) +
221                                                            dst.relativeString() +
222                                                            dst.maskString();
223                         }
224
225                         for(int i = 0; i < 4; i++)
226                         {
227                                 if(src[i].type != PARAMETER_VOID)
228                                 {
229                                         instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " ";
230                                         instructionString += src[i].preModifierString() +
231                                                                                  src[i].string(shaderType, version) +
232                                                                                  src[i].relativeString() +
233                                                                                  src[i].postModifierString() +
234                                                                                  src[i].swizzleString();
235                                 }
236                         }
237                 }
238                 else   // DCL
239                 {
240                         instructionString += "dcl";
241
242                         if(dst.type == PARAMETER_SAMPLER)
243                         {
244                                 switch(samplerType)
245                                 {
246                                 case SAMPLER_UNKNOWN: instructionString += " ";        break;
247                                 case SAMPLER_1D:      instructionString += "_1d ";     break;
248                                 case SAMPLER_2D:      instructionString += "_2d ";     break;
249                                 case SAMPLER_CUBE:    instructionString += "_cube ";   break;
250                                 case SAMPLER_VOLUME:  instructionString += "_volume "; break;
251                                 default:
252                                         ASSERT(false);
253                                 }
254
255                                 instructionString += dst.string(shaderType, version);
256                         }
257                         else if(dst.type == PARAMETER_INPUT ||
258                                     dst.type == PARAMETER_OUTPUT ||
259                                     dst.type == PARAMETER_TEXTURE)
260                         {
261                                 if(version >= 0x0300)
262                                 {
263                                         switch(usage)
264                                         {
265                                         case USAGE_POSITION:     instructionString += "_position";     break;
266                                         case USAGE_BLENDWEIGHT:  instructionString += "_blendweight";  break;
267                                         case USAGE_BLENDINDICES: instructionString += "_blendindices"; break;
268                                         case USAGE_NORMAL:       instructionString += "_normal";       break;
269                                         case USAGE_PSIZE:        instructionString += "_psize";        break;
270                                         case USAGE_TEXCOORD:     instructionString += "_texcoord";     break;
271                                         case USAGE_TANGENT:      instructionString += "_tangent";      break;
272                                         case USAGE_BINORMAL:     instructionString += "_binormal";     break;
273                                         case USAGE_TESSFACTOR:   instructionString += "_tessfactor";   break;
274                                         case USAGE_POSITIONT:    instructionString += "_positiont";    break;
275                                         case USAGE_COLOR:        instructionString += "_color";        break;
276                                         case USAGE_FOG:          instructionString += "_fog";          break;
277                                         case USAGE_DEPTH:        instructionString += "_depth";        break;
278                                         case USAGE_SAMPLE:       instructionString += "_sample";       break;
279                                         default:
280                                                 ASSERT(false);
281                                         }
282
283                                         if(usageIndex > 0)
284                                         {
285                                                 std::ostringstream buffer;
286
287                                                 buffer << (int)usageIndex;
288
289                                                 instructionString += buffer.str();
290                                         }
291                                 }
292                                 else ASSERT(dst.type != PARAMETER_OUTPUT);
293
294                                 instructionString += " ";
295
296                                 instructionString += dst.string(shaderType, version);
297                                 instructionString += dst.maskString();
298                         }
299                         else if(dst.type == PARAMETER_MISCTYPE)   // vPos and vFace
300                         {
301                                 instructionString += " ";
302
303                                 instructionString += dst.string(shaderType, version);
304                         }
305                         else ASSERT(false);
306                 }
307
308                 return instructionString;
309         }
310
311         std::string Shader::DestinationParameter::modifierString() const
312         {
313                 if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
314                 {
315                         return "";
316                 }
317
318                 std::string modifierString;
319
320                 if(integer)
321                 {
322                         modifierString += "_int";
323                 }
324
325                 if(saturate)
326                 {
327                         modifierString += "_sat";
328                 }
329
330                 if(partialPrecision)
331                 {
332                         modifierString += "_pp";
333                 }
334
335                 if(centroid)
336                 {
337                         modifierString += "_centroid";
338                 }
339
340                 return modifierString;
341         }
342
343         std::string Shader::DestinationParameter::shiftString() const
344         {
345                 if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
346                 {
347                         return "";
348                 }
349
350                 switch(shift)
351                 {
352                 case 0:         return "";
353                 case 1:         return "_x2";
354                 case 2:         return "_x4";
355                 case 3:         return "_x8";
356                 case -1:        return "_d2";
357                 case -2:        return "_d4";
358                 case -3:        return "_d8";
359                 default:
360                         return "";
361                 //      ASSERT(false);   // FIXME
362                 }
363         }
364
365         std::string Shader::DestinationParameter::maskString() const
366         {
367                 if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
368                 {
369                         return "";
370                 }
371
372                 switch(mask)
373                 {
374                 case 0x0:       return "";
375                 case 0x1:       return ".x";
376                 case 0x2:       return ".y";
377                 case 0x3:       return ".xy";
378                 case 0x4:       return ".z";
379                 case 0x5:       return ".xz";
380                 case 0x6:       return ".yz";
381                 case 0x7:       return ".xyz";
382                 case 0x8:       return ".w";
383                 case 0x9:       return ".xw";
384                 case 0xA:       return ".yw";
385                 case 0xB:       return ".xyw";
386                 case 0xC:       return ".zw";
387                 case 0xD:       return ".xzw";
388                 case 0xE:       return ".yzw";
389                 case 0xF:       return "";
390                 default:
391                         ASSERT(false);
392                 }
393
394                 return "";
395         }
396
397         std::string Shader::SourceParameter::preModifierString() const
398         {
399                 if(type == PARAMETER_VOID)
400                 {
401                         return "";
402                 }
403
404                 switch(modifier)
405                 {
406                 case MODIFIER_NONE:                     return "";
407                 case MODIFIER_NEGATE:           return "-";
408                 case MODIFIER_BIAS:                     return "";
409                 case MODIFIER_BIAS_NEGATE:      return "-";
410                 case MODIFIER_SIGN:                     return "";
411                 case MODIFIER_SIGN_NEGATE:      return "-";
412                 case MODIFIER_COMPLEMENT:       return "1-";
413                 case MODIFIER_X2:                       return "";
414                 case MODIFIER_X2_NEGATE:        return "-";
415                 case MODIFIER_DZ:                       return "";
416                 case MODIFIER_DW:                       return "";
417                 case MODIFIER_ABS:                      return "";
418                 case MODIFIER_ABS_NEGATE:       return "-";
419                 case MODIFIER_NOT:                      return "!";
420                 default:
421                         ASSERT(false);
422                 }
423
424                 return "";
425         }
426
427         std::string Shader::Parameter::relativeString() const
428         {
429                 if(type == PARAMETER_CONST || type == PARAMETER_INPUT || type == PARAMETER_OUTPUT || type == PARAMETER_TEMP)
430                 {
431                         if(rel.type == PARAMETER_VOID)
432                         {
433                                 return "";
434                         }
435                         else if(rel.type == PARAMETER_ADDR)
436                         {
437                                 switch(rel.swizzle & 0x03)
438                                 {
439                                 case 0: return "[a0.x]";
440                                 case 1: return "[a0.y]";
441                                 case 2: return "[a0.z]";
442                                 case 3: return "[a0.w]";
443                                 }
444                         }
445                         else if(rel.type == PARAMETER_TEMP)
446                         {
447                                 std::ostringstream buffer;
448                                 buffer << rel.index;
449
450                                 switch(rel.swizzle & 0x03)
451                                 {
452                                 case 0: return "[r" + buffer.str() + ".x]";
453                                 case 1: return "[r" + buffer.str() + ".y]";
454                                 case 2: return "[r" + buffer.str() + ".z]";
455                                 case 3: return "[r" + buffer.str() + ".w]";
456                                 }
457                         }
458                         else if(rel.type == PARAMETER_LOOP)
459                         {
460                                 return "[aL]";
461                         }
462                         else if(rel.type == PARAMETER_CONST)
463                         {
464                                 std::ostringstream buffer;
465                                 buffer << rel.index;
466
467                                 switch(rel.swizzle & 0x03)
468                                 {
469                                 case 0: return "[c" + buffer.str() + ".x]";
470                                 case 1: return "[c" + buffer.str() + ".y]";
471                                 case 2: return "[c" + buffer.str() + ".z]";
472                                 case 3: return "[c" + buffer.str() + ".w]";
473                                 }
474                         }
475                         else ASSERT(false);
476                 }
477
478                 return "";
479         }
480
481         std::string Shader::SourceParameter::postModifierString() const
482         {
483                 if(type == PARAMETER_VOID)
484                 {
485                         return "";
486                 }
487
488                 switch(modifier)
489                 {
490                 case MODIFIER_NONE:                     return "";
491                 case MODIFIER_NEGATE:           return "";
492                 case MODIFIER_BIAS:                     return "_bias";
493                 case MODIFIER_BIAS_NEGATE:      return "_bias";
494                 case MODIFIER_SIGN:                     return "_bx2";
495                 case MODIFIER_SIGN_NEGATE:      return "_bx2";
496                 case MODIFIER_COMPLEMENT:       return "";
497                 case MODIFIER_X2:                       return "_x2";
498                 case MODIFIER_X2_NEGATE:        return "_x2";
499                 case MODIFIER_DZ:                       return "_dz";
500                 case MODIFIER_DW:                       return "_dw";
501                 case MODIFIER_ABS:                      return "_abs";
502                 case MODIFIER_ABS_NEGATE:       return "_abs";
503                 case MODIFIER_NOT:                      return "";
504                 default:
505                         ASSERT(false);
506                 }
507
508                 return "";
509         }
510
511         std::string Shader::SourceParameter::swizzleString() const   // Returns this source operand's swizzle as text
512         {
513                 return Instruction::swizzleString(type, swizzle);   // Delegates to the static formatter with this operand's own type and swizzle
514         }
515
516         void Shader::Instruction::parseOperationToken(unsigned long token, unsigned char majorVersion)
517         {
518                 if((token & 0xFFFF0000) == 0xFFFF0000 || (token & 0xFFFF0000) == 0xFFFE0000)   // Version token
519                 {
520                         opcode = (Opcode)token;
521
522                         control = CONTROL_RESERVED0;
523                         predicate = false;
524                         coissue = false;
525                 }
526                 else
527                 {
528                         opcode = (Opcode)(token & 0x0000FFFF);
529                         control = (Control)((token & 0x00FF0000) >> 16);
530
531                         int size = (token & 0x0F000000) >> 24;
532
533                         predicate = (token & 0x10000000) != 0x00000000;
534                         coissue = (token & 0x40000000) != 0x00000000;
535
536                         if(majorVersion < 2)
537                         {
538                                 if(size != 0)
539                                 {
540                                         ASSERT(false);   // Reserved
541                                 }
542                         }
543
544                         if(majorVersion < 2)
545                         {
546                                 if(predicate)
547                                 {
548                                         ASSERT(false);
549                                 }
550                         }
551
552                         if((token & 0x20000000) != 0x00000000)
553                         {
554                                 ASSERT(false);   // Reserved
555                         }
556
557                         if(majorVersion >= 2)
558                         {
559                                 if(coissue)
560                                 {
561                                         ASSERT(false);   // Reserved
562                                 }
563                         }
564
565                         if((token & 0x80000000) != 0x00000000)
566                         {
567                                 ASSERT(false);
568                         }
569                 }
570         }
571
572         void Shader::Instruction::parseDeclarationToken(unsigned long token)
573         {
574                 samplerType = (SamplerType)((token & 0x78000000) >> 27);
575                 usage = (Usage)(token & 0x0000001F);
576                 usageIndex = (unsigned char)((token & 0x000F0000) >> 16);
577         }
578
579         void Shader::Instruction::parseDestinationToken(const unsigned long *token, unsigned char majorVersion)
580         {
581                 dst.index = (unsigned short)(token[0] & 0x000007FF);
582                 dst.type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
583
584                 // TODO: Check type and index range
585
586                 bool relative = (token[0] & 0x00002000) != 0x00000000;
587                 dst.rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
588                 dst.rel.swizzle = 0x00;
589                 dst.rel.scale = 1;
590
591                 if(relative && majorVersion >= 3)
592                 {
593                         dst.rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
594                         dst.rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
595                 }
596                 else if(relative) ASSERT(false);   // Reserved
597
598                 if((token[0] & 0x0000C000) != 0x00000000)
599                 {
600                         ASSERT(false);   // Reserved
601                 }
602
603                 dst.mask = (unsigned char)((token[0] & 0x000F0000) >> 16);
604                 dst.saturate = (token[0] & 0x00100000) != 0;
605                 dst.partialPrecision = (token[0] & 0x00200000) != 0;
606                 dst.centroid = (token[0] & 0x00400000) != 0;
607                 dst.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4;
608
609                 if(majorVersion >= 2)
610                 {
611                         if(dst.shift)
612                         {
613                                 ASSERT(false);   // Reserved
614                         }
615                 }
616
617                 if((token[0] & 0x80000000) != 0x80000000)
618                 {
619                         ASSERT(false);
620                 }
621         }
622
623         void Shader::Instruction::parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion)
624         {
625                 // Defaults
626                 src[i].index = 0;
627                 src[i].type = PARAMETER_VOID;
628                 src[i].modifier = MODIFIER_NONE;
629                 src[i].swizzle = 0xE4;
630                 src[i].rel.type = PARAMETER_VOID;
631                 src[i].rel.swizzle = 0x00;
632                 src[i].rel.scale = 1;
633
634                 switch(opcode)
635                 {
636                 case OPCODE_DEF:
637                         src[0].type = PARAMETER_FLOAT4LITERAL;
638                         src[0].value[i] = *(float*)token;
639                         break;
640                 case OPCODE_DEFB:
641                         src[0].type = PARAMETER_BOOL1LITERAL;
642                         src[0].boolean[0] = *(int*)token;
643                         break;
644                 case OPCODE_DEFI:
645                         src[0].type = PARAMETER_INT4LITERAL;
646                         src[0].integer[i] = *(int*)token;
647                         break;
648                 default:
649                         src[i].index = (unsigned short)(token[0] & 0x000007FF);
650                         src[i].type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
651
652                         // FIXME: Check type and index range
653
654                         bool relative = (token[0] & 0x00002000) != 0x00000000;
655                         src[i].rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
656
657                         if((token[0] & 0x0000C000) != 0x00000000)
658                         {
659                                 if(opcode != OPCODE_DEF &&
660                                    opcode != OPCODE_DEFI &&
661                                    opcode != OPCODE_DEFB)
662                                 {
663                                         ASSERT(false);
664                                 }
665                         }
666
667                         src[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16);
668                         src[i].modifier = (Modifier)((token[0] & 0x0F000000) >> 24);
669
670                         if((token[0] & 0x80000000) != 0x80000000)
671                         {
672                                 if(opcode != OPCODE_DEF &&
673                                    opcode != OPCODE_DEFI &&
674                                    opcode != OPCODE_DEFB)
675                                 {
676                                         ASSERT(false);
677                                 }
678                         }
679
680                         if(relative && majorVersion >= 2)
681                         {
682                                 src[i].rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
683                                 src[i].rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
684                         }
685                 }
686         }
687
688         std::string Shader::Instruction::swizzleString(ParameterType type, unsigned char swizzle)
689         {
690                 if(type == PARAMETER_VOID || type == PARAMETER_LABEL || swizzle == 0xE4)
691                 {
692                         return "";
693                 }
694
695                 int x = (swizzle & 0x03) >> 0;
696                 int y = (swizzle & 0x0C) >> 2;
697                 int z = (swizzle & 0x30) >> 4;
698                 int w = (swizzle & 0xC0) >> 6;
699
700                 std::string swizzleString = ".";
701
702                 switch(x)
703                 {
704                 case 0: swizzleString += "x"; break;
705                 case 1: swizzleString += "y"; break;
706                 case 2: swizzleString += "z"; break;
707                 case 3: swizzleString += "w"; break;
708                 }
709
710                 if(!(x == y && y == z && z == w))
711                 {
712                         switch(y)
713                         {
714                         case 0: swizzleString += "x"; break;
715                         case 1: swizzleString += "y"; break;
716                         case 2: swizzleString += "z"; break;
717                         case 3: swizzleString += "w"; break;
718                         }
719
720                         if(!(y == z && z == w))
721                         {
722                                 switch(z)
723                                 {
724                                 case 0: swizzleString += "x"; break;
725                                 case 1: swizzleString += "y"; break;
726                                 case 2: swizzleString += "z"; break;
727                                 case 3: swizzleString += "w"; break;
728                                 }
729
730                                 if(!(z == w))
731                                 {
732                                         switch(w)
733                                         {
734                                         case 0: swizzleString += "x"; break;
735                                         case 1: swizzleString += "y"; break;
736                                         case 2: swizzleString += "z"; break;
737                                         case 3: swizzleString += "w"; break;
738                                         }
739                                 }
740                         }
741                 }
742
743                 return swizzleString;
744         }
745
746         std::string Shader::Instruction::operationString(unsigned short version) const
747         {
748                 switch(opcode)
749                 {
750                 case OPCODE_NULL:                       return "null";
751                 case OPCODE_NOP:                        return "nop";
752                 case OPCODE_MOV:                        return "mov";
753                 case OPCODE_ADD:                        return "add";
754                 case OPCODE_IADD:                       return "iadd";
755                 case OPCODE_SUB:                        return "sub";
756                 case OPCODE_ISUB:                       return "isub";
757                 case OPCODE_MAD:                        return "mad";
758                 case OPCODE_IMAD:                       return "imad";
759                 case OPCODE_MUL:                        return "mul";
760                 case OPCODE_IMUL:                       return "imul";
761                 case OPCODE_RCPX:                       return "rcpx";
762                 case OPCODE_DIV:                        return "div";
763                 case OPCODE_IDIV:                       return "idiv";
764                 case OPCODE_UDIV:                       return "udiv";
765                 case OPCODE_MOD:                        return "mod";
766                 case OPCODE_IMOD:                       return "imod";
767                 case OPCODE_UMOD:                       return "umod";
768                 case OPCODE_SHL:                        return "shl";
769                 case OPCODE_ISHR:                       return "ishr";
770                 case OPCODE_USHR:                       return "ushr";
771                 case OPCODE_RSQX:                       return "rsqx";
772                 case OPCODE_SQRT:                       return "sqrt";
773                 case OPCODE_RSQ:                        return "rsq";
774                 case OPCODE_LEN2:                       return "len2";
775                 case OPCODE_LEN3:                       return "len3";
776                 case OPCODE_LEN4:                       return "len4";
777                 case OPCODE_DIST1:                      return "dist1";
778                 case OPCODE_DIST2:                      return "dist2";
779                 case OPCODE_DIST3:                      return "dist3";
780                 case OPCODE_DIST4:                      return "dist4";
781                 case OPCODE_DP3:                        return "dp3";
782                 case OPCODE_DP4:                        return "dp4";
783                 case OPCODE_DET2:                       return "det2";
784                 case OPCODE_DET3:                       return "det3";
785                 case OPCODE_DET4:                       return "det4";
786                 case OPCODE_MIN:                        return "min";
787                 case OPCODE_IMIN:                       return "imin";
788                 case OPCODE_UMIN:                       return "umin";
789                 case OPCODE_MAX:                        return "max";
790                 case OPCODE_IMAX:                       return "imax";
791                 case OPCODE_UMAX:                       return "umax";
792                 case OPCODE_SLT:                        return "slt";
793                 case OPCODE_SGE:                        return "sge";
794                 case OPCODE_EXP2X:                      return "exp2x";
795                 case OPCODE_LOG2X:                      return "log2x";
796                 case OPCODE_LIT:                        return "lit";
797                 case OPCODE_ATT:                        return "att";
798                 case OPCODE_LRP:                        return "lrp";
799                 case OPCODE_STEP:                       return "step";
800                 case OPCODE_SMOOTH:                     return "smooth";
801                 case OPCODE_FLOATBITSTOINT:      return "floatBitsToInt";
802                 case OPCODE_FLOATBITSTOUINT: return "floatBitsToUInt";
803                 case OPCODE_INTBITSTOFLOAT:      return "intBitsToFloat";
804                 case OPCODE_UINTBITSTOFLOAT: return "uintBitsToFloat";
805                 case OPCODE_PACKSNORM2x16:       return "packSnorm2x16";
806                 case OPCODE_PACKUNORM2x16:       return "packUnorm2x16";
807                 case OPCODE_PACKHALF2x16:        return "packHalf2x16";
808                 case OPCODE_UNPACKSNORM2x16: return "unpackSnorm2x16";
809                 case OPCODE_UNPACKUNORM2x16: return "unpackUnorm2x16";
810                 case OPCODE_UNPACKHALF2x16:      return "unpackHalf2x16";
811                 case OPCODE_FRC:                        return "frc";
812                 case OPCODE_M4X4:                       return "m4x4";
813                 case OPCODE_M4X3:                       return "m4x3";
814                 case OPCODE_M3X4:                       return "m3x4";
815                 case OPCODE_M3X3:                       return "m3x3";
816                 case OPCODE_M3X2:                       return "m3x2";
817                 case OPCODE_CALL:                       return "call";
818                 case OPCODE_CALLNZ:                     return "callnz";
819                 case OPCODE_LOOP:                       return "loop";
820                 case OPCODE_RET:                        return "ret";
821                 case OPCODE_ENDLOOP:            return "endloop";
822                 case OPCODE_LABEL:                      return "label";
823                 case OPCODE_DCL:                        return "dcl";
824                 case OPCODE_POWX:                       return "powx";
825                 case OPCODE_CRS:                        return "crs";
826                 case OPCODE_SGN:                        return "sgn";
827                 case OPCODE_ISGN:                       return "isgn";
828                 case OPCODE_ABS:                        return "abs";
829                 case OPCODE_IABS:                       return "iabs";
830                 case OPCODE_NRM2:                       return "nrm2";
831                 case OPCODE_NRM3:                       return "nrm3";
832                 case OPCODE_NRM4:                       return "nrm4";
833                 case OPCODE_SINCOS:                     return "sincos";
834                 case OPCODE_REP:                        return "rep";
835                 case OPCODE_ENDREP:                     return "endrep";
836                 case OPCODE_IF:                         return "if";
837                 case OPCODE_IFC:                        return "ifc";
838                 case OPCODE_ELSE:                       return "else";
839                 case OPCODE_ENDIF:                      return "endif";
840                 case OPCODE_BREAK:                      return "break";
841                 case OPCODE_BREAKC:                     return "breakc";
842                 case OPCODE_MOVA:                       return "mova";
843                 case OPCODE_DEFB:                       return "defb";
844                 case OPCODE_DEFI:                       return "defi";
845                 case OPCODE_TEXCOORD:           return "texcoord";
846                 case OPCODE_TEXKILL:            return "texkill";
847                 case OPCODE_DISCARD:            return "discard";
848                 case OPCODE_TEX:
849                         if(version < 0x0104)    return "tex";
850                         else                                    return "texld";
851                 case OPCODE_TEXBEM:                     return "texbem";
852                 case OPCODE_TEXBEML:            return "texbeml";
853                 case OPCODE_TEXREG2AR:          return "texreg2ar";
854                 case OPCODE_TEXREG2GB:          return "texreg2gb";
855                 case OPCODE_TEXM3X2PAD:         return "texm3x2pad";
856                 case OPCODE_TEXM3X2TEX:         return "texm3x2tex";
857                 case OPCODE_TEXM3X3PAD:         return "texm3x3pad";
858                 case OPCODE_TEXM3X3TEX:         return "texm3x3tex";
859                 case OPCODE_RESERVED0:          return "reserved0";
860                 case OPCODE_TEXM3X3SPEC:        return "texm3x3spec";
861                 case OPCODE_TEXM3X3VSPEC:       return "texm3x3vspec";
862                 case OPCODE_EXPP:                       return "expp";
863                 case OPCODE_LOGP:                       return "logp";
864                 case OPCODE_CND:                        return "cnd";
865                 case OPCODE_DEF:                        return "def";
866                 case OPCODE_TEXREG2RGB:         return "texreg2rgb";
867                 case OPCODE_TEXDP3TEX:          return "texdp3tex";
868                 case OPCODE_TEXM3X2DEPTH:       return "texm3x2depth";
869                 case OPCODE_TEXDP3:                     return "texdp3";
870                 case OPCODE_TEXM3X3:            return "texm3x3";
871                 case OPCODE_TEXDEPTH:           return "texdepth";
872                 case OPCODE_CMP0:                       return "cmp0";
873                 case OPCODE_ICMP:                       return "icmp";
874                 case OPCODE_UCMP:                       return "ucmp";
875                 case OPCODE_SELECT:                     return "select";
876                 case OPCODE_EXTRACT:            return "extract";
877                 case OPCODE_INSERT:                     return "insert";
878                 case OPCODE_BEM:                        return "bem";
879                 case OPCODE_DP2ADD:                     return "dp2add";
880                 case OPCODE_DFDX:                       return "dFdx";
881                 case OPCODE_DFDY:                       return "dFdy";
882                 case OPCODE_FWIDTH:                     return "fwidth";
883                 case OPCODE_TEXLDD:                     return "texldd";
884                 case OPCODE_CMP:                        return "cmp";
885                 case OPCODE_TEXLDL:                     return "texldl";
886                 case OPCODE_TEXOFFSET:          return "texoffset";
887                 case OPCODE_TEXLDLOFFSET:       return "texldloffset";
888                 case OPCODE_TEXELFETCH:         return "texelfetch";
889                 case OPCODE_TEXELFETCHOFFSET: return "texelfetchoffset";
890                 case OPCODE_TEXGRAD:            return "texgrad";
891                 case OPCODE_TEXGRADOFFSET:      return "texgradoffset";
892                 case OPCODE_BREAKP:                     return "breakp";
893                 case OPCODE_TEXSIZE:        return "texsize";
894                 case OPCODE_PHASE:                      return "phase";
895                 case OPCODE_COMMENT:            return "comment";
896                 case OPCODE_END:                        return "end";
897                 case OPCODE_PS_1_0:                     return "ps_1_0";
898                 case OPCODE_PS_1_1:                     return "ps_1_1";
899                 case OPCODE_PS_1_2:                     return "ps_1_2";
900                 case OPCODE_PS_1_3:                     return "ps_1_3";
901                 case OPCODE_PS_1_4:                     return "ps_1_4";
902                 case OPCODE_PS_2_0:                     return "ps_2_0";
903                 case OPCODE_PS_2_x:                     return "ps_2_x";
904                 case OPCODE_PS_3_0:                     return "ps_3_0";
905                 case OPCODE_VS_1_0:                     return "vs_1_0";
906                 case OPCODE_VS_1_1:                     return "vs_1_1";
907                 case OPCODE_VS_2_0:                     return "vs_2_0";
908                 case OPCODE_VS_2_x:                     return "vs_2_x";
909                 case OPCODE_VS_2_sw:            return "vs_2_sw";
910                 case OPCODE_VS_3_0:                     return "vs_3_0";
911                 case OPCODE_VS_3_sw:            return "vs_3_sw";
912                 case OPCODE_WHILE:          return "while";
913                 case OPCODE_ENDWHILE:       return "endwhile";
914                 case OPCODE_COS:            return "cos";
915                 case OPCODE_SIN:            return "sin";
916                 case OPCODE_TAN:            return "tan";
917                 case OPCODE_ACOS:           return "acos";
918                 case OPCODE_ASIN:           return "asin";
919                 case OPCODE_ATAN:           return "atan";
920                 case OPCODE_ATAN2:          return "atan2";
921                 case OPCODE_COSH:           return "cosh";
922                 case OPCODE_SINH:           return "sinh";
923                 case OPCODE_TANH:           return "tanh";
924                 case OPCODE_ACOSH:          return "acosh";
925                 case OPCODE_ASINH:          return "asinh";
926                 case OPCODE_ATANH:          return "atanh";
927                 case OPCODE_DP1:            return "dp1";
928                 case OPCODE_DP2:            return "dp2";
929                 case OPCODE_TRUNC:          return "trunc";
930                 case OPCODE_FLOOR:          return "floor";
931                 case OPCODE_ROUND:          return "round";
932                 case OPCODE_ROUNDEVEN:      return "roundEven";
933                 case OPCODE_CEIL:           return "ceil";
934                 case OPCODE_EXP2:           return "exp2";
935                 case OPCODE_LOG2:           return "log2";
936                 case OPCODE_EXP:            return "exp";
937                 case OPCODE_LOG:            return "log";
938                 case OPCODE_POW:            return "pow";
939                 case OPCODE_F2B:            return "f2b";
940                 case OPCODE_B2F:            return "b2f";
941                 case OPCODE_F2I:            return "f2i";
942                 case OPCODE_I2F:            return "i2f";
943                 case OPCODE_F2U:            return "f2u";
944                 case OPCODE_U2F:            return "u2f";
945                 case OPCODE_B2I:            return "b2i";
946                 case OPCODE_I2B:            return "i2b";
947                 case OPCODE_ALL:            return "all";
948                 case OPCODE_ANY:            return "any";
949                 case OPCODE_NEG:            return "neg";
950                 case OPCODE_INEG:           return "ineg";
951                 case OPCODE_ISNAN:          return "isnan";
952                 case OPCODE_ISINF:          return "isinf";
953                 case OPCODE_NOT:            return "not";
954                 case OPCODE_OR:             return "or";
955                 case OPCODE_XOR:            return "xor";
956                 case OPCODE_AND:            return "and";
957                 case OPCODE_EQ:             return "eq";
958                 case OPCODE_NE:             return "neq";
959                 case OPCODE_FORWARD1:       return "forward1";
960                 case OPCODE_FORWARD2:       return "forward2";
961                 case OPCODE_FORWARD3:       return "forward3";
962                 case OPCODE_FORWARD4:       return "forward4";
963                 case OPCODE_REFLECT1:       return "reflect1";
964                 case OPCODE_REFLECT2:       return "reflect2";
965                 case OPCODE_REFLECT3:       return "reflect3";
966                 case OPCODE_REFLECT4:       return "reflect4";
967                 case OPCODE_REFRACT1:       return "refract1";
968                 case OPCODE_REFRACT2:       return "refract2";
969                 case OPCODE_REFRACT3:       return "refract3";
970                 case OPCODE_REFRACT4:       return "refract4";
971                 case OPCODE_LEAVE:          return "leave";
972                 case OPCODE_CONTINUE:       return "continue";
973                 case OPCODE_TEST:           return "test";
974                 case OPCODE_SWITCH:         return "switch";
975                 case OPCODE_ENDSWITCH:      return "endswitch";
976                 default:
977                         ASSERT(false);
978                 }
979
980                 return "<unknown>";
981         }
982
983         std::string Shader::Instruction::controlString() const
984         {
985                 if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_CMP)
986                 {
987                         if(project) return "p";
988
989                         if(bias) return "b";
990
991                         // FIXME: LOD
992                 }
993
994                 switch(control)
995                 {
996                 case 1: return "_gt";
997                 case 2: return "_eq";
998                 case 3: return "_ge";
999                 case 4: return "_lt";
1000                 case 5: return "_ne";
1001                 case 6: return "_le";
1002                 default:
1003                         return "";
1004                 //      ASSERT(false);   // FIXME
1005                 }
1006         }
1007
1008         std::string Shader::Parameter::string(ShaderType shaderType, unsigned short version) const
1009         {
1010                 std::ostringstream buffer;
1011
1012                 if(type == PARAMETER_FLOAT4LITERAL)
1013                 {
1014                         buffer << '{' << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << '}';
1015
1016                         return buffer.str();
1017                 }
1018                 else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE)
1019                 {
1020                         buffer << index;
1021
1022                         return typeString(shaderType, version) + buffer.str();
1023                 }
1024                 else
1025                 {
1026                         return typeString(shaderType, version);
1027                 }
1028         }
1029
1030         std::string Shader::Parameter::typeString(ShaderType shaderType, unsigned short version) const
1031         {
1032                 switch(type)
1033                 {
1034                 case PARAMETER_TEMP:                    return "r";
1035                 case PARAMETER_INPUT:                   return "v";
1036                 case PARAMETER_CONST:                   return "c";
1037                 case PARAMETER_TEXTURE:
1038         //      case PARAMETER_ADDR:
1039                         if(shaderType == SHADER_PIXEL)  return "t";
1040                         else                                                    return "a0";
1041                 case PARAMETER_RASTOUT:
1042                         if(index == 0)              return "oPos";
1043                         else if(index == 1)         return "oFog";
1044                         else if(index == 2)         return "oPts";
1045                         else                        ASSERT(false);
1046                 case PARAMETER_ATTROUT:                 return "oD";
1047                 case PARAMETER_TEXCRDOUT:
1048         //      case PARAMETER_OUTPUT:                  return "";
1049                         if(version < 0x0300)            return "oT";
1050                         else                                            return "o";
1051                 case PARAMETER_CONSTINT:                return "i";
1052                 case PARAMETER_COLOROUT:                return "oC";
1053                 case PARAMETER_DEPTHOUT:                return "oDepth";
1054                 case PARAMETER_SAMPLER:                 return "s";
1055         //      case PARAMETER_CONST2:                  return "";
1056         //      case PARAMETER_CONST3:                  return "";
1057         //      case PARAMETER_CONST4:                  return "";
1058                 case PARAMETER_CONSTBOOL:               return "b";
1059                 case PARAMETER_LOOP:                    return "aL";
1060         //      case PARAMETER_TEMPFLOAT16:             return "";
1061                 case PARAMETER_MISCTYPE:
1062                         if(index == 0)                          return "vPos";
1063                         else if(index == 1)                     return "vFace";
1064                         else                                            ASSERT(false);
1065                 case PARAMETER_LABEL:                   return "l";
1066                 case PARAMETER_PREDICATE:               return "p0";
1067                 case PARAMETER_FLOAT4LITERAL:   return "";
1068                 case PARAMETER_BOOL1LITERAL:    return "";
1069                 case PARAMETER_INT4LITERAL:             return "";
1070         //      case PARAMETER_VOID:                    return "";
1071                 default:
1072                         ASSERT(false);
1073                 }
1074
1075                 return "";
1076         }
1077
1078         bool Shader::Instruction::isBranch() const
1079         {
1080                 return opcode == OPCODE_IF || opcode == OPCODE_IFC;
1081         }
1082
1083         bool Shader::Instruction::isCall() const
1084         {
1085                 return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ;
1086         }
1087
1088         bool Shader::Instruction::isBreak() const
1089         {
1090                 return opcode == OPCODE_BREAK || opcode == OPCODE_BREAKC || opcode == OPCODE_BREAKP;
1091         }
1092
1093         bool Shader::Instruction::isLoop() const
1094         {
1095                 return opcode == OPCODE_LOOP || opcode == OPCODE_REP || opcode == OPCODE_WHILE;
1096         }
1097
1098         bool Shader::Instruction::isEndLoop() const
1099         {
1100                 return opcode == OPCODE_ENDLOOP || opcode == OPCODE_ENDREP || opcode == OPCODE_ENDWHILE;
1101         }
1102
1103         bool Shader::Instruction::isPredicated() const
1104         {
1105                 return predicate ||
1106                        analysisBranch ||
1107                        analysisBreak ||
1108                        analysisContinue ||
1109                        analysisLeave;
1110         }
1111
1112         Shader::Shader() : serialID(serialCounter++)
1113         {
1114                 usedSamplers = 0;
1115         }
1116
1117         Shader::~Shader()
1118         {
1119                 for(unsigned int i = 0; i < instruction.size(); i++)
1120                 {
1121                         delete instruction[i];
1122                         instruction[i] = 0;
1123                 }
1124         }
1125
1126         void Shader::parse(const unsigned long *token)
1127         {
1128                 minorVersion = (unsigned char)(token[0] & 0x000000FF);
1129                 majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8);
1130                 shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16);
1131
1132                 int length = 0;
1133
1134                 if(shaderType == SHADER_VERTEX)
1135                 {
1136                         length = VertexShader::validate(token);
1137                 }
1138                 else if(shaderType == SHADER_PIXEL)
1139                 {
1140                         length = PixelShader::validate(token);
1141                 }
1142                 else ASSERT(false);
1143
1144                 ASSERT(length != 0);
1145                 instruction.resize(length);
1146
1147                 for(int i = 0; i < length; i++)
1148                 {
1149                         while((*token & 0x0000FFFF) == 0x0000FFFE)   // Comment token
1150                         {
1151                                 int length = (*token & 0x7FFF0000) >> 16;
1152
1153                                 token += length + 1;
1154                         }
1155
1156                         int tokenCount = size(*token);
1157
1158                         instruction[i] = new Instruction(token, tokenCount, majorVersion);
1159
1160                         token += 1 + tokenCount;
1161                 }
1162         }
1163
1164         int Shader::size(unsigned long opcode) const
1165         {
1166                 return size(opcode, version);
1167         }
1168
1169         int Shader::size(unsigned long opcode, unsigned short version)
1170         {
1171                 if(version > 0x0300)
1172                 {
1173                         ASSERT(false);
1174                 }
1175
1176                 static const signed char size[] =
1177                 {
1178                         0,   // NOP = 0
1179                         2,   // MOV
1180                         3,   // ADD
1181                         3,   // SUB
1182                         4,   // MAD
1183                         3,   // MUL
1184                         2,   // RCP
1185                         2,   // RSQ
1186                         3,   // DP3
1187                         3,   // DP4
1188                         3,   // MIN
1189                         3,   // MAX
1190                         3,   // SLT
1191                         3,   // SGE
1192                         2,   // EXP
1193                         2,   // LOG
1194                         2,   // LIT
1195                         3,   // DST
1196                         4,   // LRP
1197                         2,   // FRC
1198                         3,   // M4x4
1199                         3,   // M4x3
1200                         3,   // M3x4
1201                         3,   // M3x3
1202                         3,   // M3x2
1203                         1,   // CALL
1204                         2,   // CALLNZ
1205                         2,   // LOOP
1206                         0,   // RET
1207                         0,   // ENDLOOP
1208                         1,   // LABEL
1209                         2,   // DCL
1210                         3,   // POW
1211                         3,   // CRS
1212                         4,   // SGN
1213                         2,   // ABS
1214                         2,   // NRM
1215                         4,   // SINCOS
1216                         1,   // REP
1217                         0,   // ENDREP
1218                         1,   // IF
1219                         2,   // IFC
1220                         0,   // ELSE
1221                         0,   // ENDIF
1222                         0,   // BREAK
1223                         2,   // BREAKC
1224                         2,   // MOVA
1225                         2,   // DEFB
1226                         5,   // DEFI
1227                         -1,  // 49
1228                         -1,  // 50
1229                         -1,  // 51
1230                         -1,  // 52
1231                         -1,  // 53
1232                         -1,  // 54
1233                         -1,  // 55
1234                         -1,  // 56
1235                         -1,  // 57
1236                         -1,  // 58
1237                         -1,  // 59
1238                         -1,  // 60
1239                         -1,  // 61
1240                         -1,  // 62
1241                         -1,  // 63
1242                         1,   // TEXCOORD = 64
1243                         1,   // TEXKILL
1244                         1,   // TEX
1245                         2,   // TEXBEM
1246                         2,   // TEXBEML
1247                         2,   // TEXREG2AR
1248                         2,   // TEXREG2GB
1249                         2,   // TEXM3x2PAD
1250                         2,   // TEXM3x2TEX
1251                         2,   // TEXM3x3PAD
1252                         2,   // TEXM3x3TEX
1253                         -1,  // RESERVED0
1254                         3,   // TEXM3x3SPEC
1255                         2,   // TEXM3x3VSPEC
1256                         2,   // EXPP
1257                         2,   // LOGP
1258                         4,   // CND
1259                         5,   // DEF
1260                         2,   // TEXREG2RGB
1261                         2,   // TEXDP3TEX
1262                         2,   // TEXM3x2DEPTH
1263                         2,   // TEXDP3
1264                         2,   // TEXM3x3
1265                         1,   // TEXDEPTH
1266                         4,   // CMP
1267                         3,   // BEM
1268                         4,   // DP2ADD
1269                         2,   // DSX
1270                         2,   // DSY
1271                         5,   // TEXLDD
1272                         3,   // SETP
1273                         3,   // TEXLDL
1274                         2,   // BREAKP
1275                         -1,  // 97
1276                         -1,  // 98
1277                         -1,  // 99
1278                         -1,  // 100
1279                         -1,  // 101
1280                         -1,  // 102
1281                         -1,  // 103
1282                         -1,  // 104
1283                         -1,  // 105
1284                         -1,  // 106
1285                         -1,  // 107
1286                         -1,  // 108
1287                         -1,  // 109
1288                         -1,  // 110
1289                         -1,  // 111
1290                         -1,  // 112
1291                 };
1292
1293                 int length = 0;
1294
1295                 if((opcode & 0x0000FFFF) == OPCODE_COMMENT)
1296                 {
1297                         return (opcode & 0x7FFF0000) >> 16;
1298                 }
1299
1300                 if(opcode != OPCODE_PS_1_0 &&
1301                    opcode != OPCODE_PS_1_1 &&
1302                    opcode != OPCODE_PS_1_2 &&
1303                    opcode != OPCODE_PS_1_3 &&
1304                    opcode != OPCODE_PS_1_4 &&
1305                    opcode != OPCODE_PS_2_0 &&
1306                    opcode != OPCODE_PS_2_x &&
1307                    opcode != OPCODE_PS_3_0 &&
1308                    opcode != OPCODE_VS_1_0 &&
1309                    opcode != OPCODE_VS_1_1 &&
1310                    opcode != OPCODE_VS_2_0 &&
1311                    opcode != OPCODE_VS_2_x &&
1312                    opcode != OPCODE_VS_2_sw &&
1313                    opcode != OPCODE_VS_3_0 &&
1314                    opcode != OPCODE_VS_3_sw &&
1315                    opcode != OPCODE_PHASE &&
1316                    opcode != OPCODE_END)
1317                 {
1318                         if(version >= 0x0200)
1319                         {
1320                                 length = (opcode & 0x0F000000) >> 24;
1321                         }
1322                         else
1323                         {
1324                                 length = size[opcode & 0x0000FFFF];
1325                         }
1326                 }
1327
1328                 if(length < 0)
1329                 {
1330                         ASSERT(false);
1331                 }
1332
1333                 if(version == 0x0104)
1334                 {
1335                         switch(opcode & 0x0000FFFF)
1336                         {
1337                         case OPCODE_TEX:
1338                                 length += 1;
1339                                 break;
1340                         case OPCODE_TEXCOORD:
1341                                 length += 1;
1342                                 break;
1343                         default:
1344                                 break;
1345                         }
1346                 }
1347
1348                 return length;
1349         }
1350
1351         bool Shader::maskContainsComponent(int mask, int component)
1352         {
1353                 return (mask & (1 << component)) != 0;
1354         }
1355
1356         bool Shader::swizzleContainsComponent(int swizzle, int component)
1357         {
1358                 if((swizzle & 0x03) >> 0 == component) return true;
1359                 if((swizzle & 0x0C) >> 2 == component) return true;
1360                 if((swizzle & 0x30) >> 4 == component) return true;
1361                 if((swizzle & 0xC0) >> 6 == component) return true;
1362
1363                 return false;
1364         }
1365
1366         bool Shader::swizzleContainsComponentMasked(int swizzle, int component, int mask)
1367         {
1368                 if(mask & 0x1) if((swizzle & 0x03) >> 0 == component) return true;
1369                 if(mask & 0x2) if((swizzle & 0x0C) >> 2 == component) return true;
1370                 if(mask & 0x4) if((swizzle & 0x30) >> 4 == component) return true;
1371                 if(mask & 0x8) if((swizzle & 0xC0) >> 6 == component) return true;
1372
1373                 return false;
1374         }
1375
1376         bool Shader::containsDynamicBranching() const
1377         {
1378                 return dynamicBranching;
1379         }
1380
1381         bool Shader::containsBreakInstruction() const
1382         {
1383                 return containsBreak;
1384         }
1385
1386         bool Shader::containsContinueInstruction() const
1387         {
1388                 return containsContinue;
1389         }
1390
1391         bool Shader::containsLeaveInstruction() const
1392         {
1393                 return containsLeave;
1394         }
1395
1396         bool Shader::containsDefineInstruction() const
1397         {
1398                 return containsDefine;
1399         }
1400
1401         bool Shader::usesSampler(int index) const
1402         {
1403                 return (usedSamplers & (1 << index)) != 0;
1404         }
1405
1406         int Shader::getSerialID() const
1407         {
1408                 return serialID;
1409         }
1410
1411         size_t Shader::getLength() const
1412         {
1413                 return instruction.size();
1414         }
1415
1416         Shader::ShaderType Shader::getShaderType() const
1417         {
1418                 return shaderType;
1419         }
1420
1421         unsigned short Shader::getVersion() const
1422         {
1423                 return version;
1424         }
1425
1426         void Shader::print(const char *fileName, ...) const
1427         {
1428                 char fullName[1024 + 1];
1429
1430                 va_list vararg;
1431                 va_start(vararg, fileName);
1432                 vsnprintf(fullName, 1024, fileName, vararg);
1433                 va_end(vararg);
1434
1435                 std::ofstream file(fullName, std::ofstream::out);
1436
1437                 for(unsigned int i = 0; i < instruction.size(); i++)
1438                 {
1439                         file << instruction[i]->string(shaderType, version) << std::endl;
1440                 }
1441         }
1442
1443         void Shader::printInstruction(int index, const char *fileName) const
1444         {
1445                 std::ofstream file(fileName, std::ofstream::out | std::ofstream::app);
1446
1447                 file << instruction[index]->string(shaderType, version) << std::endl;
1448         }
1449
1450         void Shader::append(Instruction *instruction)
1451         {
1452                 this->instruction.push_back(instruction);
1453         }
1454
1455         void Shader::declareSampler(int i)
1456         {
1457                 usedSamplers |= 1 << i;
1458         }
1459
1460         const Shader::Instruction *Shader::getInstruction(size_t i) const
1461         {
1462                 ASSERT(i < instruction.size());
1463
1464                 return instruction[i];
1465         }
1466
1467         void Shader::optimize()
1468         {
1469                 optimizeLeave();
1470                 optimizeCall();
1471                 removeNull();
1472         }
1473
1474         void Shader::optimizeLeave()
1475         {
1476                 // A return (leave) right before the end of a function or the shader can be removed
1477                 for(unsigned int i = 0; i < instruction.size(); i++)
1478                 {
1479                         if(instruction[i]->opcode == OPCODE_LEAVE)
1480                         {
1481                                 if(i == instruction.size() - 1 || instruction[i + 1]->opcode == OPCODE_RET)
1482                                 {
1483                                         instruction[i]->opcode = OPCODE_NULL;
1484                                 }
1485                         }
1486                 }
1487         }
1488
1489         void Shader::optimizeCall()
1490         {
1491                 // Eliminate uncalled functions
1492                 std::set<int> calledFunctions;
1493                 bool rescan = true;
1494
1495                 while(rescan)
1496                 {
1497                         calledFunctions.clear();
1498                         rescan = false;
1499
1500                         for(unsigned int i = 0; i < instruction.size(); i++)
1501                         {
1502                                 if(instruction[i]->isCall())
1503                                 {
1504                                         calledFunctions.insert(instruction[i]->dst.label);
1505                                 }
1506                         }
1507
1508                         if(!calledFunctions.empty())
1509                         {
1510                                 for(unsigned int i = 0; i < instruction.size(); i++)
1511                                 {
1512                                         if(instruction[i]->opcode == OPCODE_LABEL)
1513                                         {
1514                                                 if(calledFunctions.find(instruction[i]->dst.label) == calledFunctions.end())
1515                                                 {
1516                                                         for( ; i < instruction.size(); i++)
1517                                                         {
1518                                                                 Opcode oldOpcode = instruction[i]->opcode;
1519                                                                 instruction[i]->opcode = OPCODE_NULL;
1520
1521                                                                 if(oldOpcode == OPCODE_RET)
1522                                                                 {
1523                                                                         rescan = true;
1524                                                                         break;
1525                                                                 }
1526                                                         }
1527                                                 }
1528                                         }
1529                                 }
1530                         }
1531                 }
1532
1533                 // Optimize the entry call
1534                 if(instruction.size() >= 2 && instruction[0]->opcode == OPCODE_CALL && instruction[1]->opcode == OPCODE_RET)
1535                 {
1536                         if(calledFunctions.size() == 1)
1537                         {
1538                                 instruction[0]->opcode = OPCODE_NULL;
1539                                 instruction[1]->opcode = OPCODE_NULL;
1540
1541                                 for(size_t i = 2; i < instruction.size(); i++)
1542                                 {
1543                                         if(instruction[i]->opcode == OPCODE_LABEL || instruction[i]->opcode == OPCODE_RET)
1544                                         {
1545                                                 instruction[i]->opcode = OPCODE_NULL;
1546                                         }
1547                                 }
1548                         }
1549                 }
1550         }
1551
1552         void Shader::removeNull()
1553         {
1554                 size_t size = 0;
1555                 for(size_t i = 0; i < instruction.size(); i++)
1556                 {
1557                         if(instruction[i]->opcode != OPCODE_NULL)
1558                         {
1559                                 instruction[size] = instruction[i];
1560                                 size++;
1561                         }
1562                         else
1563                         {
1564                                 delete instruction[i];
1565                         }
1566                 }
1567
1568                 instruction.resize(size);
1569         }
1570
1571         void Shader::analyzeDirtyConstants()
1572         {
1573                 dirtyConstantsF = 0;
1574                 dirtyConstantsI = 0;
1575                 dirtyConstantsB = 0;
1576
1577                 for(unsigned int i = 0; i < instruction.size(); i++)
1578                 {
1579                         switch(instruction[i]->opcode)
1580                         {
1581                         case OPCODE_DEF:
1582                                 if(instruction[i]->dst.index + 1 > dirtyConstantsF)
1583                                 {
1584                                         dirtyConstantsF = instruction[i]->dst.index + 1;
1585                                 }
1586                                 break;
1587                         case OPCODE_DEFI:
1588                                 if(instruction[i]->dst.index + 1 > dirtyConstantsI)
1589                                 {
1590                                         dirtyConstantsI = instruction[i]->dst.index + 1;
1591                                 }
1592                                 break;
1593                         case OPCODE_DEFB:
1594                                 if(instruction[i]->dst.index + 1 > dirtyConstantsB)
1595                                 {
1596                                         dirtyConstantsB = instruction[i]->dst.index + 1;
1597                                 }
1598                                 break;
1599                         default:
1600                                 break;
1601                         }
1602                 }
1603         }
1604
1605         void Shader::analyzeDynamicBranching()
1606         {
1607                 dynamicBranching = false;
1608                 containsLeave = false;
1609                 containsBreak = false;
1610                 containsContinue = false;
1611                 containsDefine = false;
1612
1613                 // Determine global presence of branching instructions
1614                 for(unsigned int i = 0; i < instruction.size(); i++)
1615                 {
1616                         switch(instruction[i]->opcode)
1617                         {
1618                         case OPCODE_CALLNZ:
1619                         case OPCODE_IF:
1620                         case OPCODE_IFC:
1621                         case OPCODE_BREAK:
1622                         case OPCODE_BREAKC:
1623                         case OPCODE_CMP:
1624                         case OPCODE_BREAKP:
1625                         case OPCODE_LEAVE:
1626                         case OPCODE_CONTINUE:
1627                                 if(instruction[i]->src[0].type != PARAMETER_CONSTBOOL)
1628                                 {
1629                                         dynamicBranching = true;
1630                                 }
1631
1632                                 if(instruction[i]->opcode == OPCODE_LEAVE)
1633                                 {
1634                                         containsLeave = true;
1635                                 }
1636
1637                                 if(instruction[i]->isBreak())
1638                                 {
1639                                         containsBreak = true;
1640                                 }
1641
1642                                 if(instruction[i]->opcode == OPCODE_CONTINUE)
1643                                 {
1644                                         containsContinue = true;
1645                                 }
1646                         case OPCODE_DEF:
1647                         case OPCODE_DEFB:
1648                         case OPCODE_DEFI:
1649                                 containsDefine = true;
1650                         default:
1651                                 break;
1652                         }
1653                 }
1654
1655                 // Conservatively determine which instructions are affected by dynamic branching
1656                 int branchDepth = 0;
1657                 int breakDepth = 0;
1658                 int continueDepth = 0;
1659                 bool leaveReturn = false;
1660                 unsigned int functionBegin = 0;
1661
1662                 for(unsigned int i = 0; i < instruction.size(); i++)
1663                 {
1664                         // If statements
1665                         if(instruction[i]->isBranch())
1666                         {
1667                                 branchDepth++;
1668                         }
1669                         else if(instruction[i]->opcode == OPCODE_ENDIF)
1670                         {
1671                                 branchDepth--;
1672                         }
1673
1674                         if(branchDepth > 0)
1675                         {
1676                                 instruction[i]->analysisBranch = true;
1677
1678                                 if(instruction[i]->isCall())
1679                                 {
1680                                         markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1681                                 }
1682                         }
1683
1684                         // Break statemement
1685                         if(instruction[i]->isBreak())
1686                         {
1687                                 breakDepth++;
1688                         }
1689
1690                         if(breakDepth > 0)
1691                         {
1692                                 if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH)   // Nested loop or switch, don't make the end of it disable the break execution mask
1693                                 {
1694                                         breakDepth++;
1695                                 }
1696                                 else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH)
1697                                 {
1698                                         breakDepth--;
1699                                 }
1700
1701                                 instruction[i]->analysisBreak = true;
1702
1703                                 if(instruction[i]->isCall())
1704                                 {
1705                                         markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1706                                 }
1707                         }
1708
1709                         // Continue statement
1710                         if(instruction[i]->opcode == OPCODE_CONTINUE)
1711                         {
1712                                 continueDepth++;
1713                         }
1714
1715                         if(continueDepth > 0)
1716                         {
1717                                 if(instruction[i]->isLoop() || instruction[i]->opcode == OPCODE_SWITCH)   // Nested loop or switch, don't make the end of it disable the break execution mask
1718                                 {
1719                                         continueDepth++;
1720                                 }
1721                                 else if(instruction[i]->isEndLoop() || instruction[i]->opcode == OPCODE_ENDSWITCH)
1722                                 {
1723                                         continueDepth--;
1724                                 }
1725
1726                                 instruction[i]->analysisContinue = true;
1727
1728                                 if(instruction[i]->isCall())
1729                                 {
1730                                         markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_CONTINUE);
1731                                 }
1732                         }
1733
1734                         // Return (leave) statement
1735                         if(instruction[i]->opcode == OPCODE_LEAVE)
1736                         {
1737                                 leaveReturn = true;
1738
1739                                 // Mark loop body instructions prior to the return statement
1740                                 for(unsigned int l = functionBegin; l < i; l++)
1741                                 {
1742                                         if(instruction[l]->isLoop())
1743                                         {
1744                                                 for(unsigned int r = l + 1; r < i; r++)
1745                                                 {
1746                                                         instruction[r]->analysisLeave = true;
1747                                                 }
1748
1749                                                 break;
1750                                         }
1751                                 }
1752                         }
1753                         else if(instruction[i]->opcode == OPCODE_RET)   // End of the function
1754                         {
1755                                 leaveReturn = false;
1756                         }
1757                         else if(instruction[i]->opcode == OPCODE_LABEL)
1758                         {
1759                                 functionBegin = i;
1760                         }
1761
1762                         if(leaveReturn)
1763                         {
1764                                 instruction[i]->analysisLeave = true;
1765
1766                                 if(instruction[i]->isCall())
1767                                 {
1768                                         markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_LEAVE);
1769                                 }
1770                         }
1771                 }
1772         }
1773
1774         void Shader::markFunctionAnalysis(unsigned int functionLabel, Analysis flag)
1775         {
1776                 bool marker = false;
1777                 for(unsigned int i = 0; i < instruction.size(); i++)
1778                 {
1779                         if(!marker)
1780                         {
1781                                 if(instruction[i]->opcode == OPCODE_LABEL && instruction[i]->dst.label == functionLabel)
1782                                 {
1783                                         marker = true;
1784                                 }
1785                         }
1786                         else
1787                         {
1788                                 if(instruction[i]->opcode == OPCODE_RET)
1789                                 {
1790                                         break;
1791                                 }
1792                                 else if(instruction[i]->isCall())
1793                                 {
1794                                         markFunctionAnalysis(instruction[i]->dst.label, flag);
1795                                 }
1796
1797                                 instruction[i]->analysis |= flag;
1798                         }
1799                 }
1800         }
1801
1802         void Shader::analyzeSamplers()
1803         {
1804                 for(unsigned int i = 0; i < instruction.size(); i++)
1805                 {
1806                         switch(instruction[i]->opcode)
1807                         {
1808                         case OPCODE_TEX:
1809                         case OPCODE_TEXBEM:
1810                         case OPCODE_TEXBEML:
1811                         case OPCODE_TEXREG2AR:
1812                         case OPCODE_TEXREG2GB:
1813                         case OPCODE_TEXM3X2TEX:
1814                         case OPCODE_TEXM3X3TEX:
1815                         case OPCODE_TEXM3X3SPEC:
1816                         case OPCODE_TEXM3X3VSPEC:
1817                         case OPCODE_TEXREG2RGB:
1818                         case OPCODE_TEXDP3TEX:
1819                         case OPCODE_TEXM3X2DEPTH:
1820                         case OPCODE_TEXLDD:
1821                         case OPCODE_TEXLDL:
1822                         case OPCODE_TEXOFFSET:
1823                         case OPCODE_TEXLDLOFFSET:
1824                         case OPCODE_TEXELFETCH:
1825                         case OPCODE_TEXELFETCHOFFSET:
1826                         case OPCODE_TEXGRAD:
1827                         case OPCODE_TEXGRADOFFSET:
1828                                 {
1829                                         Parameter &dst = instruction[i]->dst;
1830                                         Parameter &src1 = instruction[i]->src[1];
1831
1832                                         if(majorVersion >= 2)
1833                                         {
1834                                                 usedSamplers |= 1 << src1.index;
1835                                         }
1836                                         else
1837                                         {
1838                                                 usedSamplers |= 1 << dst.index;
1839                                         }
1840                                 }
1841                                 break;
1842                         default:
1843                                 break;
1844                         }
1845                 }
1846         }
1847
1848         // Assigns a unique index to each call instruction, on a per label basis.
1849         // This is used to know what basic block to return to.
1850         void Shader::analyzeCallSites()
1851         {
1852                 int callSiteIndex[2048] = {0};
1853
1854                 for(unsigned int i = 0; i < instruction.size(); i++)
1855                 {
1856                         if(instruction[i]->opcode == OPCODE_CALL || instruction[i]->opcode == OPCODE_CALLNZ)
1857                         {
1858                                 int label = instruction[i]->dst.label;
1859
1860                                 instruction[i]->dst.callSite = callSiteIndex[label]++;
1861                         }
1862                 }
1863         }
1864
1865         void Shader::analyzeDynamicIndexing()
1866         {
1867                 dynamicallyIndexedTemporaries = false;
1868                 dynamicallyIndexedInput = false;
1869                 dynamicallyIndexedOutput = false;
1870
1871                 for(unsigned int i = 0; i < instruction.size(); i++)
1872                 {
1873                         if(instruction[i]->dst.rel.type == PARAMETER_ADDR ||
1874                            instruction[i]->dst.rel.type == PARAMETER_LOOP ||
1875                            instruction[i]->dst.rel.type == PARAMETER_TEMP ||
1876                            instruction[i]->dst.rel.type == PARAMETER_CONST)
1877                         {
1878                                 switch(instruction[i]->dst.type)
1879                                 {
1880                                 case PARAMETER_TEMP:   dynamicallyIndexedTemporaries = true; break;
1881                                 case PARAMETER_INPUT:  dynamicallyIndexedInput = true;       break;
1882                                 case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true;      break;
1883                                 default: break;
1884                                 }
1885                         }
1886
1887                         for(int j = 0; j < 3; j++)
1888                         {
1889                                 if(instruction[i]->src[j].rel.type == PARAMETER_ADDR ||
1890                                    instruction[i]->src[j].rel.type == PARAMETER_LOOP ||
1891                                    instruction[i]->src[j].rel.type == PARAMETER_TEMP ||
1892                                    instruction[i]->src[j].rel.type == PARAMETER_CONST)
1893                                 {
1894                                         switch(instruction[i]->src[j].type)
1895                                         {
1896                                         case PARAMETER_TEMP:   dynamicallyIndexedTemporaries = true; break;
1897                                         case PARAMETER_INPUT:  dynamicallyIndexedInput = true;       break;
1898                                         case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true;      break;
1899                                         default: break;
1900                                         }
1901                                 }
1902                         }
1903                 }
1904         }
1905 }