1 // SwiftShader Software Renderer
3 // Copyright(c) 2005-2013 TransGaming Inc.
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
14 #include "VertexShader.hpp"
15 #include "PixelShader.hpp"
// Shared counter from which the Shader constructor draws serial IDs (read, then post-incremented).
// NOTE(review): 'volatile' does not make that increment atomic - confirm shaders are never constructed concurrently.
26 volatile int Shader::serialCounter = 1;
28 Shader::Opcode Shader::OPCODE_DP(int i)
32 default: ASSERT(false);
33 case 1: return OPCODE_DP1;
34 case 2: return OPCODE_DP2;
35 case 3: return OPCODE_DP3;
36 case 4: return OPCODE_DP4;
40 Shader::Opcode Shader::OPCODE_LEN(int i)
44 default: ASSERT(false);
45 case 1: return OPCODE_ABS;
46 case 2: return OPCODE_LEN2;
47 case 3: return OPCODE_LEN3;
48 case 4: return OPCODE_LEN4;
52 Shader::Opcode Shader::OPCODE_DIST(int i)
56 default: ASSERT(false);
57 case 1: return OPCODE_DIST1;
58 case 2: return OPCODE_DIST2;
59 case 3: return OPCODE_DIST3;
60 case 4: return OPCODE_DIST4;
64 Shader::Opcode Shader::OPCODE_NRM(int i)
68 default: ASSERT(false);
69 case 1: return OPCODE_SGN;
70 case 2: return OPCODE_NRM2;
71 case 3: return OPCODE_NRM3;
72 case 4: return OPCODE_NRM4;
76 Shader::Opcode Shader::OPCODE_FORWARD(int i)
80 default: ASSERT(false);
81 case 1: return OPCODE_FORWARD1;
82 case 2: return OPCODE_FORWARD2;
83 case 3: return OPCODE_FORWARD3;
84 case 4: return OPCODE_FORWARD4;
88 Shader::Opcode Shader::OPCODE_REFLECT(int i)
92 default: ASSERT(false);
93 case 1: return OPCODE_REFLECT1;
94 case 2: return OPCODE_REFLECT2;
95 case 3: return OPCODE_REFLECT3;
96 case 4: return OPCODE_REFLECT4;
100 Shader::Opcode Shader::OPCODE_REFRACT(int i)
104 default: ASSERT(false);
105 case 1: return OPCODE_REFRACT1;
106 case 2: return OPCODE_REFRACT2;
107 case 3: return OPCODE_REFRACT3;
108 case 4: return OPCODE_REFRACT4;
// Builds an instruction from an opcode alone (no token stream). The control field, predicate
// negation/swizzle, sampler type and usage are set to neutral defaults.
112 Shader::Instruction::Instruction(Opcode opcode) : opcode(opcode), analysis(0)
114 control = CONTROL_RESERVED0;
117 predicateNot = false;
// 0xE4 selects x, y, z, w in order (two bits per component), which swizzleString() special-cases
118 predicateSwizzle = 0xE4;
121 samplerType = SAMPLER_UNKNOWN;
122 usage = USAGE_POSITION;
// Decodes one instruction from the token stream at 'token'. The first token is the operation
// token; 'size' gates how many operand tokens are parsed, and 'majorVersion' selects the
// version-dependent encodings (relative addressing, predication).
126 Shader::Instruction::Instruction(const unsigned long *token, int size, unsigned char majorVersion) : analysis(0)
128 parseOperationToken(*token++, majorVersion);
130 samplerType = SAMPLER_UNKNOWN;
131 usage = USAGE_POSITION;
// Flow-control opcodes: every operand is a source, and at most three are accepted
134 if(opcode == OPCODE_IF ||
135 opcode == OPCODE_IFC ||
136 opcode == OPCODE_LOOP ||
137 opcode == OPCODE_REP ||
138 opcode == OPCODE_BREAKC ||
139 opcode == OPCODE_BREAKP) // No destination operand
141 if(size > 0) parseSourceToken(0, token++, majorVersion);
142 if(size > 1) parseSourceToken(1, token++, majorVersion);
143 if(size > 2) parseSourceToken(2, token++, majorVersion);
144 if(size > 3) ASSERT(false);
// Declarations: a declaration token followed by the declared register, parsed as the destination
146 else if(opcode == OPCODE_DCL)
148 parseDeclarationToken(*token++);
149 parseDestinationToken(token++, majorVersion);
// Every other opcode starts with a destination operand
155 parseDestinationToken(token, majorVersion);
// Relatively addressed destination on version 3+ (parseDestinationToken reads token[1] for it)
157 if(dst.rel.type != PARAMETER_VOID && majorVersion >= 3)
// Predicate token: the modifier field (bits 24-27) carries the negation, bits 16-23 the swizzle
171 predicateNot = (Modifier)((*token & 0x0F000000) >> 24) == MODIFIER_NOT;
172 predicateSwizzle = (unsigned char)((*token & 0x00FF0000) >> 16);
// Source operands are parsed for as long as 'size' stays positive
178 for(int i = 0; size > 0; i++)
180 parseSourceToken(i, token, majorVersion);
// Relatively addressed source on version 2+ (parseSourceToken reads token[1] for it)
185 if(src[i].rel.type != PARAMETER_VOID && majorVersion >= 2)
// Destructor of Instruction.
194 Shader::Instruction::~Instruction()
// Produces the assembly-style text of this instruction for the given shader type and version.
// Opcodes other than OPCODE_DCL are printed as: co-issue marker, predicate, mnemonic with its
// control/shift/modifier suffixes, destination, then up to four sources. OPCODE_DCL is printed
// as "dcl" followed by a sampler-type or usage suffix and the declared register.
198 std::string Shader::Instruction::string(ShaderType shaderType, unsigned short version) const
200 std::string instructionString;
202 if(opcode != OPCODE_DCL)
// A leading "+ " marks a co-issued instruction
204 instructionString += coissue ? "+ " : "";
// Predicate prefix: "(p0<swizzle>) " or "(!p0<swizzle>) " when negated
208 instructionString += predicateNot ? "(!p0" : "(p0";
209 instructionString += swizzleString(PARAMETER_PREDICATE, predicateSwizzle);
210 instructionString += ") ";
213 instructionString += operationString(version) + controlString() + dst.shiftString() + dst.modifierString();
215 if(dst.type != PARAMETER_VOID)
217 instructionString += " " + dst.string(shaderType, version) +
218 dst.relativeString() +
222 for(int i = 0; i < 4; i++)
224 if(src[i].type != PARAMETER_VOID)
// ", " after a destination or an earlier source, otherwise a single space after the mnemonic
226 instructionString += (dst.type != PARAMETER_VOID || i > 0) ? ", " : " ";
227 instructionString += src[i].preModifierString() +
228 src[i].string(shaderType, version) +
229 src[i].relativeString() +
230 src[i].postModifierString() +
231 src[i].swizzleString();
// Declaration form
237 instructionString += "dcl";
// Sampler declarations get a texture-type suffix
239 if(dst.type == PARAMETER_SAMPLER)
243 case SAMPLER_UNKNOWN: instructionString += " "; break;
244 case SAMPLER_1D: instructionString += "_1d "; break;
245 case SAMPLER_2D: instructionString += "_2d "; break;
246 case SAMPLER_CUBE: instructionString += "_cube "; break;
247 case SAMPLER_VOLUME: instructionString += "_volume "; break;
252 instructionString += dst.string(shaderType, version);
// Input/output/texture register declarations
254 else if(dst.type == PARAMETER_INPUT ||
255 dst.type == PARAMETER_OUTPUT ||
256 dst.type == PARAMETER_TEXTURE)
// From version 3.0 on the usage semantic is spelled out as a suffix
258 if(version >= 0x0300)
262 case USAGE_POSITION: instructionString += "_position"; break;
263 case USAGE_BLENDWEIGHT: instructionString += "_blendweight"; break;
264 case USAGE_BLENDINDICES: instructionString += "_blendindices"; break;
265 case USAGE_NORMAL: instructionString += "_normal"; break;
266 case USAGE_PSIZE: instructionString += "_psize"; break;
267 case USAGE_TEXCOORD: instructionString += "_texcoord"; break;
268 case USAGE_TANGENT: instructionString += "_tangent"; break;
269 case USAGE_BINORMAL: instructionString += "_binormal"; break;
270 case USAGE_TESSFACTOR: instructionString += "_tessfactor"; break;
271 case USAGE_POSITIONT: instructionString += "_positiont"; break;
272 case USAGE_COLOR: instructionString += "_color"; break;
273 case USAGE_FOG: instructionString += "_fog"; break;
274 case USAGE_DEPTH: instructionString += "_depth"; break;
275 case USAGE_SAMPLE: instructionString += "_sample"; break;
// The usage index is appended as a decimal number (cast to int so it is not streamed as a character)
282 std::ostringstream buffer;
284 buffer << (int)usageIndex;
286 instructionString += buffer.str();
// Below version 3.0 an output register declaration is not expected
289 else ASSERT(dst.type != PARAMETER_OUTPUT);
291 instructionString += " ";
293 instructionString += dst.string(shaderType, version);
294 instructionString += dst.maskString();
296 else if(dst.type == PARAMETER_MISCTYPE) // vPos and vFace
298 instructionString += " ";
300 instructionString += dst.string(shaderType, version);
305 return instructionString;
// Builds the modifier suffix text of a destination operand. The possible pieces are "_int",
// "_sat", "_pp" and "_centroid", appended in that order. Void and label parameters are
// special-cased up front.
308 std::string Shader::DestinationParameter::modifierString() const
310 if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
315 std::string modifierString;
319 modifierString += "_int";
324 modifierString += "_sat";
329 modifierString += "_pp";
334 modifierString += "_centroid";
337 return modifierString;
// Returns the result-scale suffix of a destination operand: codes 1..3 give "_x2", "_x4", "_x8"
// and codes -1..-3 give "_d2", "_d4", "_d8". Void and label parameters are special-cased up front.
340 std::string Shader::DestinationParameter::shiftString() const
342 if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
350 case 1: return "_x2";
351 case 2: return "_x4";
352 case 3: return "_x8";
353 case -1: return "_d2";
354 case -2: return "_d4";
355 case -3: return "_d8";
358 // ASSERT(false); // FIXME
// Returns the write-mask suffix of a destination operand. The mask is a 4-bit set in which
// bit 0 = x, bit 1 = y, bit 2 = z and bit 3 = w, so 0x1..0xE map to ".x" through ".yzw".
// Void and label parameters are special-cased up front.
362 std::string Shader::DestinationParameter::maskString() const
364 if(type == PARAMETER_VOID || type == PARAMETER_LABEL)
372 case 0x1: return ".x";
373 case 0x2: return ".y";
374 case 0x3: return ".xy";
375 case 0x4: return ".z";
376 case 0x5: return ".xz";
377 case 0x6: return ".yz";
378 case 0x7: return ".xyz";
379 case 0x8: return ".w";
380 case 0x9: return ".xw";
381 case 0xA: return ".yw";
382 case 0xB: return ".xyw";
383 case 0xC: return ".zw";
384 case 0xD: return ".xzw";
385 case 0xE: return ".yzw";
// Returns the part of a source modifier that is printed in front of the register name:
// "-" for the negating variants, "1-" for complement, "!" for logical not, and nothing for
// modifiers that are expressed purely as a suffix (see postModifierString).
394 std::string Shader::SourceParameter::preModifierString() const
396 if(type == PARAMETER_VOID)
403 case MODIFIER_NONE: return "";
404 case MODIFIER_NEGATE: return "-";
405 case MODIFIER_BIAS: return "";
406 case MODIFIER_BIAS_NEGATE: return "-";
407 case MODIFIER_SIGN: return "";
408 case MODIFIER_SIGN_NEGATE: return "-";
409 case MODIFIER_COMPLEMENT: return "1-";
410 case MODIFIER_X2: return "";
411 case MODIFIER_X2_NEGATE: return "-";
412 case MODIFIER_DZ: return "";
413 case MODIFIER_DW: return "";
414 case MODIFIER_ABS: return "";
415 case MODIFIER_ABS_NEGATE: return "-";
416 case MODIFIER_NOT: return "!";
// Returns the bracketed relative-addressing suffix of a parameter. Only constant, input, output
// and temporary registers are considered, and the result depends on the type of the relative
// register: an address register gives "[a0.<c>]" and a temporary gives "[r<text>.<c>]".
// The component <c> is chosen by the low two bits of rel.swizzle.
424 std::string Shader::Parameter::relativeString() const
426 if(type == PARAMETER_CONST || type == PARAMETER_INPUT || type == PARAMETER_OUTPUT || type == PARAMETER_TEMP)
428 if(rel.type == PARAMETER_VOID)
432 else if(rel.type == PARAMETER_ADDR)
434 switch(rel.swizzle & 0x03)
436 case 0: return "[a0.x]";
437 case 1: return "[a0.y]";
438 case 2: return "[a0.z]";
439 case 3: return "[a0.w]";
442 else if(rel.type == PARAMETER_TEMP)
// 'buffer' supplies the text placed between "[r" and the component below
444 std::ostringstream buffer;
447 switch(rel.swizzle & 0x03)
449 case 0: return "[r" + buffer.str() + ".x]";
450 case 1: return "[r" + buffer.str() + ".y]";
451 case 2: return "[r" + buffer.str() + ".z]";
452 case 3: return "[r" + buffer.str() + ".w]";
455 else if(rel.type == PARAMETER_LOOP)
// Returns the part of a source modifier that is printed after the register name ("_bias",
// "_bx2", "_x2", "_dz", "_dw", "_abs"). Negating variants share the suffix of their base
// modifier because the sign is emitted by preModifierString.
465 std::string Shader::SourceParameter::postModifierString() const
467 if(type == PARAMETER_VOID)
474 case MODIFIER_NONE: return "";
475 case MODIFIER_NEGATE: return "";
476 case MODIFIER_BIAS: return "_bias";
477 case MODIFIER_BIAS_NEGATE: return "_bias";
478 case MODIFIER_SIGN: return "_bx2";
479 case MODIFIER_SIGN_NEGATE: return "_bx2";
480 case MODIFIER_COMPLEMENT: return "";
481 case MODIFIER_X2: return "_x2";
482 case MODIFIER_X2_NEGATE: return "_x2";
483 case MODIFIER_DZ: return "_dz";
484 case MODIFIER_DW: return "_dw";
485 case MODIFIER_ABS: return "_abs";
486 case MODIFIER_ABS_NEGATE: return "_abs";
487 case MODIFIER_NOT: return "";
495 std::string Shader::SourceParameter::swizzleString() const
497 return Instruction::swizzleString(type, swizzle);
// Decodes an operation token into opcode, control, predicate and co-issue fields.
// Version tokens (upper 16 bits 0xFFFF or 0xFFFE) are stored whole as the opcode. For ordinary
// tokens the opcode is bits 0-15, the control field bits 16-23, the predicate flag bit 28 and
// the co-issue flag bit 30. Bits 24-27 are read as an operand count. The bit 29 check asserts
// (reserved); the bit 31 check's handling is not shown on these lines.
500 void Shader::Instruction::parseOperationToken(unsigned long token, unsigned char majorVersion)
502 if((token & 0xFFFF0000) == 0xFFFF0000 || (token & 0xFFFF0000) == 0xFFFE0000) // Version token
504 opcode = (Opcode)token;
506 control = CONTROL_RESERVED0;
512 opcode = (Opcode)(token & 0x0000FFFF);
513 control = (Control)((token & 0x00FF0000) >> 16);
515 int size = (token & 0x0F000000) >> 24;
517 predicate = (token & 0x10000000) != 0x00000000;
518 coissue = (token & 0x40000000) != 0x00000000;
524 ASSERT(false); // Reserved
536 if((token & 0x20000000) != 0x00000000)
538 ASSERT(false); // Reserved
541 if(majorVersion >= 2)
545 ASSERT(false); // Reserved
549 if((token & 0x80000000) != 0x00000000)
556 void Shader::Instruction::parseDeclarationToken(unsigned long token)
558 samplerType = (SamplerType)((token & 0x78000000) >> 27);
559 usage = (Usage)(token & 0x0000001F);
560 usageIndex = (unsigned char)((token & 0x000F0000) >> 16);
// Decodes a destination operand from token[0], plus token[1] when relative addressing is used
// on version 3 or later. Field layout of token[0]: register index in bits 0-10, register type
// split across bits 11-12 and 28-30, relative-addressing flag in bit 13, write mask in bits
// 16-19, saturate/partial-precision/centroid flags in bits 20-22, shift in bits 24-27.
563 void Shader::Instruction::parseDestinationToken(const unsigned long *token, unsigned char majorVersion)
565 dst.index = (unsigned short)(token[0] & 0x000007FF);
// Bits 11-12 land in bits 3-4 of the type value and bits 28-30 in bits 0-2
566 dst.type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
568 // TODO: Check type and index range
570 bool relative = (token[0] & 0x00002000) != 0x00000000;
571 dst.rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
572 dst.rel.swizzle = 0x00;
// On version 3+ the relative register is described by the next token
575 if(relative && majorVersion >= 3)
577 dst.rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
578 dst.rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
580 else if(relative) ASSERT(false); // Reserved
582 if((token[0] & 0x0000C000) != 0x00000000)
584 ASSERT(false); // Reserved
587 dst.mask = (unsigned char)((token[0] & 0x000F0000) >> 16);
588 dst.saturate = (token[0] & 0x00100000) != 0;
589 dst.partialPrecision = (token[0] & 0x00200000) != 0;
590 dst.centroid = (token[0] & 0x00400000) != 0;
// The 4-bit shift field is placed in the high nibble of a signed char and shifted back down so
// it comes out sign-extended; this relies on right-shifting a negative value being arithmetic
591 dst.shift = (signed char)((token[0] & 0x0F000000) >> 20) >> 4;
593 if(majorVersion >= 2)
597 ASSERT(false); // Reserved
// Bit 31 is tested for being clear here
601 if((token[0] & 0x80000000) != 0x80000000)
// Decodes source operand 'i'. The operand is first reset to defaults. Literal values are then
// stored into src[0] (float, bool or int, with 'i' selecting the component for float and int).
// Ordinary operands take their fields from token[0], plus token[1] when relative addressing is
// used on version 2 or later.
607 void Shader::Instruction::parseSourceToken(int i, const unsigned long *token, unsigned char majorVersion)
611 src[i].type = PARAMETER_VOID;
612 src[i].modifier = MODIFIER_NONE;
// 0xE4 selects x, y, z, w in order (two bits per component)
613 src[i].swizzle = 0xE4;
614 src[i].rel.type = PARAMETER_VOID;
615 src[i].rel.swizzle = 0x00;
616 src[i].rel.scale = 1;
// NOTE(review): the literal reads below reinterpret the token storage through float*/int* casts
// (type punning that also drops const) and read only the first sizeof(float)/sizeof(int) bytes
// of an unsigned long - confirm this is safe where unsigned long is wider than 32 bits
621 src[0].type = PARAMETER_FLOAT4LITERAL;
622 src[0].value[i] = *(float*)token;
625 src[0].type = PARAMETER_BOOL1LITERAL;
626 src[0].boolean[0] = *(int*)token;
629 src[0].type = PARAMETER_INT4LITERAL;
630 src[0].integer[i] = *(int*)token;
// Ordinary operand: register index in bits 0-10, type split across bits 11-12 and 28-30
633 src[i].index = (unsigned short)(token[0] & 0x000007FF);
634 src[i].type = (ParameterType)(((token[0] & 0x00001800) >> 8) | ((token[0] & 0x70000000) >> 28));
636 // FIXME: Check type and index range
638 bool relative = (token[0] & 0x00002000) != 0x00000000;
639 src[i].rel.type = relative ? PARAMETER_ADDR : PARAMETER_VOID;
// Bits 14-15 set: checked only for opcodes other than def/defi/defb
641 if((token[0] & 0x0000C000) != 0x00000000)
643 if(opcode != OPCODE_DEF &&
644 opcode != OPCODE_DEFI &&
645 opcode != OPCODE_DEFB)
// Swizzle in bits 16-23, modifier in bits 24-27
651 src[i].swizzle = (unsigned char)((token[0] & 0x00FF0000) >> 16);
652 src[i].modifier = (Modifier)((token[0] & 0x0F000000) >> 24);
// Bit 31 clear: checked only for opcodes other than def/defi/defb
654 if((token[0] & 0x80000000) != 0x80000000)
656 if(opcode != OPCODE_DEF &&
657 opcode != OPCODE_DEFI &&
658 opcode != OPCODE_DEFB)
// On version 2+ the relative register is described by the next token
664 if(relative && majorVersion >= 2)
666 src[i].rel.type = (ParameterType)(((token[1] & 0x00001800) >> 8) | ((token[1] & 0x70000000) >> 28));
667 src[i].rel.swizzle = (unsigned char)((token[1] & 0x00FF0000) >> 16);
// Formats an 8-bit swizzle as ".<components>". Each of the four selectors is two bits wide
// (x in bits 0-1, y in 2-3, z in 4-5, w in 6-7) and maps 0..3 to x, y, z, w. Void and label
// parameters and the identity swizzle 0xE4 are special-cased up front. Trailing selectors that
// merely repeat are omitted by the nested equality checks.
672 std::string Shader::Instruction::swizzleString(ParameterType type, unsigned char swizzle)
674 if(type == PARAMETER_VOID || type == PARAMETER_LABEL || swizzle == 0xE4)
679 int x = (swizzle & 0x03) >> 0;
680 int y = (swizzle & 0x0C) >> 2;
681 int z = (swizzle & 0x30) >> 4;
682 int w = (swizzle & 0xC0) >> 6;
684 std::string swizzleString = ".";
688 case 0: swizzleString += "x"; break;
689 case 1: swizzleString += "y"; break;
690 case 2: swizzleString += "z"; break;
691 case 3: swizzleString += "w"; break;
// Continue only if the four selectors are not all equal
694 if(!(x == y && y == z && z == w))
698 case 0: swizzleString += "x"; break;
699 case 1: swizzleString += "y"; break;
700 case 2: swizzleString += "z"; break;
701 case 3: swizzleString += "w"; break;
// Continue only if the last three selectors are not all equal
704 if(!(y == z && z == w))
708 case 0: swizzleString += "x"; break;
709 case 1: swizzleString += "y"; break;
710 case 2: swizzleString += "z"; break;
711 case 3: swizzleString += "w"; break;
718 case 0: swizzleString += "x"; break;
719 case 1: swizzleString += "y"; break;
720 case 2: swizzleString += "z"; break;
721 case 3: swizzleString += "w"; break;
727 return swizzleString;
// Maps an opcode to the mnemonic used in disassembly output. 'version' is consulted only by
// the check that returns "tex" for versions below 0x0104. Version tokens print as their
// profile name (for example "ps_3_0"). Note that OPCODE_NE prints as "neq".
730 std::string Shader::Instruction::operationString(unsigned short version) const
734 case OPCODE_NULL: return "null";
735 case OPCODE_NOP: return "nop";
736 case OPCODE_MOV: return "mov";
737 case OPCODE_ADD: return "add";
738 case OPCODE_IADD: return "iadd";
739 case OPCODE_SUB: return "sub";
740 case OPCODE_ISUB: return "isub";
741 case OPCODE_MAD: return "mad";
742 case OPCODE_IMAD: return "imad";
743 case OPCODE_MUL: return "mul";
744 case OPCODE_IMUL: return "imul";
745 case OPCODE_RCPX: return "rcpx";
746 case OPCODE_DIV: return "div";
747 case OPCODE_IDIV: return "idiv";
748 case OPCODE_UDIV: return "udiv";
749 case OPCODE_MOD: return "mod";
750 case OPCODE_IMOD: return "imod";
751 case OPCODE_UMOD: return "umod";
752 case OPCODE_SHL: return "shl";
753 case OPCODE_ISHR: return "ishr";
754 case OPCODE_USHR: return "ushr";
755 case OPCODE_RSQX: return "rsqx";
756 case OPCODE_SQRT: return "sqrt";
757 case OPCODE_RSQ: return "rsq";
758 case OPCODE_LEN2: return "len2";
759 case OPCODE_LEN3: return "len3";
760 case OPCODE_LEN4: return "len4";
761 case OPCODE_DIST1: return "dist1";
762 case OPCODE_DIST2: return "dist2";
763 case OPCODE_DIST3: return "dist3";
764 case OPCODE_DIST4: return "dist4";
765 case OPCODE_DP3: return "dp3";
766 case OPCODE_DP4: return "dp4";
767 case OPCODE_DET2: return "det2";
768 case OPCODE_DET3: return "det3";
769 case OPCODE_DET4: return "det4";
770 case OPCODE_MIN: return "min";
771 case OPCODE_IMIN: return "imin";
772 case OPCODE_UMIN: return "umin";
773 case OPCODE_MAX: return "max";
774 case OPCODE_IMAX: return "imax";
775 case OPCODE_UMAX: return "umax";
776 case OPCODE_SLT: return "slt";
777 case OPCODE_SGE: return "sge";
778 case OPCODE_EXP2X: return "exp2x";
779 case OPCODE_LOG2X: return "log2x";
780 case OPCODE_LIT: return "lit";
781 case OPCODE_ATT: return "att";
782 case OPCODE_LRP: return "lrp";
783 case OPCODE_STEP: return "step";
784 case OPCODE_SMOOTH: return "smooth";
785 case OPCODE_FLOATBITSTOINT: return "floatBitsToInt";
786 case OPCODE_FLOATBITSTOUINT: return "floatBitsToUInt";
787 case OPCODE_INTBITSTOFLOAT: return "intBitsToFloat";
788 case OPCODE_UINTBITSTOFLOAT: return "uintBitsToFloat";
789 case OPCODE_PACKSNORM2x16: return "packSnorm2x16";
790 case OPCODE_PACKUNORM2x16: return "packUnorm2x16";
791 case OPCODE_PACKHALF2x16: return "packHalf2x16";
792 case OPCODE_UNPACKSNORM2x16: return "unpackSnorm2x16";
793 case OPCODE_UNPACKUNORM2x16: return "unpackUnorm2x16";
794 case OPCODE_UNPACKHALF2x16: return "unpackHalf2x16";
795 case OPCODE_FRC: return "frc";
796 case OPCODE_M4X4: return "m4x4";
797 case OPCODE_M4X3: return "m4x3";
798 case OPCODE_M3X4: return "m3x4";
799 case OPCODE_M3X3: return "m3x3";
800 case OPCODE_M3X2: return "m3x2";
801 case OPCODE_CALL: return "call";
802 case OPCODE_CALLNZ: return "callnz";
803 case OPCODE_LOOP: return "loop";
804 case OPCODE_RET: return "ret";
805 case OPCODE_ENDLOOP: return "endloop";
806 case OPCODE_LABEL: return "label";
807 case OPCODE_DCL: return "dcl";
808 case OPCODE_POWX: return "powx";
809 case OPCODE_CRS: return "crs";
810 case OPCODE_SGN: return "sgn";
811 case OPCODE_ABS: return "abs";
812 case OPCODE_NRM2: return "nrm2";
813 case OPCODE_NRM3: return "nrm3";
814 case OPCODE_NRM4: return "nrm4";
815 case OPCODE_SINCOS: return "sincos";
816 case OPCODE_REP: return "rep";
817 case OPCODE_ENDREP: return "endrep";
818 case OPCODE_IF: return "if";
819 case OPCODE_IFC: return "ifc";
820 case OPCODE_ELSE: return "else";
821 case OPCODE_ENDIF: return "endif";
822 case OPCODE_BREAK: return "break";
823 case OPCODE_BREAKC: return "breakc";
824 case OPCODE_MOVA: return "mova";
825 case OPCODE_DEFB: return "defb";
826 case OPCODE_DEFI: return "defi";
827 case OPCODE_TEXCOORD: return "texcoord";
828 case OPCODE_TEXKILL: return "texkill";
829 case OPCODE_DISCARD: return "discard";
// Version-dependent mnemonic: versions below 0x0104 use "tex"
831 if(version < 0x0104) return "tex";
833 case OPCODE_TEXBEM: return "texbem";
834 case OPCODE_TEXBEML: return "texbeml";
835 case OPCODE_TEXREG2AR: return "texreg2ar";
836 case OPCODE_TEXREG2GB: return "texreg2gb";
837 case OPCODE_TEXM3X2PAD: return "texm3x2pad";
838 case OPCODE_TEXM3X2TEX: return "texm3x2tex";
839 case OPCODE_TEXM3X3PAD: return "texm3x3pad";
840 case OPCODE_TEXM3X3TEX: return "texm3x3tex";
841 case OPCODE_RESERVED0: return "reserved0";
842 case OPCODE_TEXM3X3SPEC: return "texm3x3spec";
843 case OPCODE_TEXM3X3VSPEC: return "texm3x3vspec";
844 case OPCODE_EXPP: return "expp";
845 case OPCODE_LOGP: return "logp";
846 case OPCODE_CND: return "cnd";
847 case OPCODE_DEF: return "def";
848 case OPCODE_TEXREG2RGB: return "texreg2rgb";
849 case OPCODE_TEXDP3TEX: return "texdp3tex";
850 case OPCODE_TEXM3X2DEPTH: return "texm3x2depth";
851 case OPCODE_TEXDP3: return "texdp3";
852 case OPCODE_TEXM3X3: return "texm3x3";
853 case OPCODE_TEXDEPTH: return "texdepth";
854 case OPCODE_CMP0: return "cmp0";
855 case OPCODE_ICMP: return "icmp";
856 case OPCODE_UCMP: return "ucmp";
857 case OPCODE_SELECT: return "select";
858 case OPCODE_EXTRACT: return "extract";
859 case OPCODE_INSERT: return "insert";
860 case OPCODE_BEM: return "bem";
861 case OPCODE_DP2ADD: return "dp2add";
862 case OPCODE_DFDX: return "dFdx";
863 case OPCODE_DFDY: return "dFdy";
864 case OPCODE_FWIDTH: return "fwidth";
865 case OPCODE_TEXLDD: return "texldd";
866 case OPCODE_CMP: return "cmp";
867 case OPCODE_TEXLDL: return "texldl";
868 case OPCODE_BREAKP: return "breakp";
869 case OPCODE_PHASE: return "phase";
870 case OPCODE_COMMENT: return "comment";
871 case OPCODE_END: return "end";
872 case OPCODE_PS_1_0: return "ps_1_0";
873 case OPCODE_PS_1_1: return "ps_1_1";
874 case OPCODE_PS_1_2: return "ps_1_2";
875 case OPCODE_PS_1_3: return "ps_1_3";
876 case OPCODE_PS_1_4: return "ps_1_4";
877 case OPCODE_PS_2_0: return "ps_2_0";
878 case OPCODE_PS_2_x: return "ps_2_x";
879 case OPCODE_PS_3_0: return "ps_3_0";
880 case OPCODE_VS_1_0: return "vs_1_0";
881 case OPCODE_VS_1_1: return "vs_1_1";
882 case OPCODE_VS_2_0: return "vs_2_0";
883 case OPCODE_VS_2_x: return "vs_2_x";
884 case OPCODE_VS_2_sw: return "vs_2_sw";
885 case OPCODE_VS_3_0: return "vs_3_0";
886 case OPCODE_VS_3_sw: return "vs_3_sw";
887 case OPCODE_WHILE: return "while";
888 case OPCODE_ENDWHILE: return "endwhile";
889 case OPCODE_COS: return "cos";
890 case OPCODE_SIN: return "sin";
891 case OPCODE_TAN: return "tan";
892 case OPCODE_ACOS: return "acos";
893 case OPCODE_ASIN: return "asin";
894 case OPCODE_ATAN: return "atan";
895 case OPCODE_ATAN2: return "atan2";
896 case OPCODE_COSH: return "cosh";
897 case OPCODE_SINH: return "sinh";
898 case OPCODE_TANH: return "tanh";
899 case OPCODE_ACOSH: return "acosh";
900 case OPCODE_ASINH: return "asinh";
901 case OPCODE_ATANH: return "atanh";
902 case OPCODE_DP1: return "dp1";
903 case OPCODE_DP2: return "dp2";
904 case OPCODE_TRUNC: return "trunc";
905 case OPCODE_FLOOR: return "floor";
906 case OPCODE_ROUND: return "round";
907 case OPCODE_ROUNDEVEN: return "roundEven";
908 case OPCODE_CEIL: return "ceil";
909 case OPCODE_EXP2: return "exp2";
910 case OPCODE_LOG2: return "log2";
911 case OPCODE_EXP: return "exp";
912 case OPCODE_LOG: return "log";
913 case OPCODE_POW: return "pow";
914 case OPCODE_F2B: return "f2b";
915 case OPCODE_B2F: return "b2f";
916 case OPCODE_F2I: return "f2i";
917 case OPCODE_I2F: return "i2f";
918 case OPCODE_F2U: return "f2u";
919 case OPCODE_U2F: return "u2f";
920 case OPCODE_B2I: return "b2i";
921 case OPCODE_I2B: return "i2b";
922 case OPCODE_B2U: return "b2u";
923 case OPCODE_U2B: return "u2b";
924 case OPCODE_ALL: return "all";
925 case OPCODE_ANY: return "any";
926 case OPCODE_NEG: return "neg";
927 case OPCODE_INEG: return "ineg";
928 case OPCODE_ISNAN: return "isnan";
929 case OPCODE_ISINF: return "isinf";
930 case OPCODE_NOT: return "not";
931 case OPCODE_OR: return "or";
932 case OPCODE_XOR: return "xor";
933 case OPCODE_AND: return "and";
934 case OPCODE_EQ: return "eq";
935 case OPCODE_NE: return "neq";
936 case OPCODE_FORWARD1: return "forward1";
937 case OPCODE_FORWARD2: return "forward2";
938 case OPCODE_FORWARD3: return "forward3";
939 case OPCODE_FORWARD4: return "forward4";
940 case OPCODE_REFLECT1: return "reflect1";
941 case OPCODE_REFLECT2: return "reflect2";
942 case OPCODE_REFLECT3: return "reflect3";
943 case OPCODE_REFLECT4: return "reflect4";
944 case OPCODE_REFRACT1: return "refract1";
945 case OPCODE_REFRACT2: return "refract2";
946 case OPCODE_REFRACT3: return "refract3";
947 case OPCODE_REFRACT4: return "refract4";
948 case OPCODE_LEAVE: return "leave";
949 case OPCODE_CONTINUE: return "continue";
950 case OPCODE_TEST: return "test";
// Returns the suffix appended to the mnemonic. For opcodes other than loop, breakc, ifc and cmp
// the 'project' flag yields "p". Comparison codes 1..6 map to "_gt", "_eq", "_ge", "_lt",
// "_ne" and "_le".
958 std::string Shader::Instruction::controlString() const
960 if(opcode != OPCODE_LOOP && opcode != OPCODE_BREAKC && opcode != OPCODE_IFC && opcode != OPCODE_CMP)
962 if(project) return "p";
971 case 1: return "_gt";
972 case 2: return "_eq";
973 case 3: return "_ge";
974 case 4: return "_lt";
975 case 5: return "_ne";
976 case 6: return "_le";
979 // ASSERT(false); // FIXME
// Returns the textual name of a parameter. Float literals print as "{v0, v1, v2, v3}". Most
// register types print as the type prefix from typeString() followed by the contents of
// 'buffer'. The types excluded by the long condition (rasterizer output, the vertex shader
// address register, loop, predicate, misc) print as the bare typeString().
983 std::string Shader::Parameter::string(ShaderType shaderType, unsigned short version) const
985 std::ostringstream buffer;
987 if(type == PARAMETER_FLOAT4LITERAL)
989 buffer << '{' << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << '}';
993 else if(type != PARAMETER_RASTOUT && !(type == PARAMETER_ADDR && shaderType == SHADER_VERTEX) && type != PARAMETER_LOOP && type != PARAMETER_PREDICATE && type != PARAMETER_MISCTYPE)
997 return typeString(shaderType, version) + buffer.str();
1001 return typeString(shaderType, version);
// Returns the register-file prefix (or full name) for this parameter's type. Several names
// depend on context: the PARAMETER_TEXTURE case prints "t" in pixel shaders,
// PARAMETER_RASTOUT and PARAMETER_MISCTYPE pick a name from 'index', and the
// PARAMETER_TEXCRDOUT case prints "oT" below version 3.0. The commented-out case labels are
// kept from the original source.
1005 std::string Shader::Parameter::typeString(ShaderType shaderType, unsigned short version) const
1009 case PARAMETER_TEMP: return "r";
1010 case PARAMETER_INPUT: return "v";
1011 case PARAMETER_CONST: return "c";
1012 case PARAMETER_TEXTURE:
1013 // case PARAMETER_ADDR:
1014 if(shaderType == SHADER_PIXEL) return "t";
1016 case PARAMETER_RASTOUT:
1017 if(index == 0) return "oPos";
1018 else if(index == 1) return "oFog";
1019 else if(index == 2) return "oPts";
1021 case PARAMETER_ATTROUT: return "oD";
1022 case PARAMETER_TEXCRDOUT:
1023 // case PARAMETER_OUTPUT: return "";
1024 if(version < 0x0300) return "oT";
1026 case PARAMETER_CONSTINT: return "i";
1027 case PARAMETER_COLOROUT: return "oC";
1028 case PARAMETER_DEPTHOUT: return "oDepth";
1029 case PARAMETER_SAMPLER: return "s";
1030 // case PARAMETER_CONST2: return "";
1031 // case PARAMETER_CONST3: return "";
1032 // case PARAMETER_CONST4: return "";
1033 case PARAMETER_CONSTBOOL: return "b";
1034 case PARAMETER_LOOP: return "aL";
1035 // case PARAMETER_TEMPFLOAT16: return "";
1036 case PARAMETER_MISCTYPE:
1037 if(index == 0) return "vPos";
1038 else if(index == 1) return "vFace";
1040 case PARAMETER_LABEL: return "l";
1041 case PARAMETER_PREDICATE: return "p0";
1042 case PARAMETER_FLOAT4LITERAL: return "";
1043 case PARAMETER_BOOL1LITERAL: return "";
1044 case PARAMETER_INT4LITERAL: return "";
1045 // case PARAMETER_VOID: return "";
1053 bool Shader::Instruction::isBranch() const
1055 return opcode == OPCODE_IF || opcode == OPCODE_IFC;
1058 bool Shader::Instruction::isCall() const
1060 return opcode == OPCODE_CALL || opcode == OPCODE_CALLNZ;
1063 bool Shader::Instruction::isBreak() const
1065 return opcode == OPCODE_BREAK || opcode == OPCODE_BREAKC || opcode == OPCODE_BREAKP;
1068 bool Shader::Instruction::isLoop() const
1070 return opcode == OPCODE_LOOP || opcode == OPCODE_REP || opcode == OPCODE_WHILE;
1073 bool Shader::Instruction::isEndLoop() const
1075 return opcode == OPCODE_ENDLOOP || opcode == OPCODE_ENDREP || opcode == OPCODE_ENDWHILE;
// Reports whether this instruction is predicated.
// NOTE(review): presumably combines the explicit predicate flag with the analysis flags - confirm against the body.
1078 bool Shader::Instruction::isPredicated() const
// Constructs a shader and assigns it the next serial ID by post-incrementing the shared serialCounter.
1087 Shader::Shader() : serialID(serialCounter++)
// Frees every Instruction object held in the instruction list
1094 for(unsigned int i = 0; i < instruction.size(); i++)
1096 delete instruction[i];
// Parses a shader token stream into Instruction objects. The first token supplies the minor
// version (bits 0-7), major version (bits 8-15) and shader type (bits 16-31). The instruction
// count comes from the matching VertexShader/PixelShader validate() call.
1101 void Shader::parse(const unsigned long *token)
1103 minorVersion = (unsigned char)(token[0] & 0x000000FF);
1104 majorVersion = (unsigned char)((token[0] & 0x0000FF00) >> 8);
1105 shaderType = (ShaderType)((token[0] & 0xFFFF0000) >> 16);
1109 if(shaderType == SHADER_VERTEX)
1111 length = VertexShader::validate(token);
1113 else if(shaderType == SHADER_PIXEL)
1115 length = PixelShader::validate(token);
// A zero length is treated as a programming error
1119 ASSERT(length != 0);
1120 instruction.resize(length);
1122 for(int i = 0; i < length; i++)
// Comment tokens are skipped: bits 16-30 hold the comment length in tokens
1124 while((*token & 0x0000FFFF) == 0x0000FFFE) // Comment token
// NOTE(review): this 'length' shadows the instruction count used by the enclosing loop
1126 int length = (*token & 0x7FFF0000) >> 16;
1128 token += length + 1;
1131 int tokenCount = size(*token);
// The new Instruction is stored as a raw owning pointer in the instruction list
1133 instruction[i] = new Instruction(token, tokenCount, majorVersion);
// Advance past the operation token and its operands
1135 token += 1 + tokenCount;
1139 int Shader::size(unsigned long opcode) const
1141 return size(opcode, version);
// Computes how many tokens follow an operation token for the given shader version. Comment
// tokens carry their own length in bits 16-30. Version tokens, 'phase' and 'end' are excluded
// from the length lookup. For the rest, version 2.0 and later encode the length in bits 24-27
// of the token, while the other branch looks it up in the static per-opcode table. Version 1.4
// gets an extra opcode-specific adjustment.
1144 int Shader::size(unsigned long opcode, unsigned short version)
1146 if(version > 0x0300)
1151 static const char size[] =
1270 if((opcode & 0x0000FFFF) == OPCODE_COMMENT)
1272 return (opcode & 0x7FFF0000) >> 16;
1275 if(opcode != OPCODE_PS_1_0 &&
1276 opcode != OPCODE_PS_1_1 &&
1277 opcode != OPCODE_PS_1_2 &&
1278 opcode != OPCODE_PS_1_3 &&
1279 opcode != OPCODE_PS_1_4 &&
1280 opcode != OPCODE_PS_2_0 &&
1281 opcode != OPCODE_PS_2_x &&
1282 opcode != OPCODE_PS_3_0 &&
1283 opcode != OPCODE_VS_1_0 &&
1284 opcode != OPCODE_VS_1_1 &&
1285 opcode != OPCODE_VS_2_0 &&
1286 opcode != OPCODE_VS_2_x &&
1287 opcode != OPCODE_VS_2_sw &&
1288 opcode != OPCODE_VS_3_0 &&
1289 opcode != OPCODE_VS_3_sw &&
1290 opcode != OPCODE_PHASE &&
1291 opcode != OPCODE_END)
1293 if(version >= 0x0200)
1295 length = (opcode & 0x0F000000) >> 24;
// NOTE(review): no range check on the table index is visible here - confirm opcodes stay within the table
1299 length = size[opcode & 0x0000FFFF];
1308 if(version == 0x0104)
1310 switch(opcode & 0x0000FFFF)
1315 case OPCODE_TEXCOORD:
1326 bool Shader::maskContainsComponent(int mask, int component)
1328 return (mask & (1 << component)) != 0;
1331 bool Shader::swizzleContainsComponent(int swizzle, int component)
1333 if((swizzle & 0x03) >> 0 == component) return true;
1334 if((swizzle & 0x0C) >> 2 == component) return true;
1335 if((swizzle & 0x30) >> 4 == component) return true;
1336 if((swizzle & 0xC0) >> 6 == component) return true;
1341 bool Shader::swizzleContainsComponentMasked(int swizzle, int component, int mask)
1343 if(mask & 0x1) if((swizzle & 0x03) >> 0 == component) return true;
1344 if(mask & 0x2) if((swizzle & 0x0C) >> 2 == component) return true;
1345 if(mask & 0x4) if((swizzle & 0x30) >> 4 == component) return true;
1346 if(mask & 0x8) if((swizzle & 0xC0) >> 6 == component) return true;
1351 bool Shader::containsDynamicBranching() const
1353 return dynamicBranching;
1356 bool Shader::containsBreakInstruction() const
1358 return containsBreak;
1361 bool Shader::containsContinueInstruction() const
1363 return containsContinue;
1366 bool Shader::containsLeaveInstruction() const
1368 return containsLeave;
1371 bool Shader::containsDefineInstruction() const
1373 return containsDefine;
1376 bool Shader::usesSampler(int index) const
1378 return (usedSamplers & (1 << index)) != 0;
// Accessor for the shader's serial ID (the value assigned from serialCounter in the constructor).
1381 int Shader::getSerialID() const
1386 size_t Shader::getLength() const
1388 return instruction.size();
// Accessor for the shader type (parse() derives it from the version token).
1391 Shader::ShaderType Shader::getShaderType() const
// Accessor for the shader version.
1396 unsigned short Shader::getVersion() const
1401 void Shader::print(const char *fileName, ...) const
1403 char fullName[1024 + 1];
1406 va_start(vararg, fileName);
1407 vsnprintf(fullName, 1024, fileName, vararg);
1410 std::ofstream file(fullName, std::ofstream::out);
1412 for(unsigned int i = 0; i < instruction.size(); i++)
1414 file << instruction[i]->string(shaderType, version) << std::endl;
1418 void Shader::printInstruction(int index, const char *fileName) const
1420 std::ofstream file(fileName, std::ofstream::out | std::ofstream::app);
1422 file << instruction[index]->string(shaderType, version) << std::endl;
1425 void Shader::append(Instruction *instruction)
1427 this->instruction.push_back(instruction);
1430 void Shader::declareSampler(int i)
1432 usedSamplers |= 1 << i;
1435 const Shader::Instruction *Shader::getInstruction(unsigned int i) const
1437 ASSERT(i < instruction.size());
1439 return instruction[i];
// Entry point for the shader optimization passes.
// NOTE(review): presumably runs optimizeLeave(), optimizeCall() and removeNull() - confirm against the body.
1442 void Shader::optimize()
1449 void Shader::optimizeLeave()
1451 // A return (leave) right before the end of a function or the shader can be removed
1452 for(unsigned int i = 0; i < instruction.size(); i++)
1454 if(instruction[i]->opcode == OPCODE_LEAVE)
1456 if(i == instruction.size() - 1 || instruction[i + 1]->opcode == OPCODE_RET)
1458 instruction[i]->opcode = OPCODE_NULL;
// Removes function bodies that are never called and simplifies a trivial entry call.
// Removal is done by overwriting opcodes with OPCODE_NULL.
1464 void Shader::optimizeCall()
1466 // Eliminate uncalled functions
1467 std::set<int> calledFunctions;
1472 calledFunctions.clear();
// Collect the labels targeted by call/callnz instructions
1475 for(unsigned int i = 0; i < instruction.size(); i++)
1477 if(instruction[i]->isCall())
1479 calledFunctions.insert(instruction[i]->dst.label);
1483 if(!calledFunctions.empty())
1485 for(unsigned int i = 0; i < instruction.size(); i++)
1487 if(instruction[i]->opcode == OPCODE_LABEL)
// Label that no call refers to: null out instructions from the label onward, watching for its 'ret'
1489 if(calledFunctions.find(instruction[i]->dst.label) == calledFunctions.end())
1491 for( ; i < instruction.size(); i++)
1493 Opcode oldOpcode = instruction[i]->opcode;
1494 instruction[i]->opcode = OPCODE_NULL;
1496 if(oldOpcode == OPCODE_RET)
1508 // Optimize the entry call
// Applies when the shader starts with 'call' followed directly by 'ret'
1509 if(instruction.size() >= 2 && instruction[0]->opcode == OPCODE_CALL && instruction[1]->opcode == OPCODE_RET)
// With exactly one called function the call/ret pair and the remaining label/ret markers are nulled
1511 if(calledFunctions.size() == 1)
1513 instruction[0]->opcode = OPCODE_NULL;
1514 instruction[1]->opcode = OPCODE_NULL;
1516 for(size_t i = 2; i < instruction.size(); i++)
1518 if(instruction[i]->opcode == OPCODE_LABEL || instruction[i]->opcode == OPCODE_RET)
1520 instruction[i]->opcode = OPCODE_NULL;
// Compacts the instruction list in place: instructions whose opcode is not OPCODE_NULL are
// moved toward the front, instruction objects are deleted, and the list is resized to 'size'.
1527 void Shader::removeNull()
1530 for(size_t i = 0; i < instruction.size(); i++)
1532 if(instruction[i]->opcode != OPCODE_NULL)
// Keep: copy the pointer down to the next free slot
1534 instruction[size] = instruction[i];
1539 delete instruction[i];
1543 instruction.resize(size);
// Recomputes dirtyConstantsF, dirtyConstantsI and dirtyConstantsB from the instruction
// list. Each counter is reset to 0 and then raised to dst.index + 1 whenever an
// instruction reaching its update has a larger value, so it ends up one past the
// highest destination index seen for that counter.
// NOTE(review): the case labels of the switch are not visible in this excerpt; presumably
// the three updates belong to float, integer and boolean constant definitions - confirm.
1546 void Shader::analyzeDirtyConstants()
1548 dirtyConstantsF = 0;
1549 dirtyConstantsI = 0;
1550 dirtyConstantsB = 0;
1552 for(unsigned int i = 0; i < instruction.size(); i++)
1554 switch(instruction[i]->opcode)
// Track the highest destination index + 1 in dirtyConstantsF.
1557 if(instruction[i]->dst.index + 1 > dirtyConstantsF)
1559 dirtyConstantsF = instruction[i]->dst.index + 1;
// Same bookkeeping for dirtyConstantsI.
1563 if(instruction[i]->dst.index + 1 > dirtyConstantsI)
1565 dirtyConstantsI = instruction[i]->dst.index + 1;
// Same bookkeeping for dirtyConstantsB.
1569 if(instruction[i]->dst.index + 1 > dirtyConstantsB)
1571 dirtyConstantsB = instruction[i]->dst.index + 1;
// Analyzes the instruction list for control flow.
// First pass: sets the shader-wide flags dynamicBranching, containsLeave, containsBreak,
// containsContinue and containsDefine.
// Second pass: tags individual instructions with analysisBranch, analysisBreak,
// analysisContinue and analysisLeave, and forwards a flag to called functions through
// markFunctionAnalysis().
// NOTE(review): this excerpt omits some lines of the body (several case labels, the depth
// counter updates and the conditions guarding some statements); the comments below
// describe only the statements that are shown.
1578 void Shader::analyzeDynamicBranching()
// Reset all shader-wide flags before scanning.
1580 dynamicBranching = false;
1581 containsLeave = false;
1582 containsBreak = false;
1583 containsContinue = false;
1584 containsDefine = false;
1586 // Determine global presence of branching instructions
1587 for(unsigned int i = 0; i < instruction.size(); i++)
1589 switch(instruction[i]->opcode)
// NOTE(review): further case labels probably precede this one; they are not visible here.
1599 case OPCODE_CONTINUE:
// A first source operand of any type other than PARAMETER_CONSTBOOL marks the shader as dynamically branching.
1600 if(instruction[i]->src[0].type != PARAMETER_CONSTBOOL)
1602 dynamicBranching = true;
1605 if(instruction[i]->opcode == OPCODE_LEAVE)
1607 containsLeave = true;
1610 if(instruction[i]->isBreak())
1612 containsBreak = true;
1615 if(instruction[i]->opcode == OPCODE_CONTINUE)
1617 containsContinue = true;
// NOTE(review): the case label(s) leading to this statement are not visible, so it cannot be
// told from this excerpt whether the branching cases above fall through into it - confirm.
1622 containsDefine = true;
1626 // Conservatively determine which instructions are affected by dynamic branching
// State carried across instructions during the second pass.
// NOTE(review): a break depth counter is presumably declared alongside these; its declaration is not shown.
1627 int branchDepth = 0;
1629 int continueDepth = 0;
1630 bool leaveReturn = false;
1632 for(unsigned int i = 0; i < instruction.size(); i++)
// NOTE(review): presumably a branch instruction raises branchDepth and ENDIF lowers it; the updates are not shown.
1635 if(instruction[i]->isBranch())
1639 else if(instruction[i]->opcode == OPCODE_ENDIF)
// NOTE(review): presumably only reached while inside a branch; the guarding condition is not shown.
1646 instruction[i]->analysisBranch = true;
// Propagate the branch flag into the called function.
1648 if(instruction[i]->isCall())
1650 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1654 // Break statement
1655 if(instruction[i]->isBreak())
1662 if(instruction[i]->isLoop()) // Nested loop, don't make the end of it disable the break execution mask
1666 else if(instruction[i]->isEndLoop())
1671 instruction[i]->analysisBreak = true;
// NOTE(review): this passes ANALYSIS_BRANCH although the surrounding statements set analysisBreak,
// whereas the continue and leave sections pass their own flag - possible copy-paste slip; confirm
// whether a break-specific flag was intended.
1673 if(instruction[i]->isCall())
1675 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_BRANCH);
1679 // Continue statement
1680 if(instruction[i]->opcode == OPCODE_CONTINUE)
1685 if(continueDepth > 0)
1687 if(instruction[i]->isLoop()) // Nested loop, don't make the end of it disable the continue execution mask
1691 else if(instruction[i]->isEndLoop())
1696 instruction[i]->analysisContinue = true;
// Propagate the continue flag into the called function.
1698 if(instruction[i]->isCall())
1700 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_CONTINUE);
1704 // Return (leave) statement
// NOTE(review): presumably a LEAVE sets leaveReturn; the statement is not shown.
1705 if(instruction[i]->opcode == OPCODE_LEAVE)
1709 else if(instruction[i]->opcode == OPCODE_RET) // End of the function
1711 leaveReturn = false;
// NOTE(review): presumably only reached while leaveReturn is set; the guarding condition is not shown.
1716 instruction[i]->analysisLeave = true;
// Propagate the leave flag into the called function.
1718 if(instruction[i]->isCall())
1720 markFunctionAnalysis(instruction[i]->dst.label, ANALYSIS_LEAVE);
// ORs `flag` into the `analysis` field of instructions associated with the function whose
// OPCODE_LABEL carries `functionLabel`, and recurses into functions called from there
// with the same flag.
// NOTE(review): the lines that change `marker` and the conditions around the statements
// below are not visible in this excerpt; presumably `marker` becomes true at the matching
// label and the scan stops at the function's RET - confirm against the full file.
// NOTE(review): no guard against recursive call chains is visible here.
1726 void Shader::markFunctionAnalysis(int functionLabel, Analysis flag)
1728 bool marker = false;
1729 for(unsigned int i = 0; i < instruction.size(); i++)
// Locate the label instruction that opens the requested function.
1733 if(instruction[i]->opcode == OPCODE_LABEL && instruction[i]->dst.label == functionLabel)
1740 if(instruction[i]->opcode == OPCODE_RET)
// Calls are followed so the callee receives the same flag.
1744 else if(instruction[i]->isCall())
1746 markFunctionAnalysis(instruction[i]->dst.label, flag);
1749 instruction[i]->analysis |= flag;
// Scans the instruction list and sets bits in the usedSamplers mask for the indices
// referenced by the opcodes listed in the switch.
// NOTE(review): some case labels and the `else` keyword between the two mask updates are
// not visible in this excerpt; the comments describe only the statements shown.
1754 void Shader::analyzeSamplers()
1756 for(unsigned int i = 0; i < instruction.size(); i++)
1758 switch(instruction[i]->opcode)
// Opcodes that reach the bookkeeping below (the list may be longer in the full file).
1762 case OPCODE_TEXBEML:
1763 case OPCODE_TEXREG2AR:
1764 case OPCODE_TEXREG2GB:
1765 case OPCODE_TEXM3X2TEX:
1766 case OPCODE_TEXM3X3TEX:
1767 case OPCODE_TEXM3X3SPEC:
1768 case OPCODE_TEXM3X3VSPEC:
1769 case OPCODE_TEXREG2RGB:
1770 case OPCODE_TEXDP3TEX:
1771 case OPCODE_TEXM3X2DEPTH:
1775 Parameter &dst = instruction[i]->dst;
1776 Parameter &src1 = instruction[i]->src[1];
// For major version 2 and up, the bit index is taken from the second source operand.
1778 if(majorVersion >= 2)
1780 usedSamplers |= 1 << src1.index;
// NOTE(review): presumably the else branch - here the bit index is taken from the destination operand.
1784 usedSamplers |= 1 << dst.index;
1792 // Assigns a unique index to each call instruction, on a per label basis.
1793 // This is used to know what basic block to return to.
1794 void Shader::analyzeCallSites()
1796 int callSiteIndex[2048] = {0};
1798 for(unsigned int i = 0; i < instruction.size(); i++)
1800 if(instruction[i]->opcode == OPCODE_CALL || instruction[i]->opcode == OPCODE_CALLNZ)
1802 int label = instruction[i]->dst.label;
1804 instruction[i]->dst.callSite = callSiteIndex[label]++;
1809 void Shader::analyzeDynamicIndexing()
1811 dynamicallyIndexedTemporaries = false;
1812 dynamicallyIndexedInput = false;
1813 dynamicallyIndexedOutput = false;
1815 for(unsigned int i = 0; i < instruction.size(); i++)
1817 if(instruction[i]->dst.rel.type == PARAMETER_ADDR ||
1818 instruction[i]->dst.rel.type == PARAMETER_LOOP ||
1819 instruction[i]->dst.rel.type == PARAMETER_TEMP ||
1820 instruction[i]->dst.rel.type == PARAMETER_CONST)
1822 switch(instruction[i]->dst.type)
1824 case PARAMETER_TEMP: dynamicallyIndexedTemporaries = true; break;
1825 case PARAMETER_INPUT: dynamicallyIndexedInput = true; break;
1826 case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true; break;
1830 for(int j = 0; j < 3; j++)
1832 if(instruction[i]->src[j].rel.type == PARAMETER_ADDR ||
1833 instruction[i]->src[j].rel.type == PARAMETER_LOOP ||
1834 instruction[i]->src[j].rel.type == PARAMETER_TEMP ||
1835 instruction[i]->src[j].rel.type == PARAMETER_CONST)
1837 switch(instruction[i]->src[j].type)
1839 case PARAMETER_TEMP: dynamicallyIndexedTemporaries = true; break;
1840 case PARAMETER_INPUT: dynamicallyIndexedInput = true; break;
1841 case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true; break;