OSDN Git Service

Fix Clang errors on Linux.
[android-x86/external-swiftshader.git] / src / OpenGL / compiler / OutputASM.cpp
1 // SwiftShader Software Renderer\r
2 //\r
3 // Copyright(c) 2005-2013 TransGaming Inc.\r
4 //\r
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,\r
6 // transcribed, stored in a retrieval system, translated into any human or computer\r
7 // language by any means, or disclosed to third parties without the explicit written\r
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express\r
9 // or implied, including but not limited to any patent rights, are granted to you.\r
10 //\r
11 \r
12 #include "OutputASM.h"\r
13 #include "Common/Math.hpp"\r
14 \r
15 #include "common/debug.h"\r
16 #include "InfoSink.h"\r
17 \r
18 #include "libGLESv2/Shader.h"\r
19 \r
20 #include <GLES2/gl2.h>\r
21 #include <GLES2/gl2ext.h>\r
22 #include <GLES3/gl3.h>\r
23 \r
24 namespace glsl\r
25 {\r
26         // Integer to TString conversion\r
27         TString str(int i)\r
28         {\r
29                 char buffer[20];\r
30                 sprintf(buffer, "%d", i);\r
31                 return buffer;\r
32         }\r
33 \r
34         class Temporary : public TIntermSymbol\r
35         {\r
36         public:\r
37                 Temporary(OutputASM *assembler) : TIntermSymbol(TSymbolTableLevel::nextUniqueId(), "tmp", TType(EbtFloat, EbpHigh, EvqTemporary, 4, 1, false)), assembler(assembler)\r
38                 {\r
39                 }\r
40 \r
41                 ~Temporary()\r
42                 {\r
43                         assembler->freeTemporary(this);\r
44                 }\r
45 \r
46         private:\r
47                 OutputASM *const assembler;\r
48         };\r
49 \r
50         class Constant : public TIntermConstantUnion\r
51         {\r
52         public:\r
53                 Constant(float x, float y, float z, float w) : TIntermConstantUnion(constants, TType(EbtFloat, EbpHigh, EvqConstExpr, 4, 1, false))\r
54                 {\r
55                         constants[0].setFConst(x);\r
56                         constants[1].setFConst(y);\r
57                         constants[2].setFConst(z);\r
58                         constants[3].setFConst(w);\r
59                 }\r
60 \r
61                 Constant(bool b) : TIntermConstantUnion(constants, TType(EbtBool, EbpHigh, EvqConstExpr, 1, 1, false))\r
62                 {\r
63                         constants[0].setBConst(b);\r
64                 }\r
65 \r
66                 Constant(int i) : TIntermConstantUnion(constants, TType(EbtInt, EbpHigh, EvqConstExpr, 1, 1, false))\r
67                 {\r
68                         constants[0].setIConst(i);\r
69                 }\r
70 \r
71                 ~Constant()\r
72                 {\r
73                 }\r
74 \r
75         private:\r
76                 ConstantUnion constants[4];\r
77         };\r
78 \r
79         Uniform::Uniform(GLenum type, GLenum precision, const std::string &name, int arraySize, int registerIndex, int blockId, const BlockMemberInfo& blockMemberInfo) :\r
80                 type(type), precision(precision), name(name), arraySize(arraySize), registerIndex(registerIndex), blockId(blockId), blockInfo(blockMemberInfo)\r
81         {\r
82         }\r
83 \r
84         UniformBlock::UniformBlock(const std::string& name, unsigned int dataSize, unsigned int arraySize,\r
85                                    TLayoutBlockStorage layout, bool isRowMajorLayout, int registerIndex, int blockId) :\r
86                 name(name), dataSize(dataSize), arraySize(arraySize), layout(layout),\r
87                 isRowMajorLayout(isRowMajorLayout), registerIndex(registerIndex), blockId(blockId)\r
88         {\r
89         }\r
90 \r
91         BlockLayoutEncoder::BlockLayoutEncoder(bool rowMajor)\r
92                 : mCurrentOffset(0), isRowMajor(rowMajor)\r
93         {\r
94         }\r
95 \r
96         BlockMemberInfo BlockLayoutEncoder::encodeType(const TType &type)\r
97         {\r
98                 int arrayStride;\r
99                 int matrixStride;\r
100 \r
101                 getBlockLayoutInfo(type, type.getArraySize(), isRowMajor, &arrayStride, &matrixStride);\r
102 \r
103                 const BlockMemberInfo memberInfo(static_cast<int>(mCurrentOffset * BytesPerComponent),\r
104                                                  static_cast<int>(arrayStride * BytesPerComponent),\r
105                                                  static_cast<int>(matrixStride * BytesPerComponent),\r
106                                                  isRowMajor);\r
107 \r
108                 advanceOffset(type, type.getArraySize(), isRowMajor, arrayStride, matrixStride);\r
109 \r
110                 return memberInfo;\r
111         }\r
112 \r
113         // static\r
114         size_t BlockLayoutEncoder::getBlockRegister(const BlockMemberInfo &info)\r
115         {\r
116                 return (info.offset / BytesPerComponent) / ComponentsPerRegister;\r
117         }\r
118 \r
119         // static\r
120         size_t BlockLayoutEncoder::getBlockRegisterElement(const BlockMemberInfo &info)\r
121         {\r
122                 return (info.offset / BytesPerComponent) % ComponentsPerRegister;\r
123         }\r
124 \r
125         void BlockLayoutEncoder::nextRegister()\r
126         {\r
127                 mCurrentOffset = sw::align(mCurrentOffset, ComponentsPerRegister);\r
128         }\r
129 \r
130         Std140BlockEncoder::Std140BlockEncoder(bool rowMajor) : BlockLayoutEncoder(rowMajor)\r
131         {\r
132         }\r
133 \r
134         void Std140BlockEncoder::enterAggregateType()\r
135         {\r
136                 nextRegister();\r
137         }\r
138 \r
139         void Std140BlockEncoder::exitAggregateType()\r
140         {\r
141                 nextRegister();\r
142         }\r
143 \r
144         void Std140BlockEncoder::getBlockLayoutInfo(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int *arrayStrideOut, int *matrixStrideOut)\r
145         {\r
146                 size_t baseAlignment = 0;\r
147                 int matrixStride = 0;\r
148                 int arrayStride = 0;\r
149 \r
150                 if(type.isMatrix())\r
151                 {\r
152                         baseAlignment = ComponentsPerRegister;\r
153                         matrixStride = ComponentsPerRegister;\r
154 \r
155                         if(arraySize > 0)\r
156                         {\r
157                                 const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();\r
158                                 arrayStride = ComponentsPerRegister * numRegisters;\r
159                         }\r
160                 }\r
161                 else if(arraySize > 0)\r
162                 {\r
163                         baseAlignment = ComponentsPerRegister;\r
164                         arrayStride = ComponentsPerRegister;\r
165                 }\r
166                 else\r
167                 {\r
168                         const int numComponents = type.getElementSize();\r
169                         baseAlignment = (numComponents == 3 ? 4u : static_cast<size_t>(numComponents));\r
170                 }\r
171 \r
172                 mCurrentOffset = sw::align(mCurrentOffset, baseAlignment);\r
173 \r
174                 *matrixStrideOut = matrixStride;\r
175                 *arrayStrideOut = arrayStride;\r
176         }\r
177 \r
178         void Std140BlockEncoder::advanceOffset(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int arrayStride, int matrixStride)\r
179         {\r
180                 if(arraySize > 0)\r
181                 {\r
182                         mCurrentOffset += arrayStride * arraySize;\r
183                 }\r
184                 else if(type.isMatrix())\r
185                 {\r
186                         ASSERT(matrixStride == ComponentsPerRegister);\r
187                         const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();\r
188                         mCurrentOffset += ComponentsPerRegister * numRegisters;\r
189                 }\r
190                 else\r
191                 {\r
192                         mCurrentOffset += type.getElementSize();\r
193                 }\r
194         }\r
195 \r
196         Attribute::Attribute()\r
197         {\r
198                 type = GL_NONE;\r
199                 arraySize = 0;\r
200                 registerIndex = 0;\r
201         }\r
202 \r
203         Attribute::Attribute(GLenum type, const std::string &name, int arraySize, int location, int registerIndex)\r
204         {\r
205                 this->type = type;\r
206                 this->name = name;\r
207                 this->arraySize = arraySize;\r
208                 this->location = location;\r
209                 this->registerIndex = registerIndex;\r
210         }\r
211 \r
212         sw::PixelShader *Shader::getPixelShader() const\r
213         {\r
214                 return 0;\r
215         }\r
216 \r
217         sw::VertexShader *Shader::getVertexShader() const\r
218         {\r
219                 return 0;\r
220         }\r
221 \r
222         OutputASM::TextureFunction::TextureFunction(const TString& nodeName) : method(IMPLICIT), proj(false), offset(false)\r
223         {\r
224                 TString name = TFunction::unmangleName(nodeName);\r
225 \r
226                 if(name == "texture2D" || name == "textureCube" || name == "texture" || name == "texture3D")\r
227                 {\r
228                         method = IMPLICIT;\r
229                 }\r
230                 else if(name == "texture2DProj" || name == "textureProj")\r
231                 {\r
232                         method = IMPLICIT;\r
233                         proj = true;\r
234                 }\r
235                 else if(name == "texture2DLod" || name == "textureCubeLod" || name == "textureLod")\r
236                 {\r
237                         method = LOD;\r
238                 }\r
239                 else if(name == "texture2DProjLod" || name == "textureProjLod")\r
240                 {\r
241                         method = LOD;\r
242                         proj = true;\r
243                 }\r
244                 else if(name == "textureSize")\r
245                 {\r
246                         method = SIZE;\r
247                 }\r
248                 else if(name == "textureOffset")\r
249                 {\r
250                         method = IMPLICIT;\r
251                         offset = true;\r
252                 }\r
253                 else if(name == "textureProjOffset")\r
254                 {\r
255                         method = IMPLICIT;\r
256                         offset = true;\r
257                         proj = true;\r
258                 }\r
259                 else if(name == "textureLodOffset")\r
260                 {\r
261                         method = LOD;\r
262                         offset = true;\r
263                 }\r
264                 else if(name == "textureProjLodOffset")\r
265                 {\r
266                         method = LOD;\r
267                         proj = true;\r
268                         offset = true;\r
269                 }\r
270                 else if(name == "texelFetch")\r
271                 {\r
272                         method = FETCH;\r
273                 }\r
274                 else if(name == "texelFetchOffset")\r
275                 {\r
276                         method = FETCH;\r
277                         offset = true;\r
278                 }\r
279                 else if(name == "textureGrad")\r
280                 {\r
281                         method = GRAD;\r
282                 }\r
283                 else if(name == "textureGradOffset")\r
284                 {\r
285                         method = GRAD;\r
286                         offset = true;\r
287                 }\r
288                 else if(name == "textureProjGrad")\r
289                 {\r
290                         method = GRAD;\r
291                         proj = true;\r
292                 }\r
293                 else if(name == "textureProjGradOffset")\r
294                 {\r
295                         method = GRAD;\r
296                         proj = true;\r
297                         offset = true;\r
298                 }\r
299                 else UNREACHABLE(0);\r
300         }\r
301 \r
302         OutputASM::OutputASM(TParseContext &context, Shader *shaderObject) : TIntermTraverser(true, true, true), shaderObject(shaderObject), mContext(context)\r
303         {\r
304                 shader = 0;\r
305                 pixelShader = 0;\r
306                 vertexShader = 0;\r
307 \r
308                 if(shaderObject)\r
309                 {\r
310                         shader = shaderObject->getShader();\r
311                         pixelShader = shaderObject->getPixelShader();\r
312                         vertexShader = shaderObject->getVertexShader();\r
313                 }\r
314 \r
315                 functionArray.push_back(Function(0, "main(", 0, 0));\r
316                 currentFunction = 0;\r
317                 outputQualifier = EvqOutput; // Set outputQualifier to any value other than EvqFragColor or EvqFragData\r
318         }\r
319 \r
320         OutputASM::~OutputASM()\r
321         {\r
322         }\r
323 \r
324         void OutputASM::output()\r
325         {\r
326                 if(shader)\r
327                 {\r
328                         emitShader(GLOBAL);\r
329 \r
330                         if(functionArray.size() > 1)   // Only call main() when there are other functions\r
331                         {\r
332                                 Instruction *callMain = emit(sw::Shader::OPCODE_CALL);\r
333                                 callMain->dst.type = sw::Shader::PARAMETER_LABEL;\r
334                                 callMain->dst.index = 0;   // main()\r
335 \r
336                                 emit(sw::Shader::OPCODE_RET);\r
337                         }\r
338 \r
339                         emitShader(FUNCTION);\r
340                 }\r
341         }\r
342 \r
343         void OutputASM::emitShader(Scope scope)\r
344         {\r
345                 emitScope = scope;\r
346                 currentScope = GLOBAL;\r
347                 mContext.getTreeRoot()->traverse(this);\r
348         }\r
349 \r
350         void OutputASM::freeTemporary(Temporary *temporary)\r
351         {\r
352                 free(temporaries, temporary);\r
353         }\r
354 \r
355         sw::Shader::Opcode OutputASM::getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const\r
356         {\r
357                 TBasicType baseType = in->getType().getBasicType();\r
358 \r
359                 switch(op)\r
360                 {\r
361                 case sw::Shader::OPCODE_NEG:\r
362                         switch(baseType)\r
363                         {\r
364                         case EbtInt:\r
365                         case EbtUInt:\r
366                                 return sw::Shader::OPCODE_INEG;\r
367                         case EbtFloat:\r
368                         default:\r
369                                 return op;\r
370                         }\r
371                 case sw::Shader::OPCODE_ADD:\r
372                         switch(baseType)\r
373                         {\r
374                         case EbtInt:\r
375                         case EbtUInt:\r
376                                 return sw::Shader::OPCODE_IADD;\r
377                         case EbtFloat:\r
378                         default:\r
379                                 return op;\r
380                         }\r
381                 case sw::Shader::OPCODE_SUB:\r
382                         switch(baseType)\r
383                         {\r
384                         case EbtInt:\r
385                         case EbtUInt:\r
386                                 return sw::Shader::OPCODE_ISUB;\r
387                         case EbtFloat:\r
388                         default:\r
389                                 return op;\r
390                         }\r
391                 case sw::Shader::OPCODE_MUL:\r
392                         switch(baseType)\r
393                         {\r
394                         case EbtInt:\r
395                         case EbtUInt:\r
396                                 return sw::Shader::OPCODE_IMUL;\r
397                         case EbtFloat:\r
398                         default:\r
399                                 return op;\r
400                         }\r
401                 case sw::Shader::OPCODE_DIV:\r
402                         switch(baseType)\r
403                         {\r
404                         case EbtInt:\r
405                                 return sw::Shader::OPCODE_IDIV;\r
406                         case EbtUInt:\r
407                                 return sw::Shader::OPCODE_UDIV;\r
408                         case EbtFloat:\r
409                         default:\r
410                                 return op;\r
411                         }\r
412                 case sw::Shader::OPCODE_IMOD:\r
413                         return baseType == EbtUInt ? sw::Shader::OPCODE_UMOD : op;\r
414                 case sw::Shader::OPCODE_ISHR:\r
415                         return baseType == EbtUInt ? sw::Shader::OPCODE_USHR : op;\r
416                 case sw::Shader::OPCODE_MIN:\r
417                         switch(baseType)\r
418                         {\r
419                         case EbtInt:\r
420                                 return sw::Shader::OPCODE_IMIN;\r
421                         case EbtUInt:\r
422                                 return sw::Shader::OPCODE_UMIN;\r
423                         case EbtFloat:\r
424                         default:\r
425                                 return op;\r
426                         }\r
427                 case sw::Shader::OPCODE_MAX:\r
428                         switch(baseType)\r
429                         {\r
430                         case EbtInt:\r
431                                 return sw::Shader::OPCODE_IMAX;\r
432                         case EbtUInt:\r
433                                 return sw::Shader::OPCODE_UMAX;\r
434                         case EbtFloat:\r
435                         default:\r
436                                 return op;\r
437                         }\r
438                 default:\r
439                         return op;\r
440                 }\r
441         }\r
442 \r
443         void OutputASM::visitSymbol(TIntermSymbol *symbol)\r
444         {\r
445                 // Vertex varyings don't have to be actively used to successfully link\r
446                 // against pixel shaders that use them. So make sure they're declared.\r
447                 if(symbol->getQualifier() == EvqVaryingOut || symbol->getQualifier() == EvqInvariantVaryingOut || symbol->getQualifier() == EvqVertexOut)\r
448                 {\r
449                         if(symbol->getBasicType() != EbtInvariant)   // Typeless declarations are not new varyings\r
450                         {\r
451                                 declareVarying(symbol, -1);\r
452                         }\r
453                 }\r
454         }\r
455 \r
456         bool OutputASM::visitBinary(Visit visit, TIntermBinary *node)\r
457         {\r
458                 if(currentScope != emitScope)\r
459                 {\r
460                         return false;\r
461                 }\r
462 \r
463                 TIntermTyped *result = node;\r
464                 TIntermTyped *left = node->getLeft();\r
465                 TIntermTyped *right = node->getRight();\r
466                 const TType &leftType = left->getType();\r
467                 const TType &rightType = right->getType();\r
468                 const TType &resultType = node->getType();\r
469 \r
470                 switch(node->getOp())\r
471                 {\r
472                 case EOpAssign:\r
473                         if(visit == PostVisit)\r
474                         {\r
475                                 assignLvalue(left, right);\r
476                                 copy(result, right);\r
477                         }\r
478                         break;\r
479                 case EOpInitialize:\r
480                         if(visit == PostVisit)\r
481                         {\r
482                                 copy(left, right);\r
483                         }\r
484                         break;\r
485                 case EOpMatrixTimesScalarAssign:\r
486                         if(visit == PostVisit)\r
487                         {\r
488                                 for(int i = 0; i < leftType.getNominalSize(); i++)\r
489                                 {\r
490                                         Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);\r
491                                         mul->dst.index += i;\r
492                                         argument(mul->src[0], left, i);\r
493                                 }\r
494 \r
495                                 assignLvalue(left, result);\r
496                         }\r
497                         break;\r
498                 case EOpVectorTimesMatrixAssign:\r
499                         if(visit == PostVisit)\r
500                         {\r
501                                 int size = leftType.getNominalSize();\r
502 \r
503                                 for(int i = 0; i < size; i++)\r
504                                 {\r
505                                         Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, left, right);\r
506                                         dot->dst.mask = 1 << i;\r
507                                         argument(dot->src[1], right, i);\r
508                                 }\r
509 \r
510                                 assignLvalue(left, result);\r
511                         }\r
512                         break;\r
513                 case EOpMatrixTimesMatrixAssign:\r
514                         if(visit == PostVisit)\r
515                         {\r
516                                 int dim = leftType.getNominalSize();\r
517 \r
518                                 for(int i = 0; i < dim; i++)\r
519                                 {\r
520                                         Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);\r
521                                         mul->dst.index += i;\r
522                                         argument(mul->src[1], right, i);\r
523                                         mul->src[1].swizzle = 0x00;\r
524 \r
525                                         for(int j = 1; j < dim; j++)\r
526                                         {\r
527                                                 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, left, right, result);\r
528                                                 mad->dst.index += i;\r
529                                                 argument(mad->src[0], left, j);\r
530                                                 argument(mad->src[1], right, i);\r
531                                                 mad->src[1].swizzle = j * 0x55;\r
532                                                 argument(mad->src[2], result, i);\r
533                                         }\r
534                                 }\r
535 \r
536                                 assignLvalue(left, result);\r
537                         }\r
538                         break;\r
539                 case EOpIndexDirect:\r
540                         if(visit == PostVisit)\r
541                         {\r
542                                 int index = right->getAsConstantUnion()->getIConst(0);\r
543 \r
544                                 if(result->isMatrix() || result->isStruct() || result->isInterfaceBlock())\r
545                                 {\r
546                                         ASSERT(left->isArray());\r
547                                         copy(result, left, index * left->elementRegisterCount());\r
548                                 }\r
549                                 else if(result->isRegister())\r
550                                 {\r
551                                         Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left);\r
552 \r
553                                         if(left->isRegister())\r
554                                         {\r
555                                                 mov->src[0].swizzle = index;\r
556                                         }\r
557                                         else if(left->isArray())\r
558                                         {\r
559                                                 argument(mov->src[0], left, index * left->elementRegisterCount());\r
560                                         }\r
561                                         else if(left->isMatrix())\r
562                                         {\r
563                                                 ASSERT(index < left->getNominalSize());   // FIXME: Report semantic error\r
564                                                 argument(mov->src[0], left, index);\r
565                                         }\r
566                                         else UNREACHABLE(0);\r
567                                 }\r
568                                 else UNREACHABLE(0);\r
569                         }\r
570                         break;\r
571                 case EOpIndexIndirect:\r
572                         if(visit == PostVisit)\r
573                         {\r
574                                 if(left->isArray() || left->isMatrix())\r
575                                 {\r
576                                         for(int index = 0; index < result->totalRegisterCount(); index++)\r
577                                         {\r
578                                                 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left);\r
579                                                 mov->dst.index += index;\r
580                                                 mov->dst.mask = writeMask(result, index);\r
581                                                 argument(mov->src[0], left, index);\r
582 \r
583                                                 if(left->totalRegisterCount() > 1)\r
584                                                 {\r
585                                                         sw::Shader::SourceParameter relativeRegister;\r
586                                                         argument(relativeRegister, right);\r
587 \r
588                                                         mov->src[0].rel.type = relativeRegister.type;\r
589                                                         mov->src[0].rel.index = relativeRegister.index;\r
590                                                         mov->src[0].rel.scale = result->totalRegisterCount();\r
591                                                         mov->src[0].rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);\r
592                                                 }\r
593                                         }\r
594                                 }\r
595                                 else if(left->isRegister())\r
596                                 {\r
597                                         emit(sw::Shader::OPCODE_EXTRACT, result, left, right);\r
598                                 }\r
599                                 else UNREACHABLE(0);\r
600                         }\r
601                         break;\r
602                 case EOpIndexDirectStruct:\r
603                 case EOpIndexDirectInterfaceBlock:\r
604                         if(visit == PostVisit)\r
605                         {\r
606                                 ASSERT(leftType.isStruct() || (leftType.isInterfaceBlock()));\r
607 \r
608                                 const TFieldList& fields = (node->getOp() == EOpIndexDirectStruct) ?\r
609                                                            leftType.getStruct()->fields() :\r
610                                                            leftType.getInterfaceBlock()->fields();\r
611                                 int index = right->getAsConstantUnion()->getIConst(0);\r
612                                 int fieldOffset = 0;\r
613 \r
614                                 for(int i = 0; i < index; i++)\r
615                                 {\r
616                                         fieldOffset += fields[i]->type()->totalRegisterCount();\r
617                                 }\r
618 \r
619                                 copy(result, left, fieldOffset);\r
620                         }\r
621                         break;\r
622                 case EOpVectorSwizzle:\r
623                         if(visit == PostVisit)\r
624                         {\r
625                                 int swizzle = 0;\r
626                                 TIntermAggregate *components = right->getAsAggregate();\r
627 \r
628                                 if(components)\r
629                                 {\r
630                                         TIntermSequence &sequence = components->getSequence();\r
631                                         int component = 0;\r
632 \r
633                                         for(TIntermSequence::iterator sit = sequence.begin(); sit != sequence.end(); sit++)\r
634                                         {\r
635                                                 TIntermConstantUnion *element = (*sit)->getAsConstantUnion();\r
636 \r
637                                                 if(element)\r
638                                                 {\r
639                                                         int i = element->getUnionArrayPointer()[0].getIConst();\r
640                                                         swizzle |= i << (component * 2);\r
641                                                         component++;\r
642                                                 }\r
643                                                 else UNREACHABLE(0);\r
644                                         }\r
645                                 }\r
646                                 else UNREACHABLE(0);\r
647 \r
648                                 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left);\r
649                                 mov->src[0].swizzle = swizzle;\r
650                         }\r
651                         break;\r
652                 case EOpAddAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, left, right); break;\r
653                 case EOpAdd:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, right);       break;\r
654                 case EOpSubAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, left, right); break;\r
655                 case EOpSub:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, right);       break;\r
656                 case EOpMulAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, left, right); break;\r
657                 case EOpMul:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, right);       break;\r
658                 case EOpDivAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, left, right); break;\r
659                 case EOpDiv:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, right);       break;\r
660                 case EOpIModAssign:          if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, left, right); break;\r
661                 case EOpIMod:                if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, right);       break;\r
662                 case EOpBitShiftLeftAssign:  if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_SHL, result, left, left, right); break;\r
663                 case EOpBitShiftLeft:        if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_SHL, result, left, right);       break;\r
664                 case EOpBitShiftRightAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, left, right); break;\r
665                 case EOpBitShiftRight:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, right);       break;\r
666                 case EOpBitwiseAndAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_AND, result, left, left, right); break;\r
667                 case EOpBitwiseAnd:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_AND, result, left, right);       break;\r
668                 case EOpBitwiseXorAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_XOR, result, left, left, right); break;\r
669                 case EOpBitwiseXor:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_XOR, result, left, right);       break;\r
670                 case EOpBitwiseOrAssign:     if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_OR, result, left, left, right);  break;\r
671                 case EOpBitwiseOr:           if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_OR, result, left, right);        break;\r
672                 case EOpEqual:\r
673                         if(visit == PostVisit)\r
674                         {\r
675                                 emitBinary(sw::Shader::OPCODE_EQ, result, left, right);\r
676 \r
677                                 for(int index = 1; index < left->totalRegisterCount(); index++)\r
678                                 {\r
679                                         Temporary equal(this);\r
680                                         Instruction *eq = emit(sw::Shader::OPCODE_EQ, &equal, left, right);\r
681                                         argument(eq->src[0], left, index);\r
682                                         argument(eq->src[1], right, index);\r
683                                         emit(sw::Shader::OPCODE_AND, result, result, &equal);\r
684                                 }\r
685                         }\r
686                         break;\r
687                 case EOpNotEqual:\r
688                         if(visit == PostVisit)\r
689                         {\r
690                                 emitBinary(sw::Shader::OPCODE_NE, result, left, right);\r
691 \r
692                                 for(int index = 1; index < left->totalRegisterCount(); index++)\r
693                                 {\r
694                                         Temporary notEqual(this);\r
695                                         Instruction *eq = emit(sw::Shader::OPCODE_NE, &notEqual, left, right);\r
696                                         argument(eq->src[0], left, index);\r
697                                         argument(eq->src[1], right, index);\r
698                                         emit(sw::Shader::OPCODE_OR, result, result, &notEqual);\r
699                                 }\r
700                         }\r
701                         break;\r
702                 case EOpLessThan:                if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, left, right); break;\r
703                 case EOpGreaterThan:             if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, left, right); break;\r
704                 case EOpLessThanEqual:           if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, left, right); break;\r
705                 case EOpGreaterThanEqual:        if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, left, right); break;\r
706                 case EOpVectorTimesScalarAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, left, right); break;\r
707                 case EOpVectorTimesScalar:       if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, right); break;\r
708                 case EOpMatrixTimesScalar:\r
709                         if(visit == PostVisit)\r
710                         {\r
711                                 for(int i = 0; i < leftType.getNominalSize(); i++)\r
712                                 {\r
713                                         Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);\r
714                                         mul->dst.index += i;\r
715                                         argument(mul->src[0], left, i);\r
716                                 }\r
717                         }\r
718                         break;\r
719                 case EOpVectorTimesMatrix:\r
720                         if(visit == PostVisit)\r
721                         {\r
722                                 sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize());\r
723 \r
724                                 int size = rightType.getNominalSize();\r
725                                 for(int i = 0; i < size; i++)\r
726                                 {\r
727                                         Instruction *dot = emit(dpOpcode, result, left, right);\r
728                                         dot->dst.mask = 1 << i;\r
729                                         argument(dot->src[1], right, i);\r
730                                 }\r
731                         }\r
732                         break;\r
733                 case EOpMatrixTimesVector:\r
734                         if(visit == PostVisit)\r
735                         {\r
736                                 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);\r
737                                 mul->src[1].swizzle = 0x00;\r
738 \r
739                                 int size = rightType.getNominalSize();\r
740                                 for(int i = 1; i < size; i++)\r
741                                 {\r
742                                         Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, left, right, result);\r
743                                         argument(mad->src[0], left, i);\r
744                                         mad->src[1].swizzle = i * 0x55;\r
745                                 }\r
746                         }\r
747                         break;\r
748                 case EOpMatrixTimesMatrix:\r
749                         if(visit == PostVisit)\r
750                         {\r
751                                 int dim = leftType.getNominalSize();\r
752 \r
753                                 int size = rightType.getNominalSize();\r
754                                 for(int i = 0; i < size; i++)\r
755                                 {\r
756                                         Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);\r
757                                         mul->dst.index += i;\r
758                                         argument(mul->src[1], right, i);\r
759                                         mul->src[1].swizzle = 0x00;\r
760 \r
761                                         for(int j = 1; j < dim; j++)\r
762                                         {\r
763                                                 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, left, right, result);\r
764                                                 mad->dst.index += i;\r
765                                                 argument(mad->src[0], left, j);\r
766                                                 argument(mad->src[1], right, i);\r
767                                                 mad->src[1].swizzle = j * 0x55;\r
768                                                 argument(mad->src[2], result, i);\r
769                                         }\r
770                                 }\r
771                         }\r
772                         break;\r
773                 case EOpLogicalOr:\r
774                         if(trivial(right, 6))\r
775                         {\r
776                                 if(visit == PostVisit)\r
777                                 {\r
778                                         emit(sw::Shader::OPCODE_OR, result, left, right);\r
779                                 }\r
780                         }\r
781                         else   // Short-circuit evaluation\r
782                         {\r
783                                 if(visit == InVisit)\r
784                                 {\r
785                                         emit(sw::Shader::OPCODE_MOV, result, left);\r
786                                         Instruction *ifnot = emit(sw::Shader::OPCODE_IF, 0, result);\r
787                                         ifnot->src[0].modifier = sw::Shader::MODIFIER_NOT;\r
788                                 }\r
789                                 else if(visit == PostVisit)\r
790                                 {\r
791                                         emit(sw::Shader::OPCODE_MOV, result, right);\r
792                                         emit(sw::Shader::OPCODE_ENDIF);\r
793                                 }\r
794                         }\r
795                         break;\r
796                 case EOpLogicalXor:        if(visit == PostVisit) emit(sw::Shader::OPCODE_XOR, result, left, right); break;\r
797                 case EOpLogicalAnd:\r
798                         if(trivial(right, 6))\r
799                         {\r
800                                 if(visit == PostVisit)\r
801                                 {\r
802                                         emit(sw::Shader::OPCODE_AND, result, left, right);\r
803                                 }\r
804                         }\r
805                         else   // Short-circuit evaluation\r
806                         {\r
807                                 if(visit == InVisit)\r
808                                 {\r
809                                         emit(sw::Shader::OPCODE_MOV, result, left);\r
810                                         emit(sw::Shader::OPCODE_IF, 0, result);\r
811                                 }\r
812                                 else if(visit == PostVisit)\r
813                                 {\r
814                                         emit(sw::Shader::OPCODE_MOV, result, right);\r
815                                         emit(sw::Shader::OPCODE_ENDIF);\r
816                                 }\r
817                         }\r
818                         break;\r
819                 default: UNREACHABLE(node->getOp());\r
820                 }\r
821 \r
822                 return true;\r
823         }\r
824 \r
825         void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow)\r
826         {\r
827                 switch(size)\r
828                 {\r
829                 case 1: // Used for cofactor computation only\r
830                         {\r
831                                 // For a 2x2 matrix, the cofactor is simply a transposed move or negate\r
832                                 bool isMov = (row == col);\r
833                                 sw::Shader::Opcode op = isMov ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG;\r
834                                 Instruction *mov = emit(op, result, arg);\r
835                                 mov->src[0].index += isMov ? 1 - row : row;\r
836                                 mov->src[0].swizzle = 0x55 * (isMov ? 1 - col : col);\r
837                                 mov->dst.index += outCol;\r
838                                 mov->dst.mask = 1 << outRow;\r
839                         }\r
840                         break;\r
841                 case 2:\r
842                         {\r
843                                 static const unsigned int swizzle[3] = { 0x99, 0x88, 0x44 }; // xy?? : yzyz, xzxz, xyxy\r
844 \r
845                                 bool isCofactor = (col >= 0) && (row >= 0);\r
846                                 int col0 = (isCofactor && (col <= 0)) ? 1 : 0;\r
847                                 int col1 = (isCofactor && (col <= 1)) ? 2 : 1;\r
848                                 bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));\r
849 \r
850                                 Instruction *det = emit(sw::Shader::OPCODE_DET2, result, arg, arg);\r
851                                 det->src[0].index += negate ? col1 : col0;\r
852                                 det->src[1].index += negate ? col0 : col1;\r
853                                 det->src[0].swizzle = det->src[1].swizzle = swizzle[isCofactor ? row : 2];\r
854                                 det->dst.index += outCol;\r
855                                 det->dst.mask = 1 << outRow;\r
856                         }\r
857                         break;\r
858                 case 3:\r
859                         {\r
860                                 static const unsigned int swizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 }; // xyz? : yzww, xzww, xyww, xyzw\r
861 \r
862                                 bool isCofactor = (col >= 0) && (row >= 0);\r
863                                 int col0 = (isCofactor && (col <= 0)) ? 1 : 0;\r
864                                 int col1 = (isCofactor && (col <= 1)) ? 2 : 1;\r
865                                 int col2 = (isCofactor && (col <= 2)) ? 3 : 2;\r
866                                 bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));\r
867 \r
868                                 Instruction *det = emit(sw::Shader::OPCODE_DET3, result, arg, arg, arg);\r
869                                 det->src[0].index += col0;\r
870                                 det->src[1].index += negate ? col2 : col1;\r
871                                 det->src[2].index += negate ? col1 : col2;\r
872                                 det->src[0].swizzle = det->src[1].swizzle = det->src[2].swizzle = swizzle[isCofactor ? row : 3];\r
873                                 det->dst.index += outCol;\r
874                                 det->dst.mask = 1 << outRow;\r
875                         }\r
876                         break;\r
877                 case 4:\r
878                         {\r
879                                 Instruction *det = emit(sw::Shader::OPCODE_DET4, result, arg, arg, arg, arg);\r
880                                 det->src[1].index += 1;\r
881                                 det->src[2].index += 2;\r
882                                 det->src[3].index += 3;\r
883                                 det->dst.index += outCol;\r
884                                 det->dst.mask = 1 << outRow;\r
885                         }\r
886                         break;\r
887                 default:\r
888                         UNREACHABLE(size);\r
889                         break;\r
890                 }\r
891         }\r
892 \r
893         bool OutputASM::visitUnary(Visit visit, TIntermUnary *node)\r
894         {\r
895                 if(currentScope != emitScope)\r
896                 {\r
897                         return false;\r
898                 }\r
899 \r
900                 TIntermTyped *result = node;\r
901                 TIntermTyped *arg = node->getOperand();\r
902                 TBasicType basicType = arg->getType().getBasicType();\r
903 \r
904                 union\r
905                 {\r
906                         float f;\r
907                         int i;\r
908                 } one_value;\r
909 \r
910                 if(basicType == EbtInt || basicType == EbtUInt)\r
911                 {\r
912                         one_value.i = 1;\r
913                 }\r
914                 else\r
915                 {\r
916                         one_value.f = 1.0f;\r
917                 }\r
918 \r
919                 Constant one(one_value.f, one_value.f, one_value.f, one_value.f);\r
920                 Constant rad(1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f);\r
921                 Constant deg(5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f);\r
922 \r
923                 switch(node->getOp())\r
924                 {\r
925                 case EOpNegative:\r
926                         if(visit == PostVisit)\r
927                         {\r
928                                 sw::Shader::Opcode negOpcode = getOpcode(sw::Shader::OPCODE_NEG, arg);\r
929                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
930                                 {\r
931                                         Instruction *neg = emit(negOpcode, result, arg);\r
932                                         neg->dst.index += index;\r
933                                         argument(neg->src[0], arg, index);\r
934                                 }\r
935                         }\r
936                         break;\r
937                 case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;\r
938                 case EOpLogicalNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;\r
939                 case EOpPostIncrement:\r
940                         if(visit == PostVisit)\r
941                         {\r
942                                 copy(result, arg);\r
943 \r
944                                 sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);\r
945                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
946                                 {\r
947                                         Instruction *add = emit(addOpcode, arg, arg, &one);\r
948                                         add->dst.index += index;\r
949                                         argument(add->src[0], arg, index);\r
950                                 }\r
951 \r
952                                 assignLvalue(arg, arg);\r
953                         }\r
954                         break;\r
955                 case EOpPostDecrement:\r
956                         if(visit == PostVisit)\r
957                         {\r
958                                 copy(result, arg);\r
959 \r
960                                 sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);\r
961                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
962                                 {\r
963                                         Instruction *sub = emit(subOpcode, arg, arg, &one);\r
964                                         sub->dst.index += index;\r
965                                         argument(sub->src[0], arg, index);\r
966                                 }\r
967 \r
968                                 assignLvalue(arg, arg);\r
969                         }\r
970                         break;\r
971                 case EOpPreIncrement:\r
972                         if(visit == PostVisit)\r
973                         {\r
974                                 sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);\r
975                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
976                                 {\r
977                                         Instruction *add = emit(addOpcode, result, arg, &one);\r
978                                         add->dst.index += index;\r
979                                         argument(add->src[0], arg, index);\r
980                                 }\r
981 \r
982                                 assignLvalue(arg, result);\r
983                         }\r
984                         break;\r
985                 case EOpPreDecrement:\r
986                         if(visit == PostVisit)\r
987                         {\r
988                                 sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);\r
989                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
990                                 {\r
991                                         Instruction *sub = emit(subOpcode, result, arg, &one);\r
992                                         sub->dst.index += index;\r
993                                         argument(sub->src[0], arg, index);\r
994                                 }\r
995 \r
996                                 assignLvalue(arg, result);\r
997                         }\r
998                         break;\r
999                 case EOpRadians:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &rad); break;\r
1000                 case EOpDegrees:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &deg); break;\r
1001                 case EOpSin:              if(visit == PostVisit) emit(sw::Shader::OPCODE_SIN, result, arg); break;\r
1002                 case EOpCos:              if(visit == PostVisit) emit(sw::Shader::OPCODE_COS, result, arg); break;\r
1003                 case EOpTan:              if(visit == PostVisit) emit(sw::Shader::OPCODE_TAN, result, arg); break;\r
1004                 case EOpAsin:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ASIN, result, arg); break;\r
1005                 case EOpAcos:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOS, result, arg); break;\r
1006                 case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN, result, arg); break;\r
1007                 case EOpSinh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SINH, result, arg); break;\r
1008                 case EOpCosh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_COSH, result, arg); break;\r
1009                 case EOpTanh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_TANH, result, arg); break;\r
1010                 case EOpAsinh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ASINH, result, arg); break;\r
1011                 case EOpAcosh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOSH, result, arg); break;\r
1012                 case EOpAtanh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ATANH, result, arg); break;\r
1013                 case EOpExp:              if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP, result, arg); break;\r
1014                 case EOpLog:              if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG, result, arg); break;\r
1015                 case EOpExp2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP2, result, arg); break;\r
1016                 case EOpLog2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break;\r
1017                 case EOpSqrt:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break;\r
1018                 case EOpInverseSqrt:      if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break;\r
1019                 case EOpAbs:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ABS, result, arg); break;\r
1020                 case EOpSign:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SGN, result, arg); break;\r
1021                 case EOpFloor:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break;\r
1022                 case EOpTrunc:            if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break;\r
1023                 case EOpRound:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break;\r
1024                 case EOpRoundEven:        if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUNDEVEN, result, arg); break;\r
1025                 case EOpCeil:             if(visit == PostVisit) emit(sw::Shader::OPCODE_CEIL, result, arg, result); break;\r
1026                 case EOpFract:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FRC, result, arg); break;\r
1027                 case EOpIsNan:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISNAN, result, arg); break;\r
1028                 case EOpIsInf:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISINF, result, arg); break;\r
1029                 case EOpLength:           if(visit == PostVisit) emit(sw::Shader::OPCODE_LEN(dim(arg)), result, arg); break;\r
1030                 case EOpNormalize:        if(visit == PostVisit) emit(sw::Shader::OPCODE_NRM(dim(arg)), result, arg); break;\r
1031                 case EOpDFdx:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDX, result, arg); break;\r
1032                 case EOpDFdy:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDY, result, arg); break;\r
1033                 case EOpFwidth:           if(visit == PostVisit) emit(sw::Shader::OPCODE_FWIDTH, result, arg); break;\r
1034                 case EOpAny:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ANY, result, arg); break;\r
1035                 case EOpAll:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ALL, result, arg); break;\r
1036                 case EOpFloatBitsToInt:   if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOINT, result, arg); break;\r
1037                 case EOpFloatBitsToUint:  if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOUINT, result, arg); break;\r
1038                 case EOpIntBitsToFloat:   if(visit == PostVisit) emit(sw::Shader::OPCODE_INTBITSTOFLOAT, result, arg); break;\r
1039                 case EOpUintBitsToFloat:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UINTBITSTOFLOAT, result, arg); break;\r
1040                 case EOpPackSnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKSNORM2x16, result, arg); break;\r
1041                 case EOpPackUnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKUNORM2x16, result, arg); break;\r
1042                 case EOpPackHalf2x16:     if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKHALF2x16, result, arg); break;\r
1043                 case EOpUnpackSnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKSNORM2x16, result, arg); break;\r
1044                 case EOpUnpackUnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKUNORM2x16, result, arg); break;\r
1045                 case EOpUnpackHalf2x16:   if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKHALF2x16, result, arg); break;\r
1046                 case EOpTranspose:\r
1047                         if(visit == PostVisit)\r
1048                         {\r
1049                                 int numCols = arg->getNominalSize();\r
1050                                 int numRows = arg->getSecondarySize();\r
1051                                 for(int i = 0; i < numCols; ++i)\r
1052                                 {\r
1053                                         for(int j = 0; j < numRows; ++j)\r
1054                                         {\r
1055                                                 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, arg);\r
1056                                                 mov->src[0].index += i;\r
1057                                                 mov->src[0].swizzle = 0x55 * j;\r
1058                                                 mov->dst.index += j;\r
1059                                                 mov->dst.mask = 1 << i;\r
1060                                         }\r
1061                                 }\r
1062                         }\r
1063                         break;\r
1064                 case EOpDeterminant:\r
1065                         if(visit == PostVisit)\r
1066                         {\r
1067                                 int size = arg->getNominalSize();\r
1068                                 ASSERT(size == arg->getSecondarySize());\r
1069 \r
1070                                 emitDeterminant(result, arg, size);\r
1071                         }\r
1072                         break;\r
1073                 case EOpInverse:\r
1074                         if(visit == PostVisit)\r
1075                         {\r
1076                                 int size = arg->getNominalSize();\r
1077                                 ASSERT(size == arg->getSecondarySize());\r
1078 \r
1079                                 // Compute transposed matrix of cofactors\r
1080                                 for(int i = 0; i < size; ++i)\r
1081                                 {\r
1082                                         for(int j = 0; j < size; ++j)\r
1083                                         {\r
1084                                                 // For a 2x2 matrix, the cofactor is simply a transposed move or negate\r
1085                                                 // For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant\r
1086                                                 emitDeterminant(result, arg, size - 1, j, i, i, j);\r
1087                                         }\r
1088                                 }\r
1089 \r
1090                                 // Compute 1 / determinant\r
1091                                 Temporary invDet(this);\r
1092                                 emitDeterminant(&invDet, arg, size);\r
1093                                 Constant one(1.0f, 1.0f, 1.0f, 1.0f);\r
1094                                 Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet);\r
1095                                 div->src[1].swizzle = 0x00; // xxxx\r
1096 \r
1097                                 // Divide transposed matrix of cofactors by determinant\r
1098                                 for(int i = 0; i < size; ++i)\r
1099                                 {\r
1100                                         Instruction *div = emit(sw::Shader::OPCODE_MUL, result, result, &invDet);\r
1101                                         div->src[0].index += i;\r
1102                                         div->dst.index += i;\r
1103                                 }\r
1104                         }\r
1105                         break;\r
1106                 default: UNREACHABLE(node->getOp());\r
1107                 }\r
1108 \r
1109                 return true;\r
1110         }\r
1111 \r
1112         bool OutputASM::visitAggregate(Visit visit, TIntermAggregate *node)\r
1113         {\r
1114                 if(currentScope != emitScope && node->getOp() != EOpFunction && node->getOp() != EOpSequence)\r
1115                 {\r
1116                         return false;\r
1117                 }\r
1118 \r
1119                 Constant zero(0.0f, 0.0f, 0.0f, 0.0f);\r
1120 \r
1121                 TIntermTyped *result = node;\r
1122                 const TType &resultType = node->getType();\r
1123                 TIntermSequence &arg = node->getSequence();\r
1124                 int argumentCount = arg.size();\r
1125 \r
1126                 switch(node->getOp())\r
1127                 {\r
1128                 case EOpSequence:           break;\r
1129                 case EOpDeclaration:        break;\r
1130                 case EOpPrototype:          break;\r
1131                 case EOpComma:\r
1132                         if(visit == PostVisit)\r
1133                         {\r
1134                                 copy(result, arg[1]);\r
1135                         }\r
1136                         break;\r
1137                 case EOpFunction:\r
1138                         if(visit == PreVisit)\r
1139                         {\r
1140                                 const TString &name = node->getName();\r
1141 \r
1142                                 if(emitScope == FUNCTION)\r
1143                                 {\r
1144                                         if(functionArray.size() > 1)   // No need for a label when there's only main()\r
1145                                         {\r
1146                                                 Instruction *label = emit(sw::Shader::OPCODE_LABEL);\r
1147                                                 label->dst.type = sw::Shader::PARAMETER_LABEL;\r
1148 \r
1149                                                 const Function *function = findFunction(name);\r
1150                                                 ASSERT(function);   // Should have been added during global pass\r
1151                                                 label->dst.index = function->label;\r
1152                                                 currentFunction = function->label;\r
1153                                         }\r
1154                                 }\r
1155                                 else if(emitScope == GLOBAL)\r
1156                                 {\r
1157                                         if(name != "main(")\r
1158                                         {\r
1159                                                 TIntermSequence &arguments = node->getSequence()[0]->getAsAggregate()->getSequence();\r
1160                                                 functionArray.push_back(Function(functionArray.size(), name, &arguments, node));\r
1161                                         }\r
1162                                 }\r
1163                                 else UNREACHABLE(emitScope);\r
1164 \r
1165                                 currentScope = FUNCTION;\r
1166                         }\r
1167                         else if(visit == PostVisit)\r
1168                         {\r
1169                                 if(emitScope == FUNCTION)\r
1170                                 {\r
1171                                         if(functionArray.size() > 1)   // No need to return when there's only main()\r
1172                                         {\r
1173                                                 emit(sw::Shader::OPCODE_RET);\r
1174                                         }\r
1175                                 }\r
1176 \r
1177                                 currentScope = GLOBAL;\r
1178                         }\r
1179                         break;\r
1180                 case EOpFunctionCall:\r
1181                         if(visit == PostVisit)\r
1182                         {\r
1183                                 if(node->isUserDefined())\r
1184                                 {\r
1185                                         const TString &name = node->getName();\r
1186                                         const Function *function = findFunction(name);\r
1187 \r
1188                                         if(!function)\r
1189                                         {\r
1190                                                 mContext.error(node->getLine(), "function definition not found", name.c_str());\r
1191                                                 return false;\r
1192                                         }\r
1193 \r
1194                                         TIntermSequence &arguments = *function->arg;\r
1195 \r
1196                                         for(int i = 0; i < argumentCount; i++)\r
1197                                         {\r
1198                                                 TIntermTyped *in = arguments[i]->getAsTyped();\r
1199 \r
1200                                                 if(in->getQualifier() == EvqIn ||\r
1201                                                    in->getQualifier() == EvqInOut ||\r
1202                                                    in->getQualifier() == EvqConstReadOnly)\r
1203                                                 {\r
1204                                                         copy(in, arg[i]);\r
1205                                                 }\r
1206                                         }\r
1207 \r
1208                                         Instruction *call = emit(sw::Shader::OPCODE_CALL);\r
1209                                         call->dst.type = sw::Shader::PARAMETER_LABEL;\r
1210                                         call->dst.index = function->label;\r
1211 \r
1212                                         if(function->ret && function->ret->getType().getBasicType() != EbtVoid)\r
1213                                         {\r
1214                                                 copy(result, function->ret);\r
1215                                         }\r
1216 \r
1217                                         for(int i = 0; i < argumentCount; i++)\r
1218                                         {\r
1219                                                 TIntermTyped *argument = arguments[i]->getAsTyped();\r
1220                                                 TIntermTyped *out = arg[i]->getAsTyped();\r
1221 \r
1222                                                 if(argument->getQualifier() == EvqOut ||\r
1223                                                    argument->getQualifier() == EvqInOut)\r
1224                                                 {\r
1225                                                         copy(out, argument);\r
1226                                                 }\r
1227                                         }\r
1228                                 }\r
1229                                 else\r
1230                                 {\r
1231                                         const TextureFunction textureFunction(node->getName());\r
1232                                         switch(textureFunction.method)\r
1233                                         {\r
1234                                         case TextureFunction::IMPLICIT:\r
1235                                                 {\r
1236                                                         TIntermTyped *t = arg[1]->getAsTyped();\r
1237 \r
1238                                                         TIntermNode* offset = textureFunction.offset ? arg[2] : 0;\r
1239 \r
1240                                                         if(argumentCount == 2 || (textureFunction.offset && argumentCount == 3))\r
1241                                                         {\r
1242                                                                 Instruction *tex = emit(textureFunction.offset ? sw::Shader::OPCODE_TEXOFFSET : sw::Shader::OPCODE_TEX,\r
1243                                                                                         result, arg[1], arg[0], offset);\r
1244                                                                 if(textureFunction.proj)\r
1245                                                                 {\r
1246                                                                         tex->project = true;\r
1247 \r
1248                                                                         switch(t->getNominalSize())\r
1249                                                                         {\r
1250                                                                         case 2: tex->src[0].swizzle = 0x54; break; // xyyy\r
1251                                                                         case 3: tex->src[0].swizzle = 0xA4; break; // xyzz\r
1252                                                                         case 4: break; // xyzw\r
1253                                                                         default:\r
1254                                                                                 UNREACHABLE(t->getNominalSize());\r
1255                                                                                 break;\r
1256                                                                         }\r
1257                                                                 }\r
1258                                                         }\r
1259                                                         else if(argumentCount == 3 || (textureFunction.offset && argumentCount == 4))   // bias\r
1260                                                         {\r
1261                                                                 Temporary proj(this);\r
1262                                                                 if(textureFunction.proj)\r
1263                                                                 {\r
1264                                                                         Instruction *div = emit(sw::Shader::OPCODE_DIV, &proj, arg[1], arg[1]);\r
1265                                                                         div->dst.mask = 0x3;\r
1266 \r
1267                                                                         switch(t->getNominalSize())\r
1268                                                                         {\r
1269                                                                         case 2:\r
1270                                                                         case 3:\r
1271                                                                         case 4:\r
1272                                                                                 div->src[1].swizzle = 0x55 * (t->getNominalSize() - 1);\r
1273                                                                                 break;\r
1274                                                                         default:\r
1275                                                                                 UNREACHABLE(t->getNominalSize());\r
1276                                                                                 break;\r
1277                                                                         }\r
1278                                                                 }\r
1279                                                                 else\r
1280                                                                 {\r
1281                                                                         emit(sw::Shader::OPCODE_MOV, &proj, arg[1]);\r
1282                                                                 }\r
1283 \r
1284                                                                 Instruction *bias = emit(sw::Shader::OPCODE_MOV, &proj, arg[textureFunction.offset ? 3 : 2]);\r
1285                                                                 bias->dst.mask = 0x8;\r
1286 \r
1287                                                                 Instruction *tex = emit(textureFunction.offset ? sw::Shader::OPCODE_TEXOFFSET : sw::Shader::OPCODE_TEX,\r
1288                                                                                         result, &proj, arg[0], offset); // FIXME: Implement an efficient TEXLDB instruction\r
1289                                                                 tex->bias = true;\r
1290                                                         }\r
1291                                                         else UNREACHABLE(argumentCount);\r
1292                                                 }\r
1293                                                 break;\r
1294                                         case TextureFunction::LOD:\r
1295                                                 {\r
1296                                                         TIntermTyped *t = arg[1]->getAsTyped();\r
1297                                                         Temporary proj(this);\r
1298 \r
1299                                                         if(textureFunction.proj)\r
1300                                                         {\r
1301                                                                         Instruction *div = emit(sw::Shader::OPCODE_DIV, &proj, arg[1], arg[1]);\r
1302                                                                         div->dst.mask = 0x3;\r
1303 \r
1304                                                                 switch(t->getNominalSize())\r
1305                                                                 {\r
1306                                                                 case 2:\r
1307                                                                 case 3:\r
1308                                                                 case 4:\r
1309                                                                         div->src[1].swizzle = 0x55 * (t->getNominalSize() - 1);\r
1310                                                                         break;\r
1311                                                                 default:\r
1312                                                                         UNREACHABLE(t->getNominalSize());\r
1313                                                                         break;\r
1314                                                                 }\r
1315                                                         }\r
1316                                                         else\r
1317                                                         {\r
1318                                                                 emit(sw::Shader::OPCODE_MOV, &proj, arg[1]);\r
1319                                                         }\r
1320 \r
1321                                                         Instruction *lod = emit(sw::Shader::OPCODE_MOV, &proj, arg[2]);\r
1322                                                         lod->dst.mask = 0x8;\r
1323 \r
1324                                                         emit(textureFunction.offset ? sw::Shader::OPCODE_TEXLDLOFFSET : sw::Shader::OPCODE_TEXLDL,\r
1325                                                              result, &proj, arg[0], textureFunction.offset ? arg[3] : 0);\r
1326                                                 }\r
1327                                                 break;\r
1328                                         case TextureFunction::FETCH:\r
1329                                                 {\r
1330                                                         TIntermTyped *t = arg[1]->getAsTyped();\r
1331 \r
1332                                                         if(argumentCount == 3 || (textureFunction.offset && argumentCount == 4))\r
1333                                                         {\r
1334                                                                 TIntermNode* offset = textureFunction.offset ? arg[3] : 0;\r
1335 \r
1336                                                                 emit(textureFunction.offset ? sw::Shader::OPCODE_TEXELFETCHOFFSET : sw::Shader::OPCODE_TEXELFETCH,\r
1337                                                                      result, arg[1], arg[0], arg[2], offset);\r
1338                                                         }\r
1339                                                         else UNREACHABLE(argumentCount);\r
1340                                                 }\r
1341                                                 break;\r
1342                                         case TextureFunction::GRAD:\r
1343                                                 {\r
1344                                                         TIntermTyped *t = arg[1]->getAsTyped();\r
1345 \r
1346                                                         if(argumentCount == 4 || (textureFunction.offset && argumentCount == 5))\r
1347                                                         {\r
1348                                                                 Temporary uvwb(this);\r
1349 \r
1350                                                                 if(textureFunction.proj)\r
1351                                                                 {\r
1352                                                                         Instruction *div = emit(sw::Shader::OPCODE_DIV, &uvwb, arg[1], arg[1]);\r
1353                                                                         div->dst.mask = 0x3;\r
1354 \r
1355                                                                         switch(t->getNominalSize())\r
1356                                                                         {\r
1357                                                                         case 2:\r
1358                                                                         case 3:\r
1359                                                                         case 4:\r
1360                                                                                 div->src[1].swizzle = 0x55 * (t->getNominalSize() - 1);\r
1361                                                                                 break;\r
1362                                                                         default:\r
1363                                                                                 UNREACHABLE(t->getNominalSize());\r
1364                                                                                 break;\r
1365                                                                         }\r
1366                                                                 }\r
1367                                                                 else\r
1368                                                                 {\r
1369                                                                         emit(sw::Shader::OPCODE_MOV, &uvwb, arg[1]);\r
1370                                                                 }\r
1371 \r
1372                                                                 TIntermNode* offset = textureFunction.offset ? arg[4] : 0;\r
1373 \r
1374                                                                 emit(textureFunction.offset ? sw::Shader::OPCODE_TEXGRADOFFSET : sw::Shader::OPCODE_TEXGRAD,\r
1375                                                                      result, &uvwb, arg[0], arg[2], arg[3], offset);\r
1376                                                         }\r
1377                                                         else UNREACHABLE(argumentCount);\r
1378                                                 }\r
1379                                                 break;\r
1380                                         case TextureFunction::SIZE:\r
1381                                                 emit(sw::Shader::OPCODE_TEXSIZE, result, arg[1], arg[0]);\r
1382                                                 break;\r
1383                                         default:\r
1384                                                 UNREACHABLE(textureFunction.method);\r
1385                                         }\r
1386                                 }\r
1387                         }\r
1388                         break;\r
1389                 case EOpParameters:\r
1390                         break;\r
1391                 case EOpConstructFloat:\r
1392                 case EOpConstructVec2:\r
1393                 case EOpConstructVec3:\r
1394                 case EOpConstructVec4:\r
1395                 case EOpConstructBool:\r
1396                 case EOpConstructBVec2:\r
1397                 case EOpConstructBVec3:\r
1398                 case EOpConstructBVec4:\r
1399                 case EOpConstructInt:\r
1400                 case EOpConstructIVec2:\r
1401                 case EOpConstructIVec3:\r
1402                 case EOpConstructIVec4:\r
1403                 case EOpConstructUInt:\r
1404                 case EOpConstructUVec2:\r
1405                 case EOpConstructUVec3:\r
1406                 case EOpConstructUVec4:\r
1407                         if(visit == PostVisit)\r
1408                         {\r
1409                                 int component = 0;\r
1410 \r
1411                                 for(int i = 0; i < argumentCount; i++)\r
1412                                 {\r
1413                                         TIntermTyped *argi = arg[i]->getAsTyped();\r
1414                                         int size = argi->getNominalSize();\r
1415 \r
1416                                         if(!argi->isMatrix())\r
1417                                         {\r
1418                                                 Instruction *mov = emitCast(result, argi);\r
1419                                                 mov->dst.mask = (0xF << component) & 0xF;\r
1420                                                 mov->src[0].swizzle = readSwizzle(argi, size) << (component * 2);\r
1421 \r
1422                                                 component += size;\r
1423                                         }\r
1424                                         else   // Matrix\r
1425                                         {\r
1426                                                 int column = 0;\r
1427 \r
1428                                                 while(component < resultType.getNominalSize())\r
1429                                                 {\r
1430                                                         Instruction *mov = emitCast(result, argi);\r
1431                                                         mov->dst.mask = (0xF << component) & 0xF;\r
1432                                                         mov->src[0].index += column;\r
1433                                                         mov->src[0].swizzle = readSwizzle(argi, size) << (component * 2);\r
1434 \r
1435                                                         column++;\r
1436                                                         component += size;\r
1437                                                 }\r
1438                                         }\r
1439                                 }\r
1440                         }\r
1441                         break;\r
1442                 case EOpConstructMat2:\r
1443                 case EOpConstructMat2x3:\r
1444                 case EOpConstructMat2x4:\r
1445                 case EOpConstructMat3x2:\r
1446                 case EOpConstructMat3:\r
1447                 case EOpConstructMat3x4:\r
1448                 case EOpConstructMat4x2:\r
1449                 case EOpConstructMat4x3:\r
1450                 case EOpConstructMat4:\r
1451                         if(visit == PostVisit)\r
1452                         {\r
1453                                 TIntermTyped *arg0 = arg[0]->getAsTyped();\r
1454                                 const int outCols = result->getNominalSize();\r
1455                                 const int outRows = result->getSecondarySize();\r
1456 \r
1457                                 if(arg0->isScalar() && arg.size() == 1)   // Construct scale matrix\r
1458                                 {\r
1459                                         for(int i = 0; i < outCols; i++)\r
1460                                         {\r
1461                                                 Instruction *init = emit(sw::Shader::OPCODE_MOV, result, &zero);\r
1462                                                 init->dst.index += i;\r
1463                                                 Instruction *mov = emitCast(result, arg0);\r
1464                                                 mov->dst.index += i;\r
1465                                                 mov->dst.mask = 1 << i;\r
1466                                                 ASSERT(mov->src[0].swizzle == 0x00);\r
1467                                         }\r
1468                                 }\r
1469                                 else if(arg0->isMatrix())\r
1470                                 {\r
1471                                         const int inCols = arg0->getNominalSize();\r
1472                                         const int inRows = arg0->getSecondarySize();\r
1473 \r
1474                                         for(int i = 0; i < outCols; i++)\r
1475                                         {\r
1476                                                 if(i >= inCols || outRows > inRows)\r
1477                                                 {\r
1478                                                         // Initialize to identity matrix\r
1479                                                         Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f));\r
1480                                                         Instruction *mov = emitCast(result, &col);\r
1481                                                         mov->dst.index += i;\r
1482                                                 }\r
1483 \r
1484                                                 if(i < inCols)\r
1485                                                 {\r
1486                                                         Instruction *mov = emitCast(result, arg0);\r
1487                                                         mov->dst.index += i;\r
1488                                                         mov->dst.mask = 0xF >> (4 - inRows);\r
1489                                                         argument(mov->src[0], arg0, i);\r
1490                                                 }\r
1491                                         }\r
1492                                 }\r
1493                                 else\r
1494                                 {\r
1495                                         int column = 0;\r
1496                                         int row = 0;\r
1497 \r
1498                                         for(int i = 0; i < argumentCount; i++)\r
1499                                         {\r
1500                                                 TIntermTyped *argi = arg[i]->getAsTyped();\r
1501                                                 int size = argi->getNominalSize();\r
1502                                                 int element = 0;\r
1503 \r
1504                                                 while(element < size)\r
1505                                                 {\r
1506                                                         Instruction *mov = emitCast(result, argi);\r
1507                                                         mov->dst.index += column;\r
1508                                                         mov->dst.mask = (0xF << row) & 0xF;\r
1509                                                         mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element;\r
1510 \r
1511                                                         int end = row + size - element;\r
1512                                                         column = end >= outRows ? column + 1 : column;\r
1513                                                         element = element + outRows - row;\r
1514                                                         row = end >= outRows ? 0 : end;\r
1515                                                 }\r
1516                                         }\r
1517                                 }\r
1518                         }\r
1519                         break;\r
1520                 case EOpConstructStruct:\r
1521                         if(visit == PostVisit)\r
1522                         {\r
1523                                 int offset = 0;\r
1524                                 for(int i = 0; i < argumentCount; i++)\r
1525                                 {\r
1526                                         TIntermTyped *argi = arg[i]->getAsTyped();\r
1527                                         int size = argi->totalRegisterCount();\r
1528 \r
1529                                         for(int index = 0; index < size; index++)\r
1530                                         {\r
1531                                                 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, argi);\r
1532                                                 mov->dst.index += index + offset;\r
1533                                                 mov->dst.mask = writeMask(result, offset + index);\r
1534                                                 argument(mov->src[0], argi, index);\r
1535                                         }\r
1536 \r
1537                                         offset += size;\r
1538                                 }\r
1539                         }\r
1540                         break;\r
1541                 case EOpLessThan:         if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, arg[0], arg[1]); break;\r
1542                 case EOpGreaterThan:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, arg[0], arg[1]); break;\r
1543                 case EOpLessThanEqual:    if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, arg[0], arg[1]); break;\r
1544                 case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, arg[0], arg[1]); break;\r
1545                 case EOpVectorEqual:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_EQ, result, arg[0], arg[1]); break;\r
1546                 case EOpVectorNotEqual:   if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_NE, result, arg[0], arg[1]); break;\r
1547                 case EOpMod:              if(visit == PostVisit) emit(sw::Shader::OPCODE_MOD, result, arg[0], arg[1]); break;\r
1548                 case EOpPow:              if(visit == PostVisit) emit(sw::Shader::OPCODE_POW, result, arg[0], arg[1]); break;\r
1549                 case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN2, result, arg[0], arg[1]); break;\r
1550                 case EOpMin:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, arg[0], arg[1]); break;\r
1551                 case EOpMax:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); break;\r
1552                 case EOpClamp:\r
1553                         if(visit == PostVisit)\r
1554                         {\r
1555                                 emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]);\r
1556                                 emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, result, arg[2]);\r
1557                         }\r
1558                         break;\r
1559                 case EOpMix:         if(visit == PostVisit) emit(sw::Shader::OPCODE_LRP, result, arg[2], arg[1], arg[0]); break;\r
1560                 case EOpStep:        if(visit == PostVisit) emit(sw::Shader::OPCODE_STEP, result, arg[0], arg[1]); break;\r
1561                 case EOpSmoothStep:  if(visit == PostVisit) emit(sw::Shader::OPCODE_SMOOTH, result, arg[0], arg[1], arg[2]); break;\r
1562                 case EOpDistance:    if(visit == PostVisit) emit(sw::Shader::OPCODE_DIST(dim(arg[0])), result, arg[0], arg[1]); break;\r
1563                 case EOpDot:         if(visit == PostVisit) emit(sw::Shader::OPCODE_DP(dim(arg[0])), result, arg[0], arg[1]); break;\r
1564                 case EOpCross:       if(visit == PostVisit) emit(sw::Shader::OPCODE_CRS, result, arg[0], arg[1]); break;\r
1565                 case EOpFaceForward: if(visit == PostVisit) emit(sw::Shader::OPCODE_FORWARD(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;\r
1566                 case EOpReflect:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFLECT(dim(arg[0])), result, arg[0], arg[1]); break;\r
1567                 case EOpRefract:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFRACT(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;\r
1568                 case EOpMul:\r
1569                         if(visit == PostVisit)\r
1570                         {\r
1571                                 TIntermTyped *arg0 = arg[0]->getAsTyped();\r
1572                                 TIntermTyped *arg1 = arg[1]->getAsTyped();\r
1573                                 ASSERT((arg0->getNominalSize() == arg1->getNominalSize()) && (arg0->getSecondarySize() == arg1->getSecondarySize()));\r
1574 \r
1575                                 int size = arg0->getNominalSize();\r
1576                                 for(int i = 0; i < size; i++)\r
1577                                 {\r
1578                                         Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, arg[0], arg[1]);\r
1579                                         mul->dst.index += i;\r
1580                                         argument(mul->src[0], arg[0], i);\r
1581                                         argument(mul->src[1], arg[1], i);\r
1582                                 }\r
1583                         }\r
1584                         break;\r
1585                 case EOpOuterProduct:\r
1586                         if(visit == PostVisit)\r
1587                         {\r
1588                                 for(int i = 0; i < dim(arg[1]); i++)\r
1589                                 {\r
1590                                         Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, arg[0], arg[1]);\r
1591                                         mul->dst.index += i;\r
1592                                         mul->src[1].swizzle = 0x55 * i;\r
1593                                 }\r
1594                         }\r
1595                         break;\r
1596                 default: UNREACHABLE(node->getOp());\r
1597                 }\r
1598 \r
1599                 return true;\r
1600         }\r
1601 \r
1602         bool OutputASM::visitSelection(Visit visit, TIntermSelection *node)\r
1603         {\r
1604                 if(currentScope != emitScope)\r
1605                 {\r
1606                         return false;\r
1607                 }\r
1608 \r
1609                 TIntermTyped *condition = node->getCondition();\r
1610                 TIntermNode *trueBlock = node->getTrueBlock();\r
1611                 TIntermNode *falseBlock = node->getFalseBlock();\r
1612                 TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();\r
1613 \r
1614                 condition->traverse(this);\r
1615 \r
1616                 if(node->usesTernaryOperator())\r
1617                 {\r
1618                         if(constantCondition)\r
1619                         {\r
1620                                 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();\r
1621 \r
1622                                 if(trueCondition)\r
1623                                 {\r
1624                                         trueBlock->traverse(this);\r
1625                                         copy(node, trueBlock);\r
1626                                 }\r
1627                                 else\r
1628                                 {\r
1629                                         falseBlock->traverse(this);\r
1630                                         copy(node, falseBlock);\r
1631                                 }\r
1632                         }\r
1633                         else if(trivial(node, 6))   // Fast to compute both potential results and no side effects\r
1634                         {\r
1635                                 trueBlock->traverse(this);\r
1636                                 falseBlock->traverse(this);\r
1637                                 emit(sw::Shader::OPCODE_SELECT, node, condition, trueBlock, falseBlock);\r
1638                         }\r
1639                         else\r
1640                         {\r
1641                                 emit(sw::Shader::OPCODE_IF, 0, condition);\r
1642 \r
1643                                 if(trueBlock)\r
1644                                 {\r
1645                                         trueBlock->traverse(this);\r
1646                                         copy(node, trueBlock);\r
1647                                 }\r
1648 \r
1649                                 if(falseBlock)\r
1650                                 {\r
1651                                         emit(sw::Shader::OPCODE_ELSE);\r
1652                                         falseBlock->traverse(this);\r
1653                                         copy(node, falseBlock);\r
1654                                 }\r
1655 \r
1656                                 emit(sw::Shader::OPCODE_ENDIF);\r
1657                         }\r
1658                 }\r
1659                 else  // if/else statement\r
1660                 {\r
1661                         if(constantCondition)\r
1662                         {\r
1663                                 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();\r
1664 \r
1665                                 if(trueCondition)\r
1666                                 {\r
1667                                         if(trueBlock)\r
1668                                         {\r
1669                                                 trueBlock->traverse(this);\r
1670                                         }\r
1671                                 }\r
1672                                 else\r
1673                                 {\r
1674                                         if(falseBlock)\r
1675                                         {\r
1676                                                 falseBlock->traverse(this);\r
1677                                         }\r
1678                                 }\r
1679                         }\r
1680                         else\r
1681                         {\r
1682                                 emit(sw::Shader::OPCODE_IF, 0, condition);\r
1683 \r
1684                                 if(trueBlock)\r
1685                                 {\r
1686                                         trueBlock->traverse(this);\r
1687                                 }\r
1688 \r
1689                                 if(falseBlock)\r
1690                                 {\r
1691                                         emit(sw::Shader::OPCODE_ELSE);\r
1692                                         falseBlock->traverse(this);\r
1693                                 }\r
1694 \r
1695                                 emit(sw::Shader::OPCODE_ENDIF);\r
1696                         }\r
1697                 }\r
1698 \r
1699                 return false;\r
1700         }\r
1701 \r
1702         bool OutputASM::visitLoop(Visit visit, TIntermLoop *node)\r
1703         {\r
1704                 if(currentScope != emitScope)\r
1705                 {\r
1706                         return false;\r
1707                 }\r
1708 \r
1709                 unsigned int iterations = loopCount(node);\r
1710 \r
1711                 if(iterations == 0)\r
1712                 {\r
1713                         return false;\r
1714                 }\r
1715 \r
1716                 bool unroll = (iterations <= 4);\r
1717 \r
1718                 if(unroll)\r
1719                 {\r
1720                         DetectLoopDiscontinuity detectLoopDiscontinuity;\r
1721                         unroll = !detectLoopDiscontinuity.traverse(node);\r
1722                 }\r
1723 \r
1724                 TIntermNode *init = node->getInit();\r
1725                 TIntermTyped *condition = node->getCondition();\r
1726                 TIntermTyped *expression = node->getExpression();\r
1727                 TIntermNode *body = node->getBody();\r
1728 \r
1729                 if(node->getType() == ELoopDoWhile)\r
1730                 {\r
1731                         Temporary iterate(this);\r
1732                         Constant True(true);\r
1733                         emit(sw::Shader::OPCODE_MOV, &iterate, &True);\r
1734 \r
1735                         emit(sw::Shader::OPCODE_WHILE, 0, &iterate);   // FIXME: Implement real do-while\r
1736 \r
1737                         if(body)\r
1738                         {\r
1739                                 body->traverse(this);\r
1740                         }\r
1741 \r
1742                         emit(sw::Shader::OPCODE_TEST);\r
1743 \r
1744                         condition->traverse(this);\r
1745                         emit(sw::Shader::OPCODE_MOV, &iterate, condition);\r
1746 \r
1747                         emit(sw::Shader::OPCODE_ENDWHILE);\r
1748                 }\r
1749                 else\r
1750                 {\r
1751                         if(init)\r
1752                         {\r
1753                                 init->traverse(this);\r
1754                         }\r
1755 \r
1756                         if(unroll)\r
1757                         {\r
1758                                 for(unsigned int i = 0; i < iterations; i++)\r
1759                                 {\r
1760                                 //      condition->traverse(this);   // Condition could contain statements, but not in an unrollable loop\r
1761 \r
1762                                         if(body)\r
1763                                         {\r
1764                                                 body->traverse(this);\r
1765                                         }\r
1766 \r
1767                                         if(expression)\r
1768                                         {\r
1769                                                 expression->traverse(this);\r
1770                                         }\r
1771                                 }\r
1772                         }\r
1773                         else\r
1774                         {\r
1775                                 if(condition)\r
1776                                 {\r
1777                                         condition->traverse(this);\r
1778                                 }\r
1779 \r
1780                                 emit(sw::Shader::OPCODE_WHILE, 0, condition);\r
1781 \r
1782                                 if(body)\r
1783                                 {\r
1784                                         body->traverse(this);\r
1785                                 }\r
1786 \r
1787                                 emit(sw::Shader::OPCODE_TEST);\r
1788 \r
1789                                 if(expression)\r
1790                                 {\r
1791                                         expression->traverse(this);\r
1792                                 }\r
1793 \r
1794                                 if(condition)\r
1795                                 {\r
1796                                         condition->traverse(this);\r
1797                                 }\r
1798 \r
1799                                 emit(sw::Shader::OPCODE_ENDWHILE);\r
1800                         }\r
1801                 }\r
1802 \r
1803                 return false;\r
1804         }\r
1805 \r
1806         bool OutputASM::visitBranch(Visit visit, TIntermBranch *node)\r
1807         {\r
1808                 if(currentScope != emitScope)\r
1809                 {\r
1810                         return false;\r
1811                 }\r
1812 \r
1813                 switch(node->getFlowOp())\r
1814                 {\r
1815                 case EOpKill:      if(visit == PostVisit) emit(sw::Shader::OPCODE_DISCARD);  break;\r
1816                 case EOpBreak:     if(visit == PostVisit) emit(sw::Shader::OPCODE_BREAK);    break;\r
1817                 case EOpContinue:  if(visit == PostVisit) emit(sw::Shader::OPCODE_CONTINUE); break;\r
1818                 case EOpReturn:\r
1819                         if(visit == PostVisit)\r
1820                         {\r
1821                                 TIntermTyped *value = node->getExpression();\r
1822 \r
1823                                 if(value)\r
1824                                 {\r
1825                                         copy(functionArray[currentFunction].ret, value);\r
1826                                 }\r
1827 \r
1828                                 emit(sw::Shader::OPCODE_LEAVE);\r
1829                         }\r
1830                         break;\r
1831                 default: UNREACHABLE(node->getFlowOp());\r
1832                 }\r
1833 \r
1834                 return true;\r
1835         }\r
1836 \r
1837         bool OutputASM::isSamplerRegister(TIntermTyped *operand)\r
1838         {\r
1839                 return operand && isSamplerRegister(operand->getType());\r
1840         }\r
1841 \r
1842         bool OutputASM::isSamplerRegister(const TType &type)\r
1843         {\r
1844                 // A sampler register's qualifiers can be:\r
1845                 // - EvqUniform: The sampler uniform is used as is in the code (default case).\r
1846                 // - EvqTemporary: The sampler is indexed. It's still a sampler register.\r
1847                 // - EvqIn (and other similar types): The sampler has been passed as a function argument. At this point,\r
1848                 //                                    the sampler has been copied and is no longer a sampler register.\r
1849                 return IsSampler(type.getBasicType()) && (type.getQualifier() == EvqUniform || type.getQualifier() == EvqTemporary);\r
1850         }\r
1851 \r
1852         Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, TIntermNode *src4, int index)\r
1853         {\r
1854                 if(isSamplerRegister(dst))\r
1855                 {\r
1856                         op = sw::Shader::OPCODE_NULL;   // Can't assign to a sampler, but this is hit when indexing sampler arrays\r
1857                 }\r
1858 \r
1859                 Instruction *instruction = new Instruction(op);\r
1860 \r
1861                 if(dst)\r
1862                 {\r
1863                         instruction->dst.type = registerType(dst);\r
1864                         instruction->dst.index = registerIndex(dst) + index;\r
1865                         instruction->dst.mask = writeMask(dst);\r
1866                         instruction->dst.integer = (dst->getBasicType() == EbtInt);\r
1867                 }\r
1868 \r
1869                 argument(instruction->src[0], src0, index);\r
1870                 argument(instruction->src[1], src1, index);\r
1871                 argument(instruction->src[2], src2, index);\r
1872                 argument(instruction->src[3], src3, index);\r
1873                 argument(instruction->src[4], src4, index);\r
1874 \r
1875                 shader->append(instruction);\r
1876 \r
1877                 return instruction;\r
1878         }\r
1879 \r
1880         Instruction *OutputASM::emitCast(TIntermTyped *dst, TIntermTyped *src)\r
1881         {\r
1882                 switch(src->getBasicType())\r
1883                 {\r
1884                 case EbtBool:\r
1885                         switch(dst->getBasicType())\r
1886                         {\r
1887                         case EbtInt:   return emit(sw::Shader::OPCODE_B2I, dst, src);\r
1888                         case EbtUInt:  return emit(sw::Shader::OPCODE_B2U, dst, src);\r
1889                         case EbtFloat: return emit(sw::Shader::OPCODE_B2F, dst, src);\r
1890                         default:       break;\r
1891                         }\r
1892                         break;\r
1893                 case EbtInt:\r
1894                         switch(dst->getBasicType())\r
1895                         {\r
1896                         case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, src);\r
1897                         case EbtFloat: return emit(sw::Shader::OPCODE_I2F, dst, src);\r
1898                         default:       break;\r
1899                         }\r
1900                         break;\r
1901                 case EbtUInt:\r
1902                         switch(dst->getBasicType())\r
1903                         {\r
1904                         case EbtBool:  return emit(sw::Shader::OPCODE_U2B, dst, src);\r
1905                         case EbtFloat: return emit(sw::Shader::OPCODE_U2F, dst, src);\r
1906                         default:       break;\r
1907                         }\r
1908                         break;\r
1909                 case EbtFloat:\r
1910                         switch(dst->getBasicType())\r
1911                         {\r
1912                         case EbtBool: return emit(sw::Shader::OPCODE_F2B, dst, src);\r
1913                         case EbtInt:  return emit(sw::Shader::OPCODE_F2I, dst, src);\r
1914                         case EbtUInt: return emit(sw::Shader::OPCODE_F2U, dst, src);\r
1915                         default:      break;\r
1916                         }\r
1917                         break;\r
1918                 default:\r
1919                         break;\r
1920                 }\r
1921 \r
1922                 return emit(sw::Shader::OPCODE_MOV, dst, src);\r
1923         }\r
1924 \r
1925         void OutputASM::emitBinary(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2)\r
1926         {\r
1927                 for(int index = 0; index < dst->elementRegisterCount(); index++)\r
1928                 {\r
1929                         emit(op, dst, src0, src1, src2, 0, 0, index);\r
1930                 }\r
1931         }\r
1932 \r
1933         void OutputASM::emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1)\r
1934         {\r
1935                 emitBinary(op, result, src0, src1);\r
1936                 assignLvalue(lhs, result);\r
1937         }\r
1938 \r
1939         void OutputASM::emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index)\r
1940         {\r
1941                 sw::Shader::Opcode opcode;\r
1942                 switch(left->getAsTyped()->getBasicType())\r
1943                 {\r
1944                 case EbtBool:\r
1945                 case EbtInt:\r
1946                         opcode = sw::Shader::OPCODE_ICMP;\r
1947                         break;\r
1948                 case EbtUInt:\r
1949                         opcode = sw::Shader::OPCODE_UCMP;\r
1950                         break;\r
1951                 default:\r
1952                         opcode = sw::Shader::OPCODE_CMP;\r
1953                         break;\r
1954                 }\r
1955 \r
1956                 Instruction *cmp = emit(opcode, dst, left, right);\r
1957                 cmp->control = cmpOp;\r
1958                 argument(cmp->src[0], left, index);\r
1959                 argument(cmp->src[1], right, index);\r
1960         }\r
1961 \r
1962         int componentCount(const TType &type, int registers)\r
1963         {\r
1964                 if(registers == 0)\r
1965                 {\r
1966                         return 0;\r
1967                 }\r
1968 \r
1969                 if(type.isArray() && registers >= type.elementRegisterCount())\r
1970                 {\r
1971                         int index = registers / type.elementRegisterCount();\r
1972                         registers -= index * type.elementRegisterCount();\r
1973                         return index * type.getElementSize() + componentCount(type, registers);\r
1974                 }\r
1975 \r
1976                 if(type.isStruct() || type.isInterfaceBlock())\r
1977                 {\r
1978                         const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();\r
1979                         int elements = 0;\r
1980 \r
1981                         for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++)\r
1982                         {\r
1983                                 const TType &fieldType = *((*field)->type());\r
1984 \r
1985                                 if(fieldType.totalRegisterCount() <= registers)\r
1986                                 {\r
1987                                         registers -= fieldType.totalRegisterCount();\r
1988                                         elements += fieldType.getObjectSize();\r
1989                                 }\r
1990                                 else   // Register within this field\r
1991                                 {\r
1992                                         return elements + componentCount(fieldType, registers);\r
1993                                 }\r
1994                         }\r
1995                 }\r
1996                 else if(type.isMatrix())\r
1997                 {\r
1998                         return registers * type.registerSize();\r
1999                 }\r
2000 \r
2001                 UNREACHABLE(0);\r
2002                 return 0;\r
2003         }\r
2004 \r
2005         int registerSize(const TType &type, int registers)\r
2006         {\r
2007                 if(registers == 0)\r
2008                 {\r
2009                         if(type.isStruct())\r
2010                         {\r
2011                                 return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0);\r
2012                         }\r
2013 \r
2014                         return type.registerSize();\r
2015                 }\r
2016 \r
2017                 if(type.isArray() && registers >= type.elementRegisterCount())\r
2018                 {\r
2019                         int index = registers / type.elementRegisterCount();\r
2020                         registers -= index * type.elementRegisterCount();\r
2021                         return registerSize(type, registers);\r
2022                 }\r
2023 \r
2024                 if(type.isStruct() || type.isInterfaceBlock())\r
2025                 {\r
2026                         const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();\r
2027                         int elements = 0;\r
2028 \r
2029                         for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++)\r
2030                         {\r
2031                                 const TType &fieldType = *((*field)->type());\r
2032 \r
2033                                 if(fieldType.totalRegisterCount() <= registers)\r
2034                                 {\r
2035                                         registers -= fieldType.totalRegisterCount();\r
2036                                         elements += fieldType.getObjectSize();\r
2037                                 }\r
2038                                 else   // Register within this field\r
2039                                 {\r
2040                                         return registerSize(fieldType, registers);\r
2041                                 }\r
2042                         }\r
2043                 }\r
2044                 else if(type.isMatrix())\r
2045                 {\r
2046                         return registerSize(type, 0);\r
2047                 }\r
2048 \r
2049                 UNREACHABLE(0);\r
2050                 return 0;\r
2051         }\r
2052 \r
2053         void OutputASM::argument(sw::Shader::SourceParameter &parameter, TIntermNode *argument, int index)\r
2054         {\r
2055                 if(argument)\r
2056                 {\r
2057                         TIntermTyped *arg = argument->getAsTyped();\r
2058                         const TType &type = arg->getType();\r
2059                         index = (index >= arg->totalRegisterCount()) ? arg->totalRegisterCount() - 1 : index;\r
2060 \r
2061                         int size = registerSize(type, index);\r
2062 \r
2063                         parameter.type = registerType(arg);\r
2064 \r
2065                         if(arg->getQualifier() == EvqConstExpr)\r
2066                         {\r
2067                                 int component = componentCount(type, index);\r
2068                                 ConstantUnion *constants = arg->getAsConstantUnion()->getUnionArrayPointer();\r
2069 \r
2070                                 for(int i = 0; i < 4; i++)\r
2071                                 {\r
2072                                         if(size == 1)   // Replicate\r
2073                                         {\r
2074                                                 parameter.value[i] = constants[component + 0].getAsFloat();\r
2075                                         }\r
2076                                         else if(i < size)\r
2077                                         {\r
2078                                                 parameter.value[i] = constants[component + i].getAsFloat();\r
2079                                         }\r
2080                                         else\r
2081                                         {\r
2082                                                 parameter.value[i] = 0.0f;\r
2083                                         }\r
2084                                 }\r
2085                         }\r
2086                         else\r
2087                         {\r
2088                                 parameter.index = registerIndex(arg) + index;\r
2089 \r
2090                                 if(isSamplerRegister(arg))\r
2091                                 {\r
2092                                         TIntermBinary *binary = argument->getAsBinaryNode();\r
2093 \r
2094                                         if(binary)\r
2095                                         {\r
2096                                                 TIntermTyped *left = binary->getLeft();\r
2097                                                 TIntermTyped *right = binary->getRight();\r
2098 \r
2099                                                 switch(binary->getOp())\r
2100                                                 {\r
2101                                                 case EOpIndexDirect:\r
2102                                                         parameter.index += right->getAsConstantUnion()->getIConst(0);\r
2103                                                         break;\r
2104                                                 case EOpIndexIndirect:\r
2105                                                         if(left->getArraySize() > 1)\r
2106                                                         {\r
2107                                                                 parameter.rel.type = registerType(binary->getRight());\r
2108                                                                 parameter.rel.index = registerIndex(binary->getRight());\r
2109                                                                 parameter.rel.scale = 1;\r
2110                                                                 parameter.rel.deterministic = true;\r
2111                                                         }\r
2112                                                         break;\r
2113                                                 case EOpIndexDirectStruct:\r
2114                                                 case EOpIndexDirectInterfaceBlock:\r
2115                                                         parameter.index += right->getAsConstantUnion()->getIConst(0);\r
2116                                                         break;\r
2117                                                 default:\r
2118                                                         UNREACHABLE(binary->getOp());\r
2119                                                 }\r
2120                                         }\r
2121                                 }\r
2122                         }\r
2123 \r
2124                         if(!IsSampler(arg->getBasicType()))\r
2125                         {\r
2126                                 parameter.swizzle = readSwizzle(arg, size);\r
2127                         }\r
2128                 }\r
2129         }\r
2130 \r
2131         void OutputASM::copy(TIntermTyped *dst, TIntermNode *src, int offset)\r
2132         {\r
2133                 for(int index = 0; index < dst->totalRegisterCount(); index++)\r
2134                 {\r
2135                         Instruction *mov = emit(sw::Shader::OPCODE_MOV, dst, src);\r
2136                         mov->dst.index += index;\r
2137                         mov->dst.mask = writeMask(dst, index);\r
2138                         argument(mov->src[0], src, offset + index);\r
2139                 }\r
2140         }\r
2141 \r
2142         int swizzleElement(int swizzle, int index)\r
2143         {\r
2144                 return (swizzle >> (index * 2)) & 0x03;\r
2145         }\r
2146 \r
2147         int swizzleSwizzle(int leftSwizzle, int rightSwizzle)\r
2148         {\r
2149                 return (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 0)) << 0) |\r
2150                        (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 1)) << 2) |\r
2151                        (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 2)) << 4) |\r
2152                        (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 3)) << 6);\r
2153         }\r
2154 \r
2155         void OutputASM::assignLvalue(TIntermTyped *dst, TIntermTyped *src)\r
2156         {\r
2157                 if(src &&\r
2158                         ((src->isVector() && (!dst->isVector() || (dst->getNominalSize() != dst->getNominalSize()))) ||\r
2159                          (src->isMatrix() && (!dst->isMatrix() || (src->getNominalSize() != dst->getNominalSize()) || (src->getSecondarySize() != dst->getSecondarySize())))))\r
2160                 {\r
2161                         return mContext.error(src->getLine(), "Result type should match the l-value type in compound assignment", src->isVector() ? "vector" : "matrix");\r
2162                 }\r
2163 \r
2164                 TIntermBinary *binary = dst->getAsBinaryNode();\r
2165 \r
2166                 if(binary && binary->getOp() == EOpIndexIndirect && dst->isScalar())\r
2167                 {\r
2168                         Instruction *insert = new Instruction(sw::Shader::OPCODE_INSERT);\r
2169 \r
2170                         Temporary address(this);\r
2171                         lvalue(insert->dst, address, dst);\r
2172 \r
2173                         insert->src[0].type = insert->dst.type;\r
2174                         insert->src[0].index = insert->dst.index;\r
2175                         insert->src[0].rel = insert->dst.rel;\r
2176                         argument(insert->src[1], src);\r
2177                         argument(insert->src[2], binary->getRight());\r
2178 \r
2179                         shader->append(insert);\r
2180                 }\r
2181                 else\r
2182                 {\r
2183                         for(int offset = 0; offset < dst->totalRegisterCount(); offset++)\r
2184                         {\r
2185                                 Instruction *mov = new Instruction(sw::Shader::OPCODE_MOV);\r
2186 \r
2187                                 Temporary address(this);\r
2188                                 int swizzle = lvalue(mov->dst, address, dst);\r
2189                                 mov->dst.index += offset;\r
2190 \r
2191                                 if(offset > 0)\r
2192                                 {\r
2193                                         mov->dst.mask = writeMask(dst, offset);\r
2194                                 }\r
2195 \r
2196                                 argument(mov->src[0], src, offset);\r
2197                                 mov->src[0].swizzle = swizzleSwizzle(mov->src[0].swizzle, swizzle);\r
2198 \r
2199                                 shader->append(mov);\r
2200                         }\r
2201                 }\r
2202         }\r
2203 \r
2204         int OutputASM::lvalue(sw::Shader::DestinationParameter &dst, Temporary &address, TIntermTyped *node)\r
2205         {\r
2206                 TIntermTyped *result = node;\r
2207                 TIntermBinary *binary = node->getAsBinaryNode();\r
2208                 TIntermSymbol *symbol = node->getAsSymbolNode();\r
2209 \r
2210                 if(binary)\r
2211                 {\r
2212                         TIntermTyped *left = binary->getLeft();\r
2213                         TIntermTyped *right = binary->getRight();\r
2214 \r
2215                         int leftSwizzle = lvalue(dst, address, left);   // Resolve the l-value of the left side\r
2216 \r
2217                         switch(binary->getOp())\r
2218                         {\r
2219                         case EOpIndexDirect:\r
2220                                 {\r
2221                                         int rightIndex = right->getAsConstantUnion()->getIConst(0);\r
2222 \r
2223                                         if(left->isRegister())\r
2224                                         {\r
2225                                                 int leftMask = dst.mask;\r
2226 \r
2227                                                 dst.mask = 1;\r
2228                                                 while((leftMask & dst.mask) == 0)\r
2229                                                 {\r
2230                                                         dst.mask = dst.mask << 1;\r
2231                                                 }\r
2232 \r
2233                                                 int element = swizzleElement(leftSwizzle, rightIndex);\r
2234                                                 dst.mask = 1 << element;\r
2235 \r
2236                                                 return element;\r
2237                                         }\r
2238                                         else if(left->isArray() || left->isMatrix())\r
2239                                         {\r
2240                                                 dst.index += rightIndex * result->totalRegisterCount();\r
2241                                                 return 0xE4;\r
2242                                         }\r
2243                                         else UNREACHABLE(0);\r
2244                                 }\r
2245                                 break;\r
2246                         case EOpIndexIndirect:\r
2247                                 {\r
2248                                         if(left->isRegister())\r
2249                                         {\r
2250                                                 // Requires INSERT instruction (handled by calling function)\r
2251                                         }\r
2252                                         else if(left->isArray() || left->isMatrix())\r
2253                                         {\r
2254                                                 int scale = result->totalRegisterCount();\r
2255 \r
2256                                                 if(dst.rel.type == sw::Shader::PARAMETER_VOID)   // Use the index register as the relative address directly\r
2257                                                 {\r
2258                                                         if(left->totalRegisterCount() > 1)\r
2259                                                         {\r
2260                                                                 sw::Shader::SourceParameter relativeRegister;\r
2261                                                                 argument(relativeRegister, right);\r
2262 \r
2263                                                                 dst.rel.index = relativeRegister.index;\r
2264                                                                 dst.rel.type = relativeRegister.type;\r
2265                                                                 dst.rel.scale = scale;\r
2266                                                                 dst.rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);\r
2267                                                         }\r
2268                                                 }\r
2269                                                 else if(dst.rel.index != registerIndex(&address))   // Move the previous index register to the address register\r
2270                                                 {\r
2271                                                         if(scale == 1)\r
2272                                                         {\r
2273                                                                 Constant oldScale((int)dst.rel.scale);\r
2274                                                                 Instruction *mad = emit(sw::Shader::OPCODE_IMAD, &address, &address, &oldScale, right);\r
2275                                                                 mad->src[0].index = dst.rel.index;\r
2276                                                                 mad->src[0].type = dst.rel.type;\r
2277                                                         }\r
2278                                                         else\r
2279                                                         {\r
2280                                                                 Constant oldScale((int)dst.rel.scale);\r
2281                                                                 Instruction *mul = emit(sw::Shader::OPCODE_IMUL, &address, &address, &oldScale);\r
2282                                                                 mul->src[0].index = dst.rel.index;\r
2283                                                                 mul->src[0].type = dst.rel.type;\r
2284 \r
2285                                                                 Constant newScale(scale);\r
2286                                                                 emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);\r
2287                                                         }\r
2288 \r
2289                                                         dst.rel.type = sw::Shader::PARAMETER_TEMP;\r
2290                                                         dst.rel.index = registerIndex(&address);\r
2291                                                         dst.rel.scale = 1;\r
2292                                                 }\r
2293                                                 else   // Just add the new index to the address register\r
2294                                                 {\r
2295                                                         if(scale == 1)\r
2296                                                         {\r
2297                                                                 emit(sw::Shader::OPCODE_IADD, &address, &address, right);\r
2298                                                         }\r
2299                                                         else\r
2300                                                         {\r
2301                                                                 Constant newScale(scale);\r
2302                                                                 emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);\r
2303                                                         }\r
2304                                                 }\r
2305                                         }\r
2306                                         else UNREACHABLE(0);\r
2307                                 }\r
2308                                 break;\r
2309                         case EOpIndexDirectStruct:\r
2310                         case EOpIndexDirectInterfaceBlock:\r
2311                                 {\r
2312                                         const TFieldList& fields = (binary->getOp() == EOpIndexDirectStruct) ?\r
2313                                                                left->getType().getStruct()->fields() :\r
2314                                                                left->getType().getInterfaceBlock()->fields();\r
2315                                         int index = right->getAsConstantUnion()->getIConst(0);\r
2316                                         int fieldOffset = 0;\r
2317 \r
2318                                         for(int i = 0; i < index; i++)\r
2319                                         {\r
2320                                                 fieldOffset += fields[i]->type()->totalRegisterCount();\r
2321                                         }\r
2322 \r
2323                                         dst.type = registerType(left);\r
2324                                         dst.index += fieldOffset;\r
2325                                         dst.mask = writeMask(right);\r
2326 \r
2327                                         return 0xE4;\r
2328                                 }\r
2329                                 break;\r
2330                         case EOpVectorSwizzle:\r
2331                                 {\r
2332                                         ASSERT(left->isRegister());\r
2333 \r
2334                                         int leftMask = dst.mask;\r
2335 \r
2336                                         int swizzle = 0;\r
2337                                         int rightMask = 0;\r
2338 \r
2339                                         TIntermSequence &sequence = right->getAsAggregate()->getSequence();\r
2340 \r
2341                                         for(unsigned int i = 0; i < sequence.size(); i++)\r
2342                                         {\r
2343                                                 int index = sequence[i]->getAsConstantUnion()->getIConst(0);\r
2344 \r
2345                                                 int element = swizzleElement(leftSwizzle, index);\r
2346                                                 rightMask = rightMask | (1 << element);\r
2347                                                 swizzle = swizzle | swizzleElement(leftSwizzle, i) << (element * 2);\r
2348                                         }\r
2349 \r
2350                                         dst.mask = leftMask & rightMask;\r
2351 \r
2352                                         return swizzle;\r
2353                                 }\r
2354                                 break;\r
2355                         default:\r
2356                                 UNREACHABLE(binary->getOp());   // Not an l-value operator\r
2357                                 break;\r
2358                         }\r
2359                 }\r
2360                 else if(symbol)\r
2361                 {\r
2362                         dst.type = registerType(symbol);\r
2363                         dst.index = registerIndex(symbol);\r
2364                         dst.mask = writeMask(symbol);\r
2365                         return 0xE4;\r
2366                 }\r
2367 \r
2368                 return 0xE4;\r
2369         }\r
2370 \r
2371         sw::Shader::ParameterType OutputASM::registerType(TIntermTyped *operand)\r
2372         {\r
2373                 if(isSamplerRegister(operand))\r
2374                 {\r
2375                         return sw::Shader::PARAMETER_SAMPLER;\r
2376                 }\r
2377 \r
2378                 const TQualifier qualifier = operand->getQualifier();\r
2379                 if((EvqFragColor == qualifier) || (EvqFragData == qualifier))\r
2380                 {\r
2381                         if(((EvqFragData == qualifier) && (EvqFragColor == outputQualifier)) ||\r
2382                            ((EvqFragColor == qualifier) && (EvqFragData == outputQualifier)))\r
2383                         {\r
2384                                 mContext.error(operand->getLine(), "static assignment to both gl_FragData and gl_FragColor", "");\r
2385                         }\r
2386                         outputQualifier = qualifier;\r
2387                 }\r
2388 \r
2389                 switch(qualifier)\r
2390                 {\r
2391                 case EvqTemporary:           return sw::Shader::PARAMETER_TEMP;\r
2392                 case EvqGlobal:              return sw::Shader::PARAMETER_TEMP;\r
2393                 case EvqConstExpr:           return sw::Shader::PARAMETER_FLOAT4LITERAL;   // All converted to float\r
2394                 case EvqAttribute:           return sw::Shader::PARAMETER_INPUT;\r
2395                 case EvqVaryingIn:           return sw::Shader::PARAMETER_INPUT;\r
2396                 case EvqVaryingOut:          return sw::Shader::PARAMETER_OUTPUT;\r
2397                 case EvqVertexIn:            return sw::Shader::PARAMETER_INPUT;\r
2398                 case EvqFragmentOut:         return sw::Shader::PARAMETER_COLOROUT;\r
2399                 case EvqVertexOut:           return sw::Shader::PARAMETER_OUTPUT;\r
2400                 case EvqFragmentIn:          return sw::Shader::PARAMETER_INPUT;\r
2401                 case EvqInvariantVaryingIn:  return sw::Shader::PARAMETER_INPUT;    // FIXME: Guarantee invariance at the backend\r
2402                 case EvqInvariantVaryingOut: return sw::Shader::PARAMETER_OUTPUT;   // FIXME: Guarantee invariance at the backend\r
2403                 case EvqSmooth:              return sw::Shader::PARAMETER_OUTPUT;\r
2404                 case EvqFlat:                return sw::Shader::PARAMETER_OUTPUT;\r
2405                 case EvqCentroidOut:         return sw::Shader::PARAMETER_OUTPUT;\r
2406                 case EvqSmoothIn:            return sw::Shader::PARAMETER_INPUT;\r
2407                 case EvqFlatIn:              return sw::Shader::PARAMETER_INPUT;\r
2408                 case EvqCentroidIn:          return sw::Shader::PARAMETER_INPUT;\r
2409                 case EvqUniform:             return sw::Shader::PARAMETER_CONST;\r
2410                 case EvqIn:                  return sw::Shader::PARAMETER_TEMP;\r
2411                 case EvqOut:                 return sw::Shader::PARAMETER_TEMP;\r
2412                 case EvqInOut:               return sw::Shader::PARAMETER_TEMP;\r
2413                 case EvqConstReadOnly:       return sw::Shader::PARAMETER_TEMP;\r
2414                 case EvqPosition:            return sw::Shader::PARAMETER_OUTPUT;\r
2415                 case EvqPointSize:           return sw::Shader::PARAMETER_OUTPUT;\r
2416                 case EvqInstanceID:          return sw::Shader::PARAMETER_MISCTYPE;\r
2417                 case EvqFragCoord:           return sw::Shader::PARAMETER_MISCTYPE;\r
2418                 case EvqFrontFacing:         return sw::Shader::PARAMETER_MISCTYPE;\r
2419                 case EvqPointCoord:          return sw::Shader::PARAMETER_INPUT;\r
2420                 case EvqFragColor:           return sw::Shader::PARAMETER_COLOROUT;\r
2421                 case EvqFragData:            return sw::Shader::PARAMETER_COLOROUT;\r
2422                 case EvqFragDepth:           return sw::Shader::PARAMETER_DEPTHOUT;\r
2423                 default: UNREACHABLE(qualifier);\r
2424                 }\r
2425 \r
2426                 return sw::Shader::PARAMETER_VOID;\r
2427         }\r
2428 \r
2429         unsigned int OutputASM::registerIndex(TIntermTyped *operand)\r
2430         {\r
2431                 if(isSamplerRegister(operand))\r
2432                 {\r
2433                         return samplerRegister(operand);\r
2434                 }\r
2435 \r
2436                 switch(operand->getQualifier())\r
2437                 {\r
2438                 case EvqTemporary:           return temporaryRegister(operand);\r
2439                 case EvqGlobal:              return temporaryRegister(operand);\r
2440                 case EvqConstExpr:           UNREACHABLE(EvqConstExpr);\r
2441                 case EvqAttribute:           return attributeRegister(operand);\r
2442                 case EvqVaryingIn:           return varyingRegister(operand);\r
2443                 case EvqVaryingOut:          return varyingRegister(operand);\r
2444                 case EvqVertexIn:            return attributeRegister(operand);\r
2445                 case EvqFragmentOut:         return fragmentOutputRegister(operand);\r
2446                 case EvqVertexOut:           return varyingRegister(operand);\r
2447                 case EvqFragmentIn:          return varyingRegister(operand);\r
2448                 case EvqInvariantVaryingIn:  return varyingRegister(operand);\r
2449                 case EvqInvariantVaryingOut: return varyingRegister(operand);\r
2450                 case EvqSmooth:              return varyingRegister(operand);\r
2451                 case EvqFlat:                return varyingRegister(operand);\r
2452                 case EvqCentroidOut:         return varyingRegister(operand);\r
2453                 case EvqSmoothIn:            return varyingRegister(operand);\r
2454                 case EvqFlatIn:              return varyingRegister(operand);\r
2455                 case EvqCentroidIn:          return varyingRegister(operand);\r
2456                 case EvqUniform:             return uniformRegister(operand);\r
2457                 case EvqIn:                  return temporaryRegister(operand);\r
2458                 case EvqOut:                 return temporaryRegister(operand);\r
2459                 case EvqInOut:               return temporaryRegister(operand);\r
2460                 case EvqConstReadOnly:       return temporaryRegister(operand);\r
2461                 case EvqPosition:            return varyingRegister(operand);\r
2462                 case EvqPointSize:           return varyingRegister(operand);\r
2463                 case EvqInstanceID:          vertexShader->instanceIdDeclared = true; return 0;\r
2464                 case EvqFragCoord:           pixelShader->vPosDeclared = true;  return 0;\r
2465                 case EvqFrontFacing:         pixelShader->vFaceDeclared = true; return 1;\r
2466                 case EvqPointCoord:          return varyingRegister(operand);\r
2467                 case EvqFragColor:           return 0;\r
2468                 case EvqFragData:            return 0;\r
2469                 case EvqFragDepth:           return 0;\r
2470                 default: UNREACHABLE(operand->getQualifier());\r
2471                 }\r
2472 \r
2473                 return 0;\r
2474         }\r
2475 \r
2476         int OutputASM::writeMask(TIntermTyped *destination, int index)\r
2477         {\r
2478                 if(destination->getQualifier() == EvqPointSize)\r
2479                 {\r
2480                         return 0x2;   // Point size stored in the y component\r
2481                 }\r
2482 \r
2483                 return 0xF >> (4 - registerSize(destination->getType(), index));\r
2484         }\r
2485 \r
2486         int OutputASM::readSwizzle(TIntermTyped *argument, int size)\r
2487         {\r
2488                 if(argument->getQualifier() == EvqPointSize)\r
2489                 {\r
2490                         return 0x55;   // Point size stored in the y component\r
2491                 }\r
2492 \r
2493                 static const unsigned char swizzleSize[5] = {0x00, 0x00, 0x54, 0xA4, 0xE4};   // (void), xxxx, xyyy, xyzz, xyzw\r
2494 \r
2495                 return swizzleSize[size];\r
2496         }\r
2497 \r
2498         // Conservatively checks whether an expression is fast to compute and has no side effects\r
2499         bool OutputASM::trivial(TIntermTyped *expression, int budget)\r
2500         {\r
2501                 if(!expression->isRegister())\r
2502                 {\r
2503                         return false;\r
2504                 }\r
2505 \r
2506                 return cost(expression, budget) >= 0;\r
2507         }\r
2508 \r
2509         // Returns the remaining computing budget (if < 0 the expression is too expensive or has side effects)\r
2510         int OutputASM::cost(TIntermNode *expression, int budget)\r
2511         {\r
2512                 if(budget < 0)\r
2513                 {\r
2514                         return budget;\r
2515                 }\r
2516 \r
2517                 if(expression->getAsSymbolNode())\r
2518                 {\r
2519                         return budget;\r
2520                 }\r
2521                 else if(expression->getAsConstantUnion())\r
2522                 {\r
2523                         return budget;\r
2524                 }\r
2525                 else if(expression->getAsBinaryNode())\r
2526                 {\r
2527                         TIntermBinary *binary = expression->getAsBinaryNode();\r
2528 \r
2529                         switch(binary->getOp())\r
2530                         {\r
2531                         case EOpVectorSwizzle:\r
2532                         case EOpIndexDirect:\r
2533                         case EOpIndexDirectStruct:\r
2534                         case EOpIndexDirectInterfaceBlock:\r
2535                                 return cost(binary->getLeft(), budget - 0);\r
2536                         case EOpAdd:\r
2537                         case EOpSub:\r
2538                         case EOpMul:\r
2539                                 return cost(binary->getLeft(), cost(binary->getRight(), budget - 1));\r
2540                         default:\r
2541                                 return -1;\r
2542                         }\r
2543                 }\r
2544                 else if(expression->getAsUnaryNode())\r
2545                 {\r
2546                         TIntermUnary *unary = expression->getAsUnaryNode();\r
2547 \r
2548                         switch(unary->getOp())\r
2549                         {\r
2550                         case EOpAbs:\r
2551                         case EOpNegative:\r
2552                                 return cost(unary->getOperand(), budget - 1);\r
2553                         default:\r
2554                                 return -1;\r
2555                         }\r
2556                 }\r
2557                 else if(expression->getAsSelectionNode())\r
2558                 {\r
2559                         TIntermSelection *selection = expression->getAsSelectionNode();\r
2560 \r
2561                         if(selection->usesTernaryOperator())\r
2562                         {\r
2563                                 TIntermTyped *condition = selection->getCondition();\r
2564                                 TIntermNode *trueBlock = selection->getTrueBlock();\r
2565                                 TIntermNode *falseBlock = selection->getFalseBlock();\r
2566                                 TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();\r
2567 \r
2568                                 if(constantCondition)\r
2569                                 {\r
2570                                         bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();\r
2571 \r
2572                                         if(trueCondition)\r
2573                                         {\r
2574                                                 return cost(trueBlock, budget - 0);\r
2575                                         }\r
2576                                         else\r
2577                                         {\r
2578                                                 return cost(falseBlock, budget - 0);\r
2579                                         }\r
2580                                 }\r
2581                                 else\r
2582                                 {\r
2583                                         return cost(trueBlock, cost(falseBlock, budget - 2));\r
2584                                 }\r
2585                         }\r
2586                 }\r
2587 \r
2588                 return -1;\r
2589         }\r
2590 \r
2591         const Function *OutputASM::findFunction(const TString &name)\r
2592         {\r
2593                 for(unsigned int f = 0; f < functionArray.size(); f++)\r
2594                 {\r
2595                         if(functionArray[f].name == name)\r
2596                         {\r
2597                                 return &functionArray[f];\r
2598                         }\r
2599                 }\r
2600 \r
2601                 return 0;\r
2602         }\r
2603 \r
2604         int OutputASM::temporaryRegister(TIntermTyped *temporary)\r
2605         {\r
2606                 return allocate(temporaries, temporary);\r
2607         }\r
2608 \r
2609         int OutputASM::varyingRegister(TIntermTyped *varying)\r
2610         {\r
2611                 int var = lookup(varyings, varying);\r
2612 \r
2613                 if(var == -1)\r
2614                 {\r
2615                         var = allocate(varyings, varying);\r
2616                         int componentCount = varying->registerSize();\r
2617                         int registerCount = varying->totalRegisterCount();\r
2618 \r
2619                         if(pixelShader)\r
2620                         {\r
2621                                 if((var + registerCount) > sw::PixelShader::MAX_INPUT_VARYINGS)\r
2622                                 {\r
2623                                         mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "fragment shader");\r
2624                                         return 0;\r
2625                                 }\r
2626 \r
2627                                 if(varying->getQualifier() == EvqPointCoord)\r
2628                                 {\r
2629                                         ASSERT(varying->isRegister());\r
2630                                         if(componentCount >= 1) pixelShader->semantic[var][0] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);\r
2631                                         if(componentCount >= 2) pixelShader->semantic[var][1] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);\r
2632                                         if(componentCount >= 3) pixelShader->semantic[var][2] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);\r
2633                                         if(componentCount >= 4) pixelShader->semantic[var][3] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);\r
2634                                 }\r
2635                                 else\r
2636                                 {\r
2637                                         for(int i = 0; i < varying->totalRegisterCount(); i++)\r
2638                                         {\r
2639                                                 if(componentCount >= 1) pixelShader->semantic[var + i][0] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);\r
2640                                                 if(componentCount >= 2) pixelShader->semantic[var + i][1] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);\r
2641                                                 if(componentCount >= 3) pixelShader->semantic[var + i][2] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);\r
2642                                                 if(componentCount >= 4) pixelShader->semantic[var + i][3] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);\r
2643                                         }\r
2644                                 }\r
2645                         }\r
2646                         else if(vertexShader)\r
2647                         {\r
2648                                 if((var + registerCount) > sw::VertexShader::MAX_OUTPUT_VARYINGS)\r
2649                                 {\r
2650                                         mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader");\r
2651                                         return 0;\r
2652                                 }\r
2653 \r
2654                                 if(varying->getQualifier() == EvqPosition)\r
2655                                 {\r
2656                                         ASSERT(varying->isRegister());\r
2657                                         vertexShader->output[var][0] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);\r
2658                                         vertexShader->output[var][1] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);\r
2659                                         vertexShader->output[var][2] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);\r
2660                                         vertexShader->output[var][3] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);\r
2661                                         vertexShader->positionRegister = var;\r
2662                                 }\r
2663                                 else if(varying->getQualifier() == EvqPointSize)\r
2664                                 {\r
2665                                         ASSERT(varying->isRegister());\r
2666                                         vertexShader->output[var][0] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);\r
2667                                         vertexShader->output[var][1] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);\r
2668                                         vertexShader->output[var][2] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);\r
2669                                         vertexShader->output[var][3] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);\r
2670                                         vertexShader->pointSizeRegister = var;\r
2671                                 }\r
2672                                 else\r
2673                                 {\r
2674                                         // Semantic indexes for user varyings will be assigned during program link to match the pixel shader\r
2675                                 }\r
2676                         }\r
2677                         else UNREACHABLE(0);\r
2678 \r
2679                         declareVarying(varying, var);\r
2680                 }\r
2681 \r
2682                 return var;\r
2683         }\r
2684 \r
2685         void OutputASM::declareVarying(TIntermTyped *varying, int reg)\r
2686         {\r
2687                 if(varying->getQualifier() != EvqPointCoord)   // gl_PointCoord does not need linking\r
2688                 {\r
2689                         const TType &type = varying->getType();\r
2690                         const char *name = varying->getAsSymbolNode()->getSymbol().c_str();\r
2691                         VaryingList &activeVaryings = shaderObject->varyings;\r
2692 \r
2693                         // Check if this varying has been declared before without having a register assigned\r
2694                         for(VaryingList::iterator v = activeVaryings.begin(); v != activeVaryings.end(); v++)\r
2695                         {\r
2696                                 if(v->name == name)\r
2697                                 {\r
2698                                         if(reg >= 0)\r
2699                                         {\r
2700                                                 ASSERT(v->reg < 0 || v->reg == reg);\r
2701                                                 v->reg = reg;\r
2702                                         }\r
2703 \r
2704                                         return;\r
2705                                 }\r
2706                         }\r
2707 \r
2708                         activeVaryings.push_back(glsl::Varying(glVariableType(type), name, varying->getArraySize(), reg, 0));\r
2709                 }\r
2710         }\r
2711 \r
2712         int OutputASM::uniformRegister(TIntermTyped *uniform)\r
2713         {\r
2714                 const TType &type = uniform->getType();\r
2715                 ASSERT(!IsSampler(type.getBasicType()));\r
2716                 TInterfaceBlock *block = type.getAsInterfaceBlock();\r
2717                 TIntermSymbol *symbol = uniform->getAsSymbolNode();\r
2718                 ASSERT(symbol || block);\r
2719 \r
2720                 if(symbol || block)\r
2721                 {\r
2722                         int index = lookup(uniforms, uniform);\r
2723 \r
2724                         if(index == -1)\r
2725                         {\r
2726                                 index = allocate(uniforms, uniform);\r
2727                                 const TString &name = symbol ? symbol->getSymbol() : block->name();\r
2728 \r
2729                                 declareUniform(type, name, index);\r
2730                         }\r
2731 \r
2732                         return index;\r
2733                 }\r
2734 \r
2735                 return 0;\r
2736         }\r
2737 \r
2738         int OutputASM::attributeRegister(TIntermTyped *attribute)\r
2739         {\r
2740                 ASSERT(!attribute->isArray());\r
2741 \r
2742                 int index = lookup(attributes, attribute);\r
2743 \r
2744                 if(index == -1)\r
2745                 {\r
2746                         TIntermSymbol *symbol = attribute->getAsSymbolNode();\r
2747                         ASSERT(symbol);\r
2748 \r
2749                         if(symbol)\r
2750                         {\r
2751                                 index = allocate(attributes, attribute);\r
2752                                 const TType &type = attribute->getType();\r
2753                                 int registerCount = attribute->totalRegisterCount();\r
2754 \r
2755                                 if(vertexShader && (index + registerCount) <= sw::VertexShader::MAX_INPUT_ATTRIBUTES)\r
2756                                 {\r
2757                                         for(int i = 0; i < registerCount; i++)\r
2758                                         {\r
2759                                                 vertexShader->input[index + i] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i);\r
2760                                         }\r
2761                                 }\r
2762 \r
2763                                 ActiveAttributes &activeAttributes = shaderObject->activeAttributes;\r
2764 \r
2765                                 const char *name = symbol->getSymbol().c_str();\r
2766                                 activeAttributes.push_back(Attribute(glVariableType(type), name, type.getArraySize(), type.getLayoutQualifier().location, index));\r
2767                         }\r
2768                 }\r
2769 \r
2770                 return index;\r
2771         }\r
2772 \r
2773         int OutputASM::fragmentOutputRegister(TIntermTyped *fragmentOutput)\r
2774         {\r
2775                 return allocate(fragmentOutputs, fragmentOutput);\r
2776         }\r
2777 \r
2778         int OutputASM::samplerRegister(TIntermTyped *sampler)\r
2779         {\r
2780                 ASSERT(IsSampler(sampler->getType().getBasicType()));\r
2781                 TIntermSymbol *symbol = sampler->getAsSymbolNode();\r
2782                 TIntermBinary *binary = sampler->getAsBinaryNode();\r
2783 \r
2784                 if(symbol)\r
2785                 {\r
2786                         return samplerRegister(symbol);\r
2787                 }\r
2788                 else if(binary)\r
2789                 {\r
2790                         ASSERT(binary->getOp() == EOpIndexDirect || binary->getOp() == EOpIndexIndirect ||\r
2791                                    binary->getOp() == EOpIndexDirectStruct || binary->getOp() == EOpIndexDirectInterfaceBlock);\r
2792 \r
2793                         return samplerRegister(binary->getLeft());   // Index added later\r
2794                 }\r
2795                 else UNREACHABLE(0);\r
2796 \r
2797                 return 0;\r
2798         }\r
2799 \r
2800         int OutputASM::samplerRegister(TIntermSymbol *sampler)\r
2801         {\r
2802                 const TType &type = sampler->getType();\r
2803                 ASSERT(IsSampler(type.getBasicType()) || type.getStruct());   // Structures can contain samplers\r
2804 \r
2805                 int index = lookup(samplers, sampler);\r
2806 \r
2807                 if(index == -1)\r
2808                 {\r
2809                         index = allocate(samplers, sampler);\r
2810 \r
2811                         if(sampler->getQualifier() == EvqUniform)\r
2812                         {\r
2813                                 const char *name = sampler->getSymbol().c_str();\r
2814                                 declareUniform(type, name, index);\r
2815                         }\r
2816                 }\r
2817 \r
2818                 return index;\r
2819         }\r
2820 \r
2821         int OutputASM::lookup(VariableArray &list, TIntermTyped *variable)\r
2822         {\r
2823                 for(unsigned int i = 0; i < list.size(); i++)\r
2824                 {\r
2825                         if(list[i] == variable)\r
2826                         {\r
2827                                 return i;   // Pointer match\r
2828                         }\r
2829                 }\r
2830 \r
2831                 TIntermSymbol *varSymbol = variable->getAsSymbolNode();\r
2832                 TInterfaceBlock *varBlock = variable->getType().getAsInterfaceBlock();\r
2833 \r
2834                 if(varBlock)\r
2835                 {\r
2836                         for(unsigned int i = 0; i < list.size(); i++)\r
2837                         {\r
2838                                 if(list[i])\r
2839                                 {\r
2840                                         TInterfaceBlock *listBlock = list[i]->getType().getAsInterfaceBlock();\r
2841 \r
2842                                         if(listBlock)\r
2843                                         {\r
2844                                                 if(listBlock->name() == varBlock->name())\r
2845                                                 {\r
2846                                                         ASSERT(listBlock->arraySize() == varBlock->arraySize());\r
2847                                                         ASSERT(listBlock->fields() == varBlock->fields());\r
2848                                                         ASSERT(listBlock->blockStorage() == varBlock->blockStorage());\r
2849                                                         ASSERT(listBlock->matrixPacking() == varBlock->matrixPacking());\r
2850 \r
2851                                                         return i;\r
2852                                                 }\r
2853                                         }\r
2854                                 }\r
2855                         }\r
2856                 }\r
2857                 else if(varSymbol)\r
2858                 {\r
2859                         for(unsigned int i = 0; i < list.size(); i++)\r
2860                         {\r
2861                                 if(list[i])\r
2862                                 {\r
2863                                         TIntermSymbol *listSymbol = list[i]->getAsSymbolNode();\r
2864 \r
2865                                         if(listSymbol)\r
2866                                         {\r
2867                                                 if(listSymbol->getId() == varSymbol->getId())\r
2868                                                 {\r
2869                                                         ASSERT(listSymbol->getSymbol() == varSymbol->getSymbol());\r
2870                                                         ASSERT(listSymbol->getType() == varSymbol->getType());\r
2871                                                         ASSERT(listSymbol->getQualifier() == varSymbol->getQualifier());\r
2872 \r
2873                                                         return i;\r
2874                                                 }\r
2875                                         }\r
2876                                 }\r
2877                         }\r
2878                 }\r
2879 \r
2880                 return -1;\r
2881         }\r
2882 \r
2883         int OutputASM::allocate(VariableArray &list, TIntermTyped *variable)\r
2884         {\r
2885                 int index = lookup(list, variable);\r
2886 \r
2887                 if(index == -1)\r
2888                 {\r
2889                         unsigned int registerCount = variable->totalRegisterCount();\r
2890 \r
2891                         for(unsigned int i = 0; i < list.size(); i++)\r
2892                         {\r
2893                                 if(list[i] == 0)\r
2894                                 {\r
2895                                         unsigned int j = 1;\r
2896                                         for( ; j < registerCount && (i + j) < list.size(); j++)\r
2897                                         {\r
2898                                                 if(list[i + j] != 0)\r
2899                                                 {\r
2900                                                         break;\r
2901                                                 }\r
2902                                         }\r
2903 \r
2904                                         if(j == registerCount)   // Found free slots\r
2905                                         {\r
2906                                                 for(unsigned int j = 0; j < registerCount; j++)\r
2907                                                 {\r
2908                                                         list[i + j] = variable;\r
2909                                                 }\r
2910 \r
2911                                                 return i;\r
2912                                         }\r
2913                                 }\r
2914                         }\r
2915 \r
2916                         index = list.size();\r
2917 \r
2918                         for(unsigned int i = 0; i < registerCount; i++)\r
2919                         {\r
2920                                 list.push_back(variable);\r
2921                         }\r
2922                 }\r
2923 \r
2924                 return index;\r
2925         }\r
2926 \r
2927         void OutputASM::free(VariableArray &list, TIntermTyped *variable)\r
2928         {\r
2929                 int index = lookup(list, variable);\r
2930 \r
2931                 if(index >= 0)\r
2932                 {\r
2933                         list[index] = 0;\r
2934                 }\r
2935         }\r
2936 \r
2937         void OutputASM::declareUniform(const TType &type, const TString &name, int registerIndex, int blockId, BlockLayoutEncoder* encoder)\r
2938         {\r
2939                 const TStructure *structure = type.getStruct();\r
2940                 const TInterfaceBlock *block = (type.isInterfaceBlock() || (blockId == -1)) ? type.getInterfaceBlock() : nullptr;\r
2941                 ActiveUniforms &activeUniforms = shaderObject->activeUniforms;\r
2942 \r
2943                 if(!structure && !block)\r
2944                 {\r
2945                         if(blockId >= 0)\r
2946                         {\r
2947                                 shaderObject->activeUniformBlocks[blockId].fields.push_back(activeUniforms.size());\r
2948                         }\r
2949                         BlockMemberInfo blockInfo = encoder ? encoder->encodeType(type) : BlockMemberInfo::getDefaultBlockInfo();\r
2950                         int regIndex = encoder ? registerIndex + BlockLayoutEncoder::getBlockRegister(blockInfo) : registerIndex;\r
2951                         activeUniforms.push_back(Uniform(glVariableType(type), glVariablePrecision(type), name.c_str(), type.getArraySize(),\r
2952                                                          regIndex, blockId, blockInfo));\r
2953 \r
2954                         if(isSamplerRegister(type))\r
2955                         {\r
2956                                 for(int i = 0; i < type.totalRegisterCount(); i++)\r
2957                                 {\r
2958                                         shader->declareSampler(regIndex + i);\r
2959                                 }\r
2960                         }\r
2961                 }\r
2962                 else if(block)\r
2963                 {\r
2964                         ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;\r
2965                         blockId = activeUniformBlocks.size();\r
2966                         bool isRowMajor = block->matrixPacking() == EmpRowMajor;\r
2967                         const TString &blockName = block->name();\r
2968                         activeUniformBlocks.push_back(UniformBlock(blockName.c_str(), 0, block->arraySize(),\r
2969                                                                    block->blockStorage(), isRowMajor, registerIndex, blockId));\r
2970 \r
2971                         const TFieldList& fields = block->fields();\r
2972                         Std140BlockEncoder currentBlockEncoder(isRowMajor);\r
2973                         for(size_t i = 0; i < fields.size(); i++)\r
2974                         {\r
2975                                 const TType &fieldType = *(fields[i]->type());\r
2976                                 const TString &fieldName = fields[i]->name();\r
2977                                 const TString uniformName = block->hasInstanceName() ? blockName + "." + fieldName : fieldName;\r
2978 \r
2979                                 declareUniform(fieldType, uniformName, registerIndex, blockId, &currentBlockEncoder);\r
2980                         }\r
2981                         activeUniformBlocks[blockId].dataSize = currentBlockEncoder.getBlockSize();\r
2982                 }\r
2983                 else\r
2984                 {\r
2985                         int fieldRegisterIndex = registerIndex;\r
2986 \r
2987                         const TFieldList& fields = structure->fields();\r
2988                         if(type.isArray() && (structure || type.isInterfaceBlock()))\r
2989                         {\r
2990                                 for(int i = 0; i < type.getArraySize(); i++)\r
2991                                 {\r
2992                                         if(encoder)\r
2993                                         {\r
2994                                                 encoder->enterAggregateType();\r
2995                                         }\r
2996                                         for(size_t j = 0; j < fields.size(); j++)\r
2997                                         {\r
2998                                                 const TType &fieldType = *(fields[j]->type());\r
2999                                                 const TString &fieldName = fields[j]->name();\r
3000                                                 const TString uniformName = name + "[" + str(i) + "]." + fieldName;\r
3001 \r
3002                                                 declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, encoder);\r
3003                                                 if(!encoder)\r
3004                                                 {\r
3005                                                         int registerCount = fieldType.totalRegisterCount();\r
3006                                                         fieldRegisterIndex += registerCount;\r
3007                                                 }\r
3008                                         }\r
3009                                         if(encoder)\r
3010                                         {\r
3011                                                 encoder->exitAggregateType();\r
3012                                         }\r
3013                                 }\r
3014                         }\r
3015                         else\r
3016                         {\r
3017                                 if(encoder)\r
3018                                 {\r
3019                                         encoder->enterAggregateType();\r
3020                                 }\r
3021                                 for(size_t i = 0; i < fields.size(); i++)\r
3022                                 {\r
3023                                         const TType &fieldType = *(fields[i]->type());\r
3024                                         const TString &fieldName = fields[i]->name();\r
3025                                         const TString uniformName = name + "." + fieldName;\r
3026 \r
3027                                         declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, encoder);\r
3028                                         if(!encoder)\r
3029                                         {\r
3030                                                 int registerCount = fieldType.totalRegisterCount();\r
3031                                                 fieldRegisterIndex += registerCount;\r
3032                                         }\r
3033                                 }\r
3034                                 if(encoder)\r
3035                                 {\r
3036                                         encoder->exitAggregateType();\r
3037                                 }\r
3038                         }\r
3039                 }\r
3040         }\r
3041 \r
3042         GLenum OutputASM::glVariableType(const TType &type)\r
3043         {\r
3044                 switch(type.getBasicType())\r
3045                 {\r
3046                 case EbtFloat:\r
3047                         if(type.isScalar())\r
3048                         {\r
3049                                 return GL_FLOAT;\r
3050                         }\r
3051                         else if(type.isVector())\r
3052                         {\r
3053                                 switch(type.getNominalSize())\r
3054                                 {\r
3055                                 case 2: return GL_FLOAT_VEC2;\r
3056                                 case 3: return GL_FLOAT_VEC3;\r
3057                                 case 4: return GL_FLOAT_VEC4;\r
3058                                 default: UNREACHABLE(type.getNominalSize());\r
3059                                 }\r
3060                         }\r
3061                         else if(type.isMatrix())\r
3062                         {\r
3063                                 switch(type.getNominalSize())\r
3064                                 {\r
3065                                 case 2:\r
3066                                         switch(type.getSecondarySize())\r
3067                                         {\r
3068                                         case 2: return GL_FLOAT_MAT2;\r
3069                                         case 3: return GL_FLOAT_MAT2x3;\r
3070                                         case 4: return GL_FLOAT_MAT2x4;\r
3071                                         default: UNREACHABLE(type.getSecondarySize());\r
3072                                         }\r
3073                                 case 3:\r
3074                                         switch(type.getSecondarySize())\r
3075                                         {\r
3076                                         case 2: return GL_FLOAT_MAT3x2;\r
3077                                         case 3: return GL_FLOAT_MAT3;\r
3078                                         case 4: return GL_FLOAT_MAT3x4;\r
3079                                         default: UNREACHABLE(type.getSecondarySize());\r
3080                                         }\r
3081                                 case 4:\r
3082                                         switch(type.getSecondarySize())\r
3083                                         {\r
3084                                         case 2: return GL_FLOAT_MAT4x2;\r
3085                                         case 3: return GL_FLOAT_MAT4x3;\r
3086                                         case 4: return GL_FLOAT_MAT4;\r
3087                                         default: UNREACHABLE(type.getSecondarySize());\r
3088                                         }\r
3089                                 default: UNREACHABLE(type.getNominalSize());\r
3090                                 }\r
3091                         }\r
3092                         else UNREACHABLE(0);\r
3093                         break;\r
3094                 case EbtInt:\r
3095                         if(type.isScalar())\r
3096                         {\r
3097                                 return GL_INT;\r
3098                         }\r
3099                         else if(type.isVector())\r
3100                         {\r
3101                                 switch(type.getNominalSize())\r
3102                                 {\r
3103                                 case 2: return GL_INT_VEC2;\r
3104                                 case 3: return GL_INT_VEC3;\r
3105                                 case 4: return GL_INT_VEC4;\r
3106                                 default: UNREACHABLE(type.getNominalSize());\r
3107                                 }\r
3108                         }\r
3109                         else UNREACHABLE(0);\r
3110                         break;\r
3111                 case EbtUInt:\r
3112                         if(type.isScalar())\r
3113                         {\r
3114                                 return GL_UNSIGNED_INT;\r
3115                         }\r
3116                         else if(type.isVector())\r
3117                         {\r
3118                                 switch(type.getNominalSize())\r
3119                                 {\r
3120                                 case 2: return GL_UNSIGNED_INT_VEC2;\r
3121                                 case 3: return GL_UNSIGNED_INT_VEC3;\r
3122                                 case 4: return GL_UNSIGNED_INT_VEC4;\r
3123                                 default: UNREACHABLE(type.getNominalSize());\r
3124                                 }\r
3125                         }\r
3126                         else UNREACHABLE(0);\r
3127                         break;\r
3128                 case EbtBool:\r
3129                         if(type.isScalar())\r
3130                         {\r
3131                                 return GL_BOOL;\r
3132                         }\r
3133                         else if(type.isVector())\r
3134                         {\r
3135                                 switch(type.getNominalSize())\r
3136                                 {\r
3137                                 case 2: return GL_BOOL_VEC2;\r
3138                                 case 3: return GL_BOOL_VEC3;\r
3139                                 case 4: return GL_BOOL_VEC4;\r
3140                                 default: UNREACHABLE(type.getNominalSize());\r
3141                                 }\r
3142                         }\r
3143                         else UNREACHABLE(0);\r
3144                         break;\r
3145                 case EbtSampler2D:\r
3146                         return GL_SAMPLER_2D;\r
3147                 case EbtISampler2D:\r
3148                         return GL_INT_SAMPLER_2D;\r
3149                 case EbtUSampler2D:\r
3150                         return GL_UNSIGNED_INT_SAMPLER_2D;\r
3151                 case EbtSamplerCube:\r
3152                         return GL_SAMPLER_CUBE;\r
3153                 case EbtISamplerCube:\r
3154                         return GL_INT_SAMPLER_CUBE;\r
3155                 case EbtUSamplerCube:\r
3156                         return GL_UNSIGNED_INT_SAMPLER_CUBE;\r
3157                 case EbtSamplerExternalOES:\r
3158                         return GL_SAMPLER_EXTERNAL_OES;\r
3159                 case EbtSampler3D:\r
3160                         return GL_SAMPLER_3D_OES;\r
3161                 case EbtISampler3D:\r
3162                         return GL_INT_SAMPLER_3D;\r
3163                 case EbtUSampler3D:\r
3164                         return GL_UNSIGNED_INT_SAMPLER_3D;\r
3165                 case EbtSampler2DArray:\r
3166                         return GL_SAMPLER_2D_ARRAY;\r
3167                 case EbtISampler2DArray:\r
3168                         return GL_INT_SAMPLER_2D_ARRAY;\r
3169                 case EbtUSampler2DArray:\r
3170                         return GL_UNSIGNED_INT_SAMPLER_2D_ARRAY;\r
3171                 case EbtSampler2DShadow:\r
3172                         return GL_SAMPLER_2D_SHADOW;\r
3173                 case EbtSamplerCubeShadow:\r
3174                         return GL_SAMPLER_CUBE_SHADOW;\r
3175                 case EbtSampler2DArrayShadow:\r
3176                         return GL_SAMPLER_2D_ARRAY_SHADOW;\r
3177                 default:\r
3178                         UNREACHABLE(type.getBasicType());\r
3179                         break;\r
3180                 }\r
3181 \r
3182                 return GL_NONE;\r
3183         }\r
3184 \r
3185         GLenum OutputASM::glVariablePrecision(const TType &type)\r
3186         {\r
3187                 if(type.getBasicType() == EbtFloat)\r
3188                 {\r
3189                         switch(type.getPrecision())\r
3190                         {\r
3191                         case EbpHigh:   return GL_HIGH_FLOAT;\r
3192                         case EbpMedium: return GL_MEDIUM_FLOAT;\r
3193                         case EbpLow:    return GL_LOW_FLOAT;\r
3194                         case EbpUndefined:\r
3195                                 // Should be defined as the default precision by the parser\r
3196                         default: UNREACHABLE(type.getPrecision());\r
3197                         }\r
3198                 }\r
3199                 else if(type.getBasicType() == EbtInt)\r
3200                 {\r
3201                         switch(type.getPrecision())\r
3202                         {\r
3203                         case EbpHigh:   return GL_HIGH_INT;\r
3204                         case EbpMedium: return GL_MEDIUM_INT;\r
3205                         case EbpLow:    return GL_LOW_INT;\r
3206                         case EbpUndefined:\r
3207                                 // Should be defined as the default precision by the parser\r
3208                         default: UNREACHABLE(type.getPrecision());\r
3209                         }\r
3210                 }\r
3211 \r
3212                 // Other types (boolean, sampler) don't have a precision\r
3213                 return GL_NONE;\r
3214         }\r
3215 \r
3216         int OutputASM::dim(TIntermNode *v)\r
3217         {\r
3218                 TIntermTyped *vector = v->getAsTyped();\r
3219                 ASSERT(vector && vector->isRegister());\r
3220                 return vector->getNominalSize();\r
3221         }\r
3222 \r
3223         int OutputASM::dim2(TIntermNode *m)\r
3224         {\r
3225                 TIntermTyped *matrix = m->getAsTyped();\r
3226                 ASSERT(matrix && matrix->isMatrix() && !matrix->isArray());\r
3227                 return matrix->getSecondarySize();\r
3228         }\r
3229 \r
3230         // Returns ~0u if no loop count could be determined\r
3231         unsigned int OutputASM::loopCount(TIntermLoop *node)\r
3232         {\r
3233                 // Parse loops of the form:\r
3234                 // for(int index = initial; index [comparator] limit; index += increment)\r
3235                 TIntermSymbol *index = 0;\r
3236                 TOperator comparator = EOpNull;\r
3237                 int initial = 0;\r
3238                 int limit = 0;\r
3239                 int increment = 0;\r
3240 \r
3241                 // Parse index name and intial value\r
3242                 if(node->getInit())\r
3243                 {\r
3244                         TIntermAggregate *init = node->getInit()->getAsAggregate();\r
3245 \r
3246                         if(init)\r
3247                         {\r
3248                                 TIntermSequence &sequence = init->getSequence();\r
3249                                 TIntermTyped *variable = sequence[0]->getAsTyped();\r
3250 \r
3251                                 if(variable && variable->getQualifier() == EvqTemporary)\r
3252                                 {\r
3253                                         TIntermBinary *assign = variable->getAsBinaryNode();\r
3254 \r
3255                                         if(assign->getOp() == EOpInitialize)\r
3256                                         {\r
3257                                                 TIntermSymbol *symbol = assign->getLeft()->getAsSymbolNode();\r
3258                                                 TIntermConstantUnion *constant = assign->getRight()->getAsConstantUnion();\r
3259 \r
3260                                                 if(symbol && constant)\r
3261                                                 {\r
3262                                                         if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)\r
3263                                                         {\r
3264                                                                 index = symbol;\r
3265                                                                 initial = constant->getUnionArrayPointer()[0].getIConst();\r
3266                                                         }\r
3267                                                 }\r
3268                                         }\r
3269                                 }\r
3270                         }\r
3271                 }\r
3272 \r
3273                 // Parse comparator and limit value\r
3274                 if(index && node->getCondition())\r
3275                 {\r
3276                         TIntermBinary *test = node->getCondition()->getAsBinaryNode();\r
3277 \r
3278                         if(test && test->getLeft()->getAsSymbolNode()->getId() == index->getId())\r
3279                         {\r
3280                                 TIntermConstantUnion *constant = test->getRight()->getAsConstantUnion();\r
3281 \r
3282                                 if(constant)\r
3283                                 {\r
3284                                         if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)\r
3285                                         {\r
3286                                                 comparator = test->getOp();\r
3287                                                 limit = constant->getUnionArrayPointer()[0].getIConst();\r
3288                                         }\r
3289                                 }\r
3290                         }\r
3291                 }\r
3292 \r
3293                 // Parse increment\r
3294                 if(index && comparator != EOpNull && node->getExpression())\r
3295                 {\r
3296                         TIntermBinary *binaryTerminal = node->getExpression()->getAsBinaryNode();\r
3297                         TIntermUnary *unaryTerminal = node->getExpression()->getAsUnaryNode();\r
3298 \r
3299                         if(binaryTerminal)\r
3300                         {\r
3301                                 TOperator op = binaryTerminal->getOp();\r
3302                                 TIntermConstantUnion *constant = binaryTerminal->getRight()->getAsConstantUnion();\r
3303 \r
3304                                 if(constant)\r
3305                                 {\r
3306                                         if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)\r
3307                                         {\r
3308                                                 int value = constant->getUnionArrayPointer()[0].getIConst();\r
3309 \r
3310                                                 switch(op)\r
3311                                                 {\r
3312                                                 case EOpAddAssign: increment = value;  break;\r
3313                                                 case EOpSubAssign: increment = -value; break;\r
3314                                                 default: UNIMPLEMENTED();\r
3315                                                 }\r
3316                                         }\r
3317                                 }\r
3318                         }\r
3319                         else if(unaryTerminal)\r
3320                         {\r
3321                                 TOperator op = unaryTerminal->getOp();\r
3322 \r
3323                                 switch(op)\r
3324                                 {\r
3325                                 case EOpPostIncrement: increment = 1;  break;\r
3326                                 case EOpPostDecrement: increment = -1; break;\r
3327                                 case EOpPreIncrement:  increment = 1;  break;\r
3328                                 case EOpPreDecrement:  increment = -1; break;\r
3329                                 default: UNIMPLEMENTED();\r
3330                                 }\r
3331                         }\r
3332                 }\r
3333 \r
3334                 if(index && comparator != EOpNull && increment != 0)\r
3335                 {\r
3336                         if(comparator == EOpLessThanEqual)\r
3337                         {\r
3338                                 comparator = EOpLessThan;\r
3339                                 limit += 1;\r
3340                         }\r
3341 \r
3342                         if(comparator == EOpLessThan)\r
3343                         {\r
3344                                 int iterations = (limit - initial) / increment;\r
3345 \r
3346                                 if(iterations <= 0)\r
3347                                 {\r
3348                                         iterations = 0;\r
3349                                 }\r
3350 \r
3351                                 return iterations;\r
3352                         }\r
3353                         else UNIMPLEMENTED();   // Falls through\r
3354                 }\r
3355 \r
3356                 return ~0u;\r
3357         }\r
3358 \r
3359         bool DetectLoopDiscontinuity::traverse(TIntermNode *node)\r
3360         {\r
3361                 loopDepth = 0;\r
3362                 loopDiscontinuity = false;\r
3363 \r
3364                 node->traverse(this);\r
3365 \r
3366                 return loopDiscontinuity;\r
3367         }\r
3368 \r
3369         bool DetectLoopDiscontinuity::visitLoop(Visit visit, TIntermLoop *loop)\r
3370         {\r
3371                 if(visit == PreVisit)\r
3372                 {\r
3373                         loopDepth++;\r
3374                 }\r
3375                 else if(visit == PostVisit)\r
3376                 {\r
3377                         loopDepth++;\r
3378                 }\r
3379 \r
3380                 return true;\r
3381         }\r
3382 \r
3383         bool DetectLoopDiscontinuity::visitBranch(Visit visit, TIntermBranch *node)\r
3384         {\r
3385                 if(loopDiscontinuity)\r
3386                 {\r
3387                         return false;\r
3388                 }\r
3389 \r
3390                 if(!loopDepth)\r
3391                 {\r
3392                         return true;\r
3393                 }\r
3394 \r
3395                 switch(node->getFlowOp())\r
3396                 {\r
3397                 case EOpKill:\r
3398                         break;\r
3399                 case EOpBreak:\r
3400                 case EOpContinue:\r
3401                 case EOpReturn:\r
3402                         loopDiscontinuity = true;\r
3403                         break;\r
3404                 default: UNREACHABLE(node->getFlowOp());\r
3405                 }\r
3406 \r
3407                 return !loopDiscontinuity;\r
3408         }\r
3409 \r
3410         bool DetectLoopDiscontinuity::visitAggregate(Visit visit, TIntermAggregate *node)\r
3411         {\r
3412                 return !loopDiscontinuity;\r
3413         }\r
3414 }\r