OSDN Git Service

Uniform buffer uniform unpacking utility function
[android-x86/external-swiftshader.git] / src / OpenGL / compiler / OutputASM.cpp
1 // SwiftShader Software Renderer\r
2 //\r
3 // Copyright(c) 2005-2013 TransGaming Inc.\r
4 //\r
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,\r
6 // transcribed, stored in a retrieval system, translated into any human or computer\r
7 // language by any means, or disclosed to third parties without the explicit written\r
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express\r
9 // or implied, including but not limited to any patent rights, are granted to you.\r
10 //\r
11 \r
12 #include "OutputASM.h"\r
13 #include "Common/Math.hpp"\r
14 \r
15 #include "common/debug.h"\r
16 #include "InfoSink.h"\r
17 \r
18 #include "libGLESv2/Shader.h"\r
19 \r
20 #include <GLES2/gl2.h>\r
21 #include <GLES2/gl2ext.h>\r
22 #include <GLES3/gl3.h>\r
23 \r
24 namespace glsl\r
25 {\r
26         // Integer to TString conversion\r
27         TString str(int i)\r
28         {\r
29                 char buffer[20];\r
30                 sprintf(buffer, "%d", i);\r
31                 return buffer;\r
32         }\r
33 \r
34         class Temporary : public TIntermSymbol\r
35         {\r
36         public:\r
37                 Temporary(OutputASM *assembler) : TIntermSymbol(TSymbolTableLevel::nextUniqueId(), "tmp", TType(EbtFloat, EbpHigh, EvqTemporary, 4, 1, false)), assembler(assembler)\r
38                 {\r
39                 }\r
40 \r
41                 ~Temporary()\r
42                 {\r
43                         assembler->freeTemporary(this);\r
44                 }\r
45 \r
46         private:\r
47                 OutputASM *const assembler;\r
48         };\r
49 \r
50         class Constant : public TIntermConstantUnion\r
51         {\r
52         public:\r
53                 Constant(float x, float y, float z, float w) : TIntermConstantUnion(constants, TType(EbtFloat, EbpHigh, EvqConstExpr, 4, 1, false))\r
54                 {\r
55                         constants[0].setFConst(x);\r
56                         constants[1].setFConst(y);\r
57                         constants[2].setFConst(z);\r
58                         constants[3].setFConst(w);\r
59                 }\r
60 \r
61                 Constant(bool b) : TIntermConstantUnion(constants, TType(EbtBool, EbpHigh, EvqConstExpr, 1, 1, false))\r
62                 {\r
63                         constants[0].setBConst(b);\r
64                 }\r
65 \r
66                 Constant(int i) : TIntermConstantUnion(constants, TType(EbtInt, EbpHigh, EvqConstExpr, 1, 1, false))\r
67                 {\r
68                         constants[0].setIConst(i);\r
69                 }\r
70 \r
71                 ~Constant()\r
72                 {\r
73                 }\r
74 \r
75         private:\r
76                 ConstantUnion constants[4];\r
77         };\r
78 \r
79         Uniform::Uniform(GLenum type, GLenum precision, const std::string &name, int arraySize, int registerIndex, int blockId, const BlockMemberInfo& blockMemberInfo) :\r
80                 type(type), precision(precision), name(name), arraySize(arraySize), registerIndex(registerIndex), blockId(blockId), blockInfo(blockMemberInfo)\r
81         {\r
82         }\r
83 \r
84         UniformBlock::UniformBlock(const std::string& name, unsigned int dataSize, unsigned int arraySize,\r
85                                    TLayoutBlockStorage layout, bool isRowMajorLayout, int registerIndex, int blockId) :\r
86                 name(name), dataSize(dataSize), arraySize(arraySize), layout(layout),\r
87                 isRowMajorLayout(isRowMajorLayout), registerIndex(registerIndex), blockId(blockId)\r
88         {\r
89         }\r
90 \r
91         BlockLayoutEncoder::BlockLayoutEncoder(bool rowMajor)\r
92                 : mCurrentOffset(0), isRowMajor(rowMajor)\r
93         {\r
94         }\r
95 \r
96         BlockMemberInfo BlockLayoutEncoder::encodeType(const TType &type)\r
97         {\r
98                 int arrayStride;\r
99                 int matrixStride;\r
100 \r
101                 getBlockLayoutInfo(type, type.getArraySize(), isRowMajor, &arrayStride, &matrixStride);\r
102 \r
103                 const BlockMemberInfo memberInfo(static_cast<int>(mCurrentOffset * BytesPerComponent),\r
104                                                  static_cast<int>(arrayStride * BytesPerComponent),\r
105                                                  static_cast<int>(matrixStride * BytesPerComponent),\r
106                                                  (matrixStride > 0) && isRowMajor);\r
107 \r
108                 advanceOffset(type, type.getArraySize(), isRowMajor, arrayStride, matrixStride);\r
109 \r
110                 return memberInfo;\r
111         }\r
112 \r
113         // static\r
114         size_t BlockLayoutEncoder::getBlockRegister(const BlockMemberInfo &info)\r
115         {\r
116                 return (info.offset / BytesPerComponent) / ComponentsPerRegister;\r
117         }\r
118 \r
119         // static\r
120         size_t BlockLayoutEncoder::getBlockRegisterElement(const BlockMemberInfo &info)\r
121         {\r
122                 return (info.offset / BytesPerComponent) % ComponentsPerRegister;\r
123         }\r
124 \r
125         void BlockLayoutEncoder::nextRegister()\r
126         {\r
127                 mCurrentOffset = sw::align(mCurrentOffset, ComponentsPerRegister);\r
128         }\r
129 \r
130         Std140BlockEncoder::Std140BlockEncoder(bool rowMajor) : BlockLayoutEncoder(rowMajor)\r
131         {\r
132         }\r
133 \r
134         void Std140BlockEncoder::enterAggregateType()\r
135         {\r
136                 nextRegister();\r
137         }\r
138 \r
139         void Std140BlockEncoder::exitAggregateType()\r
140         {\r
141                 nextRegister();\r
142         }\r
143 \r
144         void Std140BlockEncoder::getBlockLayoutInfo(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int *arrayStrideOut, int *matrixStrideOut)\r
145         {\r
146                 size_t baseAlignment = 0;\r
147                 int matrixStride = 0;\r
148                 int arrayStride = 0;\r
149 \r
150                 if(type.isMatrix())\r
151                 {\r
152                         baseAlignment = ComponentsPerRegister;\r
153                         matrixStride = ComponentsPerRegister;\r
154 \r
155                         if(arraySize > 0)\r
156                         {\r
157                                 const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();\r
158                                 arrayStride = ComponentsPerRegister * numRegisters;\r
159                         }\r
160                 }\r
161                 else if(arraySize > 0)\r
162                 {\r
163                         baseAlignment = ComponentsPerRegister;\r
164                         arrayStride = ComponentsPerRegister;\r
165                 }\r
166                 else\r
167                 {\r
168                         const int numComponents = type.getElementSize();\r
169                         baseAlignment = (numComponents == 3 ? 4u : static_cast<size_t>(numComponents));\r
170                 }\r
171 \r
172                 mCurrentOffset = sw::align(mCurrentOffset, baseAlignment);\r
173 \r
174                 *matrixStrideOut = matrixStride;\r
175                 *arrayStrideOut = arrayStride;\r
176         }\r
177 \r
178         void Std140BlockEncoder::advanceOffset(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int arrayStride, int matrixStride)\r
179         {\r
180                 if(arraySize > 0)\r
181                 {\r
182                         mCurrentOffset += arrayStride * arraySize;\r
183                 }\r
184                 else if(type.isMatrix())\r
185                 {\r
186                         ASSERT(matrixStride == ComponentsPerRegister);\r
187                         const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();\r
188                         mCurrentOffset += ComponentsPerRegister * numRegisters;\r
189                 }\r
190                 else\r
191                 {\r
192                         mCurrentOffset += type.getElementSize();\r
193                 }\r
194         }\r
195 \r
196         Attribute::Attribute()\r
197         {\r
198                 type = GL_NONE;\r
199                 arraySize = 0;\r
200                 registerIndex = 0;\r
201         }\r
202 \r
203         Attribute::Attribute(GLenum type, const std::string &name, int arraySize, int location, int registerIndex)\r
204         {\r
205                 this->type = type;\r
206                 this->name = name;\r
207                 this->arraySize = arraySize;\r
208                 this->location = location;\r
209                 this->registerIndex = registerIndex;\r
210         }\r
211 \r
212         sw::PixelShader *Shader::getPixelShader() const\r
213         {\r
214                 return 0;\r
215         }\r
216 \r
217         sw::VertexShader *Shader::getVertexShader() const\r
218         {\r
219                 return 0;\r
220         }\r
221 \r
222         OutputASM::TextureFunction::TextureFunction(const TString& nodeName) : method(IMPLICIT), proj(false), offset(false)\r
223         {\r
224                 TString name = TFunction::unmangleName(nodeName);\r
225 \r
226                 if(name == "texture2D" || name == "textureCube" || name == "texture" || name == "texture3D")\r
227                 {\r
228                         method = IMPLICIT;\r
229                 }\r
230                 else if(name == "texture2DProj" || name == "textureProj")\r
231                 {\r
232                         method = IMPLICIT;\r
233                         proj = true;\r
234                 }\r
235                 else if(name == "texture2DLod" || name == "textureCubeLod" || name == "textureLod")\r
236                 {\r
237                         method = LOD;\r
238                 }\r
239                 else if(name == "texture2DProjLod" || name == "textureProjLod")\r
240                 {\r
241                         method = LOD;\r
242                         proj = true;\r
243                 }\r
244                 else if(name == "textureSize")\r
245                 {\r
246                         method = SIZE;\r
247                 }\r
248                 else if(name == "textureOffset")\r
249                 {\r
250                         method = IMPLICIT;\r
251                         offset = true;\r
252                 }\r
253                 else if(name == "textureProjOffset")\r
254                 {\r
255                         method = IMPLICIT;\r
256                         offset = true;\r
257                         proj = true;\r
258                 }\r
259                 else if(name == "textureLodOffset")\r
260                 {\r
261                         method = LOD;\r
262                         offset = true;\r
263                 }\r
264                 else if(name == "textureProjLodOffset")\r
265                 {\r
266                         method = LOD;\r
267                         proj = true;\r
268                         offset = true;\r
269                 }\r
270                 else if(name == "texelFetch")\r
271                 {\r
272                         method = FETCH;\r
273                 }\r
274                 else if(name == "texelFetchOffset")\r
275                 {\r
276                         method = FETCH;\r
277                         offset = true;\r
278                 }\r
279                 else if(name == "textureGrad")\r
280                 {\r
281                         method = GRAD;\r
282                 }\r
283                 else if(name == "textureGradOffset")\r
284                 {\r
285                         method = GRAD;\r
286                         offset = true;\r
287                 }\r
288                 else if(name == "textureProjGrad")\r
289                 {\r
290                         method = GRAD;\r
291                         proj = true;\r
292                 }\r
293                 else if(name == "textureProjGradOffset")\r
294                 {\r
295                         method = GRAD;\r
296                         proj = true;\r
297                         offset = true;\r
298                 }\r
299                 else UNREACHABLE(0);\r
300         }\r
301 \r
302         OutputASM::OutputASM(TParseContext &context, Shader *shaderObject) : TIntermTraverser(true, true, true), shaderObject(shaderObject), mContext(context)\r
303         {\r
304                 shader = 0;\r
305                 pixelShader = 0;\r
306                 vertexShader = 0;\r
307 \r
308                 if(shaderObject)\r
309                 {\r
310                         shader = shaderObject->getShader();\r
311                         pixelShader = shaderObject->getPixelShader();\r
312                         vertexShader = shaderObject->getVertexShader();\r
313                 }\r
314 \r
315                 functionArray.push_back(Function(0, "main(", 0, 0));\r
316                 currentFunction = 0;\r
317                 outputQualifier = EvqOutput; // Set outputQualifier to any value other than EvqFragColor or EvqFragData\r
318         }\r
319 \r
320         OutputASM::~OutputASM()\r
321         {\r
322         }\r
323 \r
324         void OutputASM::output()\r
325         {\r
326                 if(shader)\r
327                 {\r
328                         emitShader(GLOBAL);\r
329 \r
330                         if(functionArray.size() > 1)   // Only call main() when there are other functions\r
331                         {\r
332                                 Instruction *callMain = emit(sw::Shader::OPCODE_CALL);\r
333                                 callMain->dst.type = sw::Shader::PARAMETER_LABEL;\r
334                                 callMain->dst.index = 0;   // main()\r
335 \r
336                                 emit(sw::Shader::OPCODE_RET);\r
337                         }\r
338 \r
339                         emitShader(FUNCTION);\r
340                 }\r
341         }\r
342 \r
343         void OutputASM::emitShader(Scope scope)\r
344         {\r
345                 emitScope = scope;\r
346                 currentScope = GLOBAL;\r
347                 mContext.getTreeRoot()->traverse(this);\r
348         }\r
349 \r
350         void OutputASM::freeTemporary(Temporary *temporary)\r
351         {\r
352                 free(temporaries, temporary);\r
353         }\r
354 \r
355         sw::Shader::Opcode OutputASM::getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const\r
356         {\r
357                 TBasicType baseType = in->getType().getBasicType();\r
358 \r
359                 switch(op)\r
360                 {\r
361                 case sw::Shader::OPCODE_NEG:\r
362                         switch(baseType)\r
363                         {\r
364                         case EbtInt:\r
365                         case EbtUInt:\r
366                                 return sw::Shader::OPCODE_INEG;\r
367                         case EbtFloat:\r
368                         default:\r
369                                 return op;\r
370                         }\r
371                 case sw::Shader::OPCODE_ABS:\r
372                         switch(baseType)\r
373                         {\r
374                         case EbtInt:\r
375                                 return sw::Shader::OPCODE_IABS;\r
376                         case EbtFloat:\r
377                         default:\r
378                                 return op;\r
379                         }\r
380                 case sw::Shader::OPCODE_SGN:\r
381                         switch(baseType)\r
382                         {\r
383                         case EbtInt:\r
384                                 return sw::Shader::OPCODE_ISGN;\r
385                         case EbtFloat:\r
386                         default:\r
387                                 return op;\r
388                         }\r
389                 case sw::Shader::OPCODE_ADD:\r
390                         switch(baseType)\r
391                         {\r
392                         case EbtInt:\r
393                         case EbtUInt:\r
394                                 return sw::Shader::OPCODE_IADD;\r
395                         case EbtFloat:\r
396                         default:\r
397                                 return op;\r
398                         }\r
399                 case sw::Shader::OPCODE_SUB:\r
400                         switch(baseType)\r
401                         {\r
402                         case EbtInt:\r
403                         case EbtUInt:\r
404                                 return sw::Shader::OPCODE_ISUB;\r
405                         case EbtFloat:\r
406                         default:\r
407                                 return op;\r
408                         }\r
409                 case sw::Shader::OPCODE_MUL:\r
410                         switch(baseType)\r
411                         {\r
412                         case EbtInt:\r
413                         case EbtUInt:\r
414                                 return sw::Shader::OPCODE_IMUL;\r
415                         case EbtFloat:\r
416                         default:\r
417                                 return op;\r
418                         }\r
419                 case sw::Shader::OPCODE_DIV:\r
420                         switch(baseType)\r
421                         {\r
422                         case EbtInt:\r
423                                 return sw::Shader::OPCODE_IDIV;\r
424                         case EbtUInt:\r
425                                 return sw::Shader::OPCODE_UDIV;\r
426                         case EbtFloat:\r
427                         default:\r
428                                 return op;\r
429                         }\r
430                 case sw::Shader::OPCODE_IMOD:\r
431                         return baseType == EbtUInt ? sw::Shader::OPCODE_UMOD : op;\r
432                 case sw::Shader::OPCODE_ISHR:\r
433                         return baseType == EbtUInt ? sw::Shader::OPCODE_USHR : op;\r
434                 case sw::Shader::OPCODE_MIN:\r
435                         switch(baseType)\r
436                         {\r
437                         case EbtInt:\r
438                                 return sw::Shader::OPCODE_IMIN;\r
439                         case EbtUInt:\r
440                                 return sw::Shader::OPCODE_UMIN;\r
441                         case EbtFloat:\r
442                         default:\r
443                                 return op;\r
444                         }\r
445                 case sw::Shader::OPCODE_MAX:\r
446                         switch(baseType)\r
447                         {\r
448                         case EbtInt:\r
449                                 return sw::Shader::OPCODE_IMAX;\r
450                         case EbtUInt:\r
451                                 return sw::Shader::OPCODE_UMAX;\r
452                         case EbtFloat:\r
453                         default:\r
454                                 return op;\r
455                         }\r
456                 default:\r
457                         return op;\r
458                 }\r
459         }\r
460 \r
461         void OutputASM::visitSymbol(TIntermSymbol *symbol)\r
462         {\r
463                 // Vertex varyings don't have to be actively used to successfully link\r
464                 // against pixel shaders that use them. So make sure they're declared.\r
465                 if(symbol->getQualifier() == EvqVaryingOut || symbol->getQualifier() == EvqInvariantVaryingOut || symbol->getQualifier() == EvqVertexOut)\r
466                 {\r
467                         if(symbol->getBasicType() != EbtInvariant)   // Typeless declarations are not new varyings\r
468                         {\r
469                                 declareVarying(symbol, -1);\r
470                         }\r
471                 }\r
472 \r
473                 TInterfaceBlock* block = symbol->getType().getInterfaceBlock();\r
474                 // OpenGL ES 3.0.4 spec, section 2.12.6 Uniform Variables:\r
475                 // "All members of a named uniform block declared with a shared or std140 layout qualifier\r
476                 // are considered active, even if they are not referenced in any shader in the program.\r
477                 // The uniform block itself is also considered active, even if no member of the block is referenced."\r
478                 if(block && ((block->blockStorage() == EbsShared) || (block->blockStorage() == EbsStd140)))\r
479                 {\r
480                         uniformRegister(symbol);\r
481                 }\r
482         }\r
483 \r
484         bool OutputASM::visitBinary(Visit visit, TIntermBinary *node)\r
485         {\r
486                 if(currentScope != emitScope)\r
487                 {\r
488                         return false;\r
489                 }\r
490 \r
491                 TIntermTyped *result = node;\r
492                 TIntermTyped *left = node->getLeft();\r
493                 TIntermTyped *right = node->getRight();\r
494                 const TType &leftType = left->getType();\r
495                 const TType &rightType = right->getType();\r
496                 const TType &resultType = node->getType();\r
497 \r
498                 switch(node->getOp())\r
499                 {\r
500                 case EOpAssign:\r
501                         if(visit == PostVisit)\r
502                         {\r
503                                 assignLvalue(left, right);\r
504                                 copy(result, right);\r
505                         }\r
506                         break;\r
507                 case EOpInitialize:\r
508                         if(visit == PostVisit)\r
509                         {\r
510                                 copy(left, right);\r
511                         }\r
512                         break;\r
513                 case EOpMatrixTimesScalarAssign:\r
514                         if(visit == PostVisit)\r
515                         {\r
516                                 for(int i = 0; i < leftType.getNominalSize(); i++)\r
517                                 {\r
518                                         emit(sw::Shader::OPCODE_MUL, result, i, left, i, right);\r
519                                 }\r
520 \r
521                                 assignLvalue(left, result);\r
522                         }\r
523                         break;\r
524                 case EOpVectorTimesMatrixAssign:\r
525                         if(visit == PostVisit)\r
526                         {\r
527                                 int size = leftType.getNominalSize();\r
528 \r
529                                 for(int i = 0; i < size; i++)\r
530                                 {\r
531                                         Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, 0, left, 0, right, i);\r
532                                         dot->dst.mask = 1 << i;\r
533                                 }\r
534 \r
535                                 assignLvalue(left, result);\r
536                         }\r
537                         break;\r
538                 case EOpMatrixTimesMatrixAssign:\r
539                         if(visit == PostVisit)\r
540                         {\r
541                                 int dim = leftType.getNominalSize();\r
542 \r
543                                 for(int i = 0; i < dim; i++)\r
544                                 {\r
545                                         Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);\r
546                                         mul->src[1].swizzle = 0x00;\r
547 \r
548                                         for(int j = 1; j < dim; j++)\r
549                                         {\r
550                                                 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);\r
551                                                 mad->src[1].swizzle = j * 0x55;\r
552                                         }\r
553                                 }\r
554 \r
555                                 assignLvalue(left, result);\r
556                         }\r
557                         break;\r
558                 case EOpIndexDirect:\r
559                         if(visit == PostVisit)\r
560                         {\r
561                                 int index = right->getAsConstantUnion()->getIConst(0);\r
562 \r
563                                 if(result->isMatrix() || result->isStruct() || result->isInterfaceBlock())\r
564                                 {\r
565                                         ASSERT(left->isArray());\r
566                                         copy(result, left, index * left->elementRegisterCount());\r
567                                 }\r
568                                 else if(result->isRegister())\r
569                                 {\r
570                                         int srcIndex = 0;\r
571                                         if(left->isRegister())\r
572                                         {\r
573                                                 srcIndex = 0;\r
574                                         }\r
575                                         else if(left->isArray())\r
576                                         {\r
577                                                 srcIndex = index * left->elementRegisterCount();\r
578                                         }\r
579                                         else if(left->isMatrix())\r
580                                         {\r
581                                                 ASSERT(index < left->getNominalSize());   // FIXME: Report semantic error\r
582                                                 srcIndex = index;\r
583                                         }\r
584                                         else UNREACHABLE(0);\r
585 \r
586                                         Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, 0, left, srcIndex);\r
587 \r
588                                         if(left->isRegister())\r
589                                         {\r
590                                                 mov->src[0].swizzle = index;\r
591                                         }\r
592                                 }\r
593                                 else UNREACHABLE(0);\r
594                         }\r
595                         break;\r
596                 case EOpIndexIndirect:\r
597                         if(visit == PostVisit)\r
598                         {\r
599                                 if(left->isArray() || left->isMatrix())\r
600                                 {\r
601                                         for(int index = 0; index < result->totalRegisterCount(); index++)\r
602                                         {\r
603                                                 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index, left, index);\r
604                                                 mov->dst.mask = writeMask(result, index);\r
605 \r
606                                                 if(left->totalRegisterCount() > 1)\r
607                                                 {\r
608                                                         sw::Shader::SourceParameter relativeRegister;\r
609                                                         argument(relativeRegister, right);\r
610 \r
611                                                         mov->src[0].rel.type = relativeRegister.type;\r
612                                                         mov->src[0].rel.index = relativeRegister.index;\r
613                                                         mov->src[0].rel.scale = result->totalRegisterCount();\r
614                                                         mov->src[0].rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);\r
615                                                 }\r
616                                         }\r
617                                 }\r
618                                 else if(left->isRegister())\r
619                                 {\r
620                                         emit(sw::Shader::OPCODE_EXTRACT, result, left, right);\r
621                                 }\r
622                                 else UNREACHABLE(0);\r
623                         }\r
624                         break;\r
625                 case EOpIndexDirectStruct:\r
626                 case EOpIndexDirectInterfaceBlock:\r
627                         if(visit == PostVisit)\r
628                         {\r
629                                 ASSERT(leftType.isStruct() || (leftType.isInterfaceBlock()));\r
630 \r
631                                 const TFieldList& fields = (node->getOp() == EOpIndexDirectStruct) ?\r
632                                                            leftType.getStruct()->fields() :\r
633                                                            leftType.getInterfaceBlock()->fields();\r
634                                 int index = right->getAsConstantUnion()->getIConst(0);\r
635                                 int fieldOffset = 0;\r
636 \r
637                                 for(int i = 0; i < index; i++)\r
638                                 {\r
639                                         fieldOffset += fields[i]->type()->totalRegisterCount();\r
640                                 }\r
641 \r
642                                 copy(result, left, fieldOffset);\r
643                         }\r
644                         break;\r
645                 case EOpVectorSwizzle:\r
646                         if(visit == PostVisit)\r
647                         {\r
648                                 int swizzle = 0;\r
649                                 TIntermAggregate *components = right->getAsAggregate();\r
650 \r
651                                 if(components)\r
652                                 {\r
653                                         TIntermSequence &sequence = components->getSequence();\r
654                                         int component = 0;\r
655 \r
656                                         for(TIntermSequence::iterator sit = sequence.begin(); sit != sequence.end(); sit++)\r
657                                         {\r
658                                                 TIntermConstantUnion *element = (*sit)->getAsConstantUnion();\r
659 \r
660                                                 if(element)\r
661                                                 {\r
662                                                         int i = element->getUnionArrayPointer()[0].getIConst();\r
663                                                         swizzle |= i << (component * 2);\r
664                                                         component++;\r
665                                                 }\r
666                                                 else UNREACHABLE(0);\r
667                                         }\r
668                                 }\r
669                                 else UNREACHABLE(0);\r
670 \r
671                                 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left);\r
672                                 mov->src[0].swizzle = swizzle;\r
673                         }\r
674                         break;\r
675                 case EOpAddAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, left, right); break;\r
676                 case EOpAdd:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, right);       break;\r
677                 case EOpSubAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, left, right); break;\r
678                 case EOpSub:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, right);       break;\r
679                 case EOpMulAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, left, right); break;\r
680                 case EOpMul:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, right);       break;\r
681                 case EOpDivAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, left, right); break;\r
682                 case EOpDiv:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, right);       break;\r
683                 case EOpIModAssign:          if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, left, right); break;\r
684                 case EOpIMod:                if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, right);       break;\r
685                 case EOpBitShiftLeftAssign:  if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_SHL, result, left, left, right); break;\r
686                 case EOpBitShiftLeft:        if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_SHL, result, left, right);       break;\r
687                 case EOpBitShiftRightAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, left, right); break;\r
688                 case EOpBitShiftRight:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, right);       break;\r
689                 case EOpBitwiseAndAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_AND, result, left, left, right); break;\r
690                 case EOpBitwiseAnd:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_AND, result, left, right);       break;\r
691                 case EOpBitwiseXorAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_XOR, result, left, left, right); break;\r
692                 case EOpBitwiseXor:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_XOR, result, left, right);       break;\r
693                 case EOpBitwiseOrAssign:     if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_OR, result, left, left, right);  break;\r
694                 case EOpBitwiseOr:           if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_OR, result, left, right);        break;\r
695                 case EOpEqual:\r
696                         if(visit == PostVisit)\r
697                         {\r
698                                 emitBinary(sw::Shader::OPCODE_EQ, result, left, right);\r
699 \r
700                                 for(int index = 1; index < left->totalRegisterCount(); index++)\r
701                                 {\r
702                                         Temporary equal(this);\r
703                                         emit(sw::Shader::OPCODE_EQ, &equal, 0, left, index, right, index);\r
704                                         emit(sw::Shader::OPCODE_AND, result, result, &equal);\r
705                                 }\r
706                         }\r
707                         break;\r
708                 case EOpNotEqual:\r
709                         if(visit == PostVisit)\r
710                         {\r
711                                 emitBinary(sw::Shader::OPCODE_NE, result, left, right);\r
712 \r
713                                 for(int index = 1; index < left->totalRegisterCount(); index++)\r
714                                 {\r
715                                         Temporary notEqual(this);\r
716                                         emit(sw::Shader::OPCODE_NE, &notEqual, 0, left, index, right, index);\r
717                                         emit(sw::Shader::OPCODE_OR, result, result, &notEqual);\r
718                                 }\r
719                         }\r
720                         break;\r
721                 case EOpLessThan:                if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, left, right); break;\r
722                 case EOpGreaterThan:             if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, left, right); break;\r
723                 case EOpLessThanEqual:           if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, left, right); break;\r
724                 case EOpGreaterThanEqual:        if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, left, right); break;\r
725                 case EOpVectorTimesScalarAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, left, right); break;\r
726                 case EOpVectorTimesScalar:       if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, right); break;\r
727                 case EOpMatrixTimesScalar:\r
728                         if(visit == PostVisit)\r
729                         {\r
730                                 if(left->isMatrix())\r
731                                 {\r
732                                         for(int i = 0; i < leftType.getNominalSize(); i++)\r
733                                         {\r
734                                                 emit(sw::Shader::OPCODE_MUL, result, i, left, i, right, 0);\r
735                                         }\r
736                                 }\r
737                                 else if(right->isMatrix())\r
738                                 {\r
739                                         for(int i = 0; i < rightType.getNominalSize(); i++)\r
740                                         {\r
741                                                 emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);\r
742                                         }\r
743                                 }\r
744                                 else UNREACHABLE(0);\r
745                         }\r
746                         break;\r
747                 case EOpVectorTimesMatrix:\r
748                         if(visit == PostVisit)\r
749                         {\r
750                                 sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize());\r
751 \r
752                                 int size = rightType.getNominalSize();\r
753                                 for(int i = 0; i < size; i++)\r
754                                 {\r
755                                         Instruction *dot = emit(dpOpcode, result, 0, left, 0, right, i);\r
756                                         dot->dst.mask = 1 << i;\r
757                                 }\r
758                         }\r
759                         break;\r
760                 case EOpMatrixTimesVector:\r
761                         if(visit == PostVisit)\r
762                         {\r
763                                 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);\r
764                                 mul->src[1].swizzle = 0x00;\r
765 \r
766                                 int size = rightType.getNominalSize();\r
767                                 for(int i = 1; i < size; i++)\r
768                                 {\r
769                                         Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, 0, left, i, right, 0, result);\r
770                                         mad->src[1].swizzle = i * 0x55;\r
771                                 }\r
772                         }\r
773                         break;\r
774                 case EOpMatrixTimesMatrix:\r
775                         if(visit == PostVisit)\r
776                         {\r
777                                 int dim = leftType.getNominalSize();\r
778 \r
779                                 int size = rightType.getNominalSize();\r
780                                 for(int i = 0; i < size; i++)\r
781                                 {\r
782                                         Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);\r
783                                         mul->src[1].swizzle = 0x00;\r
784 \r
785                                         for(int j = 1; j < dim; j++)\r
786                                         {\r
787                                                 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);\r
788                                                 mad->src[1].swizzle = j * 0x55;\r
789                                         }\r
790                                 }\r
791                         }\r
792                         break;\r
793                 case EOpLogicalOr:\r
794                         if(trivial(right, 6))\r
795                         {\r
796                                 if(visit == PostVisit)\r
797                                 {\r
798                                         emit(sw::Shader::OPCODE_OR, result, left, right);\r
799                                 }\r
800                         }\r
801                         else   // Short-circuit evaluation\r
802                         {\r
803                                 if(visit == InVisit)\r
804                                 {\r
805                                         emit(sw::Shader::OPCODE_MOV, result, left);\r
806                                         Instruction *ifnot = emit(sw::Shader::OPCODE_IF, 0, result);\r
807                                         ifnot->src[0].modifier = sw::Shader::MODIFIER_NOT;\r
808                                 }\r
809                                 else if(visit == PostVisit)\r
810                                 {\r
811                                         emit(sw::Shader::OPCODE_MOV, result, right);\r
812                                         emit(sw::Shader::OPCODE_ENDIF);\r
813                                 }\r
814                         }\r
815                         break;\r
816                 case EOpLogicalXor:        if(visit == PostVisit) emit(sw::Shader::OPCODE_XOR, result, left, right); break;\r
817                 case EOpLogicalAnd:\r
818                         if(trivial(right, 6))\r
819                         {\r
820                                 if(visit == PostVisit)\r
821                                 {\r
822                                         emit(sw::Shader::OPCODE_AND, result, left, right);\r
823                                 }\r
824                         }\r
825                         else   // Short-circuit evaluation\r
826                         {\r
827                                 if(visit == InVisit)\r
828                                 {\r
829                                         emit(sw::Shader::OPCODE_MOV, result, left);\r
830                                         emit(sw::Shader::OPCODE_IF, 0, result);\r
831                                 }\r
832                                 else if(visit == PostVisit)\r
833                                 {\r
834                                         emit(sw::Shader::OPCODE_MOV, result, right);\r
835                                         emit(sw::Shader::OPCODE_ENDIF);\r
836                                 }\r
837                         }\r
838                         break;\r
839                 default: UNREACHABLE(node->getOp());\r
840                 }\r
841 \r
842                 return true;\r
843         }\r
844 \r
845         void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow)\r
846         {\r
847                 switch(size)\r
848                 {\r
849                 case 1: // Used for cofactor computation only\r
850                         {\r
851                                 // For a 2x2 matrix, the cofactor is simply a transposed move or negate\r
852                                 bool isMov = (row == col);\r
853                                 sw::Shader::Opcode op = isMov ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG;\r
854                                 Instruction *mov = emit(op, result, outCol, arg, isMov ? 1 - row : row);\r
855                                 mov->src[0].swizzle = 0x55 * (isMov ? 1 - col : col);\r
856                                 mov->dst.mask = 1 << outRow;\r
857                         }\r
858                         break;\r
859                 case 2:\r
860                         {\r
861                                 static const unsigned int swizzle[3] = { 0x99, 0x88, 0x44 }; // xy?? : yzyz, xzxz, xyxy\r
862 \r
863                                 bool isCofactor = (col >= 0) && (row >= 0);\r
864                                 int col0 = (isCofactor && (col <= 0)) ? 1 : 0;\r
865                                 int col1 = (isCofactor && (col <= 1)) ? 2 : 1;\r
866                                 bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));\r
867 \r
868                                 Instruction *det = emit(sw::Shader::OPCODE_DET2, result, outCol, arg, negate ? col1 : col0, arg, negate ? col0 : col1);\r
869                                 det->src[0].swizzle = det->src[1].swizzle = swizzle[isCofactor ? row : 2];\r
870                                 det->dst.mask = 1 << outRow;\r
871                         }\r
872                         break;\r
873                 case 3:\r
874                         {\r
875                                 static const unsigned int swizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 }; // xyz? : yzww, xzww, xyww, xyzw\r
876 \r
877                                 bool isCofactor = (col >= 0) && (row >= 0);\r
878                                 int col0 = (isCofactor && (col <= 0)) ? 1 : 0;\r
879                                 int col1 = (isCofactor && (col <= 1)) ? 2 : 1;\r
880                                 int col2 = (isCofactor && (col <= 2)) ? 3 : 2;\r
881                                 bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));\r
882 \r
883                                 Instruction *det = emit(sw::Shader::OPCODE_DET3, result, outCol, arg, col0, arg, negate ? col2 : col1, arg, negate ? col1 : col2);\r
884                                 det->src[0].swizzle = det->src[1].swizzle = det->src[2].swizzle = swizzle[isCofactor ? row : 3];\r
885                                 det->dst.mask = 1 << outRow;\r
886                         }\r
887                         break;\r
888                 case 4:\r
889                         {\r
890                                 Instruction *det = emit(sw::Shader::OPCODE_DET4, result, outCol, arg, 0, arg, 1, arg, 2, arg, 3);\r
891                                 det->dst.mask = 1 << outRow;\r
892                         }\r
893                         break;\r
894                 default:\r
895                         UNREACHABLE(size);\r
896                         break;\r
897                 }\r
898         }\r
899 \r
900         bool OutputASM::visitUnary(Visit visit, TIntermUnary *node)\r
901         {\r
902                 if(currentScope != emitScope)\r
903                 {\r
904                         return false;\r
905                 }\r
906 \r
907                 TIntermTyped *result = node;\r
908                 TIntermTyped *arg = node->getOperand();\r
909                 TBasicType basicType = arg->getType().getBasicType();\r
910 \r
911                 union\r
912                 {\r
913                         float f;\r
914                         int i;\r
915                 } one_value;\r
916 \r
917                 if(basicType == EbtInt || basicType == EbtUInt)\r
918                 {\r
919                         one_value.i = 1;\r
920                 }\r
921                 else\r
922                 {\r
923                         one_value.f = 1.0f;\r
924                 }\r
925 \r
926                 Constant one(one_value.f, one_value.f, one_value.f, one_value.f);\r
927                 Constant rad(1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f);\r
928                 Constant deg(5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f);\r
929 \r
930                 switch(node->getOp())\r
931                 {\r
932                 case EOpNegative:\r
933                         if(visit == PostVisit)\r
934                         {\r
935                                 sw::Shader::Opcode negOpcode = getOpcode(sw::Shader::OPCODE_NEG, arg);\r
936                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
937                                 {\r
938                                         emit(negOpcode, result, index, arg, index);\r
939                                 }\r
940                         }\r
941                         break;\r
942                 case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;\r
943                 case EOpLogicalNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;\r
944                 case EOpPostIncrement:\r
945                         if(visit == PostVisit)\r
946                         {\r
947                                 copy(result, arg);\r
948 \r
949                                 sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);\r
950                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
951                                 {\r
952                                         emit(addOpcode, arg, index, arg, index, &one);\r
953                                 }\r
954 \r
955                                 assignLvalue(arg, arg);\r
956                         }\r
957                         break;\r
958                 case EOpPostDecrement:\r
959                         if(visit == PostVisit)\r
960                         {\r
961                                 copy(result, arg);\r
962 \r
963                                 sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);\r
964                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
965                                 {\r
966                                         emit(subOpcode, arg, index, arg, index, &one);\r
967                                 }\r
968 \r
969                                 assignLvalue(arg, arg);\r
970                         }\r
971                         break;\r
972                 case EOpPreIncrement:\r
973                         if(visit == PostVisit)\r
974                         {\r
975                                 sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);\r
976                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
977                                 {\r
978                                         emit(addOpcode, result, index, arg, index, &one);\r
979                                 }\r
980 \r
981                                 assignLvalue(arg, result);\r
982                         }\r
983                         break;\r
984                 case EOpPreDecrement:\r
985                         if(visit == PostVisit)\r
986                         {\r
987                                 sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);\r
988                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
989                                 {\r
990                                         emit(subOpcode, result, index, arg, index, &one);\r
991                                 }\r
992 \r
993                                 assignLvalue(arg, result);\r
994                         }\r
995                         break;\r
996                 case EOpRadians:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &rad); break;\r
997                 case EOpDegrees:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &deg); break;\r
998                 case EOpSin:              if(visit == PostVisit) emit(sw::Shader::OPCODE_SIN, result, arg); break;\r
999                 case EOpCos:              if(visit == PostVisit) emit(sw::Shader::OPCODE_COS, result, arg); break;\r
1000                 case EOpTan:              if(visit == PostVisit) emit(sw::Shader::OPCODE_TAN, result, arg); break;\r
1001                 case EOpAsin:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ASIN, result, arg); break;\r
1002                 case EOpAcos:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOS, result, arg); break;\r
1003                 case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN, result, arg); break;\r
1004                 case EOpSinh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SINH, result, arg); break;\r
1005                 case EOpCosh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_COSH, result, arg); break;\r
1006                 case EOpTanh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_TANH, result, arg); break;\r
1007                 case EOpAsinh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ASINH, result, arg); break;\r
1008                 case EOpAcosh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOSH, result, arg); break;\r
1009                 case EOpAtanh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ATANH, result, arg); break;\r
1010                 case EOpExp:              if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP, result, arg); break;\r
1011                 case EOpLog:              if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG, result, arg); break;\r
1012                 case EOpExp2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP2, result, arg); break;\r
1013                 case EOpLog2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break;\r
1014                 case EOpSqrt:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break;\r
1015                 case EOpInverseSqrt:      if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break;\r
1016                 case EOpAbs:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_ABS, result), result, arg); break;\r
1017                 case EOpSign:             if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_SGN, result), result, arg); break;\r
1018                 case EOpFloor:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break;\r
1019                 case EOpTrunc:            if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break;\r
1020                 case EOpRound:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break;\r
1021                 case EOpRoundEven:        if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUNDEVEN, result, arg); break;\r
1022                 case EOpCeil:             if(visit == PostVisit) emit(sw::Shader::OPCODE_CEIL, result, arg, result); break;\r
1023                 case EOpFract:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FRC, result, arg); break;\r
1024                 case EOpIsNan:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISNAN, result, arg); break;\r
1025                 case EOpIsInf:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISINF, result, arg); break;\r
1026                 case EOpLength:           if(visit == PostVisit) emit(sw::Shader::OPCODE_LEN(dim(arg)), result, arg); break;\r
1027                 case EOpNormalize:        if(visit == PostVisit) emit(sw::Shader::OPCODE_NRM(dim(arg)), result, arg); break;\r
1028                 case EOpDFdx:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDX, result, arg); break;\r
1029                 case EOpDFdy:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDY, result, arg); break;\r
1030                 case EOpFwidth:           if(visit == PostVisit) emit(sw::Shader::OPCODE_FWIDTH, result, arg); break;\r
1031                 case EOpAny:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ANY, result, arg); break;\r
1032                 case EOpAll:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ALL, result, arg); break;\r
1033                 case EOpFloatBitsToInt:   if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOINT, result, arg); break;\r
1034                 case EOpFloatBitsToUint:  if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOUINT, result, arg); break;\r
1035                 case EOpIntBitsToFloat:   if(visit == PostVisit) emit(sw::Shader::OPCODE_INTBITSTOFLOAT, result, arg); break;\r
1036                 case EOpUintBitsToFloat:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UINTBITSTOFLOAT, result, arg); break;\r
1037                 case EOpPackSnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKSNORM2x16, result, arg); break;\r
1038                 case EOpPackUnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKUNORM2x16, result, arg); break;\r
1039                 case EOpPackHalf2x16:     if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKHALF2x16, result, arg); break;\r
1040                 case EOpUnpackSnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKSNORM2x16, result, arg); break;\r
1041                 case EOpUnpackUnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKUNORM2x16, result, arg); break;\r
1042                 case EOpUnpackHalf2x16:   if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKHALF2x16, result, arg); break;\r
1043                 case EOpTranspose:\r
1044                         if(visit == PostVisit)\r
1045                         {\r
1046                                 int numCols = arg->getNominalSize();\r
1047                                 int numRows = arg->getSecondarySize();\r
1048                                 for(int i = 0; i < numCols; ++i)\r
1049                                 {\r
1050                                         for(int j = 0; j < numRows; ++j)\r
1051                                         {\r
1052                                                 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, j, arg, i);\r
1053                                                 mov->src[0].swizzle = 0x55 * j;\r
1054                                                 mov->dst.mask = 1 << i;\r
1055                                         }\r
1056                                 }\r
1057                         }\r
1058                         break;\r
1059                 case EOpDeterminant:\r
1060                         if(visit == PostVisit)\r
1061                         {\r
1062                                 int size = arg->getNominalSize();\r
1063                                 ASSERT(size == arg->getSecondarySize());\r
1064 \r
1065                                 emitDeterminant(result, arg, size);\r
1066                         }\r
1067                         break;\r
1068                 case EOpInverse:\r
1069                         if(visit == PostVisit)\r
1070                         {\r
1071                                 int size = arg->getNominalSize();\r
1072                                 ASSERT(size == arg->getSecondarySize());\r
1073 \r
1074                                 // Compute transposed matrix of cofactors\r
1075                                 for(int i = 0; i < size; ++i)\r
1076                                 {\r
1077                                         for(int j = 0; j < size; ++j)\r
1078                                         {\r
1079                                                 // For a 2x2 matrix, the cofactor is simply a transposed move or negate\r
1080                                                 // For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant\r
1081                                                 emitDeterminant(result, arg, size - 1, j, i, i, j);\r
1082                                         }\r
1083                                 }\r
1084 \r
1085                                 // Compute 1 / determinant\r
1086                                 Temporary invDet(this);\r
1087                                 emitDeterminant(&invDet, arg, size);\r
1088                                 Constant one(1.0f, 1.0f, 1.0f, 1.0f);\r
1089                                 Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet);\r
1090                                 div->src[1].swizzle = 0x00; // xxxx\r
1091 \r
1092                                 // Divide transposed matrix of cofactors by determinant\r
1093                                 for(int i = 0; i < size; ++i)\r
1094                                 {\r
1095                                         emit(sw::Shader::OPCODE_MUL, result, i, result, i, &invDet);\r
1096                                 }\r
1097                         }\r
1098                         break;\r
1099                 default: UNREACHABLE(node->getOp());\r
1100                 }\r
1101 \r
1102                 return true;\r
1103         }\r
1104 \r
1105         bool OutputASM::visitAggregate(Visit visit, TIntermAggregate *node)\r
1106         {\r
1107                 if(currentScope != emitScope && node->getOp() != EOpFunction && node->getOp() != EOpSequence)\r
1108                 {\r
1109                         return false;\r
1110                 }\r
1111 \r
1112                 Constant zero(0.0f, 0.0f, 0.0f, 0.0f);\r
1113 \r
1114                 TIntermTyped *result = node;\r
1115                 const TType &resultType = node->getType();\r
1116                 TIntermSequence &arg = node->getSequence();\r
1117                 int argumentCount = arg.size();\r
1118 \r
1119                 switch(node->getOp())\r
1120                 {\r
1121                 case EOpSequence:             break;\r
1122                 case EOpDeclaration:          break;\r
1123                 case EOpInvariantDeclaration: break;\r
1124                 case EOpPrototype:            break;\r
1125                 case EOpComma:\r
1126                         if(visit == PostVisit)\r
1127                         {\r
1128                                 copy(result, arg[1]);\r
1129                         }\r
1130                         break;\r
1131                 case EOpFunction:\r
1132                         if(visit == PreVisit)\r
1133                         {\r
1134                                 const TString &name = node->getName();\r
1135 \r
1136                                 if(emitScope == FUNCTION)\r
1137                                 {\r
1138                                         if(functionArray.size() > 1)   // No need for a label when there's only main()\r
1139                                         {\r
1140                                                 Instruction *label = emit(sw::Shader::OPCODE_LABEL);\r
1141                                                 label->dst.type = sw::Shader::PARAMETER_LABEL;\r
1142 \r
1143                                                 const Function *function = findFunction(name);\r
1144                                                 ASSERT(function);   // Should have been added during global pass\r
1145                                                 label->dst.index = function->label;\r
1146                                                 currentFunction = function->label;\r
1147                                         }\r
1148                                 }\r
1149                                 else if(emitScope == GLOBAL)\r
1150                                 {\r
1151                                         if(name != "main(")\r
1152                                         {\r
1153                                                 TIntermSequence &arguments = node->getSequence()[0]->getAsAggregate()->getSequence();\r
1154                                                 functionArray.push_back(Function(functionArray.size(), name, &arguments, node));\r
1155                                         }\r
1156                                 }\r
1157                                 else UNREACHABLE(emitScope);\r
1158 \r
1159                                 currentScope = FUNCTION;\r
1160                         }\r
1161                         else if(visit == PostVisit)\r
1162                         {\r
1163                                 if(emitScope == FUNCTION)\r
1164                                 {\r
1165                                         if(functionArray.size() > 1)   // No need to return when there's only main()\r
1166                                         {\r
1167                                                 emit(sw::Shader::OPCODE_RET);\r
1168                                         }\r
1169                                 }\r
1170 \r
1171                                 currentScope = GLOBAL;\r
1172                         }\r
1173                         break;\r
1174                 case EOpFunctionCall:\r
1175                         if(visit == PostVisit)\r
1176                         {\r
1177                                 if(node->isUserDefined())\r
1178                                 {\r
1179                                         const TString &name = node->getName();\r
1180                                         const Function *function = findFunction(name);\r
1181 \r
1182                                         if(!function)\r
1183                                         {\r
1184                                                 mContext.error(node->getLine(), "function definition not found", name.c_str());\r
1185                                                 return false;\r
1186                                         }\r
1187 \r
1188                                         TIntermSequence &arguments = *function->arg;\r
1189 \r
1190                                         for(int i = 0; i < argumentCount; i++)\r
1191                                         {\r
1192                                                 TIntermTyped *in = arguments[i]->getAsTyped();\r
1193 \r
1194                                                 if(in->getQualifier() == EvqIn ||\r
1195                                                    in->getQualifier() == EvqInOut ||\r
1196                                                    in->getQualifier() == EvqConstReadOnly)\r
1197                                                 {\r
1198                                                         copy(in, arg[i]);\r
1199                                                 }\r
1200                                         }\r
1201 \r
1202                                         Instruction *call = emit(sw::Shader::OPCODE_CALL);\r
1203                                         call->dst.type = sw::Shader::PARAMETER_LABEL;\r
1204                                         call->dst.index = function->label;\r
1205 \r
1206                                         if(function->ret && function->ret->getType().getBasicType() != EbtVoid)\r
1207                                         {\r
1208                                                 copy(result, function->ret);\r
1209                                         }\r
1210 \r
1211                                         for(int i = 0; i < argumentCount; i++)\r
1212                                         {\r
1213                                                 TIntermTyped *argument = arguments[i]->getAsTyped();\r
1214                                                 TIntermTyped *out = arg[i]->getAsTyped();\r
1215 \r
1216                                                 if(argument->getQualifier() == EvqOut ||\r
1217                                                    argument->getQualifier() == EvqInOut)\r
1218                                                 {\r
1219                                                         copy(out, argument);\r
1220                                                 }\r
1221                                         }\r
1222                                 }\r
1223                                 else\r
1224                                 {\r
1225                                         const TextureFunction textureFunction(node->getName());\r
1226                                         TIntermTyped *t = arg[1]->getAsTyped();\r
1227 \r
1228                                         Temporary coord(this);\r
1229 \r
1230                                         if(textureFunction.proj)\r
1231                                         {\r
1232                                                 Instruction *rcp = emit(sw::Shader::OPCODE_RCPX, &coord, arg[1]);\r
1233                                                 rcp->src[0].swizzle = 0x55 * (t->getNominalSize() - 1);\r
1234                                                 rcp->dst.mask = 0x7;\r
1235 \r
1236                                                 Instruction *mul = emit(sw::Shader::OPCODE_MUL, &coord, arg[1], &coord);\r
1237                                                 mul->dst.mask = 0x7;\r
1238                                         }\r
1239                                         else\r
1240                                         {\r
1241                                                 emit(sw::Shader::OPCODE_MOV, &coord, arg[1]);\r
1242                                         }\r
1243 \r
1244                                         switch(textureFunction.method)\r
1245                                         {\r
1246                                         case TextureFunction::IMPLICIT:\r
1247                                                 {\r
1248                                                         TIntermNode* offset = textureFunction.offset ? arg[2] : 0;\r
1249 \r
1250                                                         if(argumentCount == 2 || (textureFunction.offset && argumentCount == 3))\r
1251                                                         {\r
1252                                                                 Instruction *tex = emit(textureFunction.offset ? sw::Shader::OPCODE_TEXOFFSET : sw::Shader::OPCODE_TEX,\r
1253                                                                                         result, &coord, arg[0], offset);\r
1254                                                         }\r
1255                                                         else if(argumentCount == 3 || (textureFunction.offset && argumentCount == 4))   // bias\r
1256                                                         {\r
1257                                                                 Instruction *bias = emit(sw::Shader::OPCODE_MOV, &coord, arg[textureFunction.offset ? 3 : 2]);\r
1258                                                                 bias->dst.mask = 0x8;\r
1259 \r
1260                                                                 Instruction *tex = emit(textureFunction.offset ? sw::Shader::OPCODE_TEXOFFSET : sw::Shader::OPCODE_TEX,\r
1261                                                                                         result, &coord, arg[0], offset); // FIXME: Implement an efficient TEXLDB instruction\r
1262                                                                 tex->bias = true;\r
1263                                                         }\r
1264                                                         else UNREACHABLE(argumentCount);\r
1265                                                 }\r
1266                                                 break;\r
1267                                         case TextureFunction::LOD:\r
1268                                                 {\r
1269                                                         Instruction *lod = emit(sw::Shader::OPCODE_MOV, &coord, arg[2]);\r
1270                                                         lod->dst.mask = 0x8;\r
1271 \r
1272                                                         emit(textureFunction.offset ? sw::Shader::OPCODE_TEXLDLOFFSET : sw::Shader::OPCODE_TEXLDL,\r
1273                                                              result, &coord, arg[0], textureFunction.offset ? arg[3] : nullptr);\r
1274                                                 }\r
1275                                                 break;\r
1276                                         case TextureFunction::FETCH:\r
1277                                                 {\r
1278                                                         if(argumentCount == 3 || (textureFunction.offset && argumentCount == 4))\r
1279                                                         {\r
1280                                                                 TIntermNode *offset = textureFunction.offset ? arg[3] : nullptr;\r
1281 \r
1282                                                                 emit(textureFunction.offset ? sw::Shader::OPCODE_TEXELFETCHOFFSET : sw::Shader::OPCODE_TEXELFETCH,\r
1283                                                                      result, arg[1], arg[0], arg[2], offset);\r
1284                                                         }\r
1285                                                         else UNREACHABLE(argumentCount);\r
1286                                                 }\r
1287                                                 break;\r
1288                                         case TextureFunction::GRAD:\r
1289                                                 {\r
1290                                                         if(argumentCount == 4 || (textureFunction.offset && argumentCount == 5))\r
1291                                                         {\r
1292                                                                 TIntermNode *offset = textureFunction.offset ? arg[4] : nullptr;\r
1293 \r
1294                                                                 emit(textureFunction.offset ? sw::Shader::OPCODE_TEXGRADOFFSET : sw::Shader::OPCODE_TEXGRAD,\r
1295                                                                      result, &coord, arg[0], arg[2], arg[3], offset);\r
1296                                                         }\r
1297                                                         else UNREACHABLE(argumentCount);\r
1298                                                 }\r
1299                                                 break;\r
1300                                         case TextureFunction::SIZE:\r
1301                                                 emit(sw::Shader::OPCODE_TEXSIZE, result, arg[1], arg[0]);\r
1302                                                 break;\r
1303                                         default:\r
1304                                                 UNREACHABLE(textureFunction.method);\r
1305                                         }\r
1306                                 }\r
1307                         }\r
1308                         break;\r
1309                 case EOpParameters:\r
1310                         break;\r
1311                 case EOpConstructFloat:\r
1312                 case EOpConstructVec2:\r
1313                 case EOpConstructVec3:\r
1314                 case EOpConstructVec4:\r
1315                 case EOpConstructBool:\r
1316                 case EOpConstructBVec2:\r
1317                 case EOpConstructBVec3:\r
1318                 case EOpConstructBVec4:\r
1319                 case EOpConstructInt:\r
1320                 case EOpConstructIVec2:\r
1321                 case EOpConstructIVec3:\r
1322                 case EOpConstructIVec4:\r
1323                 case EOpConstructUInt:\r
1324                 case EOpConstructUVec2:\r
1325                 case EOpConstructUVec3:\r
1326                 case EOpConstructUVec4:\r
1327                         if(visit == PostVisit)\r
1328                         {\r
1329                                 int component = 0;\r
1330 \r
1331                                 for(int i = 0; i < argumentCount; i++)\r
1332                                 {\r
1333                                         TIntermTyped *argi = arg[i]->getAsTyped();\r
1334                                         int size = argi->getNominalSize();\r
1335 \r
1336                                         if(!argi->isMatrix())\r
1337                                         {\r
1338                                                 Instruction *mov = emitCast(result, argi);\r
1339                                                 mov->dst.mask = (0xF << component) & 0xF;\r
1340                                                 mov->src[0].swizzle = readSwizzle(argi, size) << (component * 2);\r
1341 \r
1342                                                 component += size;\r
1343                                         }\r
1344                                         else   // Matrix\r
1345                                         {\r
1346                                                 int column = 0;\r
1347 \r
1348                                                 while(component < resultType.getNominalSize())\r
1349                                                 {\r
1350                                                         Instruction *mov = emitCast(result, 0, argi, column);\r
1351                                                         mov->dst.mask = (0xF << component) & 0xF;\r
1352                                                         mov->src[0].swizzle = readSwizzle(argi, size) << (component * 2);\r
1353 \r
1354                                                         column++;\r
1355                                                         component += size;\r
1356                                                 }\r
1357                                         }\r
1358                                 }\r
1359                         }\r
1360                         break;\r
1361                 case EOpConstructMat2:\r
1362                 case EOpConstructMat2x3:\r
1363                 case EOpConstructMat2x4:\r
1364                 case EOpConstructMat3x2:\r
1365                 case EOpConstructMat3:\r
1366                 case EOpConstructMat3x4:\r
1367                 case EOpConstructMat4x2:\r
1368                 case EOpConstructMat4x3:\r
1369                 case EOpConstructMat4:\r
1370                         if(visit == PostVisit)\r
1371                         {\r
1372                                 TIntermTyped *arg0 = arg[0]->getAsTyped();\r
1373                                 const int outCols = result->getNominalSize();\r
1374                                 const int outRows = result->getSecondarySize();\r
1375 \r
1376                                 if(arg0->isScalar() && arg.size() == 1)   // Construct scale matrix\r
1377                                 {\r
1378                                         for(int i = 0; i < outCols; i++)\r
1379                                         {\r
1380                                                 Instruction *init = emit(sw::Shader::OPCODE_MOV, result, i, &zero);\r
1381                                                 Instruction *mov = emitCast(result, i, arg0, 0);\r
1382                                                 mov->dst.mask = 1 << i;\r
1383                                                 ASSERT(mov->src[0].swizzle == 0x00);\r
1384                                         }\r
1385                                 }\r
1386                                 else if(arg0->isMatrix())\r
1387                                 {\r
1388                                         const int inCols = arg0->getNominalSize();\r
1389                                         const int inRows = arg0->getSecondarySize();\r
1390 \r
1391                                         for(int i = 0; i < outCols; i++)\r
1392                                         {\r
1393                                                 if(i >= inCols || outRows > inRows)\r
1394                                                 {\r
1395                                                         // Initialize to identity matrix\r
1396                                                         Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f));\r
1397                                                         Instruction *mov = emitCast(result, i, &col, 0);\r
1398                                                 }\r
1399 \r
1400                                                 if(i < inCols)\r
1401                                                 {\r
1402                                                         Instruction *mov = emitCast(result, i, arg0, i);\r
1403                                                         mov->dst.mask = 0xF >> (4 - inRows);\r
1404                                                 }\r
1405                                         }\r
1406                                 }\r
1407                                 else\r
1408                                 {\r
1409                                         int column = 0;\r
1410                                         int row = 0;\r
1411 \r
1412                                         for(int i = 0; i < argumentCount; i++)\r
1413                                         {\r
1414                                                 TIntermTyped *argi = arg[i]->getAsTyped();\r
1415                                                 int size = argi->getNominalSize();\r
1416                                                 int element = 0;\r
1417 \r
1418                                                 while(element < size)\r
1419                                                 {\r
1420                                                         Instruction *mov = emitCast(result, column, argi, 0);\r
1421                                                         mov->dst.mask = (0xF << row) & 0xF;\r
1422                                                         mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element;\r
1423 \r
1424                                                         int end = row + size - element;\r
1425                                                         column = end >= outRows ? column + 1 : column;\r
1426                                                         element = element + outRows - row;\r
1427                                                         row = end >= outRows ? 0 : end;\r
1428                                                 }\r
1429                                         }\r
1430                                 }\r
1431                         }\r
1432                         break;\r
1433                 case EOpConstructStruct:\r
1434                         if(visit == PostVisit)\r
1435                         {\r
1436                                 int offset = 0;\r
1437                                 for(int i = 0; i < argumentCount; i++)\r
1438                                 {\r
1439                                         TIntermTyped *argi = arg[i]->getAsTyped();\r
1440                                         int size = argi->totalRegisterCount();\r
1441 \r
1442                                         for(int index = 0; index < size; index++)\r
1443                                         {\r
1444                                                 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index + offset, argi, index);\r
1445                                                 mov->dst.mask = writeMask(result, offset + index);\r
1446                                         }\r
1447 \r
1448                                         offset += size;\r
1449                                 }\r
1450                         }\r
1451                         break;\r
1452                 case EOpLessThan:         if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, arg[0], arg[1]); break;\r
1453                 case EOpGreaterThan:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, arg[0], arg[1]); break;\r
1454                 case EOpLessThanEqual:    if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, arg[0], arg[1]); break;\r
1455                 case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, arg[0], arg[1]); break;\r
1456                 case EOpVectorEqual:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_EQ, result, arg[0], arg[1]); break;\r
1457                 case EOpVectorNotEqual:   if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_NE, result, arg[0], arg[1]); break;\r
1458                 case EOpMod:              if(visit == PostVisit) emit(sw::Shader::OPCODE_MOD, result, arg[0], arg[1]); break;\r
1459                 case EOpPow:              if(visit == PostVisit) emit(sw::Shader::OPCODE_POW, result, arg[0], arg[1]); break;\r
1460                 case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN2, result, arg[0], arg[1]); break;\r
1461                 case EOpMin:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, arg[0], arg[1]); break;\r
1462                 case EOpMax:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); break;\r
1463                 case EOpClamp:\r
1464                         if(visit == PostVisit)\r
1465                         {\r
1466                                 emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]);\r
1467                                 emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, result, arg[2]);\r
1468                         }\r
1469                         break;\r
1470                 case EOpMix:         if(visit == PostVisit) emit(sw::Shader::OPCODE_LRP, result, arg[2], arg[1], arg[0]); break;\r
1471                 case EOpStep:        if(visit == PostVisit) emit(sw::Shader::OPCODE_STEP, result, arg[0], arg[1]); break;\r
1472                 case EOpSmoothStep:  if(visit == PostVisit) emit(sw::Shader::OPCODE_SMOOTH, result, arg[0], arg[1], arg[2]); break;\r
1473                 case EOpDistance:    if(visit == PostVisit) emit(sw::Shader::OPCODE_DIST(dim(arg[0])), result, arg[0], arg[1]); break;\r
1474                 case EOpDot:         if(visit == PostVisit) emit(sw::Shader::OPCODE_DP(dim(arg[0])), result, arg[0], arg[1]); break;\r
1475                 case EOpCross:       if(visit == PostVisit) emit(sw::Shader::OPCODE_CRS, result, arg[0], arg[1]); break;\r
1476                 case EOpFaceForward: if(visit == PostVisit) emit(sw::Shader::OPCODE_FORWARD(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;\r
1477                 case EOpReflect:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFLECT(dim(arg[0])), result, arg[0], arg[1]); break;\r
1478                 case EOpRefract:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFRACT(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;\r
1479                 case EOpMul:\r
1480                         if(visit == PostVisit)\r
1481                         {\r
1482                                 TIntermTyped *arg0 = arg[0]->getAsTyped();\r
1483                                 TIntermTyped *arg1 = arg[1]->getAsTyped();\r
1484                                 ASSERT((arg0->getNominalSize() == arg1->getNominalSize()) && (arg0->getSecondarySize() == arg1->getSecondarySize()));\r
1485 \r
1486                                 int size = arg0->getNominalSize();\r
1487                                 for(int i = 0; i < size; i++)\r
1488                                 {\r
1489                                         emit(sw::Shader::OPCODE_MUL, result, i, arg[0], i, arg[1], i);\r
1490                                 }\r
1491                         }\r
1492                         break;\r
1493                 case EOpOuterProduct:\r
1494                         if(visit == PostVisit)\r
1495                         {\r
1496                                 for(int i = 0; i < dim(arg[1]); i++)\r
1497                                 {\r
1498                                         Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, arg[0], 0, arg[1]);\r
1499                                         mul->src[1].swizzle = 0x55 * i;\r
1500                                 }\r
1501                         }\r
1502                         break;\r
1503                 default: UNREACHABLE(node->getOp());\r
1504                 }\r
1505 \r
1506                 return true;\r
1507         }\r
1508 \r
1509         bool OutputASM::visitSelection(Visit visit, TIntermSelection *node)\r
1510         {\r
1511                 if(currentScope != emitScope)\r
1512                 {\r
1513                         return false;\r
1514                 }\r
1515 \r
1516                 TIntermTyped *condition = node->getCondition();\r
1517                 TIntermNode *trueBlock = node->getTrueBlock();\r
1518                 TIntermNode *falseBlock = node->getFalseBlock();\r
1519                 TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();\r
1520 \r
1521                 condition->traverse(this);\r
1522 \r
1523                 if(node->usesTernaryOperator())\r
1524                 {\r
1525                         if(constantCondition)\r
1526                         {\r
1527                                 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();\r
1528 \r
1529                                 if(trueCondition)\r
1530                                 {\r
1531                                         trueBlock->traverse(this);\r
1532                                         copy(node, trueBlock);\r
1533                                 }\r
1534                                 else\r
1535                                 {\r
1536                                         falseBlock->traverse(this);\r
1537                                         copy(node, falseBlock);\r
1538                                 }\r
1539                         }\r
1540                         else if(trivial(node, 6))   // Fast to compute both potential results and no side effects\r
1541                         {\r
1542                                 trueBlock->traverse(this);\r
1543                                 falseBlock->traverse(this);\r
1544                                 emit(sw::Shader::OPCODE_SELECT, node, condition, trueBlock, falseBlock);\r
1545                         }\r
1546                         else\r
1547                         {\r
1548                                 emit(sw::Shader::OPCODE_IF, 0, condition);\r
1549 \r
1550                                 if(trueBlock)\r
1551                                 {\r
1552                                         trueBlock->traverse(this);\r
1553                                         copy(node, trueBlock);\r
1554                                 }\r
1555 \r
1556                                 if(falseBlock)\r
1557                                 {\r
1558                                         emit(sw::Shader::OPCODE_ELSE);\r
1559                                         falseBlock->traverse(this);\r
1560                                         copy(node, falseBlock);\r
1561                                 }\r
1562 \r
1563                                 emit(sw::Shader::OPCODE_ENDIF);\r
1564                         }\r
1565                 }\r
1566                 else  // if/else statement\r
1567                 {\r
1568                         if(constantCondition)\r
1569                         {\r
1570                                 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();\r
1571 \r
1572                                 if(trueCondition)\r
1573                                 {\r
1574                                         if(trueBlock)\r
1575                                         {\r
1576                                                 trueBlock->traverse(this);\r
1577                                         }\r
1578                                 }\r
1579                                 else\r
1580                                 {\r
1581                                         if(falseBlock)\r
1582                                         {\r
1583                                                 falseBlock->traverse(this);\r
1584                                         }\r
1585                                 }\r
1586                         }\r
1587                         else\r
1588                         {\r
1589                                 emit(sw::Shader::OPCODE_IF, 0, condition);\r
1590 \r
1591                                 if(trueBlock)\r
1592                                 {\r
1593                                         trueBlock->traverse(this);\r
1594                                 }\r
1595 \r
1596                                 if(falseBlock)\r
1597                                 {\r
1598                                         emit(sw::Shader::OPCODE_ELSE);\r
1599                                         falseBlock->traverse(this);\r
1600                                 }\r
1601 \r
1602                                 emit(sw::Shader::OPCODE_ENDIF);\r
1603                         }\r
1604                 }\r
1605 \r
1606                 return false;\r
1607         }\r
1608 \r
1609         bool OutputASM::visitLoop(Visit visit, TIntermLoop *node)\r
1610         {\r
1611                 if(currentScope != emitScope)\r
1612                 {\r
1613                         return false;\r
1614                 }\r
1615 \r
1616                 unsigned int iterations = loopCount(node);\r
1617 \r
1618                 if(iterations == 0)\r
1619                 {\r
1620                         return false;\r
1621                 }\r
1622 \r
1623                 bool unroll = (iterations <= 4);\r
1624 \r
1625                 if(unroll)\r
1626                 {\r
1627                         DetectLoopDiscontinuity detectLoopDiscontinuity;\r
1628                         unroll = !detectLoopDiscontinuity.traverse(node);\r
1629                 }\r
1630 \r
1631                 TIntermNode *init = node->getInit();\r
1632                 TIntermTyped *condition = node->getCondition();\r
1633                 TIntermTyped *expression = node->getExpression();\r
1634                 TIntermNode *body = node->getBody();\r
1635                 Constant True(true);\r
1636 \r
1637                 if(node->getType() == ELoopDoWhile)\r
1638                 {\r
1639                         Temporary iterate(this);\r
1640                         emit(sw::Shader::OPCODE_MOV, &iterate, &True);\r
1641 \r
1642                         emit(sw::Shader::OPCODE_WHILE, 0, &iterate);   // FIXME: Implement real do-while\r
1643 \r
1644                         if(body)\r
1645                         {\r
1646                                 body->traverse(this);\r
1647                         }\r
1648 \r
1649                         emit(sw::Shader::OPCODE_TEST);\r
1650 \r
1651                         condition->traverse(this);\r
1652                         emit(sw::Shader::OPCODE_MOV, &iterate, condition);\r
1653 \r
1654                         emit(sw::Shader::OPCODE_ENDWHILE);\r
1655                 }\r
1656                 else\r
1657                 {\r
1658                         if(init)\r
1659                         {\r
1660                                 init->traverse(this);\r
1661                         }\r
1662 \r
1663                         if(unroll)\r
1664                         {\r
1665                                 for(unsigned int i = 0; i < iterations; i++)\r
1666                                 {\r
1667                                 //      condition->traverse(this);   // Condition could contain statements, but not in an unrollable loop\r
1668 \r
1669                                         if(body)\r
1670                                         {\r
1671                                                 body->traverse(this);\r
1672                                         }\r
1673 \r
1674                                         if(expression)\r
1675                                         {\r
1676                                                 expression->traverse(this);\r
1677                                         }\r
1678                                 }\r
1679                         }\r
1680                         else\r
1681                         {\r
1682                                 if(condition)\r
1683                                 {\r
1684                                         condition->traverse(this);\r
1685                                 }\r
1686                                 else\r
1687                                 {\r
1688                                         condition = &True;\r
1689                                 }\r
1690 \r
1691                                 emit(sw::Shader::OPCODE_WHILE, 0, condition);\r
1692 \r
1693                                 if(body)\r
1694                                 {\r
1695                                         body->traverse(this);\r
1696                                 }\r
1697 \r
1698                                 emit(sw::Shader::OPCODE_TEST);\r
1699 \r
1700                                 if(expression)\r
1701                                 {\r
1702                                         expression->traverse(this);\r
1703                                 }\r
1704 \r
1705                                 if(condition)\r
1706                                 {\r
1707                                         condition->traverse(this);\r
1708                                 }\r
1709 \r
1710                                 emit(sw::Shader::OPCODE_ENDWHILE);\r
1711                         }\r
1712                 }\r
1713 \r
1714                 return false;\r
1715         }\r
1716 \r
1717         bool OutputASM::visitBranch(Visit visit, TIntermBranch *node)\r
1718         {\r
1719                 if(currentScope != emitScope)\r
1720                 {\r
1721                         return false;\r
1722                 }\r
1723 \r
1724                 switch(node->getFlowOp())\r
1725                 {\r
1726                 case EOpKill:      if(visit == PostVisit) emit(sw::Shader::OPCODE_DISCARD);  break;\r
1727                 case EOpBreak:     if(visit == PostVisit) emit(sw::Shader::OPCODE_BREAK);    break;\r
1728                 case EOpContinue:  if(visit == PostVisit) emit(sw::Shader::OPCODE_CONTINUE); break;\r
1729                 case EOpReturn:\r
1730                         if(visit == PostVisit)\r
1731                         {\r
1732                                 TIntermTyped *value = node->getExpression();\r
1733 \r
1734                                 if(value)\r
1735                                 {\r
1736                                         copy(functionArray[currentFunction].ret, value);\r
1737                                 }\r
1738 \r
1739                                 emit(sw::Shader::OPCODE_LEAVE);\r
1740                         }\r
1741                         break;\r
1742                 default: UNREACHABLE(node->getFlowOp());\r
1743                 }\r
1744 \r
1745                 return true;\r
1746         }\r
1747 \r
1748         bool OutputASM::isSamplerRegister(TIntermTyped *operand)\r
1749         {\r
1750                 return operand && isSamplerRegister(operand->getType());\r
1751         }\r
1752 \r
1753         bool OutputASM::isSamplerRegister(const TType &type)\r
1754         {\r
1755                 // A sampler register's qualifiers can be:\r
1756                 // - EvqUniform: The sampler uniform is used as is in the code (default case).\r
1757                 // - EvqTemporary: The sampler is indexed. It's still a sampler register.\r
1758                 // - EvqIn (and other similar types): The sampler has been passed as a function argument. At this point,\r
1759                 //                                    the sampler has been copied and is no longer a sampler register.\r
1760                 return IsSampler(type.getBasicType()) && (type.getQualifier() == EvqUniform || type.getQualifier() == EvqTemporary);\r
1761         }\r
1762 \r
1763         Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, TIntermNode *src4)\r
1764         {\r
1765                 return emit(op, dst, 0, src0, 0, src1, 0, src2, 0, src3, 0, src4, 0);\r
1766         }\r
1767 \r
1768         Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, int dstIndex, TIntermNode *src0, int index0, TIntermNode *src1, int index1,\r
1769                                      TIntermNode *src2, int index2, TIntermNode *src3, int index3, TIntermNode *src4, int index4)\r
1770         {\r
1771                 if(isSamplerRegister(dst))\r
1772                 {\r
1773                         op = sw::Shader::OPCODE_NULL;   // Can't assign to a sampler, but this is hit when indexing sampler arrays\r
1774                 }\r
1775 \r
1776                 Instruction *instruction = new Instruction(op);\r
1777 \r
1778                 if(dst)\r
1779                 {\r
1780                         instruction->dst.type = registerType(dst);\r
1781                         instruction->dst.index = registerIndex(dst) + dstIndex;\r
1782                         instruction->dst.mask = writeMask(dst);\r
1783                         instruction->dst.integer = (dst->getBasicType() == EbtInt);\r
1784                 }\r
1785 \r
1786                 argument(instruction->src[0], src0, index0);\r
1787                 argument(instruction->src[1], src1, index1);\r
1788                 argument(instruction->src[2], src2, index2);\r
1789                 argument(instruction->src[3], src3, index3);\r
1790                 argument(instruction->src[4], src4, index4);\r
1791 \r
1792                 shader->append(instruction);\r
1793 \r
1794                 return instruction;\r
1795         }\r
1796 \r
1797         Instruction *OutputASM::emitCast(TIntermTyped *dst, TIntermTyped *src)\r
1798         {\r
1799                 return emitCast(dst, 0, src, 0);\r
1800         }\r
1801 \r
1802         Instruction *OutputASM::emitCast(TIntermTyped *dst, int dstIndex, TIntermTyped *src, int srcIndex)\r
1803         {\r
1804                 switch(src->getBasicType())\r
1805                 {\r
1806                 case EbtBool:\r
1807                         switch(dst->getBasicType())\r
1808                         {\r
1809                         case EbtInt:   return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);\r
1810                         case EbtUInt:  return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);\r
1811                         case EbtFloat: return emit(sw::Shader::OPCODE_B2F, dst, dstIndex, src, srcIndex);\r
1812                         default:       break;\r
1813                         }\r
1814                         break;\r
1815                 case EbtInt:\r
1816                         switch(dst->getBasicType())\r
1817                         {\r
1818                         case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);\r
1819                         case EbtFloat: return emit(sw::Shader::OPCODE_I2F, dst, dstIndex, src, srcIndex);\r
1820                         default:       break;\r
1821                         }\r
1822                         break;\r
1823                 case EbtUInt:\r
1824                         switch(dst->getBasicType())\r
1825                         {\r
1826                         case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);\r
1827                         case EbtFloat: return emit(sw::Shader::OPCODE_U2F, dst, dstIndex, src, srcIndex);\r
1828                         default:       break;\r
1829                         }\r
1830                         break;\r
1831                 case EbtFloat:\r
1832                         switch(dst->getBasicType())\r
1833                         {\r
1834                         case EbtBool: return emit(sw::Shader::OPCODE_F2B, dst, dstIndex, src, srcIndex);\r
1835                         case EbtInt:  return emit(sw::Shader::OPCODE_F2I, dst, dstIndex, src, srcIndex);\r
1836                         case EbtUInt: return emit(sw::Shader::OPCODE_F2U, dst, dstIndex, src, srcIndex);\r
1837                         default:      break;\r
1838                         }\r
1839                         break;\r
1840                 default:\r
1841                         break;\r
1842                 }\r
1843 \r
1844                 ASSERT(src->getBasicType() == dst->getBasicType());\r
1845 \r
1846                 return emit(sw::Shader::OPCODE_MOV, dst, dstIndex, src, srcIndex);\r
1847         }\r
1848 \r
1849         void OutputASM::emitBinary(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2)\r
1850         {\r
1851                 for(int index = 0; index < dst->elementRegisterCount(); index++)\r
1852                 {\r
1853                         emit(op, dst, index, src0, index, src1, index, src2, index);\r
1854                 }\r
1855         }\r
1856 \r
1857         void OutputASM::emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1)\r
1858         {\r
1859                 emitBinary(op, result, src0, src1);\r
1860                 assignLvalue(lhs, result);\r
1861         }\r
1862 \r
1863         void OutputASM::emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index)\r
1864         {\r
1865                 sw::Shader::Opcode opcode;\r
1866                 switch(left->getAsTyped()->getBasicType())\r
1867                 {\r
1868                 case EbtBool:\r
1869                 case EbtInt:\r
1870                         opcode = sw::Shader::OPCODE_ICMP;\r
1871                         break;\r
1872                 case EbtUInt:\r
1873                         opcode = sw::Shader::OPCODE_UCMP;\r
1874                         break;\r
1875                 default:\r
1876                         opcode = sw::Shader::OPCODE_CMP;\r
1877                         break;\r
1878                 }\r
1879 \r
1880                 Instruction *cmp = emit(opcode, dst, 0, left, index, right, index);\r
1881                 cmp->control = cmpOp;\r
1882         }\r
1883 \r
1884         int componentCount(const TType &type, int registers)\r
1885         {\r
1886                 if(registers == 0)\r
1887                 {\r
1888                         return 0;\r
1889                 }\r
1890 \r
1891                 if(type.isArray() && registers >= type.elementRegisterCount())\r
1892                 {\r
1893                         int index = registers / type.elementRegisterCount();\r
1894                         registers -= index * type.elementRegisterCount();\r
1895                         return index * type.getElementSize() + componentCount(type, registers);\r
1896                 }\r
1897 \r
1898                 if(type.isStruct() || type.isInterfaceBlock())\r
1899                 {\r
1900                         const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();\r
1901                         int elements = 0;\r
1902 \r
1903                         for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++)\r
1904                         {\r
1905                                 const TType &fieldType = *((*field)->type());\r
1906 \r
1907                                 if(fieldType.totalRegisterCount() <= registers)\r
1908                                 {\r
1909                                         registers -= fieldType.totalRegisterCount();\r
1910                                         elements += fieldType.getObjectSize();\r
1911                                 }\r
1912                                 else   // Register within this field\r
1913                                 {\r
1914                                         return elements + componentCount(fieldType, registers);\r
1915                                 }\r
1916                         }\r
1917                 }\r
1918                 else if(type.isMatrix())\r
1919                 {\r
1920                         return registers * type.registerSize();\r
1921                 }\r
1922 \r
1923                 UNREACHABLE(0);\r
1924                 return 0;\r
1925         }\r
1926 \r
1927         int registerSize(const TType &type, int registers)\r
1928         {\r
1929                 if(registers == 0)\r
1930                 {\r
1931                         if(type.isStruct())\r
1932                         {\r
1933                                 return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0);\r
1934                         }\r
1935                         else if(type.isInterfaceBlock())\r
1936                         {\r
1937                                 return registerSize(*((*(type.getInterfaceBlock()->fields().begin()))->type()), 0);\r
1938                         }\r
1939 \r
1940                         return type.registerSize();\r
1941                 }\r
1942 \r
1943                 if(type.isArray() && registers >= type.elementRegisterCount())\r
1944                 {\r
1945                         int index = registers / type.elementRegisterCount();\r
1946                         registers -= index * type.elementRegisterCount();\r
1947                         return registerSize(type, registers);\r
1948                 }\r
1949 \r
1950                 if(type.isStruct() || type.isInterfaceBlock())\r
1951                 {\r
1952                         const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();\r
1953                         int elements = 0;\r
1954 \r
1955                         for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++)\r
1956                         {\r
1957                                 const TType &fieldType = *((*field)->type());\r
1958 \r
1959                                 if(fieldType.totalRegisterCount() <= registers)\r
1960                                 {\r
1961                                         registers -= fieldType.totalRegisterCount();\r
1962                                         elements += fieldType.getObjectSize();\r
1963                                 }\r
1964                                 else   // Register within this field\r
1965                                 {\r
1966                                         return registerSize(fieldType, registers);\r
1967                                 }\r
1968                         }\r
1969                 }\r
1970                 else if(type.isMatrix())\r
1971                 {\r
1972                         return registerSize(type, 0);\r
1973                 }\r
1974 \r
1975                 UNREACHABLE(0);\r
1976                 return 0;\r
1977         }\r
1978 \r
1979         int OutputASM::getBlockId(TIntermTyped *arg)\r
1980         {\r
1981                 if(arg)\r
1982                 {\r
1983                         const TType &type = arg->getType();\r
1984                         TInterfaceBlock* block = type.getInterfaceBlock();\r
1985                         if(block && (type.getQualifier() == EvqUniform))\r
1986                         {\r
1987                                 // Make sure the uniform block is declared\r
1988                                 uniformRegister(arg);\r
1989 \r
1990                                 const char* blockName = block->name().c_str();\r
1991 \r
1992                                 // Fetch uniform block index from array of blocks\r
1993                                 for(ActiveUniformBlocks::const_iterator it = shaderObject->activeUniformBlocks.begin(); it != shaderObject->activeUniformBlocks.end(); ++it)\r
1994                                 {\r
1995                                         if(blockName == it->name)\r
1996                                         {\r
1997                                                 return it->blockId;\r
1998                                         }\r
1999                                 }\r
2000 \r
2001                                 ASSERT(false);\r
2002                         }\r
2003                 }\r
2004 \r
2005                 return -1;\r
2006         }\r
2007 \r
2008         OutputASM::ArgumentInfo OutputASM::getArgumentInfo(TIntermTyped *arg, int index)\r
2009         {\r
2010                 const TType &type = arg->getType();\r
2011                 int blockId = getBlockId(arg);\r
2012                 ArgumentInfo argumentInfo(BlockMemberInfo::getDefaultBlockInfo(), type, -1, -1);\r
2013                 if(blockId != -1)\r
2014                 {\r
2015                         argumentInfo.bufferIndex = 0;\r
2016                         for(int i = 0; i < blockId; ++i)\r
2017                         {\r
2018                                 int blockArraySize = shaderObject->activeUniformBlocks[i].arraySize;\r
2019                                 argumentInfo.bufferIndex += blockArraySize > 0 ? blockArraySize : 1;\r
2020                         }\r
2021 \r
2022                         const BlockDefinitionIndexMap& blockDefinition = blockDefinitions[blockId];\r
2023 \r
2024                         BlockDefinitionIndexMap::const_iterator itEnd = blockDefinition.end();\r
2025                         BlockDefinitionIndexMap::const_iterator it = itEnd;\r
2026 \r
2027                         argumentInfo.clampedIndex = index;\r
2028                         if(type.isInterfaceBlock())\r
2029                         {\r
2030                                 // Offset index to the beginning of the selected instance\r
2031                                 size_t blockRegisters = type.elementRegisterCount();\r
2032                                 size_t bufferOffset = argumentInfo.clampedIndex / blockRegisters;\r
2033                                 argumentInfo.bufferIndex += bufferOffset;\r
2034                                 argumentInfo.clampedIndex -= bufferOffset * blockRegisters;\r
2035                         }\r
2036 \r
2037                         int regIndex = registerIndex(arg);\r
2038                         for(int i = regIndex + argumentInfo.clampedIndex; i >= regIndex; --i)\r
2039                         {\r
2040                                 it = blockDefinition.find(i);\r
2041                                 if(it != itEnd)\r
2042                                 {\r
2043                                         argumentInfo.clampedIndex -= (i - regIndex);\r
2044                                         break;\r
2045                                 }\r
2046                         }\r
2047                         ASSERT(it != itEnd);\r
2048 \r
2049                         argumentInfo.typedMemberInfo = it->second;\r
2050 \r
2051                         int registerCount = argumentInfo.typedMemberInfo.type.totalRegisterCount();\r
2052                         argumentInfo.clampedIndex = (argumentInfo.clampedIndex >= registerCount) ? registerCount - 1 : argumentInfo.clampedIndex;\r
2053                 }\r
2054                 else\r
2055                 {\r
2056                         argumentInfo.clampedIndex = (index >= arg->totalRegisterCount()) ? arg->totalRegisterCount() - 1 : index;\r
2057                 }\r
2058 \r
2059                 return argumentInfo;\r
2060         }\r
2061 \r
2062         void OutputASM::argument(sw::Shader::SourceParameter &parameter, TIntermNode *argument, int index)\r
2063         {\r
2064                 if(argument)\r
2065                 {\r
2066                         TIntermTyped *arg = argument->getAsTyped();\r
2067                         Temporary unpackedUniform(this);\r
2068 \r
2069                         const TType& srcType = arg->getType();\r
2070                         TInterfaceBlock* srcBlock = srcType.getInterfaceBlock();\r
2071                         if(srcBlock && (srcType.getQualifier() == EvqUniform))\r
2072                         {\r
2073                                 const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);\r
2074                                 const TType &memberType = argumentInfo.typedMemberInfo.type;\r
2075 \r
2076                                 if(memberType.getBasicType() == EbtBool)\r
2077                                 {\r
2078                                         int arraySize = (memberType.isArray() ? memberType.getArraySize() : 1);\r
2079                                         ASSERT(argumentInfo.clampedIndex < arraySize);\r
2080 \r
2081                                         // Convert the packed bool, which is currently an int, to a true bool\r
2082                                         Instruction *instruction = new Instruction(sw::Shader::OPCODE_I2B);\r
2083                                         instruction->dst.type = sw::Shader::PARAMETER_TEMP;\r
2084                                         instruction->dst.index = registerIndex(&unpackedUniform);\r
2085                                         instruction->src[0].type = sw::Shader::PARAMETER_CONST;\r
2086                                         instruction->src[0].bufferIndex = argumentInfo.bufferIndex;\r
2087                                         instruction->src[0].index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * argumentInfo.typedMemberInfo.arrayStride;\r
2088 \r
2089                                         shader->append(instruction);\r
2090 \r
2091                                         arg = &unpackedUniform;\r
2092                                         index = 0;\r
2093                                 }\r
2094                                 else if((srcBlock->matrixPacking() == EmpRowMajor) && memberType.isMatrix())\r
2095                                 {\r
2096                                         int numCols = memberType.getNominalSize();\r
2097                                         int numRows = memberType.getSecondarySize();\r
2098                                         int arraySize = (memberType.isArray() ? memberType.getArraySize() : 1);\r
2099 \r
2100                                         ASSERT(argumentInfo.clampedIndex < (numCols * arraySize));\r
2101 \r
2102                                         unsigned int dstIndex = registerIndex(&unpackedUniform);\r
2103                                         unsigned int srcSwizzle = (argumentInfo.clampedIndex % numCols) * 0x55;\r
2104                                         int arrayIndex = argumentInfo.clampedIndex / numCols;\r
2105                                         int matrixStartOffset = argumentInfo.typedMemberInfo.offset + arrayIndex * argumentInfo.typedMemberInfo.arrayStride;\r
2106 \r
2107                                         for(int j = 0; j < numRows; ++j)\r
2108                                         {\r
2109                                                 // Transpose the row major matrix\r
2110                                                 Instruction *instruction = new Instruction(sw::Shader::OPCODE_MOV);\r
2111                                                 instruction->dst.type = sw::Shader::PARAMETER_TEMP;\r
2112                                                 instruction->dst.index = dstIndex;\r
2113                                                 instruction->dst.mask = 1 << j;\r
2114                                                 instruction->src[0].type = sw::Shader::PARAMETER_CONST;\r
2115                                                 instruction->src[0].bufferIndex = argumentInfo.bufferIndex;\r
2116                                                 instruction->src[0].index = matrixStartOffset + j * argumentInfo.typedMemberInfo.matrixStride;\r
2117                                                 instruction->src[0].swizzle = srcSwizzle;\r
2118 \r
2119                                                 shader->append(instruction);\r
2120                                         }\r
2121 \r
2122                                         arg = &unpackedUniform;\r
2123                                         index = 0;\r
2124                                 }\r
2125                         }\r
2126 \r
2127                         const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);\r
2128                         const TType &type = argumentInfo.typedMemberInfo.type;\r
2129 \r
2130                         int size = registerSize(type, argumentInfo.clampedIndex);\r
2131 \r
2132                         parameter.type = registerType(arg);\r
2133                         parameter.bufferIndex = argumentInfo.bufferIndex;\r
2134 \r
2135                         if(arg->getQualifier() == EvqConstExpr)\r
2136                         {\r
2137                                 int component = componentCount(type, argumentInfo.clampedIndex);\r
2138                                 ConstantUnion *constants = arg->getAsConstantUnion()->getUnionArrayPointer();\r
2139 \r
2140                                 for(int i = 0; i < 4; i++)\r
2141                                 {\r
2142                                         if(size == 1)   // Replicate\r
2143                                         {\r
2144                                                 parameter.value[i] = constants[component + 0].getAsFloat();\r
2145                                         }\r
2146                                         else if(i < size)\r
2147                                         {\r
2148                                                 parameter.value[i] = constants[component + i].getAsFloat();\r
2149                                         }\r
2150                                         else\r
2151                                         {\r
2152                                                 parameter.value[i] = 0.0f;\r
2153                                         }\r
2154                                 }\r
2155                         }\r
2156                         else\r
2157                         {\r
2158                                 parameter.index = registerIndex(arg) + argumentInfo.clampedIndex;\r
2159 \r
2160                                 if(isSamplerRegister(arg))\r
2161                                 {\r
2162                                         TIntermBinary *binary = argument->getAsBinaryNode();\r
2163 \r
2164                                         if(binary)\r
2165                                         {\r
2166                                                 TIntermTyped *left = binary->getLeft();\r
2167                                                 TIntermTyped *right = binary->getRight();\r
2168 \r
2169                                                 switch(binary->getOp())\r
2170                                                 {\r
2171                                                 case EOpIndexDirect:\r
2172                                                         parameter.index += right->getAsConstantUnion()->getIConst(0);\r
2173                                                         break;\r
2174                                                 case EOpIndexIndirect:\r
2175                                                         if(left->getArraySize() > 1)\r
2176                                                         {\r
2177                                                                 parameter.rel.type = registerType(binary->getRight());\r
2178                                                                 parameter.rel.index = registerIndex(binary->getRight());\r
2179                                                                 parameter.rel.scale = 1;\r
2180                                                                 parameter.rel.deterministic = true;\r
2181                                                         }\r
2182                                                         break;\r
2183                                                 case EOpIndexDirectStruct:\r
2184                                                 case EOpIndexDirectInterfaceBlock:\r
2185                                                         parameter.index += right->getAsConstantUnion()->getIConst(0);\r
2186                                                         break;\r
2187                                                 default:\r
2188                                                         UNREACHABLE(binary->getOp());\r
2189                                                 }\r
2190                                         }\r
2191                                 }\r
2192                                 else if(parameter.bufferIndex != -1)\r
2193                                 {\r
2194                                         int stride = (argumentInfo.typedMemberInfo.matrixStride > 0) ? argumentInfo.typedMemberInfo.matrixStride : argumentInfo.typedMemberInfo.arrayStride;\r
2195                                         parameter.index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * stride;\r
2196                                 }\r
2197                         }\r
2198 \r
2199                         if(!IsSampler(arg->getBasicType()))\r
2200                         {\r
2201                                 parameter.swizzle = readSwizzle(arg, size);\r
2202                         }\r
2203                 }\r
2204         }\r
2205 \r
2206         void OutputASM::copy(TIntermTyped *dst, TIntermNode *src, int offset)\r
2207         {\r
2208                 for(int index = 0; index < dst->totalRegisterCount(); index++)\r
2209                 {\r
2210                         Instruction *mov = emit(sw::Shader::OPCODE_MOV, dst, index, src, offset + index);\r
2211                         mov->dst.mask = writeMask(dst, index);\r
2212                 }\r
2213         }\r
2214 \r
// Extracts the two-bit selector stored at position 'index' of a packed swizzle
// (position 0 occupies the two least significant bits).
int swizzleElement(int swizzle, int index)
{
	const int shift = 2 * index;
	const int selectorMask = 0x03;

	return (swizzle >> shift) & selectorMask;
}
2219 \r
2220         int swizzleSwizzle(int leftSwizzle, int rightSwizzle)\r
2221         {\r
2222                 return (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 0)) << 0) |\r
2223                        (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 1)) << 2) |\r
2224                        (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 2)) << 4) |\r
2225                        (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 3)) << 6);\r
2226         }\r
2227 \r
2228         void OutputASM::assignLvalue(TIntermTyped *dst, TIntermTyped *src)\r
2229         {\r
2230                 if(src &&\r
2231                         ((src->isVector() && (!dst->isVector() || (dst->getNominalSize() != dst->getNominalSize()))) ||\r
2232                          (src->isMatrix() && (!dst->isMatrix() || (src->getNominalSize() != dst->getNominalSize()) || (src->getSecondarySize() != dst->getSecondarySize())))))\r
2233                 {\r
2234                         return mContext.error(src->getLine(), "Result type should match the l-value type in compound assignment", src->isVector() ? "vector" : "matrix");\r
2235                 }\r
2236 \r
2237                 TIntermBinary *binary = dst->getAsBinaryNode();\r
2238 \r
2239                 if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && dst->isScalar())\r
2240                 {\r
2241                         Instruction *insert = new Instruction(sw::Shader::OPCODE_INSERT);\r
2242 \r
2243                         Temporary address(this);\r
2244                         lvalue(insert->dst, address, dst);\r
2245 \r
2246                         insert->src[0].type = insert->dst.type;\r
2247                         insert->src[0].index = insert->dst.index;\r
2248                         insert->src[0].rel = insert->dst.rel;\r
2249                         argument(insert->src[1], src);\r
2250                         argument(insert->src[2], binary->getRight());\r
2251 \r
2252                         shader->append(insert);\r
2253                 }\r
2254                 else\r
2255                 {\r
2256                         for(int offset = 0; offset < dst->totalRegisterCount(); offset++)\r
2257                         {\r
2258                                 Instruction *mov = new Instruction(sw::Shader::OPCODE_MOV);\r
2259 \r
2260                                 Temporary address(this);\r
2261                                 int swizzle = lvalue(mov->dst, address, dst);\r
2262                                 mov->dst.index += offset;\r
2263 \r
2264                                 if(offset > 0)\r
2265                                 {\r
2266                                         mov->dst.mask = writeMask(dst, offset);\r
2267                                 }\r
2268 \r
2269                                 argument(mov->src[0], src, offset);\r
2270                                 mov->src[0].swizzle = swizzleSwizzle(mov->src[0].swizzle, swizzle);\r
2271 \r
2272                                 shader->append(mov);\r
2273                         }\r
2274                 }\r
2275         }\r
2276 \r
2277         int OutputASM::lvalue(sw::Shader::DestinationParameter &dst, Temporary &address, TIntermTyped *node)\r
2278         {\r
2279                 TIntermTyped *result = node;\r
2280                 TIntermBinary *binary = node->getAsBinaryNode();\r
2281                 TIntermSymbol *symbol = node->getAsSymbolNode();\r
2282 \r
2283                 if(binary)\r
2284                 {\r
2285                         TIntermTyped *left = binary->getLeft();\r
2286                         TIntermTyped *right = binary->getRight();\r
2287 \r
2288                         int leftSwizzle = lvalue(dst, address, left);   // Resolve the l-value of the left side\r
2289 \r
2290                         switch(binary->getOp())\r
2291                         {\r
2292                         case EOpIndexDirect:\r
2293                                 {\r
2294                                         int rightIndex = right->getAsConstantUnion()->getIConst(0);\r
2295 \r
2296                                         if(left->isRegister())\r
2297                                         {\r
2298                                                 int leftMask = dst.mask;\r
2299 \r
2300                                                 dst.mask = 1;\r
2301                                                 while((leftMask & dst.mask) == 0)\r
2302                                                 {\r
2303                                                         dst.mask = dst.mask << 1;\r
2304                                                 }\r
2305 \r
2306                                                 int element = swizzleElement(leftSwizzle, rightIndex);\r
2307                                                 dst.mask = 1 << element;\r
2308 \r
2309                                                 return element;\r
2310                                         }\r
2311                                         else if(left->isArray() || left->isMatrix())\r
2312                                         {\r
2313                                                 dst.index += rightIndex * result->totalRegisterCount();\r
2314                                                 return 0xE4;\r
2315                                         }\r
2316                                         else UNREACHABLE(0);\r
2317                                 }\r
2318                                 break;\r
2319                         case EOpIndexIndirect:\r
2320                                 {\r
2321                                         if(left->isRegister())\r
2322                                         {\r
2323                                                 // Requires INSERT instruction (handled by calling function)\r
2324                                         }\r
2325                                         else if(left->isArray() || left->isMatrix())\r
2326                                         {\r
2327                                                 int scale = result->totalRegisterCount();\r
2328 \r
2329                                                 if(dst.rel.type == sw::Shader::PARAMETER_VOID)   // Use the index register as the relative address directly\r
2330                                                 {\r
2331                                                         if(left->totalRegisterCount() > 1)\r
2332                                                         {\r
2333                                                                 sw::Shader::SourceParameter relativeRegister;\r
2334                                                                 argument(relativeRegister, right);\r
2335 \r
2336                                                                 dst.rel.index = relativeRegister.index;\r
2337                                                                 dst.rel.type = relativeRegister.type;\r
2338                                                                 dst.rel.scale = scale;\r
2339                                                                 dst.rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);\r
2340                                                         }\r
2341                                                 }\r
2342                                                 else if(dst.rel.index != registerIndex(&address))   // Move the previous index register to the address register\r
2343                                                 {\r
2344                                                         if(scale == 1)\r
2345                                                         {\r
2346                                                                 Constant oldScale((int)dst.rel.scale);\r
2347                                                                 Instruction *mad = emit(sw::Shader::OPCODE_IMAD, &address, &address, &oldScale, right);\r
2348                                                                 mad->src[0].index = dst.rel.index;\r
2349                                                                 mad->src[0].type = dst.rel.type;\r
2350                                                         }\r
2351                                                         else\r
2352                                                         {\r
2353                                                                 Constant oldScale((int)dst.rel.scale);\r
2354                                                                 Instruction *mul = emit(sw::Shader::OPCODE_IMUL, &address, &address, &oldScale);\r
2355                                                                 mul->src[0].index = dst.rel.index;\r
2356                                                                 mul->src[0].type = dst.rel.type;\r
2357 \r
2358                                                                 Constant newScale(scale);\r
2359                                                                 emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);\r
2360                                                         }\r
2361 \r
2362                                                         dst.rel.type = sw::Shader::PARAMETER_TEMP;\r
2363                                                         dst.rel.index = registerIndex(&address);\r
2364                                                         dst.rel.scale = 1;\r
2365                                                 }\r
2366                                                 else   // Just add the new index to the address register\r
2367                                                 {\r
2368                                                         if(scale == 1)\r
2369                                                         {\r
2370                                                                 emit(sw::Shader::OPCODE_IADD, &address, &address, right);\r
2371                                                         }\r
2372                                                         else\r
2373                                                         {\r
2374                                                                 Constant newScale(scale);\r
2375                                                                 emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);\r
2376                                                         }\r
2377                                                 }\r
2378                                         }\r
2379                                         else UNREACHABLE(0);\r
2380                                 }\r
2381                                 break;\r
2382                         case EOpIndexDirectStruct:\r
2383                         case EOpIndexDirectInterfaceBlock:\r
2384                                 {\r
2385                                         const TFieldList& fields = (binary->getOp() == EOpIndexDirectStruct) ?\r
2386                                                                left->getType().getStruct()->fields() :\r
2387                                                                left->getType().getInterfaceBlock()->fields();\r
2388                                         int index = right->getAsConstantUnion()->getIConst(0);\r
2389                                         int fieldOffset = 0;\r
2390 \r
2391                                         for(int i = 0; i < index; i++)\r
2392                                         {\r
2393                                                 fieldOffset += fields[i]->type()->totalRegisterCount();\r
2394                                         }\r
2395 \r
2396                                         dst.type = registerType(left);\r
2397                                         dst.index += fieldOffset;\r
2398                                         dst.mask = writeMask(right);\r
2399 \r
2400                                         return 0xE4;\r
2401                                 }\r
2402                                 break;\r
2403                         case EOpVectorSwizzle:\r
2404                                 {\r
2405                                         ASSERT(left->isRegister());\r
2406 \r
2407                                         int leftMask = dst.mask;\r
2408 \r
2409                                         int swizzle = 0;\r
2410                                         int rightMask = 0;\r
2411 \r
2412                                         TIntermSequence &sequence = right->getAsAggregate()->getSequence();\r
2413 \r
2414                                         for(unsigned int i = 0; i < sequence.size(); i++)\r
2415                                         {\r
2416                                                 int index = sequence[i]->getAsConstantUnion()->getIConst(0);\r
2417 \r
2418                                                 int element = swizzleElement(leftSwizzle, index);\r
2419                                                 rightMask = rightMask | (1 << element);\r
2420                                                 swizzle = swizzle | swizzleElement(leftSwizzle, i) << (element * 2);\r
2421                                         }\r
2422 \r
2423                                         dst.mask = leftMask & rightMask;\r
2424 \r
2425                                         return swizzle;\r
2426                                 }\r
2427                                 break;\r
2428                         default:\r
2429                                 UNREACHABLE(binary->getOp());   // Not an l-value operator\r
2430                                 break;\r
2431                         }\r
2432                 }\r
2433                 else if(symbol)\r
2434                 {\r
2435                         dst.type = registerType(symbol);\r
2436                         dst.index = registerIndex(symbol);\r
2437                         dst.mask = writeMask(symbol);\r
2438                         return 0xE4;\r
2439                 }\r
2440 \r
2441                 return 0xE4;\r
2442         }\r
2443 \r
2444         sw::Shader::ParameterType OutputASM::registerType(TIntermTyped *operand)\r
2445         {\r
2446                 if(isSamplerRegister(operand))\r
2447                 {\r
2448                         return sw::Shader::PARAMETER_SAMPLER;\r
2449                 }\r
2450 \r
2451                 const TQualifier qualifier = operand->getQualifier();\r
2452                 if((EvqFragColor == qualifier) || (EvqFragData == qualifier))\r
2453                 {\r
2454                         if(((EvqFragData == qualifier) && (EvqFragColor == outputQualifier)) ||\r
2455                            ((EvqFragColor == qualifier) && (EvqFragData == outputQualifier)))\r
2456                         {\r
2457                                 mContext.error(operand->getLine(), "static assignment to both gl_FragData and gl_FragColor", "");\r
2458                         }\r
2459                         outputQualifier = qualifier;\r
2460                 }\r
2461 \r
2462                 switch(qualifier)\r
2463                 {\r
2464                 case EvqTemporary:           return sw::Shader::PARAMETER_TEMP;\r
2465                 case EvqGlobal:              return sw::Shader::PARAMETER_TEMP;\r
2466                 case EvqConstExpr:           return sw::Shader::PARAMETER_FLOAT4LITERAL;   // All converted to float\r
2467                 case EvqAttribute:           return sw::Shader::PARAMETER_INPUT;\r
2468                 case EvqVaryingIn:           return sw::Shader::PARAMETER_INPUT;\r
2469                 case EvqVaryingOut:          return sw::Shader::PARAMETER_OUTPUT;\r
2470                 case EvqVertexIn:            return sw::Shader::PARAMETER_INPUT;\r
2471                 case EvqFragmentOut:         return sw::Shader::PARAMETER_COLOROUT;\r
2472                 case EvqVertexOut:           return sw::Shader::PARAMETER_OUTPUT;\r
2473                 case EvqFragmentIn:          return sw::Shader::PARAMETER_INPUT;\r
2474                 case EvqInvariantVaryingIn:  return sw::Shader::PARAMETER_INPUT;    // FIXME: Guarantee invariance at the backend\r
2475                 case EvqInvariantVaryingOut: return sw::Shader::PARAMETER_OUTPUT;   // FIXME: Guarantee invariance at the backend\r
2476                 case EvqSmooth:              return sw::Shader::PARAMETER_OUTPUT;\r
2477                 case EvqFlat:                return sw::Shader::PARAMETER_OUTPUT;\r
2478                 case EvqCentroidOut:         return sw::Shader::PARAMETER_OUTPUT;\r
2479                 case EvqSmoothIn:            return sw::Shader::PARAMETER_INPUT;\r
2480                 case EvqFlatIn:              return sw::Shader::PARAMETER_INPUT;\r
2481                 case EvqCentroidIn:          return sw::Shader::PARAMETER_INPUT;\r
2482                 case EvqUniform:             return sw::Shader::PARAMETER_CONST;\r
2483                 case EvqIn:                  return sw::Shader::PARAMETER_TEMP;\r
2484                 case EvqOut:                 return sw::Shader::PARAMETER_TEMP;\r
2485                 case EvqInOut:               return sw::Shader::PARAMETER_TEMP;\r
2486                 case EvqConstReadOnly:       return sw::Shader::PARAMETER_TEMP;\r
2487                 case EvqPosition:            return sw::Shader::PARAMETER_OUTPUT;\r
2488                 case EvqPointSize:           return sw::Shader::PARAMETER_OUTPUT;\r
2489                 case EvqInstanceID:          return sw::Shader::PARAMETER_MISCTYPE;\r
2490                 case EvqFragCoord:           return sw::Shader::PARAMETER_MISCTYPE;\r
2491                 case EvqFrontFacing:         return sw::Shader::PARAMETER_MISCTYPE;\r
2492                 case EvqPointCoord:          return sw::Shader::PARAMETER_INPUT;\r
2493                 case EvqFragColor:           return sw::Shader::PARAMETER_COLOROUT;\r
2494                 case EvqFragData:            return sw::Shader::PARAMETER_COLOROUT;\r
2495                 case EvqFragDepth:           return sw::Shader::PARAMETER_DEPTHOUT;\r
2496                 default: UNREACHABLE(qualifier);\r
2497                 }\r
2498 \r
2499                 return sw::Shader::PARAMETER_VOID;\r
2500         }\r
2501 \r
2502         unsigned int OutputASM::registerIndex(TIntermTyped *operand)\r
2503         {\r
2504                 if(isSamplerRegister(operand))\r
2505                 {\r
2506                         return samplerRegister(operand);\r
2507                 }\r
2508 \r
2509                 switch(operand->getQualifier())\r
2510                 {\r
2511                 case EvqTemporary:           return temporaryRegister(operand);\r
2512                 case EvqGlobal:              return temporaryRegister(operand);\r
2513                 case EvqConstExpr:           UNREACHABLE(EvqConstExpr);\r
2514                 case EvqAttribute:           return attributeRegister(operand);\r
2515                 case EvqVaryingIn:           return varyingRegister(operand);\r
2516                 case EvqVaryingOut:          return varyingRegister(operand);\r
2517                 case EvqVertexIn:            return attributeRegister(operand);\r
2518                 case EvqFragmentOut:         return fragmentOutputRegister(operand);\r
2519                 case EvqVertexOut:           return varyingRegister(operand);\r
2520                 case EvqFragmentIn:          return varyingRegister(operand);\r
2521                 case EvqInvariantVaryingIn:  return varyingRegister(operand);\r
2522                 case EvqInvariantVaryingOut: return varyingRegister(operand);\r
2523                 case EvqSmooth:              return varyingRegister(operand);\r
2524                 case EvqFlat:                return varyingRegister(operand);\r
2525                 case EvqCentroidOut:         return varyingRegister(operand);\r
2526                 case EvqSmoothIn:            return varyingRegister(operand);\r
2527                 case EvqFlatIn:              return varyingRegister(operand);\r
2528                 case EvqCentroidIn:          return varyingRegister(operand);\r
2529                 case EvqUniform:             return uniformRegister(operand);\r
2530                 case EvqIn:                  return temporaryRegister(operand);\r
2531                 case EvqOut:                 return temporaryRegister(operand);\r
2532                 case EvqInOut:               return temporaryRegister(operand);\r
2533                 case EvqConstReadOnly:       return temporaryRegister(operand);\r
2534                 case EvqPosition:            return varyingRegister(operand);\r
2535                 case EvqPointSize:           return varyingRegister(operand);\r
2536                 case EvqInstanceID:          vertexShader->instanceIdDeclared = true; return 0;\r
2537                 case EvqFragCoord:           pixelShader->vPosDeclared = true;  return 0;\r
2538                 case EvqFrontFacing:         pixelShader->vFaceDeclared = true; return 1;\r
2539                 case EvqPointCoord:          return varyingRegister(operand);\r
2540                 case EvqFragColor:           return 0;\r
2541                 case EvqFragData:            return 0;\r
2542                 case EvqFragDepth:           return 0;\r
2543                 default: UNREACHABLE(operand->getQualifier());\r
2544                 }\r
2545 \r
2546                 return 0;\r
2547         }\r
2548 \r
2549         int OutputASM::writeMask(TIntermTyped *destination, int index)\r
2550         {\r
2551                 if(destination->getQualifier() == EvqPointSize)\r
2552                 {\r
2553                         return 0x2;   // Point size stored in the y component\r
2554                 }\r
2555 \r
2556                 return 0xF >> (4 - registerSize(destination->getType(), index));\r
2557         }\r
2558 \r
2559         int OutputASM::readSwizzle(TIntermTyped *argument, int size)\r
2560         {\r
2561                 if(argument->getQualifier() == EvqPointSize)\r
2562                 {\r
2563                         return 0x55;   // Point size stored in the y component\r
2564                 }\r
2565 \r
2566                 static const unsigned char swizzleSize[5] = {0x00, 0x00, 0x54, 0xA4, 0xE4};   // (void), xxxx, xyyy, xyzz, xyzw\r
2567 \r
2568                 return swizzleSize[size];\r
2569         }\r
2570 \r
2571         // Conservatively checks whether an expression is fast to compute and has no side effects\r
2572         bool OutputASM::trivial(TIntermTyped *expression, int budget)\r
2573         {\r
2574                 if(!expression->isRegister())\r
2575                 {\r
2576                         return false;\r
2577                 }\r
2578 \r
2579                 return cost(expression, budget) >= 0;\r
2580         }\r
2581 \r
2582         // Returns the remaining computing budget (if < 0 the expression is too expensive or has side effects)\r
2583         int OutputASM::cost(TIntermNode *expression, int budget)\r
2584         {\r
2585                 if(budget < 0)\r
2586                 {\r
2587                         return budget;\r
2588                 }\r
2589 \r
2590                 if(expression->getAsSymbolNode())\r
2591                 {\r
2592                         return budget;\r
2593                 }\r
2594                 else if(expression->getAsConstantUnion())\r
2595                 {\r
2596                         return budget;\r
2597                 }\r
2598                 else if(expression->getAsBinaryNode())\r
2599                 {\r
2600                         TIntermBinary *binary = expression->getAsBinaryNode();\r
2601 \r
2602                         switch(binary->getOp())\r
2603                         {\r
2604                         case EOpVectorSwizzle:\r
2605                         case EOpIndexDirect:\r
2606                         case EOpIndexDirectStruct:\r
2607                         case EOpIndexDirectInterfaceBlock:\r
2608                                 return cost(binary->getLeft(), budget - 0);\r
2609                         case EOpAdd:\r
2610                         case EOpSub:\r
2611                         case EOpMul:\r
2612                                 return cost(binary->getLeft(), cost(binary->getRight(), budget - 1));\r
2613                         default:\r
2614                                 return -1;\r
2615                         }\r
2616                 }\r
2617                 else if(expression->getAsUnaryNode())\r
2618                 {\r
2619                         TIntermUnary *unary = expression->getAsUnaryNode();\r
2620 \r
2621                         switch(unary->getOp())\r
2622                         {\r
2623                         case EOpAbs:\r
2624                         case EOpNegative:\r
2625                                 return cost(unary->getOperand(), budget - 1);\r
2626                         default:\r
2627                                 return -1;\r
2628                         }\r
2629                 }\r
2630                 else if(expression->getAsSelectionNode())\r
2631                 {\r
2632                         TIntermSelection *selection = expression->getAsSelectionNode();\r
2633 \r
2634                         if(selection->usesTernaryOperator())\r
2635                         {\r
2636                                 TIntermTyped *condition = selection->getCondition();\r
2637                                 TIntermNode *trueBlock = selection->getTrueBlock();\r
2638                                 TIntermNode *falseBlock = selection->getFalseBlock();\r
2639                                 TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();\r
2640 \r
2641                                 if(constantCondition)\r
2642                                 {\r
2643                                         bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();\r
2644 \r
2645                                         if(trueCondition)\r
2646                                         {\r
2647                                                 return cost(trueBlock, budget - 0);\r
2648                                         }\r
2649                                         else\r
2650                                         {\r
2651                                                 return cost(falseBlock, budget - 0);\r
2652                                         }\r
2653                                 }\r
2654                                 else\r
2655                                 {\r
2656                                         return cost(trueBlock, cost(falseBlock, budget - 2));\r
2657                                 }\r
2658                         }\r
2659                 }\r
2660 \r
2661                 return -1;\r
2662         }\r
2663 \r
2664         const Function *OutputASM::findFunction(const TString &name)\r
2665         {\r
2666                 for(unsigned int f = 0; f < functionArray.size(); f++)\r
2667                 {\r
2668                         if(functionArray[f].name == name)\r
2669                         {\r
2670                                 return &functionArray[f];\r
2671                         }\r
2672                 }\r
2673 \r
2674                 return 0;\r
2675         }\r
2676 \r
2677         int OutputASM::temporaryRegister(TIntermTyped *temporary)\r
2678         {\r
2679                 return allocate(temporaries, temporary);\r
2680         }\r
2681 \r
2682         int OutputASM::varyingRegister(TIntermTyped *varying)\r
2683         {\r
2684                 int var = lookup(varyings, varying);\r
2685 \r
2686                 if(var == -1)\r
2687                 {\r
2688                         var = allocate(varyings, varying);\r
2689                         int componentCount = varying->registerSize();\r
2690                         int registerCount = varying->totalRegisterCount();\r
2691 \r
2692                         if(pixelShader)\r
2693                         {\r
2694                                 if((var + registerCount) > sw::PixelShader::MAX_INPUT_VARYINGS)\r
2695                                 {\r
2696                                         mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "fragment shader");\r
2697                                         return 0;\r
2698                                 }\r
2699 \r
2700                                 if(varying->getQualifier() == EvqPointCoord)\r
2701                                 {\r
2702                                         ASSERT(varying->isRegister());\r
2703                                         if(componentCount >= 1) pixelShader->semantic[var][0] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);\r
2704                                         if(componentCount >= 2) pixelShader->semantic[var][1] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);\r
2705                                         if(componentCount >= 3) pixelShader->semantic[var][2] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);\r
2706                                         if(componentCount >= 4) pixelShader->semantic[var][3] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);\r
2707                                 }\r
2708                                 else\r
2709                                 {\r
2710                                         for(int i = 0; i < varying->totalRegisterCount(); i++)\r
2711                                         {\r
2712                                                 if(componentCount >= 1) pixelShader->semantic[var + i][0] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);\r
2713                                                 if(componentCount >= 2) pixelShader->semantic[var + i][1] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);\r
2714                                                 if(componentCount >= 3) pixelShader->semantic[var + i][2] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);\r
2715                                                 if(componentCount >= 4) pixelShader->semantic[var + i][3] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);\r
2716                                         }\r
2717                                 }\r
2718                         }\r
2719                         else if(vertexShader)\r
2720                         {\r
2721                                 if((var + registerCount) > sw::VertexShader::MAX_OUTPUT_VARYINGS)\r
2722                                 {\r
2723                                         mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader");\r
2724                                         return 0;\r
2725                                 }\r
2726 \r
2727                                 if(varying->getQualifier() == EvqPosition)\r
2728                                 {\r
2729                                         ASSERT(varying->isRegister());\r
2730                                         vertexShader->output[var][0] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);\r
2731                                         vertexShader->output[var][1] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);\r
2732                                         vertexShader->output[var][2] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);\r
2733                                         vertexShader->output[var][3] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);\r
2734                                         vertexShader->positionRegister = var;\r
2735                                 }\r
2736                                 else if(varying->getQualifier() == EvqPointSize)\r
2737                                 {\r
2738                                         ASSERT(varying->isRegister());\r
2739                                         vertexShader->output[var][0] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);\r
2740                                         vertexShader->output[var][1] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);\r
2741                                         vertexShader->output[var][2] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);\r
2742                                         vertexShader->output[var][3] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);\r
2743                                         vertexShader->pointSizeRegister = var;\r
2744                                 }\r
2745                                 else\r
2746                                 {\r
2747                                         // Semantic indexes for user varyings will be assigned during program link to match the pixel shader\r
2748                                 }\r
2749                         }\r
2750                         else UNREACHABLE(0);\r
2751 \r
2752                         declareVarying(varying, var);\r
2753                 }\r
2754 \r
2755                 return var;\r
2756         }\r
2757 \r
2758         void OutputASM::declareVarying(TIntermTyped *varying, int reg)\r
2759         {\r
2760                 if(varying->getQualifier() != EvqPointCoord)   // gl_PointCoord does not need linking\r
2761                 {\r
2762                         const TType &type = varying->getType();\r
2763                         const char *name = varying->getAsSymbolNode()->getSymbol().c_str();\r
2764                         VaryingList &activeVaryings = shaderObject->varyings;\r
2765 \r
2766                         // Check if this varying has been declared before without having a register assigned\r
2767                         for(VaryingList::iterator v = activeVaryings.begin(); v != activeVaryings.end(); v++)\r
2768                         {\r
2769                                 if(v->name == name)\r
2770                                 {\r
2771                                         if(reg >= 0)\r
2772                                         {\r
2773                                                 ASSERT(v->reg < 0 || v->reg == reg);\r
2774                                                 v->reg = reg;\r
2775                                         }\r
2776 \r
2777                                         return;\r
2778                                 }\r
2779                         }\r
2780 \r
2781                         activeVaryings.push_back(glsl::Varying(glVariableType(type), name, varying->getArraySize(), reg, 0));\r
2782                 }\r
2783         }\r
2784 \r
2785         int OutputASM::uniformRegister(TIntermTyped *uniform)\r
2786         {\r
2787                 const TType &type = uniform->getType();\r
2788                 ASSERT(!IsSampler(type.getBasicType()));\r
2789                 TInterfaceBlock *block = type.getAsInterfaceBlock();\r
2790                 TIntermSymbol *symbol = uniform->getAsSymbolNode();\r
2791                 ASSERT(symbol || block);\r
2792 \r
2793                 if(symbol || block)\r
2794                 {\r
2795                         int index = lookup(uniforms, uniform);\r
2796 \r
2797                         if(index == -1)\r
2798                         {\r
2799                                 index = allocate(uniforms, uniform);\r
2800                                 const TString &name = symbol ? symbol->getSymbol() : block->name();\r
2801 \r
2802                                 declareUniform(type, name, index);\r
2803                         }\r
2804 \r
2805                         return index;\r
2806                 }\r
2807 \r
2808                 return 0;\r
2809         }\r
2810 \r
2811         int OutputASM::attributeRegister(TIntermTyped *attribute)\r
2812         {\r
2813                 ASSERT(!attribute->isArray());\r
2814 \r
2815                 int index = lookup(attributes, attribute);\r
2816 \r
2817                 if(index == -1)\r
2818                 {\r
2819                         TIntermSymbol *symbol = attribute->getAsSymbolNode();\r
2820                         ASSERT(symbol);\r
2821 \r
2822                         if(symbol)\r
2823                         {\r
2824                                 index = allocate(attributes, attribute);\r
2825                                 const TType &type = attribute->getType();\r
2826                                 int registerCount = attribute->totalRegisterCount();\r
2827 \r
2828                                 if(vertexShader && (index + registerCount) <= sw::VertexShader::MAX_INPUT_ATTRIBUTES)\r
2829                                 {\r
2830                                         for(int i = 0; i < registerCount; i++)\r
2831                                         {\r
2832                                                 vertexShader->input[index + i] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i);\r
2833                                         }\r
2834                                 }\r
2835 \r
2836                                 ActiveAttributes &activeAttributes = shaderObject->activeAttributes;\r
2837 \r
2838                                 const char *name = symbol->getSymbol().c_str();\r
2839                                 activeAttributes.push_back(Attribute(glVariableType(type), name, type.getArraySize(), type.getLayoutQualifier().location, index));\r
2840                         }\r
2841                 }\r
2842 \r
2843                 return index;\r
2844         }\r
2845 \r
2846         int OutputASM::fragmentOutputRegister(TIntermTyped *fragmentOutput)\r
2847         {\r
2848                 return allocate(fragmentOutputs, fragmentOutput);\r
2849         }\r
2850 \r
2851         int OutputASM::samplerRegister(TIntermTyped *sampler)\r
2852         {\r
2853                 ASSERT(IsSampler(sampler->getType().getBasicType()));\r
2854                 TIntermSymbol *symbol = sampler->getAsSymbolNode();\r
2855                 TIntermBinary *binary = sampler->getAsBinaryNode();\r
2856 \r
2857                 if(symbol)\r
2858                 {\r
2859                         return samplerRegister(symbol);\r
2860                 }\r
2861                 else if(binary)\r
2862                 {\r
2863                         ASSERT(binary->getOp() == EOpIndexDirect || binary->getOp() == EOpIndexIndirect ||\r
2864                                    binary->getOp() == EOpIndexDirectStruct || binary->getOp() == EOpIndexDirectInterfaceBlock);\r
2865 \r
2866                         return samplerRegister(binary->getLeft());   // Index added later\r
2867                 }\r
2868                 else UNREACHABLE(0);\r
2869 \r
2870                 return 0;\r
2871         }\r
2872 \r
2873         int OutputASM::samplerRegister(TIntermSymbol *sampler)\r
2874         {\r
2875                 const TType &type = sampler->getType();\r
2876                 ASSERT(IsSampler(type.getBasicType()) || type.getStruct());   // Structures can contain samplers\r
2877 \r
2878                 int index = lookup(samplers, sampler);\r
2879 \r
2880                 if(index == -1)\r
2881                 {\r
2882                         index = allocate(samplers, sampler);\r
2883 \r
2884                         if(sampler->getQualifier() == EvqUniform)\r
2885                         {\r
2886                                 const char *name = sampler->getSymbol().c_str();\r
2887                                 declareUniform(type, name, index);\r
2888                         }\r
2889                 }\r
2890 \r
2891                 return index;\r
2892         }\r
2893 \r
2894         int OutputASM::lookup(VariableArray &list, TIntermTyped *variable)\r
2895         {\r
2896                 for(unsigned int i = 0; i < list.size(); i++)\r
2897                 {\r
2898                         if(list[i] == variable)\r
2899                         {\r
2900                                 return i;   // Pointer match\r
2901                         }\r
2902                 }\r
2903 \r
2904                 TIntermSymbol *varSymbol = variable->getAsSymbolNode();\r
2905                 TInterfaceBlock *varBlock = variable->getType().getAsInterfaceBlock();\r
2906 \r
2907                 if(varBlock)\r
2908                 {\r
2909                         for(unsigned int i = 0; i < list.size(); i++)\r
2910                         {\r
2911                                 if(list[i])\r
2912                                 {\r
2913                                         TInterfaceBlock *listBlock = list[i]->getType().getAsInterfaceBlock();\r
2914 \r
2915                                         if(listBlock)\r
2916                                         {\r
2917                                                 if(listBlock->name() == varBlock->name())\r
2918                                                 {\r
2919                                                         ASSERT(listBlock->arraySize() == varBlock->arraySize());\r
2920                                                         ASSERT(listBlock->fields() == varBlock->fields());\r
2921                                                         ASSERT(listBlock->blockStorage() == varBlock->blockStorage());\r
2922                                                         ASSERT(listBlock->matrixPacking() == varBlock->matrixPacking());\r
2923 \r
2924                                                         return i;\r
2925                                                 }\r
2926                                         }\r
2927                                 }\r
2928                         }\r
2929                 }\r
2930                 else if(varSymbol)\r
2931                 {\r
2932                         for(unsigned int i = 0; i < list.size(); i++)\r
2933                         {\r
2934                                 if(list[i])\r
2935                                 {\r
2936                                         TIntermSymbol *listSymbol = list[i]->getAsSymbolNode();\r
2937 \r
2938                                         if(listSymbol)\r
2939                                         {\r
2940                                                 if(listSymbol->getId() == varSymbol->getId())\r
2941                                                 {\r
2942                                                         ASSERT(listSymbol->getSymbol() == varSymbol->getSymbol());\r
2943                                                         ASSERT(listSymbol->getType() == varSymbol->getType());\r
2944                                                         ASSERT(listSymbol->getQualifier() == varSymbol->getQualifier());\r
2945 \r
2946                                                         return i;\r
2947                                                 }\r
2948                                         }\r
2949                                 }\r
2950                         }\r
2951                 }\r
2952 \r
2953                 return -1;\r
2954         }\r
2955 \r
2956         int OutputASM::allocate(VariableArray &list, TIntermTyped *variable)\r
2957         {\r
2958                 int index = lookup(list, variable);\r
2959 \r
2960                 if(index == -1)\r
2961                 {\r
2962                         unsigned int registerCount = variable->totalRegisterCount();\r
2963 \r
2964                         for(unsigned int i = 0; i < list.size(); i++)\r
2965                         {\r
2966                                 if(list[i] == 0)\r
2967                                 {\r
2968                                         unsigned int j = 1;\r
2969                                         for( ; j < registerCount && (i + j) < list.size(); j++)\r
2970                                         {\r
2971                                                 if(list[i + j] != 0)\r
2972                                                 {\r
2973                                                         break;\r
2974                                                 }\r
2975                                         }\r
2976 \r
2977                                         if(j == registerCount)   // Found free slots\r
2978                                         {\r
2979                                                 for(unsigned int j = 0; j < registerCount; j++)\r
2980                                                 {\r
2981                                                         list[i + j] = variable;\r
2982                                                 }\r
2983 \r
2984                                                 return i;\r
2985                                         }\r
2986                                 }\r
2987                         }\r
2988 \r
2989                         index = list.size();\r
2990 \r
2991                         for(unsigned int i = 0; i < registerCount; i++)\r
2992                         {\r
2993                                 list.push_back(variable);\r
2994                         }\r
2995                 }\r
2996 \r
2997                 return index;\r
2998         }\r
2999 \r
3000         void OutputASM::free(VariableArray &list, TIntermTyped *variable)\r
3001         {\r
3002                 int index = lookup(list, variable);\r
3003 \r
3004                 if(index >= 0)\r
3005                 {\r
3006                         list[index] = 0;\r
3007                 }\r
3008         }\r
3009 \r
3010         int OutputASM::declareUniform(const TType &type, const TString &name, int registerIndex, int blockId, BlockLayoutEncoder* encoder)\r
3011         {\r
                     // Adds the uniform described by 'type' and 'name' to shaderObject->activeUniforms.
                     // Structures and interface blocks are expanded recursively, so a Uniform entry is
                     // only created for members that are neither.
                     //   registerIndex - register of this uniform, or first register of the aggregate
                     //   blockId       - index of the enclosing block in activeUniformBlocks; the code
                     //                   treats -1 as "no block established yet"
                     //   encoder       - layout encoder of the enclosing block, null outside of a block
                     // Returns registerIndex, except when a block member is declared on its own
                     // (blockId == -1): then the register of the field called 'name' is returned.
3012                 const TStructure *structure = type.getStruct();\r
                     // The interface block is only consulted for the block type itself or when no block id
                     // was passed in. The recursive field walk below passes a non-negative blockId, which
                     // forces this to null so that fields take the leaf or structure path.
3013                 const TInterfaceBlock *block = (type.isInterfaceBlock() || (blockId == -1)) ? type.getInterfaceBlock() : nullptr;\r
3014                 ActiveUniforms &activeUniforms = shaderObject->activeUniforms;\r
3015 \r
3016                 if(!structure && !block)\r
3017                 {\r
                             // Leaf uniform: layout comes from the block encoder when there is one
3018                         const BlockMemberInfo blockInfo = encoder ? encoder->encodeType(type) : BlockMemberInfo::getDefaultBlockInfo();\r
3019                         if(blockId >= 0)\r
3020                         {\r
                                     // Remember the member's layout and type under its register index, and link the
                                     // uniform about to be appended (its index is the current list size) to the block
3021                                 blockDefinitions[blockId][registerIndex] = TypedMemberInfo(blockInfo, type);\r
3022                                 shaderObject->activeUniformBlocks[blockId].fields.push_back(activeUniforms.size());\r
3023                         }\r
                             // With an encoder the register is the block's base register plus the value
                             // getBlockRegister() derives from the encoded layout; otherwise it is passed through
3024                         int fieldRegisterIndex = encoder ? shaderObject->activeUniformBlocks[blockId].registerIndex + BlockLayoutEncoder::getBlockRegister(blockInfo) : registerIndex;\r
3025                         activeUniforms.push_back(Uniform(glVariableType(type), glVariablePrecision(type), name.c_str(), type.getArraySize(),\r
3026                                                          fieldRegisterIndex, blockId, blockInfo));\r
3027                         if(isSamplerRegister(type))\r
3028                         {\r
                                     // Each register covered by the type is declared as a sampler on the shader
3029                                 for(int i = 0; i < type.totalRegisterCount(); i++)\r
3030                                 {\r
3031                                         shader->declareSampler(fieldRegisterIndex + i);\r
3032                                 }\r
3033                         }\r
3034                 }\r
3035                 else if(block)\r
3036                 {\r
3037                         ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;\r
3038                         const TFieldList& fields = block->fields();\r
3039                         const TString &blockName = block->name();\r
3040                         int fieldRegisterIndex = registerIndex;\r
                             // True when 'type' is a field of the block rather than the block itself
3041                         bool isUniformBlockMember = !type.isInterfaceBlock() && (blockId == -1);\r
3042 \r
3043                         if(isUniformBlockMember)\r
3044                         {\r
3045                                 // This is a uniform that's part of a block, let's see if the block is already defined\r
3046                                 for(size_t i = 0; i < activeUniformBlocks.size(); ++i)\r
3047                                 {\r
3048                                         if(activeUniformBlocks[i].name == blockName.c_str())\r
3049                                         {\r
3050                                                 // The block is already defined, find the register for the current uniform and return it\r
                                                     // Registers are counted from 'registerIndex', advancing by each preceding
                                                     // field's register count until the field named 'name' is reached
3051                                                 for(size_t j = 0; j < fields.size(); j++)\r
3052                                                 {\r
3053                                                         const TString &fieldName = fields[j]->name();\r
3054                                                         if(fieldName == name)\r
3055                                                         {\r
3056                                                                 return fieldRegisterIndex;\r
3057                                                         }\r
3058 \r
3059                                                         fieldRegisterIndex += fields[j]->type()->totalRegisterCount();\r
3060                                                 }\r
3061 \r
                                                     // No field of the block matches 'name'
3062                                                 ASSERT(false);\r
3063                                                 return fieldRegisterIndex;\r
3064                                         }\r
3065                                 }\r
3066                         }\r
3067 \r
                             // Block not declared yet: its id is its position in activeUniformBlocks.
                             // dataSize is filled in below, once all fields have been encoded.
3068                         blockId = activeUniformBlocks.size();\r
3069                         bool isRowMajor = block->matrixPacking() == EmpRowMajor;\r
3070                         activeUniformBlocks.push_back(UniformBlock(blockName.c_str(), 0, block->arraySize(),\r
3071                                                                    block->blockStorage(), isRowMajor, registerIndex, blockId));\r
3072                         blockDefinitions.push_back(BlockDefinitionIndexMap());\r
3073 \r
                             // NOTE(review): fields are always laid out with the std140 encoder here, whatever
                             // block->blockStorage() says - confirm this is intended
3074                         Std140BlockEncoder currentBlockEncoder(isRowMajor);\r
3075                         currentBlockEncoder.enterAggregateType();\r
3076                         for(size_t i = 0; i < fields.size(); i++)\r
3077                         {\r
3078                                 const TType &fieldType = *(fields[i]->type());\r
3079                                 const TString &fieldName = fields[i]->name();\r
                                     // When declaring on behalf of a single member, report that member's register
3080                                 if(isUniformBlockMember && (fieldName == name))\r
3081                                 {\r
3082                                         registerIndex = fieldRegisterIndex;\r
3083                                 }\r
3084 \r
                                     // Fields of a block with an instance name are named "Block.field"
3085                                 const TString uniformName = block->hasInstanceName() ? blockName + "." + fieldName : fieldName;\r
3086 \r
3087                                 declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, &currentBlockEncoder);\r
3088                                 fieldRegisterIndex += fieldType.totalRegisterCount();\r
3089                         }\r
3090                         currentBlockEncoder.exitAggregateType();\r
3091                         activeUniformBlocks[blockId].dataSize = currentBlockEncoder.getBlockSize();\r
3092                 }\r
3093                 else\r
3094                 {\r
                             // Structure: every field is declared under a dotted name and takes the registers
                             // following the previous field
3095                         int fieldRegisterIndex = registerIndex;\r
3096 \r
3097                         const TFieldList& fields = structure->fields();\r
                             // 'structure' is non-null on this path, so the test reduces to type.isArray()
3098                         if(type.isArray() && (structure || type.isInterfaceBlock()))\r
3099                         {\r
                                     // Array of structures: flattened to "name[i].field", one aggregate per element
3100                                 for(int i = 0; i < type.getArraySize(); i++)\r
3101                                 {\r
3102                                         if(encoder)\r
3103                                         {\r
3104                                                 encoder->enterAggregateType();\r
3105                                         }\r
3106                                         for(size_t j = 0; j < fields.size(); j++)\r
3107                                         {\r
3108                                                 const TType &fieldType = *(fields[j]->type());\r
3109                                                 const TString &fieldName = fields[j]->name();\r
3110                                                 const TString uniformName = name + "[" + str(i) + "]." + fieldName;\r
3111 \r
3112                                                 declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, encoder);\r
3113                                                 fieldRegisterIndex += fieldType.totalRegisterCount();\r
3114                                         }\r
3115                                         if(encoder)\r
3116                                         {\r
3117                                                 encoder->exitAggregateType();\r
3118                                         }\r
3119                                 }\r
3120                         }\r
3121                         else\r
3122                         {\r
                                     // Single structure: fields are named "name.field"
3123                                 if(encoder)\r
3124                                 {\r
3125                                         encoder->enterAggregateType();\r
3126                                 }\r
3127                                 for(size_t i = 0; i < fields.size(); i++)\r
3128                                 {\r
3129                                         const TType &fieldType = *(fields[i]->type());\r
3130                                         const TString &fieldName = fields[i]->name();\r
3131                                         const TString uniformName = name + "." + fieldName;\r
3132 \r
3133                                         declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, encoder);\r
3134                                         fieldRegisterIndex += fieldType.totalRegisterCount();\r
3135                                 }\r
3136                                 if(encoder)\r
3137                                 {\r
3138                                         encoder->exitAggregateType();\r
3139                                 }\r
3140                         }\r
3141                 }\r
3142 \r
3143                 return registerIndex;\r
3144         }\r
3145 \r
3146         GLenum OutputASM::glVariableType(const TType &type)\r
3147         {\r
                     // Translates the basic type of 'type', together with its vector or matrix
                     // dimensions, into the matching GL type enumerant (GL_FLOAT_VEC3, GL_INT,
                     // GL_SAMPLER_2D, ...). GL_NONE is returned when no case produces a value.
3148                 switch(type.getBasicType())\r
3149                 {\r
3150                 case EbtFloat:\r
3151                         if(type.isScalar())\r
3152                         {\r
3153                                 return GL_FLOAT;\r
3154                         }\r
3155                         else if(type.isVector())\r
3156                         {\r
3157                                 switch(type.getNominalSize())\r
3158                                 {\r
3159                                 case 2: return GL_FLOAT_VEC2;\r
3160                                 case 3: return GL_FLOAT_VEC3;\r
3161                                 case 4: return GL_FLOAT_VEC4;\r
3162                                 default: UNREACHABLE(type.getNominalSize());\r
3163                                 }\r
3164                         }\r
3165                         else if(type.isMatrix())\r
3166                         {\r
                                     // The outer switch selects on getNominalSize(), the inner ones on
                                     // getSecondarySize(); e.g. nominal 2 with secondary 3 gives GL_FLOAT_MAT2x3.
                                     // NOTE(review): the inner switches are not followed by a break, so if
                                     // UNREACHABLE() returns, control continues into the next outer case - confirm
                                     // this is acceptable.
3167                                 switch(type.getNominalSize())\r
3168                                 {\r
3169                                 case 2:\r
3170                                         switch(type.getSecondarySize())\r
3171                                         {\r
3172                                         case 2: return GL_FLOAT_MAT2;\r
3173                                         case 3: return GL_FLOAT_MAT2x3;\r
3174                                         case 4: return GL_FLOAT_MAT2x4;\r
3175                                         default: UNREACHABLE(type.getSecondarySize());\r
3176                                         }\r
3177                                 case 3:\r
3178                                         switch(type.getSecondarySize())\r
3179                                         {\r
3180                                         case 2: return GL_FLOAT_MAT3x2;\r
3181                                         case 3: return GL_FLOAT_MAT3;\r
3182                                         case 4: return GL_FLOAT_MAT3x4;\r
3183                                         default: UNREACHABLE(type.getSecondarySize());\r
3184                                         }\r
3185                                 case 4:\r
3186                                         switch(type.getSecondarySize())\r
3187                                         {\r
3188                                         case 2: return GL_FLOAT_MAT4x2;\r
3189                                         case 3: return GL_FLOAT_MAT4x3;\r
3190                                         case 4: return GL_FLOAT_MAT4;\r
3191                                         default: UNREACHABLE(type.getSecondarySize());\r
3192                                         }\r
3193                                 default: UNREACHABLE(type.getNominalSize());\r
3194                                 }\r
3195                         }\r
3196                         else UNREACHABLE(0);\r
3197                         break;\r
                     // Integer, unsigned integer and boolean types only have scalar and vector forms here
3198                 case EbtInt:\r
3199                         if(type.isScalar())\r
3200                         {\r
3201                                 return GL_INT;\r
3202                         }\r
3203                         else if(type.isVector())\r
3204                         {\r
3205                                 switch(type.getNominalSize())\r
3206                                 {\r
3207                                 case 2: return GL_INT_VEC2;\r
3208                                 case 3: return GL_INT_VEC3;\r
3209                                 case 4: return GL_INT_VEC4;\r
3210                                 default: UNREACHABLE(type.getNominalSize());\r
3211                                 }\r
3212                         }\r
3213                         else UNREACHABLE(0);\r
3214                         break;\r
3215                 case EbtUInt:\r
3216                         if(type.isScalar())\r
3217                         {\r
3218                                 return GL_UNSIGNED_INT;\r
3219                         }\r
3220                         else if(type.isVector())\r
3221                         {\r
3222                                 switch(type.getNominalSize())\r
3223                                 {\r
3224                                 case 2: return GL_UNSIGNED_INT_VEC2;\r
3225                                 case 3: return GL_UNSIGNED_INT_VEC3;\r
3226                                 case 4: return GL_UNSIGNED_INT_VEC4;\r
3227                                 default: UNREACHABLE(type.getNominalSize());\r
3228                                 }\r
3229                         }\r
3230                         else UNREACHABLE(0);\r
3231                         break;\r
3232                 case EbtBool:\r
3233                         if(type.isScalar())\r
3234                         {\r
3235                                 return GL_BOOL;\r
3236                         }\r
3237                         else if(type.isVector())\r
3238                         {\r
3239                                 switch(type.getNominalSize())\r
3240                                 {\r
3241                                 case 2: return GL_BOOL_VEC2;\r
3242                                 case 3: return GL_BOOL_VEC3;\r
3243                                 case 4: return GL_BOOL_VEC4;\r
3244                                 default: UNREACHABLE(type.getNominalSize());\r
3245                                 }\r
3246                         }\r
3247                         else UNREACHABLE(0);\r
3248                         break;\r
                     // Sampler types map one-to-one, independent of size
3249                 case EbtSampler2D:\r
3250                         return GL_SAMPLER_2D;\r
3251                 case EbtISampler2D:\r
3252                         return GL_INT_SAMPLER_2D;\r
3253                 case EbtUSampler2D:\r
3254                         return GL_UNSIGNED_INT_SAMPLER_2D;\r
3255                 case EbtSamplerCube:\r
3256                         return GL_SAMPLER_CUBE;\r
3257                 case EbtISamplerCube:\r
3258                         return GL_INT_SAMPLER_CUBE;\r
3259                 case EbtUSamplerCube:\r
3260                         return GL_UNSIGNED_INT_SAMPLER_CUBE;\r
3261                 case EbtSamplerExternalOES:\r
3262                         return GL_SAMPLER_EXTERNAL_OES;\r
3263                 case EbtSampler3D:\r
                             // Uses the _OES-suffixed token, unlike the integer 3D sampler cases below
3264                         return GL_SAMPLER_3D_OES;\r
3265                 case EbtISampler3D:\r
3266                         return GL_INT_SAMPLER_3D;\r
3267                 case EbtUSampler3D:\r
3268                         return GL_UNSIGNED_INT_SAMPLER_3D;\r
3269                 case EbtSampler2DArray:\r
3270                         return GL_SAMPLER_2D_ARRAY;\r
3271                 case EbtISampler2DArray:\r
3272                         return GL_INT_SAMPLER_2D_ARRAY;\r
3273                 case EbtUSampler2DArray:\r
3274                         return GL_UNSIGNED_INT_SAMPLER_2D_ARRAY;\r
3275                 case EbtSampler2DShadow:\r
3276                         return GL_SAMPLER_2D_SHADOW;\r
3277                 case EbtSamplerCubeShadow:\r
3278                         return GL_SAMPLER_CUBE_SHADOW;\r
3279                 case EbtSampler2DArrayShadow:\r
3280                         return GL_SAMPLER_2D_ARRAY_SHADOW;\r
3281                 default:\r
3282                         UNREACHABLE(type.getBasicType());\r
3283                         break;\r
3284                 }\r
3285 \r
                     // Only reached when a case above did not return
3286                 return GL_NONE;\r
3287         }\r
3288 \r
3289         GLenum OutputASM::glVariablePrecision(const TType &type)\r
3290         {\r
3291                 if(type.getBasicType() == EbtFloat)\r
3292                 {\r
3293                         switch(type.getPrecision())\r
3294                         {\r
3295                         case EbpHigh:   return GL_HIGH_FLOAT;\r
3296                         case EbpMedium: return GL_MEDIUM_FLOAT;\r
3297                         case EbpLow:    return GL_LOW_FLOAT;\r
3298                         case EbpUndefined:\r
3299                                 // Should be defined as the default precision by the parser\r
3300                         default: UNREACHABLE(type.getPrecision());\r
3301                         }\r
3302                 }\r
3303                 else if(type.getBasicType() == EbtInt)\r
3304                 {\r
3305                         switch(type.getPrecision())\r
3306                         {\r
3307                         case EbpHigh:   return GL_HIGH_INT;\r
3308                         case EbpMedium: return GL_MEDIUM_INT;\r
3309                         case EbpLow:    return GL_LOW_INT;\r
3310                         case EbpUndefined:\r
3311                                 // Should be defined as the default precision by the parser\r
3312                         default: UNREACHABLE(type.getPrecision());\r
3313                         }\r
3314                 }\r
3315 \r
3316                 // Other types (boolean, sampler) don't have a precision\r
3317                 return GL_NONE;\r
3318         }\r
3319 \r
3320         int OutputASM::dim(TIntermNode *v)\r
3321         {\r
3322                 TIntermTyped *vector = v->getAsTyped();\r
3323                 ASSERT(vector && vector->isRegister());\r
3324                 return vector->getNominalSize();\r
3325         }\r
3326 \r
3327         int OutputASM::dim2(TIntermNode *m)\r
3328         {\r
3329                 TIntermTyped *matrix = m->getAsTyped();\r
3330                 ASSERT(matrix && matrix->isMatrix() && !matrix->isArray());\r
3331                 return matrix->getSecondarySize();\r
3332         }\r
3333 \r
3334         // Returns ~0u if no loop count could be determined\r
3335         unsigned int OutputASM::loopCount(TIntermLoop *node)\r
3336         {\r
3337                 // Parse loops of the form:\r
3338                 // for(int index = initial; index [comparator] limit; index += increment)\r
3339                 TIntermSymbol *index = 0;\r
3340                 TOperator comparator = EOpNull;\r
3341                 int initial = 0;\r
3342                 int limit = 0;\r
3343                 int increment = 0;\r
3344 \r
3345                 // Parse index name and initial value\r
3346                 if(node->getInit())\r
3347                 {\r
3348                         TIntermAggregate *init = node->getInit()->getAsAggregate();\r
3349 \r
3350                         if(init)\r
3351                         {\r
3352                                 TIntermSequence &sequence = init->getSequence();\r
3353                                 TIntermTyped *variable = sequence[0]->getAsTyped();\r
3354 \r
3355                                 if(variable && variable->getQualifier() == EvqTemporary)\r
3356                                 {\r
3357                                         TIntermBinary *assign = variable->getAsBinaryNode();\r
3358 \r
3359                                         if(assign->getOp() == EOpInitialize)\r
3360                                         {\r
3361                                                 TIntermSymbol *symbol = assign->getLeft()->getAsSymbolNode();\r
3362                                                 TIntermConstantUnion *constant = assign->getRight()->getAsConstantUnion();\r
3363 \r
3364                                                 if(symbol && constant)\r
3365                                                 {\r
3366                                                         if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)\r
3367                                                         {\r
3368                                                                 index = symbol;\r
3369                                                                 initial = constant->getUnionArrayPointer()[0].getIConst();\r
3370                                                         }\r
3371                                                 }\r
3372                                         }\r
3373                                 }\r
3374                         }\r
3375                 }\r
3376 \r
3377                 // Parse comparator and limit value\r
3378                 if(index && node->getCondition())\r
3379                 {\r
3380                         TIntermBinary *test = node->getCondition()->getAsBinaryNode();\r
3381 \r
3382                         if(test && test->getLeft()->getAsSymbolNode()->getId() == index->getId())\r
3383                         {\r
3384                                 TIntermConstantUnion *constant = test->getRight()->getAsConstantUnion();\r
3385 \r
3386                                 if(constant)\r
3387                                 {\r
3388                                         if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)\r
3389                                         {\r
3390                                                 comparator = test->getOp();\r
3391                                                 limit = constant->getUnionArrayPointer()[0].getIConst();\r
3392                                         }\r
3393                                 }\r
3394                         }\r
3395                 }\r
3396 \r
3397                 // Parse increment\r
3398                 if(index && comparator != EOpNull && node->getExpression())\r
3399                 {\r
3400                         TIntermBinary *binaryTerminal = node->getExpression()->getAsBinaryNode();\r
3401                         TIntermUnary *unaryTerminal = node->getExpression()->getAsUnaryNode();\r
3402 \r
3403                         if(binaryTerminal)\r
3404                         {\r
3405                                 TOperator op = binaryTerminal->getOp();\r
3406                                 TIntermConstantUnion *constant = binaryTerminal->getRight()->getAsConstantUnion();\r
3407 \r
3408                                 if(constant)\r
3409                                 {\r
3410                                         if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)\r
3411                                         {\r
3412                                                 int value = constant->getUnionArrayPointer()[0].getIConst();\r
3413 \r
3414                                                 switch(op)\r
3415                                                 {\r
3416                                                 case EOpAddAssign: increment = value;  break;\r
3417                                                 case EOpSubAssign: increment = -value; break;\r
3418                                                 default: UNIMPLEMENTED();\r
3419                                                 }\r
3420                                         }\r
3421                                 }\r
3422                         }\r
3423                         else if(unaryTerminal)\r
3424                         {\r
3425                                 TOperator op = unaryTerminal->getOp();\r
3426 \r
3427                                 switch(op)\r
3428                                 {\r
3429                                 case EOpPostIncrement: increment = 1;  break;\r
3430                                 case EOpPostDecrement: increment = -1; break;\r
3431                                 case EOpPreIncrement:  increment = 1;  break;\r
3432                                 case EOpPreDecrement:  increment = -1; break;\r
3433                                 default: UNIMPLEMENTED();\r
3434                                 }\r
3435                         }\r
3436                 }\r
3437 \r
3438                 if(index && comparator != EOpNull && increment != 0)\r
3439                 {\r
3440                         if(comparator == EOpLessThanEqual)\r
3441                         {\r
3442                                 comparator = EOpLessThan;\r
3443                                 limit += 1;\r
3444                         }\r
3445 \r
3446                         if(comparator == EOpLessThan)\r
3447                         {\r
3448                                 int iterations = (limit - initial) / increment;\r
3449 \r
3450                                 if(iterations <= 0)\r
3451                                 {\r
3452                                         iterations = 0;\r
3453                                 }\r
3454 \r
3455                                 return iterations;\r
3456                         }\r
3457                         else UNIMPLEMENTED();   // Falls through\r
3458                 }\r
3459 \r
3460                 return ~0u;\r
3461         }\r
3462 \r
3463         bool DetectLoopDiscontinuity::traverse(TIntermNode *node)\r
3464         {\r
3465                 loopDepth = 0;\r
3466                 loopDiscontinuity = false;\r
3467 \r
3468                 node->traverse(this);\r
3469 \r
3470                 return loopDiscontinuity;\r
3471         }\r
3472 \r
3473         bool DetectLoopDiscontinuity::visitLoop(Visit visit, TIntermLoop *loop)\r
3474         {\r
3475                 if(visit == PreVisit)\r
3476                 {\r
3477                         loopDepth++;\r
3478                 }\r
3479                 else if(visit == PostVisit)\r
3480                 {\r
3481                         loopDepth++;\r
3482                 }\r
3483 \r
3484                 return true;\r
3485         }\r
3486 \r
3487         bool DetectLoopDiscontinuity::visitBranch(Visit visit, TIntermBranch *node)\r
3488         {\r
3489                 if(loopDiscontinuity)\r
3490                 {\r
3491                         return false;\r
3492                 }\r
3493 \r
3494                 if(!loopDepth)\r
3495                 {\r
3496                         return true;\r
3497                 }\r
3498 \r
3499                 switch(node->getFlowOp())\r
3500                 {\r
3501                 case EOpKill:\r
3502                         break;\r
3503                 case EOpBreak:\r
3504                 case EOpContinue:\r
3505                 case EOpReturn:\r
3506                         loopDiscontinuity = true;\r
3507                         break;\r
3508                 default: UNREACHABLE(node->getFlowOp());\r
3509                 }\r
3510 \r
3511                 return !loopDiscontinuity;\r
3512         }\r
3513 \r
3514         bool DetectLoopDiscontinuity::visitAggregate(Visit visit, TIntermAggregate *node)\r
3515         {\r
3516                 return !loopDiscontinuity;\r
3517         }\r
3518 }\r