OSDN Git Service

modf implementation
[android-x86/external-swiftshader.git] / src / OpenGL / compiler / OutputASM.cpp
1 // SwiftShader Software Renderer\r
2 //\r
3 // Copyright(c) 2005-2013 TransGaming Inc.\r
4 //\r
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,\r
6 // transcribed, stored in a retrieval system, translated into any human or computer\r
7 // language by any means, or disclosed to third parties without the explicit written\r
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express\r
9 // or implied, including but not limited to any patent rights, are granted to you.\r
10 //\r
11 \r
12 #include "OutputASM.h"\r
13 #include "Common/Math.hpp"\r
14 \r
15 #include "common/debug.h"\r
16 #include "InfoSink.h"\r
17 \r
18 #include "libGLESv2/Shader.h"\r
19 \r
20 #include <GLES2/gl2.h>\r
21 #include <GLES2/gl2ext.h>\r
22 #include <GLES3/gl3.h>\r
23 \r
24 namespace glsl\r
25 {\r
26         // Integer to TString conversion\r
27         TString str(int i)\r
28         {\r
29                 char buffer[20];\r
30                 sprintf(buffer, "%d", i);\r
31                 return buffer;\r
32         }\r
33 \r
34         class Temporary : public TIntermSymbol\r
35         {\r
36         public:\r
37                 Temporary(OutputASM *assembler) : TIntermSymbol(TSymbolTableLevel::nextUniqueId(), "tmp", TType(EbtFloat, EbpHigh, EvqTemporary, 4, 1, false)), assembler(assembler)\r
38                 {\r
39                 }\r
40 \r
41                 ~Temporary()\r
42                 {\r
43                         assembler->freeTemporary(this);\r
44                 }\r
45 \r
46         private:\r
47                 OutputASM *const assembler;\r
48         };\r
49 \r
50         class Constant : public TIntermConstantUnion\r
51         {\r
52         public:\r
53                 Constant(float x, float y, float z, float w) : TIntermConstantUnion(constants, TType(EbtFloat, EbpHigh, EvqConstExpr, 4, 1, false))\r
54                 {\r
55                         constants[0].setFConst(x);\r
56                         constants[1].setFConst(y);\r
57                         constants[2].setFConst(z);\r
58                         constants[3].setFConst(w);\r
59                 }\r
60 \r
61                 Constant(bool b) : TIntermConstantUnion(constants, TType(EbtBool, EbpHigh, EvqConstExpr, 1, 1, false))\r
62                 {\r
63                         constants[0].setBConst(b);\r
64                 }\r
65 \r
66                 Constant(int i) : TIntermConstantUnion(constants, TType(EbtInt, EbpHigh, EvqConstExpr, 1, 1, false))\r
67                 {\r
68                         constants[0].setIConst(i);\r
69                 }\r
70 \r
71                 ~Constant()\r
72                 {\r
73                 }\r
74 \r
75         private:\r
76                 ConstantUnion constants[4];\r
77         };\r
78 \r
79         Uniform::Uniform(GLenum type, GLenum precision, const std::string &name, int arraySize, int registerIndex, int blockId, const BlockMemberInfo& blockMemberInfo) :\r
80                 type(type), precision(precision), name(name), arraySize(arraySize), registerIndex(registerIndex), blockId(blockId), blockInfo(blockMemberInfo)\r
81         {\r
82         }\r
83 \r
84         UniformBlock::UniformBlock(const std::string& name, unsigned int dataSize, unsigned int arraySize,\r
85                                    TLayoutBlockStorage layout, bool isRowMajorLayout, int registerIndex, int blockId) :\r
86                 name(name), dataSize(dataSize), arraySize(arraySize), layout(layout),\r
87                 isRowMajorLayout(isRowMajorLayout), registerIndex(registerIndex), blockId(blockId)\r
88         {\r
89         }\r
90 \r
91         BlockLayoutEncoder::BlockLayoutEncoder(bool rowMajor)\r
92                 : mCurrentOffset(0), isRowMajor(rowMajor)\r
93         {\r
94         }\r
95 \r
96         BlockMemberInfo BlockLayoutEncoder::encodeType(const TType &type)\r
97         {\r
98                 int arrayStride;\r
99                 int matrixStride;\r
100 \r
101                 getBlockLayoutInfo(type, type.getArraySize(), isRowMajor, &arrayStride, &matrixStride);\r
102 \r
103                 const BlockMemberInfo memberInfo(static_cast<int>(mCurrentOffset * BytesPerComponent),\r
104                                                  static_cast<int>(arrayStride * BytesPerComponent),\r
105                                                  static_cast<int>(matrixStride * BytesPerComponent),\r
106                                                  (matrixStride > 0) && isRowMajor);\r
107 \r
108                 advanceOffset(type, type.getArraySize(), isRowMajor, arrayStride, matrixStride);\r
109 \r
110                 return memberInfo;\r
111         }\r
112 \r
113         // static\r
114         size_t BlockLayoutEncoder::getBlockRegister(const BlockMemberInfo &info)\r
115         {\r
116                 return (info.offset / BytesPerComponent) / ComponentsPerRegister;\r
117         }\r
118 \r
119         // static\r
120         size_t BlockLayoutEncoder::getBlockRegisterElement(const BlockMemberInfo &info)\r
121         {\r
122                 return (info.offset / BytesPerComponent) % ComponentsPerRegister;\r
123         }\r
124 \r
125         void BlockLayoutEncoder::nextRegister()\r
126         {\r
127                 mCurrentOffset = sw::align(mCurrentOffset, ComponentsPerRegister);\r
128         }\r
129 \r
130         Std140BlockEncoder::Std140BlockEncoder(bool rowMajor) : BlockLayoutEncoder(rowMajor)\r
131         {\r
132         }\r
133 \r
134         void Std140BlockEncoder::enterAggregateType()\r
135         {\r
136                 nextRegister();\r
137         }\r
138 \r
139         void Std140BlockEncoder::exitAggregateType()\r
140         {\r
141                 nextRegister();\r
142         }\r
143 \r
144         void Std140BlockEncoder::getBlockLayoutInfo(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int *arrayStrideOut, int *matrixStrideOut)\r
145         {\r
146                 size_t baseAlignment = 0;\r
147                 int matrixStride = 0;\r
148                 int arrayStride = 0;\r
149 \r
150                 if(type.isMatrix())\r
151                 {\r
152                         baseAlignment = ComponentsPerRegister;\r
153                         matrixStride = ComponentsPerRegister;\r
154 \r
155                         if(arraySize > 0)\r
156                         {\r
157                                 const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();\r
158                                 arrayStride = ComponentsPerRegister * numRegisters;\r
159                         }\r
160                 }\r
161                 else if(arraySize > 0)\r
162                 {\r
163                         baseAlignment = ComponentsPerRegister;\r
164                         arrayStride = ComponentsPerRegister;\r
165                 }\r
166                 else\r
167                 {\r
168                         const size_t numComponents = type.getElementSize();\r
169                         baseAlignment = (numComponents == 3 ? 4u : numComponents);\r
170                 }\r
171 \r
172                 mCurrentOffset = sw::align(mCurrentOffset, baseAlignment);\r
173 \r
174                 *matrixStrideOut = matrixStride;\r
175                 *arrayStrideOut = arrayStride;\r
176         }\r
177 \r
178         void Std140BlockEncoder::advanceOffset(const TType &type, unsigned int arraySize, bool isRowMajorMatrix, int arrayStride, int matrixStride)\r
179         {\r
180                 if(arraySize > 0)\r
181                 {\r
182                         mCurrentOffset += arrayStride * arraySize;\r
183                 }\r
184                 else if(type.isMatrix())\r
185                 {\r
186                         ASSERT(matrixStride == ComponentsPerRegister);\r
187                         const int numRegisters = isRowMajorMatrix ? type.getSecondarySize() : type.getNominalSize();\r
188                         mCurrentOffset += ComponentsPerRegister * numRegisters;\r
189                 }\r
190                 else\r
191                 {\r
192                         mCurrentOffset += type.getElementSize();\r
193                 }\r
194         }\r
195 \r
196         Attribute::Attribute()\r
197         {\r
198                 type = GL_NONE;\r
199                 arraySize = 0;\r
200                 registerIndex = 0;\r
201         }\r
202 \r
203         Attribute::Attribute(GLenum type, const std::string &name, int arraySize, int location, int registerIndex)\r
204         {\r
205                 this->type = type;\r
206                 this->name = name;\r
207                 this->arraySize = arraySize;\r
208                 this->location = location;\r
209                 this->registerIndex = registerIndex;\r
210         }\r
211 \r
212         sw::PixelShader *Shader::getPixelShader() const\r
213         {\r
214                 return 0;\r
215         }\r
216 \r
217         sw::VertexShader *Shader::getVertexShader() const\r
218         {\r
219                 return 0;\r
220         }\r
221 \r
222         OutputASM::TextureFunction::TextureFunction(const TString& nodeName) : method(IMPLICIT), proj(false), offset(false)\r
223         {\r
224                 TString name = TFunction::unmangleName(nodeName);\r
225 \r
226                 if(name == "texture2D" || name == "textureCube" || name == "texture" || name == "texture3D")\r
227                 {\r
228                         method = IMPLICIT;\r
229                 }\r
230                 else if(name == "texture2DProj" || name == "textureProj")\r
231                 {\r
232                         method = IMPLICIT;\r
233                         proj = true;\r
234                 }\r
235                 else if(name == "texture2DLod" || name == "textureCubeLod" || name == "textureLod")\r
236                 {\r
237                         method = LOD;\r
238                 }\r
239                 else if(name == "texture2DProjLod" || name == "textureProjLod")\r
240                 {\r
241                         method = LOD;\r
242                         proj = true;\r
243                 }\r
244                 else if(name == "textureSize")\r
245                 {\r
246                         method = SIZE;\r
247                 }\r
248                 else if(name == "textureOffset")\r
249                 {\r
250                         method = IMPLICIT;\r
251                         offset = true;\r
252                 }\r
253                 else if(name == "textureProjOffset")\r
254                 {\r
255                         method = IMPLICIT;\r
256                         offset = true;\r
257                         proj = true;\r
258                 }\r
259                 else if(name == "textureLodOffset")\r
260                 {\r
261                         method = LOD;\r
262                         offset = true;\r
263                 }\r
264                 else if(name == "textureProjLodOffset")\r
265                 {\r
266                         method = LOD;\r
267                         proj = true;\r
268                         offset = true;\r
269                 }\r
270                 else if(name == "texelFetch")\r
271                 {\r
272                         method = FETCH;\r
273                 }\r
274                 else if(name == "texelFetchOffset")\r
275                 {\r
276                         method = FETCH;\r
277                         offset = true;\r
278                 }\r
279                 else if(name == "textureGrad")\r
280                 {\r
281                         method = GRAD;\r
282                 }\r
283                 else if(name == "textureGradOffset")\r
284                 {\r
285                         method = GRAD;\r
286                         offset = true;\r
287                 }\r
288                 else if(name == "textureProjGrad")\r
289                 {\r
290                         method = GRAD;\r
291                         proj = true;\r
292                 }\r
293                 else if(name == "textureProjGradOffset")\r
294                 {\r
295                         method = GRAD;\r
296                         proj = true;\r
297                         offset = true;\r
298                 }\r
299                 else UNREACHABLE(0);\r
300         }\r
301 \r
302         OutputASM::OutputASM(TParseContext &context, Shader *shaderObject) : TIntermTraverser(true, true, true), shaderObject(shaderObject), mContext(context)\r
303         {\r
304                 shader = 0;\r
305                 pixelShader = 0;\r
306                 vertexShader = 0;\r
307 \r
308                 if(shaderObject)\r
309                 {\r
310                         shader = shaderObject->getShader();\r
311                         pixelShader = shaderObject->getPixelShader();\r
312                         vertexShader = shaderObject->getVertexShader();\r
313                 }\r
314 \r
315                 functionArray.push_back(Function(0, "main(", 0, 0));\r
316                 currentFunction = 0;\r
317                 outputQualifier = EvqOutput; // Set outputQualifier to any value other than EvqFragColor or EvqFragData\r
318         }\r
319 \r
320         OutputASM::~OutputASM()\r
321         {\r
322         }\r
323 \r
324         void OutputASM::output()\r
325         {\r
326                 if(shader)\r
327                 {\r
328                         emitShader(GLOBAL);\r
329 \r
330                         if(functionArray.size() > 1)   // Only call main() when there are other functions\r
331                         {\r
332                                 Instruction *callMain = emit(sw::Shader::OPCODE_CALL);\r
333                                 callMain->dst.type = sw::Shader::PARAMETER_LABEL;\r
334                                 callMain->dst.index = 0;   // main()\r
335 \r
336                                 emit(sw::Shader::OPCODE_RET);\r
337                         }\r
338 \r
339                         emitShader(FUNCTION);\r
340                 }\r
341         }\r
342 \r
343         void OutputASM::emitShader(Scope scope)\r
344         {\r
345                 emitScope = scope;\r
346                 currentScope = GLOBAL;\r
347                 mContext.getTreeRoot()->traverse(this);\r
348         }\r
349 \r
350         void OutputASM::freeTemporary(Temporary *temporary)\r
351         {\r
352                 free(temporaries, temporary);\r
353         }\r
354 \r
355         sw::Shader::Opcode OutputASM::getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const\r
356         {\r
357                 TBasicType baseType = in->getType().getBasicType();\r
358 \r
359                 switch(op)\r
360                 {\r
361                 case sw::Shader::OPCODE_NEG:\r
362                         switch(baseType)\r
363                         {\r
364                         case EbtInt:\r
365                         case EbtUInt:\r
366                                 return sw::Shader::OPCODE_INEG;\r
367                         case EbtFloat:\r
368                         default:\r
369                                 return op;\r
370                         }\r
371                 case sw::Shader::OPCODE_ABS:\r
372                         switch(baseType)\r
373                         {\r
374                         case EbtInt:\r
375                                 return sw::Shader::OPCODE_IABS;\r
376                         case EbtFloat:\r
377                         default:\r
378                                 return op;\r
379                         }\r
380                 case sw::Shader::OPCODE_SGN:\r
381                         switch(baseType)\r
382                         {\r
383                         case EbtInt:\r
384                                 return sw::Shader::OPCODE_ISGN;\r
385                         case EbtFloat:\r
386                         default:\r
387                                 return op;\r
388                         }\r
389                 case sw::Shader::OPCODE_ADD:\r
390                         switch(baseType)\r
391                         {\r
392                         case EbtInt:\r
393                         case EbtUInt:\r
394                                 return sw::Shader::OPCODE_IADD;\r
395                         case EbtFloat:\r
396                         default:\r
397                                 return op;\r
398                         }\r
399                 case sw::Shader::OPCODE_SUB:\r
400                         switch(baseType)\r
401                         {\r
402                         case EbtInt:\r
403                         case EbtUInt:\r
404                                 return sw::Shader::OPCODE_ISUB;\r
405                         case EbtFloat:\r
406                         default:\r
407                                 return op;\r
408                         }\r
409                 case sw::Shader::OPCODE_MUL:\r
410                         switch(baseType)\r
411                         {\r
412                         case EbtInt:\r
413                         case EbtUInt:\r
414                                 return sw::Shader::OPCODE_IMUL;\r
415                         case EbtFloat:\r
416                         default:\r
417                                 return op;\r
418                         }\r
419                 case sw::Shader::OPCODE_DIV:\r
420                         switch(baseType)\r
421                         {\r
422                         case EbtInt:\r
423                                 return sw::Shader::OPCODE_IDIV;\r
424                         case EbtUInt:\r
425                                 return sw::Shader::OPCODE_UDIV;\r
426                         case EbtFloat:\r
427                         default:\r
428                                 return op;\r
429                         }\r
430                 case sw::Shader::OPCODE_IMOD:\r
431                         return baseType == EbtUInt ? sw::Shader::OPCODE_UMOD : op;\r
432                 case sw::Shader::OPCODE_ISHR:\r
433                         return baseType == EbtUInt ? sw::Shader::OPCODE_USHR : op;\r
434                 case sw::Shader::OPCODE_MIN:\r
435                         switch(baseType)\r
436                         {\r
437                         case EbtInt:\r
438                                 return sw::Shader::OPCODE_IMIN;\r
439                         case EbtUInt:\r
440                                 return sw::Shader::OPCODE_UMIN;\r
441                         case EbtFloat:\r
442                         default:\r
443                                 return op;\r
444                         }\r
445                 case sw::Shader::OPCODE_MAX:\r
446                         switch(baseType)\r
447                         {\r
448                         case EbtInt:\r
449                                 return sw::Shader::OPCODE_IMAX;\r
450                         case EbtUInt:\r
451                                 return sw::Shader::OPCODE_UMAX;\r
452                         case EbtFloat:\r
453                         default:\r
454                                 return op;\r
455                         }\r
456                 default:\r
457                         return op;\r
458                 }\r
459         }\r
460 \r
461         void OutputASM::visitSymbol(TIntermSymbol *symbol)\r
462         {\r
463                 // Vertex varyings don't have to be actively used to successfully link\r
464                 // against pixel shaders that use them. So make sure they're declared.\r
465                 if(symbol->getQualifier() == EvqVaryingOut || symbol->getQualifier() == EvqInvariantVaryingOut || symbol->getQualifier() == EvqVertexOut)\r
466                 {\r
467                         if(symbol->getBasicType() != EbtInvariant)   // Typeless declarations are not new varyings\r
468                         {\r
469                                 declareVarying(symbol, -1);\r
470                         }\r
471                 }\r
472 \r
473                 TInterfaceBlock* block = symbol->getType().getInterfaceBlock();\r
474                 // OpenGL ES 3.0.4 spec, section 2.12.6 Uniform Variables:\r
475                 // "All members of a named uniform block declared with a shared or std140 layout qualifier\r
476                 // are considered active, even if they are not referenced in any shader in the program.\r
477                 // The uniform block itself is also considered active, even if no member of the block is referenced."\r
478                 if(block && ((block->blockStorage() == EbsShared) || (block->blockStorage() == EbsStd140)))\r
479                 {\r
480                         uniformRegister(symbol);\r
481                 }\r
482         }\r
483 \r
484         bool OutputASM::visitBinary(Visit visit, TIntermBinary *node)\r
485         {\r
486                 if(currentScope != emitScope)\r
487                 {\r
488                         return false;\r
489                 }\r
490 \r
491                 TIntermTyped *result = node;\r
492                 TIntermTyped *left = node->getLeft();\r
493                 TIntermTyped *right = node->getRight();\r
494                 const TType &leftType = left->getType();\r
495                 const TType &rightType = right->getType();\r
496                 const TType &resultType = node->getType();\r
497 \r
498                 if(isSamplerRegister(result))\r
499                 {\r
500                         return false;   // Don't traverse, the register index is determined statically\r
501                 }\r
502 \r
503                 switch(node->getOp())\r
504                 {\r
505                 case EOpAssign:\r
506                         if(visit == PostVisit)\r
507                         {\r
508                                 assignLvalue(left, right);\r
509                                 copy(result, right);\r
510                         }\r
511                         break;\r
512                 case EOpInitialize:\r
513                         if(visit == PostVisit)\r
514                         {\r
515                                 copy(left, right);\r
516                         }\r
517                         break;\r
518                 case EOpMatrixTimesScalarAssign:\r
519                         if(visit == PostVisit)\r
520                         {\r
521                                 for(int i = 0; i < leftType.getNominalSize(); i++)\r
522                                 {\r
523                                         emit(sw::Shader::OPCODE_MUL, result, i, left, i, right);\r
524                                 }\r
525 \r
526                                 assignLvalue(left, result);\r
527                         }\r
528                         break;\r
529                 case EOpVectorTimesMatrixAssign:\r
530                         if(visit == PostVisit)\r
531                         {\r
532                                 int size = leftType.getNominalSize();\r
533 \r
534                                 for(int i = 0; i < size; i++)\r
535                                 {\r
536                                         Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, 0, left, 0, right, i);\r
537                                         dot->dst.mask = 1 << i;\r
538                                 }\r
539 \r
540                                 assignLvalue(left, result);\r
541                         }\r
542                         break;\r
543                 case EOpMatrixTimesMatrixAssign:\r
544                         if(visit == PostVisit)\r
545                         {\r
546                                 int dim = leftType.getNominalSize();\r
547 \r
548                                 for(int i = 0; i < dim; i++)\r
549                                 {\r
550                                         Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);\r
551                                         mul->src[1].swizzle = 0x00;\r
552 \r
553                                         for(int j = 1; j < dim; j++)\r
554                                         {\r
555                                                 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);\r
556                                                 mad->src[1].swizzle = j * 0x55;\r
557                                         }\r
558                                 }\r
559 \r
560                                 assignLvalue(left, result);\r
561                         }\r
562                         break;\r
563                 case EOpIndexDirect:\r
564                         if(visit == PostVisit)\r
565                         {\r
566                                 int index = right->getAsConstantUnion()->getIConst(0);\r
567 \r
568                                 if(result->isMatrix() || result->isStruct() || result->isInterfaceBlock())\r
569                                 {\r
570                                         ASSERT(left->isArray());\r
571                                         copy(result, left, index * left->elementRegisterCount());\r
572                                 }\r
573                                 else if(result->isRegister())\r
574                                 {\r
575                                         int srcIndex = 0;\r
576                                         if(left->isRegister())\r
577                                         {\r
578                                                 srcIndex = 0;\r
579                                         }\r
580                                         else if(left->isArray())\r
581                                         {\r
582                                                 srcIndex = index * left->elementRegisterCount();\r
583                                         }\r
584                                         else if(left->isMatrix())\r
585                                         {\r
586                                                 ASSERT(index < left->getNominalSize());   // FIXME: Report semantic error\r
587                                                 srcIndex = index;\r
588                                         }\r
589                                         else UNREACHABLE(0);\r
590 \r
591                                         Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, 0, left, srcIndex);\r
592 \r
593                                         if(left->isRegister())\r
594                                         {\r
595                                                 mov->src[0].swizzle = index;\r
596                                         }\r
597                                 }\r
598                                 else UNREACHABLE(0);\r
599                         }\r
600                         break;\r
601                 case EOpIndexIndirect:\r
602                         if(visit == PostVisit)\r
603                         {\r
604                                 if(left->isArray() || left->isMatrix())\r
605                                 {\r
606                                         for(int index = 0; index < result->totalRegisterCount(); index++)\r
607                                         {\r
608                                                 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index, left, index);\r
609                                                 mov->dst.mask = writeMask(result, index);\r
610 \r
611                                                 if(left->totalRegisterCount() > 1)\r
612                                                 {\r
613                                                         sw::Shader::SourceParameter relativeRegister;\r
614                                                         argument(relativeRegister, right);\r
615 \r
616                                                         mov->src[0].rel.type = relativeRegister.type;\r
617                                                         mov->src[0].rel.index = relativeRegister.index;\r
618                                                         mov->src[0].rel.scale = result->totalRegisterCount();\r
619                                                         mov->src[0].rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);\r
620                                                 }\r
621                                         }\r
622                                 }\r
623                                 else if(left->isRegister())\r
624                                 {\r
625                                         emit(sw::Shader::OPCODE_EXTRACT, result, left, right);\r
626                                 }\r
627                                 else UNREACHABLE(0);\r
628                         }\r
629                         break;\r
630                 case EOpIndexDirectStruct:\r
631                 case EOpIndexDirectInterfaceBlock:\r
632                         if(visit == PostVisit)\r
633                         {\r
634                                 ASSERT(leftType.isStruct() || (leftType.isInterfaceBlock()));\r
635 \r
636                                 const TFieldList& fields = (node->getOp() == EOpIndexDirectStruct) ?\r
637                                                            leftType.getStruct()->fields() :\r
638                                                            leftType.getInterfaceBlock()->fields();\r
639                                 int index = right->getAsConstantUnion()->getIConst(0);\r
640                                 int fieldOffset = 0;\r
641 \r
642                                 for(int i = 0; i < index; i++)\r
643                                 {\r
644                                         fieldOffset += fields[i]->type()->totalRegisterCount();\r
645                                 }\r
646 \r
647                                 copy(result, left, fieldOffset);\r
648                         }\r
649                         break;\r
650                 case EOpVectorSwizzle:\r
651                         if(visit == PostVisit)\r
652                         {\r
653                                 int swizzle = 0;\r
654                                 TIntermAggregate *components = right->getAsAggregate();\r
655 \r
656                                 if(components)\r
657                                 {\r
658                                         TIntermSequence &sequence = components->getSequence();\r
659                                         int component = 0;\r
660 \r
661                                         for(TIntermSequence::iterator sit = sequence.begin(); sit != sequence.end(); sit++)\r
662                                         {\r
663                                                 TIntermConstantUnion *element = (*sit)->getAsConstantUnion();\r
664 \r
665                                                 if(element)\r
666                                                 {\r
667                                                         int i = element->getUnionArrayPointer()[0].getIConst();\r
668                                                         swizzle |= i << (component * 2);\r
669                                                         component++;\r
670                                                 }\r
671                                                 else UNREACHABLE(0);\r
672                                         }\r
673                                 }\r
674                                 else UNREACHABLE(0);\r
675 \r
676                                 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left);\r
677                                 mov->src[0].swizzle = swizzle;\r
678                         }\r
679                         break;\r
680                 case EOpAddAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, left, right); break;\r
681                 case EOpAdd:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, right);       break;\r
682                 case EOpSubAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, left, right); break;\r
683                 case EOpSub:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, right);       break;\r
684                 case EOpMulAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, left, right); break;\r
685                 case EOpMul:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, right);       break;\r
686                 case EOpDivAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, left, right); break;\r
687                 case EOpDiv:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, right);       break;\r
688                 case EOpIModAssign:          if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, left, right); break;\r
689                 case EOpIMod:                if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, right);       break;\r
690                 case EOpBitShiftLeftAssign:  if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_SHL, result, left, left, right); break;\r
691                 case EOpBitShiftLeft:        if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_SHL, result, left, right);       break;\r
692                 case EOpBitShiftRightAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, left, right); break;\r
693                 case EOpBitShiftRight:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, right);       break;\r
694                 case EOpBitwiseAndAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_AND, result, left, left, right); break;\r
695                 case EOpBitwiseAnd:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_AND, result, left, right);       break;\r
696                 case EOpBitwiseXorAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_XOR, result, left, left, right); break;\r
697                 case EOpBitwiseXor:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_XOR, result, left, right);       break;\r
698                 case EOpBitwiseOrAssign:     if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_OR, result, left, left, right);  break;\r
699                 case EOpBitwiseOr:           if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_OR, result, left, right);        break;\r
700                 case EOpEqual:\r
701                         if(visit == PostVisit)\r
702                         {\r
703                                 emitBinary(sw::Shader::OPCODE_EQ, result, left, right);\r
704 \r
705                                 for(int index = 1; index < left->totalRegisterCount(); index++)\r
706                                 {\r
707                                         Temporary equal(this);\r
708                                         emit(sw::Shader::OPCODE_EQ, &equal, 0, left, index, right, index);\r
709                                         emit(sw::Shader::OPCODE_AND, result, result, &equal);\r
710                                 }\r
711                         }\r
712                         break;\r
713                 case EOpNotEqual:\r
714                         if(visit == PostVisit)\r
715                         {\r
716                                 emitBinary(sw::Shader::OPCODE_NE, result, left, right);\r
717 \r
718                                 for(int index = 1; index < left->totalRegisterCount(); index++)\r
719                                 {\r
720                                         Temporary notEqual(this);\r
721                                         emit(sw::Shader::OPCODE_NE, &notEqual, 0, left, index, right, index);\r
722                                         emit(sw::Shader::OPCODE_OR, result, result, &notEqual);\r
723                                 }\r
724                         }\r
725                         break;\r
726                 case EOpLessThan:                if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, left, right); break;\r
727                 case EOpGreaterThan:             if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, left, right); break;\r
728                 case EOpLessThanEqual:           if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, left, right); break;\r
729                 case EOpGreaterThanEqual:        if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, left, right); break;\r
730                 case EOpVectorTimesScalarAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, left, right); break;\r
731                 case EOpVectorTimesScalar:       if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, right); break;\r
732                 case EOpMatrixTimesScalar:\r
733                         if(visit == PostVisit)\r
734                         {\r
735                                 if(left->isMatrix())\r
736                                 {\r
737                                         for(int i = 0; i < leftType.getNominalSize(); i++)\r
738                                         {\r
739                                                 emit(sw::Shader::OPCODE_MUL, result, i, left, i, right, 0);\r
740                                         }\r
741                                 }\r
742                                 else if(right->isMatrix())\r
743                                 {\r
744                                         for(int i = 0; i < rightType.getNominalSize(); i++)\r
745                                         {\r
746                                                 emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);\r
747                                         }\r
748                                 }\r
749                                 else UNREACHABLE(0);\r
750                         }\r
751                         break;\r
752                 case EOpVectorTimesMatrix:\r
753                         if(visit == PostVisit)\r
754                         {\r
755                                 sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize());\r
756 \r
757                                 int size = rightType.getNominalSize();\r
758                                 for(int i = 0; i < size; i++)\r
759                                 {\r
760                                         Instruction *dot = emit(dpOpcode, result, 0, left, 0, right, i);\r
761                                         dot->dst.mask = 1 << i;\r
762                                 }\r
763                         }\r
764                         break;\r
765                 case EOpMatrixTimesVector:\r
766                         if(visit == PostVisit)\r
767                         {\r
768                                 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);\r
769                                 mul->src[1].swizzle = 0x00;\r
770 \r
771                                 int size = rightType.getNominalSize();\r
772                                 for(int i = 1; i < size; i++)\r
773                                 {\r
774                                         Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, 0, left, i, right, 0, result);\r
775                                         mad->src[1].swizzle = i * 0x55;\r
776                                 }\r
777                         }\r
778                         break;\r
779                 case EOpMatrixTimesMatrix:\r
780                         if(visit == PostVisit)\r
781                         {\r
782                                 int dim = leftType.getNominalSize();\r
783 \r
784                                 int size = rightType.getNominalSize();\r
785                                 for(int i = 0; i < size; i++)\r
786                                 {\r
787                                         Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, left, 0, right, i);\r
788                                         mul->src[1].swizzle = 0x00;\r
789 \r
790                                         for(int j = 1; j < dim; j++)\r
791                                         {\r
792                                                 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, i, left, j, right, i, result, i);\r
793                                                 mad->src[1].swizzle = j * 0x55;\r
794                                         }\r
795                                 }\r
796                         }\r
797                         break;\r
798                 case EOpLogicalOr:\r
799                         if(trivial(right, 6))\r
800                         {\r
801                                 if(visit == PostVisit)\r
802                                 {\r
803                                         emit(sw::Shader::OPCODE_OR, result, left, right);\r
804                                 }\r
805                         }\r
806                         else   // Short-circuit evaluation\r
807                         {\r
808                                 if(visit == InVisit)\r
809                                 {\r
810                                         emit(sw::Shader::OPCODE_MOV, result, left);\r
811                                         Instruction *ifnot = emit(sw::Shader::OPCODE_IF, 0, result);\r
812                                         ifnot->src[0].modifier = sw::Shader::MODIFIER_NOT;\r
813                                 }\r
814                                 else if(visit == PostVisit)\r
815                                 {\r
816                                         emit(sw::Shader::OPCODE_MOV, result, right);\r
817                                         emit(sw::Shader::OPCODE_ENDIF);\r
818                                 }\r
819                         }\r
820                         break;\r
821                 case EOpLogicalXor:        if(visit == PostVisit) emit(sw::Shader::OPCODE_XOR, result, left, right); break;\r
822                 case EOpLogicalAnd:\r
823                         if(trivial(right, 6))\r
824                         {\r
825                                 if(visit == PostVisit)\r
826                                 {\r
827                                         emit(sw::Shader::OPCODE_AND, result, left, right);\r
828                                 }\r
829                         }\r
830                         else   // Short-circuit evaluation\r
831                         {\r
832                                 if(visit == InVisit)\r
833                                 {\r
834                                         emit(sw::Shader::OPCODE_MOV, result, left);\r
835                                         emit(sw::Shader::OPCODE_IF, 0, result);\r
836                                 }\r
837                                 else if(visit == PostVisit)\r
838                                 {\r
839                                         emit(sw::Shader::OPCODE_MOV, result, right);\r
840                                         emit(sw::Shader::OPCODE_ENDIF);\r
841                                 }\r
842                         }\r
843                         break;\r
844                 default: UNREACHABLE(node->getOp());\r
845                 }\r
846 \r
847                 return true;\r
848         }\r
849 \r
850         void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow)\r
851         {\r
852                 switch(size)\r
853                 {\r
854                 case 1: // Used for cofactor computation only\r
855                         {\r
856                                 // For a 2x2 matrix, the cofactor is simply a transposed move or negate\r
857                                 bool isMov = (row == col);\r
858                                 sw::Shader::Opcode op = isMov ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG;\r
859                                 Instruction *mov = emit(op, result, outCol, arg, isMov ? 1 - row : row);\r
860                                 mov->src[0].swizzle = 0x55 * (isMov ? 1 - col : col);\r
861                                 mov->dst.mask = 1 << outRow;\r
862                         }\r
863                         break;\r
864                 case 2:\r
865                         {\r
866                                 static const unsigned int swizzle[3] = { 0x99, 0x88, 0x44 }; // xy?? : yzyz, xzxz, xyxy\r
867 \r
868                                 bool isCofactor = (col >= 0) && (row >= 0);\r
869                                 int col0 = (isCofactor && (col <= 0)) ? 1 : 0;\r
870                                 int col1 = (isCofactor && (col <= 1)) ? 2 : 1;\r
871                                 bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));\r
872 \r
873                                 Instruction *det = emit(sw::Shader::OPCODE_DET2, result, outCol, arg, negate ? col1 : col0, arg, negate ? col0 : col1);\r
874                                 det->src[0].swizzle = det->src[1].swizzle = swizzle[isCofactor ? row : 2];\r
875                                 det->dst.mask = 1 << outRow;\r
876                         }\r
877                         break;\r
878                 case 3:\r
879                         {\r
880                                 static const unsigned int swizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 }; // xyz? : yzww, xzww, xyww, xyzw\r
881 \r
882                                 bool isCofactor = (col >= 0) && (row >= 0);\r
883                                 int col0 = (isCofactor && (col <= 0)) ? 1 : 0;\r
884                                 int col1 = (isCofactor && (col <= 1)) ? 2 : 1;\r
885                                 int col2 = (isCofactor && (col <= 2)) ? 3 : 2;\r
886                                 bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));\r
887 \r
888                                 Instruction *det = emit(sw::Shader::OPCODE_DET3, result, outCol, arg, col0, arg, negate ? col2 : col1, arg, negate ? col1 : col2);\r
889                                 det->src[0].swizzle = det->src[1].swizzle = det->src[2].swizzle = swizzle[isCofactor ? row : 3];\r
890                                 det->dst.mask = 1 << outRow;\r
891                         }\r
892                         break;\r
893                 case 4:\r
894                         {\r
895                                 Instruction *det = emit(sw::Shader::OPCODE_DET4, result, outCol, arg, 0, arg, 1, arg, 2, arg, 3);\r
896                                 det->dst.mask = 1 << outRow;\r
897                         }\r
898                         break;\r
899                 default:\r
900                         UNREACHABLE(size);\r
901                         break;\r
902                 }\r
903         }\r
904 \r
905         bool OutputASM::visitUnary(Visit visit, TIntermUnary *node)\r
906         {\r
907                 if(currentScope != emitScope)\r
908                 {\r
909                         return false;\r
910                 }\r
911 \r
912                 TIntermTyped *result = node;\r
913                 TIntermTyped *arg = node->getOperand();\r
914                 TBasicType basicType = arg->getType().getBasicType();\r
915 \r
916                 union\r
917                 {\r
918                         float f;\r
919                         int i;\r
920                 } one_value;\r
921 \r
922                 if(basicType == EbtInt || basicType == EbtUInt)\r
923                 {\r
924                         one_value.i = 1;\r
925                 }\r
926                 else\r
927                 {\r
928                         one_value.f = 1.0f;\r
929                 }\r
930 \r
931                 Constant one(one_value.f, one_value.f, one_value.f, one_value.f);\r
932                 Constant rad(1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f);\r
933                 Constant deg(5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f);\r
934 \r
935                 switch(node->getOp())\r
936                 {\r
937                 case EOpNegative:\r
938                         if(visit == PostVisit)\r
939                         {\r
940                                 sw::Shader::Opcode negOpcode = getOpcode(sw::Shader::OPCODE_NEG, arg);\r
941                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
942                                 {\r
943                                         emit(negOpcode, result, index, arg, index);\r
944                                 }\r
945                         }\r
946                         break;\r
947                 case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;\r
948                 case EOpLogicalNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;\r
949                 case EOpPostIncrement:\r
950                         if(visit == PostVisit)\r
951                         {\r
952                                 copy(result, arg);\r
953 \r
954                                 sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);\r
955                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
956                                 {\r
957                                         emit(addOpcode, arg, index, arg, index, &one);\r
958                                 }\r
959 \r
960                                 assignLvalue(arg, arg);\r
961                         }\r
962                         break;\r
963                 case EOpPostDecrement:\r
964                         if(visit == PostVisit)\r
965                         {\r
966                                 copy(result, arg);\r
967 \r
968                                 sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);\r
969                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
970                                 {\r
971                                         emit(subOpcode, arg, index, arg, index, &one);\r
972                                 }\r
973 \r
974                                 assignLvalue(arg, arg);\r
975                         }\r
976                         break;\r
977                 case EOpPreIncrement:\r
978                         if(visit == PostVisit)\r
979                         {\r
980                                 sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);\r
981                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
982                                 {\r
983                                         emit(addOpcode, result, index, arg, index, &one);\r
984                                 }\r
985 \r
986                                 assignLvalue(arg, result);\r
987                         }\r
988                         break;\r
989                 case EOpPreDecrement:\r
990                         if(visit == PostVisit)\r
991                         {\r
992                                 sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);\r
993                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
994                                 {\r
995                                         emit(subOpcode, result, index, arg, index, &one);\r
996                                 }\r
997 \r
998                                 assignLvalue(arg, result);\r
999                         }\r
1000                         break;\r
1001                 case EOpRadians:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &rad); break;\r
1002                 case EOpDegrees:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &deg); break;\r
1003                 case EOpSin:              if(visit == PostVisit) emit(sw::Shader::OPCODE_SIN, result, arg); break;\r
1004                 case EOpCos:              if(visit == PostVisit) emit(sw::Shader::OPCODE_COS, result, arg); break;\r
1005                 case EOpTan:              if(visit == PostVisit) emit(sw::Shader::OPCODE_TAN, result, arg); break;\r
1006                 case EOpAsin:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ASIN, result, arg); break;\r
1007                 case EOpAcos:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOS, result, arg); break;\r
1008                 case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN, result, arg); break;\r
1009                 case EOpSinh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SINH, result, arg); break;\r
1010                 case EOpCosh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_COSH, result, arg); break;\r
1011                 case EOpTanh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_TANH, result, arg); break;\r
1012                 case EOpAsinh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ASINH, result, arg); break;\r
1013                 case EOpAcosh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOSH, result, arg); break;\r
1014                 case EOpAtanh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ATANH, result, arg); break;\r
1015                 case EOpExp:              if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP, result, arg); break;\r
1016                 case EOpLog:              if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG, result, arg); break;\r
1017                 case EOpExp2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP2, result, arg); break;\r
1018                 case EOpLog2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break;\r
1019                 case EOpSqrt:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break;\r
1020                 case EOpInverseSqrt:      if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break;\r
1021                 case EOpAbs:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_ABS, result), result, arg); break;\r
1022                 case EOpSign:             if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_SGN, result), result, arg); break;\r
1023                 case EOpFloor:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break;\r
1024                 case EOpTrunc:            if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break;\r
1025                 case EOpRound:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break;\r
1026                 case EOpRoundEven:        if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUNDEVEN, result, arg); break;\r
1027                 case EOpCeil:             if(visit == PostVisit) emit(sw::Shader::OPCODE_CEIL, result, arg, result); break;\r
1028                 case EOpFract:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FRC, result, arg); break;\r
1029                 case EOpIsNan:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISNAN, result, arg); break;\r
1030                 case EOpIsInf:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISINF, result, arg); break;\r
1031                 case EOpLength:           if(visit == PostVisit) emit(sw::Shader::OPCODE_LEN(dim(arg)), result, arg); break;\r
1032                 case EOpNormalize:        if(visit == PostVisit) emit(sw::Shader::OPCODE_NRM(dim(arg)), result, arg); break;\r
1033                 case EOpDFdx:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDX, result, arg); break;\r
1034                 case EOpDFdy:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDY, result, arg); break;\r
1035                 case EOpFwidth:           if(visit == PostVisit) emit(sw::Shader::OPCODE_FWIDTH, result, arg); break;\r
1036                 case EOpAny:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ANY, result, arg); break;\r
1037                 case EOpAll:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ALL, result, arg); break;\r
1038                 case EOpFloatBitsToInt:   if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOINT, result, arg); break;\r
1039                 case EOpFloatBitsToUint:  if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOUINT, result, arg); break;\r
1040                 case EOpIntBitsToFloat:   if(visit == PostVisit) emit(sw::Shader::OPCODE_INTBITSTOFLOAT, result, arg); break;\r
1041                 case EOpUintBitsToFloat:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UINTBITSTOFLOAT, result, arg); break;\r
1042                 case EOpPackSnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKSNORM2x16, result, arg); break;\r
1043                 case EOpPackUnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKUNORM2x16, result, arg); break;\r
1044                 case EOpPackHalf2x16:     if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKHALF2x16, result, arg); break;\r
1045                 case EOpUnpackSnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKSNORM2x16, result, arg); break;\r
1046                 case EOpUnpackUnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKUNORM2x16, result, arg); break;\r
1047                 case EOpUnpackHalf2x16:   if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKHALF2x16, result, arg); break;\r
1048                 case EOpTranspose:\r
1049                         if(visit == PostVisit)\r
1050                         {\r
1051                                 int numCols = arg->getNominalSize();\r
1052                                 int numRows = arg->getSecondarySize();\r
1053                                 for(int i = 0; i < numCols; ++i)\r
1054                                 {\r
1055                                         for(int j = 0; j < numRows; ++j)\r
1056                                         {\r
1057                                                 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, j, arg, i);\r
1058                                                 mov->src[0].swizzle = 0x55 * j;\r
1059                                                 mov->dst.mask = 1 << i;\r
1060                                         }\r
1061                                 }\r
1062                         }\r
1063                         break;\r
1064                 case EOpDeterminant:\r
1065                         if(visit == PostVisit)\r
1066                         {\r
1067                                 int size = arg->getNominalSize();\r
1068                                 ASSERT(size == arg->getSecondarySize());\r
1069 \r
1070                                 emitDeterminant(result, arg, size);\r
1071                         }\r
1072                         break;\r
1073                 case EOpInverse:\r
1074                         if(visit == PostVisit)\r
1075                         {\r
1076                                 int size = arg->getNominalSize();\r
1077                                 ASSERT(size == arg->getSecondarySize());\r
1078 \r
1079                                 // Compute transposed matrix of cofactors\r
1080                                 for(int i = 0; i < size; ++i)\r
1081                                 {\r
1082                                         for(int j = 0; j < size; ++j)\r
1083                                         {\r
1084                                                 // For a 2x2 matrix, the cofactor is simply a transposed move or negate\r
1085                                                 // For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant\r
1086                                                 emitDeterminant(result, arg, size - 1, j, i, i, j);\r
1087                                         }\r
1088                                 }\r
1089 \r
1090                                 // Compute 1 / determinant\r
1091                                 Temporary invDet(this);\r
1092                                 emitDeterminant(&invDet, arg, size);\r
1093                                 Constant one(1.0f, 1.0f, 1.0f, 1.0f);\r
1094                                 Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet);\r
1095                                 div->src[1].swizzle = 0x00; // xxxx\r
1096 \r
1097                                 // Divide transposed matrix of cofactors by determinant\r
1098                                 for(int i = 0; i < size; ++i)\r
1099                                 {\r
1100                                         emit(sw::Shader::OPCODE_MUL, result, i, result, i, &invDet);\r
1101                                 }\r
1102                         }\r
1103                         break;\r
1104                 default: UNREACHABLE(node->getOp());\r
1105                 }\r
1106 \r
1107                 return true;\r
1108         }\r
1109 \r
1110         bool OutputASM::visitAggregate(Visit visit, TIntermAggregate *node)\r
1111         {\r
1112                 if(currentScope != emitScope && node->getOp() != EOpFunction && node->getOp() != EOpSequence)\r
1113                 {\r
1114                         return false;\r
1115                 }\r
1116 \r
1117                 Constant zero(0.0f, 0.0f, 0.0f, 0.0f);\r
1118 \r
1119                 TIntermTyped *result = node;\r
1120                 const TType &resultType = node->getType();\r
1121                 TIntermSequence &arg = node->getSequence();\r
1122                 size_t argumentCount = arg.size();\r
1123 \r
1124                 switch(node->getOp())\r
1125                 {\r
1126                 case EOpSequence:             break;\r
1127                 case EOpDeclaration:          break;\r
1128                 case EOpInvariantDeclaration: break;\r
1129                 case EOpPrototype:            break;\r
1130                 case EOpComma:\r
1131                         if(visit == PostVisit)\r
1132                         {\r
1133                                 copy(result, arg[1]);\r
1134                         }\r
1135                         break;\r
1136                 case EOpFunction:\r
1137                         if(visit == PreVisit)\r
1138                         {\r
1139                                 const TString &name = node->getName();\r
1140 \r
1141                                 if(emitScope == FUNCTION)\r
1142                                 {\r
1143                                         if(functionArray.size() > 1)   // No need for a label when there's only main()\r
1144                                         {\r
1145                                                 Instruction *label = emit(sw::Shader::OPCODE_LABEL);\r
1146                                                 label->dst.type = sw::Shader::PARAMETER_LABEL;\r
1147 \r
1148                                                 const Function *function = findFunction(name);\r
1149                                                 ASSERT(function);   // Should have been added during global pass\r
1150                                                 label->dst.index = function->label;\r
1151                                                 currentFunction = function->label;\r
1152                                         }\r
1153                                 }\r
1154                                 else if(emitScope == GLOBAL)\r
1155                                 {\r
1156                                         if(name != "main(")\r
1157                                         {\r
1158                                                 TIntermSequence &arguments = node->getSequence()[0]->getAsAggregate()->getSequence();\r
1159                                                 functionArray.push_back(Function(functionArray.size(), name, &arguments, node));\r
1160                                         }\r
1161                                 }\r
1162                                 else UNREACHABLE(emitScope);\r
1163 \r
1164                                 currentScope = FUNCTION;\r
1165                         }\r
1166                         else if(visit == PostVisit)\r
1167                         {\r
1168                                 if(emitScope == FUNCTION)\r
1169                                 {\r
1170                                         if(functionArray.size() > 1)   // No need to return when there's only main()\r
1171                                         {\r
1172                                                 emit(sw::Shader::OPCODE_RET);\r
1173                                         }\r
1174                                 }\r
1175 \r
1176                                 currentScope = GLOBAL;\r
1177                         }\r
1178                         break;\r
1179                 case EOpFunctionCall:\r
1180                         if(visit == PostVisit)\r
1181                         {\r
1182                                 if(node->isUserDefined())\r
1183                                 {\r
1184                                         const TString &name = node->getName();\r
1185                                         const Function *function = findFunction(name);\r
1186 \r
1187                                         if(!function)\r
1188                                         {\r
1189                                                 mContext.error(node->getLine(), "function definition not found", name.c_str());\r
1190                                                 return false;\r
1191                                         }\r
1192 \r
1193                                         TIntermSequence &arguments = *function->arg;\r
1194 \r
1195                                         for(size_t i = 0; i < argumentCount; i++)\r
1196                                         {\r
1197                                                 TIntermTyped *in = arguments[i]->getAsTyped();\r
1198 \r
1199                                                 if(in->getQualifier() == EvqIn ||\r
1200                                                    in->getQualifier() == EvqInOut ||\r
1201                                                    in->getQualifier() == EvqConstReadOnly)\r
1202                                                 {\r
1203                                                         copy(in, arg[i]);\r
1204                                                 }\r
1205                                         }\r
1206 \r
1207                                         Instruction *call = emit(sw::Shader::OPCODE_CALL);\r
1208                                         call->dst.type = sw::Shader::PARAMETER_LABEL;\r
1209                                         call->dst.index = function->label;\r
1210 \r
1211                                         if(function->ret && function->ret->getType().getBasicType() != EbtVoid)\r
1212                                         {\r
1213                                                 copy(result, function->ret);\r
1214                                         }\r
1215 \r
1216                                         for(size_t i = 0; i < argumentCount; i++)\r
1217                                         {\r
1218                                                 TIntermTyped *argument = arguments[i]->getAsTyped();\r
1219                                                 TIntermTyped *out = arg[i]->getAsTyped();\r
1220 \r
1221                                                 if(argument->getQualifier() == EvqOut ||\r
1222                                                    argument->getQualifier() == EvqInOut)\r
1223                                                 {\r
1224                                                         copy(out, argument);\r
1225                                                 }\r
1226                                         }\r
1227                                 }\r
1228                                 else\r
1229                                 {\r
1230                                         const TextureFunction textureFunction(node->getName());\r
1231                                         TIntermTyped *t = arg[1]->getAsTyped();\r
1232 \r
1233                                         Temporary coord(this);\r
1234 \r
1235                                         if(textureFunction.proj)\r
1236                                         {\r
1237                                                 TIntermConstantUnion* constant = arg[1]->getAsConstantUnion();\r
1238                                                 if(constant)\r
1239                                                 {\r
1240                                                         float projFactor = 1.0f / constant->getFConst(t->getNominalSize() - 1);\r
1241                                                         Constant projCoord(constant->getFConst(0) * projFactor,\r
1242                                                                            constant->getFConst(1) * projFactor,\r
1243                                                                            constant->getFConst(2) * projFactor,\r
1244                                                                            0.0f);\r
1245                                                         emit(sw::Shader::OPCODE_MOV, &coord, &projCoord);\r
1246                                                 }\r
1247                                                 else\r
1248                                                 {\r
1249                                                         Instruction *rcp = emit(sw::Shader::OPCODE_RCPX, &coord, arg[1]);\r
1250                                                         rcp->src[0].swizzle = 0x55 * (t->getNominalSize() - 1);\r
1251                                                         rcp->dst.mask = 0x7;\r
1252 \r
1253                                                         Instruction *mul = emit(sw::Shader::OPCODE_MUL, &coord, arg[1], &coord);\r
1254                                                         mul->dst.mask = 0x7;\r
1255                                                 }\r
1256                                         }\r
1257                                         else\r
1258                                         {\r
1259                                                 emit(sw::Shader::OPCODE_MOV, &coord, arg[1]);\r
1260                                         }\r
1261 \r
1262                                         switch(textureFunction.method)\r
1263                                         {\r
1264                                         case TextureFunction::IMPLICIT:\r
1265                                                 {\r
1266                                                         TIntermNode* offset = textureFunction.offset ? arg[2] : 0;\r
1267 \r
1268                                                         if(argumentCount == 2 || (textureFunction.offset && argumentCount == 3))\r
1269                                                         {\r
1270                                                                 Instruction *tex = emit(textureFunction.offset ? sw::Shader::OPCODE_TEXOFFSET : sw::Shader::OPCODE_TEX,\r
1271                                                                                         result, &coord, arg[0], offset);\r
1272                                                         }\r
1273                                                         else if(argumentCount == 3 || (textureFunction.offset && argumentCount == 4))   // bias\r
1274                                                         {\r
1275                                                                 Instruction *bias = emit(sw::Shader::OPCODE_MOV, &coord, arg[textureFunction.offset ? 3 : 2]);\r
1276                                                                 bias->dst.mask = 0x8;\r
1277 \r
1278                                                                 Instruction *tex = emit(textureFunction.offset ? sw::Shader::OPCODE_TEXOFFSET : sw::Shader::OPCODE_TEX,\r
1279                                                                                         result, &coord, arg[0], offset); // FIXME: Implement an efficient TEXLDB instruction\r
1280                                                                 tex->bias = true;\r
1281                                                         }\r
1282                                                         else UNREACHABLE(argumentCount);\r
1283                                                 }\r
1284                                                 break;\r
1285                                         case TextureFunction::LOD:\r
1286                                                 {\r
1287                                                         Instruction *lod = emit(sw::Shader::OPCODE_MOV, &coord, arg[2]);\r
1288                                                         lod->dst.mask = 0x8;\r
1289 \r
1290                                                         emit(textureFunction.offset ? sw::Shader::OPCODE_TEXLDLOFFSET : sw::Shader::OPCODE_TEXLDL,\r
1291                                                              result, &coord, arg[0], textureFunction.offset ? arg[3] : nullptr);\r
1292                                                 }\r
1293                                                 break;\r
1294                                         case TextureFunction::FETCH:\r
1295                                                 {\r
1296                                                         if(argumentCount == 3 || (textureFunction.offset && argumentCount == 4))\r
1297                                                         {\r
1298                                                                 TIntermNode *offset = textureFunction.offset ? arg[3] : nullptr;\r
1299 \r
1300                                                                 emit(textureFunction.offset ? sw::Shader::OPCODE_TEXELFETCHOFFSET : sw::Shader::OPCODE_TEXELFETCH,\r
1301                                                                      result, arg[1], arg[0], arg[2], offset);\r
1302                                                         }\r
1303                                                         else UNREACHABLE(argumentCount);\r
1304                                                 }\r
1305                                                 break;\r
1306                                         case TextureFunction::GRAD:\r
1307                                                 {\r
1308                                                         if(argumentCount == 4 || (textureFunction.offset && argumentCount == 5))\r
1309                                                         {\r
1310                                                                 TIntermNode *offset = textureFunction.offset ? arg[4] : nullptr;\r
1311 \r
1312                                                                 emit(textureFunction.offset ? sw::Shader::OPCODE_TEXGRADOFFSET : sw::Shader::OPCODE_TEXGRAD,\r
1313                                                                      result, &coord, arg[0], arg[2], arg[3], offset);\r
1314                                                         }\r
1315                                                         else UNREACHABLE(argumentCount);\r
1316                                                 }\r
1317                                                 break;\r
1318                                         case TextureFunction::SIZE:\r
1319                                                 emit(sw::Shader::OPCODE_TEXSIZE, result, arg[1], arg[0]);\r
1320                                                 break;\r
1321                                         default:\r
1322                                                 UNREACHABLE(textureFunction.method);\r
1323                                         }\r
1324                                 }\r
1325                         }\r
1326                         break;\r
1327                 case EOpParameters:\r
1328                         break;\r
1329                 case EOpConstructFloat:\r
1330                 case EOpConstructVec2:\r
1331                 case EOpConstructVec3:\r
1332                 case EOpConstructVec4:\r
1333                 case EOpConstructBool:\r
1334                 case EOpConstructBVec2:\r
1335                 case EOpConstructBVec3:\r
1336                 case EOpConstructBVec4:\r
1337                 case EOpConstructInt:\r
1338                 case EOpConstructIVec2:\r
1339                 case EOpConstructIVec3:\r
1340                 case EOpConstructIVec4:\r
1341                 case EOpConstructUInt:\r
1342                 case EOpConstructUVec2:\r
1343                 case EOpConstructUVec3:\r
1344                 case EOpConstructUVec4:\r
1345                         if(visit == PostVisit)\r
1346                         {\r
1347                                 int component = 0;\r
1348 \r
1349                                 for(size_t i = 0; i < argumentCount; i++)\r
1350                                 {\r
1351                                         TIntermTyped *argi = arg[i]->getAsTyped();\r
1352                                         int size = argi->getNominalSize();\r
1353 \r
1354                                         if(!argi->isMatrix())\r
1355                                         {\r
1356                                                 Instruction *mov = emitCast(result, argi);\r
1357                                                 mov->dst.mask = (0xF << component) & 0xF;\r
1358                                                 mov->src[0].swizzle = readSwizzle(argi, size) << (component * 2);\r
1359 \r
1360                                                 component += size;\r
1361                                         }\r
1362                                         else   // Matrix\r
1363                                         {\r
1364                                                 int column = 0;\r
1365 \r
1366                                                 while(component < resultType.getNominalSize())\r
1367                                                 {\r
1368                                                         Instruction *mov = emitCast(result, 0, argi, column);\r
1369                                                         mov->dst.mask = (0xF << component) & 0xF;\r
1370                                                         mov->src[0].swizzle = readSwizzle(argi, size) << (component * 2);\r
1371 \r
1372                                                         column++;\r
1373                                                         component += size;\r
1374                                                 }\r
1375                                         }\r
1376                                 }\r
1377                         }\r
1378                         break;\r
1379                 case EOpConstructMat2:\r
1380                 case EOpConstructMat2x3:\r
1381                 case EOpConstructMat2x4:\r
1382                 case EOpConstructMat3x2:\r
1383                 case EOpConstructMat3:\r
1384                 case EOpConstructMat3x4:\r
1385                 case EOpConstructMat4x2:\r
1386                 case EOpConstructMat4x3:\r
1387                 case EOpConstructMat4:\r
1388                         if(visit == PostVisit)\r
1389                         {\r
1390                                 TIntermTyped *arg0 = arg[0]->getAsTyped();\r
1391                                 const int outCols = result->getNominalSize();\r
1392                                 const int outRows = result->getSecondarySize();\r
1393 \r
1394                                 if(arg0->isScalar() && arg.size() == 1)   // Construct scale matrix\r
1395                                 {\r
1396                                         for(int i = 0; i < outCols; i++)\r
1397                                         {\r
1398                                                 Instruction *init = emit(sw::Shader::OPCODE_MOV, result, i, &zero);\r
1399                                                 Instruction *mov = emitCast(result, i, arg0, 0);\r
1400                                                 mov->dst.mask = 1 << i;\r
1401                                                 ASSERT(mov->src[0].swizzle == 0x00);\r
1402                                         }\r
1403                                 }\r
1404                                 else if(arg0->isMatrix())\r
1405                                 {\r
1406                                         const int inCols = arg0->getNominalSize();\r
1407                                         const int inRows = arg0->getSecondarySize();\r
1408 \r
1409                                         for(int i = 0; i < outCols; i++)\r
1410                                         {\r
1411                                                 if(i >= inCols || outRows > inRows)\r
1412                                                 {\r
1413                                                         // Initialize to identity matrix\r
1414                                                         Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f));\r
1415                                                         Instruction *mov = emitCast(result, i, &col, 0);\r
1416                                                 }\r
1417 \r
1418                                                 if(i < inCols)\r
1419                                                 {\r
1420                                                         Instruction *mov = emitCast(result, i, arg0, i);\r
1421                                                         mov->dst.mask = 0xF >> (4 - inRows);\r
1422                                                 }\r
1423                                         }\r
1424                                 }\r
1425                                 else\r
1426                                 {\r
1427                                         int column = 0;\r
1428                                         int row = 0;\r
1429 \r
1430                                         for(size_t i = 0; i < argumentCount; i++)\r
1431                                         {\r
1432                                                 TIntermTyped *argi = arg[i]->getAsTyped();\r
1433                                                 int size = argi->getNominalSize();\r
1434                                                 int element = 0;\r
1435 \r
1436                                                 while(element < size)\r
1437                                                 {\r
1438                                                         Instruction *mov = emitCast(result, column, argi, 0);\r
1439                                                         mov->dst.mask = (0xF << row) & 0xF;\r
1440                                                         mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element;\r
1441 \r
1442                                                         int end = row + size - element;\r
1443                                                         column = end >= outRows ? column + 1 : column;\r
1444                                                         element = element + outRows - row;\r
1445                                                         row = end >= outRows ? 0 : end;\r
1446                                                 }\r
1447                                         }\r
1448                                 }\r
1449                         }\r
1450                         break;\r
1451                 case EOpConstructStruct:\r
1452                         if(visit == PostVisit)\r
1453                         {\r
1454                                 int offset = 0;\r
1455                                 for(size_t i = 0; i < argumentCount; i++)\r
1456                                 {\r
1457                                         TIntermTyped *argi = arg[i]->getAsTyped();\r
1458                                         int size = argi->totalRegisterCount();\r
1459 \r
1460                                         for(int index = 0; index < size; index++)\r
1461                                         {\r
1462                                                 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, index + offset, argi, index);\r
1463                                                 mov->dst.mask = writeMask(result, offset + index);\r
1464                                         }\r
1465 \r
1466                                         offset += size;\r
1467                                 }\r
1468                         }\r
1469                         break;\r
1470                 case EOpLessThan:         if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, arg[0], arg[1]); break;\r
1471                 case EOpGreaterThan:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, arg[0], arg[1]); break;\r
1472                 case EOpLessThanEqual:    if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, arg[0], arg[1]); break;\r
1473                 case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, arg[0], arg[1]); break;\r
1474                 case EOpVectorEqual:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_EQ, result, arg[0], arg[1]); break;\r
1475                 case EOpVectorNotEqual:   if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_NE, result, arg[0], arg[1]); break;\r
1476                 case EOpMod:              if(visit == PostVisit) emit(sw::Shader::OPCODE_MOD, result, arg[0], arg[1]); break;\r
1477                 case EOpModf:\r
1478                         if(visit == PostVisit)\r
1479                         {\r
1480                                 TIntermTyped* arg1 = arg[1]->getAsTyped();\r
1481                                 emit(sw::Shader::OPCODE_TRUNC, arg1, arg[0]);\r
1482                                 assignLvalue(arg1, arg1);\r
1483                                 emitBinary(sw::Shader::OPCODE_SUB, result, arg[0], arg1);\r
1484                         }\r
1485                         break;\r
1486                 case EOpPow:              if(visit == PostVisit) emit(sw::Shader::OPCODE_POW, result, arg[0], arg[1]); break;\r
1487                 case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN2, result, arg[0], arg[1]); break;\r
1488                 case EOpMin:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, arg[0], arg[1]); break;\r
1489                 case EOpMax:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); break;\r
1490                 case EOpClamp:\r
1491                         if(visit == PostVisit)\r
1492                         {\r
1493                                 emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]);\r
1494                                 emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, result, arg[2]);\r
1495                         }\r
1496                         break;\r
1497                 case EOpMix:         if(visit == PostVisit) emit(sw::Shader::OPCODE_LRP, result, arg[2], arg[1], arg[0]); break;\r
1498                 case EOpStep:        if(visit == PostVisit) emit(sw::Shader::OPCODE_STEP, result, arg[0], arg[1]); break;\r
1499                 case EOpSmoothStep:  if(visit == PostVisit) emit(sw::Shader::OPCODE_SMOOTH, result, arg[0], arg[1], arg[2]); break;\r
1500                 case EOpDistance:    if(visit == PostVisit) emit(sw::Shader::OPCODE_DIST(dim(arg[0])), result, arg[0], arg[1]); break;\r
1501                 case EOpDot:         if(visit == PostVisit) emit(sw::Shader::OPCODE_DP(dim(arg[0])), result, arg[0], arg[1]); break;\r
1502                 case EOpCross:       if(visit == PostVisit) emit(sw::Shader::OPCODE_CRS, result, arg[0], arg[1]); break;\r
1503                 case EOpFaceForward: if(visit == PostVisit) emit(sw::Shader::OPCODE_FORWARD(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;\r
1504                 case EOpReflect:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFLECT(dim(arg[0])), result, arg[0], arg[1]); break;\r
1505                 case EOpRefract:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFRACT(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;\r
1506                 case EOpMul:\r
1507                         if(visit == PostVisit)\r
1508                         {\r
1509                                 TIntermTyped *arg0 = arg[0]->getAsTyped();\r
1510                                 TIntermTyped *arg1 = arg[1]->getAsTyped();\r
1511                                 ASSERT((arg0->getNominalSize() == arg1->getNominalSize()) && (arg0->getSecondarySize() == arg1->getSecondarySize()));\r
1512 \r
1513                                 int size = arg0->getNominalSize();\r
1514                                 for(int i = 0; i < size; i++)\r
1515                                 {\r
1516                                         emit(sw::Shader::OPCODE_MUL, result, i, arg[0], i, arg[1], i);\r
1517                                 }\r
1518                         }\r
1519                         break;\r
1520                 case EOpOuterProduct:\r
1521                         if(visit == PostVisit)\r
1522                         {\r
1523                                 for(int i = 0; i < dim(arg[1]); i++)\r
1524                                 {\r
1525                                         Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, i, arg[0], 0, arg[1]);\r
1526                                         mul->src[1].swizzle = 0x55 * i;\r
1527                                 }\r
1528                         }\r
1529                         break;\r
1530                 default: UNREACHABLE(node->getOp());\r
1531                 }\r
1532 \r
1533                 return true;\r
1534         }\r
1535 \r
1536         bool OutputASM::visitSelection(Visit visit, TIntermSelection *node)\r
1537         {\r
1538                 if(currentScope != emitScope)\r
1539                 {\r
1540                         return false;\r
1541                 }\r
1542 \r
1543                 TIntermTyped *condition = node->getCondition();\r
1544                 TIntermNode *trueBlock = node->getTrueBlock();\r
1545                 TIntermNode *falseBlock = node->getFalseBlock();\r
1546                 TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();\r
1547 \r
1548                 condition->traverse(this);\r
1549 \r
1550                 if(node->usesTernaryOperator())\r
1551                 {\r
1552                         if(constantCondition)\r
1553                         {\r
1554                                 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();\r
1555 \r
1556                                 if(trueCondition)\r
1557                                 {\r
1558                                         trueBlock->traverse(this);\r
1559                                         copy(node, trueBlock);\r
1560                                 }\r
1561                                 else\r
1562                                 {\r
1563                                         falseBlock->traverse(this);\r
1564                                         copy(node, falseBlock);\r
1565                                 }\r
1566                         }\r
1567                         else if(trivial(node, 6))   // Fast to compute both potential results and no side effects\r
1568                         {\r
1569                                 trueBlock->traverse(this);\r
1570                                 falseBlock->traverse(this);\r
1571                                 emit(sw::Shader::OPCODE_SELECT, node, condition, trueBlock, falseBlock);\r
1572                         }\r
1573                         else\r
1574                         {\r
1575                                 emit(sw::Shader::OPCODE_IF, 0, condition);\r
1576 \r
1577                                 if(trueBlock)\r
1578                                 {\r
1579                                         trueBlock->traverse(this);\r
1580                                         copy(node, trueBlock);\r
1581                                 }\r
1582 \r
1583                                 if(falseBlock)\r
1584                                 {\r
1585                                         emit(sw::Shader::OPCODE_ELSE);\r
1586                                         falseBlock->traverse(this);\r
1587                                         copy(node, falseBlock);\r
1588                                 }\r
1589 \r
1590                                 emit(sw::Shader::OPCODE_ENDIF);\r
1591                         }\r
1592                 }\r
1593                 else  // if/else statement\r
1594                 {\r
1595                         if(constantCondition)\r
1596                         {\r
1597                                 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();\r
1598 \r
1599                                 if(trueCondition)\r
1600                                 {\r
1601                                         if(trueBlock)\r
1602                                         {\r
1603                                                 trueBlock->traverse(this);\r
1604                                         }\r
1605                                 }\r
1606                                 else\r
1607                                 {\r
1608                                         if(falseBlock)\r
1609                                         {\r
1610                                                 falseBlock->traverse(this);\r
1611                                         }\r
1612                                 }\r
1613                         }\r
1614                         else\r
1615                         {\r
1616                                 emit(sw::Shader::OPCODE_IF, 0, condition);\r
1617 \r
1618                                 if(trueBlock)\r
1619                                 {\r
1620                                         trueBlock->traverse(this);\r
1621                                 }\r
1622 \r
1623                                 if(falseBlock)\r
1624                                 {\r
1625                                         emit(sw::Shader::OPCODE_ELSE);\r
1626                                         falseBlock->traverse(this);\r
1627                                 }\r
1628 \r
1629                                 emit(sw::Shader::OPCODE_ENDIF);\r
1630                         }\r
1631                 }\r
1632 \r
1633                 return false;\r
1634         }\r
1635 \r
1636         bool OutputASM::visitLoop(Visit visit, TIntermLoop *node)\r
1637         {\r
1638                 if(currentScope != emitScope)\r
1639                 {\r
1640                         return false;\r
1641                 }\r
1642 \r
1643                 unsigned int iterations = loopCount(node);\r
1644 \r
1645                 if(iterations == 0)\r
1646                 {\r
1647                         return false;\r
1648                 }\r
1649 \r
1650                 bool unroll = (iterations <= 4);\r
1651 \r
1652                 if(unroll)\r
1653                 {\r
1654                         LoopUnrollable loopUnrollable;\r
1655                         unroll = loopUnrollable.traverse(node);\r
1656                 }\r
1657 \r
1658                 TIntermNode *init = node->getInit();\r
1659                 TIntermTyped *condition = node->getCondition();\r
1660                 TIntermTyped *expression = node->getExpression();\r
1661                 TIntermNode *body = node->getBody();\r
1662                 Constant True(true);\r
1663 \r
1664                 if(node->getType() == ELoopDoWhile)\r
1665                 {\r
1666                         Temporary iterate(this);\r
1667                         emit(sw::Shader::OPCODE_MOV, &iterate, &True);\r
1668 \r
1669                         emit(sw::Shader::OPCODE_WHILE, 0, &iterate);   // FIXME: Implement real do-while\r
1670 \r
1671                         if(body)\r
1672                         {\r
1673                                 body->traverse(this);\r
1674                         }\r
1675 \r
1676                         emit(sw::Shader::OPCODE_TEST);\r
1677 \r
1678                         condition->traverse(this);\r
1679                         emit(sw::Shader::OPCODE_MOV, &iterate, condition);\r
1680 \r
1681                         emit(sw::Shader::OPCODE_ENDWHILE);\r
1682                 }\r
1683                 else\r
1684                 {\r
1685                         if(init)\r
1686                         {\r
1687                                 init->traverse(this);\r
1688                         }\r
1689 \r
1690                         if(unroll)\r
1691                         {\r
1692                                 for(unsigned int i = 0; i < iterations; i++)\r
1693                                 {\r
1694                                 //      condition->traverse(this);   // Condition could contain statements, but not in an unrollable loop\r
1695 \r
1696                                         if(body)\r
1697                                         {\r
1698                                                 body->traverse(this);\r
1699                                         }\r
1700 \r
1701                                         if(expression)\r
1702                                         {\r
1703                                                 expression->traverse(this);\r
1704                                         }\r
1705                                 }\r
1706                         }\r
1707                         else\r
1708                         {\r
1709                                 if(condition)\r
1710                                 {\r
1711                                         condition->traverse(this);\r
1712                                 }\r
1713                                 else\r
1714                                 {\r
1715                                         condition = &True;\r
1716                                 }\r
1717 \r
1718                                 emit(sw::Shader::OPCODE_WHILE, 0, condition);\r
1719 \r
1720                                 if(body)\r
1721                                 {\r
1722                                         body->traverse(this);\r
1723                                 }\r
1724 \r
1725                                 emit(sw::Shader::OPCODE_TEST);\r
1726 \r
1727                                 if(expression)\r
1728                                 {\r
1729                                         expression->traverse(this);\r
1730                                 }\r
1731 \r
1732                                 if(condition)\r
1733                                 {\r
1734                                         condition->traverse(this);\r
1735                                 }\r
1736 \r
1737                                 emit(sw::Shader::OPCODE_ENDWHILE);\r
1738                         }\r
1739                 }\r
1740 \r
1741                 return false;\r
1742         }\r
1743 \r
1744         bool OutputASM::visitBranch(Visit visit, TIntermBranch *node)\r
1745         {\r
1746                 if(currentScope != emitScope)\r
1747                 {\r
1748                         return false;\r
1749                 }\r
1750 \r
1751                 switch(node->getFlowOp())\r
1752                 {\r
1753                 case EOpKill:      if(visit == PostVisit) emit(sw::Shader::OPCODE_DISCARD);  break;\r
1754                 case EOpBreak:     if(visit == PostVisit) emit(sw::Shader::OPCODE_BREAK);    break;\r
1755                 case EOpContinue:  if(visit == PostVisit) emit(sw::Shader::OPCODE_CONTINUE); break;\r
1756                 case EOpReturn:\r
1757                         if(visit == PostVisit)\r
1758                         {\r
1759                                 TIntermTyped *value = node->getExpression();\r
1760 \r
1761                                 if(value)\r
1762                                 {\r
1763                                         copy(functionArray[currentFunction].ret, value);\r
1764                                 }\r
1765 \r
1766                                 emit(sw::Shader::OPCODE_LEAVE);\r
1767                         }\r
1768                         break;\r
1769                 default: UNREACHABLE(node->getFlowOp());\r
1770                 }\r
1771 \r
1772                 return true;\r
1773         }\r
1774 \r
1775         Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, TIntermNode *src4)\r
1776         {\r
1777                 return emit(op, dst, 0, src0, 0, src1, 0, src2, 0, src3, 0, src4, 0);\r
1778         }\r
1779 \r
1780         Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, int dstIndex, TIntermNode *src0, int index0, TIntermNode *src1, int index1,\r
1781                                      TIntermNode *src2, int index2, TIntermNode *src3, int index3, TIntermNode *src4, int index4)\r
1782         {\r
1783                 Instruction *instruction = new Instruction(op);\r
1784 \r
1785                 if(dst)\r
1786                 {\r
1787                         instruction->dst.type = registerType(dst);\r
1788                         instruction->dst.index = registerIndex(dst) + dstIndex;\r
1789                         instruction->dst.mask = writeMask(dst);\r
1790                         instruction->dst.integer = (dst->getBasicType() == EbtInt);\r
1791                 }\r
1792 \r
1793                 argument(instruction->src[0], src0, index0);\r
1794                 argument(instruction->src[1], src1, index1);\r
1795                 argument(instruction->src[2], src2, index2);\r
1796                 argument(instruction->src[3], src3, index3);\r
1797                 argument(instruction->src[4], src4, index4);\r
1798 \r
1799                 shader->append(instruction);\r
1800 \r
1801                 return instruction;\r
1802         }\r
1803 \r
1804         Instruction *OutputASM::emitCast(TIntermTyped *dst, TIntermTyped *src)\r
1805         {\r
1806                 return emitCast(dst, 0, src, 0);\r
1807         }\r
1808 \r
1809         Instruction *OutputASM::emitCast(TIntermTyped *dst, int dstIndex, TIntermTyped *src, int srcIndex)\r
1810         {\r
1811                 switch(src->getBasicType())\r
1812                 {\r
1813                 case EbtBool:\r
1814                         switch(dst->getBasicType())\r
1815                         {\r
1816                         case EbtInt:   return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);\r
1817                         case EbtUInt:  return emit(sw::Shader::OPCODE_B2I, dst, dstIndex, src, srcIndex);\r
1818                         case EbtFloat: return emit(sw::Shader::OPCODE_B2F, dst, dstIndex, src, srcIndex);\r
1819                         default:       break;\r
1820                         }\r
1821                         break;\r
1822                 case EbtInt:\r
1823                         switch(dst->getBasicType())\r
1824                         {\r
1825                         case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);\r
1826                         case EbtFloat: return emit(sw::Shader::OPCODE_I2F, dst, dstIndex, src, srcIndex);\r
1827                         default:       break;\r
1828                         }\r
1829                         break;\r
1830                 case EbtUInt:\r
1831                         switch(dst->getBasicType())\r
1832                         {\r
1833                         case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, dstIndex, src, srcIndex);\r
1834                         case EbtFloat: return emit(sw::Shader::OPCODE_U2F, dst, dstIndex, src, srcIndex);\r
1835                         default:       break;\r
1836                         }\r
1837                         break;\r
1838                 case EbtFloat:\r
1839                         switch(dst->getBasicType())\r
1840                         {\r
1841                         case EbtBool: return emit(sw::Shader::OPCODE_F2B, dst, dstIndex, src, srcIndex);\r
1842                         case EbtInt:  return emit(sw::Shader::OPCODE_F2I, dst, dstIndex, src, srcIndex);\r
1843                         case EbtUInt: return emit(sw::Shader::OPCODE_F2U, dst, dstIndex, src, srcIndex);\r
1844                         default:      break;\r
1845                         }\r
1846                         break;\r
1847                 default:\r
1848                         break;\r
1849                 }\r
1850 \r
1851                 ASSERT((src->getBasicType() == dst->getBasicType()) ||\r
1852                       ((src->getBasicType() == EbtInt) && (dst->getBasicType() == EbtUInt)) ||\r
1853                       ((src->getBasicType() == EbtUInt) && (dst->getBasicType() == EbtInt)));\r
1854 \r
1855                 return emit(sw::Shader::OPCODE_MOV, dst, dstIndex, src, srcIndex);\r
1856         }\r
1857 \r
1858         void OutputASM::emitBinary(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2)\r
1859         {\r
1860                 for(int index = 0; index < dst->elementRegisterCount(); index++)\r
1861                 {\r
1862                         emit(op, dst, index, src0, index, src1, index, src2, index);\r
1863                 }\r
1864         }\r
1865 \r
1866         void OutputASM::emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1)\r
1867         {\r
1868                 emitBinary(op, result, src0, src1);\r
1869                 assignLvalue(lhs, result);\r
1870         }\r
1871 \r
1872         void OutputASM::emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index)\r
1873         {\r
1874                 sw::Shader::Opcode opcode;\r
1875                 switch(left->getAsTyped()->getBasicType())\r
1876                 {\r
1877                 case EbtBool:\r
1878                 case EbtInt:\r
1879                         opcode = sw::Shader::OPCODE_ICMP;\r
1880                         break;\r
1881                 case EbtUInt:\r
1882                         opcode = sw::Shader::OPCODE_UCMP;\r
1883                         break;\r
1884                 default:\r
1885                         opcode = sw::Shader::OPCODE_CMP;\r
1886                         break;\r
1887                 }\r
1888 \r
1889                 Instruction *cmp = emit(opcode, dst, 0, left, index, right, index);\r
1890                 cmp->control = cmpOp;\r
1891         }\r
1892 \r
1893         int componentCount(const TType &type, int registers)\r
1894         {\r
1895                 if(registers == 0)\r
1896                 {\r
1897                         return 0;\r
1898                 }\r
1899 \r
1900                 if(type.isArray() && registers >= type.elementRegisterCount())\r
1901                 {\r
1902                         int index = registers / type.elementRegisterCount();\r
1903                         registers -= index * type.elementRegisterCount();\r
1904                         return index * type.getElementSize() + componentCount(type, registers);\r
1905                 }\r
1906 \r
1907                 if(type.isStruct() || type.isInterfaceBlock())\r
1908                 {\r
1909                         const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();\r
1910                         int elements = 0;\r
1911 \r
1912                         for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++)\r
1913                         {\r
1914                                 const TType &fieldType = *((*field)->type());\r
1915 \r
1916                                 if(fieldType.totalRegisterCount() <= registers)\r
1917                                 {\r
1918                                         registers -= fieldType.totalRegisterCount();\r
1919                                         elements += fieldType.getObjectSize();\r
1920                                 }\r
1921                                 else   // Register within this field\r
1922                                 {\r
1923                                         return elements + componentCount(fieldType, registers);\r
1924                                 }\r
1925                         }\r
1926                 }\r
1927                 else if(type.isMatrix())\r
1928                 {\r
1929                         return registers * type.registerSize();\r
1930                 }\r
1931 \r
1932                 UNREACHABLE(0);\r
1933                 return 0;\r
1934         }\r
1935 \r
1936         int registerSize(const TType &type, int registers)\r
1937         {\r
1938                 if(registers == 0)\r
1939                 {\r
1940                         if(type.isStruct())\r
1941                         {\r
1942                                 return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0);\r
1943                         }\r
1944                         else if(type.isInterfaceBlock())\r
1945                         {\r
1946                                 return registerSize(*((*(type.getInterfaceBlock()->fields().begin()))->type()), 0);\r
1947                         }\r
1948 \r
1949                         return type.registerSize();\r
1950                 }\r
1951 \r
1952                 if(type.isArray() && registers >= type.elementRegisterCount())\r
1953                 {\r
1954                         int index = registers / type.elementRegisterCount();\r
1955                         registers -= index * type.elementRegisterCount();\r
1956                         return registerSize(type, registers);\r
1957                 }\r
1958 \r
1959                 if(type.isStruct() || type.isInterfaceBlock())\r
1960                 {\r
1961                         const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();\r
1962                         int elements = 0;\r
1963 \r
1964                         for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++)\r
1965                         {\r
1966                                 const TType &fieldType = *((*field)->type());\r
1967 \r
1968                                 if(fieldType.totalRegisterCount() <= registers)\r
1969                                 {\r
1970                                         registers -= fieldType.totalRegisterCount();\r
1971                                         elements += fieldType.getObjectSize();\r
1972                                 }\r
1973                                 else   // Register within this field\r
1974                                 {\r
1975                                         return registerSize(fieldType, registers);\r
1976                                 }\r
1977                         }\r
1978                 }\r
1979                 else if(type.isMatrix())\r
1980                 {\r
1981                         return registerSize(type, 0);\r
1982                 }\r
1983 \r
1984                 UNREACHABLE(0);\r
1985                 return 0;\r
1986         }\r
1987 \r
1988         int OutputASM::getBlockId(TIntermTyped *arg)\r
1989         {\r
1990                 if(arg)\r
1991                 {\r
1992                         const TType &type = arg->getType();\r
1993                         TInterfaceBlock* block = type.getInterfaceBlock();\r
1994                         if(block && (type.getQualifier() == EvqUniform))\r
1995                         {\r
1996                                 // Make sure the uniform block is declared\r
1997                                 uniformRegister(arg);\r
1998 \r
1999                                 const char* blockName = block->name().c_str();\r
2000 \r
2001                                 // Fetch uniform block index from array of blocks\r
2002                                 for(ActiveUniformBlocks::const_iterator it = shaderObject->activeUniformBlocks.begin(); it != shaderObject->activeUniformBlocks.end(); ++it)\r
2003                                 {\r
2004                                         if(blockName == it->name)\r
2005                                         {\r
2006                                                 return it->blockId;\r
2007                                         }\r
2008                                 }\r
2009 \r
2010                                 ASSERT(false);\r
2011                         }\r
2012                 }\r
2013 \r
2014                 return -1;\r
2015         }\r
2016 \r
2017         OutputASM::ArgumentInfo OutputASM::getArgumentInfo(TIntermTyped *arg, int index)\r
2018         {\r
2019                 const TType &type = arg->getType();\r
2020                 int blockId = getBlockId(arg);\r
2021                 ArgumentInfo argumentInfo(BlockMemberInfo::getDefaultBlockInfo(), type, -1, -1);\r
2022                 if(blockId != -1)\r
2023                 {\r
2024                         argumentInfo.bufferIndex = 0;\r
2025                         for(int i = 0; i < blockId; ++i)\r
2026                         {\r
2027                                 int blockArraySize = shaderObject->activeUniformBlocks[i].arraySize;\r
2028                                 argumentInfo.bufferIndex += blockArraySize > 0 ? blockArraySize : 1;\r
2029                         }\r
2030 \r
2031                         const BlockDefinitionIndexMap& blockDefinition = blockDefinitions[blockId];\r
2032 \r
2033                         BlockDefinitionIndexMap::const_iterator itEnd = blockDefinition.end();\r
2034                         BlockDefinitionIndexMap::const_iterator it = itEnd;\r
2035 \r
2036                         argumentInfo.clampedIndex = index;\r
2037                         if(type.isInterfaceBlock())\r
2038                         {\r
2039                                 // Offset index to the beginning of the selected instance\r
2040                                 int blockRegisters = type.elementRegisterCount();\r
2041                                 int bufferOffset = argumentInfo.clampedIndex / blockRegisters;\r
2042                                 argumentInfo.bufferIndex += bufferOffset;\r
2043                                 argumentInfo.clampedIndex -= bufferOffset * blockRegisters;\r
2044                         }\r
2045 \r
2046                         int regIndex = registerIndex(arg);\r
2047                         for(int i = regIndex + argumentInfo.clampedIndex; i >= regIndex; --i)\r
2048                         {\r
2049                                 it = blockDefinition.find(i);\r
2050                                 if(it != itEnd)\r
2051                                 {\r
2052                                         argumentInfo.clampedIndex -= (i - regIndex);\r
2053                                         break;\r
2054                                 }\r
2055                         }\r
2056                         ASSERT(it != itEnd);\r
2057 \r
2058                         argumentInfo.typedMemberInfo = it->second;\r
2059 \r
2060                         int registerCount = argumentInfo.typedMemberInfo.type.totalRegisterCount();\r
2061                         argumentInfo.clampedIndex = (argumentInfo.clampedIndex >= registerCount) ? registerCount - 1 : argumentInfo.clampedIndex;\r
2062                 }\r
2063                 else\r
2064                 {\r
2065                         argumentInfo.clampedIndex = (index >= arg->totalRegisterCount()) ? arg->totalRegisterCount() - 1 : index;\r
2066                 }\r
2067 \r
2068                 return argumentInfo;\r
2069         }\r
2070 \r
2071         void OutputASM::argument(sw::Shader::SourceParameter &parameter, TIntermNode *argument, int index)\r
2072         {\r
2073                 if(argument)\r
2074                 {\r
2075                         TIntermTyped *arg = argument->getAsTyped();\r
2076                         Temporary unpackedUniform(this);\r
2077 \r
2078                         const TType& srcType = arg->getType();\r
2079                         TInterfaceBlock* srcBlock = srcType.getInterfaceBlock();\r
2080                         if(srcBlock && (srcType.getQualifier() == EvqUniform))\r
2081                         {\r
2082                                 const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);\r
2083                                 const TType &memberType = argumentInfo.typedMemberInfo.type;\r
2084 \r
2085                                 if(memberType.getBasicType() == EbtBool)\r
2086                                 {\r
2087                                         int arraySize = (memberType.isArray() ? memberType.getArraySize() : 1);\r
2088                                         ASSERT(argumentInfo.clampedIndex < arraySize);\r
2089 \r
2090                                         // Convert the packed bool, which is currently an int, to a true bool\r
2091                                         Instruction *instruction = new Instruction(sw::Shader::OPCODE_I2B);\r
2092                                         instruction->dst.type = sw::Shader::PARAMETER_TEMP;\r
2093                                         instruction->dst.index = registerIndex(&unpackedUniform);\r
2094                                         instruction->src[0].type = sw::Shader::PARAMETER_CONST;\r
2095                                         instruction->src[0].bufferIndex = argumentInfo.bufferIndex;\r
2096                                         instruction->src[0].index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * argumentInfo.typedMemberInfo.arrayStride;\r
2097 \r
2098                                         shader->append(instruction);\r
2099 \r
2100                                         arg = &unpackedUniform;\r
2101                                         index = 0;\r
2102                                 }\r
2103                                 else if((srcBlock->matrixPacking() == EmpRowMajor) && memberType.isMatrix())\r
2104                                 {\r
2105                                         int numCols = memberType.getNominalSize();\r
2106                                         int numRows = memberType.getSecondarySize();\r
2107                                         int arraySize = (memberType.isArray() ? memberType.getArraySize() : 1);\r
2108 \r
2109                                         ASSERT(argumentInfo.clampedIndex < (numCols * arraySize));\r
2110 \r
2111                                         unsigned int dstIndex = registerIndex(&unpackedUniform);\r
2112                                         unsigned int srcSwizzle = (argumentInfo.clampedIndex % numCols) * 0x55;\r
2113                                         int arrayIndex = argumentInfo.clampedIndex / numCols;\r
2114                                         int matrixStartOffset = argumentInfo.typedMemberInfo.offset + arrayIndex * argumentInfo.typedMemberInfo.arrayStride;\r
2115 \r
2116                                         for(int j = 0; j < numRows; ++j)\r
2117                                         {\r
2118                                                 // Transpose the row major matrix\r
2119                                                 Instruction *instruction = new Instruction(sw::Shader::OPCODE_MOV);\r
2120                                                 instruction->dst.type = sw::Shader::PARAMETER_TEMP;\r
2121                                                 instruction->dst.index = dstIndex;\r
2122                                                 instruction->dst.mask = 1 << j;\r
2123                                                 instruction->src[0].type = sw::Shader::PARAMETER_CONST;\r
2124                                                 instruction->src[0].bufferIndex = argumentInfo.bufferIndex;\r
2125                                                 instruction->src[0].index = matrixStartOffset + j * argumentInfo.typedMemberInfo.matrixStride;\r
2126                                                 instruction->src[0].swizzle = srcSwizzle;\r
2127 \r
2128                                                 shader->append(instruction);\r
2129                                         }\r
2130 \r
2131                                         arg = &unpackedUniform;\r
2132                                         index = 0;\r
2133                                 }\r
2134                         }\r
2135 \r
2136                         const ArgumentInfo argumentInfo = getArgumentInfo(arg, index);\r
2137                         const TType &type = argumentInfo.typedMemberInfo.type;\r
2138 \r
2139                         int size = registerSize(type, argumentInfo.clampedIndex);\r
2140 \r
2141                         parameter.type = registerType(arg);\r
2142                         parameter.bufferIndex = argumentInfo.bufferIndex;\r
2143 \r
2144                         if(arg->getAsConstantUnion() && arg->getAsConstantUnion()->getUnionArrayPointer())\r
2145                         {\r
2146                                 int component = componentCount(type, argumentInfo.clampedIndex);\r
2147                                 ConstantUnion *constants = arg->getAsConstantUnion()->getUnionArrayPointer();\r
2148 \r
2149                                 for(int i = 0; i < 4; i++)\r
2150                                 {\r
2151                                         if(size == 1)   // Replicate\r
2152                                         {\r
2153                                                 parameter.value[i] = constants[component + 0].getAsFloat();\r
2154                                         }\r
2155                                         else if(i < size)\r
2156                                         {\r
2157                                                 parameter.value[i] = constants[component + i].getAsFloat();\r
2158                                         }\r
2159                                         else\r
2160                                         {\r
2161                                                 parameter.value[i] = 0.0f;\r
2162                                         }\r
2163                                 }\r
2164                         }\r
2165                         else\r
2166                         {\r
2167                                 parameter.index = registerIndex(arg) + argumentInfo.clampedIndex;\r
2168 \r
2169                                 if(parameter.bufferIndex != -1)\r
2170                                 {\r
2171                                         int stride = (argumentInfo.typedMemberInfo.matrixStride > 0) ? argumentInfo.typedMemberInfo.matrixStride : argumentInfo.typedMemberInfo.arrayStride;\r
2172                                         parameter.index = argumentInfo.typedMemberInfo.offset + argumentInfo.clampedIndex * stride;\r
2173                                 }\r
2174                         }\r
2175 \r
2176                         if(!IsSampler(arg->getBasicType()))\r
2177                         {\r
2178                                 parameter.swizzle = readSwizzle(arg, size);\r
2179                         }\r
2180                 }\r
2181         }\r
2182 \r
2183         void OutputASM::copy(TIntermTyped *dst, TIntermNode *src, int offset)\r
2184         {\r
2185                 for(int index = 0; index < dst->totalRegisterCount(); index++)\r
2186                 {\r
2187                         Instruction *mov = emit(sw::Shader::OPCODE_MOV, dst, index, src, offset + index);\r
2188                         mov->dst.mask = writeMask(dst, index);\r
2189                 }\r
2190         }\r
2191 \r
// Extracts the 2-bit component selector stored at position 'index' of a packed swizzle.
int swizzleElement(int swizzle, int index)
{
    const int shift = index * 2;   // Two bits per selector
    const int selectorMask = 0x03;

    return (swizzle >> shift) & selectorMask;
}
2196 \r
// Composes two packed swizzles: each of the four selectors of 'rightSwizzle'
// picks which selector of 'leftSwizzle' ends up in that position of the result.
int swizzleSwizzle(int leftSwizzle, int rightSwizzle)
{
    int composed = 0;

    for(int component = 0; component < 4; component++)
    {
        const int shift = component * 2;   // Two bits per selector
        const int selector = (rightSwizzle >> shift) & 0x03;

        composed |= ((leftSwizzle >> (selector * 2)) & 0x03) << shift;
    }

    return composed;
}
2204 \r
2205         void OutputASM::assignLvalue(TIntermTyped *dst, TIntermTyped *src)\r
2206         {\r
2207                 if(src &&\r
2208                         ((src->isVector() && (!dst->isVector() || (src->getNominalSize() != dst->getNominalSize()))) ||\r
2209                          (src->isMatrix() && (!dst->isMatrix() || (src->getNominalSize() != dst->getNominalSize()) || (src->getSecondarySize() != dst->getSecondarySize())))))\r
2210                 {\r
2211                         return mContext.error(src->getLine(), "Result type should match the l-value type in compound assignment", src->isVector() ? "vector" : "matrix");\r
2212                 }\r
2213 \r
2214                 TIntermBinary *binary = dst->getAsBinaryNode();\r
2215 \r
2216                 if(binary && binary->getOp() == EOpIndexIndirect && binary->getLeft()->isVector() && dst->isScalar())\r
2217                 {\r
2218                         Instruction *insert = new Instruction(sw::Shader::OPCODE_INSERT);\r
2219 \r
2220                         Temporary address(this);\r
2221                         lvalue(insert->dst, address, dst);\r
2222 \r
2223                         insert->src[0].type = insert->dst.type;\r
2224                         insert->src[0].index = insert->dst.index;\r
2225                         insert->src[0].rel = insert->dst.rel;\r
2226                         argument(insert->src[1], src);\r
2227                         argument(insert->src[2], binary->getRight());\r
2228 \r
2229                         shader->append(insert);\r
2230                 }\r
2231                 else\r
2232                 {\r
2233                         for(int offset = 0; offset < dst->totalRegisterCount(); offset++)\r
2234                         {\r
2235                                 Instruction *mov = new Instruction(sw::Shader::OPCODE_MOV);\r
2236 \r
2237                                 Temporary address(this);\r
2238                                 int swizzle = lvalue(mov->dst, address, dst);\r
2239                                 mov->dst.index += offset;\r
2240 \r
2241                                 if(offset > 0)\r
2242                                 {\r
2243                                         mov->dst.mask = writeMask(dst, offset);\r
2244                                 }\r
2245 \r
2246                                 argument(mov->src[0], src, offset);\r
2247                                 mov->src[0].swizzle = swizzleSwizzle(mov->src[0].swizzle, swizzle);\r
2248 \r
2249                                 shader->append(mov);\r
2250                         }\r
2251                 }\r
2252         }\r
2253 \r
2254         int OutputASM::lvalue(sw::Shader::DestinationParameter &dst, Temporary &address, TIntermTyped *node)\r
2255         {\r
2256                 TIntermTyped *result = node;\r
2257                 TIntermBinary *binary = node->getAsBinaryNode();\r
2258                 TIntermSymbol *symbol = node->getAsSymbolNode();\r
2259 \r
2260                 if(binary)\r
2261                 {\r
2262                         TIntermTyped *left = binary->getLeft();\r
2263                         TIntermTyped *right = binary->getRight();\r
2264 \r
2265                         int leftSwizzle = lvalue(dst, address, left);   // Resolve the l-value of the left side\r
2266 \r
2267                         switch(binary->getOp())\r
2268                         {\r
2269                         case EOpIndexDirect:\r
2270                                 {\r
2271                                         int rightIndex = right->getAsConstantUnion()->getIConst(0);\r
2272 \r
2273                                         if(left->isRegister())\r
2274                                         {\r
2275                                                 int leftMask = dst.mask;\r
2276 \r
2277                                                 dst.mask = 1;\r
2278                                                 while((leftMask & dst.mask) == 0)\r
2279                                                 {\r
2280                                                         dst.mask = dst.mask << 1;\r
2281                                                 }\r
2282 \r
2283                                                 int element = swizzleElement(leftSwizzle, rightIndex);\r
2284                                                 dst.mask = 1 << element;\r
2285 \r
2286                                                 return element;\r
2287                                         }\r
2288                                         else if(left->isArray() || left->isMatrix())\r
2289                                         {\r
2290                                                 dst.index += rightIndex * result->totalRegisterCount();\r
2291                                                 return 0xE4;\r
2292                                         }\r
2293                                         else UNREACHABLE(0);\r
2294                                 }\r
2295                                 break;\r
2296                         case EOpIndexIndirect:\r
2297                                 {\r
2298                                         if(left->isRegister())\r
2299                                         {\r
2300                                                 // Requires INSERT instruction (handled by calling function)\r
2301                                         }\r
2302                                         else if(left->isArray() || left->isMatrix())\r
2303                                         {\r
2304                                                 int scale = result->totalRegisterCount();\r
2305 \r
2306                                                 if(dst.rel.type == sw::Shader::PARAMETER_VOID)   // Use the index register as the relative address directly\r
2307                                                 {\r
2308                                                         if(left->totalRegisterCount() > 1)\r
2309                                                         {\r
2310                                                                 sw::Shader::SourceParameter relativeRegister;\r
2311                                                                 argument(relativeRegister, right);\r
2312 \r
2313                                                                 dst.rel.index = relativeRegister.index;\r
2314                                                                 dst.rel.type = relativeRegister.type;\r
2315                                                                 dst.rel.scale = scale;\r
2316                                                                 dst.rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);\r
2317                                                         }\r
2318                                                 }\r
2319                                                 else if(dst.rel.index != registerIndex(&address))   // Move the previous index register to the address register\r
2320                                                 {\r
2321                                                         if(scale == 1)\r
2322                                                         {\r
2323                                                                 Constant oldScale((int)dst.rel.scale);\r
2324                                                                 Instruction *mad = emit(sw::Shader::OPCODE_IMAD, &address, &address, &oldScale, right);\r
2325                                                                 mad->src[0].index = dst.rel.index;\r
2326                                                                 mad->src[0].type = dst.rel.type;\r
2327                                                         }\r
2328                                                         else\r
2329                                                         {\r
2330                                                                 Constant oldScale((int)dst.rel.scale);\r
2331                                                                 Instruction *mul = emit(sw::Shader::OPCODE_IMUL, &address, &address, &oldScale);\r
2332                                                                 mul->src[0].index = dst.rel.index;\r
2333                                                                 mul->src[0].type = dst.rel.type;\r
2334 \r
2335                                                                 Constant newScale(scale);\r
2336                                                                 emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);\r
2337                                                         }\r
2338 \r
2339                                                         dst.rel.type = sw::Shader::PARAMETER_TEMP;\r
2340                                                         dst.rel.index = registerIndex(&address);\r
2341                                                         dst.rel.scale = 1;\r
2342                                                 }\r
2343                                                 else   // Just add the new index to the address register\r
2344                                                 {\r
2345                                                         if(scale == 1)\r
2346                                                         {\r
2347                                                                 emit(sw::Shader::OPCODE_IADD, &address, &address, right);\r
2348                                                         }\r
2349                                                         else\r
2350                                                         {\r
2351                                                                 Constant newScale(scale);\r
2352                                                                 emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);\r
2353                                                         }\r
2354                                                 }\r
2355                                         }\r
2356                                         else UNREACHABLE(0);\r
2357                                 }\r
2358                                 break;\r
2359                         case EOpIndexDirectStruct:\r
2360                         case EOpIndexDirectInterfaceBlock:\r
2361                                 {\r
2362                                         const TFieldList& fields = (binary->getOp() == EOpIndexDirectStruct) ?\r
2363                                                                left->getType().getStruct()->fields() :\r
2364                                                                left->getType().getInterfaceBlock()->fields();\r
2365                                         int index = right->getAsConstantUnion()->getIConst(0);\r
2366                                         int fieldOffset = 0;\r
2367 \r
2368                                         for(int i = 0; i < index; i++)\r
2369                                         {\r
2370                                                 fieldOffset += fields[i]->type()->totalRegisterCount();\r
2371                                         }\r
2372 \r
2373                                         dst.type = registerType(left);\r
2374                                         dst.index += fieldOffset;\r
2375                                         dst.mask = writeMask(right);\r
2376 \r
2377                                         return 0xE4;\r
2378                                 }\r
2379                                 break;\r
2380                         case EOpVectorSwizzle:\r
2381                                 {\r
2382                                         ASSERT(left->isRegister());\r
2383 \r
2384                                         int leftMask = dst.mask;\r
2385 \r
2386                                         int swizzle = 0;\r
2387                                         int rightMask = 0;\r
2388 \r
2389                                         TIntermSequence &sequence = right->getAsAggregate()->getSequence();\r
2390 \r
2391                                         for(unsigned int i = 0; i < sequence.size(); i++)\r
2392                                         {\r
2393                                                 int index = sequence[i]->getAsConstantUnion()->getIConst(0);\r
2394 \r
2395                                                 int element = swizzleElement(leftSwizzle, index);\r
2396                                                 rightMask = rightMask | (1 << element);\r
2397                                                 swizzle = swizzle | swizzleElement(leftSwizzle, i) << (element * 2);\r
2398                                         }\r
2399 \r
2400                                         dst.mask = leftMask & rightMask;\r
2401 \r
2402                                         return swizzle;\r
2403                                 }\r
2404                                 break;\r
2405                         default:\r
2406                                 UNREACHABLE(binary->getOp());   // Not an l-value operator\r
2407                                 break;\r
2408                         }\r
2409                 }\r
2410                 else if(symbol)\r
2411                 {\r
2412                         dst.type = registerType(symbol);\r
2413                         dst.index = registerIndex(symbol);\r
2414                         dst.mask = writeMask(symbol);\r
2415                         return 0xE4;\r
2416                 }\r
2417 \r
2418                 return 0xE4;\r
2419         }\r
2420 \r
2421         sw::Shader::ParameterType OutputASM::registerType(TIntermTyped *operand)\r
2422         {\r
2423                 if(isSamplerRegister(operand))\r
2424                 {\r
2425                         return sw::Shader::PARAMETER_SAMPLER;\r
2426                 }\r
2427 \r
2428                 const TQualifier qualifier = operand->getQualifier();\r
2429                 if((EvqFragColor == qualifier) || (EvqFragData == qualifier))\r
2430                 {\r
2431                         if(((EvqFragData == qualifier) && (EvqFragColor == outputQualifier)) ||\r
2432                            ((EvqFragColor == qualifier) && (EvqFragData == outputQualifier)))\r
2433                         {\r
2434                                 mContext.error(operand->getLine(), "static assignment to both gl_FragData and gl_FragColor", "");\r
2435                         }\r
2436                         outputQualifier = qualifier;\r
2437                 }\r
2438 \r
2439                 if(qualifier == EvqConstExpr && (!operand->getAsConstantUnion() || !operand->getAsConstantUnion()->getUnionArrayPointer()))\r
2440                 {\r
2441                         return sw::Shader::PARAMETER_TEMP;\r
2442                 }\r
2443 \r
2444                 switch(qualifier)\r
2445                 {\r
2446                 case EvqTemporary:           return sw::Shader::PARAMETER_TEMP;\r
2447                 case EvqGlobal:              return sw::Shader::PARAMETER_TEMP;\r
2448                 case EvqConstExpr:           return sw::Shader::PARAMETER_FLOAT4LITERAL;   // All converted to float\r
2449                 case EvqAttribute:           return sw::Shader::PARAMETER_INPUT;\r
2450                 case EvqVaryingIn:           return sw::Shader::PARAMETER_INPUT;\r
2451                 case EvqVaryingOut:          return sw::Shader::PARAMETER_OUTPUT;\r
2452                 case EvqVertexIn:            return sw::Shader::PARAMETER_INPUT;\r
2453                 case EvqFragmentOut:         return sw::Shader::PARAMETER_COLOROUT;\r
2454                 case EvqVertexOut:           return sw::Shader::PARAMETER_OUTPUT;\r
2455                 case EvqFragmentIn:          return sw::Shader::PARAMETER_INPUT;\r
2456                 case EvqInvariantVaryingIn:  return sw::Shader::PARAMETER_INPUT;    // FIXME: Guarantee invariance at the backend\r
2457                 case EvqInvariantVaryingOut: return sw::Shader::PARAMETER_OUTPUT;   // FIXME: Guarantee invariance at the backend\r
2458                 case EvqSmooth:              return sw::Shader::PARAMETER_OUTPUT;\r
2459                 case EvqFlat:                return sw::Shader::PARAMETER_OUTPUT;\r
2460                 case EvqCentroidOut:         return sw::Shader::PARAMETER_OUTPUT;\r
2461                 case EvqSmoothIn:            return sw::Shader::PARAMETER_INPUT;\r
2462                 case EvqFlatIn:              return sw::Shader::PARAMETER_INPUT;\r
2463                 case EvqCentroidIn:          return sw::Shader::PARAMETER_INPUT;\r
2464                 case EvqUniform:             return sw::Shader::PARAMETER_CONST;\r
2465                 case EvqIn:                  return sw::Shader::PARAMETER_TEMP;\r
2466                 case EvqOut:                 return sw::Shader::PARAMETER_TEMP;\r
2467                 case EvqInOut:               return sw::Shader::PARAMETER_TEMP;\r
2468                 case EvqConstReadOnly:       return sw::Shader::PARAMETER_TEMP;\r
2469                 case EvqPosition:            return sw::Shader::PARAMETER_OUTPUT;\r
2470                 case EvqPointSize:           return sw::Shader::PARAMETER_OUTPUT;\r
2471                 case EvqInstanceID:          return sw::Shader::PARAMETER_MISCTYPE;\r
2472                 case EvqFragCoord:           return sw::Shader::PARAMETER_MISCTYPE;\r
2473                 case EvqFrontFacing:         return sw::Shader::PARAMETER_MISCTYPE;\r
2474                 case EvqPointCoord:          return sw::Shader::PARAMETER_INPUT;\r
2475                 case EvqFragColor:           return sw::Shader::PARAMETER_COLOROUT;\r
2476                 case EvqFragData:            return sw::Shader::PARAMETER_COLOROUT;\r
2477                 case EvqFragDepth:           return sw::Shader::PARAMETER_DEPTHOUT;\r
2478                 default: UNREACHABLE(qualifier);\r
2479                 }\r
2480 \r
2481                 return sw::Shader::PARAMETER_VOID;\r
2482         }\r
2483 \r
2484         unsigned int OutputASM::registerIndex(TIntermTyped *operand)\r
2485         {\r
2486                 if(isSamplerRegister(operand))\r
2487                 {\r
2488                         return samplerRegister(operand);\r
2489                 }\r
2490 \r
2491                 switch(operand->getQualifier())\r
2492                 {\r
2493                 case EvqTemporary:           return temporaryRegister(operand);\r
2494                 case EvqGlobal:              return temporaryRegister(operand);\r
2495                 case EvqConstExpr:           return temporaryRegister(operand);   // Unevaluated constant expression\r
2496                 case EvqAttribute:           return attributeRegister(operand);\r
2497                 case EvqVaryingIn:           return varyingRegister(operand);\r
2498                 case EvqVaryingOut:          return varyingRegister(operand);\r
2499                 case EvqVertexIn:            return attributeRegister(operand);\r
2500                 case EvqFragmentOut:         return fragmentOutputRegister(operand);\r
2501                 case EvqVertexOut:           return varyingRegister(operand);\r
2502                 case EvqFragmentIn:          return varyingRegister(operand);\r
2503                 case EvqInvariantVaryingIn:  return varyingRegister(operand);\r
2504                 case EvqInvariantVaryingOut: return varyingRegister(operand);\r
2505                 case EvqSmooth:              return varyingRegister(operand);\r
2506                 case EvqFlat:                return varyingRegister(operand);\r
2507                 case EvqCentroidOut:         return varyingRegister(operand);\r
2508                 case EvqSmoothIn:            return varyingRegister(operand);\r
2509                 case EvqFlatIn:              return varyingRegister(operand);\r
2510                 case EvqCentroidIn:          return varyingRegister(operand);\r
2511                 case EvqUniform:             return uniformRegister(operand);\r
2512                 case EvqIn:                  return temporaryRegister(operand);\r
2513                 case EvqOut:                 return temporaryRegister(operand);\r
2514                 case EvqInOut:               return temporaryRegister(operand);\r
2515                 case EvqConstReadOnly:       return temporaryRegister(operand);\r
2516                 case EvqPosition:            return varyingRegister(operand);\r
2517                 case EvqPointSize:           return varyingRegister(operand);\r
2518                 case EvqInstanceID:          vertexShader->instanceIdDeclared = true; return 0;\r
2519                 case EvqFragCoord:           pixelShader->vPosDeclared = true;  return 0;\r
2520                 case EvqFrontFacing:         pixelShader->vFaceDeclared = true; return 1;\r
2521                 case EvqPointCoord:          return varyingRegister(operand);\r
2522                 case EvqFragColor:           return 0;\r
2523                 case EvqFragData:            return fragmentOutputRegister(operand);\r
2524                 case EvqFragDepth:           return 0;\r
2525                 default: UNREACHABLE(operand->getQualifier());\r
2526                 }\r
2527 \r
2528                 return 0;\r
2529         }\r
2530 \r
2531         int OutputASM::writeMask(TIntermTyped *destination, int index)\r
2532         {\r
2533                 if(destination->getQualifier() == EvqPointSize)\r
2534                 {\r
2535                         return 0x2;   // Point size stored in the y component\r
2536                 }\r
2537 \r
2538                 return 0xF >> (4 - registerSize(destination->getType(), index));\r
2539         }\r
2540 \r
2541         int OutputASM::readSwizzle(TIntermTyped *argument, int size)\r
2542         {\r
2543                 if(argument->getQualifier() == EvqPointSize)\r
2544                 {\r
2545                         return 0x55;   // Point size stored in the y component\r
2546                 }\r
2547 \r
2548                 static const unsigned char swizzleSize[5] = {0x00, 0x00, 0x54, 0xA4, 0xE4};   // (void), xxxx, xyyy, xyzz, xyzw\r
2549 \r
2550                 return swizzleSize[size];\r
2551         }\r
2552 \r
2553         // Conservatively checks whether an expression is fast to compute and has no side effects\r
2554         bool OutputASM::trivial(TIntermTyped *expression, int budget)\r
2555         {\r
2556                 if(!expression->isRegister())\r
2557                 {\r
2558                         return false;\r
2559                 }\r
2560 \r
2561                 return cost(expression, budget) >= 0;\r
2562         }\r
2563 \r
2564         // Returns the remaining computing budget (if < 0 the expression is too expensive or has side effects)\r
2565         int OutputASM::cost(TIntermNode *expression, int budget)\r
2566         {\r
2567                 if(budget < 0)\r
2568                 {\r
2569                         return budget;\r
2570                 }\r
2571 \r
2572                 if(expression->getAsSymbolNode())\r
2573                 {\r
2574                         return budget;\r
2575                 }\r
2576                 else if(expression->getAsConstantUnion())\r
2577                 {\r
2578                         return budget;\r
2579                 }\r
2580                 else if(expression->getAsBinaryNode())\r
2581                 {\r
2582                         TIntermBinary *binary = expression->getAsBinaryNode();\r
2583 \r
2584                         switch(binary->getOp())\r
2585                         {\r
2586                         case EOpVectorSwizzle:\r
2587                         case EOpIndexDirect:\r
2588                         case EOpIndexDirectStruct:\r
2589                         case EOpIndexDirectInterfaceBlock:\r
2590                                 return cost(binary->getLeft(), budget - 0);\r
2591                         case EOpAdd:\r
2592                         case EOpSub:\r
2593                         case EOpMul:\r
2594                                 return cost(binary->getLeft(), cost(binary->getRight(), budget - 1));\r
2595                         default:\r
2596                                 return -1;\r
2597                         }\r
2598                 }\r
2599                 else if(expression->getAsUnaryNode())\r
2600                 {\r
2601                         TIntermUnary *unary = expression->getAsUnaryNode();\r
2602 \r
2603                         switch(unary->getOp())\r
2604                         {\r
2605                         case EOpAbs:\r
2606                         case EOpNegative:\r
2607                                 return cost(unary->getOperand(), budget - 1);\r
2608                         default:\r
2609                                 return -1;\r
2610                         }\r
2611                 }\r
2612                 else if(expression->getAsSelectionNode())\r
2613                 {\r
2614                         TIntermSelection *selection = expression->getAsSelectionNode();\r
2615 \r
2616                         if(selection->usesTernaryOperator())\r
2617                         {\r
2618                                 TIntermTyped *condition = selection->getCondition();\r
2619                                 TIntermNode *trueBlock = selection->getTrueBlock();\r
2620                                 TIntermNode *falseBlock = selection->getFalseBlock();\r
2621                                 TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();\r
2622 \r
2623                                 if(constantCondition)\r
2624                                 {\r
2625                                         bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();\r
2626 \r
2627                                         if(trueCondition)\r
2628                                         {\r
2629                                                 return cost(trueBlock, budget - 0);\r
2630                                         }\r
2631                                         else\r
2632                                         {\r
2633                                                 return cost(falseBlock, budget - 0);\r
2634                                         }\r
2635                                 }\r
2636                                 else\r
2637                                 {\r
2638                                         return cost(trueBlock, cost(falseBlock, budget - 2));\r
2639                                 }\r
2640                         }\r
2641                 }\r
2642 \r
2643                 return -1;\r
2644         }\r
2645 \r
2646         const Function *OutputASM::findFunction(const TString &name)\r
2647         {\r
2648                 for(unsigned int f = 0; f < functionArray.size(); f++)\r
2649                 {\r
2650                         if(functionArray[f].name == name)\r
2651                         {\r
2652                                 return &functionArray[f];\r
2653                         }\r
2654                 }\r
2655 \r
2656                 return 0;\r
2657         }\r
2658 \r
2659         int OutputASM::temporaryRegister(TIntermTyped *temporary)\r
2660         {\r
2661                 return allocate(temporaries, temporary);\r
2662         }\r
2663 \r
2664         int OutputASM::varyingRegister(TIntermTyped *varying)\r
2665         {\r
2666                 int var = lookup(varyings, varying);\r
2667 \r
2668                 if(var == -1)\r
2669                 {\r
2670                         var = allocate(varyings, varying);\r
2671                         int componentCount = varying->registerSize();\r
2672                         int registerCount = varying->totalRegisterCount();\r
2673 \r
2674                         if(pixelShader)\r
2675                         {\r
2676                                 if((var + registerCount) > sw::PixelShader::MAX_INPUT_VARYINGS)\r
2677                                 {\r
2678                                         mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "fragment shader");\r
2679                                         return 0;\r
2680                                 }\r
2681 \r
2682                                 if(varying->getQualifier() == EvqPointCoord)\r
2683                                 {\r
2684                                         ASSERT(varying->isRegister());\r
2685                                         if(componentCount >= 1) pixelShader->semantic[var][0] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);\r
2686                                         if(componentCount >= 2) pixelShader->semantic[var][1] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);\r
2687                                         if(componentCount >= 3) pixelShader->semantic[var][2] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);\r
2688                                         if(componentCount >= 4) pixelShader->semantic[var][3] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);\r
2689                                 }\r
2690                                 else\r
2691                                 {\r
2692                                         for(int i = 0; i < varying->totalRegisterCount(); i++)\r
2693                                         {\r
2694                                                 if(componentCount >= 1) pixelShader->semantic[var + i][0] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);\r
2695                                                 if(componentCount >= 2) pixelShader->semantic[var + i][1] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);\r
2696                                                 if(componentCount >= 3) pixelShader->semantic[var + i][2] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);\r
2697                                                 if(componentCount >= 4) pixelShader->semantic[var + i][3] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);\r
2698                                         }\r
2699                                 }\r
2700                         }\r
2701                         else if(vertexShader)\r
2702                         {\r
2703                                 if((var + registerCount) > sw::VertexShader::MAX_OUTPUT_VARYINGS)\r
2704                                 {\r
2705                                         mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader");\r
2706                                         return 0;\r
2707                                 }\r
2708 \r
2709                                 if(varying->getQualifier() == EvqPosition)\r
2710                                 {\r
2711                                         ASSERT(varying->isRegister());\r
2712                                         vertexShader->output[var][0] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);\r
2713                                         vertexShader->output[var][1] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);\r
2714                                         vertexShader->output[var][2] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);\r
2715                                         vertexShader->output[var][3] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);\r
2716                                         vertexShader->positionRegister = var;\r
2717                                 }\r
2718                                 else if(varying->getQualifier() == EvqPointSize)\r
2719                                 {\r
2720                                         ASSERT(varying->isRegister());\r
2721                                         vertexShader->output[var][0] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);\r
2722                                         vertexShader->output[var][1] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);\r
2723                                         vertexShader->output[var][2] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);\r
2724                                         vertexShader->output[var][3] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);\r
2725                                         vertexShader->pointSizeRegister = var;\r
2726                                 }\r
2727                                 else\r
2728                                 {\r
2729                                         // Semantic indexes for user varyings will be assigned during program link to match the pixel shader\r
2730                                 }\r
2731                         }\r
2732                         else UNREACHABLE(0);\r
2733 \r
2734                         declareVarying(varying, var);\r
2735                 }\r
2736 \r
2737                 return var;\r
2738         }\r
2739 \r
2740         void OutputASM::declareVarying(TIntermTyped *varying, int reg)\r
2741         {\r
2742                 if(varying->getQualifier() != EvqPointCoord)   // gl_PointCoord does not need linking\r
2743                 {\r
2744                         const TType &type = varying->getType();\r
2745                         const char *name = varying->getAsSymbolNode()->getSymbol().c_str();\r
2746                         VaryingList &activeVaryings = shaderObject->varyings;\r
2747 \r
2748                         // Check if this varying has been declared before without having a register assigned\r
2749                         for(VaryingList::iterator v = activeVaryings.begin(); v != activeVaryings.end(); v++)\r
2750                         {\r
2751                                 if(v->name == name)\r
2752                                 {\r
2753                                         if(reg >= 0)\r
2754                                         {\r
2755                                                 ASSERT(v->reg < 0 || v->reg == reg);\r
2756                                                 v->reg = reg;\r
2757                                         }\r
2758 \r
2759                                         return;\r
2760                                 }\r
2761                         }\r
2762 \r
2763                         activeVaryings.push_back(glsl::Varying(glVariableType(type), name, varying->getArraySize(), reg, 0));\r
2764                 }\r
2765         }\r
2766 \r
2767         int OutputASM::uniformRegister(TIntermTyped *uniform)\r
2768         {\r
2769                 const TType &type = uniform->getType();\r
2770                 ASSERT(!IsSampler(type.getBasicType()));\r
2771                 TInterfaceBlock *block = type.getAsInterfaceBlock();\r
2772                 TIntermSymbol *symbol = uniform->getAsSymbolNode();\r
2773                 ASSERT(symbol || block);\r
2774 \r
2775                 if(symbol || block)\r
2776                 {\r
2777                         TInterfaceBlock* parentBlock = type.getInterfaceBlock();\r
2778                         bool isBlockMember = (!block && parentBlock);\r
2779                         int index = isBlockMember ? lookup(uniforms, parentBlock) : lookup(uniforms, uniform);\r
2780 \r
2781                         if(index == -1 || isBlockMember)\r
2782                         {\r
2783                                 if(index == -1)\r
2784                                 {\r
2785                                         index = allocate(uniforms, uniform);\r
2786                                 }\r
2787 \r
2788                                 // Verify if the current uniform is a member of an already declared block\r
2789                                 const TString &name = symbol ? symbol->getSymbol() : block->name();\r
2790                                 int blockMemberIndex = blockMemberLookup(type, name, index);\r
2791                                 if(blockMemberIndex == -1)\r
2792                                 {\r
2793                                         declareUniform(type, name, index);\r
2794                                 }\r
2795                                 else\r
2796                                 {\r
2797                                         index = blockMemberIndex;\r
2798                                 }\r
2799                         }\r
2800 \r
2801                         return index;\r
2802                 }\r
2803 \r
2804                 return 0;\r
2805         }\r
2806 \r
2807         int OutputASM::attributeRegister(TIntermTyped *attribute)\r
2808         {\r
2809                 ASSERT(!attribute->isArray());\r
2810 \r
2811                 int index = lookup(attributes, attribute);\r
2812 \r
2813                 if(index == -1)\r
2814                 {\r
2815                         TIntermSymbol *symbol = attribute->getAsSymbolNode();\r
2816                         ASSERT(symbol);\r
2817 \r
2818                         if(symbol)\r
2819                         {\r
2820                                 index = allocate(attributes, attribute);\r
2821                                 const TType &type = attribute->getType();\r
2822                                 int registerCount = attribute->totalRegisterCount();\r
2823 \r
2824                                 if(vertexShader && (index + registerCount) <= sw::VertexShader::MAX_INPUT_ATTRIBUTES)\r
2825                                 {\r
2826                                         for(int i = 0; i < registerCount; i++)\r
2827                                         {\r
2828                                                 vertexShader->input[index + i] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i);\r
2829                                         }\r
2830                                 }\r
2831 \r
2832                                 ActiveAttributes &activeAttributes = shaderObject->activeAttributes;\r
2833 \r
2834                                 const char *name = symbol->getSymbol().c_str();\r
2835                                 activeAttributes.push_back(Attribute(glVariableType(type), name, type.getArraySize(), type.getLayoutQualifier().location, index));\r
2836                         }\r
2837                 }\r
2838 \r
2839                 return index;\r
2840         }\r
2841 \r
2842         int OutputASM::fragmentOutputRegister(TIntermTyped *fragmentOutput)\r
2843         {\r
2844                 return allocate(fragmentOutputs, fragmentOutput);\r
2845         }\r
2846 \r
2847         int OutputASM::samplerRegister(TIntermTyped *sampler)\r
2848         {\r
2849                 const TType &type = sampler->getType();\r
2850                 ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers\r
2851 \r
2852                 TIntermSymbol *symbol = sampler->getAsSymbolNode();\r
2853                 TIntermBinary *binary = sampler->getAsBinaryNode();\r
2854 \r
2855                 if(symbol && type.getQualifier() == EvqUniform)\r
2856                 {\r
2857                         return samplerRegister(symbol);\r
2858                 }\r
2859                 else if(binary)\r
2860                 {\r
2861                         TIntermTyped *left = binary->getLeft();\r
2862                         TIntermTyped *right = binary->getRight();\r
2863                         const TType &leftType = left->getType();\r
2864                         int index = right->getAsConstantUnion() ? right->getAsConstantUnion()->getIConst(0) : 0;\r
2865                         int offset = 0;\r
2866 \r
2867                         switch(binary->getOp())\r
2868                         {\r
2869                         case EOpIndexDirect:\r
2870                                 ASSERT(left->isArray());\r
2871                                 offset = index * leftType.elementRegisterCount();\r
2872                                 break;\r
2873                         case EOpIndexDirectStruct:\r
2874                                 ASSERT(leftType.isStruct());\r
2875                                 {\r
2876                                         const TFieldList &fields = leftType.getStruct()->fields();\r
2877 \r
2878                                         for(int i = 0; i < index; i++)\r
2879                                         {\r
2880                                                 offset += fields[i]->type()->totalRegisterCount();\r
2881                                         }\r
2882                                 }\r
2883                                 break;\r
2884                         case EOpIndexIndirect:               // Indirect indexing produces a temporary, not a sampler register\r
2885                                 return -1;\r
2886                         case EOpIndexDirectInterfaceBlock:   // Interface blocks can't contain samplers\r
2887                         default:\r
2888                                 UNREACHABLE(binary->getOp());\r
2889                                 return -1;\r
2890                         }\r
2891 \r
2892                         int base = samplerRegister(left);\r
2893 \r
2894                         if(base < 0)\r
2895                         {\r
2896                                 return -1;\r
2897                         }\r
2898 \r
2899                         return base + offset;\r
2900                 }\r
2901 \r
2902                 UNREACHABLE(0);\r
2903                 return -1;   // Not a sampler register\r
2904         }\r
2905 \r
2906         int OutputASM::samplerRegister(TIntermSymbol *sampler)\r
2907         {\r
2908                 const TType &type = sampler->getType();\r
2909                 ASSERT(IsSampler(type.getBasicType()) || type.isStruct());   // Structures can contain samplers\r
2910 \r
2911                 int index = lookup(samplers, sampler);\r
2912 \r
2913                 if(index == -1)\r
2914                 {\r
2915                         index = allocate(samplers, sampler);\r
2916 \r
2917                         if(sampler->getQualifier() == EvqUniform)\r
2918                         {\r
2919                                 const char *name = sampler->getSymbol().c_str();\r
2920                                 declareUniform(type, name, index);\r
2921                         }\r
2922                 }\r
2923 \r
2924                 return index;\r
2925         }\r
2926 \r
2927         bool OutputASM::isSamplerRegister(TIntermTyped *operand)\r
2928         {\r
2929                 return operand && IsSampler(operand->getBasicType()) && samplerRegister(operand) >= 0;\r
2930         }\r
2931 \r
2932         int OutputASM::lookup(VariableArray &list, TIntermTyped *variable)\r
2933         {\r
2934                 for(unsigned int i = 0; i < list.size(); i++)\r
2935                 {\r
2936                         if(list[i] == variable)\r
2937                         {\r
2938                                 return i;   // Pointer match\r
2939                         }\r
2940                 }\r
2941 \r
2942                 TIntermSymbol *varSymbol = variable->getAsSymbolNode();\r
2943                 TInterfaceBlock *varBlock = variable->getType().getAsInterfaceBlock();\r
2944 \r
2945                 if(varBlock)\r
2946                 {\r
2947                         for(unsigned int i = 0; i < list.size(); i++)\r
2948                         {\r
2949                                 if(list[i])\r
2950                                 {\r
2951                                         TInterfaceBlock *listBlock = list[i]->getType().getAsInterfaceBlock();\r
2952 \r
2953                                         if(listBlock)\r
2954                                         {\r
2955                                                 if(listBlock->name() == varBlock->name())\r
2956                                                 {\r
2957                                                         ASSERT(listBlock->arraySize() == varBlock->arraySize());\r
2958                                                         ASSERT(listBlock->fields() == varBlock->fields());\r
2959                                                         ASSERT(listBlock->blockStorage() == varBlock->blockStorage());\r
2960                                                         ASSERT(listBlock->matrixPacking() == varBlock->matrixPacking());\r
2961 \r
2962                                                         return i;\r
2963                                                 }\r
2964                                         }\r
2965                                 }\r
2966                         }\r
2967                 }\r
2968                 else if(varSymbol)\r
2969                 {\r
2970                         for(unsigned int i = 0; i < list.size(); i++)\r
2971                         {\r
2972                                 if(list[i])\r
2973                                 {\r
2974                                         TIntermSymbol *listSymbol = list[i]->getAsSymbolNode();\r
2975 \r
2976                                         if(listSymbol)\r
2977                                         {\r
2978                                                 if(listSymbol->getId() == varSymbol->getId())\r
2979                                                 {\r
2980                                                         ASSERT(listSymbol->getSymbol() == varSymbol->getSymbol());\r
2981                                                         ASSERT(listSymbol->getType() == varSymbol->getType());\r
2982                                                         ASSERT(listSymbol->getQualifier() == varSymbol->getQualifier());\r
2983 \r
2984                                                         return i;\r
2985                                                 }\r
2986                                         }\r
2987                                 }\r
2988                         }\r
2989                 }\r
2990 \r
2991                 return -1;\r
2992         }\r
2993 \r
2994         int OutputASM::lookup(VariableArray &list, TInterfaceBlock *block)\r
2995         {\r
2996                 for(unsigned int i = 0; i < list.size(); i++)\r
2997                 {\r
2998                         if(list[i] && (list[i]->getType().getInterfaceBlock() == block))\r
2999                         {\r
3000                                 return i;   // Pointer match\r
3001                         }\r
3002                 }\r
3003                 return -1;\r
3004         }\r
3005 \r
3006         int OutputASM::allocate(VariableArray &list, TIntermTyped *variable)\r
3007         {\r
3008                 int index = lookup(list, variable);\r
3009 \r
3010                 if(index == -1)\r
3011                 {\r
3012                         unsigned int registerCount = variable->blockRegisterCount();\r
3013 \r
3014                         for(unsigned int i = 0; i < list.size(); i++)\r
3015                         {\r
3016                                 if(list[i] == 0)\r
3017                                 {\r
3018                                         unsigned int j = 1;\r
3019                                         for( ; j < registerCount && (i + j) < list.size(); j++)\r
3020                                         {\r
3021                                                 if(list[i + j] != 0)\r
3022                                                 {\r
3023                                                         break;\r
3024                                                 }\r
3025                                         }\r
3026 \r
3027                                         if(j == registerCount)   // Found free slots\r
3028                                         {\r
3029                                                 for(unsigned int j = 0; j < registerCount; j++)\r
3030                                                 {\r
3031                                                         list[i + j] = variable;\r
3032                                                 }\r
3033 \r
3034                                                 return i;\r
3035                                         }\r
3036                                 }\r
3037                         }\r
3038 \r
3039                         index = list.size();\r
3040 \r
3041                         for(unsigned int i = 0; i < registerCount; i++)\r
3042                         {\r
3043                                 list.push_back(variable);\r
3044                         }\r
3045                 }\r
3046 \r
3047                 return index;\r
3048         }\r
3049 \r
3050         void OutputASM::free(VariableArray &list, TIntermTyped *variable)\r
3051         {\r
3052                 int index = lookup(list, variable);\r
3053 \r
3054                 if(index >= 0)\r
3055                 {\r
3056                         list[index] = 0;\r
3057                 }\r
3058         }\r
3059 \r
3060         int OutputASM::blockMemberLookup(const TType &type, const TString &name, int registerIndex)\r
3061         {\r
3062                 const TInterfaceBlock *block = type.getInterfaceBlock();\r
3063 \r
3064                 if(block)\r
3065                 {\r
3066                         ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;\r
3067                         const TFieldList& fields = block->fields();\r
3068                         const TString &blockName = block->name();\r
3069                         int fieldRegisterIndex = registerIndex;\r
3070 \r
3071                         if(!type.isInterfaceBlock())\r
3072                         {\r
3073                                 // This is a uniform that's part of a block, let's see if the block is already defined\r
3074                                 for(size_t i = 0; i < activeUniformBlocks.size(); ++i)\r
3075                                 {\r
3076                                         if(activeUniformBlocks[i].name == blockName.c_str())\r
3077                                         {\r
3078                                                 // The block is already defined, find the register for the current uniform and return it\r
3079                                                 for(size_t j = 0; j < fields.size(); j++)\r
3080                                                 {\r
3081                                                         const TString &fieldName = fields[j]->name();\r
3082                                                         if(fieldName == name)\r
3083                                                         {\r
3084                                                                 return fieldRegisterIndex;\r
3085                                                         }\r
3086 \r
3087                                                         fieldRegisterIndex += fields[j]->type()->totalRegisterCount();\r
3088                                                 }\r
3089 \r
3090                                                 ASSERT(false);\r
3091                                                 return fieldRegisterIndex;\r
3092                                         }\r
3093                                 }\r
3094                         }\r
3095                 }\r
3096 \r
3097                 return -1;\r
3098         }\r
3099 \r
3100         void OutputASM::declareUniform(const TType &type, const TString &name, int registerIndex, int blockId, BlockLayoutEncoder* encoder)\r
3101         {\r
3102                 const TStructure *structure = type.getStruct();\r
3103                 const TInterfaceBlock *block = (type.isInterfaceBlock() || (blockId == -1)) ? type.getInterfaceBlock() : nullptr;\r
3104 \r
3105                 if(!structure && !block)\r
3106                 {\r
3107                         ActiveUniforms &activeUniforms = shaderObject->activeUniforms;\r
3108                         const BlockMemberInfo blockInfo = encoder ? encoder->encodeType(type) : BlockMemberInfo::getDefaultBlockInfo();\r
3109                         if(blockId >= 0)\r
3110                         {\r
3111                                 blockDefinitions[blockId][registerIndex] = TypedMemberInfo(blockInfo, type);\r
3112                                 shaderObject->activeUniformBlocks[blockId].fields.push_back(activeUniforms.size());\r
3113                         }\r
3114                         int fieldRegisterIndex = encoder ? shaderObject->activeUniformBlocks[blockId].registerIndex + BlockLayoutEncoder::getBlockRegister(blockInfo) : registerIndex;\r
3115                         activeUniforms.push_back(Uniform(glVariableType(type), glVariablePrecision(type), name.c_str(), type.getArraySize(),\r
3116                                                          fieldRegisterIndex, blockId, blockInfo));\r
3117                         if(IsSampler(type.getBasicType()))\r
3118                         {\r
3119                                 for(int i = 0; i < type.totalRegisterCount(); i++)\r
3120                                 {\r
3121                                         shader->declareSampler(fieldRegisterIndex + i);\r
3122                                 }\r
3123                         }\r
3124                 }\r
3125                 else if(block)\r
3126                 {\r
3127                         ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;\r
3128                         const TFieldList& fields = block->fields();\r
3129                         const TString &blockName = block->name();\r
3130                         int fieldRegisterIndex = registerIndex;\r
3131                         bool isUniformBlockMember = !type.isInterfaceBlock() && (blockId == -1);\r
3132 \r
3133                         blockId = activeUniformBlocks.size();\r
3134                         bool isRowMajor = block->matrixPacking() == EmpRowMajor;\r
3135                         activeUniformBlocks.push_back(UniformBlock(blockName.c_str(), 0, block->arraySize(),\r
3136                                                                    block->blockStorage(), isRowMajor, registerIndex, blockId));\r
3137                         blockDefinitions.push_back(BlockDefinitionIndexMap());\r
3138 \r
3139                         Std140BlockEncoder currentBlockEncoder(isRowMajor);\r
3140                         currentBlockEncoder.enterAggregateType();\r
3141                         for(size_t i = 0; i < fields.size(); i++)\r
3142                         {\r
3143                                 const TType &fieldType = *(fields[i]->type());\r
3144                                 const TString &fieldName = fields[i]->name();\r
3145                                 if(isUniformBlockMember && (fieldName == name))\r
3146                                 {\r
3147                                         registerIndex = fieldRegisterIndex;\r
3148                                 }\r
3149 \r
3150                                 const TString uniformName = block->hasInstanceName() ? blockName + "." + fieldName : fieldName;\r
3151 \r
3152                                 declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, &currentBlockEncoder);\r
3153                                 fieldRegisterIndex += fieldType.totalRegisterCount();\r
3154                         }\r
3155                         currentBlockEncoder.exitAggregateType();\r
3156                         activeUniformBlocks[blockId].dataSize = currentBlockEncoder.getBlockSize();\r
3157                 }\r
3158                 else\r
3159                 {\r
3160                         int fieldRegisterIndex = registerIndex;\r
3161 \r
3162                         const TFieldList& fields = structure->fields();\r
3163                         if(type.isArray() && (structure || type.isInterfaceBlock()))\r
3164                         {\r
3165                                 for(int i = 0; i < type.getArraySize(); i++)\r
3166                                 {\r
3167                                         if(encoder)\r
3168                                         {\r
3169                                                 encoder->enterAggregateType();\r
3170                                         }\r
3171                                         for(size_t j = 0; j < fields.size(); j++)\r
3172                                         {\r
3173                                                 const TType &fieldType = *(fields[j]->type());\r
3174                                                 const TString &fieldName = fields[j]->name();\r
3175                                                 const TString uniformName = name + "[" + str(i) + "]." + fieldName;\r
3176 \r
3177                                                 declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, encoder);\r
3178                                                 fieldRegisterIndex += fieldType.totalRegisterCount();\r
3179                                         }\r
3180                                         if(encoder)\r
3181                                         {\r
3182                                                 encoder->exitAggregateType();\r
3183                                         }\r
3184                                 }\r
3185                         }\r
3186                         else\r
3187                         {\r
3188                                 if(encoder)\r
3189                                 {\r
3190                                         encoder->enterAggregateType();\r
3191                                 }\r
3192                                 for(size_t i = 0; i < fields.size(); i++)\r
3193                                 {\r
3194                                         const TType &fieldType = *(fields[i]->type());\r
3195                                         const TString &fieldName = fields[i]->name();\r
3196                                         const TString uniformName = name + "." + fieldName;\r
3197 \r
3198                                         declareUniform(fieldType, uniformName, fieldRegisterIndex, blockId, encoder);\r
3199                                         fieldRegisterIndex += fieldType.totalRegisterCount();\r
3200                                 }\r
3201                                 if(encoder)\r
3202                                 {\r
3203                                         encoder->exitAggregateType();\r
3204                                 }\r
3205                         }\r
3206                 }\r
3207         }\r
3208 \r
3209         GLenum OutputASM::glVariableType(const TType &type)\r
3210         {\r
3211                 switch(type.getBasicType())\r
3212                 {\r
3213                 case EbtFloat:\r
3214                         if(type.isScalar())\r
3215                         {\r
3216                                 return GL_FLOAT;\r
3217                         }\r
3218                         else if(type.isVector())\r
3219                         {\r
3220                                 switch(type.getNominalSize())\r
3221                                 {\r
3222                                 case 2: return GL_FLOAT_VEC2;\r
3223                                 case 3: return GL_FLOAT_VEC3;\r
3224                                 case 4: return GL_FLOAT_VEC4;\r
3225                                 default: UNREACHABLE(type.getNominalSize());\r
3226                                 }\r
3227                         }\r
3228                         else if(type.isMatrix())\r
3229                         {\r
3230                                 switch(type.getNominalSize())\r
3231                                 {\r
3232                                 case 2:\r
3233                                         switch(type.getSecondarySize())\r
3234                                         {\r
3235                                         case 2: return GL_FLOAT_MAT2;\r
3236                                         case 3: return GL_FLOAT_MAT2x3;\r
3237                                         case 4: return GL_FLOAT_MAT2x4;\r
3238                                         default: UNREACHABLE(type.getSecondarySize());\r
3239                                         }\r
3240                                 case 3:\r
3241                                         switch(type.getSecondarySize())\r
3242                                         {\r
3243                                         case 2: return GL_FLOAT_MAT3x2;\r
3244                                         case 3: return GL_FLOAT_MAT3;\r
3245                                         case 4: return GL_FLOAT_MAT3x4;\r
3246                                         default: UNREACHABLE(type.getSecondarySize());\r
3247                                         }\r
3248                                 case 4:\r
3249                                         switch(type.getSecondarySize())\r
3250                                         {\r
3251                                         case 2: return GL_FLOAT_MAT4x2;\r
3252                                         case 3: return GL_FLOAT_MAT4x3;\r
3253                                         case 4: return GL_FLOAT_MAT4;\r
3254                                         default: UNREACHABLE(type.getSecondarySize());\r
3255                                         }\r
3256                                 default: UNREACHABLE(type.getNominalSize());\r
3257                                 }\r
3258                         }\r
3259                         else UNREACHABLE(0);\r
3260                         break;\r
3261                 case EbtInt:\r
3262                         if(type.isScalar())\r
3263                         {\r
3264                                 return GL_INT;\r
3265                         }\r
3266                         else if(type.isVector())\r
3267                         {\r
3268                                 switch(type.getNominalSize())\r
3269                                 {\r
3270                                 case 2: return GL_INT_VEC2;\r
3271                                 case 3: return GL_INT_VEC3;\r
3272                                 case 4: return GL_INT_VEC4;\r
3273                                 default: UNREACHABLE(type.getNominalSize());\r
3274                                 }\r
3275                         }\r
3276                         else UNREACHABLE(0);\r
3277                         break;\r
3278                 case EbtUInt:\r
3279                         if(type.isScalar())\r
3280                         {\r
3281                                 return GL_UNSIGNED_INT;\r
3282                         }\r
3283                         else if(type.isVector())\r
3284                         {\r
3285                                 switch(type.getNominalSize())\r
3286                                 {\r
3287                                 case 2: return GL_UNSIGNED_INT_VEC2;\r
3288                                 case 3: return GL_UNSIGNED_INT_VEC3;\r
3289                                 case 4: return GL_UNSIGNED_INT_VEC4;\r
3290                                 default: UNREACHABLE(type.getNominalSize());\r
3291                                 }\r
3292                         }\r
3293                         else UNREACHABLE(0);\r
3294                         break;\r
3295                 case EbtBool:\r
3296                         if(type.isScalar())\r
3297                         {\r
3298                                 return GL_BOOL;\r
3299                         }\r
3300                         else if(type.isVector())\r
3301                         {\r
3302                                 switch(type.getNominalSize())\r
3303                                 {\r
3304                                 case 2: return GL_BOOL_VEC2;\r
3305                                 case 3: return GL_BOOL_VEC3;\r
3306                                 case 4: return GL_BOOL_VEC4;\r
3307                                 default: UNREACHABLE(type.getNominalSize());\r
3308                                 }\r
3309                         }\r
3310                         else UNREACHABLE(0);\r
3311                         break;\r
3312                 case EbtSampler2D:\r
3313                         return GL_SAMPLER_2D;\r
3314                 case EbtISampler2D:\r
3315                         return GL_INT_SAMPLER_2D;\r
3316                 case EbtUSampler2D:\r
3317                         return GL_UNSIGNED_INT_SAMPLER_2D;\r
3318                 case EbtSamplerCube:\r
3319                         return GL_SAMPLER_CUBE;\r
3320                 case EbtISamplerCube:\r
3321                         return GL_INT_SAMPLER_CUBE;\r
3322                 case EbtUSamplerCube:\r
3323                         return GL_UNSIGNED_INT_SAMPLER_CUBE;\r
3324                 case EbtSamplerExternalOES:\r
3325                         return GL_SAMPLER_EXTERNAL_OES;\r
3326                 case EbtSampler3D:\r
3327                         return GL_SAMPLER_3D_OES;\r
3328                 case EbtISampler3D:\r
3329                         return GL_INT_SAMPLER_3D;\r
3330                 case EbtUSampler3D:\r
3331                         return GL_UNSIGNED_INT_SAMPLER_3D;\r
3332                 case EbtSampler2DArray:\r
3333                         return GL_SAMPLER_2D_ARRAY;\r
3334                 case EbtISampler2DArray:\r
3335                         return GL_INT_SAMPLER_2D_ARRAY;\r
3336                 case EbtUSampler2DArray:\r
3337                         return GL_UNSIGNED_INT_SAMPLER_2D_ARRAY;\r
3338                 case EbtSampler2DShadow:\r
3339                         return GL_SAMPLER_2D_SHADOW;\r
3340                 case EbtSamplerCubeShadow:\r
3341                         return GL_SAMPLER_CUBE_SHADOW;\r
3342                 case EbtSampler2DArrayShadow:\r
3343                         return GL_SAMPLER_2D_ARRAY_SHADOW;\r
3344                 default:\r
3345                         UNREACHABLE(type.getBasicType());\r
3346                         break;\r
3347                 }\r
3348 \r
3349                 return GL_NONE;\r
3350         }\r
3351 \r
3352         GLenum OutputASM::glVariablePrecision(const TType &type)\r
3353         {\r
3354                 if(type.getBasicType() == EbtFloat)\r
3355                 {\r
3356                         switch(type.getPrecision())\r
3357                         {\r
3358                         case EbpHigh:   return GL_HIGH_FLOAT;\r
3359                         case EbpMedium: return GL_MEDIUM_FLOAT;\r
3360                         case EbpLow:    return GL_LOW_FLOAT;\r
3361                         case EbpUndefined:\r
3362                                 // Should be defined as the default precision by the parser\r
3363                         default: UNREACHABLE(type.getPrecision());\r
3364                         }\r
3365                 }\r
3366                 else if(type.getBasicType() == EbtInt)\r
3367                 {\r
3368                         switch(type.getPrecision())\r
3369                         {\r
3370                         case EbpHigh:   return GL_HIGH_INT;\r
3371                         case EbpMedium: return GL_MEDIUM_INT;\r
3372                         case EbpLow:    return GL_LOW_INT;\r
3373                         case EbpUndefined:\r
3374                                 // Should be defined as the default precision by the parser\r
3375                         default: UNREACHABLE(type.getPrecision());\r
3376                         }\r
3377                 }\r
3378 \r
3379                 // Other types (boolean, sampler) don't have a precision\r
3380                 return GL_NONE;\r
3381         }\r
3382 \r
3383         int OutputASM::dim(TIntermNode *v)\r
3384         {\r
3385                 TIntermTyped *vector = v->getAsTyped();\r
3386                 ASSERT(vector && vector->isRegister());\r
3387                 return vector->getNominalSize();\r
3388         }\r
3389 \r
3390         int OutputASM::dim2(TIntermNode *m)\r
3391         {\r
3392                 TIntermTyped *matrix = m->getAsTyped();\r
3393                 ASSERT(matrix && matrix->isMatrix() && !matrix->isArray());\r
3394                 return matrix->getSecondarySize();\r
3395         }\r
3396 \r
3397         // Returns ~0u if no loop count could be determined\r
3398         unsigned int OutputASM::loopCount(TIntermLoop *node)\r
3399         {\r
3400                 // Parse loops of the form:\r
3401                 // for(int index = initial; index [comparator] limit; index += increment)\r
3402                 TIntermSymbol *index = 0;\r
3403                 TOperator comparator = EOpNull;\r
3404                 int initial = 0;\r
3405                 int limit = 0;\r
3406                 int increment = 0;\r
3407 \r
3408                 // Parse index name and intial value\r
3409                 if(node->getInit())\r
3410                 {\r
3411                         TIntermAggregate *init = node->getInit()->getAsAggregate();\r
3412 \r
3413                         if(init)\r
3414                         {\r
3415                                 TIntermSequence &sequence = init->getSequence();\r
3416                                 TIntermTyped *variable = sequence[0]->getAsTyped();\r
3417 \r
3418                                 if(variable && variable->getQualifier() == EvqTemporary)\r
3419                                 {\r
3420                                         TIntermBinary *assign = variable->getAsBinaryNode();\r
3421 \r
3422                                         if(assign->getOp() == EOpInitialize)\r
3423                                         {\r
3424                                                 TIntermSymbol *symbol = assign->getLeft()->getAsSymbolNode();\r
3425                                                 TIntermConstantUnion *constant = assign->getRight()->getAsConstantUnion();\r
3426 \r
3427                                                 if(symbol && constant)\r
3428                                                 {\r
3429                                                         if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)\r
3430                                                         {\r
3431                                                                 index = symbol;\r
3432                                                                 initial = constant->getUnionArrayPointer()[0].getIConst();\r
3433                                                         }\r
3434                                                 }\r
3435                                         }\r
3436                                 }\r
3437                         }\r
3438                 }\r
3439 \r
3440                 // Parse comparator and limit value\r
3441                 if(index && node->getCondition())\r
3442                 {\r
3443                         TIntermBinary *test = node->getCondition()->getAsBinaryNode();\r
3444 \r
3445                         if(test && test->getLeft()->getAsSymbolNode()->getId() == index->getId())\r
3446                         {\r
3447                                 TIntermConstantUnion *constant = test->getRight()->getAsConstantUnion();\r
3448 \r
3449                                 if(constant)\r
3450                                 {\r
3451                                         if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)\r
3452                                         {\r
3453                                                 comparator = test->getOp();\r
3454                                                 limit = constant->getUnionArrayPointer()[0].getIConst();\r
3455                                         }\r
3456                                 }\r
3457                         }\r
3458                 }\r
3459 \r
3460                 // Parse increment\r
3461                 if(index && comparator != EOpNull && node->getExpression())\r
3462                 {\r
3463                         TIntermBinary *binaryTerminal = node->getExpression()->getAsBinaryNode();\r
3464                         TIntermUnary *unaryTerminal = node->getExpression()->getAsUnaryNode();\r
3465 \r
3466                         if(binaryTerminal)\r
3467                         {\r
3468                                 TOperator op = binaryTerminal->getOp();\r
3469                                 TIntermConstantUnion *constant = binaryTerminal->getRight()->getAsConstantUnion();\r
3470 \r
3471                                 if(constant)\r
3472                                 {\r
3473                                         if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)\r
3474                                         {\r
3475                                                 int value = constant->getUnionArrayPointer()[0].getIConst();\r
3476 \r
3477                                                 switch(op)\r
3478                                                 {\r
3479                                                 case EOpAddAssign: increment = value;  break;\r
3480                                                 case EOpSubAssign: increment = -value; break;\r
3481                                                 default: UNIMPLEMENTED();\r
3482                                                 }\r
3483                                         }\r
3484                                 }\r
3485                         }\r
3486                         else if(unaryTerminal)\r
3487                         {\r
3488                                 TOperator op = unaryTerminal->getOp();\r
3489 \r
3490                                 switch(op)\r
3491                                 {\r
3492                                 case EOpPostIncrement: increment = 1;  break;\r
3493                                 case EOpPostDecrement: increment = -1; break;\r
3494                                 case EOpPreIncrement:  increment = 1;  break;\r
3495                                 case EOpPreDecrement:  increment = -1; break;\r
3496                                 default: UNIMPLEMENTED();\r
3497                                 }\r
3498                         }\r
3499                 }\r
3500 \r
3501                 if(index && comparator != EOpNull && increment != 0)\r
3502                 {\r
3503                         if(comparator == EOpLessThanEqual)\r
3504                         {\r
3505                                 comparator = EOpLessThan;\r
3506                                 limit += 1;\r
3507                         }\r
3508 \r
3509                         if(comparator == EOpLessThan)\r
3510                         {\r
3511                                 int iterations = (limit - initial) / increment;\r
3512 \r
3513                                 if(iterations <= 0)\r
3514                                 {\r
3515                                         iterations = 0;\r
3516                                 }\r
3517 \r
3518                                 return iterations;\r
3519                         }\r
3520                         else UNIMPLEMENTED();   // Falls through\r
3521                 }\r
3522 \r
3523                 return ~0u;\r
3524         }\r
3525 \r
3526         bool LoopUnrollable::traverse(TIntermNode *node)\r
3527         {\r
3528                 loopDepth = 0;\r
3529                 loopUnrollable = true;\r
3530 \r
3531                 node->traverse(this);\r
3532 \r
3533                 return loopUnrollable;\r
3534         }\r
3535 \r
3536         bool LoopUnrollable::visitLoop(Visit visit, TIntermLoop *loop)\r
3537         {\r
3538                 if(visit == PreVisit)\r
3539                 {\r
3540                         loopDepth++;\r
3541                 }\r
3542                 else if(visit == PostVisit)\r
3543                 {\r
3544                         loopDepth++;\r
3545                 }\r
3546 \r
3547                 return true;\r
3548         }\r
3549 \r
3550         bool LoopUnrollable::visitBranch(Visit visit, TIntermBranch *node)\r
3551         {\r
3552                 if(!loopUnrollable)\r
3553                 {\r
3554                         return false;\r
3555                 }\r
3556 \r
3557                 if(!loopDepth)\r
3558                 {\r
3559                         return true;\r
3560                 }\r
3561 \r
3562                 switch(node->getFlowOp())\r
3563                 {\r
3564                 case EOpKill:\r
3565                 case EOpReturn:\r
3566                         break;\r
3567                 case EOpBreak:\r
3568                 case EOpContinue:\r
3569                         loopUnrollable = false;\r
3570                         break;\r
3571                 default: UNREACHABLE(node->getFlowOp());\r
3572                 }\r
3573 \r
3574                 return loopUnrollable;\r
3575         }\r
3576 \r
3577         bool LoopUnrollable::visitAggregate(Visit visit, TIntermAggregate *node)\r
3578         {\r
3579                 return loopUnrollable;\r
3580         }\r
3581 }\r