OSDN Git Service

Matrix determinant and inverse implementation
[android-x86/external-swiftshader.git] / src / OpenGL / compiler / OutputASM.cpp
1 // SwiftShader Software Renderer\r
2 //\r
3 // Copyright(c) 2005-2013 TransGaming Inc.\r
4 //\r
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,\r
6 // transcribed, stored in a retrieval system, translated into any human or computer\r
7 // language by any means, or disclosed to third parties without the explicit written\r
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express\r
9 // or implied, including but not limited to any patent rights, are granted to you.\r
10 //\r
11 \r
12 #include "OutputASM.h"\r
13 \r
14 #include "common/debug.h"\r
15 #include "InfoSink.h"\r
16 \r
17 #include "libGLESv2/Shader.h"\r
18 \r
19 #include <GLES2/gl2.h>\r
20 #include <GLES2/gl2ext.h>\r
21 #include <GLES3/gl3.h>\r
22 \r
23 namespace glsl\r
24 {\r
25         // Integer to TString conversion\r
26         TString str(int i)\r
27         {\r
28                 char buffer[20];\r
29                 sprintf(buffer, "%d", i);\r
30                 return buffer;\r
31         }\r
32 \r
33         class Temporary : public TIntermSymbol\r
34         {\r
35         public:\r
36                 Temporary(OutputASM *assembler) : TIntermSymbol(TSymbolTableLevel::nextUniqueId(), "tmp", TType(EbtFloat, EbpHigh, EvqTemporary, 4, 1, false)), assembler(assembler)\r
37                 {\r
38                 }\r
39 \r
40                 ~Temporary()\r
41                 {\r
42                         assembler->freeTemporary(this);\r
43                 }\r
44 \r
45         private:\r
46                 OutputASM *const assembler;\r
47         };\r
48 \r
49         class Constant : public TIntermConstantUnion\r
50         {\r
51         public:\r
52                 Constant(float x, float y, float z, float w) : TIntermConstantUnion(constants, TType(EbtFloat, EbpHigh, EvqConstExpr, 4, 1, false))\r
53                 {\r
54                         constants[0].setFConst(x);\r
55                         constants[1].setFConst(y);\r
56                         constants[2].setFConst(z);\r
57                         constants[3].setFConst(w);\r
58                 }\r
59 \r
60                 Constant(bool b) : TIntermConstantUnion(constants, TType(EbtBool, EbpHigh, EvqConstExpr, 1, 1, false))\r
61                 {\r
62                         constants[0].setBConst(b);\r
63                 }\r
64 \r
65                 Constant(int i) : TIntermConstantUnion(constants, TType(EbtInt, EbpHigh, EvqConstExpr, 1, 1, false))\r
66                 {\r
67                         constants[0].setIConst(i);\r
68                 }\r
69 \r
70                 ~Constant()\r
71                 {\r
72                 }\r
73 \r
74         private:\r
75                 ConstantUnion constants[4];\r
76         };\r
77 \r
78         Uniform::Uniform(GLenum type, GLenum precision, const std::string &name, int arraySize, int registerIndex, int offset, int blockId) :\r
79                 type(type), precision(precision), name(name), arraySize(arraySize), registerIndex(registerIndex), offset(offset), blockId(blockId)\r
80         {\r
81         }\r
82 \r
83         UniformBlock::UniformBlock(const std::string& name, const std::string& instanceName, unsigned int dataSize, unsigned int arraySize,\r
84                                    TLayoutBlockStorage layout, bool isRowMajorLayout, int registerIndex, int blockId) :\r
85                 name(name), instanceName(instanceName), dataSize(dataSize), arraySize(arraySize), layout(layout),\r
86                 isRowMajorLayout(isRowMajorLayout), registerIndex(registerIndex), blockId(blockId)\r
87         {\r
88         }\r
89 \r
90         Attribute::Attribute()\r
91         {\r
92                 type = GL_NONE;\r
93                 arraySize = 0;\r
94                 registerIndex = 0;\r
95         }\r
96 \r
97         Attribute::Attribute(GLenum type, const std::string &name, int arraySize, int location, int registerIndex)\r
98         {\r
99                 this->type = type;\r
100                 this->name = name;\r
101                 this->arraySize = arraySize;\r
102                 this->location = location;\r
103                 this->registerIndex = registerIndex;\r
104         }\r
105 \r
106         sw::PixelShader *Shader::getPixelShader() const\r
107         {\r
108                 return 0;\r
109         }\r
110 \r
111         sw::VertexShader *Shader::getVertexShader() const\r
112         {\r
113                 return 0;\r
114         }\r
115 \r
116         OutputASM::OutputASM(TParseContext &context, Shader *shaderObject) : TIntermTraverser(true, true, true), mContext(context), shaderObject(shaderObject)\r
117         {\r
118                 shader = 0;\r
119                 pixelShader = 0;\r
120                 vertexShader = 0;\r
121 \r
122                 if(shaderObject)\r
123                 {\r
124                         shader = shaderObject->getShader();\r
125                         pixelShader = shaderObject->getPixelShader();\r
126                         vertexShader = shaderObject->getVertexShader();\r
127                 }\r
128 \r
129                 functionArray.push_back(Function(0, "main(", 0, 0));\r
130                 currentFunction = 0;\r
131                 outputQualifier = EvqOutput; // Set outputQualifier to any value other than EvqFragColor or EvqFragData\r
132         }\r
133 \r
134         OutputASM::~OutputASM()\r
135         {\r
136         }\r
137 \r
138         void OutputASM::output()\r
139         {\r
140                 if(shader)\r
141                 {\r
142                         emitShader(GLOBAL);\r
143 \r
144                         if(functionArray.size() > 1)   // Only call main() when there are other functions\r
145                         {\r
146                                 Instruction *callMain = emit(sw::Shader::OPCODE_CALL);\r
147                                 callMain->dst.type = sw::Shader::PARAMETER_LABEL;\r
148                                 callMain->dst.index = 0;   // main()\r
149 \r
150                                 emit(sw::Shader::OPCODE_RET);\r
151                         }\r
152 \r
153                         emitShader(FUNCTION);\r
154                 }\r
155         }\r
156 \r
157         void OutputASM::emitShader(Scope scope)\r
158         {\r
159                 emitScope = scope;\r
160                 currentScope = GLOBAL;\r
161                 mContext.getTreeRoot()->traverse(this);\r
162         }\r
163 \r
164         void OutputASM::freeTemporary(Temporary *temporary)\r
165         {\r
166                 free(temporaries, temporary);\r
167         }\r
168 \r
169         sw::Shader::Opcode OutputASM::getOpcode(sw::Shader::Opcode op, TIntermTyped *in) const\r
170         {\r
171                 TBasicType baseType = in->getType().getBasicType();\r
172 \r
173                 switch(op)\r
174                 {\r
175                 case sw::Shader::OPCODE_NEG:\r
176                         switch(baseType)\r
177                         {\r
178                         case EbtInt:\r
179                         case EbtUInt:\r
180                                 return sw::Shader::OPCODE_INEG;\r
181                         case EbtFloat:\r
182                         default:\r
183                                 return op;\r
184                         }\r
185                 case sw::Shader::OPCODE_ADD:\r
186                         switch(baseType)\r
187                         {\r
188                         case EbtInt:\r
189                         case EbtUInt:\r
190                                 return sw::Shader::OPCODE_IADD;\r
191                         case EbtFloat:\r
192                         default:\r
193                                 return op;\r
194                         }\r
195                 case sw::Shader::OPCODE_SUB:\r
196                         switch(baseType)\r
197                         {\r
198                         case EbtInt:\r
199                         case EbtUInt:\r
200                                 return sw::Shader::OPCODE_ISUB;\r
201                         case EbtFloat:\r
202                         default:\r
203                                 return op;\r
204                         }\r
205                 case sw::Shader::OPCODE_MUL:\r
206                         switch(baseType)\r
207                         {\r
208                         case EbtInt:\r
209                         case EbtUInt:\r
210                                 return sw::Shader::OPCODE_IMUL;\r
211                         case EbtFloat:\r
212                         default:\r
213                                 return op;\r
214                         }\r
215                 case sw::Shader::OPCODE_DIV:\r
216                         switch(baseType)\r
217                         {\r
218                         case EbtInt:\r
219                                 return sw::Shader::OPCODE_IDIV;\r
220                         case EbtUInt:\r
221                                 return sw::Shader::OPCODE_UDIV;\r
222                         case EbtFloat:\r
223                         default:\r
224                                 return op;\r
225                         }\r
226                 case sw::Shader::OPCODE_IMOD:\r
227                         return baseType == EbtUInt ? sw::Shader::OPCODE_UMOD : op;\r
228                 case sw::Shader::OPCODE_ISHR:\r
229                         return baseType == EbtUInt ? sw::Shader::OPCODE_USHR : op;\r
230                 case sw::Shader::OPCODE_MIN:\r
231                         switch(baseType)\r
232                         {\r
233                         case EbtInt:\r
234                                 return sw::Shader::OPCODE_IMIN;\r
235                         case EbtUInt:\r
236                                 return sw::Shader::OPCODE_UMIN;\r
237                         case EbtFloat:\r
238                         default:\r
239                                 return op;\r
240                         }\r
241                 case sw::Shader::OPCODE_MAX:\r
242                         switch(baseType)\r
243                         {\r
244                         case EbtInt:\r
245                                 return sw::Shader::OPCODE_IMAX;\r
246                         case EbtUInt:\r
247                                 return sw::Shader::OPCODE_UMAX;\r
248                         case EbtFloat:\r
249                         default:\r
250                                 return op;\r
251                         }\r
252                 default:\r
253                         return op;\r
254                 }\r
255         }\r
256 \r
257         void OutputASM::visitSymbol(TIntermSymbol *symbol)\r
258         {\r
259                 // Vertex varyings don't have to be actively used to successfully link\r
260                 // against pixel shaders that use them. So make sure they're declared.\r
261                 if(symbol->getQualifier() == EvqVaryingOut || symbol->getQualifier() == EvqInvariantVaryingOut || symbol->getQualifier() == EvqVertexOut)\r
262                 {\r
263                         if(symbol->getBasicType() != EbtInvariant)   // Typeless declarations are not new varyings\r
264                         {\r
265                                 declareVarying(symbol, -1);\r
266                         }\r
267                 }\r
268         }\r
269 \r
270         bool OutputASM::visitBinary(Visit visit, TIntermBinary *node)\r
271         {\r
272                 if(currentScope != emitScope)\r
273                 {\r
274                         return false;\r
275                 }\r
276 \r
277                 TIntermTyped *result = node;\r
278                 TIntermTyped *left = node->getLeft();\r
279                 TIntermTyped *right = node->getRight();\r
280                 const TType &leftType = left->getType();\r
281                 const TType &rightType = right->getType();\r
282                 const TType &resultType = node->getType();\r
283                 \r
284                 switch(node->getOp())\r
285                 {\r
286                 case EOpAssign:\r
287                         if(visit == PostVisit)\r
288                         {\r
289                                 assignLvalue(left, right);\r
290                                 copy(result, right);\r
291                         }\r
292                         break;\r
293                 case EOpInitialize:\r
294                         if(visit == PostVisit)\r
295                         {\r
296                                 copy(left, right);\r
297                         }\r
298                         break;\r
299                 case EOpMatrixTimesScalarAssign:\r
300                         if(visit == PostVisit)\r
301                         {\r
302                                 for(int i = 0; i < leftType.getNominalSize(); i++)\r
303                                 {\r
304                                         Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);\r
305                                         mul->dst.index += i;\r
306                                         argument(mul->src[0], left, i);\r
307                                 }\r
308 \r
309                                 assignLvalue(left, result);\r
310                         }\r
311                         break;\r
312                 case EOpVectorTimesMatrixAssign:\r
313                         if(visit == PostVisit)\r
314                         {\r
315                                 int size = leftType.getNominalSize();\r
316 \r
317                                 for(int i = 0; i < size; i++)\r
318                                 {\r
319                                         Instruction *dot = emit(sw::Shader::OPCODE_DP(size), result, left, right);\r
320                                         dot->dst.mask = 1 << i;\r
321                                         argument(dot->src[1], right, i);\r
322                                 }\r
323 \r
324                                 assignLvalue(left, result);\r
325                         }\r
326                         break;\r
327                 case EOpMatrixTimesMatrixAssign:\r
328                         if(visit == PostVisit)\r
329                         {\r
330                                 int dim = leftType.getNominalSize();\r
331 \r
332                                 for(int i = 0; i < dim; i++)\r
333                                 {\r
334                                         Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);\r
335                                         mul->dst.index += i;\r
336                                         argument(mul->src[1], right, i);\r
337                                         mul->src[1].swizzle = 0x00;\r
338 \r
339                                         for(int j = 1; j < dim; j++)\r
340                                         {\r
341                                                 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, left, right, result);\r
342                                                 mad->dst.index += i;\r
343                                                 argument(mad->src[0], left, j);\r
344                                                 argument(mad->src[1], right, i);\r
345                                                 mad->src[1].swizzle = j * 0x55;\r
346                                                 argument(mad->src[2], result, i);\r
347                                         }\r
348                                 }\r
349 \r
350                                 assignLvalue(left, result);\r
351                         }\r
352                         break;\r
353                 case EOpIndexDirect:\r
354                         if(visit == PostVisit)\r
355                         {\r
356                                 int index = right->getAsConstantUnion()->getIConst(0);\r
357 \r
358                                 if(result->isMatrix() || result->isStruct())\r
359                                 {\r
360                                         ASSERT(left->isArray());\r
361                                         copy(result, left, index * left->elementRegisterCount());\r
362                                 }\r
363                                 else if(result->isRegister())\r
364                                 {\r
365                                         Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left);\r
366 \r
367                                         if(left->isRegister())\r
368                                         {\r
369                                                 mov->src[0].swizzle = index;\r
370                                         }\r
371                                         else if(left->isArray())\r
372                                         {\r
373                                                 argument(mov->src[0], left, index * left->elementRegisterCount());\r
374                                         }\r
375                                         else if(left->isMatrix())\r
376                                         {\r
377                                                 ASSERT(index < left->getNominalSize());   // FIXME: Report semantic error\r
378                                                 argument(mov->src[0], left, index);\r
379                                         }\r
380                                         else UNREACHABLE(0);\r
381                                 }\r
382                                 else UNREACHABLE(0);\r
383                         }\r
384                         break;\r
385                 case EOpIndexIndirect:\r
386                         if(visit == PostVisit)\r
387                         {\r
388                                 if(left->isArray() || left->isMatrix())\r
389                                 {\r
390                                         for(int index = 0; index < result->totalRegisterCount(); index++)\r
391                                         {\r
392                                                 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left);\r
393                                                 mov->dst.index += index;\r
394                                                 mov->dst.mask = writeMask(result, index);\r
395                                                 argument(mov->src[0], left, index);\r
396 \r
397                                                 if(left->totalRegisterCount() > 1)\r
398                                                 {\r
399                                                         sw::Shader::SourceParameter relativeRegister;\r
400                                                         argument(relativeRegister, right);\r
401 \r
402                                                         mov->src[0].rel.type = relativeRegister.type;\r
403                                                         mov->src[0].rel.index = relativeRegister.index;\r
404                                                         mov->src[0].rel.scale = result->totalRegisterCount();\r
405                                                         mov->src[0].rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);\r
406                                                 }\r
407                                         }\r
408                                 }\r
409                                 else if(left->isRegister())\r
410                                 {\r
411                                         emit(sw::Shader::OPCODE_EXTRACT, result, left, right);\r
412                                 }\r
413                                 else UNREACHABLE(0);\r
414                         }\r
415                         break;\r
416                 case EOpIndexDirectStruct:\r
417                 case EOpIndexDirectInterfaceBlock:\r
418                         if(visit == PostVisit)\r
419                         {\r
420                                 ASSERT(leftType.isStruct() || (leftType.isInterfaceBlock()));\r
421 \r
422                                 const TFieldList& fields = (node->getOp() == EOpIndexDirectStruct) ?\r
423                                                            leftType.getStruct()->fields() :\r
424                                                            leftType.getInterfaceBlock()->fields();\r
425                                 int index = right->getAsConstantUnion()->getIConst(0);\r
426                                 int fieldOffset = 0;\r
427 \r
428                                 for(int i = 0; i < index; i++)\r
429                                 {\r
430                                         fieldOffset += fields[i]->type()->totalRegisterCount();\r
431                                 }\r
432 \r
433                                 copy(result, left, fieldOffset);\r
434                         }\r
435                         break;\r
436                 case EOpVectorSwizzle:\r
437                         if(visit == PostVisit)\r
438                         {\r
439                                 int swizzle = 0;\r
440                                 TIntermAggregate *components = right->getAsAggregate();\r
441 \r
442                                 if(components)\r
443                                 {\r
444                                         TIntermSequence &sequence = components->getSequence();\r
445                                         int component = 0;\r
446 \r
447                                         for(TIntermSequence::iterator sit = sequence.begin(); sit != sequence.end(); sit++)\r
448                                         {\r
449                                                 TIntermConstantUnion *element = (*sit)->getAsConstantUnion();\r
450 \r
451                                                 if(element)\r
452                                                 {\r
453                                                         int i = element->getUnionArrayPointer()[0].getIConst();\r
454                                                         swizzle |= i << (component * 2);\r
455                                                         component++;\r
456                                                 }\r
457                                                 else UNREACHABLE(0);\r
458                                         }\r
459                                 }\r
460                                 else UNREACHABLE(0);\r
461 \r
462                                 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, left);\r
463                                 mov->src[0].swizzle = swizzle;\r
464                         }\r
465                         break;\r
466                 case EOpAddAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, left, right); break;\r
467                 case EOpAdd:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ADD, result), result, left, right);       break;\r
468                 case EOpSubAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, left, right); break;\r
469                 case EOpSub:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_SUB, result), result, left, right);       break;\r
470                 case EOpMulAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, left, right); break;\r
471                 case EOpMul:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_MUL, result), result, left, right);       break;\r
472                 case EOpDivAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, left, right); break;\r
473                 case EOpDiv:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_DIV, result), result, left, right);       break;\r
474                 case EOpIModAssign:          if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, left, right); break;\r
475                 case EOpIMod:                if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_IMOD, result), result, left, right);       break;\r
476                 case EOpBitShiftLeftAssign:  if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_SHL, result, left, left, right); break;\r
477                 case EOpBitShiftLeft:        if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_SHL, result, left, right);       break;\r
478                 case EOpBitShiftRightAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, left, right); break;\r
479                 case EOpBitShiftRight:       if(visit == PostVisit) emitBinary(getOpcode(sw::Shader::OPCODE_ISHR, result), result, left, right);       break;\r
480                 case EOpBitwiseAndAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_AND, result, left, left, right); break;\r
481                 case EOpBitwiseAnd:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_AND, result, left, right);       break;\r
482                 case EOpBitwiseXorAssign:    if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_XOR, result, left, left, right); break;\r
483                 case EOpBitwiseXor:          if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_XOR, result, left, right);       break;\r
484                 case EOpBitwiseOrAssign:     if(visit == PostVisit) emitAssign(sw::Shader::OPCODE_OR, result, left, left, right);  break;\r
485                 case EOpBitwiseOr:           if(visit == PostVisit) emitBinary(sw::Shader::OPCODE_OR, result, left, right);        break;\r
486                 case EOpEqual:\r
487                         if(visit == PostVisit)\r
488                         {\r
489                                 emitBinary(sw::Shader::OPCODE_EQ, result, left, right);\r
490 \r
491                                 for(int index = 1; index < left->totalRegisterCount(); index++)\r
492                                 {\r
493                                         Temporary equal(this);\r
494                                         Instruction *eq = emit(sw::Shader::OPCODE_EQ, &equal, left, right);\r
495                                         argument(eq->src[0], left, index);\r
496                                         argument(eq->src[1], right, index);\r
497                                         emit(sw::Shader::OPCODE_AND, result, result, &equal);\r
498                                 }\r
499                         }\r
500                         break;\r
501                 case EOpNotEqual:\r
502                         if(visit == PostVisit)\r
503                         {\r
504                                 emitBinary(sw::Shader::OPCODE_NE, result, left, right);\r
505 \r
506                                 for(int index = 1; index < left->totalRegisterCount(); index++)\r
507                                 {\r
508                                         Temporary notEqual(this);\r
509                                         Instruction *eq = emit(sw::Shader::OPCODE_NE, &notEqual, left, right);\r
510                                         argument(eq->src[0], left, index);\r
511                                         argument(eq->src[1], right, index);\r
512                                         emit(sw::Shader::OPCODE_OR, result, result, &notEqual);\r
513                                 }\r
514                         }\r
515                         break;\r
516                 case EOpLessThan:                if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, left, right); break;\r
517                 case EOpGreaterThan:             if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, left, right); break;\r
518                 case EOpLessThanEqual:           if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, left, right); break;\r
519                 case EOpGreaterThanEqual:        if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, left, right); break;\r
520                 case EOpVectorTimesScalarAssign: if(visit == PostVisit) emitAssign(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, left, right); break;\r
521                 case EOpVectorTimesScalar:       if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MUL, left), result, left, right); break;\r
522                 case EOpMatrixTimesScalar:\r
523                         if(visit == PostVisit)\r
524                         {\r
525                                 for(int i = 0; i < leftType.getNominalSize(); i++)\r
526                                 {\r
527                                         Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);\r
528                                         mul->dst.index += i;\r
529                                         argument(mul->src[0], left, i);\r
530                                 }\r
531                         }\r
532                         break;\r
533                 case EOpVectorTimesMatrix:\r
534                         if(visit == PostVisit)\r
535                         {\r
536                                 sw::Shader::Opcode dpOpcode = sw::Shader::OPCODE_DP(leftType.getNominalSize());\r
537 \r
538                                 int size = rightType.getNominalSize();\r
539                                 for(int i = 0; i < size; i++)\r
540                                 {\r
541                                         Instruction *dot = emit(dpOpcode, result, left, right);\r
542                                         dot->dst.mask = 1 << i;\r
543                                         argument(dot->src[1], right, i);\r
544                                 }\r
545                         }\r
546                         break;\r
547                 case EOpMatrixTimesVector:\r
548                         if(visit == PostVisit)\r
549                         {\r
550                                 Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);\r
551                                 mul->src[1].swizzle = 0x00;\r
552 \r
553                                 int size = rightType.getNominalSize();\r
554                                 for(int i = 1; i < size; i++)\r
555                                 {\r
556                                         Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, left, right, result);\r
557                                         argument(mad->src[0], left, i);\r
558                                         mad->src[1].swizzle = i * 0x55;\r
559                                 }\r
560                         }\r
561                         break;\r
562                 case EOpMatrixTimesMatrix:\r
563                         if(visit == PostVisit)\r
564                         {\r
565                                 int dim = leftType.getNominalSize();\r
566 \r
567                                 int size = rightType.getNominalSize();\r
568                                 for(int i = 0; i < size; i++)\r
569                                 {\r
570                                         Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, left, right);\r
571                                         mul->dst.index += i;\r
572                                         argument(mul->src[1], right, i);\r
573                                         mul->src[1].swizzle = 0x00;\r
574 \r
575                                         for(int j = 1; j < dim; j++)\r
576                                         {\r
577                                                 Instruction *mad = emit(sw::Shader::OPCODE_MAD, result, left, right, result);\r
578                                                 mad->dst.index += i;\r
579                                                 argument(mad->src[0], left, j);\r
580                                                 argument(mad->src[1], right, i);\r
581                                                 mad->src[1].swizzle = j * 0x55;\r
582                                                 argument(mad->src[2], result, i);\r
583                                         }\r
584                                 }\r
585                         }\r
586                         break;\r
587                 case EOpLogicalOr:\r
588                         if(trivial(right, 6))\r
589                         {\r
590                                 if(visit == PostVisit)\r
591                                 {\r
592                                         emit(sw::Shader::OPCODE_OR, result, left, right);\r
593                                 }\r
594                         }\r
595                         else   // Short-circuit evaluation\r
596                         {\r
597                                 if(visit == InVisit)\r
598                                 {\r
599                                         emit(sw::Shader::OPCODE_MOV, result, left);\r
600                                         Instruction *ifnot = emit(sw::Shader::OPCODE_IF, 0, result);\r
601                                         ifnot->src[0].modifier = sw::Shader::MODIFIER_NOT;\r
602                                 }\r
603                                 else if(visit == PostVisit)\r
604                                 {\r
605                                         emit(sw::Shader::OPCODE_MOV, result, right);\r
606                                         emit(sw::Shader::OPCODE_ENDIF);\r
607                                 }\r
608                         }\r
609                         break;\r
610                 case EOpLogicalXor:        if(visit == PostVisit) emit(sw::Shader::OPCODE_XOR, result, left, right); break;\r
611                 case EOpLogicalAnd:\r
612                         if(trivial(right, 6))\r
613                         {\r
614                                 if(visit == PostVisit)\r
615                                 {\r
616                                         emit(sw::Shader::OPCODE_AND, result, left, right);\r
617                                 }\r
618                         }\r
619                         else   // Short-circuit evaluation\r
620                         {\r
621                                 if(visit == InVisit)\r
622                                 {\r
623                                         emit(sw::Shader::OPCODE_MOV, result, left);\r
624                                         emit(sw::Shader::OPCODE_IF, 0, result);\r
625                                 }\r
626                                 else if(visit == PostVisit)\r
627                                 {\r
628                                         emit(sw::Shader::OPCODE_MOV, result, right);\r
629                                         emit(sw::Shader::OPCODE_ENDIF);\r
630                                 }\r
631                         }\r
632                         break;\r
633                 default: UNREACHABLE(node->getOp());\r
634                 }\r
635 \r
636                 return true;\r
637         }\r
638 \r
639         void OutputASM::emitDeterminant(TIntermTyped *result, TIntermTyped *arg, int size, int col, int row, int outCol, int outRow)\r
640         {\r
641                 switch(size)\r
642                 {\r
643                 case 1: // Used for cofactor computation only\r
644                         {\r
645                                 // For a 2x2 matrix, the cofactor is simply a transposed move or negate\r
646                                 bool isMov = (row == col);\r
647                                 sw::Shader::Opcode op = isMov ? sw::Shader::OPCODE_MOV : sw::Shader::OPCODE_NEG;\r
648                                 Instruction *mov = emit(op, result, arg);\r
649                                 mov->src[0].index += isMov ? 1 - row : row;\r
650                                 mov->src[0].swizzle = 0x55 * (isMov ? 1 - col : col);\r
651                                 mov->dst.index += outCol;\r
652                                 mov->dst.mask = 1 << outRow;\r
653                         }\r
654                         break;\r
655                 case 2:\r
656                         {\r
657                                 static const unsigned int swizzle[3] = { 0x99, 0x88, 0x44 }; // xy?? : yzyz, xzxz, xyxy\r
658 \r
659                                 bool isCofactor = (col >= 0) && (row >= 0);\r
660                                 int col0 = (isCofactor && (col <= 0)) ? 1 : 0;\r
661                                 int col1 = (isCofactor && (col <= 1)) ? 2 : 1;\r
662                                 bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));\r
663 \r
664                                 Instruction *det = emit(sw::Shader::OPCODE_DET2, result, arg, arg);\r
665                                 det->src[0].index += negate ? col1 : col0;\r
666                                 det->src[1].index += negate ? col0 : col1;\r
667                                 det->src[0].swizzle = det->src[1].swizzle = swizzle[isCofactor ? row : 2];\r
668                                 det->dst.index += outCol;\r
669                                 det->dst.mask = 1 << outRow;\r
670                         }\r
671                         break;\r
672                 case 3:\r
673                         {\r
674                                 static const unsigned int swizzle[4] = { 0xF9, 0xF8, 0xF4, 0xE4 }; // xyz? : yzww, xzww, xyww, xyzw\r
675 \r
676                                 bool isCofactor = (col >= 0) && (row >= 0);\r
677                                 int col0 = (isCofactor && (col <= 0)) ? 1 : 0;\r
678                                 int col1 = (isCofactor && (col <= 1)) ? 2 : 1;\r
679                                 int col2 = (isCofactor && (col <= 2)) ? 3 : 2;\r
680                                 bool negate = isCofactor && ((col & 0x01) ^ (row & 0x01));\r
681 \r
682                                 Instruction *det = emit(sw::Shader::OPCODE_DET3, result, arg, arg, arg);\r
683                                 det->src[0].index += col0;\r
684                                 det->src[1].index += negate ? col2 : col1;\r
685                                 det->src[2].index += negate ? col1 : col2;\r
686                                 det->src[0].swizzle = det->src[1].swizzle = det->src[2].swizzle = swizzle[isCofactor ? row : 3];\r
687                                 det->dst.index += outCol;\r
688                                 det->dst.mask = 1 << outRow;\r
689                         }\r
690                         break;\r
691                 case 4:\r
692                         {\r
693                                 Instruction *det = emit(sw::Shader::OPCODE_DET4, result, arg, arg, arg, arg);\r
694                                 det->src[1].index += 1;\r
695                                 det->src[2].index += 2;\r
696                                 det->src[3].index += 3;\r
697                                 det->dst.index += outCol;\r
698                                 det->dst.mask = 1 << outRow;\r
699                         }\r
700                         break;\r
701                 default:\r
702                         UNREACHABLE(size);\r
703                         break;\r
704                 }\r
705         }\r
706 \r
707         bool OutputASM::visitUnary(Visit visit, TIntermUnary *node)\r
708         {\r
709                 if(currentScope != emitScope)\r
710                 {\r
711                         return false;\r
712                 }\r
713 \r
714                 TIntermTyped *result = node;\r
715                 TIntermTyped *arg = node->getOperand();\r
716                 TBasicType basicType = arg->getType().getBasicType();\r
717 \r
718                 union\r
719                 {\r
720                         float f;\r
721                         int i;\r
722                 } one_value;\r
723 \r
724                 if(basicType == EbtInt || basicType == EbtUInt)\r
725                 {\r
726                         one_value.i = 1;\r
727                 }\r
728                 else\r
729                 {\r
730                         one_value.f = 1.0f;\r
731                 }\r
732 \r
733                 Constant one(one_value.f, one_value.f, one_value.f, one_value.f);\r
734                 Constant rad(1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f, 1.74532925e-2f);\r
735                 Constant deg(5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f, 5.72957795e+1f);\r
736 \r
737                 switch(node->getOp())\r
738                 {\r
739                 case EOpNegative:\r
740                         if(visit == PostVisit)\r
741                         {\r
742                                 sw::Shader::Opcode negOpcode = getOpcode(sw::Shader::OPCODE_NEG, arg);\r
743                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
744                                 {\r
745                                         Instruction *neg = emit(negOpcode, result, arg);\r
746                                         neg->dst.index += index;\r
747                                         argument(neg->src[0], arg, index);\r
748                                 }\r
749                         }\r
750                         break;\r
751                 case EOpVectorLogicalNot: if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;\r
752                 case EOpLogicalNot:       if(visit == PostVisit) emit(sw::Shader::OPCODE_NOT, result, arg); break;\r
753                 case EOpPostIncrement:\r
754                         if(visit == PostVisit)\r
755                         {\r
756                                 copy(result, arg);\r
757 \r
758                                 sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);\r
759                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
760                                 {\r
761                                         Instruction *add = emit(addOpcode, arg, arg, &one);\r
762                                         add->dst.index += index;\r
763                                         argument(add->src[0], arg, index);\r
764                                 }\r
765 \r
766                                 assignLvalue(arg, arg);\r
767                         }\r
768                         break;\r
769                 case EOpPostDecrement:\r
770                         if(visit == PostVisit)\r
771                         {\r
772                                 copy(result, arg);\r
773 \r
774                                 sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);\r
775                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
776                                 {\r
777                                         Instruction *sub = emit(subOpcode, arg, arg, &one);\r
778                                         sub->dst.index += index;\r
779                                         argument(sub->src[0], arg, index);\r
780                                 }\r
781 \r
782                                 assignLvalue(arg, arg);\r
783                         }\r
784                         break;\r
785                 case EOpPreIncrement:\r
786                         if(visit == PostVisit)\r
787                         {\r
788                                 sw::Shader::Opcode addOpcode = getOpcode(sw::Shader::OPCODE_ADD, arg);\r
789                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
790                                 {\r
791                                         Instruction *add = emit(addOpcode, result, arg, &one);\r
792                                         add->dst.index += index;\r
793                                         argument(add->src[0], arg, index);\r
794                                 }\r
795 \r
796                                 assignLvalue(arg, result);\r
797                         }\r
798                         break;\r
799                 case EOpPreDecrement:\r
800                         if(visit == PostVisit)\r
801                         {\r
802                                 sw::Shader::Opcode subOpcode = getOpcode(sw::Shader::OPCODE_SUB, arg);\r
803                                 for(int index = 0; index < arg->totalRegisterCount(); index++)\r
804                                 {\r
805                                         Instruction *sub = emit(subOpcode, result, arg, &one);\r
806                                         sub->dst.index += index;\r
807                                         argument(sub->src[0], arg, index);\r
808                                 }\r
809 \r
810                                 assignLvalue(arg, result);\r
811                         }\r
812                         break;\r
813                 case EOpRadians:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &rad); break;\r
814                 case EOpDegrees:          if(visit == PostVisit) emit(sw::Shader::OPCODE_MUL, result, arg, &deg); break;\r
815                 case EOpSin:              if(visit == PostVisit) emit(sw::Shader::OPCODE_SIN, result, arg); break;\r
816                 case EOpCos:              if(visit == PostVisit) emit(sw::Shader::OPCODE_COS, result, arg); break;\r
817                 case EOpTan:              if(visit == PostVisit) emit(sw::Shader::OPCODE_TAN, result, arg); break;\r
818                 case EOpAsin:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ASIN, result, arg); break;\r
819                 case EOpAcos:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOS, result, arg); break;\r
820                 case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN, result, arg); break;\r
821                 case EOpSinh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SINH, result, arg); break;\r
822                 case EOpCosh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_COSH, result, arg); break;\r
823                 case EOpTanh:             if(visit == PostVisit) emit(sw::Shader::OPCODE_TANH, result, arg); break;\r
824                 case EOpAsinh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ASINH, result, arg); break;\r
825                 case EOpAcosh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ACOSH, result, arg); break;\r
826                 case EOpAtanh:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ATANH, result, arg); break;\r
827                 case EOpExp:              if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP, result, arg); break;\r
828                 case EOpLog:              if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG, result, arg); break;\r
829                 case EOpExp2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_EXP2, result, arg); break;\r
830                 case EOpLog2:             if(visit == PostVisit) emit(sw::Shader::OPCODE_LOG2, result, arg); break;\r
831                 case EOpSqrt:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SQRT, result, arg); break;\r
832                 case EOpInverseSqrt:      if(visit == PostVisit) emit(sw::Shader::OPCODE_RSQ, result, arg); break;\r
833                 case EOpAbs:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ABS, result, arg); break;\r
834                 case EOpSign:             if(visit == PostVisit) emit(sw::Shader::OPCODE_SGN, result, arg); break;\r
835                 case EOpFloor:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOOR, result, arg); break;\r
836                 case EOpTrunc:            if(visit == PostVisit) emit(sw::Shader::OPCODE_TRUNC, result, arg); break;\r
837                 case EOpRound:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUND, result, arg); break;\r
838                 case EOpRoundEven:        if(visit == PostVisit) emit(sw::Shader::OPCODE_ROUNDEVEN, result, arg); break;\r
839                 case EOpCeil:             if(visit == PostVisit) emit(sw::Shader::OPCODE_CEIL, result, arg, result); break;\r
840                 case EOpFract:            if(visit == PostVisit) emit(sw::Shader::OPCODE_FRC, result, arg); break;\r
841                 case EOpIsNan:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISNAN, result, arg); break;\r
842                 case EOpIsInf:            if(visit == PostVisit) emit(sw::Shader::OPCODE_ISINF, result, arg); break;\r
843                 case EOpLength:           if(visit == PostVisit) emit(sw::Shader::OPCODE_LEN(dim(arg)), result, arg); break;\r
844                 case EOpNormalize:        if(visit == PostVisit) emit(sw::Shader::OPCODE_NRM(dim(arg)), result, arg); break;\r
845                 case EOpDFdx:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDX, result, arg); break;\r
846                 case EOpDFdy:             if(visit == PostVisit) emit(sw::Shader::OPCODE_DFDY, result, arg); break;\r
847                 case EOpFwidth:           if(visit == PostVisit) emit(sw::Shader::OPCODE_FWIDTH, result, arg); break;\r
848                 case EOpAny:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ANY, result, arg); break;\r
849                 case EOpAll:              if(visit == PostVisit) emit(sw::Shader::OPCODE_ALL, result, arg); break;\r
850                 case EOpFloatBitsToInt:   if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOINT, result, arg); break;\r
851                 case EOpFloatBitsToUint:  if(visit == PostVisit) emit(sw::Shader::OPCODE_FLOATBITSTOUINT, result, arg); break;\r
852                 case EOpIntBitsToFloat:   if(visit == PostVisit) emit(sw::Shader::OPCODE_INTBITSTOFLOAT, result, arg); break;\r
853                 case EOpUintBitsToFloat:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UINTBITSTOFLOAT, result, arg); break;\r
854                 case EOpPackSnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKSNORM2x16, result, arg); break;\r
855                 case EOpPackUnorm2x16:    if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKUNORM2x16, result, arg); break;\r
856                 case EOpPackHalf2x16:     if(visit == PostVisit) emit(sw::Shader::OPCODE_PACKHALF2x16, result, arg); break;\r
857                 case EOpUnpackSnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKSNORM2x16, result, arg); break;\r
858                 case EOpUnpackUnorm2x16:  if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKUNORM2x16, result, arg); break;\r
859                 case EOpUnpackHalf2x16:   if(visit == PostVisit) emit(sw::Shader::OPCODE_UNPACKHALF2x16, result, arg); break;\r
860                 case EOpTranspose:\r
861                         if(visit == PostVisit)\r
862                         {\r
863                                 int numCols = arg->getNominalSize();\r
864                                 int numRows = arg->getSecondarySize();\r
865                                 for(int i = 0; i < numCols; ++i)\r
866                                 {\r
867                                         for(int j = 0; j < numRows; ++j)\r
868                                         {\r
869                                                 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, arg);\r
870                                                 mov->src[0].index += i;\r
871                                                 mov->src[0].swizzle = 0x55 * j;\r
872                                                 mov->dst.index += j;\r
873                                                 mov->dst.mask = 1 << i;\r
874                                         }\r
875                                 }\r
876                         }\r
877                         break;\r
878                 case EOpDeterminant:\r
879                         if(visit == PostVisit)\r
880                         {\r
881                                 int size = arg->getNominalSize();\r
882                                 ASSERT(size == arg->getSecondarySize());\r
883 \r
884                                 emitDeterminant(result, arg, size);\r
885                         }\r
886                         break;\r
887                 case EOpInverse:\r
888                         if(visit == PostVisit)\r
889                         {\r
890                                 int size = arg->getNominalSize();\r
891                                 ASSERT(size == arg->getSecondarySize());\r
892 \r
893                                 // Compute transposed matrix of cofactors\r
894                                 for(int i = 0; i < size; ++i)\r
895                                 {\r
896                                         for(int j = 0; j < size; ++j)\r
897                                         {\r
898                                                 // For a 2x2 matrix, the cofactor is simply a transposed move or negate\r
899                                                 // For a 3x3 or 4x4 matrix, the cofactor is a transposed determinant\r
900                                                 emitDeterminant(result, arg, size - 1, j, i, i, j);\r
901                                         }\r
902                                 }\r
903 \r
904                                 // Compute 1 / determinant\r
905                                 Temporary invDet(this);\r
906                                 emitDeterminant(&invDet, arg, size);\r
907                                 Constant one(1.0f, 1.0f, 1.0f, 1.0f);\r
908                                 Instruction *div = emit(sw::Shader::OPCODE_DIV, &invDet, &one, &invDet);\r
909                                 div->src[1].swizzle = 0x00; // xxxx\r
910 \r
911                                 // Divide transposed matrix of cofactors by determinant\r
912                                 for(int i = 0; i < size; ++i)\r
913                                 {\r
914                                         Instruction *div = emit(sw::Shader::OPCODE_MUL, result, result, &invDet);\r
915                                         div->src[0].index += i;\r
916                                         div->dst.index += i;\r
917                                 }\r
918                         }\r
919                         break;\r
920                 default: UNREACHABLE(node->getOp());\r
921                 }\r
922 \r
923                 return true;\r
924         }\r
925 \r
926         bool OutputASM::visitAggregate(Visit visit, TIntermAggregate *node)\r
927         {\r
928                 if(currentScope != emitScope && node->getOp() != EOpFunction && node->getOp() != EOpSequence)\r
929                 {\r
930                         return false;\r
931                 }\r
932 \r
933                 Constant zero(0.0f, 0.0f, 0.0f, 0.0f);\r
934 \r
935                 TIntermTyped *result = node;\r
936                 const TType &resultType = node->getType();\r
937                 TIntermSequence &arg = node->getSequence();\r
938                 int argumentCount = arg.size();\r
939 \r
940                 switch(node->getOp())\r
941                 {\r
942                 case EOpSequence:           break;\r
943                 case EOpDeclaration:        break;\r
944                 case EOpPrototype:          break;\r
945                 case EOpComma:\r
946                         if(visit == PostVisit)\r
947                         {\r
948                                 copy(result, arg[1]);\r
949                         }\r
950                         break;\r
951                 case EOpFunction:\r
952                         if(visit == PreVisit)\r
953                         {\r
954                                 const TString &name = node->getName();\r
955 \r
956                                 if(emitScope == FUNCTION)\r
957                                 {\r
958                                         if(functionArray.size() > 1)   // No need for a label when there's only main()\r
959                                         {\r
960                                                 Instruction *label = emit(sw::Shader::OPCODE_LABEL);\r
961                                                 label->dst.type = sw::Shader::PARAMETER_LABEL;\r
962 \r
963                                                 const Function *function = findFunction(name);\r
964                                                 ASSERT(function);   // Should have been added during global pass\r
965                                                 label->dst.index = function->label;\r
966                                                 currentFunction = function->label;\r
967                                         }\r
968                                 }\r
969                                 else if(emitScope == GLOBAL)\r
970                                 {\r
971                                         if(name != "main(")\r
972                                         {\r
973                                                 TIntermSequence &arguments = node->getSequence()[0]->getAsAggregate()->getSequence();\r
974                                                 functionArray.push_back(Function(functionArray.size(), name, &arguments, node));\r
975                                         }\r
976                                 }\r
977                                 else UNREACHABLE(emitScope);\r
978 \r
979                                 currentScope = FUNCTION;\r
980                         }\r
981                         else if(visit == PostVisit)\r
982                         {\r
983                                 if(emitScope == FUNCTION)\r
984                                 {\r
985                                         if(functionArray.size() > 1)   // No need to return when there's only main()\r
986                                         {\r
987                                                 emit(sw::Shader::OPCODE_RET);\r
988                                         }\r
989                                 }\r
990 \r
991                                 currentScope = GLOBAL;\r
992                         }\r
993                         break;\r
994                 case EOpFunctionCall:\r
995                         if(visit == PostVisit)\r
996                         {\r
997                                 if(node->isUserDefined())\r
998                                 {\r
999                                         const TString &name = node->getName();\r
1000                                         const Function *function = findFunction(name);\r
1001 \r
1002                                         if(!function)\r
1003                                         {\r
1004                                                 mContext.error(node->getLine(), "function definition not found", name.c_str());\r
1005                                                 return false;\r
1006                                         }\r
1007 \r
1008                                         TIntermSequence &arguments = *function->arg;\r
1009 \r
1010                                         for(int i = 0; i < argumentCount; i++)\r
1011                                         {\r
1012                                                 TIntermTyped *in = arguments[i]->getAsTyped();\r
1013 \r
1014                                                 if(in->getQualifier() == EvqIn ||\r
1015                                                    in->getQualifier() == EvqInOut ||\r
1016                                                    in->getQualifier() == EvqConstReadOnly)\r
1017                                                 {\r
1018                                                         copy(in, arg[i]);\r
1019                                                 }\r
1020                                         }\r
1021 \r
1022                                         Instruction *call = emit(sw::Shader::OPCODE_CALL);\r
1023                                         call->dst.type = sw::Shader::PARAMETER_LABEL;\r
1024                                         call->dst.index = function->label;\r
1025 \r
1026                                         if(function->ret && function->ret->getType().getBasicType() != EbtVoid)\r
1027                                         {\r
1028                                                 copy(result, function->ret);\r
1029                                         }\r
1030 \r
1031                                         for(int i = 0; i < argumentCount; i++)\r
1032                                         {\r
1033                                                 TIntermTyped *argument = arguments[i]->getAsTyped();\r
1034                                                 TIntermTyped *out = arg[i]->getAsTyped();\r
1035                                                                 \r
1036                                                 if(argument->getQualifier() == EvqOut ||\r
1037                                                    argument->getQualifier() == EvqInOut)\r
1038                                                 {\r
1039                                                         copy(out, argument);\r
1040                                                 }\r
1041                                         }\r
1042                                 }\r
1043                                 else\r
1044                                 {\r
1045                                         TString name = TFunction::unmangleName(node->getName());\r
1046 \r
1047                                         if(name == "texture" || name == "texture2D" || name == "textureCube" || name == "texture3D")\r
1048                                         {\r
1049                                                 if(argumentCount == 2)\r
1050                                                 {\r
1051                                                         emit(sw::Shader::OPCODE_TEX, result, arg[1], arg[0]);\r
1052                                                 }\r
1053                                                 else if(argumentCount == 3)   // bias\r
1054                                                 {\r
1055                                                         Temporary uvwb(this);\r
1056                                                         emit(sw::Shader::OPCODE_MOV, &uvwb, arg[1]);\r
1057                                                         Instruction *bias = emit(sw::Shader::OPCODE_MOV, &uvwb, arg[2]);\r
1058                                                         bias->dst.mask = 0x8;\r
1059 \r
1060                                                         Instruction *tex = emit(sw::Shader::OPCODE_TEX, result, &uvwb, arg[0]);   // FIXME: Implement an efficient TEXLDB instruction\r
1061                                                         tex->bias = true;\r
1062                                                 }\r
1063                                                 else UNREACHABLE(argumentCount);\r
1064                                         }\r
1065                                         else if(name == "texture2DProj" || name == "textureProj")\r
1066                                         {\r
1067                                                 TIntermTyped *t = arg[1]->getAsTyped();\r
1068 \r
1069                                                 if(argumentCount == 2)\r
1070                                                 {\r
1071                                                         Instruction *tex = emit(sw::Shader::OPCODE_TEX, result, arg[1], arg[0]);\r
1072                                                         tex->project = true;\r
1073 \r
1074                                                         if(t->getNominalSize() == 3)\r
1075                                                         {\r
1076                                                                 tex->src[0].swizzle = 0xA4;\r
1077                                                         }\r
1078                                                         else ASSERT(t->getNominalSize() == 4);\r
1079                                                 }\r
1080                                                 else if(argumentCount == 3)   // bias\r
1081                                                 {\r
1082                                                         Temporary proj(this);\r
1083 \r
1084                                                         if(t->getNominalSize() == 3)\r
1085                                                         {\r
1086                                                                 Instruction *div = emit(sw::Shader::OPCODE_DIV, &proj, arg[1], arg[1]);\r
1087                                                                 div->src[1].swizzle = 0xAA;\r
1088                                                                 div->dst.mask = 0x3;\r
1089                                                         }\r
1090                                                         else if(t->getNominalSize() == 4)\r
1091                                                         {\r
1092                                                                 Instruction *div = emit(sw::Shader::OPCODE_DIV, &proj, arg[1], arg[1]);\r
1093                                                                 div->src[1].swizzle = 0xFF;\r
1094                                                                 div->dst.mask = 0x3;\r
1095                                                         }\r
1096                                                         else UNREACHABLE(t->getNominalSize());\r
1097 \r
1098                                                         Instruction *bias = emit(sw::Shader::OPCODE_MOV, &proj, arg[2]);\r
1099                                                         bias->dst.mask = 0x8;\r
1100 \r
1101                                                         Instruction *tex = emit(sw::Shader::OPCODE_TEX, result, &proj, arg[0]);\r
1102                                                         tex->bias = true;\r
1103                                                 }\r
1104                                                 else UNREACHABLE(argumentCount);\r
1105                                         }\r
1106                                         else if(name == "texture2DLod" || name == "textureCubeLod" || name == "textureLod")\r
1107                                         {\r
1108                                                 Temporary uvwb(this);\r
1109                                                 emit(sw::Shader::OPCODE_MOV, &uvwb, arg[1]);\r
1110                                                 Instruction *lod = emit(sw::Shader::OPCODE_MOV, &uvwb, arg[2]);\r
1111                                                 lod->dst.mask = 0x8;\r
1112 \r
1113                                                 emit(sw::Shader::OPCODE_TEXLDL, result, &uvwb, arg[0]);\r
1114                                         }\r
1115                                         else if(name == "texture2DProjLod" || name == "textureProjLod")\r
1116                                         {\r
1117                                                 TIntermTyped *t = arg[1]->getAsTyped();\r
1118                                                 Temporary proj(this);\r
1119 \r
1120                                                 if(t->getNominalSize() == 3)\r
1121                                                 {\r
1122                                                         Instruction *div = emit(sw::Shader::OPCODE_DIV, &proj, arg[1], arg[1]);\r
1123                                                         div->src[1].swizzle = 0xAA;\r
1124                                                         div->dst.mask = 0x3;\r
1125                                                 }\r
1126                                                 else if(t->getNominalSize() == 4)\r
1127                                                 {\r
1128                                                         Instruction *div = emit(sw::Shader::OPCODE_DIV, &proj, arg[1], arg[1]);\r
1129                                                         div->src[1].swizzle = 0xFF;\r
1130                                                         div->dst.mask = 0x3;\r
1131                                                 }\r
1132                                                 else UNREACHABLE(t->getNominalSize());\r
1133 \r
1134                                                 Instruction *lod = emit(sw::Shader::OPCODE_MOV, &proj, arg[2]);\r
1135                                                 lod->dst.mask = 0x8;\r
1136 \r
1137                                                 emit(sw::Shader::OPCODE_TEXLDL, result, &proj, arg[0]);\r
1138                                         }\r
1139                                         else UNREACHABLE(0);\r
1140                                 }\r
1141                         }\r
1142                         break;\r
1143                 case EOpParameters:\r
1144                         break;\r
1145                 case EOpConstructFloat:\r
1146                 case EOpConstructVec2:\r
1147                 case EOpConstructVec3:\r
1148                 case EOpConstructVec4:\r
1149                 case EOpConstructBool:\r
1150                 case EOpConstructBVec2:\r
1151                 case EOpConstructBVec3:\r
1152                 case EOpConstructBVec4:\r
1153                 case EOpConstructInt:\r
1154                 case EOpConstructIVec2:\r
1155                 case EOpConstructIVec3:\r
1156                 case EOpConstructIVec4:\r
1157                 case EOpConstructUInt:\r
1158                 case EOpConstructUVec2:\r
1159                 case EOpConstructUVec3:\r
1160                 case EOpConstructUVec4:\r
1161                         if(visit == PostVisit)\r
1162                         {\r
1163                                 int component = 0;\r
1164 \r
1165                                 for(int i = 0; i < argumentCount; i++)\r
1166                                 {\r
1167                                         TIntermTyped *argi = arg[i]->getAsTyped();\r
1168                                         int size = argi->getNominalSize();\r
1169 \r
1170                                         if(!argi->isMatrix())\r
1171                                         {\r
1172                                                 Instruction *mov = emitCast(result, argi);\r
1173                                                 mov->dst.mask = (0xF << component) & 0xF;\r
1174                                                 mov->src[0].swizzle = readSwizzle(argi, size) << (component * 2);\r
1175 \r
1176                                                 component += size;\r
1177                                         }\r
1178                                         else   // Matrix\r
1179                                         {\r
1180                                                 int column = 0;\r
1181 \r
1182                                                 while(component < resultType.getNominalSize())\r
1183                                                 {\r
1184                                                         Instruction *mov = emitCast(result, argi);\r
1185                                                         mov->dst.mask = (0xF << component) & 0xF;\r
1186                                                         mov->src[0].index += column;\r
1187                                                         mov->src[0].swizzle = readSwizzle(argi, size) << (component * 2);\r
1188 \r
1189                                                         column++;\r
1190                                                         component += size;\r
1191                                                 }\r
1192                                         }\r
1193                                 }\r
1194                         }\r
1195                         break;\r
1196                 case EOpConstructMat2:\r
1197                 case EOpConstructMat2x3:\r
1198                 case EOpConstructMat2x4:\r
1199                 case EOpConstructMat3x2:\r
1200                 case EOpConstructMat3:\r
1201                 case EOpConstructMat3x4:\r
1202                 case EOpConstructMat4x2:\r
1203                 case EOpConstructMat4x3:\r
1204                 case EOpConstructMat4:\r
1205                         if(visit == PostVisit)\r
1206                         {\r
1207                                 TIntermTyped *arg0 = arg[0]->getAsTyped();\r
1208                                 const int outCols = result->getNominalSize();\r
1209                                 const int outRows = result->getSecondarySize();\r
1210 \r
1211                                 if(arg0->isScalar() && arg.size() == 1)   // Construct scale matrix\r
1212                                 {\r
1213                                         for(int i = 0; i < outCols; i++)\r
1214                                         {\r
1215                                                 Instruction *init = emit(sw::Shader::OPCODE_MOV, result, &zero);\r
1216                                                 init->dst.index += i;\r
1217                                                 Instruction *mov = emitCast(result, arg0);\r
1218                                                 mov->dst.index += i;\r
1219                                                 mov->dst.mask = 1 << i;\r
1220                                                 ASSERT(mov->src[0].swizzle == 0x00);\r
1221                                         }\r
1222                                 }\r
1223                                 else if(arg0->isMatrix())\r
1224                                 {\r
1225                                         const int inCols = arg0->getNominalSize();\r
1226                                         const int inRows = arg0->getSecondarySize();\r
1227 \r
1228                                         for(int i = 0; i < outCols; i++)\r
1229                                         {\r
1230                                                 if(i >= inCols || outRows > inRows)\r
1231                                                 {\r
1232                                                         // Initialize to identity matrix\r
1233                                                         Constant col((i == 0 ? 1.0f : 0.0f), (i == 1 ? 1.0f : 0.0f), (i == 2 ? 1.0f : 0.0f), (i == 3 ? 1.0f : 0.0f));\r
1234                                                         Instruction *mov = emitCast(result, &col);\r
1235                                                         mov->dst.index += i;\r
1236                                                 }\r
1237 \r
1238                                                 if(i < inCols)\r
1239                                                 {\r
1240                                                         Instruction *mov = emitCast(result, arg0);\r
1241                                                         mov->dst.index += i;\r
1242                                                         mov->dst.mask = 0xF >> (4 - inRows);\r
1243                                                         argument(mov->src[0], arg0, i);\r
1244                                                 }\r
1245                                         }\r
1246                                 }\r
1247                                 else\r
1248                                 {\r
1249                                         int column = 0;\r
1250                                         int row = 0;\r
1251 \r
1252                                         for(int i = 0; i < argumentCount; i++)\r
1253                                         {\r
1254                                                 TIntermTyped *argi = arg[i]->getAsTyped();\r
1255                                                 int size = argi->getNominalSize();\r
1256                                                 int element = 0;\r
1257 \r
1258                                                 while(element < size)\r
1259                                                 {\r
1260                                                         Instruction *mov = emitCast(result, argi);\r
1261                                                         mov->dst.index += column;\r
1262                                                         mov->dst.mask = (0xF << row) & 0xF;\r
1263                                                         mov->src[0].swizzle = (readSwizzle(argi, size) << (row * 2)) + 0x55 * element;\r
1264 \r
1265                                                         int end = row + size - element;\r
1266                                                         column = end >= outRows ? column + 1 : column;\r
1267                                                         element = element + outRows - row;\r
1268                                                         row = end >= outRows ? 0 : end;\r
1269                                                 }\r
1270                                         }\r
1271                                 }\r
1272                         }\r
1273                         break;\r
1274                 case EOpConstructStruct:\r
1275                         if(visit == PostVisit)\r
1276                         {\r
1277                                 int offset = 0;\r
1278                                 for(int i = 0; i < argumentCount; i++)\r
1279                                 {\r
1280                                         TIntermTyped *argi = arg[i]->getAsTyped();\r
1281                                         int size = argi->totalRegisterCount();\r
1282 \r
1283                                         for(int index = 0; index < size; index++)\r
1284                                         {\r
1285                                                 Instruction *mov = emit(sw::Shader::OPCODE_MOV, result, argi);\r
1286                                                 mov->dst.index += index + offset;\r
1287                                                 mov->dst.mask = writeMask(result, offset + index);\r
1288                                                 argument(mov->src[0], argi, index);\r
1289                                         }\r
1290 \r
1291                                         offset += size;\r
1292                                 }\r
1293                         }\r
1294                         break;\r
1295                 case EOpLessThan:         if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LT, result, arg[0], arg[1]); break;\r
1296                 case EOpGreaterThan:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GT, result, arg[0], arg[1]); break;\r
1297                 case EOpLessThanEqual:    if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_LE, result, arg[0], arg[1]); break;\r
1298                 case EOpGreaterThanEqual: if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_GE, result, arg[0], arg[1]); break;\r
1299                 case EOpVectorEqual:      if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_EQ, result, arg[0], arg[1]); break;\r
1300                 case EOpVectorNotEqual:   if(visit == PostVisit) emitCmp(sw::Shader::CONTROL_NE, result, arg[0], arg[1]); break;\r
1301                 case EOpMod:              if(visit == PostVisit) emit(sw::Shader::OPCODE_MOD, result, arg[0], arg[1]); break;\r
1302                 case EOpPow:              if(visit == PostVisit) emit(sw::Shader::OPCODE_POW, result, arg[0], arg[1]); break;\r
1303                 case EOpAtan:             if(visit == PostVisit) emit(sw::Shader::OPCODE_ATAN2, result, arg[0], arg[1]); break;\r
1304                 case EOpMin:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, arg[0], arg[1]); break;\r
1305                 case EOpMax:              if(visit == PostVisit) emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]); break;\r
1306                 case EOpClamp:\r
1307                         if(visit == PostVisit)\r
1308                         {\r
1309                                 emit(getOpcode(sw::Shader::OPCODE_MAX, result), result, arg[0], arg[1]);\r
1310                                 emit(getOpcode(sw::Shader::OPCODE_MIN, result), result, result, arg[2]);\r
1311                         }\r
1312                         break;\r
1313                 case EOpMix:         if(visit == PostVisit) emit(sw::Shader::OPCODE_LRP, result, arg[2], arg[1], arg[0]); break;\r
1314                 case EOpStep:        if(visit == PostVisit) emit(sw::Shader::OPCODE_STEP, result, arg[0], arg[1]); break;\r
1315                 case EOpSmoothStep:  if(visit == PostVisit) emit(sw::Shader::OPCODE_SMOOTH, result, arg[0], arg[1], arg[2]); break;\r
1316                 case EOpDistance:    if(visit == PostVisit) emit(sw::Shader::OPCODE_DIST(dim(arg[0])), result, arg[0], arg[1]); break;\r
1317                 case EOpDot:         if(visit == PostVisit) emit(sw::Shader::OPCODE_DP(dim(arg[0])), result, arg[0], arg[1]); break;\r
1318                 case EOpCross:       if(visit == PostVisit) emit(sw::Shader::OPCODE_CRS, result, arg[0], arg[1]); break;\r
1319                 case EOpFaceForward: if(visit == PostVisit) emit(sw::Shader::OPCODE_FORWARD(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;\r
1320                 case EOpReflect:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFLECT(dim(arg[0])), result, arg[0], arg[1]); break;\r
1321                 case EOpRefract:     if(visit == PostVisit) emit(sw::Shader::OPCODE_REFRACT(dim(arg[0])), result, arg[0], arg[1], arg[2]); break;\r
1322                 case EOpMul:\r
1323                         if(visit == PostVisit)\r
1324                         {\r
1325                                 TIntermTyped *arg0 = arg[0]->getAsTyped();\r
1326                                 TIntermTyped *arg1 = arg[1]->getAsTyped();\r
1327                                 ASSERT((arg0->getNominalSize() == arg1->getNominalSize()) && (arg0->getSecondarySize() == arg1->getSecondarySize()));\r
1328 \r
1329                                 int size = arg0->getNominalSize();\r
1330                                 for(int i = 0; i < size; i++)\r
1331                                 {\r
1332                                         Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, arg[0], arg[1]);\r
1333                                         mul->dst.index += i;\r
1334                                         argument(mul->src[0], arg[0], i);\r
1335                                         argument(mul->src[1], arg[1], i);\r
1336                                 }\r
1337                         }\r
1338                         break;\r
1339                 case EOpOuterProduct:\r
1340                         if(visit == PostVisit)\r
1341                         {\r
1342                                 for(int i = 0; i < dim(arg[1]); i++)\r
1343                                 {\r
1344                                         Instruction *mul = emit(sw::Shader::OPCODE_MUL, result, arg[0], arg[1]);\r
1345                                         mul->dst.index += i;\r
1346                                         mul->src[1].swizzle = 0x55 * i;\r
1347                                 }\r
1348                         }\r
1349                         break;\r
1350                 default: UNREACHABLE(node->getOp());\r
1351                 }\r
1352 \r
1353                 return true;\r
1354         }\r
1355 \r
1356         bool OutputASM::visitSelection(Visit visit, TIntermSelection *node)\r
1357         {\r
1358                 if(currentScope != emitScope)\r
1359                 {\r
1360                         return false;\r
1361                 }\r
1362 \r
1363                 TIntermTyped *condition = node->getCondition();\r
1364                 TIntermNode *trueBlock = node->getTrueBlock();\r
1365                 TIntermNode *falseBlock = node->getFalseBlock();\r
1366                 TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();\r
1367 \r
1368                 condition->traverse(this);\r
1369 \r
1370                 if(node->usesTernaryOperator())\r
1371                 {\r
1372                         if(constantCondition)\r
1373                         {\r
1374                                 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();\r
1375 \r
1376                                 if(trueCondition)\r
1377                                 {\r
1378                                         trueBlock->traverse(this);\r
1379                                         copy(node, trueBlock);\r
1380                                 }\r
1381                                 else\r
1382                                 {\r
1383                                         falseBlock->traverse(this);\r
1384                                         copy(node, falseBlock);\r
1385                                 }\r
1386                         }\r
1387                         else if(trivial(node, 6))   // Fast to compute both potential results and no side effects\r
1388                         {\r
1389                                 trueBlock->traverse(this);\r
1390                                 falseBlock->traverse(this);\r
1391                                 emit(sw::Shader::OPCODE_SELECT, node, condition, trueBlock, falseBlock);\r
1392                         }\r
1393                         else\r
1394                         {\r
1395                                 emit(sw::Shader::OPCODE_IF, 0, condition);\r
1396 \r
1397                                 if(trueBlock)\r
1398                                 {\r
1399                                         trueBlock->traverse(this);\r
1400                                         copy(node, trueBlock);\r
1401                                 }\r
1402 \r
1403                                 if(falseBlock)\r
1404                                 {\r
1405                                         emit(sw::Shader::OPCODE_ELSE);\r
1406                                         falseBlock->traverse(this);\r
1407                                         copy(node, falseBlock);\r
1408                                 }\r
1409 \r
1410                                 emit(sw::Shader::OPCODE_ENDIF);\r
1411                         }\r
1412                 }\r
1413                 else  // if/else statement\r
1414                 {\r
1415                         if(constantCondition)\r
1416                         {\r
1417                                 bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();\r
1418 \r
1419                                 if(trueCondition)\r
1420                                 {\r
1421                                         if(trueBlock)\r
1422                                         {\r
1423                                                 trueBlock->traverse(this);\r
1424                                         }\r
1425                                 }\r
1426                                 else\r
1427                                 {\r
1428                                         if(falseBlock)\r
1429                                         {\r
1430                                                 falseBlock->traverse(this);\r
1431                                         }\r
1432                                 }\r
1433                         }\r
1434                         else\r
1435                         {\r
1436                                 emit(sw::Shader::OPCODE_IF, 0, condition);\r
1437 \r
1438                                 if(trueBlock)\r
1439                                 {\r
1440                                         trueBlock->traverse(this);\r
1441                                 }\r
1442 \r
1443                                 if(falseBlock)\r
1444                                 {\r
1445                                         emit(sw::Shader::OPCODE_ELSE);\r
1446                                         falseBlock->traverse(this);\r
1447                                 }\r
1448 \r
1449                                 emit(sw::Shader::OPCODE_ENDIF);\r
1450                         }\r
1451                 }\r
1452 \r
1453                 return false;\r
1454         }\r
1455 \r
1456         bool OutputASM::visitLoop(Visit visit, TIntermLoop *node)\r
1457         {\r
1458                 if(currentScope != emitScope)\r
1459                 {\r
1460                         return false;\r
1461                 }\r
1462 \r
1463                 unsigned int iterations = loopCount(node);\r
1464 \r
1465                 if(iterations == 0)\r
1466                 {\r
1467                         return false;\r
1468                 }\r
1469 \r
1470                 bool unroll = (iterations <= 4);\r
1471 \r
1472                 if(unroll)\r
1473                 {\r
1474                         DetectLoopDiscontinuity detectLoopDiscontinuity;\r
1475                         unroll = !detectLoopDiscontinuity.traverse(node);\r
1476                 }\r
1477 \r
1478                 TIntermNode *init = node->getInit();\r
1479                 TIntermTyped *condition = node->getCondition();\r
1480                 TIntermTyped *expression = node->getExpression();\r
1481                 TIntermNode *body = node->getBody();\r
1482 \r
1483                 if(node->getType() == ELoopDoWhile)\r
1484                 {\r
1485                         Temporary iterate(this);\r
1486                         Constant True(true);\r
1487                         emit(sw::Shader::OPCODE_MOV, &iterate, &True);\r
1488 \r
1489                         emit(sw::Shader::OPCODE_WHILE, 0, &iterate);   // FIXME: Implement real do-while\r
1490 \r
1491                         if(body)\r
1492                         {\r
1493                                 body->traverse(this);\r
1494                         }\r
1495 \r
1496                         emit(sw::Shader::OPCODE_TEST);\r
1497 \r
1498                         condition->traverse(this);\r
1499                         emit(sw::Shader::OPCODE_MOV, &iterate, condition);\r
1500 \r
1501                         emit(sw::Shader::OPCODE_ENDWHILE);\r
1502                 }\r
1503                 else\r
1504                 {\r
1505                         if(init)\r
1506                         {\r
1507                                 init->traverse(this);\r
1508                         }\r
1509 \r
1510                         if(unroll)\r
1511                         {\r
1512                                 for(unsigned int i = 0; i < iterations; i++)\r
1513                                 {\r
1514                                 //      condition->traverse(this);   // Condition could contain statements, but not in an unrollable loop\r
1515 \r
1516                                         if(body)\r
1517                                         {\r
1518                                                 body->traverse(this);\r
1519                                         }\r
1520 \r
1521                                         if(expression)\r
1522                                         {\r
1523                                                 expression->traverse(this);\r
1524                                         }\r
1525                                 }\r
1526                         }\r
1527                         else\r
1528                         {\r
1529                                 if(condition)\r
1530                                 {\r
1531                                         condition->traverse(this);\r
1532                                 }\r
1533 \r
1534                                 emit(sw::Shader::OPCODE_WHILE, 0, condition);\r
1535 \r
1536                                 if(body)\r
1537                                 {\r
1538                                         body->traverse(this);\r
1539                                 }\r
1540 \r
1541                                 emit(sw::Shader::OPCODE_TEST);\r
1542 \r
1543                                 if(expression)\r
1544                                 {\r
1545                                         expression->traverse(this);\r
1546                                 }\r
1547 \r
1548                                 if(condition)\r
1549                                 {\r
1550                                         condition->traverse(this);\r
1551                                 }\r
1552 \r
1553                                 emit(sw::Shader::OPCODE_ENDWHILE);\r
1554                         }\r
1555                 }\r
1556 \r
1557                 return false;\r
1558         }\r
1559 \r
1560         bool OutputASM::visitBranch(Visit visit, TIntermBranch *node)\r
1561         {\r
1562                 if(currentScope != emitScope)\r
1563                 {\r
1564                         return false;\r
1565                 }\r
1566 \r
1567                 switch(node->getFlowOp())\r
1568                 {\r
1569                 case EOpKill:      if(visit == PostVisit) emit(sw::Shader::OPCODE_DISCARD);  break;\r
1570                 case EOpBreak:     if(visit == PostVisit) emit(sw::Shader::OPCODE_BREAK);    break;\r
1571                 case EOpContinue:  if(visit == PostVisit) emit(sw::Shader::OPCODE_CONTINUE); break;\r
1572                 case EOpReturn:\r
1573                         if(visit == PostVisit)\r
1574                         {\r
1575                                 TIntermTyped *value = node->getExpression();\r
1576 \r
1577                                 if(value)\r
1578                                 {\r
1579                                         copy(functionArray[currentFunction].ret, value);\r
1580                                 }\r
1581 \r
1582                                 emit(sw::Shader::OPCODE_LEAVE);\r
1583                         }\r
1584                         break;\r
1585                 default: UNREACHABLE(node->getFlowOp());\r
1586                 }\r
1587 \r
1588                 return true;\r
1589         }\r
1590 \r
1591         bool OutputASM::isSamplerRegister(TIntermTyped *operand)\r
1592         {\r
1593                 return operand && isSamplerRegister(operand->getType());\r
1594         }\r
1595 \r
1596         bool OutputASM::isSamplerRegister(const TType &type)\r
1597         {\r
1598                 // A sampler register's qualifiers can be:\r
1599                 // - EvqUniform: The sampler uniform is used as is in the code (default case).\r
1600                 // - EvqTemporary: The sampler is indexed. It's still a sampler register.\r
1601                 // - EvqIn (and other similar types): The sampler has been passed as a function argument. At this point,\r
1602                 //                                    the sampler has been copied and is no longer a sampler register.\r
1603                 return IsSampler(type.getBasicType()) && (type.getQualifier() == EvqUniform || type.getQualifier() == EvqTemporary);\r
1604         }\r
1605 \r
1606         Instruction *OutputASM::emit(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2, TIntermNode *src3, int index)\r
1607         {\r
1608                 if(isSamplerRegister(dst))\r
1609                 {\r
1610                         op = sw::Shader::OPCODE_NULL;   // Can't assign to a sampler, but this is hit when indexing sampler arrays\r
1611                 }\r
1612 \r
1613                 Instruction *instruction = new Instruction(op);\r
1614 \r
1615                 if(dst)\r
1616                 {\r
1617                         instruction->dst.type = registerType(dst);\r
1618                         instruction->dst.index = registerIndex(dst) + index;\r
1619                         instruction->dst.mask = writeMask(dst);\r
1620                         instruction->dst.integer = (dst->getBasicType() == EbtInt);\r
1621                 }\r
1622 \r
1623                 argument(instruction->src[0], src0, index);\r
1624                 argument(instruction->src[1], src1, index);\r
1625                 argument(instruction->src[2], src2, index);\r
1626                 argument(instruction->src[3], src3, index);\r
1627 \r
1628                 shader->append(instruction);\r
1629 \r
1630                 return instruction;\r
1631         }\r
1632 \r
1633         Instruction *OutputASM::emitCast(TIntermTyped *dst, TIntermTyped *src)\r
1634         {\r
1635                 switch(src->getBasicType())\r
1636                 {\r
1637                 case EbtBool:\r
1638                         switch(dst->getBasicType())\r
1639                         {\r
1640                         case EbtInt:   return emit(sw::Shader::OPCODE_B2I, dst, src);\r
1641                         case EbtUInt:  return emit(sw::Shader::OPCODE_B2U, dst, src);\r
1642                         case EbtFloat: return emit(sw::Shader::OPCODE_B2F, dst, src);\r
1643                         default:       break;\r
1644                         }\r
1645                         break;\r
1646                 case EbtInt:\r
1647                         switch(dst->getBasicType())\r
1648                         {\r
1649                         case EbtBool:  return emit(sw::Shader::OPCODE_I2B, dst, src);\r
1650                         case EbtFloat: return emit(sw::Shader::OPCODE_I2F, dst, src);\r
1651                         default:       break;\r
1652                         }\r
1653                         break;\r
1654                 case EbtUInt:\r
1655                         switch(dst->getBasicType())\r
1656                         {\r
1657                         case EbtBool:  return emit(sw::Shader::OPCODE_U2B, dst, src);\r
1658                         case EbtFloat: return emit(sw::Shader::OPCODE_U2F, dst, src);\r
1659                         default:       break;\r
1660                         }\r
1661                         break;\r
1662                 case EbtFloat:\r
1663                         switch(dst->getBasicType())\r
1664                         {\r
1665                         case EbtBool: return emit(sw::Shader::OPCODE_F2B, dst, src);\r
1666                         case EbtInt:  return emit(sw::Shader::OPCODE_F2I, dst, src);\r
1667                         case EbtUInt: return emit(sw::Shader::OPCODE_F2U, dst, src);\r
1668                         default:      break;\r
1669                         }\r
1670                         break;\r
1671                 default:\r
1672                         break;\r
1673                 }\r
1674 \r
1675                 return emit(sw::Shader::OPCODE_MOV, dst, src);\r
1676         }\r
1677 \r
1678         void OutputASM::emitBinary(sw::Shader::Opcode op, TIntermTyped *dst, TIntermNode *src0, TIntermNode *src1, TIntermNode *src2)\r
1679         {\r
1680                 for(int index = 0; index < dst->elementRegisterCount(); index++)\r
1681                 {\r
1682                         emit(op, dst, src0, src1, src2, 0, index);\r
1683                 }\r
1684         }\r
1685 \r
// Compound assignment helper: computes `op(src0, src1)` into `result` via
// emitBinary(), then stores `result` into the l-value `lhs` via assignLvalue().
void OutputASM::emitAssign(sw::Shader::Opcode op, TIntermTyped *result, TIntermTyped *lhs, TIntermTyped *src0, TIntermTyped *src1)
{
	emitBinary(op, result, src0, src1);
	assignLvalue(lhs, result);   // Must follow the computation: it reads `result`
}
1691 \r
1692         void OutputASM::emitCmp(sw::Shader::Control cmpOp, TIntermTyped *dst, TIntermNode *left, TIntermNode *right, int index)\r
1693         {\r
1694                 sw::Shader::Opcode opcode;\r
1695                 switch(left->getAsTyped()->getBasicType())\r
1696                 {\r
1697                 case EbtBool:\r
1698                 case EbtInt:\r
1699                         opcode = sw::Shader::OPCODE_ICMP;\r
1700                         break;\r
1701                 case EbtUInt:\r
1702                         opcode = sw::Shader::OPCODE_UCMP;\r
1703                         break;\r
1704                 default:\r
1705                         opcode = sw::Shader::OPCODE_CMP;\r
1706                         break;\r
1707                 }\r
1708 \r
1709                 Instruction *cmp = emit(opcode, dst, left, right);\r
1710                 cmp->control = cmpOp;\r
1711                 argument(cmp->src[0], left, index);\r
1712                 argument(cmp->src[1], right, index);\r
1713         }\r
1714 \r
1715         int componentCount(const TType &type, int registers)\r
1716         {\r
1717                 if(registers == 0)\r
1718                 {\r
1719                         return 0;\r
1720                 }\r
1721 \r
1722                 if(type.isArray() && registers >= type.elementRegisterCount())\r
1723                 {\r
1724                         int index = registers / type.elementRegisterCount();\r
1725                         registers -= index * type.elementRegisterCount();\r
1726                         return index * type.getElementSize() + componentCount(type, registers);\r
1727                 }\r
1728 \r
1729                 if(type.isStruct() || type.isInterfaceBlock())\r
1730                 {\r
1731                         const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();\r
1732                         int elements = 0;\r
1733 \r
1734                         for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++)\r
1735                         {\r
1736                                 const TType &fieldType = *((*field)->type());\r
1737 \r
1738                                 if(fieldType.totalRegisterCount() <= registers)\r
1739                                 {\r
1740                                         registers -= fieldType.totalRegisterCount();\r
1741                                         elements += fieldType.getObjectSize();\r
1742                                 }\r
1743                                 else   // Register within this field\r
1744                                 {\r
1745                                         return elements + componentCount(fieldType, registers);\r
1746                                 }\r
1747                         }\r
1748                 }\r
1749                 else if(type.isMatrix())\r
1750                 {\r
1751                         return registers * type.registerSize();\r
1752                 }\r
1753                 \r
1754                 UNREACHABLE(0);\r
1755                 return 0;\r
1756         }\r
1757 \r
1758         int registerSize(const TType &type, int registers)\r
1759         {\r
1760                 if(registers == 0)\r
1761                 {\r
1762                         if(type.isStruct())\r
1763                         {\r
1764                                 return registerSize(*((*(type.getStruct()->fields().begin()))->type()), 0);\r
1765                         }\r
1766 \r
1767                         return type.registerSize();\r
1768                 }\r
1769 \r
1770                 if(type.isArray() && registers >= type.elementRegisterCount())\r
1771                 {\r
1772                         int index = registers / type.elementRegisterCount();\r
1773                         registers -= index * type.elementRegisterCount();\r
1774                         return registerSize(type, registers);\r
1775                 }\r
1776 \r
1777                 if(type.isStruct() || type.isInterfaceBlock())\r
1778                 {\r
1779                         const TFieldList& fields = type.getStruct() ? type.getStruct()->fields() : type.getInterfaceBlock()->fields();\r
1780                         int elements = 0;\r
1781 \r
1782                         for(TFieldList::const_iterator field = fields.begin(); field != fields.end(); field++)\r
1783                         {\r
1784                                 const TType &fieldType = *((*field)->type());\r
1785                                 \r
1786                                 if(fieldType.totalRegisterCount() <= registers)\r
1787                                 {\r
1788                                         registers -= fieldType.totalRegisterCount();\r
1789                                         elements += fieldType.getObjectSize();\r
1790                                 }\r
1791                                 else   // Register within this field\r
1792                                 {\r
1793                                         return registerSize(fieldType, registers);\r
1794                                 }\r
1795                         }\r
1796                 }\r
1797                 else if(type.isMatrix())\r
1798                 {\r
1799                         return registerSize(type, 0);\r
1800                 }\r
1801                 \r
1802                 UNREACHABLE(0);\r
1803                 return 0;\r
1804         }\r
1805 \r
1806         void OutputASM::argument(sw::Shader::SourceParameter &parameter, TIntermNode *argument, int index)\r
1807         {\r
1808                 if(argument)\r
1809                 {\r
1810                         TIntermTyped *arg = argument->getAsTyped();\r
1811                         const TType &type = arg->getType();\r
1812                         index = (index >= arg->totalRegisterCount()) ? arg->totalRegisterCount() - 1 : index;\r
1813 \r
1814                         int size = registerSize(type, index);\r
1815 \r
1816                         parameter.type = registerType(arg);\r
1817 \r
1818                         if(arg->getQualifier() == EvqConstExpr)\r
1819                         {\r
1820                                 int component = componentCount(type, index);\r
1821                                 ConstantUnion *constants = arg->getAsConstantUnion()->getUnionArrayPointer();\r
1822 \r
1823                                 for(int i = 0; i < 4; i++)\r
1824                                 {\r
1825                                         if(size == 1)   // Replicate\r
1826                                         {\r
1827                                                 parameter.value[i] = constants[component + 0].getAsFloat();\r
1828                                         }\r
1829                                         else if(i < size)\r
1830                                         {\r
1831                                                 parameter.value[i] = constants[component + i].getAsFloat();\r
1832                                         }\r
1833                                         else\r
1834                                         {\r
1835                                                 parameter.value[i] = 0.0f;\r
1836                                         }\r
1837                                 }\r
1838                         }\r
1839                         else\r
1840                         {\r
1841                                 parameter.index = registerIndex(arg) + index;\r
1842 \r
1843                                 if(isSamplerRegister(arg))\r
1844                                 {\r
1845                                         TIntermBinary *binary = argument->getAsBinaryNode();\r
1846 \r
1847                                         if(binary)\r
1848                                         {\r
1849                                                 TIntermTyped *left = binary->getLeft();\r
1850                                                 TIntermTyped *right = binary->getRight();\r
1851 \r
1852                                                 switch(binary->getOp())\r
1853                                                 {\r
1854                                                 case EOpIndexDirect:\r
1855                                                         parameter.index += right->getAsConstantUnion()->getIConst(0);\r
1856                                                         break;\r
1857                                                 case EOpIndexIndirect:\r
1858                                                         if(left->getArraySize() > 1)\r
1859                                                         {\r
1860                                                                 parameter.rel.type = registerType(binary->getRight());\r
1861                                                                 parameter.rel.index = registerIndex(binary->getRight());\r
1862                                                                 parameter.rel.scale = 1;\r
1863                                                                 parameter.rel.deterministic = true;\r
1864                                                         }\r
1865                                                         break;\r
1866                                                 case EOpIndexDirectStruct:\r
1867                                                 case EOpIndexDirectInterfaceBlock:\r
1868                                                         parameter.index += right->getAsConstantUnion()->getIConst(0);\r
1869                                                         break;\r
1870                                                 default:\r
1871                                                         UNREACHABLE(binary->getOp());\r
1872                                                 }\r
1873                                         }\r
1874                                 }\r
1875                         }\r
1876 \r
1877                         if(!IsSampler(arg->getBasicType()))\r
1878                         {\r
1879                                 parameter.swizzle = readSwizzle(arg, size);\r
1880                         }\r
1881                 }\r
1882         }\r
1883 \r
1884         void OutputASM::copy(TIntermTyped *dst, TIntermNode *src, int offset)\r
1885         {\r
1886                 for(int index = 0; index < dst->totalRegisterCount(); index++)\r
1887                 {\r
1888                         Instruction *mov = emit(sw::Shader::OPCODE_MOV, dst, src);\r
1889                         mov->dst.index += index;\r
1890                         mov->dst.mask = writeMask(dst, index);\r
1891                         argument(mov->src[0], src, offset + index);\r
1892                 }\r
1893         }\r
1894 \r
// Extracts the 2-bit selector stored at position `index` of a packed swizzle
// (position 0 occupies the two least significant bits).
int swizzleElement(int swizzle, int index)
{
	const int shift = index * 2;
	const int selector = swizzle >> shift;

	return selector & 0x03;
}
1899 \r
1900         int swizzleSwizzle(int leftSwizzle, int rightSwizzle)\r
1901         {\r
1902                 return (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 0)) << 0) |\r
1903                        (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 1)) << 2) |\r
1904                        (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 2)) << 4) |\r
1905                        (swizzleElement(leftSwizzle, swizzleElement(rightSwizzle, 3)) << 6);\r
1906         }\r
1907 \r
1908         void OutputASM::assignLvalue(TIntermTyped *dst, TIntermTyped *src)\r
1909         {\r
1910                 if(src &&\r
1911                         ((src->isVector() && (!dst->isVector() || (dst->getNominalSize() != dst->getNominalSize()))) ||\r
1912                          (src->isMatrix() && (!dst->isMatrix() || (src->getNominalSize() != dst->getNominalSize()) || (src->getSecondarySize() != dst->getSecondarySize())))))\r
1913                 {\r
1914                         return mContext.error(src->getLine(), "Result type should match the l-value type in compound assignment", src->isVector() ? "vector" : "matrix");\r
1915                 }\r
1916 \r
1917                 TIntermBinary *binary = dst->getAsBinaryNode();\r
1918 \r
1919                 if(binary && binary->getOp() == EOpIndexIndirect && dst->isScalar())\r
1920                 {\r
1921                         Instruction *insert = new Instruction(sw::Shader::OPCODE_INSERT);\r
1922                         \r
1923                         Temporary address(this);\r
1924                         lvalue(insert->dst, address, dst);\r
1925 \r
1926                         insert->src[0].type = insert->dst.type;\r
1927                         insert->src[0].index = insert->dst.index;\r
1928                         insert->src[0].rel = insert->dst.rel;\r
1929                         argument(insert->src[1], src);\r
1930                         argument(insert->src[2], binary->getRight());\r
1931 \r
1932                         shader->append(insert);\r
1933                 }\r
1934                 else\r
1935                 {\r
1936                         for(int offset = 0; offset < dst->totalRegisterCount(); offset++)\r
1937                         {\r
1938                                 Instruction *mov = new Instruction(sw::Shader::OPCODE_MOV);\r
1939                         \r
1940                                 Temporary address(this);\r
1941                                 int swizzle = lvalue(mov->dst, address, dst);\r
1942                                 mov->dst.index += offset;\r
1943 \r
1944                                 if(offset > 0)\r
1945                                 {\r
1946                                         mov->dst.mask = writeMask(dst, offset);\r
1947                                 }\r
1948 \r
1949                                 argument(mov->src[0], src, offset);\r
1950                                 mov->src[0].swizzle = swizzleSwizzle(mov->src[0].swizzle, swizzle);\r
1951 \r
1952                                 shader->append(mov);\r
1953                         }\r
1954                 }\r
1955         }\r
1956 \r
// Resolves the l-value expression `node` into the destination parameter `dst`.
// - dst:     receives register type, index, write mask and, for dynamic
//            indexing, the relative-address fields.
// - address: temporary used to accumulate the relative address when more than
//            one dynamic index has to be combined (IMUL/IMAD/IADD are emitted).
// - node:    the l-value; indexing and swizzle nodes recurse into their left operand.
// Returns a value the caller uses to remap the source swizzle:
// - 0xE4 on most paths (swizzleElement(0xE4, i) == i, i.e. no reordering),
// - the computed swizzle for EOpVectorSwizzle,
// - the selected component number for a constant index into a register.
int OutputASM::lvalue(sw::Shader::DestinationParameter &dst, Temporary &address, TIntermTyped *node)
{
	TIntermTyped *result = node;
	TIntermBinary *binary = node->getAsBinaryNode();
	TIntermSymbol *symbol = node->getAsSymbolNode();

	if(binary)
	{
		TIntermTyped *left = binary->getLeft();
		TIntermTyped *right = binary->getRight();

		int leftSwizzle = lvalue(dst, address, left);   // Resolve the l-value of the left side

		switch(binary->getOp())
		{
		case EOpIndexDirect:
			{
				int rightIndex = right->getAsConstantUnion()->getIConst(0);

				if(left->isRegister())
				{
					int leftMask = dst.mask;

					// NOTE(review): the mask found by this scan is overwritten
					// below before it is read, and the loop cannot terminate
					// on a set bit if leftMask is 0 - confirm whether it is still needed.
					dst.mask = 1;
					while((leftMask & dst.mask) == 0)
					{
						dst.mask = dst.mask << 1;
					}

					// Select the single component named by the constant index
					int element = swizzleElement(leftSwizzle, rightIndex);
					dst.mask = 1 << element;

					return element;
				}
				else if(left->isArray() || left->isMatrix())
				{
					// Constant index: advance by whole elements of the result type
					dst.index += rightIndex * result->totalRegisterCount();
					return 0xE4;
				}
				else UNREACHABLE(0);
			}
			break;
		case EOpIndexIndirect:
			{
				if(left->isRegister())
				{
					// Requires INSERT instruction (handled by calling function)
				}
				else if(left->isArray() || left->isMatrix())
				{
					// Number of registers covered by one step of the dynamic index
					int scale = result->totalRegisterCount();

					if(dst.rel.type == sw::Shader::PARAMETER_VOID)   // Use the index register as the relative address directly
					{
						if(left->totalRegisterCount() > 1)
						{
							sw::Shader::SourceParameter relativeRegister;
							argument(relativeRegister, right);

							dst.rel.index = relativeRegister.index;
							dst.rel.type = relativeRegister.type;
							dst.rel.scale = scale;
							dst.rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);
						}
					}
					else if(dst.rel.index != registerIndex(&address))   // Move the previous index register to the address register
					{
						if(scale == 1)
						{
							// address = previousIndex * oldScale + right
							// (src[0] is patched to the previous index register)
							Constant oldScale((int)dst.rel.scale);
							Instruction *mad = emit(sw::Shader::OPCODE_IMAD, &address, &address, &oldScale, right);
							mad->src[0].index = dst.rel.index;
							mad->src[0].type = dst.rel.type;
						}
						else
						{
							// address = previousIndex * oldScale
							// (src[0] is patched to the previous index register)
							Constant oldScale((int)dst.rel.scale);
							Instruction *mul = emit(sw::Shader::OPCODE_IMUL, &address, &address, &oldScale);
							mul->src[0].index = dst.rel.index;
							mul->src[0].type = dst.rel.type;

							// address = right * scale + address
							Constant newScale(scale);
							emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
						}

						// From here on the address temporary holds the full offset
						dst.rel.type = sw::Shader::PARAMETER_TEMP;
						dst.rel.index = registerIndex(&address);
						dst.rel.scale = 1;
					}
					else   // Just add the new index to the address register
					{
						if(scale == 1)
						{
							emit(sw::Shader::OPCODE_IADD, &address, &address, right);
						}
						else
						{
							Constant newScale(scale);
							emit(sw::Shader::OPCODE_IMAD, &address, right, &newScale, &address);
						}
					}
				}
				else UNREACHABLE(0);
			}
			break;
		case EOpIndexDirectStruct:
		case EOpIndexDirectInterfaceBlock:
			{
				const TFieldList& fields = (binary->getOp() == EOpIndexDirectStruct) ?
				                           left->getType().getStruct()->fields() :
				                           left->getType().getInterfaceBlock()->fields();
				int index = right->getAsConstantUnion()->getIConst(0);
				int fieldOffset = 0;

				// Sum the registers of all fields that precede the selected one
				for(int i = 0; i < index; i++)
				{
					fieldOffset += fields[i]->type()->totalRegisterCount();
				}

				dst.type = registerType(left);
				dst.index += fieldOffset;
				dst.mask = writeMask(right);

				return 0xE4;
			}
			break;
		case EOpVectorSwizzle:
			{
				ASSERT(left->isRegister());

				int leftMask = dst.mask;

				int swizzle = 0;
				int rightMask = 0;

				TIntermSequence &sequence = right->getAsAggregate()->getSequence();

				// Each swizzle entry adds its target component to the write
				// mask and records which source position feeds that component
				for(unsigned int i = 0; i < sequence.size(); i++)
				{
					int index = sequence[i]->getAsConstantUnion()->getIConst(0);

					int element = swizzleElement(leftSwizzle, index);
					rightMask = rightMask | (1 << element);
					swizzle = swizzle | swizzleElement(leftSwizzle, i) << (element * 2);
				}

				dst.mask = leftMask & rightMask;

				return swizzle;
			}
			break;
		default:
			UNREACHABLE(binary->getOp());   // Not an l-value operator
			break;
		}
	}
	else if(symbol)
	{
		// Plain variable: take its register and full write mask
		dst.type = registerType(symbol);
		dst.index = registerIndex(symbol);
		dst.mask = writeMask(symbol);
		return 0xE4;
	}

	// Reached after EOpIndexIndirect, and for nodes that are neither binary nor symbol
	return 0xE4;
}
2123 \r
2124         sw::Shader::ParameterType OutputASM::registerType(TIntermTyped *operand)\r
2125         {\r
2126                 if(isSamplerRegister(operand))\r
2127                 {\r
2128                         return sw::Shader::PARAMETER_SAMPLER;\r
2129                 }\r
2130 \r
2131                 const TQualifier qualifier = operand->getQualifier();\r
2132                 if((EvqFragColor == qualifier) || (EvqFragData == qualifier))\r
2133                 {\r
2134                         if(((EvqFragData == qualifier) && (EvqFragColor == outputQualifier)) ||\r
2135                            ((EvqFragColor == qualifier) && (EvqFragData == outputQualifier)))\r
2136                         {\r
2137                                 mContext.error(operand->getLine(), "static assignment to both gl_FragData and gl_FragColor", "");\r
2138                         }\r
2139                         outputQualifier = qualifier;\r
2140                 }\r
2141 \r
2142                 switch(qualifier)\r
2143                 {\r
2144                 case EvqTemporary:           return sw::Shader::PARAMETER_TEMP;\r
2145                 case EvqGlobal:              return sw::Shader::PARAMETER_TEMP;\r
2146                 case EvqConstExpr:           return sw::Shader::PARAMETER_FLOAT4LITERAL;   // All converted to float\r
2147                 case EvqAttribute:           return sw::Shader::PARAMETER_INPUT;\r
2148                 case EvqVaryingIn:           return sw::Shader::PARAMETER_INPUT;\r
2149                 case EvqVaryingOut:          return sw::Shader::PARAMETER_OUTPUT;\r
2150                 case EvqVertexIn:            return sw::Shader::PARAMETER_INPUT;\r
2151                 case EvqFragmentOut:         return sw::Shader::PARAMETER_COLOROUT;\r
2152                 case EvqVertexOut:           return sw::Shader::PARAMETER_OUTPUT;\r
2153                 case EvqFragmentIn:          return sw::Shader::PARAMETER_INPUT;\r
2154                 case EvqInvariantVaryingIn:  return sw::Shader::PARAMETER_INPUT;    // FIXME: Guarantee invariance at the backend\r
2155                 case EvqInvariantVaryingOut: return sw::Shader::PARAMETER_OUTPUT;   // FIXME: Guarantee invariance at the backend \r
2156                 case EvqSmooth:              return sw::Shader::PARAMETER_OUTPUT;\r
2157                 case EvqFlat:                return sw::Shader::PARAMETER_OUTPUT;\r
2158                 case EvqCentroidOut:         return sw::Shader::PARAMETER_OUTPUT;\r
2159                 case EvqSmoothIn:            return sw::Shader::PARAMETER_INPUT;\r
2160                 case EvqFlatIn:              return sw::Shader::PARAMETER_INPUT;\r
2161                 case EvqCentroidIn:          return sw::Shader::PARAMETER_INPUT;\r
2162                 case EvqUniform:             return sw::Shader::PARAMETER_CONST;\r
2163                 case EvqIn:                  return sw::Shader::PARAMETER_TEMP;\r
2164                 case EvqOut:                 return sw::Shader::PARAMETER_TEMP;\r
2165                 case EvqInOut:               return sw::Shader::PARAMETER_TEMP;\r
2166                 case EvqConstReadOnly:       return sw::Shader::PARAMETER_TEMP;\r
2167                 case EvqPosition:            return sw::Shader::PARAMETER_OUTPUT;\r
2168                 case EvqPointSize:           return sw::Shader::PARAMETER_OUTPUT;\r
2169                 case EvqInstanceID:          return sw::Shader::PARAMETER_MISCTYPE;\r
2170                 case EvqFragCoord:           return sw::Shader::PARAMETER_MISCTYPE;\r
2171                 case EvqFrontFacing:         return sw::Shader::PARAMETER_MISCTYPE;\r
2172                 case EvqPointCoord:          return sw::Shader::PARAMETER_INPUT;\r
2173                 case EvqFragColor:           return sw::Shader::PARAMETER_COLOROUT;\r
2174                 case EvqFragData:            return sw::Shader::PARAMETER_COLOROUT;\r
2175                 case EvqFragDepth:           return sw::Shader::PARAMETER_DEPTHOUT;\r
2176                 default: UNREACHABLE(qualifier);\r
2177                 }\r
2178 \r
2179                 return sw::Shader::PARAMETER_VOID;\r
2180         }\r
2181 \r
2182         int OutputASM::registerIndex(TIntermTyped *operand)\r
2183         {\r
2184                 if(isSamplerRegister(operand))\r
2185                 {\r
2186                         return samplerRegister(operand);\r
2187                 }\r
2188 \r
2189                 switch(operand->getQualifier())\r
2190                 {\r
2191                 case EvqTemporary:           return temporaryRegister(operand);\r
2192                 case EvqGlobal:              return temporaryRegister(operand);\r
2193                 case EvqConstExpr:           UNREACHABLE(EvqConstExpr);\r
2194                 case EvqAttribute:           return attributeRegister(operand);\r
2195                 case EvqVaryingIn:           return varyingRegister(operand);\r
2196                 case EvqVaryingOut:          return varyingRegister(operand);\r
2197                 case EvqVertexIn:            return attributeRegister(operand);\r
2198                 case EvqFragmentOut:         return 0;\r
2199                 case EvqVertexOut:           return varyingRegister(operand);\r
2200                 case EvqFragmentIn:          return varyingRegister(operand);\r
2201                 case EvqInvariantVaryingIn:  return varyingRegister(operand);\r
2202                 case EvqInvariantVaryingOut: return varyingRegister(operand);\r
2203                 case EvqSmooth:              return varyingRegister(operand);\r
2204                 case EvqFlat:                return varyingRegister(operand);\r
2205                 case EvqCentroidOut:         return varyingRegister(operand);\r
2206                 case EvqSmoothIn:            return varyingRegister(operand);\r
2207                 case EvqFlatIn:              return varyingRegister(operand);\r
2208                 case EvqCentroidIn:          return varyingRegister(operand);\r
2209                 case EvqUniform:             return uniformRegister(operand);\r
2210                 case EvqIn:                  return temporaryRegister(operand);\r
2211                 case EvqOut:                 return temporaryRegister(operand);\r
2212                 case EvqInOut:               return temporaryRegister(operand);\r
2213                 case EvqConstReadOnly:       return temporaryRegister(operand);\r
2214                 case EvqPosition:            return varyingRegister(operand);\r
2215                 case EvqPointSize:           return varyingRegister(operand);\r
2216                 case EvqInstanceID:          vertexShader->instanceIdDeclared = true; return 0;\r
2217                 case EvqFragCoord:           pixelShader->vPosDeclared = true;  return 0;\r
2218                 case EvqFrontFacing:         pixelShader->vFaceDeclared = true; return 1;\r
2219                 case EvqPointCoord:          return varyingRegister(operand);\r
2220                 case EvqFragColor:           return 0;\r
2221                 case EvqFragData:            return 0;\r
2222                 case EvqFragDepth:           return 0;\r
2223                 default: UNREACHABLE(operand->getQualifier());\r
2224                 }\r
2225 \r
2226                 return 0;\r
2227         }\r
2228 \r
2229         int OutputASM::writeMask(TIntermTyped *destination, int index)\r
2230         {\r
2231                 if(destination->getQualifier() == EvqPointSize)\r
2232                 {\r
2233                         return 0x2;   // Point size stored in the y component\r
2234                 }\r
2235 \r
2236                 return 0xF >> (4 - registerSize(destination->getType(), index));\r
2237         }\r
2238 \r
2239         int OutputASM::readSwizzle(TIntermTyped *argument, int size)\r
2240         {\r
2241                 if(argument->getQualifier() == EvqPointSize)\r
2242                 {\r
2243                         return 0x55;   // Point size stored in the y component\r
2244                 }\r
2245 \r
2246                 static const unsigned char swizzleSize[5] = {0x00, 0x00, 0x54, 0xA4, 0xE4};   // (void), xxxx, xyyy, xyzz, xyzw\r
2247 \r
2248                 return swizzleSize[size];\r
2249         }\r
2250 \r
2251         // Conservatively checks whether an expression is fast to compute and has no side effects\r
2252         bool OutputASM::trivial(TIntermTyped *expression, int budget)\r
2253         {\r
2254                 if(!expression->isRegister())\r
2255                 {\r
2256                         return false;\r
2257                 }\r
2258 \r
2259                 return cost(expression, budget) >= 0;\r
2260         }\r
2261 \r
2262         // Returns the remaining computing budget (if < 0 the expression is too expensive or has side effects)\r
2263         int OutputASM::cost(TIntermNode *expression, int budget)\r
2264         {\r
2265                 if(budget < 0)\r
2266                 {\r
2267                         return budget;\r
2268                 }\r
2269 \r
2270                 if(expression->getAsSymbolNode())\r
2271                 {\r
2272                         return budget;\r
2273                 }\r
2274                 else if(expression->getAsConstantUnion())\r
2275                 {\r
2276                         return budget;\r
2277                 }\r
2278                 else if(expression->getAsBinaryNode())\r
2279                 {\r
2280                         TIntermBinary *binary = expression->getAsBinaryNode();\r
2281 \r
2282                         switch(binary->getOp())\r
2283                         {\r
2284                         case EOpVectorSwizzle:\r
2285                         case EOpIndexDirect:\r
2286                         case EOpIndexDirectStruct:\r
2287                         case EOpIndexDirectInterfaceBlock:\r
2288                                 return cost(binary->getLeft(), budget - 0);\r
2289                         case EOpAdd:\r
2290                         case EOpSub:\r
2291                         case EOpMul:\r
2292                                 return cost(binary->getLeft(), cost(binary->getRight(), budget - 1));\r
2293                         default:\r
2294                                 return -1;\r
2295                         }\r
2296                 }\r
2297                 else if(expression->getAsUnaryNode())\r
2298                 {\r
2299                         TIntermUnary *unary = expression->getAsUnaryNode();\r
2300 \r
2301                         switch(unary->getOp())\r
2302                         {\r
2303                         case EOpAbs:\r
2304                         case EOpNegative:\r
2305                                 return cost(unary->getOperand(), budget - 1);\r
2306                         default:\r
2307                                 return -1;\r
2308                         }\r
2309                 }\r
2310                 else if(expression->getAsSelectionNode())\r
2311                 {\r
2312                         TIntermSelection *selection = expression->getAsSelectionNode();\r
2313 \r
2314                         if(selection->usesTernaryOperator())\r
2315                         {\r
2316                                 TIntermTyped *condition = selection->getCondition();\r
2317                                 TIntermNode *trueBlock = selection->getTrueBlock();\r
2318                                 TIntermNode *falseBlock = selection->getFalseBlock();\r
2319                                 TIntermConstantUnion *constantCondition = condition->getAsConstantUnion();\r
2320 \r
2321                                 if(constantCondition)\r
2322                                 {\r
2323                                         bool trueCondition = constantCondition->getUnionArrayPointer()->getBConst();\r
2324 \r
2325                                         if(trueCondition)\r
2326                                         {\r
2327                                                 return cost(trueBlock, budget - 0);\r
2328                                         }\r
2329                                         else\r
2330                                         {\r
2331                                                 return cost(falseBlock, budget - 0);\r
2332                                         }\r
2333                                 }\r
2334                                 else\r
2335                                 {\r
2336                                         return cost(trueBlock, cost(falseBlock, budget - 2));\r
2337                                 }\r
2338                         }\r
2339                 }\r
2340 \r
2341                 return -1;\r
2342         }\r
2343 \r
2344         const Function *OutputASM::findFunction(const TString &name)\r
2345         {\r
2346                 for(unsigned int f = 0; f < functionArray.size(); f++)\r
2347                 {\r
2348                         if(functionArray[f].name == name)\r
2349                         {\r
2350                                 return &functionArray[f];\r
2351                         }\r
2352                 }\r
2353 \r
2354                 return 0;\r
2355         }\r
2356         \r
2357         int OutputASM::temporaryRegister(TIntermTyped *temporary)\r
2358         {\r
2359                 return allocate(temporaries, temporary);\r
2360         }\r
2361 \r
2362         int OutputASM::varyingRegister(TIntermTyped *varying)\r
2363         {\r
2364                 int var = lookup(varyings, varying);\r
2365 \r
2366                 if(var == -1)\r
2367                 {\r
2368                         var = allocate(varyings, varying);\r
2369                         int componentCount = varying->registerSize();\r
2370                         int registerCount = varying->totalRegisterCount();\r
2371 \r
2372                         if(pixelShader)\r
2373                         {\r
2374                                 if((var + registerCount) > sw::PixelShader::MAX_INPUT_VARYINGS)\r
2375                                 {\r
2376                                         mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "fragment shader");\r
2377                                         return 0;\r
2378                                 }\r
2379 \r
2380                                 if(varying->getQualifier() == EvqPointCoord)\r
2381                                 {\r
2382                                         ASSERT(varying->isRegister());\r
2383                                         if(componentCount >= 1) pixelShader->semantic[var][0] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);\r
2384                                         if(componentCount >= 2) pixelShader->semantic[var][1] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);\r
2385                                         if(componentCount >= 3) pixelShader->semantic[var][2] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);\r
2386                                         if(componentCount >= 4) pixelShader->semantic[var][3] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, var);\r
2387                                 }\r
2388                                 else\r
2389                                 {\r
2390                                         for(int i = 0; i < varying->totalRegisterCount(); i++)\r
2391                                         {\r
2392                                                 if(componentCount >= 1) pixelShader->semantic[var + i][0] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);\r
2393                                                 if(componentCount >= 2) pixelShader->semantic[var + i][1] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);\r
2394                                                 if(componentCount >= 3) pixelShader->semantic[var + i][2] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);\r
2395                                                 if(componentCount >= 4) pixelShader->semantic[var + i][3] = sw::Shader::Semantic(sw::Shader::USAGE_COLOR, var + i);\r
2396                                         }\r
2397                                 }\r
2398                         }\r
2399                         else if(vertexShader)\r
2400                         {\r
2401                                 if((var + registerCount) > sw::VertexShader::MAX_OUTPUT_VARYINGS)\r
2402                                 {\r
2403                                         mContext.error(varying->getLine(), "Varyings packing failed: Too many varyings", "vertex shader");\r
2404                                         return 0;\r
2405                                 }\r
2406 \r
2407                                 if(varying->getQualifier() == EvqPosition)\r
2408                                 {\r
2409                                         ASSERT(varying->isRegister());\r
2410                                         vertexShader->output[var][0] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);\r
2411                                         vertexShader->output[var][1] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);\r
2412                                         vertexShader->output[var][2] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);\r
2413                                         vertexShader->output[var][3] = sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0);\r
2414                                         vertexShader->positionRegister = var;\r
2415                                 }\r
2416                                 else if(varying->getQualifier() == EvqPointSize)\r
2417                                 {\r
2418                                         ASSERT(varying->isRegister());\r
2419                                         vertexShader->output[var][0] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);\r
2420                                         vertexShader->output[var][1] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);\r
2421                                         vertexShader->output[var][2] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);\r
2422                                         vertexShader->output[var][3] = sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0);\r
2423                                         vertexShader->pointSizeRegister = var;\r
2424                                 }\r
2425                                 else\r
2426                                 {\r
2427                                         // Semantic indexes for user varyings will be assigned during program link to match the pixel shader\r
2428                                 }\r
2429                         }\r
2430                         else UNREACHABLE(0);\r
2431 \r
2432                         declareVarying(varying, var);\r
2433                 }\r
2434 \r
2435                 return var;\r
2436         }\r
2437 \r
2438         void OutputASM::declareVarying(TIntermTyped *varying, int reg)\r
2439         {\r
2440                 if(varying->getQualifier() != EvqPointCoord)   // gl_PointCoord does not need linking\r
2441                 {\r
2442                         const TType &type = varying->getType();\r
2443                         const char *name = varying->getAsSymbolNode()->getSymbol().c_str();\r
2444                         VaryingList &activeVaryings = shaderObject->varyings;\r
2445                         \r
2446                         // Check if this varying has been declared before without having a register assigned\r
2447                         for(VaryingList::iterator v = activeVaryings.begin(); v != activeVaryings.end(); v++)\r
2448                         {\r
2449                                 if(v->name == name)\r
2450                                 {\r
2451                                         if(reg >= 0)\r
2452                                         {\r
2453                                                 ASSERT(v->reg < 0 || v->reg == reg);\r
2454                                                 v->reg = reg;\r
2455                                         }\r
2456 \r
2457                                         return;\r
2458                                 }\r
2459                         }\r
2460                         \r
2461                         activeVaryings.push_back(glsl::Varying(glVariableType(type), name, varying->getArraySize(), reg, 0));\r
2462                 }\r
2463         }\r
2464 \r
2465         int OutputASM::uniformRegister(TIntermTyped *uniform)\r
2466         {\r
2467                 const TType &type = uniform->getType();\r
2468                 ASSERT(!IsSampler(type.getBasicType()));\r
2469                 TInterfaceBlock *block = type.getAsInterfaceBlock();\r
2470                 TIntermSymbol *symbol = uniform->getAsSymbolNode();\r
2471                 ASSERT(symbol || block);\r
2472 \r
2473                 if(symbol || block)\r
2474                 {\r
2475                         int index = lookup(uniforms, uniform);\r
2476 \r
2477                         if(index == -1)\r
2478                         {\r
2479                                 index = allocate(uniforms, uniform);\r
2480                                 const TString &name = symbol ? symbol->getSymbol() : block->name();\r
2481 \r
2482                                 declareUniform(type, name, index);\r
2483                         }\r
2484 \r
2485                         return index;\r
2486                 }\r
2487 \r
2488                 return 0;\r
2489         }\r
2490 \r
2491         int OutputASM::attributeRegister(TIntermTyped *attribute)\r
2492         {\r
2493                 ASSERT(!attribute->isArray());\r
2494 \r
2495                 int index = lookup(attributes, attribute);\r
2496 \r
2497                 if(index == -1)\r
2498                 {\r
2499                         TIntermSymbol *symbol = attribute->getAsSymbolNode();\r
2500                         ASSERT(symbol);\r
2501 \r
2502                         if(symbol)\r
2503                         {\r
2504                                 index = allocate(attributes, attribute);\r
2505                                 const TType &type = attribute->getType();\r
2506                                 int registerCount = attribute->totalRegisterCount();\r
2507 \r
2508                                 if(vertexShader && (index + registerCount) <= sw::VertexShader::MAX_INPUT_ATTRIBUTES)\r
2509                                 {\r
2510                                         for(int i = 0; i < registerCount; i++)\r
2511                                         {\r
2512                                                 vertexShader->input[index + i] = sw::Shader::Semantic(sw::Shader::USAGE_TEXCOORD, index + i);\r
2513                                         }\r
2514                                 }\r
2515 \r
2516                                 ActiveAttributes &activeAttributes = shaderObject->activeAttributes;\r
2517 \r
2518                                 const char *name = symbol->getSymbol().c_str();\r
2519                                 activeAttributes.push_back(Attribute(glVariableType(type), name, type.getArraySize(), type.getLayoutQualifier().location, index));\r
2520                         }\r
2521                 }\r
2522 \r
2523                 return index;\r
2524         }\r
2525 \r
2526         int OutputASM::samplerRegister(TIntermTyped *sampler)\r
2527         {\r
2528                 ASSERT(IsSampler(sampler->getType().getBasicType()));\r
2529                 TIntermSymbol *symbol = sampler->getAsSymbolNode();\r
2530                 TIntermBinary *binary = sampler->getAsBinaryNode();\r
2531 \r
2532                 if(symbol)\r
2533                 {\r
2534                         return samplerRegister(symbol);\r
2535                 }\r
2536                 else if(binary)\r
2537                 {\r
2538                         ASSERT(binary->getOp() == EOpIndexDirect || binary->getOp() == EOpIndexIndirect ||\r
2539                                    binary->getOp() == EOpIndexDirectStruct || binary->getOp() == EOpIndexDirectInterfaceBlock);\r
2540 \r
2541                         return samplerRegister(binary->getLeft());   // Index added later\r
2542                 }\r
2543                 else UNREACHABLE(0);\r
2544 \r
2545                 return 0;\r
2546         }\r
2547 \r
2548         int OutputASM::samplerRegister(TIntermSymbol *sampler)\r
2549         {\r
2550                 const TType &type = sampler->getType();\r
2551                 ASSERT(IsSampler(type.getBasicType()) || type.getStruct());   // Structures can contain samplers\r
2552 \r
2553                 int index = lookup(samplers, sampler);\r
2554 \r
2555                 if(index == -1)\r
2556                 {\r
2557                         index = allocate(samplers, sampler);\r
2558 \r
2559                         if(sampler->getQualifier() == EvqUniform)\r
2560                         {\r
2561                                 const char *name = sampler->getSymbol().c_str();\r
2562                                 declareUniform(type, name, index);\r
2563                         }\r
2564                 }\r
2565 \r
2566                 return index;\r
2567         }\r
2568 \r
2569         int OutputASM::lookup(VariableArray &list, TIntermTyped *variable)\r
2570         {\r
2571                 for(unsigned int i = 0; i < list.size(); i++)\r
2572                 {\r
2573                         if(list[i] == variable)\r
2574                         {\r
2575                                 return i;   // Pointer match\r
2576                         }\r
2577                 }\r
2578 \r
2579                 TIntermSymbol *varSymbol = variable->getAsSymbolNode();\r
2580                 TInterfaceBlock *varBlock = variable->getType().getAsInterfaceBlock();\r
2581 \r
2582                 if(varSymbol)\r
2583                 {\r
2584                         for(unsigned int i = 0; i < list.size(); i++)\r
2585                         {\r
2586                                 if(list[i])\r
2587                                 {\r
2588                                         TIntermSymbol *listSymbol = list[i]->getAsSymbolNode();\r
2589 \r
2590                                         if(listSymbol)\r
2591                                         {\r
2592                                                 if(listSymbol->getId() == varSymbol->getId())\r
2593                                                 {\r
2594                                                         ASSERT(listSymbol->getSymbol() == varSymbol->getSymbol());\r
2595                                                         ASSERT(listSymbol->getType() == varSymbol->getType());\r
2596                                                         ASSERT(listSymbol->getQualifier() == varSymbol->getQualifier());\r
2597 \r
2598                                                         return i;\r
2599                                                 }\r
2600                                         }\r
2601                                 }\r
2602                         }\r
2603                 }\r
2604                 else if(varBlock)\r
2605                 {\r
2606                         for(unsigned int i = 0; i < list.size(); i++)\r
2607                         {\r
2608                                 if(list[i])\r
2609                                 {\r
2610                                         TInterfaceBlock *listBlock = list[i]->getType().getAsInterfaceBlock();\r
2611 \r
2612                                         if(listBlock)\r
2613                                         {\r
2614                                                 if(listBlock->name() == varBlock->name())\r
2615                                                 {\r
2616                                                         ASSERT(listBlock->arraySize() == varBlock->arraySize());\r
2617                                                         ASSERT(listBlock->fields() == varBlock->fields());\r
2618                                                         ASSERT(listBlock->blockStorage() == varBlock->blockStorage());\r
2619                                                         ASSERT(listBlock->matrixPacking() == varBlock->matrixPacking());\r
2620 \r
2621                                                         return i;\r
2622                                                 }\r
2623                                         }\r
2624                                 }\r
2625                         }\r
2626                 }\r
2627 \r
2628                 return -1;\r
2629         }\r
2630 \r
2631         int OutputASM::allocate(VariableArray &list, TIntermTyped *variable)\r
2632         {\r
2633                 int index = lookup(list, variable);\r
2634 \r
2635                 if(index == -1)\r
2636                 {\r
2637                         unsigned int registerCount = variable->totalRegisterCount();\r
2638 \r
2639                         for(unsigned int i = 0; i < list.size(); i++)\r
2640                         {\r
2641                                 if(list[i] == 0)\r
2642                                 {\r
2643                                         unsigned int j = 1;\r
2644                                         for( ; j < registerCount && (i + j) < list.size(); j++)\r
2645                                         {\r
2646                                                 if(list[i + j] != 0)\r
2647                                                 {\r
2648                                                         break;\r
2649                                                 }\r
2650                                         }\r
2651 \r
2652                                         if(j == registerCount)   // Found free slots\r
2653                                         {\r
2654                                                 for(unsigned int j = 0; j < registerCount; j++)\r
2655                                                 {\r
2656                                                         list[i + j] = variable;\r
2657                                                 }\r
2658 \r
2659                                                 return i;\r
2660                                         }\r
2661                                 }\r
2662                         }\r
2663 \r
2664                         index = list.size();\r
2665 \r
2666                         for(unsigned int i = 0; i < registerCount; i++)\r
2667                         {\r
2668                                 list.push_back(variable);\r
2669                         }\r
2670                 }\r
2671 \r
2672                 return index;\r
2673         }\r
2674 \r
2675         void OutputASM::free(VariableArray &list, TIntermTyped *variable)\r
2676         {\r
2677                 int index = lookup(list, variable);\r
2678 \r
2679                 if(index >= 0)\r
2680                 {\r
2681                         list[index] = 0;\r
2682                 }\r
2683         }\r
2684 \r
2685         void OutputASM::declareUniform(const TType &type, const TString &name, int registerIndex, int offset, int blockId)\r
2686         {\r
2687                 const TStructure *structure = type.getStruct();\r
2688                 const TInterfaceBlock *block = (type.isInterfaceBlock() || (blockId == -1)) ? type.getInterfaceBlock() : nullptr;\r
2689                 ActiveUniforms &activeUniforms = shaderObject->activeUniforms;\r
2690 \r
2691                 if(block)\r
2692                 {\r
2693                         ActiveUniformBlocks &activeUniformBlocks = shaderObject->activeUniformBlocks;\r
2694                         blockId = activeUniformBlocks.size();\r
2695                         unsigned int dataSize = block->objectSize() * 4; // FIXME: assuming 4 bytes per element\r
2696                         activeUniformBlocks.push_back(UniformBlock(block->name().c_str(), block->hasInstanceName() ? block->instanceName().c_str() : std::string(), dataSize,\r
2697                                                                    block->arraySize(), block->blockStorage(), block->matrixPacking() == EmpRowMajor, registerIndex, blockId));\r
2698                 }\r
2699 \r
2700                 if(!structure && !block)\r
2701                 {\r
2702                         if(blockId >= 0)\r
2703                         {\r
2704                                 shaderObject->activeUniformBlocks[blockId].fields.push_back(activeUniforms.size());\r
2705                         }\r
2706                         activeUniforms.push_back(Uniform(glVariableType(type), glVariablePrecision(type), name.c_str(), type.getArraySize(), registerIndex, offset, blockId));\r
2707 \r
2708                         if(isSamplerRegister(type))\r
2709                         {\r
2710                                 for(int i = 0; i < type.totalRegisterCount(); i++)\r
2711                                 {\r
2712                                         shader->declareSampler(registerIndex + i);\r
2713                                 }\r
2714                         }\r
2715                 }\r
2716                 else\r
2717                 {\r
2718                         const TFieldList& fields = structure ? structure->fields() : block->fields();\r
2719                         const bool containerHasName = structure || block->hasInstanceName();\r
2720                         const TString &containerName = structure ? name : (containerHasName ? block->instanceName() : TString());\r
2721                         if(type.isArray() && (structure || type.isInterfaceBlock()))\r
2722                         {\r
2723                                 int fieldRegisterIndex = (blockId == -1) ? registerIndex : 0;\r
2724                                 int fieldOffset = 0;\r
2725 \r
2726                                 for(int i = 0; i < type.getArraySize(); i++)\r
2727                                 {\r
2728                                         for(size_t j = 0; j < fields.size(); j++)\r
2729                                         {\r
2730                                                 const TType &fieldType = *(fields[j]->type());\r
2731                                                 const TString &fieldName = fields[j]->name();\r
2732 \r
2733                                                 const TString uniformName = containerHasName ? containerName + "[" + str(i) + "]." + fieldName : fieldName;\r
2734                                                 declareUniform(fieldType, uniformName, fieldRegisterIndex, fieldOffset, blockId);\r
2735                                                 int registerCount = fieldType.totalRegisterCount();\r
2736                                                 fieldRegisterIndex += registerCount;\r
2737                                                 fieldOffset += registerCount * fieldType.registerSize();\r
2738                                         }\r
2739                                 }\r
2740                         }\r
2741                         else\r
2742                         {\r
2743                                 int fieldRegisterIndex = (blockId == -1) ? registerIndex : 0;\r
2744                                 int fieldOffset = 0;\r
2745 \r
2746                                 for(size_t i = 0; i < fields.size(); i++)\r
2747                                 {\r
2748                                         const TType &fieldType = *(fields[i]->type());\r
2749                                         const TString &fieldName = fields[i]->name();\r
2750 \r
2751                                         const TString uniformName = containerHasName ? containerName + "." + fieldName : fieldName;\r
2752                                         declareUniform(fieldType, uniformName, fieldRegisterIndex, fieldOffset, blockId);\r
2753                                         int registerCount = fieldType.totalRegisterCount();\r
2754                                         fieldRegisterIndex += registerCount;\r
2755                                         fieldOffset += registerCount * fieldType.registerSize();\r
2756                                 }\r
2757                         }\r
2758                 }\r
2759         }\r
2760 \r
2761         GLenum OutputASM::glVariableType(const TType &type)\r
2762         {\r
2763                 switch(type.getBasicType())\r
2764                 {\r
2765                 case EbtFloat:\r
2766                         if(type.isScalar())\r
2767                         {\r
2768                                 return GL_FLOAT;\r
2769                         }\r
2770                         else if(type.isVector())\r
2771                         {\r
2772                                 switch(type.getNominalSize())\r
2773                                 {\r
2774                                 case 2: return GL_FLOAT_VEC2;\r
2775                                 case 3: return GL_FLOAT_VEC3;\r
2776                                 case 4: return GL_FLOAT_VEC4;\r
2777                                 default: UNREACHABLE(type.getNominalSize());\r
2778                                 }\r
2779                         }\r
2780                         else if(type.isMatrix())\r
2781                         {\r
2782                                 switch(type.getNominalSize())\r
2783                                 {\r
2784                                 case 2:\r
2785                                         switch(type.getSecondarySize())\r
2786                                         {\r
2787                                         case 2: return GL_FLOAT_MAT2;\r
2788                                         case 3: return GL_FLOAT_MAT2x3;\r
2789                                         case 4: return GL_FLOAT_MAT2x4;\r
2790                                         default: UNREACHABLE(type.getSecondarySize());\r
2791                                         }\r
2792                                 case 3:\r
2793                                         switch(type.getSecondarySize())\r
2794                                         {\r
2795                                         case 2: return GL_FLOAT_MAT3x2;\r
2796                                         case 3: return GL_FLOAT_MAT3;\r
2797                                         case 4: return GL_FLOAT_MAT3x4;\r
2798                                         default: UNREACHABLE(type.getSecondarySize());\r
2799                                         }\r
2800                                 case 4:\r
2801                                         switch(type.getSecondarySize())\r
2802                                         {\r
2803                                         case 2: return GL_FLOAT_MAT4x2;\r
2804                                         case 3: return GL_FLOAT_MAT4x3;\r
2805                                         case 4: return GL_FLOAT_MAT4;\r
2806                                         default: UNREACHABLE(type.getSecondarySize());\r
2807                                         }\r
2808                                 default: UNREACHABLE(type.getNominalSize());\r
2809                                 }\r
2810                         }\r
2811                         else UNREACHABLE(0);\r
2812                         break;\r
2813                 case EbtInt:\r
2814                         if(type.isScalar())\r
2815                         {\r
2816                                 return GL_INT;\r
2817                         }\r
2818                         else if(type.isVector())\r
2819                         {\r
2820                                 switch(type.getNominalSize())\r
2821                                 {\r
2822                                 case 2: return GL_INT_VEC2;\r
2823                                 case 3: return GL_INT_VEC3;\r
2824                                 case 4: return GL_INT_VEC4;\r
2825                                 default: UNREACHABLE(type.getNominalSize());\r
2826                                 }\r
2827                         }\r
2828                         else UNREACHABLE(0);\r
2829                         break;\r
2830                 case EbtUInt:\r
2831                         if(type.isScalar())\r
2832                         {\r
2833                                 return GL_UNSIGNED_INT;\r
2834                         }\r
2835                         else if(type.isVector())\r
2836                         {\r
2837                                 switch(type.getNominalSize())\r
2838                                 {\r
2839                                 case 2: return GL_UNSIGNED_INT_VEC2;\r
2840                                 case 3: return GL_UNSIGNED_INT_VEC3;\r
2841                                 case 4: return GL_UNSIGNED_INT_VEC4;\r
2842                                 default: UNREACHABLE(type.getNominalSize());\r
2843                                 }\r
2844                         }\r
2845                         else UNREACHABLE(0);\r
2846                         break;\r
2847                 case EbtBool:\r
2848                         if(type.isScalar())\r
2849                         {\r
2850                                 return GL_BOOL;\r
2851                         }\r
2852                         else if(type.isVector())\r
2853                         {\r
2854                                 switch(type.getNominalSize())\r
2855                                 {\r
2856                                 case 2: return GL_BOOL_VEC2;\r
2857                                 case 3: return GL_BOOL_VEC3;\r
2858                                 case 4: return GL_BOOL_VEC4;\r
2859                                 default: UNREACHABLE(type.getNominalSize());\r
2860                                 }\r
2861                         }\r
2862                         else UNREACHABLE(0);\r
2863                         break;\r
2864                 case EbtSampler2D:\r
2865                         return GL_SAMPLER_2D;\r
2866                 case EbtISampler2D:\r
2867                         return GL_INT_SAMPLER_2D;\r
2868                 case EbtUSampler2D:\r
2869                         return GL_UNSIGNED_INT_SAMPLER_2D;\r
2870                 case EbtSamplerCube:\r
2871                         return GL_SAMPLER_CUBE;\r
2872                 case EbtISamplerCube:\r
2873                         return GL_INT_SAMPLER_CUBE;\r
2874                 case EbtUSamplerCube:\r
2875                         return GL_UNSIGNED_INT_SAMPLER_CUBE;\r
2876                 case EbtSamplerExternalOES:\r
2877                         return GL_SAMPLER_EXTERNAL_OES;\r
2878                 case EbtSampler3D:\r
2879                         return GL_SAMPLER_3D_OES;\r
2880                 case EbtISampler3D:\r
2881                         return GL_INT_SAMPLER_3D;\r
2882                 case EbtUSampler3D:\r
2883                         return GL_UNSIGNED_INT_SAMPLER_3D;\r
2884                 case EbtSampler2DArray:\r
2885                         return GL_SAMPLER_2D_ARRAY;\r
2886                 case EbtISampler2DArray:\r
2887                         return GL_INT_SAMPLER_2D_ARRAY;\r
2888                 case EbtUSampler2DArray:\r
2889                         return GL_UNSIGNED_INT_SAMPLER_2D_ARRAY;\r
2890                 case EbtSampler2DShadow:\r
2891                         return GL_SAMPLER_2D_SHADOW;\r
2892                 case EbtSamplerCubeShadow:\r
2893                         return GL_SAMPLER_CUBE_SHADOW;\r
2894                 case EbtSampler2DArrayShadow:\r
2895                         return GL_SAMPLER_2D_ARRAY_SHADOW;\r
2896                 default:\r
2897                         UNREACHABLE(type.getBasicType());\r
2898                         break;\r
2899                 }\r
2900 \r
2901                 return GL_NONE;\r
2902         }\r
2903 \r
2904         GLenum OutputASM::glVariablePrecision(const TType &type)\r
2905         {\r
2906                 if(type.getBasicType() == EbtFloat)\r
2907                 {\r
2908                         switch(type.getPrecision())\r
2909                         {\r
2910                         case EbpHigh:   return GL_HIGH_FLOAT;\r
2911                         case EbpMedium: return GL_MEDIUM_FLOAT;\r
2912                         case EbpLow:    return GL_LOW_FLOAT;\r
2913                         case EbpUndefined:\r
2914                                 // Should be defined as the default precision by the parser\r
2915                         default: UNREACHABLE(type.getPrecision());\r
2916                         }\r
2917                 }\r
2918                 else if(type.getBasicType() == EbtInt)\r
2919                 {\r
2920                         switch(type.getPrecision())\r
2921                         {\r
2922                         case EbpHigh:   return GL_HIGH_INT;\r
2923                         case EbpMedium: return GL_MEDIUM_INT;\r
2924                         case EbpLow:    return GL_LOW_INT;\r
2925                         case EbpUndefined:\r
2926                                 // Should be defined as the default precision by the parser\r
2927                         default: UNREACHABLE(type.getPrecision());\r
2928                         }\r
2929                 }\r
2930 \r
2931                 // Other types (boolean, sampler) don't have a precision\r
2932                 return GL_NONE;\r
2933         }\r
2934 \r
2935         int OutputASM::dim(TIntermNode *v)\r
2936         {\r
2937                 TIntermTyped *vector = v->getAsTyped();\r
2938                 ASSERT(vector && vector->isRegister());\r
2939                 return vector->getNominalSize();\r
2940         }\r
2941 \r
2942         int OutputASM::dim2(TIntermNode *m)\r
2943         {\r
2944                 TIntermTyped *matrix = m->getAsTyped();\r
2945                 ASSERT(matrix && matrix->isMatrix() && !matrix->isArray());\r
2946                 return matrix->getSecondarySize();\r
2947         }\r
2948 \r
2949         // Returns ~0 if no loop count could be determined\r
2950         unsigned int OutputASM::loopCount(TIntermLoop *node)\r
2951         {\r
2952                 // Parse loops of the form:\r
2953                 // for(int index = initial; index [comparator] limit; index += increment)\r
2954                 TIntermSymbol *index = 0;\r
2955                 TOperator comparator = EOpNull;\r
2956                 int initial = 0;\r
2957                 int limit = 0;\r
2958                 int increment = 0;\r
2959 \r
2960                 // Parse index name and intial value\r
2961                 if(node->getInit())\r
2962                 {\r
2963                         TIntermAggregate *init = node->getInit()->getAsAggregate();\r
2964 \r
2965                         if(init)\r
2966                         {\r
2967                                 TIntermSequence &sequence = init->getSequence();\r
2968                                 TIntermTyped *variable = sequence[0]->getAsTyped();\r
2969 \r
2970                                 if(variable && variable->getQualifier() == EvqTemporary)\r
2971                                 {\r
2972                                         TIntermBinary *assign = variable->getAsBinaryNode();\r
2973 \r
2974                                         if(assign->getOp() == EOpInitialize)\r
2975                                         {\r
2976                                                 TIntermSymbol *symbol = assign->getLeft()->getAsSymbolNode();\r
2977                                                 TIntermConstantUnion *constant = assign->getRight()->getAsConstantUnion();\r
2978 \r
2979                                                 if(symbol && constant)\r
2980                                                 {\r
2981                                                         if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)\r
2982                                                         {\r
2983                                                                 index = symbol;\r
2984                                                                 initial = constant->getUnionArrayPointer()[0].getIConst();\r
2985                                                         }\r
2986                                                 }\r
2987                                         }\r
2988                                 }\r
2989                         }\r
2990                 }\r
2991 \r
2992                 // Parse comparator and limit value\r
2993                 if(index && node->getCondition())\r
2994                 {\r
2995                         TIntermBinary *test = node->getCondition()->getAsBinaryNode();\r
2996 \r
2997                         if(test && test->getLeft()->getAsSymbolNode()->getId() == index->getId())\r
2998                         {\r
2999                                 TIntermConstantUnion *constant = test->getRight()->getAsConstantUnion();\r
3000 \r
3001                                 if(constant)\r
3002                                 {\r
3003                                         if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)\r
3004                                         {\r
3005                                                 comparator = test->getOp();\r
3006                                                 limit = constant->getUnionArrayPointer()[0].getIConst();\r
3007                                         }\r
3008                                 }\r
3009                         }\r
3010                 }\r
3011 \r
3012                 // Parse increment\r
3013                 if(index && comparator != EOpNull && node->getExpression())\r
3014                 {\r
3015                         TIntermBinary *binaryTerminal = node->getExpression()->getAsBinaryNode();\r
3016                         TIntermUnary *unaryTerminal = node->getExpression()->getAsUnaryNode();\r
3017 \r
3018                         if(binaryTerminal)\r
3019                         {\r
3020                                 TOperator op = binaryTerminal->getOp();\r
3021                                 TIntermConstantUnion *constant = binaryTerminal->getRight()->getAsConstantUnion();\r
3022 \r
3023                                 if(constant)\r
3024                                 {\r
3025                                         if(constant->getBasicType() == EbtInt && constant->getNominalSize() == 1)\r
3026                                         {\r
3027                                                 int value = constant->getUnionArrayPointer()[0].getIConst();\r
3028 \r
3029                                                 switch(op)\r
3030                                                 {\r
3031                                                 case EOpAddAssign: increment = value;  break;\r
3032                                                 case EOpSubAssign: increment = -value; break;\r
3033                                                 default: UNIMPLEMENTED();\r
3034                                                 }\r
3035                                         }\r
3036                                 }\r
3037                         }\r
3038                         else if(unaryTerminal)\r
3039                         {\r
3040                                 TOperator op = unaryTerminal->getOp();\r
3041 \r
3042                                 switch(op)\r
3043                                 {\r
3044                                 case EOpPostIncrement: increment = 1;  break;\r
3045                                 case EOpPostDecrement: increment = -1; break;\r
3046                                 case EOpPreIncrement:  increment = 1;  break;\r
3047                                 case EOpPreDecrement:  increment = -1; break;\r
3048                                 default: UNIMPLEMENTED();\r
3049                                 }\r
3050                         }\r
3051                 }\r
3052 \r
3053                 if(index && comparator != EOpNull && increment != 0)\r
3054                 {\r
3055                         if(comparator == EOpLessThanEqual)\r
3056                         {\r
3057                                 comparator = EOpLessThan;\r
3058                                 limit += 1;\r
3059                         }\r
3060 \r
3061                         if(comparator == EOpLessThan)\r
3062                         {\r
3063                                 int iterations = (limit - initial) / increment;\r
3064 \r
3065                                 if(iterations <= 0)\r
3066                                 {\r
3067                                         iterations = 0;\r
3068                                 }\r
3069 \r
3070                                 return iterations;\r
3071                         }\r
3072                         else UNIMPLEMENTED();   // Falls through\r
3073                 }\r
3074 \r
3075                 return ~0;\r
3076         }\r
3077 \r
3078         bool DetectLoopDiscontinuity::traverse(TIntermNode *node)\r
3079         {\r
3080                 loopDepth = 0;\r
3081                 loopDiscontinuity = false;\r
3082                 \r
3083                 node->traverse(this);\r
3084                 \r
3085                 return loopDiscontinuity;\r
3086         }\r
3087 \r
3088         bool DetectLoopDiscontinuity::visitLoop(Visit visit, TIntermLoop *loop)\r
3089         {\r
3090                 if(visit == PreVisit)\r
3091                 {\r
3092                         loopDepth++;\r
3093                 }\r
3094                 else if(visit == PostVisit)\r
3095                 {\r
3096                         loopDepth++;\r
3097                 }\r
3098 \r
3099                 return true;\r
3100         }\r
3101 \r
3102         bool DetectLoopDiscontinuity::visitBranch(Visit visit, TIntermBranch *node)\r
3103         {\r
3104                 if(loopDiscontinuity)\r
3105                 {\r
3106                         return false;\r
3107                 }\r
3108 \r
3109                 if(!loopDepth)\r
3110                 {\r
3111                         return true;\r
3112                 }\r
3113         \r
3114                 switch(node->getFlowOp())\r
3115                 {\r
3116                 case EOpKill:\r
3117                         break;\r
3118                 case EOpBreak:\r
3119                 case EOpContinue:\r
3120                 case EOpReturn:\r
3121                         loopDiscontinuity = true;\r
3122                         break;\r
3123                 default: UNREACHABLE(node->getFlowOp());\r
3124                 }\r
3125 \r
3126                 return !loopDiscontinuity;\r
3127         }\r
3128 \r
3129         bool DetectLoopDiscontinuity::visitAggregate(Visit visit, TIntermAggregate *node)\r
3130         {\r
3131                 return !loopDiscontinuity;\r
3132         }\r
3133 }\r