OSDN Git Service

56129bcf3120f12815133dabf6b8754925325ae2
[android-x86/external-swiftshader.git] / src / Pipeline / SpirvShader.cpp
1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <spirv/unified1/spirv.hpp>
16 #include <spirv/unified1/GLSL.std.450.h>
17 #include "SpirvShader.hpp"
18 #include "System/Math.hpp"
19 #include "Vulkan/VkBuffer.hpp"
20 #include "Vulkan/VkDebug.hpp"
21 #include "Vulkan/VkPipelineLayout.hpp"
22 #include "Device/Config.hpp"
23
24 #include <queue>
25
26 #ifdef Bool
27 #undef Bool // b/127920555
28 #endif
29
30 namespace sw
31 {
32         volatile int SpirvShader::serialCounter = 1;    // Next serial ID handed out by the constructor (serialID{serialCounter++}). Starts at 1 because 0 denotes an invalid shader.
                                                        // NOTE(review): volatile does not make the post-increment atomic - confirm shaders are never constructed concurrently.
33
34         SpirvShader::SpirvShader(InsnStore const &insns)
35                         : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
36                           outputs{MAX_INTERFACE_COMPONENTS},
37                           serialID{serialCounter++}, modes{}
38         {
39                 ASSERT(insns.size() > 0);
40
41                 // Simplifying assumptions (to be satisfied by earlier transformations)
42                 // - There is exactly one entrypoint in the module, and it's the one we want
43                 // - The only input/output OpVariables present are those used by the entrypoint
44
45                 Block::ID currentBlock;
46                 InsnIterator blockStart;
47
48                 for (auto insn : *this)
49                 {
50                         switch (insn.opcode())
51                         {
52                         case spv::OpExecutionMode:
53                                 ProcessExecutionMode(insn);
54                                 break;
55
56                         case spv::OpDecorate:
57                         {
58                                 TypeOrObjectID targetId = insn.word(1);
59                                 auto decoration = static_cast<spv::Decoration>(insn.word(2));
60                                 decorations[targetId].Apply(
61                                                 decoration,
62                                                 insn.wordCount() > 3 ? insn.word(3) : 0);
63
64                                 if (decoration == spv::DecorationCentroid)
65                                         modes.NeedsCentroid = true;
66                                 break;
67                         }
68
69                         case spv::OpMemberDecorate:
70                         {
71                                 Type::ID targetId = insn.word(1);
72                                 auto memberIndex = insn.word(2);
73                                 auto &d = memberDecorations[targetId];
74                                 if (memberIndex >= d.size())
75                                         d.resize(memberIndex + 1);    // on demand; exact size would require another pass...
76                                 auto decoration = static_cast<spv::Decoration>(insn.word(3));
77                                 d[memberIndex].Apply(
78                                                 decoration,
79                                                 insn.wordCount() > 4 ? insn.word(4) : 0);
80
81                                 if (decoration == spv::DecorationCentroid)
82                                         modes.NeedsCentroid = true;
83                                 break;
84                         }
85
86                         case spv::OpDecorationGroup:
87                                 // Nothing to do here. We don't need to record the definition of the group; we'll just have
88                                 // the bundle of decorations float around. If we were to ever walk the decorations directly,
89                                 // we might think about introducing this as a real Object.
90                                 break;
91
92                         case spv::OpGroupDecorate:
93                         {
94                                 auto const &srcDecorations = decorations[insn.word(1)];
95                                 for (auto i = 2u; i < insn.wordCount(); i++)
96                                 {
97                                         // remaining operands are targets to apply the group to.
98                                         decorations[insn.word(i)].Apply(srcDecorations);
99                                 }
100                                 break;
101                         }
102
103                         case spv::OpGroupMemberDecorate:
104                         {
105                                 auto const &srcDecorations = decorations[insn.word(1)];
106                                 for (auto i = 2u; i < insn.wordCount(); i += 2)
107                                 {
108                                         // remaining operands are pairs of <id>, literal for members to apply to.
109                                         auto &d = memberDecorations[insn.word(i)];
110                                         auto memberIndex = insn.word(i + 1);
111                                         if (memberIndex >= d.size())
112                                                 d.resize(memberIndex + 1);    // on demand resize, see above...
113                                         d[memberIndex].Apply(srcDecorations);
114                                 }
115                                 break;
116                         }
117
118                         case spv::OpLabel:
119                         {
120                                 ASSERT(currentBlock.value() == 0);
121                                 currentBlock = Block::ID(insn.word(1));
122                                 blockStart = insn;
123                                 break;
124                         }
125
126                         // Branch Instructions (subset of Termination Instructions):
127                         case spv::OpBranch:
128                         case spv::OpBranchConditional:
129                         case spv::OpSwitch:
130                         case spv::OpReturn:
131                         // fallthrough
132
133                         // Termination instruction:
134                         case spv::OpKill:
135                         case spv::OpUnreachable:
136                         {
137                                 ASSERT(currentBlock.value() != 0);
138                                 auto blockEnd = insn; blockEnd++;
139                                 blocks[currentBlock] = Block(blockStart, blockEnd);
140                                 currentBlock = Block::ID(0);
141
142                                 if (insn.opcode() == spv::OpKill)
143                                 {
144                                         modes.ContainsKill = true;
145                                 }
146                                 break;
147                         }
148
149                         case spv::OpTypeVoid:
150                         case spv::OpTypeBool:
151                         case spv::OpTypeInt:
152                         case spv::OpTypeFloat:
153                         case spv::OpTypeVector:
154                         case spv::OpTypeMatrix:
155                         case spv::OpTypeImage:
156                         case spv::OpTypeSampler:
157                         case spv::OpTypeSampledImage:
158                         case spv::OpTypeArray:
159                         case spv::OpTypeRuntimeArray:
160                         case spv::OpTypeStruct:
161                         case spv::OpTypePointer:
162                         case spv::OpTypeFunction:
163                                 DeclareType(insn);
164                                 break;
165
166                         case spv::OpVariable:
167                         {
168                                 Type::ID typeId = insn.word(1);
169                                 Object::ID resultId = insn.word(2);
170                                 auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
171                                 if (insn.wordCount() > 4)
172                                         UNIMPLEMENTED("Variable initializers not yet supported");
173
174                                 auto &object = defs[resultId];
175                                 object.kind = Object::Kind::Variable;
176                                 object.definition = insn;
177                                 object.type = typeId;
178                                 object.pointerBase = insn.word(2);      // base is itself
179
180                                 ASSERT(getType(typeId).storageClass == storageClass);
181
182                                 switch (storageClass)
183                                 {
184                                 case spv::StorageClassInput:
185                                 case spv::StorageClassOutput:
186                                         ProcessInterfaceVariable(object);
187                                         break;
188                                 case spv::StorageClassUniform:
189                                 case spv::StorageClassStorageBuffer:
190                                 case spv::StorageClassPushConstant:
191                                         object.kind = Object::Kind::PhysicalPointer;
192                                         break;
193
194                                 case spv::StorageClassPrivate:
195                                 case spv::StorageClassFunction:
196                                         break; // Correctly handled.
197
198                                 case spv::StorageClassUniformConstant:
199                                 case spv::StorageClassWorkgroup:
200                                 case spv::StorageClassCrossWorkgroup:
201                                 case spv::StorageClassGeneric:
202                                 case spv::StorageClassAtomicCounter:
203                                 case spv::StorageClassImage:
204                                         UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
205                                         break;
206
207                                 default:
208                                         UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
209                                         break;
210                                 }
211                                 break;
212                         }
213
214                         case spv::OpConstant:
215                                 CreateConstant(insn).constantValue[0] = insn.word(3);
216                                 break;
217                         case spv::OpConstantFalse:
218                                 CreateConstant(insn).constantValue[0] = 0;              // represent boolean false as zero
219                                 break;
220                         case spv::OpConstantTrue:
221                                 CreateConstant(insn).constantValue[0] = ~0u;    // represent boolean true as all bits set
222                                 break;
223                         case spv::OpConstantNull:
224                         case spv::OpUndef:
225                         {
226                                 // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
227                                 // OpConstantNull forms a constant of arbitrary type, all zeros.
228                                 auto &object = CreateConstant(insn);
229                                 auto &objectTy = getType(object.type);
230                                 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
231                                 {
232                                         object.constantValue[i] = 0;
233                                 }
234                                 break;
235                         }
236                         case spv::OpConstantComposite:
237                         {
238                                 auto &object = CreateConstant(insn);
239                                 auto offset = 0u;
240                                 for (auto i = 0u; i < insn.wordCount() - 3; i++)
241                                 {
242                                         auto &constituent = getObject(insn.word(i + 3));
243                                         auto &constituentTy = getType(constituent.type);
244                                         for (auto j = 0u; j < constituentTy.sizeInComponents; j++)
245                                                 object.constantValue[offset++] = constituent.constantValue[j];
246                                 }
247
248                                 auto objectId = Object::ID(insn.word(2));
249                                 auto decorationsIt = decorations.find(objectId);
250                                 if (decorationsIt != decorations.end() &&
251                                         decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
252                                 {
253                                         // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
254                                         // Decorating an object with the WorkgroupSize built-in
255                                         // decoration will make that object contain the dimensions
256                                         // of a local workgroup. If an object is decorated with the
257                                         // WorkgroupSize decoration, this must take precedence over
258                                         // any execution mode set for LocalSize.
259                                         // The object decorated with WorkgroupSize must be declared
260                                         // as a three-component vector of 32-bit integers.
261                                         ASSERT(getType(object.type).sizeInComponents == 3);
262                                         modes.WorkgroupSizeX = object.constantValue[0];
263                                         modes.WorkgroupSizeY = object.constantValue[1];
264                                         modes.WorkgroupSizeZ = object.constantValue[2];
265                                 }
266                                 break;
267                         }
268
269                         case spv::OpCapability:
270                                 break; // Various capabilities will be declared, but none affect our code generation at this point.
271                         case spv::OpMemoryModel:
272                                 break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
273
274                         case spv::OpEntryPoint:
275                                 break;
276                         case spv::OpFunction:
277                                 ASSERT(mainBlockId.value() == 0); // Multiple functions found
278                                 // Scan forward to find the function's label.
279                                 for (auto it = insn; it != end() && mainBlockId.value() == 0; it++)
280                                 {
281                                         switch (it.opcode())
282                                         {
283                                         case spv::OpFunction:
284                                         case spv::OpFunctionParameter:
285                                                 break;
286                                         case spv::OpLabel:
287                                                 mainBlockId = Block::ID(it.word(1));
288                                                 break;
289                                         default:
290                                                 WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str());
291                                         }
292                                 }
293                                 ASSERT(mainBlockId.value() != 0); // Function's OpLabel not found
294                                 break;
295                         case spv::OpFunctionEnd:
296                                 // Due to preprocessing, the entrypoint and its function provide no value.
297                                 break;
298                         case spv::OpExtInstImport:
299                                 // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
300                                 // Valid shaders will not attempt to import any other instruction sets.
301                                 if (0 != strcmp("GLSL.std.450", reinterpret_cast<char const *>(insn.wordPointer(2))))
302                                 {
303                                         UNIMPLEMENTED("Only GLSL extended instruction set is supported");
304                                 }
305                                 break;
306                         case spv::OpName:
307                         case spv::OpMemberName:
308                         case spv::OpSource:
309                         case spv::OpSourceContinued:
310                         case spv::OpSourceExtension:
311                         case spv::OpLine:
312                         case spv::OpNoLine:
313                         case spv::OpModuleProcessed:
314                         case spv::OpString:
315                                 // No semantic impact
316                                 break;
317
318                         case spv::OpFunctionParameter:
319                         case spv::OpFunctionCall:
320                         case spv::OpSpecConstant:
321                         case spv::OpSpecConstantComposite:
322                         case spv::OpSpecConstantFalse:
323                         case spv::OpSpecConstantOp:
324                         case spv::OpSpecConstantTrue:
325                                 // These should have all been removed by preprocessing passes. If we see them here,
326                                 // our assumptions are wrong and we will probably generate wrong code.
327                                 UNIMPLEMENTED("%s should have already been lowered.", OpcodeName(insn.opcode()).c_str());
328                                 break;
329
330                         case spv::OpFConvert:
331                         case spv::OpSConvert:
332                         case spv::OpUConvert:
333                                 UNIMPLEMENTED("No valid uses for Op*Convert until we support multiple bit widths");
334                                 break;
335
336                         case spv::OpLoad:
337                         case spv::OpAccessChain:
338                         case spv::OpInBoundsAccessChain:
339                         case spv::OpCompositeConstruct:
340                         case spv::OpCompositeInsert:
341                         case spv::OpCompositeExtract:
342                         case spv::OpVectorShuffle:
343                         case spv::OpVectorTimesScalar:
344                         case spv::OpVectorExtractDynamic:
345                         case spv::OpVectorInsertDynamic:
346                         case spv::OpNot: // Unary ops
347                         case spv::OpSNegate:
348                         case spv::OpFNegate:
349                         case spv::OpLogicalNot:
350                         case spv::OpIAdd: // Binary ops
351                         case spv::OpISub:
352                         case spv::OpIMul:
353                         case spv::OpSDiv:
354                         case spv::OpUDiv:
355                         case spv::OpFAdd:
356                         case spv::OpFSub:
357                         case spv::OpFMul:
358                         case spv::OpFDiv:
359                         case spv::OpFMod:
360                         case spv::OpFRem:
361                         case spv::OpFOrdEqual:
362                         case spv::OpFUnordEqual:
363                         case spv::OpFOrdNotEqual:
364                         case spv::OpFUnordNotEqual:
365                         case spv::OpFOrdLessThan:
366                         case spv::OpFUnordLessThan:
367                         case spv::OpFOrdGreaterThan:
368                         case spv::OpFUnordGreaterThan:
369                         case spv::OpFOrdLessThanEqual:
370                         case spv::OpFUnordLessThanEqual:
371                         case spv::OpFOrdGreaterThanEqual:
372                         case spv::OpFUnordGreaterThanEqual:
373                         case spv::OpSMod:
374                         case spv::OpSRem:
375                         case spv::OpUMod:
376                         case spv::OpIEqual:
377                         case spv::OpINotEqual:
378                         case spv::OpUGreaterThan:
379                         case spv::OpSGreaterThan:
380                         case spv::OpUGreaterThanEqual:
381                         case spv::OpSGreaterThanEqual:
382                         case spv::OpULessThan:
383                         case spv::OpSLessThan:
384                         case spv::OpULessThanEqual:
385                         case spv::OpSLessThanEqual:
386                         case spv::OpShiftRightLogical:
387                         case spv::OpShiftRightArithmetic:
388                         case spv::OpShiftLeftLogical:
389                         case spv::OpBitwiseOr:
390                         case spv::OpBitwiseXor:
391                         case spv::OpBitwiseAnd:
392                         case spv::OpLogicalOr:
393                         case spv::OpLogicalAnd:
394                         case spv::OpLogicalEqual:
395                         case spv::OpLogicalNotEqual:
396                         case spv::OpUMulExtended:
397                         case spv::OpSMulExtended:
398                         case spv::OpDot:
399                         case spv::OpConvertFToU:
400                         case spv::OpConvertFToS:
401                         case spv::OpConvertSToF:
402                         case spv::OpConvertUToF:
403                         case spv::OpBitcast:
404                         case spv::OpSelect:
405                         case spv::OpExtInst:
406                         case spv::OpIsInf:
407                         case spv::OpIsNan:
408                         case spv::OpAny:
409                         case spv::OpAll:
410                         case spv::OpDPdx:
411                         case spv::OpDPdxCoarse:
412                         case spv::OpDPdy:
413                         case spv::OpDPdyCoarse:
414                         case spv::OpFwidth:
415                         case spv::OpFwidthCoarse:
416                         case spv::OpDPdxFine:
417                         case spv::OpDPdyFine:
418                         case spv::OpFwidthFine:
419                         case spv::OpAtomicLoad:
420                                 // Instructions that yield an intermediate value
421                         {
422                                 Type::ID typeId = insn.word(1);
423                                 Object::ID resultId = insn.word(2);
424                                 auto &object = defs[resultId];
425                                 object.type = typeId;
426                                 object.kind = Object::Kind::Value;
427                                 object.definition = insn;
428
429                                 if (insn.opcode() == spv::OpAccessChain || insn.opcode() == spv::OpInBoundsAccessChain)
430                                 {
431                                         // interior ptr has two parts:
432                                         // - logical base ptr, common across all lanes and known at compile time
433                                         // - per-lane offset
434                                         Object::ID baseId = insn.word(3);
435                                         object.pointerBase = getObject(baseId).pointerBase;
436                                 }
437                                 break;
438                         }
439
440                         case spv::OpStore:
441                         case spv::OpAtomicStore:
442                                 // Don't need to do anything during analysis pass
443                                 break;
444
445                         default:
446                                 UNIMPLEMENTED("%s", OpcodeName(insn.opcode()).c_str());
447                         }
448                 }
449
450                 // Assign all Block::ins
451                 for (auto &it : blocks)
452                 {
453                         auto &blockId = it.first;
454                         auto &block = it.second;
455                         for (auto &outId : block.outs)
456                         {
457                                 auto outIt = blocks.find(outId);
458                                 ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
459                                 auto &out = outIt->second;
460                                 out.ins.emplace(blockId);
461                         }
462                 }
463         }
464
465         void SpirvShader::DeclareType(InsnIterator insn)
466         {
467                 Type::ID resultId = insn.word(1);
468
469                 auto &type = types[resultId];
470                 type.definition = insn;
471                 type.sizeInComponents = ComputeTypeSize(insn);
472
473                 // A structure is a builtin block if it has a builtin
474                 // member. All members of such a structure are builtins.
475                 switch (insn.opcode())
476                 {
477                 case spv::OpTypeStruct:
478                 {
479                         auto d = memberDecorations.find(resultId);
480                         if (d != memberDecorations.end())
481                         {
482                                 for (auto &m : d->second)
483                                 {
484                                         if (m.HasBuiltIn)
485                                         {
486                                                 type.isBuiltInBlock = true;
487                                                 break;
488                                         }
489                                 }
490                         }
491                         break;
492                 }
493                 case spv::OpTypePointer:
494                 {
495                         Type::ID elementTypeId = insn.word(3);
496                         type.element = elementTypeId;
497                         type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
498                         type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
499                         break;
500                 }
501                 case spv::OpTypeVector:
502                 case spv::OpTypeMatrix:
503                 case spv::OpTypeArray:
504                 case spv::OpTypeRuntimeArray:
505                 {
506                         Type::ID elementTypeId = insn.word(2);
507                         type.element = elementTypeId;
508                         break;
509                 }
510                 default:
511                         break;
512                 }
513         }
514
515         SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
516         {
517                 Type::ID typeId = insn.word(1);
518                 Object::ID resultId = insn.word(2);
519                 auto &object = defs[resultId];
520                 auto &objectTy = getType(typeId);
521                 object.type = typeId;
522                 object.kind = Object::Kind::Constant;
523                 object.definition = insn;
524                 object.constantValue = std::unique_ptr<uint32_t[]>(new uint32_t[objectTy.sizeInComponents]);
525                 return object;
526         }
527
528         void SpirvShader::ProcessInterfaceVariable(Object &object)
529         {
530                 auto &objectTy = getType(object.type);
531                 ASSERT(objectTy.storageClass == spv::StorageClassInput || objectTy.storageClass == spv::StorageClassOutput);
532
533                 ASSERT(objectTy.opcode() == spv::OpTypePointer);
534                 auto pointeeTy = getType(objectTy.element);
535
536                 auto &builtinInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputBuiltins : outputBuiltins;
537                 auto &userDefinedInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputs : outputs;
538
539                 ASSERT(object.opcode() == spv::OpVariable);
540                 Object::ID resultId = object.definition.word(2);
541
542                 if (objectTy.isBuiltInBlock)
543                 {
544                         // walk the builtin block, registering each of its members separately.
545                         auto m = memberDecorations.find(objectTy.element);
546                         ASSERT(m != memberDecorations.end());        // otherwise we wouldn't have marked the type chain
547                         auto &structType = pointeeTy.definition;
548                         auto offset = 0u;
549                         auto word = 2u;
550                         for (auto &member : m->second)
551                         {
552                                 auto &memberType = getType(structType.word(word));
553
554                                 if (member.HasBuiltIn)
555                                 {
556                                         builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
557                                 }
558
559                                 offset += memberType.sizeInComponents;
560                                 ++word;
561                         }
562                         return;
563                 }
564
565                 auto d = decorations.find(resultId);
566                 if (d != decorations.end() && d->second.HasBuiltIn)
567                 {
568                         builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
569                 }
570                 else
571                 {
572                         object.kind = Object::Kind::InterfaceVariable;
573                         VisitInterface(resultId,
574                                                    [&userDefinedInterface](Decorations const &d, AttribType type) {
575                                                            // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
576                                                            auto scalarSlot = (d.Location << 2) | d.Component;
577                                                            ASSERT(scalarSlot >= 0 &&
578                                                                           scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
579
580                                                            auto &slot = userDefinedInterface[scalarSlot];
581                                                            slot.Type = type;
582                                                            slot.Flat = d.Flat;
583                                                            slot.NoPerspective = d.NoPerspective;
584                                                            slot.Centroid = d.Centroid;
585                                                    });
586                 }
587         }
588
589         void SpirvShader::ProcessExecutionMode(InsnIterator insn)
590         {
591                 auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
592                 switch (mode)
593                 {
594                 case spv::ExecutionModeEarlyFragmentTests:
595                         modes.EarlyFragmentTests = true;
596                         break;
597                 case spv::ExecutionModeDepthReplacing:
598                         modes.DepthReplacing = true;
599                         break;
600                 case spv::ExecutionModeDepthGreater:
601                         modes.DepthGreater = true;
602                         break;
603                 case spv::ExecutionModeDepthLess:
604                         modes.DepthLess = true;
605                         break;
606                 case spv::ExecutionModeDepthUnchanged:
607                         modes.DepthUnchanged = true;
608                         break;
609                 case spv::ExecutionModeLocalSize:
610                         modes.WorkgroupSizeX = insn.word(3);
611                         modes.WorkgroupSizeY = insn.word(4);
612                         modes.WorkgroupSizeZ = insn.word(5);
613                         break;
614                 case spv::ExecutionModeOriginUpperLeft:
615                         // This is always the case for a Vulkan shader. Do nothing.
616                         break;
617                 default:
618                         UNIMPLEMENTED("No other execution modes are permitted");
619                 }
620         }
621
622         uint32_t SpirvShader::ComputeTypeSize(InsnIterator insn)
623         {
624                 // Types are always built from the bottom up (with the exception of forward ptrs, which
625                 // don't appear in Vulkan shaders. Therefore, we can always assume our component parts have
626                 // already been described (and so their sizes determined)
627                 switch (insn.opcode())
628                 {
629                 case spv::OpTypeVoid:
630                 case spv::OpTypeSampler:
631                 case spv::OpTypeImage:
632                 case spv::OpTypeSampledImage:
633                 case spv::OpTypeFunction:
634                 case spv::OpTypeRuntimeArray:
635                         // Objects that don't consume any space.
636                         // Descriptor-backed objects currently only need exist at compile-time.
637                         // Runtime arrays don't appear in places where their size would be interesting
638                         return 0;
639
640                 case spv::OpTypeBool:
641                 case spv::OpTypeFloat:
642                 case spv::OpTypeInt:
643                         // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
644                         // we might need to change this, but only 32 bit components are required for Vulkan 1.1.
645                         return 1;
646
647                 case spv::OpTypeVector:
648                 case spv::OpTypeMatrix:
649                         // Vectors and matrices both consume element count * element size.
650                         return getType(insn.word(2)).sizeInComponents * insn.word(3);
651
652                 case spv::OpTypeArray:
653                 {
654                         // Element count * element size. Array sizes come from constant ids.
655                         auto arraySize = GetConstantInt(insn.word(3));
656                         return getType(insn.word(2)).sizeInComponents * arraySize;
657                 }
658
659                 case spv::OpTypeStruct:
660                 {
661                         uint32_t size = 0;
662                         for (uint32_t i = 2u; i < insn.wordCount(); i++)
663                         {
664                                 size += getType(insn.word(i)).sizeInComponents;
665                         }
666                         return size;
667                 }
668
669                 case spv::OpTypePointer:
670                         // Runtime representation of a pointer is a per-lane index.
671                         // Note: clients are expected to look through the pointer if they want the pointee size instead.
672                         return 1;
673
674                 default:
675                         // Some other random insn.
676                         UNIMPLEMENTED("Only types are supported");
677                         return 0;
678                 }
679         }
680
681         bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
682         {
683                 switch (storageClass)
684                 {
685                 case spv::StorageClassUniform:
686                 case spv::StorageClassStorageBuffer:
687                 case spv::StorageClassPushConstant:
688                         return false;
689                 default:
690                         return true;
691                 }
692         }
693
694         template<typename F>
695         int SpirvShader::VisitInterfaceInner(Type::ID id, Decorations d, F f) const
696         {
697                 // Recursively walks variable definition and its type tree, taking into account
698                 // any explicit Location or Component decorations encountered; where explicit
699                 // Locations or Components are not specified, assigns them sequentially.
700                 // Collected decorations are carried down toward the leaves and across
701                 // siblings; Effect of decorations intentionally does not flow back up the tree.
702                 //
703                 // F is a functor to be called with the effective decoration set for every component.
704                 //
705                 // Returns the next available location, and calls f().
706
707                 // This covers the rules in Vulkan 1.1 spec, 14.1.4 Location Assignment.
708
709                 ApplyDecorationsForId(&d, id);
710
711                 auto const &obj = getType(id);
712                 switch(obj.opcode())
713                 {
714                 case spv::OpTypePointer:
715                         return VisitInterfaceInner<F>(obj.definition.word(3), d, f);
716                 case spv::OpTypeMatrix:
717                         for (auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
718                         {
719                                 // consumes same components of N consecutive locations
720                                 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
721                         }
722                         return d.Location;
723                 case spv::OpTypeVector:
724                         for (auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
725                         {
726                                 // consumes N consecutive components in the same location
727                                 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
728                         }
729                         return d.Location + 1;
730                 case spv::OpTypeFloat:
731                         f(d, ATTRIBTYPE_FLOAT);
732                         return d.Location + 1;
733                 case spv::OpTypeInt:
734                         f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
735                         return d.Location + 1;
736                 case spv::OpTypeBool:
737                         f(d, ATTRIBTYPE_UINT);
738                         return d.Location + 1;
739                 case spv::OpTypeStruct:
740                 {
741                         // iterate over members, which may themselves have Location/Component decorations
742                         for (auto i = 0u; i < obj.definition.wordCount() - 2; i++)
743                         {
744                                 ApplyDecorationsForIdMember(&d, id, i);
745                                 d.Location = VisitInterfaceInner<F>(obj.definition.word(i + 2), d, f);
746                                 d.Component = 0;    // Implicit locations always have component=0
747                         }
748                         return d.Location;
749                 }
750                 case spv::OpTypeArray:
751                 {
752                         auto arraySize = GetConstantInt(obj.definition.word(3));
753                         for (auto i = 0u; i < arraySize; i++)
754                         {
755                                 d.Location = VisitInterfaceInner<F>(obj.definition.word(2), d, f);
756                         }
757                         return d.Location;
758                 }
759                 default:
760                         // Intentionally partial; most opcodes do not participate in type hierarchies
761                         return 0;
762                 }
763         }
764
765         template<typename F>
766         void SpirvShader::VisitInterface(Object::ID id, F f) const
767         {
768                 // Walk a variable definition and call f for each component in it.
769                 Decorations d{};
770                 ApplyDecorationsForId(&d, id);
771
772                 auto def = getObject(id).definition;
773                 ASSERT(def.opcode() == spv::OpVariable);
774                 VisitInterfaceInner<F>(def.word(1), d, f);
775         }
776
777         SIMD::Int SpirvShader::WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
778         {
779                 // Produce a offset into external memory in sizeof(float) units
780
781                 int constantOffset = 0;
782                 SIMD::Int dynamicOffset = SIMD::Int(0);
783                 auto &baseObject = getObject(id);
784                 Type::ID typeId = getType(baseObject.type).element;
785                 Decorations d{};
786                 ApplyDecorationsForId(&d, baseObject.type);
787
788                 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
789                 // Start with its offset and build from there.
790                 if (baseObject.kind == Object::Kind::Value)
791                 {
792                         dynamicOffset += routine->getIntermediate(id).Int(0);
793                 }
794
795                 for (auto i = 0u; i < numIndexes; i++)
796                 {
797                         auto & type = getType(typeId);
798                         switch (type.definition.opcode())
799                         {
800                         case spv::OpTypeStruct:
801                         {
802                                 int memberIndex = GetConstantInt(indexIds[i]);
803                                 ApplyDecorationsForIdMember(&d, typeId, memberIndex);
804                                 ASSERT(d.HasOffset);
805                                 constantOffset += d.Offset / sizeof(float);
806                                 typeId = type.definition.word(2u + memberIndex);
807                                 break;
808                         }
809                         case spv::OpTypeArray:
810                         case spv::OpTypeRuntimeArray:
811                         {
812                                 // TODO: b/127950082: Check bounds.
813                                 ApplyDecorationsForId(&d, typeId);
814                                 ASSERT(d.HasArrayStride);
815                                 auto & obj = getObject(indexIds[i]);
816                                 if (obj.kind == Object::Kind::Constant)
817                                         constantOffset += d.ArrayStride/sizeof(float) * GetConstantInt(indexIds[i]);
818                                 else
819                                         dynamicOffset += SIMD::Int(d.ArrayStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
820                                 typeId = type.element;
821                                 break;
822                         }
823                         case spv::OpTypeMatrix:
824                         {
825                                 // TODO: b/127950082: Check bounds.
826                                 ApplyDecorationsForId(&d, typeId);
827                                 ASSERT(d.HasMatrixStride);
828                                 auto & obj = getObject(indexIds[i]);
829                                 if (obj.kind == Object::Kind::Constant)
830                                         constantOffset += d.MatrixStride/sizeof(float) * GetConstantInt(indexIds[i]);
831                                 else
832                                         dynamicOffset += SIMD::Int(d.MatrixStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
833                                 typeId = type.element;
834                                 break;
835                         }
836                         case spv::OpTypeVector:
837                         {
838                                 auto & obj = getObject(indexIds[i]);
839                                 if (obj.kind == Object::Kind::Constant)
840                                         constantOffset += GetConstantInt(indexIds[i]);
841                                 else
842                                         dynamicOffset += routine->getIntermediate(indexIds[i]).Int(0);
843                                 typeId = type.element;
844                                 break;
845                         }
846                         default:
847                                 UNIMPLEMENTED("Unexpected type '%s' in WalkExplicitLayoutAccessChain", OpcodeName(type.definition.opcode()).c_str());
848                         }
849                 }
850
851                 return dynamicOffset + SIMD::Int(constantOffset);
852         }
853
854         SIMD::Int SpirvShader::WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
855         {
856                 // TODO: avoid doing per-lane work in some cases if we can?
857                 // Produce a *component* offset into location-oriented memory
858
859                 int constantOffset = 0;
860                 SIMD::Int dynamicOffset = SIMD::Int(0);
861                 auto &baseObject = getObject(id);
862                 Type::ID typeId = getType(baseObject.type).element;
863
864                 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
865                 // Start with its offset and build from there.
866                 if (baseObject.kind == Object::Kind::Value)
867                 {
868                         dynamicOffset += routine->getIntermediate(id).Int(0);
869                 }
870
871                 for (auto i = 0u; i < numIndexes; i++)
872                 {
873                         auto & type = getType(typeId);
874                         switch(type.opcode())
875                         {
876                         case spv::OpTypeStruct:
877                         {
878                                 int memberIndex = GetConstantInt(indexIds[i]);
879                                 int offsetIntoStruct = 0;
880                                 for (auto j = 0; j < memberIndex; j++) {
881                                         auto memberType = type.definition.word(2u + j);
882                                         offsetIntoStruct += getType(memberType).sizeInComponents;
883                                 }
884                                 constantOffset += offsetIntoStruct;
885                                 typeId = type.definition.word(2u + memberIndex);
886                                 break;
887                         }
888
889                         case spv::OpTypeVector:
890                         case spv::OpTypeMatrix:
891                         case spv::OpTypeArray:
892                         case spv::OpTypeRuntimeArray:
893                         {
894                                 // TODO: b/127950082: Check bounds.
895                                 auto stride = getType(type.element).sizeInComponents;
896                                 auto & obj = getObject(indexIds[i]);
897                                 if (obj.kind == Object::Kind::Constant)
898                                         constantOffset += stride * GetConstantInt(indexIds[i]);
899                                 else
900                                         dynamicOffset += SIMD::Int(stride) * routine->getIntermediate(indexIds[i]).Int(0);
901                                 typeId = type.element;
902                                 break;
903                         }
904
905                         default:
906                                 UNIMPLEMENTED("Unexpected type '%s' in WalkAccessChain", OpcodeName(type.opcode()).c_str());
907                         }
908                 }
909
910                 return dynamicOffset + SIMD::Int(constantOffset);
911         }
912
913         uint32_t SpirvShader::WalkLiteralAccessChain(Type::ID typeId, uint32_t numIndexes, uint32_t const *indexes) const
914         {
915                 uint32_t constantOffset = 0;
916
917                 for (auto i = 0u; i < numIndexes; i++)
918                 {
919                         auto & type = getType(typeId);
920                         switch(type.opcode())
921                         {
922                         case spv::OpTypeStruct:
923                         {
924                                 int memberIndex = indexes[i];
925                                 int offsetIntoStruct = 0;
926                                 for (auto j = 0; j < memberIndex; j++) {
927                                         auto memberType = type.definition.word(2u + j);
928                                         offsetIntoStruct += getType(memberType).sizeInComponents;
929                                 }
930                                 constantOffset += offsetIntoStruct;
931                                 typeId = type.definition.word(2u + memberIndex);
932                                 break;
933                         }
934
935                         case spv::OpTypeVector:
936                         case spv::OpTypeMatrix:
937                         case spv::OpTypeArray:
938                         {
939                                 auto elementType = type.definition.word(2);
940                                 auto stride = getType(elementType).sizeInComponents;
941                                 constantOffset += stride * indexes[i];
942                                 typeId = elementType;
943                                 break;
944                         }
945
946                         default:
947                                 UNIMPLEMENTED("Unexpected type in WalkLiteralAccessChain");
948                         }
949                 }
950
951                 return constantOffset;
952         }
953
954         void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
955         {
956                 switch (decoration)
957                 {
958                 case spv::DecorationLocation:
959                         HasLocation = true;
960                         Location = static_cast<int32_t>(arg);
961                         break;
962                 case spv::DecorationComponent:
963                         HasComponent = true;
964                         Component = arg;
965                         break;
966                 case spv::DecorationDescriptorSet:
967                         HasDescriptorSet = true;
968                         DescriptorSet = arg;
969                         break;
970                 case spv::DecorationBinding:
971                         HasBinding = true;
972                         Binding = arg;
973                         break;
974                 case spv::DecorationBuiltIn:
975                         HasBuiltIn = true;
976                         BuiltIn = static_cast<spv::BuiltIn>(arg);
977                         break;
978                 case spv::DecorationFlat:
979                         Flat = true;
980                         break;
981                 case spv::DecorationNoPerspective:
982                         NoPerspective = true;
983                         break;
984                 case spv::DecorationCentroid:
985                         Centroid = true;
986                         break;
987                 case spv::DecorationBlock:
988                         Block = true;
989                         break;
990                 case spv::DecorationBufferBlock:
991                         BufferBlock = true;
992                         break;
993                 case spv::DecorationOffset:
994                         HasOffset = true;
995                         Offset = static_cast<int32_t>(arg);
996                         break;
997                 case spv::DecorationArrayStride:
998                         HasArrayStride = true;
999                         ArrayStride = static_cast<int32_t>(arg);
1000                         break;
1001                 case spv::DecorationMatrixStride:
1002                         HasMatrixStride = true;
1003                         MatrixStride = static_cast<int32_t>(arg);
1004                         break;
1005                 default:
1006                         // Intentionally partial, there are many decorations we just don't care about.
1007                         break;
1008                 }
1009         }
1010
1011         void SpirvShader::Decorations::Apply(const sw::SpirvShader::Decorations &src)
1012         {
1013                 // Apply a decoration group to this set of decorations
1014                 if (src.HasBuiltIn)
1015                 {
1016                         HasBuiltIn = true;
1017                         BuiltIn = src.BuiltIn;
1018                 }
1019
1020                 if (src.HasLocation)
1021                 {
1022                         HasLocation = true;
1023                         Location = src.Location;
1024                 }
1025
1026                 if (src.HasComponent)
1027                 {
1028                         HasComponent = true;
1029                         Component = src.Component;
1030                 }
1031
1032                 if (src.HasDescriptorSet)
1033                 {
1034                         HasDescriptorSet = true;
1035                         DescriptorSet = src.DescriptorSet;
1036                 }
1037
1038                 if (src.HasBinding)
1039                 {
1040                         HasBinding = true;
1041                         Binding = src.Binding;
1042                 }
1043
1044                 if (src.HasOffset)
1045                 {
1046                         HasOffset = true;
1047                         Offset = src.Offset;
1048                 }
1049
1050                 if (src.HasArrayStride)
1051                 {
1052                         HasArrayStride = true;
1053                         ArrayStride = src.ArrayStride;
1054                 }
1055
1056                 if (src.HasMatrixStride)
1057                 {
1058                         HasMatrixStride = true;
1059                         MatrixStride = src.MatrixStride;
1060                 }
1061
1062                 Flat |= src.Flat;
1063                 NoPerspective |= src.NoPerspective;
1064                 Centroid |= src.Centroid;
1065                 Block |= src.Block;
1066                 BufferBlock |= src.BufferBlock;
1067         }
1068
1069         void SpirvShader::ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const
1070         {
1071                 auto it = decorations.find(id);
1072                 if (it != decorations.end())
1073                         d->Apply(it->second);
1074         }
1075
1076         void SpirvShader::ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const
1077         {
1078                 auto it = memberDecorations.find(id);
1079                 if (it != memberDecorations.end() && member < it->second.size())
1080                 {
1081                         d->Apply(it->second[member]);
1082                 }
1083         }
1084
1085         uint32_t SpirvShader::GetConstantInt(Object::ID id) const
1086         {
1087                 // Slightly hackish access to constants very early in translation.
1088                 // General consumption of constants by other instructions should
1089                 // probably be just lowered to Reactor.
1090
1091                 // TODO: not encountered yet since we only use this for array sizes etc,
1092                 // but is possible to construct integer constant 0 via OpConstantNull.
1093                 auto insn = getObject(id).definition;
1094                 ASSERT(insn.opcode() == spv::OpConstant);
1095                 ASSERT(getType(insn.word(1)).opcode() == spv::OpTypeInt);
1096                 return insn.word(3);
1097         }
1098
1099         // emit-time
1100
1101         void SpirvShader::emitProlog(SpirvRoutine *routine) const
1102         {
1103                 for (auto insn : *this)
1104                 {
1105                         switch (insn.opcode())
1106                         {
1107                         case spv::OpVariable:
1108                         {
1109                                 Type::ID resultPointerTypeId = insn.word(1);
1110                                 auto resultPointerType = getType(resultPointerTypeId);
1111                                 auto pointeeType = getType(resultPointerType.element);
1112
1113                                 if(pointeeType.sizeInComponents > 0)  // TODO: what to do about zero-slot objects?
1114                                 {
1115                                         Object::ID resultId = insn.word(2);
1116                                         routine->createLvalue(resultId, pointeeType.sizeInComponents);
1117                                 }
1118                                 break;
1119                         }
1120                         default:
1121                                 // Nothing else produces interface variables, so can all be safely ignored.
1122                                 break;
1123                         }
1124                 }
1125         }
1126
1127         void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask) const
1128         {
1129                 EmitState state;
1130                 state.setActiveLaneMask(activeLaneMask);
1131                 state.routine = routine;
1132
1133                 // Emit everything up to the first label
1134                 // TODO: Separate out dispatch of block from non-block instructions?
1135                 for (auto insn : *this)
1136                 {
1137                         if (insn.opcode() == spv::OpLabel)
1138                         {
1139                                 break;
1140                         }
1141                         EmitInstruction(insn, &state);
1142                 }
1143
1144                 // Emit all the blocks in BFS order, starting with the main block.
1145                 std::queue<Block::ID> pending;
1146                 pending.push(mainBlockId);
1147                 while (pending.size() > 0)
1148                 {
1149                         auto id = pending.front();
1150                         pending.pop();
1151                         if (state.visited.count(id) == 0)
1152                         {
1153                                 EmitBlock(id, &state);
1154                                 for (auto it : getBlock(id).outs)
1155                                 {
1156                                         pending.push(it);
1157                                 }
1158                         }
1159                 }
1160         }
1161
1162         void SpirvShader::EmitBlock(Block::ID id, EmitState *state) const
1163         {
1164                 if (state->visited.count(id) > 0)
1165                 {
1166                         return; // Already processed this block.
1167                 }
1168
1169                 state->visited.emplace(id);
1170
1171                 auto &block = getBlock(id);
1172
1173                 switch (block.kind)
1174                 {
1175                         case Block::Simple:
1176                                 if (id != mainBlockId)
1177                                 {
1178                                         // Emit all preceeding blocks and set the activeLaneMask.
1179                                         Intermediate activeLaneMask(1);
1180                                         activeLaneMask.move(0, SIMD::Int(0));
1181                                         for (auto in : block.ins)
1182                                         {
1183                                                 EmitBlock(in, state);
1184                                                 auto inMask = state->getActiveLaneMaskEdge(in, id);
1185                                                 activeLaneMask.replace(0, activeLaneMask.Int(0) | inMask);
1186                                         }
1187                                         state->setActiveLaneMask(activeLaneMask.Int(0));
1188                                 }
1189                                 state->currentBlock = id;
1190                                 EmitInstructions(block.begin(), block.end(), state);
1191                                 break;
1192
1193                         default:
1194                                 UNIMPLEMENTED("Unhandled Block Kind: %d", int(block.kind));
1195                 }
1196         }
1197
1198         void SpirvShader::EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const
1199         {
1200                 for (auto insn = begin; insn != end; insn++)
1201                 {
1202                         auto res = EmitInstruction(insn, state);
1203                         switch (res)
1204                         {
1205                         case EmitResult::Continue:
1206                                 continue;
1207                         case EmitResult::Terminator:
1208                                 break;
1209                         default:
1210                                 UNREACHABLE("Unexpected EmitResult %d", int(res));
1211                                 break;
1212                         }
1213                 }
1214         }
1215
1216         SpirvShader::EmitResult SpirvShader::EmitInstruction(InsnIterator insn, EmitState *state) const
1217         {
1218                 switch (insn.opcode())
1219                 {
1220                 case spv::OpTypeVoid:
1221                 case spv::OpTypeInt:
1222                 case spv::OpTypeFloat:
1223                 case spv::OpTypeBool:
1224                 case spv::OpTypeVector:
1225                 case spv::OpTypeArray:
1226                 case spv::OpTypeRuntimeArray:
1227                 case spv::OpTypeMatrix:
1228                 case spv::OpTypeStruct:
1229                 case spv::OpTypePointer:
1230                 case spv::OpTypeFunction:
1231                 case spv::OpExecutionMode:
1232                 case spv::OpMemoryModel:
1233                 case spv::OpFunction:
1234                 case spv::OpFunctionEnd:
1235                 case spv::OpConstant:
1236                 case spv::OpConstantNull:
1237                 case spv::OpConstantTrue:
1238                 case spv::OpConstantFalse:
1239                 case spv::OpConstantComposite:
1240                 case spv::OpUndef:
1241                 case spv::OpExtension:
1242                 case spv::OpCapability:
1243                 case spv::OpEntryPoint:
1244                 case spv::OpExtInstImport:
1245                 case spv::OpDecorate:
1246                 case spv::OpMemberDecorate:
1247                 case spv::OpGroupDecorate:
1248                 case spv::OpGroupMemberDecorate:
1249                 case spv::OpDecorationGroup:
1250                 case spv::OpName:
1251                 case spv::OpMemberName:
1252                 case spv::OpSource:
1253                 case spv::OpSourceContinued:
1254                 case spv::OpSourceExtension:
1255                 case spv::OpLine:
1256                 case spv::OpNoLine:
1257                 case spv::OpModuleProcessed:
1258                 case spv::OpString:
1259                         // Nothing to do at emit time. These are either fully handled at analysis time,
1260                         // or don't require any work at all.
1261                         return EmitResult::Continue;
1262
1263                 case spv::OpLabel:
1264                 case spv::OpReturn:
1265                         // TODO: when we do control flow, will need to do some work here.
1266                         // Until then, there is nothing to do -- we expect there to be an initial OpLabel
1267                         // in the entrypoint function, for which we do nothing; and a final OpReturn at the
1268                         // end of the entrypoint function, for which we do nothing.
1269                         return EmitResult::Continue;
1270
1271                 case spv::OpVariable:
1272                         return EmitVariable(insn, state);
1273
1274                 case spv::OpLoad:
1275                 case spv::OpAtomicLoad:
1276                         return EmitLoad(insn, state);
1277
1278                 case spv::OpStore:
1279                 case spv::OpAtomicStore:
1280                         return EmitStore(insn, state);
1281
1282                 case spv::OpAccessChain:
1283                 case spv::OpInBoundsAccessChain:
1284                         return EmitAccessChain(insn, state);
1285
1286                 case spv::OpCompositeConstruct:
1287                         return EmitCompositeConstruct(insn, state);
1288
1289                 case spv::OpCompositeInsert:
1290                         return EmitCompositeInsert(insn, state);
1291
1292                 case spv::OpCompositeExtract:
1293                         return EmitCompositeExtract(insn, state);
1294
1295                 case spv::OpVectorShuffle:
1296                         return EmitVectorShuffle(insn, state);
1297
1298                 case spv::OpVectorExtractDynamic:
1299                         return EmitVectorExtractDynamic(insn, state);
1300
1301                 case spv::OpVectorInsertDynamic:
1302                         return EmitVectorInsertDynamic(insn, state);
1303
1304                 case spv::OpVectorTimesScalar:
1305                         return EmitVectorTimesScalar(insn, state);
1306
1307                 case spv::OpNot:
1308                 case spv::OpSNegate:
1309                 case spv::OpFNegate:
1310                 case spv::OpLogicalNot:
1311                 case spv::OpConvertFToU:
1312                 case spv::OpConvertFToS:
1313                 case spv::OpConvertSToF:
1314                 case spv::OpConvertUToF:
1315                 case spv::OpBitcast:
1316                 case spv::OpIsInf:
1317                 case spv::OpIsNan:
1318                 case spv::OpDPdx:
1319                 case spv::OpDPdxCoarse:
1320                 case spv::OpDPdy:
1321                 case spv::OpDPdyCoarse:
1322                 case spv::OpFwidth:
1323                 case spv::OpFwidthCoarse:
1324                 case spv::OpDPdxFine:
1325                 case spv::OpDPdyFine:
1326                 case spv::OpFwidthFine:
1327                         return EmitUnaryOp(insn, state);
1328
1329                 case spv::OpIAdd:
1330                 case spv::OpISub:
1331                 case spv::OpIMul:
1332                 case spv::OpSDiv:
1333                 case spv::OpUDiv:
1334                 case spv::OpFAdd:
1335                 case spv::OpFSub:
1336                 case spv::OpFMul:
1337                 case spv::OpFDiv:
1338                 case spv::OpFMod:
1339                 case spv::OpFRem:
1340                 case spv::OpFOrdEqual:
1341                 case spv::OpFUnordEqual:
1342                 case spv::OpFOrdNotEqual:
1343                 case spv::OpFUnordNotEqual:
1344                 case spv::OpFOrdLessThan:
1345                 case spv::OpFUnordLessThan:
1346                 case spv::OpFOrdGreaterThan:
1347                 case spv::OpFUnordGreaterThan:
1348                 case spv::OpFOrdLessThanEqual:
1349                 case spv::OpFUnordLessThanEqual:
1350                 case spv::OpFOrdGreaterThanEqual:
1351                 case spv::OpFUnordGreaterThanEqual:
1352                 case spv::OpSMod:
1353                 case spv::OpSRem:
1354                 case spv::OpUMod:
1355                 case spv::OpIEqual:
1356                 case spv::OpINotEqual:
1357                 case spv::OpUGreaterThan:
1358                 case spv::OpSGreaterThan:
1359                 case spv::OpUGreaterThanEqual:
1360                 case spv::OpSGreaterThanEqual:
1361                 case spv::OpULessThan:
1362                 case spv::OpSLessThan:
1363                 case spv::OpULessThanEqual:
1364                 case spv::OpSLessThanEqual:
1365                 case spv::OpShiftRightLogical:
1366                 case spv::OpShiftRightArithmetic:
1367                 case spv::OpShiftLeftLogical:
1368                 case spv::OpBitwiseOr:
1369                 case spv::OpBitwiseXor:
1370                 case spv::OpBitwiseAnd:
1371                 case spv::OpLogicalOr:
1372                 case spv::OpLogicalAnd:
1373                 case spv::OpLogicalEqual:
1374                 case spv::OpLogicalNotEqual:
1375                 case spv::OpUMulExtended:
1376                 case spv::OpSMulExtended:
1377                         return EmitBinaryOp(insn, state);
1378
1379                 case spv::OpDot:
1380                         return EmitDot(insn, state);
1381
1382                 case spv::OpSelect:
1383                         return EmitSelect(insn, state);
1384
1385                 case spv::OpExtInst:
1386                         return EmitExtendedInstruction(insn, state);
1387
1388                 case spv::OpAny:
1389                         return EmitAny(insn, state);
1390
1391                 case spv::OpAll:
1392                         return EmitAll(insn, state);
1393
1394                 case spv::OpBranch:
1395                         return EmitBranch(insn, state);
1396
1397                 default:
1398                         UNIMPLEMENTED("opcode: %s", OpcodeName(insn.opcode()).c_str());
1399                         break;
1400                 }
1401
1402                 return EmitResult::Continue;
1403         }
1404
1405         SpirvShader::EmitResult SpirvShader::EmitVariable(InsnIterator insn, EmitState *state) const
1406         {
1407                 auto routine = state->routine;
1408                 Object::ID resultId = insn.word(2);
1409                 auto &object = getObject(resultId);
1410                 auto &objectTy = getType(object.type);
1411                 switch (objectTy.storageClass)
1412                 {
1413                 case spv::StorageClassInput:
1414                 {
1415                         if (object.kind == Object::Kind::InterfaceVariable)
1416                         {
1417                                 auto &dst = routine->getValue(resultId);
1418                                 int offset = 0;
1419                                 VisitInterface(resultId,
1420                                                                 [&](Decorations const &d, AttribType type) {
1421                                                                         auto scalarSlot = d.Location << 2 | d.Component;
1422                                                                         dst[offset++] = routine->inputs[scalarSlot];
1423                                                                 });
1424                         }
1425                         break;
1426                 }
1427                 case spv::StorageClassUniform:
1428                 case spv::StorageClassStorageBuffer:
1429                 {
1430                         Decorations d{};
1431                         ApplyDecorationsForId(&d, resultId);
1432                         ASSERT(d.DescriptorSet >= 0);
1433                         ASSERT(d.Binding >= 0);
1434
1435                         size_t bindingOffset = routine->pipelineLayout->getBindingOffset(d.DescriptorSet, d.Binding);
1436
1437                         Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
1438                         Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // VkDescriptorBufferInfo*
1439                         Pointer<Byte> buffer = *Pointer<Pointer<Byte>>(binding + OFFSET(VkDescriptorBufferInfo, buffer)); // vk::Buffer*
1440                         Pointer<Byte> data = *Pointer<Pointer<Byte>>(buffer + vk::Buffer::DataOffset); // void*
1441                         Int offset = *Pointer<Int>(binding + OFFSET(VkDescriptorBufferInfo, offset));
1442                         Pointer<Byte> address = data + offset;
1443                         routine->physicalPointers[resultId] = address;
1444                         break;
1445                 }
1446                 case spv::StorageClassPushConstant:
1447                 {
1448                         routine->physicalPointers[resultId] = routine->pushConstants;
1449                         break;
1450                 }
1451                 default:
1452                         break;
1453                 }
1454
1455                 return EmitResult::Continue;
1456         }
1457
1458         SpirvShader::EmitResult SpirvShader::EmitLoad(InsnIterator insn, EmitState *state) const
1459         {
1460                 auto routine = state->routine;
1461                 bool atomic = (insn.opcode() == spv::OpAtomicLoad);
1462                 Object::ID resultId = insn.word(2);
1463                 Object::ID pointerId = insn.word(3);
1464                 auto &result = getObject(resultId);
1465                 auto &resultTy = getType(result.type);
1466                 auto &pointer = getObject(pointerId);
1467                 auto &pointerBase = getObject(pointer.pointerBase);
1468                 auto &pointerBaseTy = getType(pointerBase.type);
1469                 std::memory_order memoryOrder = std::memory_order_relaxed;
1470
1471                 if(atomic)
1472                 {
1473                         Object::ID semanticsId = insn.word(5);
1474                         auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1475                         memoryOrder = MemoryOrder(memorySemantics);
1476                 }
1477
1478                 ASSERT(getType(pointer.type).element == result.type);
1479                 ASSERT(Type::ID(insn.word(1)) == result.type);
1480                 ASSERT(!atomic || getType(getType(pointer.type).element).opcode() == spv::OpTypeInt);  // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1481
1482                 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1483                 {
1484                         UNIMPLEMENTED("StorageClassImage load not yet implemented");
1485                 }
1486
1487                 Pointer<Float> ptrBase;
1488                 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1489                 {
1490                         ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1491                 }
1492                 else
1493                 {
1494                         ptrBase = &routine->getValue(pointer.pointerBase)[0];
1495                 }
1496
1497                 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1498                 auto anyInactiveLanes = SignMask(~state->activeLaneMask()) != 0;
1499
1500                 auto load = std::unique_ptr<SIMD::Float[]>(new SIMD::Float[resultTy.sizeInComponents]);
1501
1502                 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1503                 {
1504                         // Divergent offsets or masked lanes.
1505                         auto offsets = pointer.kind == Object::Kind::Value ?
1506                                         As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1507                                         RValue<SIMD::Int>(SIMD::Int(0));
1508                         for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1509                         {
1510                                 // i wish i had a Float,Float,Float,Float constructor here..
1511                                 for (int j = 0; j < SIMD::Width; j++)
1512                                 {
1513                                         If(Extract(state->activeLaneMask(), j) != 0)
1514                                         {
1515                                                 Int offset = Int(i) + Extract(offsets, j);
1516                                                 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1517                                                 load[i] = Insert(load[i], Load(&ptrBase[offset], sizeof(float), atomic, memoryOrder), j);
1518                                         }
1519                                 }
1520                         }
1521                 }
1522                 Else
1523                 {
1524                         // No divergent offsets or masked lanes.
1525                         if (interleavedByLane)
1526                         {
1527                                 // Lane-interleaved data.
1528                                 Pointer<SIMD::Float> src = ptrBase;
1529                                 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1530                                 {
1531                                         load[i] = Load(&src[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1532                                 }
1533                         }
1534                         else
1535                         {
1536                                 // Non-interleaved data.
1537                                 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1538                                 {
1539                                         load[i] = RValue<SIMD::Float>(Load(&ptrBase[i], sizeof(float), atomic, memoryOrder));  // TODO: optimize alignment
1540                                 }
1541                         }
1542                 }
1543
1544                 auto &dst = routine->createIntermediate(resultId, resultTy.sizeInComponents);
1545                 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1546                 {
1547                         dst.move(i, load[i]);
1548                 }
1549
1550                 return EmitResult::Continue;
1551         }
1552
1553         SpirvShader::EmitResult SpirvShader::EmitStore(InsnIterator insn, EmitState *state) const
1554         {
1555                 auto routine = state->routine;
1556                 bool atomic = (insn.opcode() == spv::OpAtomicStore);
1557                 Object::ID pointerId = insn.word(1);
1558                 Object::ID objectId = insn.word(atomic ? 4 : 2);
1559                 auto &object = getObject(objectId);
1560                 auto &pointer = getObject(pointerId);
1561                 auto &pointerTy = getType(pointer.type);
1562                 auto &elementTy = getType(pointerTy.element);
1563                 auto &pointerBase = getObject(pointer.pointerBase);
1564                 auto &pointerBaseTy = getType(pointerBase.type);
1565                 std::memory_order memoryOrder = std::memory_order_relaxed;
1566
1567                 if(atomic)
1568                 {
1569                         Object::ID semanticsId = insn.word(3);
1570                         auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1571                         memoryOrder = MemoryOrder(memorySemantics);
1572                 }
1573
1574                 ASSERT(!atomic || elementTy.opcode() == spv::OpTypeInt);  // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1575
1576                 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1577                 {
1578                         UNIMPLEMENTED("StorageClassImage store not yet implemented");
1579                 }
1580
1581                 Pointer<Float> ptrBase;
1582                 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1583                 {
1584                         ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1585                 }
1586                 else
1587                 {
1588                         ptrBase = &routine->getValue(pointer.pointerBase)[0];
1589                 }
1590
1591                 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1592                 auto anyInactiveLanes = SignMask(~state->activeLaneMask()) != 0;
1593
1594                 if (object.kind == Object::Kind::Constant)
1595                 {
1596                         // Constant source data.
1597                         auto src = reinterpret_cast<float *>(object.constantValue.get());
1598                         If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1599                         {
1600                                 // Divergent offsets or masked lanes.
1601                                 auto offsets = pointer.kind == Object::Kind::Value ?
1602                                                 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1603                                                 RValue<SIMD::Int>(SIMD::Int(0));
1604                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1605                                 {
1606                                         for (int j = 0; j < SIMD::Width; j++)
1607                                         {
1608                                                 If(Extract(state->activeLaneMask(), j) != 0)
1609                                                 {
1610                                                         Int offset = Int(i) + Extract(offsets, j);
1611                                                         if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1612                                                         Store(RValue<Float>(src[i]), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1613                                                 }
1614                                         }
1615                                 }
1616                         }
1617                         Else
1618                         {
1619                                 // Constant source data.
1620                                 // No divergent offsets or masked lanes.
1621                                 Pointer<SIMD::Float> dst = ptrBase;
1622                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1623                                 {
1624                                         Store(RValue<SIMD::Float>(src[i]), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1625                                 }
1626                         }
1627                 }
1628                 else
1629                 {
1630                         // Intermediate source data.
1631                         auto &src = routine->getIntermediate(objectId);
1632                         If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1633                         {
1634                                 // Divergent offsets or masked lanes.
1635                                 auto offsets = pointer.kind == Object::Kind::Value ?
1636                                                 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1637                                                 RValue<SIMD::Int>(SIMD::Int(0));
1638                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1639                                 {
1640                                         for (int j = 0; j < SIMD::Width; j++)
1641                                         {
1642                                                 If(Extract(state->activeLaneMask(), j) != 0)
1643                                                 {
1644                                                         Int offset = Int(i) + Extract(offsets, j);
1645                                                         if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1646                                                         Store(Extract(src.Float(i), j), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1647                                                 }
1648                                         }
1649                                 }
1650                         }
1651                         Else
1652                         {
1653                                 // No divergent offsets or masked lanes.
1654                                 if (interleavedByLane)
1655                                 {
1656                                         // Lane-interleaved data.
1657                                         Pointer<SIMD::Float> dst = ptrBase;
1658                                         for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1659                                         {
1660                                                 Store(src.Float(i), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1661                                         }
1662                                 }
1663                                 else
1664                                 {
1665                                         // Intermediate source data. Non-interleaved data.
1666                                         Pointer<SIMD::Float> dst = ptrBase;
1667                                         for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1668                                         {
1669                                                 Store<SIMD::Float>(SIMD::Float(src.Float(i)), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1670                                         }
1671                                 }
1672                         }
1673                 }
1674
1675                 return EmitResult::Continue;
1676         }
1677
1678         SpirvShader::EmitResult SpirvShader::EmitAccessChain(InsnIterator insn, EmitState *state) const
1679         {
1680                 auto routine = state->routine;
1681                 Type::ID typeId = insn.word(1);
1682                 Object::ID resultId = insn.word(2);
1683                 Object::ID baseId = insn.word(3);
1684                 uint32_t numIndexes = insn.wordCount() - 4;
1685                 const uint32_t *indexes = insn.wordPointer(4);
1686                 auto &type = getType(typeId);
1687                 ASSERT(type.sizeInComponents == 1);
1688                 ASSERT(getObject(baseId).pointerBase == getObject(resultId).pointerBase);
1689
1690                 auto &dst = routine->createIntermediate(resultId, type.sizeInComponents);
1691
1692                 if(type.storageClass == spv::StorageClassPushConstant ||
1693                    type.storageClass == spv::StorageClassUniform ||
1694                    type.storageClass == spv::StorageClassStorageBuffer)
1695                 {
1696                         dst.move(0, WalkExplicitLayoutAccessChain(baseId, numIndexes, indexes, routine));
1697                 }
1698                 else
1699                 {
1700                         dst.move(0, WalkAccessChain(baseId, numIndexes, indexes, routine));
1701                 }
1702
1703                 return EmitResult::Continue;
1704         }
1705
1706         SpirvShader::EmitResult SpirvShader::EmitCompositeConstruct(InsnIterator insn, EmitState *state) const
1707         {
1708                 auto routine = state->routine;
1709                 auto &type = getType(insn.word(1));
1710                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1711                 auto offset = 0u;
1712
1713                 for (auto i = 0u; i < insn.wordCount() - 3; i++)
1714                 {
1715                         Object::ID srcObjectId = insn.word(3u + i);
1716                         auto & srcObject = getObject(srcObjectId);
1717                         auto & srcObjectTy = getType(srcObject.type);
1718                         GenericValue srcObjectAccess(this, routine, srcObjectId);
1719
1720                         for (auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
1721                         {
1722                                 dst.move(offset++, srcObjectAccess.Float(j));
1723                         }
1724                 }
1725
1726                 return EmitResult::Continue;
1727         }
1728
1729         SpirvShader::EmitResult SpirvShader::EmitCompositeInsert(InsnIterator insn, EmitState *state) const
1730         {
1731                 auto routine = state->routine;
1732                 Type::ID resultTypeId = insn.word(1);
1733                 auto &type = getType(resultTypeId);
1734                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1735                 auto &newPartObject = getObject(insn.word(3));
1736                 auto &newPartObjectTy = getType(newPartObject.type);
1737                 auto firstNewComponent = WalkLiteralAccessChain(resultTypeId, insn.wordCount() - 5, insn.wordPointer(5));
1738
1739                 GenericValue srcObjectAccess(this, routine, insn.word(4));
1740                 GenericValue newPartObjectAccess(this, routine, insn.word(3));
1741
1742                 // old components before
1743                 for (auto i = 0u; i < firstNewComponent; i++)
1744                 {
1745                         dst.move(i, srcObjectAccess.Float(i));
1746                 }
1747                 // new part
1748                 for (auto i = 0u; i < newPartObjectTy.sizeInComponents; i++)
1749                 {
1750                         dst.move(firstNewComponent + i, newPartObjectAccess.Float(i));
1751                 }
1752                 // old components after
1753                 for (auto i = firstNewComponent + newPartObjectTy.sizeInComponents; i < type.sizeInComponents; i++)
1754                 {
1755                         dst.move(i, srcObjectAccess.Float(i));
1756                 }
1757
1758                 return EmitResult::Continue;
1759         }
1760
1761         SpirvShader::EmitResult SpirvShader::EmitCompositeExtract(InsnIterator insn, EmitState *state) const
1762         {
1763                 auto routine = state->routine;
1764                 auto &type = getType(insn.word(1));
1765                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1766                 auto &compositeObject = getObject(insn.word(3));
1767                 Type::ID compositeTypeId = compositeObject.definition.word(1);
1768                 auto firstComponent = WalkLiteralAccessChain(compositeTypeId, insn.wordCount() - 4, insn.wordPointer(4));
1769
1770                 GenericValue compositeObjectAccess(this, routine, insn.word(3));
1771                 for (auto i = 0u; i < type.sizeInComponents; i++)
1772                 {
1773                         dst.move(i, compositeObjectAccess.Float(firstComponent + i));
1774                 }
1775
1776                 return EmitResult::Continue;
1777         }
1778
1779         SpirvShader::EmitResult SpirvShader::EmitVectorShuffle(InsnIterator insn, EmitState *state) const
1780         {
1781                 auto routine = state->routine;
1782                 auto &type = getType(insn.word(1));
1783                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1784
1785                 // Note: number of components in result type, first half type, and second
1786                 // half type are all independent.
1787                 auto &firstHalfType = getType(getObject(insn.word(3)).type);
1788
1789                 GenericValue firstHalfAccess(this, routine, insn.word(3));
1790                 GenericValue secondHalfAccess(this, routine, insn.word(4));
1791
1792                 for (auto i = 0u; i < type.sizeInComponents; i++)
1793                 {
1794                         auto selector = insn.word(5 + i);
1795                         if (selector == static_cast<uint32_t>(-1))
1796                         {
1797                                 // Undefined value. Until we decide to do real undef values, zero is as good
1798                                 // a value as any
1799                                 dst.move(i, RValue<SIMD::Float>(0.0f));
1800                         }
1801                         else if (selector < firstHalfType.sizeInComponents)
1802                         {
1803                                 dst.move(i, firstHalfAccess.Float(selector));
1804                         }
1805                         else
1806                         {
1807                                 dst.move(i, secondHalfAccess.Float(selector - firstHalfType.sizeInComponents));
1808                         }
1809                 }
1810
1811                 return EmitResult::Continue;
1812         }
1813
1814         SpirvShader::EmitResult SpirvShader::EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const
1815         {
1816                 auto routine = state->routine;
1817                 auto &type = getType(insn.word(1));
1818                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1819                 auto &srcType = getType(getObject(insn.word(3)).type);
1820
1821                 GenericValue src(this, routine, insn.word(3));
1822                 GenericValue index(this, routine, insn.word(4));
1823
1824                 SIMD::UInt v = SIMD::UInt(0);
1825
1826                 for (auto i = 0u; i < srcType.sizeInComponents; i++)
1827                 {
1828                         v |= CmpEQ(index.UInt(0), SIMD::UInt(i)) & src.UInt(i);
1829                 }
1830
1831                 dst.move(0, v);
1832                 return EmitResult::Continue;
1833         }
1834
1835         SpirvShader::EmitResult SpirvShader::EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const
1836         {
1837                 auto routine = state->routine;
1838                 auto &type = getType(insn.word(1));
1839                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1840
1841                 GenericValue src(this, routine, insn.word(3));
1842                 GenericValue component(this, routine, insn.word(4));
1843                 GenericValue index(this, routine, insn.word(5));
1844
1845                 for (auto i = 0u; i < type.sizeInComponents; i++)
1846                 {
1847                         SIMD::UInt mask = CmpEQ(SIMD::UInt(i), index.UInt(0));
1848                         dst.move(i, (src.UInt(i) & ~mask) | (component.UInt(0) & mask));
1849                 }
1850                 return EmitResult::Continue;
1851         }
1852
1853         SpirvShader::EmitResult SpirvShader::EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const
1854         {
1855                 auto routine = state->routine;
1856                 auto &type = getType(insn.word(1));
1857                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1858                 auto lhs = GenericValue(this, routine, insn.word(3));
1859                 auto rhs = GenericValue(this, routine, insn.word(4));
1860
1861                 for (auto i = 0u; i < type.sizeInComponents; i++)
1862                 {
1863                         dst.move(i, lhs.Float(i) * rhs.Float(0));
1864                 }
1865
1866                 return EmitResult::Continue;
1867         }
1868
1869         SpirvShader::EmitResult SpirvShader::EmitUnaryOp(InsnIterator insn, EmitState *state) const
1870         {
1871                 auto routine = state->routine;
1872                 auto &type = getType(insn.word(1));
1873                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1874                 auto src = GenericValue(this, routine, insn.word(3));
1875
1876                 for (auto i = 0u; i < type.sizeInComponents; i++)
1877                 {
1878                         switch (insn.opcode())
1879                         {
1880                         case spv::OpNot:
1881                         case spv::OpLogicalNot:         // logical not == bitwise not due to all-bits boolean representation
1882                                 dst.move(i, ~src.UInt(i));
1883                                 break;
1884                         case spv::OpSNegate:
1885                                 dst.move(i, -src.Int(i));
1886                                 break;
1887                         case spv::OpFNegate:
1888                                 dst.move(i, -src.Float(i));
1889                                 break;
1890                         case spv::OpConvertFToU:
1891                                 dst.move(i, SIMD::UInt(src.Float(i)));
1892                                 break;
1893                         case spv::OpConvertFToS:
1894                                 dst.move(i, SIMD::Int(src.Float(i)));
1895                                 break;
1896                         case spv::OpConvertSToF:
1897                                 dst.move(i, SIMD::Float(src.Int(i)));
1898                                 break;
1899                         case spv::OpConvertUToF:
1900                                 dst.move(i, SIMD::Float(src.UInt(i)));
1901                                 break;
1902                         case spv::OpBitcast:
1903                                 dst.move(i, src.Float(i));
1904                                 break;
1905                         case spv::OpIsInf:
1906                                 dst.move(i, IsInf(src.Float(i)));
1907                                 break;
1908                         case spv::OpIsNan:
1909                                 dst.move(i, IsNan(src.Float(i)));
1910                                 break;
1911                         case spv::OpDPdx:
1912                         case spv::OpDPdxCoarse:
1913                                 // Derivative instructions: FS invocations are laid out like so:
1914                                 //    0 1
1915                                 //    2 3
1916                                 static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
1917                                 dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
1918                                 break;
1919                         case spv::OpDPdy:
1920                         case spv::OpDPdyCoarse:
1921                                 dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
1922                                 break;
1923                         case spv::OpFwidth:
1924                         case spv::OpFwidthCoarse:
1925                                 dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))
1926                                                         + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
1927                                 break;
1928                         case spv::OpDPdxFine:
1929                         {
1930                                 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
1931                                 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
1932                                 SIMD::Float v = SIMD::Float(firstRow);
1933                                 v = Insert(v, secondRow, 2);
1934                                 v = Insert(v, secondRow, 3);
1935                                 dst.move(i, v);
1936                                 break;
1937                         }
1938                         case spv::OpDPdyFine:
1939                         {
1940                                 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
1941                                 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
1942                                 SIMD::Float v = SIMD::Float(firstColumn);
1943                                 v = Insert(v, secondColumn, 1);
1944                                 v = Insert(v, secondColumn, 3);
1945                                 dst.move(i, v);
1946                                 break;
1947                         }
1948                         case spv::OpFwidthFine:
1949                         {
1950                                 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
1951                                 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
1952                                 SIMD::Float dpdx = SIMD::Float(firstRow);
1953                                 dpdx = Insert(dpdx, secondRow, 2);
1954                                 dpdx = Insert(dpdx, secondRow, 3);
1955                                 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
1956                                 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
1957                                 SIMD::Float dpdy = SIMD::Float(firstColumn);
1958                                 dpdy = Insert(dpdy, secondColumn, 1);
1959                                 dpdy = Insert(dpdy, secondColumn, 3);
1960                                 dst.move(i, Abs(dpdx) + Abs(dpdy));
1961                                 break;
1962                         }
1963                         default:
1964                                 UNIMPLEMENTED("Unhandled unary operator %s", OpcodeName(insn.opcode()).c_str());
1965                         }
1966                 }
1967
1968                 return EmitResult::Continue;
1969         }
1970
1971         SpirvShader::EmitResult SpirvShader::EmitBinaryOp(InsnIterator insn, EmitState *state) const
1972         {
1973                 auto routine = state->routine;
1974                 auto &type = getType(insn.word(1));
1975                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1976                 auto &lhsType = getType(getObject(insn.word(3)).type);
1977                 auto lhs = GenericValue(this, routine, insn.word(3));
1978                 auto rhs = GenericValue(this, routine, insn.word(4));
1979
1980                 for (auto i = 0u; i < lhsType.sizeInComponents; i++)
1981                 {
1982                         switch (insn.opcode())
1983                         {
1984                         case spv::OpIAdd:
1985                                 dst.move(i, lhs.Int(i) + rhs.Int(i));
1986                                 break;
1987                         case spv::OpISub:
1988                                 dst.move(i, lhs.Int(i) - rhs.Int(i));
1989                                 break;
1990                         case spv::OpIMul:
1991                                 dst.move(i, lhs.Int(i) * rhs.Int(i));
1992                                 break;
1993                         case spv::OpSDiv:
1994                         {
1995                                 SIMD::Int a = lhs.Int(i);
1996                                 SIMD::Int b = rhs.Int(i);
1997                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
1998                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
1999                                 dst.move(i, a / b);
2000                                 break;
2001                         }
2002                         case spv::OpUDiv:
2003                         {
2004                                 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2005                                 dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
2006                                 break;
2007                         }
2008                         case spv::OpSRem:
2009                         {
2010                                 SIMD::Int a = lhs.Int(i);
2011                                 SIMD::Int b = rhs.Int(i);
2012                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2013                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2014                                 dst.move(i, a % b);
2015                                 break;
2016                         }
2017                         case spv::OpSMod:
2018                         {
2019                                 SIMD::Int a = lhs.Int(i);
2020                                 SIMD::Int b = rhs.Int(i);
2021                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2022                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2023                                 auto mod = a % b;
2024                                 // If a and b have opposite signs, the remainder operation takes
2025                                 // the sign from a but OpSMod is supposed to take the sign of b.
2026                                 // Adding b will ensure that the result has the correct sign and
2027                                 // that it is still congruent to a modulo b.
2028                                 //
2029                                 // See also http://mathforum.org/library/drmath/view/52343.html
2030                                 auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
2031                                 auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
2032                                 dst.move(i, As<SIMD::Float>(fixedMod));
2033                                 break;
2034                         }
2035                         case spv::OpUMod:
2036                         {
2037                                 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2038                                 dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
2039                                 break;
2040                         }
2041                         case spv::OpIEqual:
2042                         case spv::OpLogicalEqual:
2043                                 dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
2044                                 break;
2045                         case spv::OpINotEqual:
2046                         case spv::OpLogicalNotEqual:
2047                                 dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
2048                                 break;
2049                         case spv::OpUGreaterThan:
2050                                 dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
2051                                 break;
2052                         case spv::OpSGreaterThan:
2053                                 dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i)));
2054                                 break;
2055                         case spv::OpUGreaterThanEqual:
2056                                 dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
2057                                 break;
2058                         case spv::OpSGreaterThanEqual:
2059                                 dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i)));
2060                                 break;
2061                         case spv::OpULessThan:
2062                                 dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
2063                                 break;
2064                         case spv::OpSLessThan:
2065                                 dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i)));
2066                                 break;
2067                         case spv::OpULessThanEqual:
2068                                 dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
2069                                 break;
2070                         case spv::OpSLessThanEqual:
2071                                 dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i)));
2072                                 break;
2073                         case spv::OpFAdd:
2074                                 dst.move(i, lhs.Float(i) + rhs.Float(i));
2075                                 break;
2076                         case spv::OpFSub:
2077                                 dst.move(i, lhs.Float(i) - rhs.Float(i));
2078                                 break;
2079                         case spv::OpFMul:
2080                                 dst.move(i, lhs.Float(i) * rhs.Float(i));
2081                                 break;
2082                         case spv::OpFDiv:
2083                                 dst.move(i, lhs.Float(i) / rhs.Float(i));
2084                                 break;
2085                         case spv::OpFMod:
2086                                 // TODO(b/126873455): inaccurate for values greater than 2^24
2087                                 dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
2088                                 break;
2089                         case spv::OpFRem:
2090                                 dst.move(i, lhs.Float(i) % rhs.Float(i));
2091                                 break;
2092                         case spv::OpFOrdEqual:
2093                                 dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
2094                                 break;
2095                         case spv::OpFUnordEqual:
2096                                 dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
2097                                 break;
2098                         case spv::OpFOrdNotEqual:
2099                                 dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
2100                                 break;
2101                         case spv::OpFUnordNotEqual:
2102                                 dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
2103                                 break;
2104                         case spv::OpFOrdLessThan:
2105                                 dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i)));
2106                                 break;
2107                         case spv::OpFUnordLessThan:
2108                                 dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i)));
2109                                 break;
2110                         case spv::OpFOrdGreaterThan:
2111                                 dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i)));
2112                                 break;
2113                         case spv::OpFUnordGreaterThan:
2114                                 dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
2115                                 break;
2116                         case spv::OpFOrdLessThanEqual:
2117                                 dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i)));
2118                                 break;
2119                         case spv::OpFUnordLessThanEqual:
2120                                 dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i)));
2121                                 break;
2122                         case spv::OpFOrdGreaterThanEqual:
2123                                 dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i)));
2124                                 break;
2125                         case spv::OpFUnordGreaterThanEqual:
2126                                 dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
2127                                 break;
2128                         case spv::OpShiftRightLogical:
2129                                 dst.move(i, lhs.UInt(i) >> rhs.UInt(i));
2130                                 break;
2131                         case spv::OpShiftRightArithmetic:
2132                                 dst.move(i, lhs.Int(i) >> rhs.Int(i));
2133                                 break;
2134                         case spv::OpShiftLeftLogical:
2135                                 dst.move(i, lhs.UInt(i) << rhs.UInt(i));
2136                                 break;
2137                         case spv::OpBitwiseOr:
2138                         case spv::OpLogicalOr:
2139                                 dst.move(i, lhs.UInt(i) | rhs.UInt(i));
2140                                 break;
2141                         case spv::OpBitwiseXor:
2142                                 dst.move(i, lhs.UInt(i) ^ rhs.UInt(i));
2143                                 break;
2144                         case spv::OpBitwiseAnd:
2145                         case spv::OpLogicalAnd:
2146                                 dst.move(i, lhs.UInt(i) & rhs.UInt(i));
2147                                 break;
2148                         case spv::OpSMulExtended:
2149                                 // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
2150                                 // In our flat view then, component i is the i'th component of the first member;
2151                                 // component i + N is the i'th component of the second member.
2152                                 dst.move(i, lhs.Int(i) * rhs.Int(i));
2153                                 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
2154                                 break;
2155                         case spv::OpUMulExtended:
2156                                 dst.move(i, lhs.UInt(i) * rhs.UInt(i));
2157                                 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
2158                                 break;
2159                         default:
2160                                 UNIMPLEMENTED("Unhandled binary operator %s", OpcodeName(insn.opcode()).c_str());
2161                         }
2162                 }
2163
2164                 return EmitResult::Continue;
2165         }
2166
2167         SpirvShader::EmitResult SpirvShader::EmitDot(InsnIterator insn, EmitState *state) const
2168         {
2169                 auto routine = state->routine;
2170                 auto &type = getType(insn.word(1));
2171                 ASSERT(type.sizeInComponents == 1);
2172                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2173                 auto &lhsType = getType(getObject(insn.word(3)).type);
2174                 auto lhs = GenericValue(this, routine, insn.word(3));
2175                 auto rhs = GenericValue(this, routine, insn.word(4));
2176
2177                 dst.move(0, Dot(lhsType.sizeInComponents, lhs, rhs));
2178                 return EmitResult::Continue;
2179         }
2180
2181         SpirvShader::EmitResult SpirvShader::EmitSelect(InsnIterator insn, EmitState *state) const
2182         {
2183                 auto routine = state->routine;
2184                 auto &type = getType(insn.word(1));
2185                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2186                 auto cond = GenericValue(this, routine, insn.word(3));
2187                 auto lhs = GenericValue(this, routine, insn.word(4));
2188                 auto rhs = GenericValue(this, routine, insn.word(5));
2189
2190                 for (auto i = 0u; i < type.sizeInComponents; i++)
2191                 {
2192                         dst.move(i, (cond.Int(i) & lhs.Int(i)) | (~cond.Int(i) & rhs.Int(i)));   // FIXME: IfThenElse()
2193                 }
2194
2195                 return EmitResult::Continue;
2196         }
2197
2198         SpirvShader::EmitResult SpirvShader::EmitExtendedInstruction(InsnIterator insn, EmitState *state) const
2199         {
2200                 auto routine = state->routine;
2201                 auto &type = getType(insn.word(1));
2202                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2203                 auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
2204
2205                 switch (extInstIndex)
2206                 {
2207                 case GLSLstd450FAbs:
2208                 {
2209                         auto src = GenericValue(this, routine, insn.word(5));
2210                         for (auto i = 0u; i < type.sizeInComponents; i++)
2211                         {
2212                                 dst.move(i, Abs(src.Float(i)));
2213                         }
2214                         break;
2215                 }
2216                 case GLSLstd450SAbs:
2217                 {
2218                         auto src = GenericValue(this, routine, insn.word(5));
2219                         for (auto i = 0u; i < type.sizeInComponents; i++)
2220                         {
2221                                 dst.move(i, Abs(src.Int(i)));
2222                         }
2223                         break;
2224                 }
2225                 case GLSLstd450Cross:
2226                 {
2227                         auto lhs = GenericValue(this, routine, insn.word(5));
2228                         auto rhs = GenericValue(this, routine, insn.word(6));
2229                         dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
2230                         dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
2231                         dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
2232                         break;
2233                 }
2234                 case GLSLstd450Floor:
2235                 {
2236                         auto src = GenericValue(this, routine, insn.word(5));
2237                         for (auto i = 0u; i < type.sizeInComponents; i++)
2238                         {
2239                                 dst.move(i, Floor(src.Float(i)));
2240                         }
2241                         break;
2242                 }
2243                 case GLSLstd450Trunc:
2244                 {
2245                         auto src = GenericValue(this, routine, insn.word(5));
2246                         for (auto i = 0u; i < type.sizeInComponents; i++)
2247                         {
2248                                 dst.move(i, Trunc(src.Float(i)));
2249                         }
2250                         break;
2251                 }
2252                 case GLSLstd450Ceil:
2253                 {
2254                         auto src = GenericValue(this, routine, insn.word(5));
2255                         for (auto i = 0u; i < type.sizeInComponents; i++)
2256                         {
2257                                 dst.move(i, Ceil(src.Float(i)));
2258                         }
2259                         break;
2260                 }
2261                 case GLSLstd450Fract:
2262                 {
2263                         auto src = GenericValue(this, routine, insn.word(5));
2264                         for (auto i = 0u; i < type.sizeInComponents; i++)
2265                         {
2266                                 dst.move(i, Frac(src.Float(i)));
2267                         }
2268                         break;
2269                 }
2270                 case GLSLstd450Round:
2271                 {
2272                         auto src = GenericValue(this, routine, insn.word(5));
2273                         for (auto i = 0u; i < type.sizeInComponents; i++)
2274                         {
2275                                 dst.move(i, Round(src.Float(i)));
2276                         }
2277                         break;
2278                 }
2279                 case GLSLstd450RoundEven:
2280                 {
2281                         auto src = GenericValue(this, routine, insn.word(5));
2282                         for (auto i = 0u; i < type.sizeInComponents; i++)
2283                         {
2284                                 auto x = Round(src.Float(i));
2285                                 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
2286                                 dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
2287                                                 SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
2288                         }
2289                         break;
2290                 }
2291                 case GLSLstd450FMin:
2292                 {
2293                         auto lhs = GenericValue(this, routine, insn.word(5));
2294                         auto rhs = GenericValue(this, routine, insn.word(6));
2295                         for (auto i = 0u; i < type.sizeInComponents; i++)
2296                         {
2297                                 dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
2298                         }
2299                         break;
2300                 }
2301                 case GLSLstd450FMax:
2302                 {
2303                         auto lhs = GenericValue(this, routine, insn.word(5));
2304                         auto rhs = GenericValue(this, routine, insn.word(6));
2305                         for (auto i = 0u; i < type.sizeInComponents; i++)
2306                         {
2307                                 dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
2308                         }
2309                         break;
2310                 }
2311                 case GLSLstd450SMin:
2312                 {
2313                         auto lhs = GenericValue(this, routine, insn.word(5));
2314                         auto rhs = GenericValue(this, routine, insn.word(6));
2315                         for (auto i = 0u; i < type.sizeInComponents; i++)
2316                         {
2317                                 dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
2318                         }
2319                         break;
2320                 }
2321                 case GLSLstd450SMax:
2322                 {
2323                         auto lhs = GenericValue(this, routine, insn.word(5));
2324                         auto rhs = GenericValue(this, routine, insn.word(6));
2325                         for (auto i = 0u; i < type.sizeInComponents; i++)
2326                         {
2327                                 dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
2328                         }
2329                         break;
2330                 }
2331                 case GLSLstd450UMin:
2332                 {
2333                         auto lhs = GenericValue(this, routine, insn.word(5));
2334                         auto rhs = GenericValue(this, routine, insn.word(6));
2335                         for (auto i = 0u; i < type.sizeInComponents; i++)
2336                         {
2337                                 dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
2338                         }
2339                         break;
2340                 }
2341                 case GLSLstd450UMax:
2342                 {
2343                         auto lhs = GenericValue(this, routine, insn.word(5));
2344                         auto rhs = GenericValue(this, routine, insn.word(6));
2345                         for (auto i = 0u; i < type.sizeInComponents; i++)
2346                         {
2347                                 dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
2348                         }
2349                         break;
2350                 }
2351                 case GLSLstd450Step:
2352                 {
2353                         auto edge = GenericValue(this, routine, insn.word(5));
2354                         auto x = GenericValue(this, routine, insn.word(6));
2355                         for (auto i = 0u; i < type.sizeInComponents; i++)
2356                         {
2357                                 dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
2358                         }
2359                         break;
2360                 }
2361                 case GLSLstd450SmoothStep:
2362                 {
2363                         auto edge0 = GenericValue(this, routine, insn.word(5));
2364                         auto edge1 = GenericValue(this, routine, insn.word(6));
2365                         auto x = GenericValue(this, routine, insn.word(7));
2366                         for (auto i = 0u; i < type.sizeInComponents; i++)
2367                         {
2368                                 auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
2369                                                 (edge1.Float(i) - edge0.Float(i)), SIMD::Float(0.0f)), SIMD::Float(1.0f));
2370                                 dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
2371                         }
2372                         break;
2373                 }
2374                 case GLSLstd450FMix:
2375                 {
2376                         auto x = GenericValue(this, routine, insn.word(5));
2377                         auto y = GenericValue(this, routine, insn.word(6));
2378                         auto a = GenericValue(this, routine, insn.word(7));
2379                         for (auto i = 0u; i < type.sizeInComponents; i++)
2380                         {
2381                                 dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
2382                         }
2383                         break;
2384                 }
2385                 case GLSLstd450FClamp:
2386                 {
2387                         auto x = GenericValue(this, routine, insn.word(5));
2388                         auto minVal = GenericValue(this, routine, insn.word(6));
2389                         auto maxVal = GenericValue(this, routine, insn.word(7));
2390                         for (auto i = 0u; i < type.sizeInComponents; i++)
2391                         {
2392                                 dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
2393                         }
2394                         break;
2395                 }
2396                 case GLSLstd450SClamp:
2397                 {
2398                         auto x = GenericValue(this, routine, insn.word(5));
2399                         auto minVal = GenericValue(this, routine, insn.word(6));
2400                         auto maxVal = GenericValue(this, routine, insn.word(7));
2401                         for (auto i = 0u; i < type.sizeInComponents; i++)
2402                         {
2403                                 dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
2404                         }
2405                         break;
2406                 }
2407                 case GLSLstd450UClamp:
2408                 {
2409                         auto x = GenericValue(this, routine, insn.word(5));
2410                         auto minVal = GenericValue(this, routine, insn.word(6));
2411                         auto maxVal = GenericValue(this, routine, insn.word(7));
2412                         for (auto i = 0u; i < type.sizeInComponents; i++)
2413                         {
2414                                 dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
2415                         }
2416                         break;
2417                 }
2418                 case GLSLstd450FSign:
2419                 {
2420                         auto src = GenericValue(this, routine, insn.word(5));
2421                         for (auto i = 0u; i < type.sizeInComponents; i++)
2422                         {
2423                                 auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
2424                                 auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
2425                                 dst.move(i, neg | pos);
2426                         }
2427                         break;
2428                 }
2429                 case GLSLstd450SSign:
2430                 {
2431                         auto src = GenericValue(this, routine, insn.word(5));
2432                         for (auto i = 0u; i < type.sizeInComponents; i++)
2433                         {
2434                                 auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
2435                                 auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
2436                                 dst.move(i, neg | pos);
2437                         }
2438                         break;
2439                 }
2440                 case GLSLstd450Reflect:
2441                 {
2442                         auto I = GenericValue(this, routine, insn.word(5));
2443                         auto N = GenericValue(this, routine, insn.word(6));
2444
2445                         SIMD::Float d = Dot(type.sizeInComponents, I, N);
2446
2447                         for (auto i = 0u; i < type.sizeInComponents; i++)
2448                         {
2449                                 dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
2450                         }
2451                         break;
2452                 }
2453                 case GLSLstd450Refract:
2454                 {
2455                         auto I = GenericValue(this, routine, insn.word(5));
2456                         auto N = GenericValue(this, routine, insn.word(6));
2457                         auto eta = GenericValue(this, routine, insn.word(7));
2458
2459                         SIMD::Float d = Dot(type.sizeInComponents, I, N);
2460                         SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
2461                         SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
2462                         SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
2463
2464                         for (auto i = 0u; i < type.sizeInComponents; i++)
2465                         {
2466                                 dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
2467                         }
2468                         break;
2469                 }
2470                 case GLSLstd450FaceForward:
2471                 {
2472                         auto N = GenericValue(this, routine, insn.word(5));
2473                         auto I = GenericValue(this, routine, insn.word(6));
2474                         auto Nref = GenericValue(this, routine, insn.word(7));
2475
2476                         SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
2477                         SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
2478
2479                         for (auto i = 0u; i < type.sizeInComponents; i++)
2480                         {
2481                                 auto n = N.Float(i);
2482                                 dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
2483                         }
2484                         break;
2485                 }
2486                 case GLSLstd450Length:
2487                 {
2488                         auto x = GenericValue(this, routine, insn.word(5));
2489                         SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2490
2491                         dst.move(0, Sqrt(d));
2492                         break;
2493                 }
2494                 case GLSLstd450Normalize:
2495                 {
2496                         auto x = GenericValue(this, routine, insn.word(5));
2497                         SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2498                         SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
2499
2500                         for (auto i = 0u; i < type.sizeInComponents; i++)
2501                         {
2502                                 dst.move(i, invLength * x.Float(i));
2503                         }
2504                         break;
2505                 }
2506                 case GLSLstd450Distance:
2507                 {
2508                         auto p0 = GenericValue(this, routine, insn.word(5));
2509                         auto p1 = GenericValue(this, routine, insn.word(6));
2510                         auto p0Type = getType(getObject(insn.word(5)).type);
2511
2512                         // sqrt(dot(p0-p1, p0-p1))
2513                         SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
2514
2515                         for (auto i = 1u; i < p0Type.sizeInComponents; i++)
2516                         {
2517                                 d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
2518                         }
2519
2520                         dst.move(0, Sqrt(d));
2521                         break;
2522                 }
2523                 default:
2524                         UNIMPLEMENTED("Unhandled ExtInst %d", extInstIndex);
2525                 }
2526
2527                 return EmitResult::Continue;
2528         }
2529
2530         std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
2531         {
2532                 switch(memorySemantics)
2533                 {
2534                 case spv::MemorySemanticsMaskNone:                   return std::memory_order_relaxed;
2535                 case spv::MemorySemanticsAcquireMask:                return std::memory_order_acquire;
2536                 case spv::MemorySemanticsReleaseMask:                return std::memory_order_release;
2537                 case spv::MemorySemanticsAcquireReleaseMask:         return std::memory_order_acq_rel;
2538                 case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel;  // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
2539                 default:
2540                         UNREACHABLE("MemorySemanticsMask %x", memorySemantics);
2541                         return std::memory_order_acq_rel;
2542                 }
2543         }
2544
2545         SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const
2546         {
2547                 SIMD::Float d = x.Float(0) * y.Float(0);
2548
2549                 for (auto i = 1u; i < numComponents; i++)
2550                 {
2551                         d += x.Float(i) * y.Float(i);
2552                 }
2553
2554                 return d;
2555         }
2556
2557         SpirvShader::EmitResult SpirvShader::EmitAny(InsnIterator insn, EmitState *state) const
2558         {
2559                 auto routine = state->routine;
2560                 auto &type = getType(insn.word(1));
2561                 ASSERT(type.sizeInComponents == 1);
2562                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2563                 auto &srcType = getType(getObject(insn.word(3)).type);
2564                 auto src = GenericValue(this, routine, insn.word(3));
2565
2566                 SIMD::UInt result = src.UInt(0);
2567
2568                 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2569                 {
2570                         result |= src.UInt(i);
2571                 }
2572
2573                 dst.move(0, result);
2574                 return EmitResult::Continue;
2575         }
2576
2577         SpirvShader::EmitResult SpirvShader::EmitAll(InsnIterator insn, EmitState *state) const
2578         {
2579                 auto routine = state->routine;
2580                 auto &type = getType(insn.word(1));
2581                 ASSERT(type.sizeInComponents == 1);
2582                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2583                 auto &srcType = getType(getObject(insn.word(3)).type);
2584                 auto src = GenericValue(this, routine, insn.word(3));
2585
2586                 SIMD::UInt result = src.UInt(0);
2587
2588                 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2589                 {
2590                         result &= src.UInt(i);
2591                 }
2592
2593                 dst.move(0, result);
2594                 return EmitResult::Continue;
2595         }
2596
2597         SpirvShader::EmitResult SpirvShader::EmitBranch(InsnIterator insn, EmitState *state) const
2598         {
2599                 auto target = Block::ID(insn.word(1));
2600                 auto edge = Block::Edge{state->currentBlock, target};
2601                 state->edgeActiveLaneMasks.emplace(edge, state->activeLaneMask());
2602                 return EmitResult::Terminator;
2603         }
2604
2605         void SpirvShader::emitEpilog(SpirvRoutine *routine) const
2606         {
2607                 for (auto insn : *this)
2608                 {
2609                         switch (insn.opcode())
2610                         {
2611                         case spv::OpVariable:
2612                         {
2613                                 Object::ID resultId = insn.word(2);
2614                                 auto &object = getObject(resultId);
2615                                 auto &objectTy = getType(object.type);
2616                                 if (object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
2617                                 {
2618                                         auto &dst = routine->getValue(resultId);
2619                                         int offset = 0;
2620                                         VisitInterface(resultId,
2621                                                                    [&](Decorations const &d, AttribType type) {
2622                                                                            auto scalarSlot = d.Location << 2 | d.Component;
2623                                                                            routine->outputs[scalarSlot] = dst[offset++];
2624                                                                    });
2625                                 }
2626                                 break;
2627                         }
2628                         default:
2629                                 break;
2630                         }
2631                 }
2632         }
2633
2634         SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end)
2635         {
2636                 // Default to a Simple, this may change later.
2637                 kind = Block::Simple;
2638
2639                 // Walk the instructions to find the last two of the block.
2640                 InsnIterator insns[2];
2641                 for (auto insn : *this)
2642                 {
2643                         insns[0] = insns[1];
2644                         insns[1] = insn;
2645                 }
2646
2647                 switch (insns[1].opcode())
2648                 {
2649                         case spv::OpBranch:
2650                                 branchInstruction = insns[1];
2651                                 outs.emplace(Block::ID(branchInstruction.word(1)));
2652
2653                                 switch (insns[0].opcode())
2654                                 {
2655                                         case spv::OpLoopMerge:
2656                                                 kind = Loop;
2657                                                 mergeInstruction = insns[0];
2658                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
2659                                                 continueTarget = Block::ID(mergeInstruction.word(2));
2660                                                 break;
2661
2662                                         default:
2663                                                 kind = Block::Simple;
2664                                                 break;
2665                                 }
2666                                 break;
2667
2668                         case spv::OpBranchConditional:
2669                                 branchInstruction = insns[1];
2670                                 outs.emplace(Block::ID(branchInstruction.word(2)));
2671                                 outs.emplace(Block::ID(branchInstruction.word(3)));
2672
2673                                 switch (insns[0].opcode())
2674                                 {
2675                                         case spv::OpSelectionMerge:
2676                                                 kind = StructuredBranchConditional;
2677                                                 mergeInstruction = insns[0];
2678                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
2679                                                 break;
2680
2681                                         case spv::OpLoopMerge:
2682                                                 kind = Loop;
2683                                                 mergeInstruction = insns[0];
2684                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
2685                                                 continueTarget = Block::ID(mergeInstruction.word(2));
2686                                                 break;
2687
2688                                         default:
2689                                                 kind = UnstructuredBranchConditional;
2690                                                 break;
2691                                 }
2692                                 break;
2693
2694                         case spv::OpSwitch:
2695                                 branchInstruction = insns[1];
2696                                 outs.emplace(Block::ID(branchInstruction.word(2)));
2697                                 for (uint32_t w = 4; w < branchInstruction.wordCount(); w += 2)
2698                                 {
2699                                         outs.emplace(Block::ID(branchInstruction.word(w)));
2700                                 }
2701
2702                                 switch (insns[0].opcode())
2703                                 {
2704                                         case spv::OpSelectionMerge:
2705                                                 kind = StructuredSwitch;
2706                                                 mergeInstruction = insns[0];
2707                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
2708                                                 break;
2709
2710                                         default:
2711                                                 kind = UnstructuredSwitch;
2712                                                 break;
2713                                 }
2714                                 break;
2715
2716                         default:
2717                                 break;
2718                 }
2719         }
2720
2721         void SpirvShader::EmitState::addOutputActiveLaneMaskEdge(Block::ID to, RValue<SIMD::Int> mask)
2722         {
2723                 addActiveLaneMaskEdge(currentBlock, to, mask & activeLaneMask());
2724         }
2725
2726         void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask)
2727         {
2728                 auto edge = Block::Edge{from, to};
2729                 auto it = edgeActiveLaneMasks.find(edge);
2730                 if (it == edgeActiveLaneMasks.end())
2731                 {
2732                         edgeActiveLaneMasks.emplace(edge, mask);
2733                 }
2734                 else
2735                 {
2736                         auto combined = it->second | mask;
2737                         edgeActiveLaneMasks.erase(edge);
2738                         edgeActiveLaneMasks.emplace(edge, combined);
2739                 }
2740         }
2741
2742         RValue<SIMD::Int> SpirvShader::EmitState::getActiveLaneMaskEdge(Block::ID from, Block::ID to)
2743         {
2744                 auto edge = Block::Edge{from, to};
2745                 auto it = edgeActiveLaneMasks.find(edge);
2746                 ASSERT_MSG(it != edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
2747                 return it->second;
2748         }
2749
2750         SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
2751                 pipelineLayout(pipelineLayout)
2752         {
2753         }
2754
2755 }