OSDN Git Service

938351766e0334025245b2ee4ae1670afccfb9d8
[android-x86/external-swiftshader.git] / src / Pipeline / SpirvShader.cpp
1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <spirv/unified1/spirv.hpp>
16 #include <spirv/unified1/GLSL.std.450.h>
17 #include "SpirvShader.hpp"
18 #include "System/Math.hpp"
19 #include "Vulkan/VkBuffer.hpp"
20 #include "Vulkan/VkDebug.hpp"
21 #include "Vulkan/VkPipelineLayout.hpp"
22 #include "Device/Config.hpp"
23
24 #include <queue>
25
26 #ifdef Bool
27 #undef Bool // b/127920555
28 #endif
29
30 namespace
31 {
32         rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints)
33         {
34                 return rr::SignMask(ints) != 0;
35         }
36
37         rr::RValue<rr::Bool> AnyFalse(rr::RValue<sw::SIMD::Int> const &ints)
38         {
39                 return rr::SignMask(~ints) != 0;
40         }
41 }
42
43 namespace sw
44 {
45         volatile int SpirvShader::serialCounter = 1;    // Source of per-shader serial IDs (post-incremented by the constructor). Starts at 1; 0 is the invalid shader. NOTE(review): volatile does not make the increment atomic - confirm shaders are never constructed concurrently.
46
47         SpirvShader::SpirvShader(InsnStore const &insns)
48                         : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
49                           outputs{MAX_INTERFACE_COMPONENTS},
50                           serialID{serialCounter++}, modes{}
51         {
                   // Analysis pass over the module: walks every instruction once and records
                   // decorations, types, variables, constants, execution modes and the
                   // instruction range of each basic block. Opcodes that are not handled
                   // are reported through UNIMPLEMENTED.
52                 ASSERT(insns.size() > 0);
53
54                 // Simplifying assumptions (to be satisfied by earlier transformations)
55                 // - There is exactly one entrypoint in the module, and it's the one we want
56                 // - The only input/output OpVariables present are those used by the entrypoint
57
                   // currentBlock is 0 whenever the scan is outside a block; blockStart is the
                   // OpLabel instruction of the block currently being scanned.
58                 Block::ID currentBlock;
59                 InsnIterator blockStart;
60
61                 for (auto insn : *this)
62                 {
63                         switch (insn.opcode())
64                         {
65                         case spv::OpExecutionMode:
66                                 ProcessExecutionMode(insn);
67                                 break;
68
69                         case spv::OpDecorate:
70                         {
71                                 TypeOrObjectID targetId = insn.word(1);
72                                 auto decoration = static_cast<spv::Decoration>(insn.word(2));
                                   // Decorations without a literal operand are applied with argument 0.
73                                 decorations[targetId].Apply(
74                                                 decoration,
75                                                 insn.wordCount() > 3 ? insn.word(3) : 0);
76
                                   // Record that at least one Centroid decoration is present in the module.
77                                 if (decoration == spv::DecorationCentroid)
78                                         modes.NeedsCentroid = true;
79                                 break;
80                         }
81
82                         case spv::OpMemberDecorate:
83                         {
84                                 Type::ID targetId = insn.word(1);
85                                 auto memberIndex = insn.word(2);
86                                 auto &d = memberDecorations[targetId];
87                                 if (memberIndex >= d.size())
88                                         d.resize(memberIndex + 1);    // on demand; exact size would require another pass...
89                                 auto decoration = static_cast<spv::Decoration>(insn.word(3));
                                   // As for OpDecorate, a missing literal operand is passed as 0.
90                                 d[memberIndex].Apply(
91                                                 decoration,
92                                                 insn.wordCount() > 4 ? insn.word(4) : 0);
93
94                                 if (decoration == spv::DecorationCentroid)
95                                         modes.NeedsCentroid = true;
96                                 break;
97                         }
98
99                         case spv::OpDecorationGroup:
100                                 // Nothing to do here. We don't need to record the definition of the group; we'll just have
101                                 // the bundle of decorations float around. If we were to ever walk the decorations directly,
102                                 // we might think about introducing this as a real Object.
103                                 break;
104
105                         case spv::OpGroupDecorate:
106                         {
                                    // Word 1 is the group; its accumulated decorations are copied onto each target.
107                                 auto const &srcDecorations = decorations[insn.word(1)];
108                                 for (auto i = 2u; i < insn.wordCount(); i++)
109                                 {
110                                         // remaining operands are targets to apply the group to.
111                                         decorations[insn.word(i)].Apply(srcDecorations);
112                                 }
113                                 break;
114                         }
115
116                         case spv::OpGroupMemberDecorate:
117                         {
118                                 auto const &srcDecorations = decorations[insn.word(1)];
119                                 for (auto i = 2u; i < insn.wordCount(); i += 2)
120                                 {
121                                         // remaining operands are pairs of <id>, literal for members to apply to.
122                                         auto &d = memberDecorations[insn.word(i)];
123                                         auto memberIndex = insn.word(i + 1);
124                                         if (memberIndex >= d.size())
125                                                 d.resize(memberIndex + 1);    // on demand resize, see above...
126                                         d[memberIndex].Apply(srcDecorations);
127                                 }
128                                 break;
129                         }
130
131                         case spv::OpLabel:
132                         {
                                    // A label may only appear once the previous block has been closed by a
                                    // terminator (which resets currentBlock to 0 below).
133                                 ASSERT(currentBlock.value() == 0);
134                                 currentBlock = Block::ID(insn.word(1));
135                                 blockStart = insn;
136                                 break;
137                         }
138
139                         // Branch Instructions (subset of Termination Instructions):
140                         case spv::OpBranch:
141                         case spv::OpBranchConditional:
142                         case spv::OpSwitch:
143                         case spv::OpReturn:
144                         // fallthrough
145
146                         // Termination instruction:
147                         case spv::OpKill:
148                         case spv::OpUnreachable:
149                         {
150                                 ASSERT(currentBlock.value() != 0);
                                    // blockEnd is the instruction following the terminator; the block is
                                    // built from the pair (label, one past the terminator).
151                                 auto blockEnd = insn; blockEnd++;
152                                 blocks[currentBlock] = Block(blockStart, blockEnd);
153                                 currentBlock = Block::ID(0);
154
155                                 if (insn.opcode() == spv::OpKill)
156                                 {
157                                         modes.ContainsKill = true;
158                                 }
159                                 break;
160                         }
161
162                         case spv::OpLoopMerge:
163                         case spv::OpSelectionMerge:
164                                 break; // Nothing to do in analysis pass.
165
166                         case spv::OpTypeVoid:
167                         case spv::OpTypeBool:
168                         case spv::OpTypeInt:
169                         case spv::OpTypeFloat:
170                         case spv::OpTypeVector:
171                         case spv::OpTypeMatrix:
172                         case spv::OpTypeImage:
173                         case spv::OpTypeSampler:
174                         case spv::OpTypeSampledImage:
175                         case spv::OpTypeArray:
176                         case spv::OpTypeRuntimeArray:
177                         case spv::OpTypeStruct:
178                         case spv::OpTypePointer:
179                         case spv::OpTypeFunction:
180                                 DeclareType(insn);
181                                 break;
182
183                         case spv::OpVariable:
184                         {
185                                 Type::ID typeId = insn.word(1);
186                                 Object::ID resultId = insn.word(2);
187                                 auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
188                                 if (insn.wordCount() > 4)
189                                         UNIMPLEMENTED("Variable initializers not yet supported");
190
191                                 auto &object = defs[resultId];
192                                 object.kind = Object::Kind::Variable;
193                                 object.definition = insn;
194                                 object.type = typeId;
195                                 object.pointerBase = insn.word(2);      // base is itself
196
                                    // The storage class on the variable must agree with the one recorded
                                    // for its pointer type by DeclareType.
197                                 ASSERT(getType(typeId).storageClass == storageClass);
198
199                                 switch (storageClass)
200                                 {
201                                 case spv::StorageClassInput:
202                                 case spv::StorageClassOutput:
203                                         ProcessInterfaceVariable(object);
204                                         break;
205                                 case spv::StorageClassUniform:
206                                 case spv::StorageClassStorageBuffer:
207                                 case spv::StorageClassPushConstant:
                                            // These variables are reclassified from Variable to PhysicalPointer.
208                                         object.kind = Object::Kind::PhysicalPointer;
209                                         break;
210
211                                 case spv::StorageClassPrivate:
212                                 case spv::StorageClassFunction:
213                                         break; // Correctly handled.
214
215                                 case spv::StorageClassUniformConstant:
216                                 case spv::StorageClassWorkgroup:
217                                 case spv::StorageClassCrossWorkgroup:
218                                 case spv::StorageClassGeneric:
219                                 case spv::StorageClassAtomicCounter:
220                                 case spv::StorageClassImage:
221                                         UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
222                                         break;
223
224                                 default:
225                                         UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
226                                         break;
227                                 }
228                                 break;
229                         }
230
231                         case spv::OpConstant:
                                    // Only the first literal word (word 3) is stored.
232                                 CreateConstant(insn).constantValue[0] = insn.word(3);
233                                 break;
234                         case spv::OpConstantFalse:
235                                 CreateConstant(insn).constantValue[0] = 0;              // represent boolean false as zero
236                                 break;
237                         case spv::OpConstantTrue:
238                                 CreateConstant(insn).constantValue[0] = ~0u;    // represent boolean true as all bits set
239                                 break;
240                         case spv::OpConstantNull:
241                         case spv::OpUndef:
242                         {
243                                 // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
244                                 // OpConstantNull forms a constant of arbitrary type, all zeros.
245                                 auto &object = CreateConstant(insn);
246                                 auto &objectTy = getType(object.type);
247                                 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
248                                 {
249                                         object.constantValue[i] = 0;
250                                 }
251                                 break;
252                         }
253                         case spv::OpConstantComposite:
254                         {
255                                 auto &object = CreateConstant(insn);
                                    // Concatenate the components of every constituent (operands from
                                    // word 3 onwards), in operand order, into this constant's storage.
256                                 auto offset = 0u;
257                                 for (auto i = 0u; i < insn.wordCount() - 3; i++)
258                                 {
259                                         auto &constituent = getObject(insn.word(i + 3));
260                                         auto &constituentTy = getType(constituent.type);
261                                         for (auto j = 0u; j < constituentTy.sizeInComponents; j++)
262                                                 object.constantValue[offset++] = constituent.constantValue[j];
263                                 }
264
265                                 auto objectId = Object::ID(insn.word(2));
266                                 auto decorationsIt = decorations.find(objectId);
267                                 if (decorationsIt != decorations.end() &&
268                                         decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
269                                 {
270                                         // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
271                                         // Decorating an object with the WorkgroupSize built-in
272                                         // decoration will make that object contain the dimensions
273                                         // of a local workgroup. If an object is decorated with the
274                                         // WorkgroupSize decoration, this must take precedence over
275                                         // any execution mode set for LocalSize.
276                                         // The object decorated with WorkgroupSize must be declared
277                                         // as a three-component vector of 32-bit integers.
278                                         ASSERT(getType(object.type).sizeInComponents == 3);
279                                         modes.WorkgroupSizeX = object.constantValue[0];
280                                         modes.WorkgroupSizeY = object.constantValue[1];
281                                         modes.WorkgroupSizeZ = object.constantValue[2];
282                                 }
283                                 break;
284                         }
285
286                         case spv::OpCapability:
287                                 break; // Various capabilities will be declared, but none affect our code generation at this point.
288                         case spv::OpMemoryModel:
289                                 break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
290
291                         case spv::OpEntryPoint:
292                                 break;
293                         case spv::OpFunction:
294                                 ASSERT(mainBlockId.value() == 0); // Multiple functions found
295                                 // Scan forward to find the function's label.
296                                 for (auto it = insn; it != end() && mainBlockId.value() == 0; it++)
297                                 {
298                                         switch (it.opcode())
299                                         {
300                                         case spv::OpFunction:
301                                         case spv::OpFunctionParameter:
302                                                 break;
303                                         case spv::OpLabel:
                                                    // Setting mainBlockId ends the scan via the loop condition.
304                                                 mainBlockId = Block::ID(it.word(1));
305                                                 break;
306                                         default:
307                                                 WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str());
308                                         }
309                                 }
310                                 ASSERT(mainBlockId.value() != 0); // Function's OpLabel not found
311                                 break;
312                         case spv::OpFunctionEnd:
313                                 // Due to preprocessing, the entrypoint and its function provide no value.
314                                 break;
315                         case spv::OpExtInstImport:
316                                 // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
317                                 // Valid shaders will not attempt to import any other instruction sets.
                                    // The import name is compared as a C string starting at word 2.
318                                 if (0 != strcmp("GLSL.std.450", reinterpret_cast<char const *>(insn.wordPointer(2))))
319                                 {
320                                         UNIMPLEMENTED("Only GLSL extended instruction set is supported");
321                                 }
322                                 break;
323                         case spv::OpName:
324                         case spv::OpMemberName:
325                         case spv::OpSource:
326                         case spv::OpSourceContinued:
327                         case spv::OpSourceExtension:
328                         case spv::OpLine:
329                         case spv::OpNoLine:
330                         case spv::OpModuleProcessed:
331                         case spv::OpString:
332                                 // No semantic impact
333                                 break;
334
335                         case spv::OpFunctionParameter:
336                         case spv::OpFunctionCall:
337                         case spv::OpSpecConstant:
338                         case spv::OpSpecConstantComposite:
339                         case spv::OpSpecConstantFalse:
340                         case spv::OpSpecConstantOp:
341                         case spv::OpSpecConstantTrue:
342                                 // These should have all been removed by preprocessing passes. If we see them here,
343                                 // our assumptions are wrong and we will probably generate wrong code.
344                                 UNIMPLEMENTED("%s should have already been lowered.", OpcodeName(insn.opcode()).c_str());
345                                 break;
346
347                         case spv::OpFConvert:
348                         case spv::OpSConvert:
349                         case spv::OpUConvert:
350                                 UNIMPLEMENTED("No valid uses for Op*Convert until we support multiple bit widths");
351                                 break;
352
353                         case spv::OpLoad:
354                         case spv::OpAccessChain:
355                         case spv::OpInBoundsAccessChain:
356                         case spv::OpCompositeConstruct:
357                         case spv::OpCompositeInsert:
358                         case spv::OpCompositeExtract:
359                         case spv::OpVectorShuffle:
360                         case spv::OpVectorTimesScalar:
361                         case spv::OpMatrixTimesScalar:
362                         case spv::OpMatrixTimesVector:
363                         case spv::OpVectorTimesMatrix:
364                         case spv::OpVectorExtractDynamic:
365                         case spv::OpVectorInsertDynamic:
366                         case spv::OpNot: // Unary ops
367                         case spv::OpSNegate:
368                         case spv::OpFNegate:
369                         case spv::OpLogicalNot:
370                         case spv::OpIAdd: // Binary ops
371                         case spv::OpISub:
372                         case spv::OpIMul:
373                         case spv::OpSDiv:
374                         case spv::OpUDiv:
375                         case spv::OpFAdd:
376                         case spv::OpFSub:
377                         case spv::OpFMul:
378                         case spv::OpFDiv:
379                         case spv::OpFMod:
380                         case spv::OpFRem:
381                         case spv::OpFOrdEqual:
382                         case spv::OpFUnordEqual:
383                         case spv::OpFOrdNotEqual:
384                         case spv::OpFUnordNotEqual:
385                         case spv::OpFOrdLessThan:
386                         case spv::OpFUnordLessThan:
387                         case spv::OpFOrdGreaterThan:
388                         case spv::OpFUnordGreaterThan:
389                         case spv::OpFOrdLessThanEqual:
390                         case spv::OpFUnordLessThanEqual:
391                         case spv::OpFOrdGreaterThanEqual:
392                         case spv::OpFUnordGreaterThanEqual:
393                         case spv::OpSMod:
394                         case spv::OpSRem:
395                         case spv::OpUMod:
396                         case spv::OpIEqual:
397                         case spv::OpINotEqual:
398                         case spv::OpUGreaterThan:
399                         case spv::OpSGreaterThan:
400                         case spv::OpUGreaterThanEqual:
401                         case spv::OpSGreaterThanEqual:
402                         case spv::OpULessThan:
403                         case spv::OpSLessThan:
404                         case spv::OpULessThanEqual:
405                         case spv::OpSLessThanEqual:
406                         case spv::OpShiftRightLogical:
407                         case spv::OpShiftRightArithmetic:
408                         case spv::OpShiftLeftLogical:
409                         case spv::OpBitwiseOr:
410                         case spv::OpBitwiseXor:
411                         case spv::OpBitwiseAnd:
412                         case spv::OpLogicalOr:
413                         case spv::OpLogicalAnd:
414                         case spv::OpLogicalEqual:
415                         case spv::OpLogicalNotEqual:
416                         case spv::OpUMulExtended:
417                         case spv::OpSMulExtended:
418                         case spv::OpDot:
419                         case spv::OpConvertFToU:
420                         case spv::OpConvertFToS:
421                         case spv::OpConvertSToF:
422                         case spv::OpConvertUToF:
423                         case spv::OpBitcast:
424                         case spv::OpSelect:
425                         case spv::OpExtInst:
426                         case spv::OpIsInf:
427                         case spv::OpIsNan:
428                         case spv::OpAny:
429                         case spv::OpAll:
430                         case spv::OpDPdx:
431                         case spv::OpDPdxCoarse:
432                         case spv::OpDPdy:
433                         case spv::OpDPdyCoarse:
434                         case spv::OpFwidth:
435                         case spv::OpFwidthCoarse:
436                         case spv::OpDPdxFine:
437                         case spv::OpDPdyFine:
438                         case spv::OpFwidthFine:
439                         case spv::OpAtomicLoad:
440                         case spv::OpPhi:
441                                 // Instructions that yield an intermediate value
442                         {
                                    // Word 1 is the result type and word 2 the result id; the result is
                                    // registered in defs as a Value.
443                                 Type::ID typeId = insn.word(1);
444                                 Object::ID resultId = insn.word(2);
445                                 auto &object = defs[resultId];
446                                 object.type = typeId;
447                                 object.kind = Object::Kind::Value;
448                                 object.definition = insn;
449
450                                 if (insn.opcode() == spv::OpAccessChain || insn.opcode() == spv::OpInBoundsAccessChain)
451                                 {
452                                         // interior ptr has two parts:
453                                         // - logical base ptr, common across all lanes and known at compile time
454                                         // - per-lane offset
                                            // The chain inherits the pointerBase of the object it indexes into (word 3).
455                                         Object::ID baseId = insn.word(3);
456                                         object.pointerBase = getObject(baseId).pointerBase;
457                                 }
458                                 break;
459                         }
460
461                         case spv::OpStore:
462                         case spv::OpAtomicStore:
463                                 // Don't need to do anything during analysis pass
464                                 break;
465
466                         default:
467                                 UNIMPLEMENTED("%s", OpcodeName(insn.opcode()).c_str());
468                         }
469                 }
470
471                 // Assign all Block::ins
                    // For every recorded edge block -> out, register the block as a predecessor of out.
                    // NOTE(review): block.outs is not filled in anywhere in this function; presumably
                    // the Block(begin, end) constructor derives it - confirm.
472                 for (auto &it : blocks)
473                 {
474                         auto &blockId = it.first;
475                         auto &block = it.second;
476                         for (auto &outId : block.outs)
477                         {
478                                 auto outIt = blocks.find(outId);
479                                 ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
480                                 auto &out = outIt->second;
481                                 out.ins.emplace(blockId);
482                         }
483                 }
484         }
485
486         void SpirvShader::DeclareType(InsnIterator insn)
487         {
488                 Type::ID resultId = insn.word(1);
489
490                 auto &type = types[resultId];
491                 type.definition = insn;
492                 type.sizeInComponents = ComputeTypeSize(insn);
493
494                 // A structure is a builtin block if it has a builtin
495                 // member. All members of such a structure are builtins.
496                 switch (insn.opcode())
497                 {
498                 case spv::OpTypeStruct:
499                 {
500                         auto d = memberDecorations.find(resultId);
501                         if (d != memberDecorations.end())
502                         {
503                                 for (auto &m : d->second)
504                                 {
505                                         if (m.HasBuiltIn)
506                                         {
507                                                 type.isBuiltInBlock = true;
508                                                 break;
509                                         }
510                                 }
511                         }
512                         break;
513                 }
514                 case spv::OpTypePointer:
515                 {
516                         Type::ID elementTypeId = insn.word(3);
517                         type.element = elementTypeId;
518                         type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
519                         type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
520                         break;
521                 }
522                 case spv::OpTypeVector:
523                 case spv::OpTypeMatrix:
524                 case spv::OpTypeArray:
525                 case spv::OpTypeRuntimeArray:
526                 {
527                         Type::ID elementTypeId = insn.word(2);
528                         type.element = elementTypeId;
529                         break;
530                 }
531                 default:
532                         break;
533                 }
534         }
535
536         SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
537         {
538                 Type::ID typeId = insn.word(1);
539                 Object::ID resultId = insn.word(2);
540                 auto &object = defs[resultId];
541                 auto &objectTy = getType(typeId);
542                 object.type = typeId;
543                 object.kind = Object::Kind::Constant;
544                 object.definition = insn;
545                 object.constantValue = std::unique_ptr<uint32_t[]>(new uint32_t[objectTy.sizeInComponents]);
546                 return object;
547         }
548
549         void SpirvShader::ProcessInterfaceVariable(Object &object)
550         {
551                 auto &objectTy = getType(object.type);
552                 ASSERT(objectTy.storageClass == spv::StorageClassInput || objectTy.storageClass == spv::StorageClassOutput);
553
554                 ASSERT(objectTy.opcode() == spv::OpTypePointer);
555                 auto pointeeTy = getType(objectTy.element);
556
557                 auto &builtinInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputBuiltins : outputBuiltins;
558                 auto &userDefinedInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputs : outputs;
559
560                 ASSERT(object.opcode() == spv::OpVariable);
561                 Object::ID resultId = object.definition.word(2);
562
563                 if (objectTy.isBuiltInBlock)
564                 {
565                         // walk the builtin block, registering each of its members separately.
566                         auto m = memberDecorations.find(objectTy.element);
567                         ASSERT(m != memberDecorations.end());        // otherwise we wouldn't have marked the type chain
568                         auto &structType = pointeeTy.definition;
569                         auto offset = 0u;
570                         auto word = 2u;
571                         for (auto &member : m->second)
572                         {
573                                 auto &memberType = getType(structType.word(word));
574
575                                 if (member.HasBuiltIn)
576                                 {
577                                         builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
578                                 }
579
580                                 offset += memberType.sizeInComponents;
581                                 ++word;
582                         }
583                         return;
584                 }
585
586                 auto d = decorations.find(resultId);
587                 if (d != decorations.end() && d->second.HasBuiltIn)
588                 {
589                         builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
590                 }
591                 else
592                 {
593                         object.kind = Object::Kind::InterfaceVariable;
594                         VisitInterface(resultId,
595                                                    [&userDefinedInterface](Decorations const &d, AttribType type) {
596                                                            // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
597                                                            auto scalarSlot = (d.Location << 2) | d.Component;
598                                                            ASSERT(scalarSlot >= 0 &&
599                                                                           scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
600
601                                                            auto &slot = userDefinedInterface[scalarSlot];
602                                                            slot.Type = type;
603                                                            slot.Flat = d.Flat;
604                                                            slot.NoPerspective = d.NoPerspective;
605                                                            slot.Centroid = d.Centroid;
606                                                    });
607                 }
608         }
609
610         void SpirvShader::ProcessExecutionMode(InsnIterator insn)
611         {
612                 auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
613                 switch (mode)
614                 {
615                 case spv::ExecutionModeEarlyFragmentTests:
616                         modes.EarlyFragmentTests = true;
617                         break;
618                 case spv::ExecutionModeDepthReplacing:
619                         modes.DepthReplacing = true;
620                         break;
621                 case spv::ExecutionModeDepthGreater:
622                         modes.DepthGreater = true;
623                         break;
624                 case spv::ExecutionModeDepthLess:
625                         modes.DepthLess = true;
626                         break;
627                 case spv::ExecutionModeDepthUnchanged:
628                         modes.DepthUnchanged = true;
629                         break;
630                 case spv::ExecutionModeLocalSize:
631                         modes.WorkgroupSizeX = insn.word(3);
632                         modes.WorkgroupSizeY = insn.word(4);
633                         modes.WorkgroupSizeZ = insn.word(5);
634                         break;
635                 case spv::ExecutionModeOriginUpperLeft:
636                         // This is always the case for a Vulkan shader. Do nothing.
637                         break;
638                 default:
639                         UNIMPLEMENTED("No other execution modes are permitted");
640                 }
641         }
642
643         uint32_t SpirvShader::ComputeTypeSize(InsnIterator insn)
644         {
645                 // Types are always built from the bottom up (with the exception of forward ptrs, which
646                 // don't appear in Vulkan shaders. Therefore, we can always assume our component parts have
647                 // already been described (and so their sizes determined)
648                 switch (insn.opcode())
649                 {
650                 case spv::OpTypeVoid:
651                 case spv::OpTypeSampler:
652                 case spv::OpTypeImage:
653                 case spv::OpTypeSampledImage:
654                 case spv::OpTypeFunction:
655                 case spv::OpTypeRuntimeArray:
656                         // Objects that don't consume any space.
657                         // Descriptor-backed objects currently only need exist at compile-time.
658                         // Runtime arrays don't appear in places where their size would be interesting
659                         return 0;
660
661                 case spv::OpTypeBool:
662                 case spv::OpTypeFloat:
663                 case spv::OpTypeInt:
664                         // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
665                         // we might need to change this, but only 32 bit components are required for Vulkan 1.1.
666                         return 1;
667
668                 case spv::OpTypeVector:
669                 case spv::OpTypeMatrix:
670                         // Vectors and matrices both consume element count * element size.
671                         return getType(insn.word(2)).sizeInComponents * insn.word(3);
672
673                 case spv::OpTypeArray:
674                 {
675                         // Element count * element size. Array sizes come from constant ids.
676                         auto arraySize = GetConstantInt(insn.word(3));
677                         return getType(insn.word(2)).sizeInComponents * arraySize;
678                 }
679
680                 case spv::OpTypeStruct:
681                 {
682                         uint32_t size = 0;
683                         for (uint32_t i = 2u; i < insn.wordCount(); i++)
684                         {
685                                 size += getType(insn.word(i)).sizeInComponents;
686                         }
687                         return size;
688                 }
689
690                 case spv::OpTypePointer:
691                         // Runtime representation of a pointer is a per-lane index.
692                         // Note: clients are expected to look through the pointer if they want the pointee size instead.
693                         return 1;
694
695                 default:
696                         // Some other random insn.
697                         UNIMPLEMENTED("Only types are supported");
698                         return 0;
699                 }
700         }
701
702         bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
703         {
704                 switch (storageClass)
705                 {
706                 case spv::StorageClassUniform:
707                 case spv::StorageClassStorageBuffer:
708                 case spv::StorageClassPushConstant:
709                         return false;
710                 default:
711                         return true;
712                 }
713         }
714
715         template<typename F>
716         int SpirvShader::VisitInterfaceInner(Type::ID id, Decorations d, F f) const
717         {
718                 // Recursively walks variable definition and its type tree, taking into account
719                 // any explicit Location or Component decorations encountered; where explicit
720                 // Locations or Components are not specified, assigns them sequentially.
721                 // Collected decorations are carried down toward the leaves and across
722                 // siblings; Effect of decorations intentionally does not flow back up the tree.
723                 //
724                 // F is a functor to be called with the effective decoration set for every component.
725                 //
726                 // Returns the next available location, and calls f().
727
728                 // This covers the rules in Vulkan 1.1 spec, 14.1.4 Location Assignment.
729
730                 ApplyDecorationsForId(&d, id);
731
732                 auto const &obj = getType(id);
733                 switch(obj.opcode())
734                 {
735                 case spv::OpTypePointer:
736                         return VisitInterfaceInner<F>(obj.definition.word(3), d, f);
737                 case spv::OpTypeMatrix:
738                         for (auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
739                         {
740                                 // consumes same components of N consecutive locations
741                                 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
742                         }
743                         return d.Location;
744                 case spv::OpTypeVector:
745                         for (auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
746                         {
747                                 // consumes N consecutive components in the same location
748                                 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
749                         }
750                         return d.Location + 1;
751                 case spv::OpTypeFloat:
752                         f(d, ATTRIBTYPE_FLOAT);
753                         return d.Location + 1;
754                 case spv::OpTypeInt:
755                         f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
756                         return d.Location + 1;
757                 case spv::OpTypeBool:
758                         f(d, ATTRIBTYPE_UINT);
759                         return d.Location + 1;
760                 case spv::OpTypeStruct:
761                 {
762                         // iterate over members, which may themselves have Location/Component decorations
763                         for (auto i = 0u; i < obj.definition.wordCount() - 2; i++)
764                         {
765                                 ApplyDecorationsForIdMember(&d, id, i);
766                                 d.Location = VisitInterfaceInner<F>(obj.definition.word(i + 2), d, f);
767                                 d.Component = 0;    // Implicit locations always have component=0
768                         }
769                         return d.Location;
770                 }
771                 case spv::OpTypeArray:
772                 {
773                         auto arraySize = GetConstantInt(obj.definition.word(3));
774                         for (auto i = 0u; i < arraySize; i++)
775                         {
776                                 d.Location = VisitInterfaceInner<F>(obj.definition.word(2), d, f);
777                         }
778                         return d.Location;
779                 }
780                 default:
781                         // Intentionally partial; most opcodes do not participate in type hierarchies
782                         return 0;
783                 }
784         }
785
786         template<typename F>
787         void SpirvShader::VisitInterface(Object::ID id, F f) const
788         {
789                 // Walk a variable definition and call f for each component in it.
790                 Decorations d{};
791                 ApplyDecorationsForId(&d, id);
792
793                 auto def = getObject(id).definition;
794                 ASSERT(def.opcode() == spv::OpVariable);
795                 VisitInterfaceInner<F>(def.word(1), d, f);
796         }
797
798         SIMD::Int SpirvShader::WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
799         {
800                 // Produce a offset into external memory in sizeof(float) units
801
802                 int constantOffset = 0;
803                 SIMD::Int dynamicOffset = SIMD::Int(0);
804                 auto &baseObject = getObject(id);
805                 Type::ID typeId = getType(baseObject.type).element;
806                 Decorations d{};
807                 ApplyDecorationsForId(&d, baseObject.type);
808
809                 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
810                 // Start with its offset and build from there.
811                 if (baseObject.kind == Object::Kind::Value)
812                 {
813                         dynamicOffset += routine->getIntermediate(id).Int(0);
814                 }
815
816                 for (auto i = 0u; i < numIndexes; i++)
817                 {
818                         auto & type = getType(typeId);
819                         switch (type.definition.opcode())
820                         {
821                         case spv::OpTypeStruct:
822                         {
823                                 int memberIndex = GetConstantInt(indexIds[i]);
824                                 ApplyDecorationsForIdMember(&d, typeId, memberIndex);
825                                 ASSERT(d.HasOffset);
826                                 constantOffset += d.Offset / sizeof(float);
827                                 typeId = type.definition.word(2u + memberIndex);
828                                 break;
829                         }
830                         case spv::OpTypeArray:
831                         case spv::OpTypeRuntimeArray:
832                         {
833                                 // TODO: b/127950082: Check bounds.
834                                 ApplyDecorationsForId(&d, typeId);
835                                 ASSERT(d.HasArrayStride);
836                                 auto & obj = getObject(indexIds[i]);
837                                 if (obj.kind == Object::Kind::Constant)
838                                         constantOffset += d.ArrayStride/sizeof(float) * GetConstantInt(indexIds[i]);
839                                 else
840                                         dynamicOffset += SIMD::Int(d.ArrayStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
841                                 typeId = type.element;
842                                 break;
843                         }
844                         case spv::OpTypeMatrix:
845                         {
846                                 // TODO: b/127950082: Check bounds.
847                                 ApplyDecorationsForId(&d, typeId);
848                                 ASSERT(d.HasMatrixStride);
849                                 auto & obj = getObject(indexIds[i]);
850                                 if (obj.kind == Object::Kind::Constant)
851                                         constantOffset += d.MatrixStride/sizeof(float) * GetConstantInt(indexIds[i]);
852                                 else
853                                         dynamicOffset += SIMD::Int(d.MatrixStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
854                                 typeId = type.element;
855                                 break;
856                         }
857                         case spv::OpTypeVector:
858                         {
859                                 auto & obj = getObject(indexIds[i]);
860                                 if (obj.kind == Object::Kind::Constant)
861                                         constantOffset += GetConstantInt(indexIds[i]);
862                                 else
863                                         dynamicOffset += routine->getIntermediate(indexIds[i]).Int(0);
864                                 typeId = type.element;
865                                 break;
866                         }
867                         default:
868                                 UNIMPLEMENTED("Unexpected type '%s' in WalkExplicitLayoutAccessChain", OpcodeName(type.definition.opcode()).c_str());
869                         }
870                 }
871
872                 return dynamicOffset + SIMD::Int(constantOffset);
873         }
874
875         SIMD::Int SpirvShader::WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
876         {
877                 // TODO: avoid doing per-lane work in some cases if we can?
878                 // Produce a *component* offset into location-oriented memory
879
880                 int constantOffset = 0;
881                 SIMD::Int dynamicOffset = SIMD::Int(0);
882                 auto &baseObject = getObject(id);
883                 Type::ID typeId = getType(baseObject.type).element;
884
885                 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
886                 // Start with its offset and build from there.
887                 if (baseObject.kind == Object::Kind::Value)
888                 {
889                         dynamicOffset += routine->getIntermediate(id).Int(0);
890                 }
891
892                 for (auto i = 0u; i < numIndexes; i++)
893                 {
894                         auto & type = getType(typeId);
895                         switch(type.opcode())
896                         {
897                         case spv::OpTypeStruct:
898                         {
899                                 int memberIndex = GetConstantInt(indexIds[i]);
900                                 int offsetIntoStruct = 0;
901                                 for (auto j = 0; j < memberIndex; j++) {
902                                         auto memberType = type.definition.word(2u + j);
903                                         offsetIntoStruct += getType(memberType).sizeInComponents;
904                                 }
905                                 constantOffset += offsetIntoStruct;
906                                 typeId = type.definition.word(2u + memberIndex);
907                                 break;
908                         }
909
910                         case spv::OpTypeVector:
911                         case spv::OpTypeMatrix:
912                         case spv::OpTypeArray:
913                         case spv::OpTypeRuntimeArray:
914                         {
915                                 // TODO: b/127950082: Check bounds.
916                                 auto stride = getType(type.element).sizeInComponents;
917                                 auto & obj = getObject(indexIds[i]);
918                                 if (obj.kind == Object::Kind::Constant)
919                                         constantOffset += stride * GetConstantInt(indexIds[i]);
920                                 else
921                                         dynamicOffset += SIMD::Int(stride) * routine->getIntermediate(indexIds[i]).Int(0);
922                                 typeId = type.element;
923                                 break;
924                         }
925
926                         default:
927                                 UNIMPLEMENTED("Unexpected type '%s' in WalkAccessChain", OpcodeName(type.opcode()).c_str());
928                         }
929                 }
930
931                 return dynamicOffset + SIMD::Int(constantOffset);
932         }
933
934         uint32_t SpirvShader::WalkLiteralAccessChain(Type::ID typeId, uint32_t numIndexes, uint32_t const *indexes) const
935         {
936                 uint32_t constantOffset = 0;
937
938                 for (auto i = 0u; i < numIndexes; i++)
939                 {
940                         auto & type = getType(typeId);
941                         switch(type.opcode())
942                         {
943                         case spv::OpTypeStruct:
944                         {
945                                 int memberIndex = indexes[i];
946                                 int offsetIntoStruct = 0;
947                                 for (auto j = 0; j < memberIndex; j++) {
948                                         auto memberType = type.definition.word(2u + j);
949                                         offsetIntoStruct += getType(memberType).sizeInComponents;
950                                 }
951                                 constantOffset += offsetIntoStruct;
952                                 typeId = type.definition.word(2u + memberIndex);
953                                 break;
954                         }
955
956                         case spv::OpTypeVector:
957                         case spv::OpTypeMatrix:
958                         case spv::OpTypeArray:
959                         {
960                                 auto elementType = type.definition.word(2);
961                                 auto stride = getType(elementType).sizeInComponents;
962                                 constantOffset += stride * indexes[i];
963                                 typeId = elementType;
964                                 break;
965                         }
966
967                         default:
968                                 UNIMPLEMENTED("Unexpected type in WalkLiteralAccessChain");
969                         }
970                 }
971
972                 return constantOffset;
973         }
974
975         void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
976         {
977                 switch (decoration)
978                 {
979                 case spv::DecorationLocation:
980                         HasLocation = true;
981                         Location = static_cast<int32_t>(arg);
982                         break;
983                 case spv::DecorationComponent:
984                         HasComponent = true;
985                         Component = arg;
986                         break;
987                 case spv::DecorationDescriptorSet:
988                         HasDescriptorSet = true;
989                         DescriptorSet = arg;
990                         break;
991                 case spv::DecorationBinding:
992                         HasBinding = true;
993                         Binding = arg;
994                         break;
995                 case spv::DecorationBuiltIn:
996                         HasBuiltIn = true;
997                         BuiltIn = static_cast<spv::BuiltIn>(arg);
998                         break;
999                 case spv::DecorationFlat:
1000                         Flat = true;
1001                         break;
1002                 case spv::DecorationNoPerspective:
1003                         NoPerspective = true;
1004                         break;
1005                 case spv::DecorationCentroid:
1006                         Centroid = true;
1007                         break;
1008                 case spv::DecorationBlock:
1009                         Block = true;
1010                         break;
1011                 case spv::DecorationBufferBlock:
1012                         BufferBlock = true;
1013                         break;
1014                 case spv::DecorationOffset:
1015                         HasOffset = true;
1016                         Offset = static_cast<int32_t>(arg);
1017                         break;
1018                 case spv::DecorationArrayStride:
1019                         HasArrayStride = true;
1020                         ArrayStride = static_cast<int32_t>(arg);
1021                         break;
1022                 case spv::DecorationMatrixStride:
1023                         HasMatrixStride = true;
1024                         MatrixStride = static_cast<int32_t>(arg);
1025                         break;
1026                 default:
1027                         // Intentionally partial, there are many decorations we just don't care about.
1028                         break;
1029                 }
1030         }
1031
1032         void SpirvShader::Decorations::Apply(const sw::SpirvShader::Decorations &src)
1033         {
1034                 // Apply a decoration group to this set of decorations
1035                 if (src.HasBuiltIn)
1036                 {
1037                         HasBuiltIn = true;
1038                         BuiltIn = src.BuiltIn;
1039                 }
1040
1041                 if (src.HasLocation)
1042                 {
1043                         HasLocation = true;
1044                         Location = src.Location;
1045                 }
1046
1047                 if (src.HasComponent)
1048                 {
1049                         HasComponent = true;
1050                         Component = src.Component;
1051                 }
1052
1053                 if (src.HasDescriptorSet)
1054                 {
1055                         HasDescriptorSet = true;
1056                         DescriptorSet = src.DescriptorSet;
1057                 }
1058
1059                 if (src.HasBinding)
1060                 {
1061                         HasBinding = true;
1062                         Binding = src.Binding;
1063                 }
1064
1065                 if (src.HasOffset)
1066                 {
1067                         HasOffset = true;
1068                         Offset = src.Offset;
1069                 }
1070
1071                 if (src.HasArrayStride)
1072                 {
1073                         HasArrayStride = true;
1074                         ArrayStride = src.ArrayStride;
1075                 }
1076
1077                 if (src.HasMatrixStride)
1078                 {
1079                         HasMatrixStride = true;
1080                         MatrixStride = src.MatrixStride;
1081                 }
1082
1083                 Flat |= src.Flat;
1084                 NoPerspective |= src.NoPerspective;
1085                 Centroid |= src.Centroid;
1086                 Block |= src.Block;
1087                 BufferBlock |= src.BufferBlock;
1088         }
1089
1090         void SpirvShader::ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const
1091         {
1092                 auto it = decorations.find(id);
1093                 if (it != decorations.end())
1094                         d->Apply(it->second);
1095         }
1096
1097         void SpirvShader::ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const
1098         {
1099                 auto it = memberDecorations.find(id);
1100                 if (it != memberDecorations.end() && member < it->second.size())
1101                 {
1102                         d->Apply(it->second[member]);
1103                 }
1104         }
1105
1106         uint32_t SpirvShader::GetConstantInt(Object::ID id) const
1107         {
1108                 // Slightly hackish access to constants very early in translation.
1109                 // General consumption of constants by other instructions should
1110                 // probably be just lowered to Reactor.
1111
1112                 // TODO: not encountered yet since we only use this for array sizes etc,
1113                 // but is possible to construct integer constant 0 via OpConstantNull.
1114                 auto insn = getObject(id).definition;
1115                 ASSERT(insn.opcode() == spv::OpConstant);
1116                 ASSERT(getType(insn.word(1)).opcode() == spv::OpTypeInt);
1117                 return insn.word(3);
1118         }
1119
1120         // emit-time
1121
1122         void SpirvShader::emitProlog(SpirvRoutine *routine) const
1123         {
1124                 for (auto insn : *this)
1125                 {
1126                         switch (insn.opcode())
1127                         {
1128                         case spv::OpVariable:
1129                         {
1130                                 Type::ID resultPointerTypeId = insn.word(1);
1131                                 auto resultPointerType = getType(resultPointerTypeId);
1132                                 auto pointeeType = getType(resultPointerType.element);
1133
1134                                 if(pointeeType.sizeInComponents > 0)  // TODO: what to do about zero-slot objects?
1135                                 {
1136                                         Object::ID resultId = insn.word(2);
1137                                         routine->createLvalue(resultId, pointeeType.sizeInComponents);
1138                                 }
1139                                 break;
1140                         }
1141                         default:
1142                                 // Nothing else produces interface variables, so can all be safely ignored.
1143                                 break;
1144                         }
1145                 }
1146         }
1147
1148         void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask) const
1149         {
1150                 EmitState state;
1151                 state.setActiveLaneMask(activeLaneMask);
1152                 state.routine = routine;
1153
1154                 // Emit everything up to the first label
1155                 // TODO: Separate out dispatch of block from non-block instructions?
1156                 for (auto insn : *this)
1157                 {
1158                         if (insn.opcode() == spv::OpLabel)
1159                         {
1160                                 break;
1161                         }
1162                         EmitInstruction(insn, &state);
1163                 }
1164
1165                 // Emit all the blocks in BFS order, starting with the main block.
1166                 std::queue<Block::ID> pending;
1167                 pending.push(mainBlockId);
1168                 while (pending.size() > 0)
1169                 {
1170                         auto id = pending.front();
1171                         pending.pop();
1172                         if (state.visited.count(id) == 0)
1173                         {
1174                                 EmitBlock(id, &state);
1175                                 for (auto it : getBlock(id).outs)
1176                                 {
1177                                         pending.push(it);
1178                                 }
1179                         }
1180                 }
1181         }
1182
1183         void SpirvShader::EmitBlock(Block::ID id, EmitState *state) const
1184         {
1185                 if (state->visited.count(id) > 0)
1186                 {
1187                         return; // Already processed this block.
1188                 }
1189
1190                 state->visited.emplace(id);
1191
1192                 auto &block = getBlock(id);
1193
1194                 switch (block.kind)
1195                 {
1196                         case Block::Simple:
1197                         case Block::StructuredBranchConditional:
1198                         case Block::UnstructuredBranchConditional:
1199                         case Block::StructuredSwitch:
1200                         case Block::UnstructuredSwitch:
1201                                 if (id != mainBlockId)
1202                                 {
1203                                         // Emit all preceding blocks and set the activeLaneMask.
1204                                         Intermediate activeLaneMask(1);
1205                                         activeLaneMask.move(0, SIMD::Int(0));
1206                                         for (auto in : block.ins)
1207                                         {
1208                                                 EmitBlock(in, state);
1209                                                 auto inMask = state->getActiveLaneMaskEdge(in, id);
1210                                                 activeLaneMask.replace(0, activeLaneMask.Int(0) | inMask);
1211                                         }
1212                                         state->setActiveLaneMask(activeLaneMask.Int(0));
1213                                 }
1214                                 state->currentBlock = id;
1215                                 EmitInstructions(block.begin(), block.end(), state);
1216                                 break;
1217
1218                         case Block::Loop:
1219                                 state->currentBlock = id;
1220                                 EmitLoop(state);
1221                                 break;
1222
1223                         default:
1224                                 UNREACHABLE("Unexpected Block Kind: %d", int(block.kind));
1225                 }
1226         }
1227
1228         void SpirvShader::EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const
1229         {
1230                 for (auto insn = begin; insn != end; insn++)
1231                 {
1232                         auto res = EmitInstruction(insn, state);
1233                         switch (res)
1234                         {
1235                         case EmitResult::Continue:
1236                                 continue;
1237                         case EmitResult::Terminator:
1238                                 break;
1239                         default:
1240                                 UNREACHABLE("Unexpected EmitResult %d", int(res));
1241                                 break;
1242                         }
1243                 }
1244         }
1245
1246         void SpirvShader::EmitLoop(EmitState *state) const
1247         {
                     // Emits the loop whose header block is state->currentBlock.
                     // In order, this function emits:
                     //  1. every predecessor of the header that is not a back edge,
                     //  2. storage for the header's OpPhi results, seeded from those predecessors,
                     //  3. a header basic block containing the header's non-phi instructions,
                     //  4. the back-edge predecessors of the header,
                     //  5. a conditional branch back to the header while any lane is still looping,
                     //  6. the loop's merge block.
1248                 auto blockId = state->currentBlock;
1249                 auto block = getBlock(blockId);
1250
1251                 // loopActiveLaneMask is the mask of lanes that are continuing to loop.
1252                 // This is initialized with the incoming active lane masks.
1253                 SIMD::Int loopActiveLaneMask = SIMD::Int(0);
1254                 for (auto in : block.ins)
1255                 {
1256                         if (!existsPath(blockId, in)) // if not a loop back edge
1257                         {
                                     // Emit the predecessor first, then OR in the lane mask of
                                     // the edge from it into this header.
1258                                 EmitBlock(in, state);
1259                                 loopActiveLaneMask |= state->getActiveLaneMaskEdge(in, blockId);
1260                         }
1261                 }
1262
1263                 // Generate an alloca for each of the loop's phis.
1264                 // These will be primed with the incoming, non back edge Phi values
1265                 // before the loop, and then updated just before the loop jumps back to
1266                 // the block.
1267                 struct LoopPhi
1268                 {
1269                         Object::ID phiId; // The Phi identifier.
1270                         Object::ID continueValue; // The source merge value from the loop.
1271                         Array<SIMD::Int> storage; // The alloca.
1272                 };
1273
1274                 std::vector<LoopPhi> phis;
1275
1276                 // For each OpPhi between the block start and the merge instruction:
1277                 for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
1278                 {
1279                         if (insn.opcode() == spv::OpPhi)
1280                         {
                                     // Word 2 of the OpPhi is its result id; the storage is sized
                                     // from the component count of the result's type.
1281                                 auto objectId = Object::ID(insn.word(2));
1282                                 auto &object = getObject(objectId);
1283                                 auto &type = getType(object.type);
1284
1285                                 LoopPhi phi;
1286                                 phi.phiId = Object::ID(insn.word(2));
1287                                 phi.storage = Array<SIMD::Int>(type.sizeInComponents);
1288
1289                                 // Start with the Phi set to 0.
1290                                 for (uint32_t i = 0; i < type.sizeInComponents; i++)
1291                                 {
1292                                         phi.storage[i] = SIMD::Int(0);
1293                                 }
1294
1295                                 // For each Phi source:
                                     // Operands from word 3 onwards are read as
                                     // (value id, source block id) pairs.
1296                                 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
1297                                 {
1298                                         auto varId = Object::ID(insn.word(w + 0));
                                             // Note: this blockId is the phi source's block. It shadows
                                             // the loop header's blockId declared at the top of the function.
1299                                         auto blockId = Block::ID(insn.word(w + 1));
1300                                         if (existsPath(state->currentBlock, blockId))
1301                                         {
1302                                                 // This source is from a loop back-edge.
                                                     // Every back-edge source is expected to name the same value id.
1303                                                 ASSERT(phi.continueValue == 0 || phi.continueValue == varId);
1304                                                 phi.continueValue = varId;
1305                                         }
1306                                         else
1307                                         {
1308                                                 // This source is from a preceding block.
                                                     // Only the lanes selected by that edge's lane mask
                                                     // contribute to the stored phi value.
                                                     // NOTE(review): 'in' and 'mask' do not depend on i but are
                                                     // rebuilt on every iteration - confirm whether hoisting is safe.
1309                                                 for (uint32_t i = 0; i < type.sizeInComponents; i++)
1310                                                 {
1311                                                         auto in = GenericValue(this, state->routine, varId);
1312                                                         auto mask = state->getActiveLaneMaskEdge(blockId, state->currentBlock);
1313                                                         phi.storage[i] = phi.storage[i] | (in.Int(i) & mask);
1314                                                 }
1315                                         }
1316                                 }
1317
1318                                 phis.push_back(phi);
1319                         }
1320                 }
1321
1322                 // Create the loop basic blocks
1323                 auto headerBasicBlock = Nucleus::createBasicBlock();
1324                 auto mergeBasicBlock = Nucleus::createBasicBlock();
1325
1326                 // Start emitting code inside the loop.
1327                 Nucleus::createBr(headerBasicBlock);
1328                 Nucleus::setInsertBlock(headerBasicBlock);
1329
1330                 // Load the Phi values from storage.
1331                 // This will load at the start of each loop.
1332                 for (auto &phi : phis)
1333                 {
1334                         auto &type = getType(getObject(phi.phiId).type);
1335                         auto &dst = state->routine->createIntermediate(phi.phiId, type.sizeInComponents);
1336                         for (unsigned int i = 0u; i < type.sizeInComponents; i++)
1337                         {
1338                                 dst.move(i, phi.storage[i]);
1339                         }
1340                 }
1341
1342                 // Load the active lane mask.
1343                 state->setActiveLaneMask(loopActiveLaneMask);
1344
1345                 // Emit all the non-phi instructions in this loop header block.
                     // The phis were materialised as intermediates above. The EmitResult
                     // returned by each EmitInstruction() call is ignored here.
1346                 for (auto insn = block.begin(); insn != block.end(); insn++)
1347                 {
1348                         if (insn.opcode() != spv::OpPhi)
1349                         {
1350                                 EmitInstruction(insn, state);
1351                         }
1352                 }
1353
1354                 // Emit all the back-edge blocks and use their active lane masks to
1355                 // rebuild the loopActiveLaneMask.
1356                 loopActiveLaneMask = SIMD::Int(0);
1357                 for (auto in : block.ins)
1358                 {
1359                         if (existsPath(blockId, in))
1360                         {
1361                                 EmitBlock(in, state);
1362                                 loopActiveLaneMask |= state->getActiveLaneMaskEdge(in, blockId);
1363                         }
1364                 }
1365
1366                 // Update loop phi values
                     // Phis without a back-edge source (continueValue == 0) keep the
                     // value already held in their storage.
1367                 for (auto &phi : phis)
1368                 {
1369                         if (phi.continueValue != 0)
1370                         {
1371                                 auto val = GenericValue(this, state->routine, phi.continueValue);
1372                                 auto &type = getType(getObject(phi.phiId).type);
1373                                 for (unsigned int i = 0u; i < type.sizeInComponents; i++)
1374                                 {
1375                                         phi.storage[i] = val.Int(i);
1376                                 }
1377                         }
1378                 }
1379
1380                 // Loop body now done.
1381                 // If any lanes are still active, jump back to the loop header,
1382                 // otherwise jump to the merge block.
1383                 Nucleus::createCondBr(AnyTrue(loopActiveLaneMask).value, headerBasicBlock, mergeBasicBlock);
1384
1385                 // Emit the merge block, and we're done.
1386                 Nucleus::setInsertBlock(mergeBasicBlock);
1387                 EmitBlock(block.mergeBlock, state);
1388         }
1389
1390         SpirvShader::EmitResult SpirvShader::EmitInstruction(InsnIterator insn, EmitState *state) const
1391         {
                     // Dispatches a single SPIR-V instruction to its emitter based on
                     // insn.opcode(). Returns the emitter's EmitResult, or
                     // EmitResult::Continue for opcodes that need no work here.
1392                 switch (insn.opcode())
1393                 {
                     // Types, constants, decorations, debug info and module-level
                     // declarations.
1394                 case spv::OpTypeVoid:
1395                 case spv::OpTypeInt:
1396                 case spv::OpTypeFloat:
1397                 case spv::OpTypeBool:
1398                 case spv::OpTypeVector:
1399                 case spv::OpTypeArray:
1400                 case spv::OpTypeRuntimeArray:
1401                 case spv::OpTypeMatrix:
1402                 case spv::OpTypeStruct:
1403                 case spv::OpTypePointer:
1404                 case spv::OpTypeFunction:
1405                 case spv::OpExecutionMode:
1406                 case spv::OpMemoryModel:
1407                 case spv::OpFunction:
1408                 case spv::OpFunctionEnd:
1409                 case spv::OpConstant:
1410                 case spv::OpConstantNull:
1411                 case spv::OpConstantTrue:
1412                 case spv::OpConstantFalse:
1413                 case spv::OpConstantComposite:
1414                 case spv::OpUndef:
1415                 case spv::OpExtension:
1416                 case spv::OpCapability:
1417                 case spv::OpEntryPoint:
1418                 case spv::OpExtInstImport:
1419                 case spv::OpDecorate:
1420                 case spv::OpMemberDecorate:
1421                 case spv::OpGroupDecorate:
1422                 case spv::OpGroupMemberDecorate:
1423                 case spv::OpDecorationGroup:
1424                 case spv::OpName:
1425                 case spv::OpMemberName:
1426                 case spv::OpSource:
1427                 case spv::OpSourceContinued:
1428                 case spv::OpSourceExtension:
1429                 case spv::OpLine:
1430                 case spv::OpNoLine:
1431                 case spv::OpModuleProcessed:
1432                 case spv::OpString:
1433                         // Nothing to do at emit time. These are either fully handled at analysis time,
1434                         // or don't require any work at all.
1435                         return EmitResult::Continue;
1436
                     // No code is emitted for a label.
1437                 case spv::OpLabel:
1438                         return EmitResult::Continue;
1439
1440                 case spv::OpVariable:
1441                         return EmitVariable(insn, state);
1442
                     // EmitLoad handles both forms; it inspects the opcode to detect
                     // the atomic one.
1443                 case spv::OpLoad:
1444                 case spv::OpAtomicLoad:
1445                         return EmitLoad(insn, state);
1446
                     // EmitStore handles both forms; it inspects the opcode to detect
                     // the atomic one.
1447                 case spv::OpStore:
1448                 case spv::OpAtomicStore:
1449                         return EmitStore(insn, state);
1450
1451                 case spv::OpAccessChain:
1452                 case spv::OpInBoundsAccessChain:
1453                         return EmitAccessChain(insn, state);
1454
1455                 case spv::OpCompositeConstruct:
1456                         return EmitCompositeConstruct(insn, state);
1457
1458                 case spv::OpCompositeInsert:
1459                         return EmitCompositeInsert(insn, state);
1460
1461                 case spv::OpCompositeExtract:
1462                         return EmitCompositeExtract(insn, state);
1463
1464                 case spv::OpVectorShuffle:
1465                         return EmitVectorShuffle(insn, state);
1466
1467                 case spv::OpVectorExtractDynamic:
1468                         return EmitVectorExtractDynamic(insn, state);
1469
1470                 case spv::OpVectorInsertDynamic:
1471                         return EmitVectorInsertDynamic(insn, state);
1472
                     // Vector-times-scalar and matrix-times-scalar share one emitter.
1473                 case spv::OpVectorTimesScalar:
1474                 case spv::OpMatrixTimesScalar:
1475                         return EmitVectorTimesScalar(insn, state);
1476
1477                 case spv::OpMatrixTimesVector:
1478                         return EmitMatrixTimesVector(insn, state);
1479
1480                 case spv::OpVectorTimesMatrix:
1481                         return EmitVectorTimesMatrix(insn, state);
1482
                     // Single-operand operations: negation, conversions, classification
                     // and derivatives all go through EmitUnaryOp.
1483                 case spv::OpNot:
1484                 case spv::OpSNegate:
1485                 case spv::OpFNegate:
1486                 case spv::OpLogicalNot:
1487                 case spv::OpConvertFToU:
1488                 case spv::OpConvertFToS:
1489                 case spv::OpConvertSToF:
1490                 case spv::OpConvertUToF:
1491                 case spv::OpBitcast:
1492                 case spv::OpIsInf:
1493                 case spv::OpIsNan:
1494                 case spv::OpDPdx:
1495                 case spv::OpDPdxCoarse:
1496                 case spv::OpDPdy:
1497                 case spv::OpDPdyCoarse:
1498                 case spv::OpFwidth:
1499                 case spv::OpFwidthCoarse:
1500                 case spv::OpDPdxFine:
1501                 case spv::OpDPdyFine:
1502                 case spv::OpFwidthFine:
1503                         return EmitUnaryOp(insn, state);
1504
                     // Two-operand arithmetic, comparison, shift, bitwise and logical
                     // operations all go through EmitBinaryOp.
1505                 case spv::OpIAdd:
1506                 case spv::OpISub:
1507                 case spv::OpIMul:
1508                 case spv::OpSDiv:
1509                 case spv::OpUDiv:
1510                 case spv::OpFAdd:
1511                 case spv::OpFSub:
1512                 case spv::OpFMul:
1513                 case spv::OpFDiv:
1514                 case spv::OpFMod:
1515                 case spv::OpFRem:
1516                 case spv::OpFOrdEqual:
1517                 case spv::OpFUnordEqual:
1518                 case spv::OpFOrdNotEqual:
1519                 case spv::OpFUnordNotEqual:
1520                 case spv::OpFOrdLessThan:
1521                 case spv::OpFUnordLessThan:
1522                 case spv::OpFOrdGreaterThan:
1523                 case spv::OpFUnordGreaterThan:
1524                 case spv::OpFOrdLessThanEqual:
1525                 case spv::OpFUnordLessThanEqual:
1526                 case spv::OpFOrdGreaterThanEqual:
1527                 case spv::OpFUnordGreaterThanEqual:
1528                 case spv::OpSMod:
1529                 case spv::OpSRem:
1530                 case spv::OpUMod:
1531                 case spv::OpIEqual:
1532                 case spv::OpINotEqual:
1533                 case spv::OpUGreaterThan:
1534                 case spv::OpSGreaterThan:
1535                 case spv::OpUGreaterThanEqual:
1536                 case spv::OpSGreaterThanEqual:
1537                 case spv::OpULessThan:
1538                 case spv::OpSLessThan:
1539                 case spv::OpULessThanEqual:
1540                 case spv::OpSLessThanEqual:
1541                 case spv::OpShiftRightLogical:
1542                 case spv::OpShiftRightArithmetic:
1543                 case spv::OpShiftLeftLogical:
1544                 case spv::OpBitwiseOr:
1545                 case spv::OpBitwiseXor:
1546                 case spv::OpBitwiseAnd:
1547                 case spv::OpLogicalOr:
1548                 case spv::OpLogicalAnd:
1549                 case spv::OpLogicalEqual:
1550                 case spv::OpLogicalNotEqual:
1551                 case spv::OpUMulExtended:
1552                 case spv::OpSMulExtended:
1553                         return EmitBinaryOp(insn, state);
1554
1555                 case spv::OpDot:
1556                         return EmitDot(insn, state);
1557
1558                 case spv::OpSelect:
1559                         return EmitSelect(insn, state);
1560
1561                 case spv::OpExtInst:
1562                         return EmitExtendedInstruction(insn, state);
1563
1564                 case spv::OpAny:
1565                         return EmitAny(insn, state);
1566
1567                 case spv::OpAll:
1568                         return EmitAll(insn, state);
1569
1570                 case spv::OpBranch:
1571                         return EmitBranch(insn, state);
1572
                     // Note: EmitLoop() skips OpPhi when emitting a loop header block
                     // and materialises those phis itself.
1573                 case spv::OpPhi:
1574                         return EmitPhi(insn, state);
1575
                     // Nothing is emitted for the merge instructions themselves.
1576                 case spv::OpSelectionMerge:
1577                 case spv::OpLoopMerge:
1578                         return EmitResult::Continue;
1579
1580                 case spv::OpBranchConditional:
1581                         return EmitBranchConditional(insn, state);
1582
1583                 case spv::OpSwitch:
1584                         return EmitSwitch(insn, state);
1585
1586                 case spv::OpUnreachable:
1587                         return EmitUnreachable(insn, state);
1588
1589                 case spv::OpReturn:
1590                         return EmitReturn(insn, state);
1591
                     // Any opcode not listed above is reported as unimplemented. If
                     // UNIMPLEMENTED returns, control reaches the return below.
1592                 default:
1593                         UNIMPLEMENTED("opcode: %s", OpcodeName(insn.opcode()).c_str());
1594                         break;
1595                 }
1596
1597                 return EmitResult::Continue;
1598         }
1599
1600         SpirvShader::EmitResult SpirvShader::EmitVariable(InsnIterator insn, EmitState *state) const
1601         {
                     // Sets up the routine-side backing for an OpVariable according to the
                     // storage class of the variable's type. Always returns
                     // EmitResult::Continue; storage classes not listed below need no work here.
1602                 auto routine = state->routine;
                     // Word 2 of the instruction is the variable's result id.
1603                 Object::ID resultId = insn.word(2);
1604                 auto &object = getObject(resultId);
1605                 auto &objectTy = getType(object.type);
1606                 switch (objectTy.storageClass)
1607                 {
1608                 case spv::StorageClassInput:
1609                 {
1610                         if (object.kind == Object::Kind::InterfaceVariable)
1611                         {
                                     // Copy each interface component visited by VisitInterface
                                     // from routine->inputs into consecutive elements of the
                                     // variable's value. The input slot index is
                                     // (Location << 2) | Component. The 'type' argument of the
                                     // callback is not used.
1612                                 auto &dst = routine->getValue(resultId);
1613                                 int offset = 0;
1614                                 VisitInterface(resultId,
1615                                                                 [&](Decorations const &d, AttribType type) {
1616                                                                         auto scalarSlot = d.Location << 2 | d.Component;
1617                                                                         dst[offset++] = routine->inputs[scalarSlot];
1618                                                                 });
1619                         }
1620                         break;
1621                 }
1622                 case spv::StorageClassUniform:
1623                 case spv::StorageClassStorageBuffer:
1624                 {
                             // Resolve the variable's DescriptorSet/Binding decorations to the
                             // address of the bound buffer's data and record it as the
                             // variable's physical pointer.
1625                         Decorations d{};
1626                         ApplyDecorationsForId(&d, resultId);
1627                         ASSERT(d.DescriptorSet >= 0);
1628                         ASSERT(d.Binding >= 0);
1629
                             // bindingOffset is a host-side value obtained from the pipeline layout.
1630                         size_t bindingOffset = routine->pipelineLayout->getBindingOffset(d.DescriptorSet, d.Binding);
1631
                             // Pointer chain: descriptor set -> binding entry -> buffer object
                             // -> buffer data, then add the offset stored in the binding entry.
                             // NOTE(review): VkDescriptorBufferInfo::offset is a VkDeviceSize in
                             // the Vulkan API but is read through Pointer<Int> below - confirm
                             // that reading only an Int is intended.
1632                         Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
1633                         Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // VkDescriptorBufferInfo*
1634                         Pointer<Byte> buffer = *Pointer<Pointer<Byte>>(binding + OFFSET(VkDescriptorBufferInfo, buffer)); // vk::Buffer*
1635                         Pointer<Byte> data = *Pointer<Pointer<Byte>>(buffer + vk::Buffer::DataOffset); // void*
1636                         Int offset = *Pointer<Int>(binding + OFFSET(VkDescriptorBufferInfo, offset));
1637                         Pointer<Byte> address = data + offset;
1638                         routine->physicalPointers[resultId] = address;
1639                         break;
1640                 }
1641                 case spv::StorageClassPushConstant:
1642                 {
                             // Push constant variables point directly at the routine's
                             // push constant pointer.
1643                         routine->physicalPointers[resultId] = routine->pushConstants;
1644                         break;
1645                 }
1646                 default:
1647                         break;
1648                 }
1649
1650                 return EmitResult::Continue;
1651         }
1652
1653         SpirvShader::EmitResult SpirvShader::EmitLoad(InsnIterator insn, EmitState *state) const
1654         {
                     // Emits an OpLoad or OpAtomicLoad: reads resultTy.sizeInComponents
                     // components through the pointer operand and publishes them as the
                     // intermediate for the result id. Always returns EmitResult::Continue.
1655                 auto routine = state->routine;
1656                 bool atomic = (insn.opcode() == spv::OpAtomicLoad);
                     // Word 2 is the result id and word 3 is the pointer id.
1657                 Object::ID resultId = insn.word(2);
1658                 Object::ID pointerId = insn.word(3);
1659                 auto &result = getObject(resultId);
1660                 auto &resultTy = getType(result.type);
1661                 auto &pointer = getObject(pointerId);
1662                 auto &pointerBase = getObject(pointer.pointerBase);
1663                 auto &pointerBaseTy = getType(pointerBase.type);
                     // Non-atomic loads use relaxed ordering.
1664                 std::memory_order memoryOrder = std::memory_order_relaxed;
1665
1666                 if(atomic)
1667                 {
                             // For the atomic form, word 5 names a constant whose first word
                             // is the memory semantics mask.
1668                         Object::ID semanticsId = insn.word(5);
1669                         auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1670                         memoryOrder = MemoryOrder(memorySemantics);
1671                 }
1672
1673                 ASSERT(getType(pointer.type).element == result.type);
1674                 ASSERT(Type::ID(insn.word(1)) == result.type);
1675                 ASSERT(!atomic || getType(getType(pointer.type).element).opcode() == spv::OpTypeInt);  // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1676
1677                 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1678                 {
1679                         UNIMPLEMENTED("StorageClassImage load not yet implemented");
1680                 }
1681
                     // The base address is either the physical pointer recorded for the
                     // base object, or the start of the base object's routine-side value.
1682                 Pointer<Float> ptrBase;
1683                 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1684                 {
1685                         ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1686                 }
1687                 else
1688                 {
1689                         ptrBase = &routine->getValue(pointer.pointerBase)[0];
1690                 }
1691
1692                 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1693                 auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
1694
1695                 auto load = std::unique_ptr<SIMD::Float[]>(new SIMD::Float[resultTy.sizeInComponents]);
1696
                     // Slow path when the pointer is a per-lane value or some lane is
                     // inactive, fast path otherwise.
                     // NOTE(review): the capitalised If/Else are presumably Reactor
                     // constructs that branch in the generated code - confirm.
1697                 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1698                 {
1699                         // Divergent offsets or masked lanes.
                             // Per-lane offsets come from the pointer's intermediate when the
                             // pointer is a value, and are zero otherwise.
1700                         auto offsets = pointer.kind == Object::Kind::Value ?
1701                                         As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1702                                         RValue<SIMD::Int>(SIMD::Int(0));
1703                         for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1704                         {
1705                                 // i wish i had a Float,Float,Float,Float constructor here..
1706                                 for (int j = 0; j < SIMD::Width; j++)
1707                                 {
                                             // Only lanes set in the active lane mask are loaded;
                                             // the other lanes of load[i] are not written here.
1708                                         If(Extract(state->activeLaneMask(), j) != 0)
1709                                         {
                                                     // Element index for lane j: component i plus the
                                                     // lane's offset, scaled and biased by the lane
                                                     // when storage is interleaved by lane.
1710                                                 Int offset = Int(i) + Extract(offsets, j);
1711                                                 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1712                                                 load[i] = Insert(load[i], Load(&ptrBase[offset], sizeof(float), atomic, memoryOrder), j);
1713                                         }
1714                                 }
1715                         }
1716                 }
1717                 Else
1718                 {
1719                         // No divergent offsets or masked lanes.
1720                         if (interleavedByLane)
1721                         {
1722                                 // Lane-interleaved data.
                                     // Component i is read as one whole SIMD::Float.
1723                                 Pointer<SIMD::Float> src = ptrBase;
1724                                 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1725                                 {
1726                                         load[i] = Load(&src[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1727                                 }
1728                         }
1729                         else
1730                         {
1731                                 // Non-interleaved data.
                                     // One Float is read per component and converted to a
                                     // SIMD::Float (presumably replicated to every lane - confirm).
1732                                 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1733                                 {
1734                                         load[i] = RValue<SIMD::Float>(Load(&ptrBase[i], sizeof(float), atomic, memoryOrder));  // TODO: optimize alignment
1735                                 }
1736                         }
1737                 }
1738
                     // Publish the loaded components as the result's intermediate.
1739                 auto &dst = routine->createIntermediate(resultId, resultTy.sizeInComponents);
1740                 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1741                 {
1742                         dst.move(i, load[i]);
1743                 }
1744
1745                 return EmitResult::Continue;
1746         }
1747
1748         SpirvShader::EmitResult SpirvShader::EmitStore(InsnIterator insn, EmitState *state) const
1749         {
                     // Emits an OpStore or OpAtomicStore: writes elementTy.sizeInComponents
                     // components of the source object through the pointer operand. The
                     // source is either a constant or an intermediate. Always returns
                     // EmitResult::Continue.
1750                 auto routine = state->routine;
1751                 bool atomic = (insn.opcode() == spv::OpAtomicStore);
                     // Word 1 is the pointer id. The object to store is word 4 for the
                     // atomic form and word 2 otherwise.
1752                 Object::ID pointerId = insn.word(1);
1753                 Object::ID objectId = insn.word(atomic ? 4 : 2);
1754                 auto &object = getObject(objectId);
1755                 auto &pointer = getObject(pointerId);
1756                 auto &pointerTy = getType(pointer.type);
1757                 auto &elementTy = getType(pointerTy.element);
1758                 auto &pointerBase = getObject(pointer.pointerBase);
1759                 auto &pointerBaseTy = getType(pointerBase.type);
                     // Non-atomic stores use relaxed ordering.
1760                 std::memory_order memoryOrder = std::memory_order_relaxed;
1761
1762                 if(atomic)
1763                 {
                             // For the atomic form, word 3 names a constant whose first word
                             // is the memory semantics mask.
1764                         Object::ID semanticsId = insn.word(3);
1765                         auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1766                         memoryOrder = MemoryOrder(memorySemantics);
1767                 }
1768
1769                 ASSERT(!atomic || elementTy.opcode() == spv::OpTypeInt);  // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1770
1771                 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1772                 {
1773                         UNIMPLEMENTED("StorageClassImage store not yet implemented");
1774                 }
1775
                     // The base address is either the physical pointer recorded for the
                     // base object, or the start of the base object's routine-side value.
1776                 Pointer<Float> ptrBase;
1777                 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1778                 {
1779                         ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1780                 }
1781                 else
1782                 {
1783                         ptrBase = &routine->getValue(pointer.pointerBase)[0];
1784                 }
1785
1786                 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1787                 auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
1788
1789                 if (object.kind == Object::Kind::Constant)
1790                 {
1791                         // Constant source data.
                             // The constant's storage is viewed as floats through a pointer
                             // cast; no numeric conversion takes place.
1792                         auto src = reinterpret_cast<float *>(object.constantValue.get());
1793                         If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1794                         {
1795                                 // Divergent offsets or masked lanes.
                                     // Per-lane offsets come from the pointer's intermediate when
                                     // the pointer is a value, and are zero otherwise.
1796                                 auto offsets = pointer.kind == Object::Kind::Value ?
1797                                                 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1798                                                 RValue<SIMD::Int>(SIMD::Int(0));
1799                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1800                                 {
1801                                         for (int j = 0; j < SIMD::Width; j++)
1802                                         {
                                                     // Only lanes set in the active lane mask are stored.
1803                                                 If(Extract(state->activeLaneMask(), j) != 0)
1804                                                 {
1805                                                         Int offset = Int(i) + Extract(offsets, j);
1806                                                         if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1807                                                         Store(RValue<Float>(src[i]), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1808                                                 }
1809                                         }
1810                                 }
1811                         }
1812                         Else
1813                         {
1814                                 // Constant source data.
1815                                 // No divergent offsets or masked lanes.
                                     // NOTE(review): unlike the intermediate-source path below,
                                     // this branch does not consult interleavedByLane and always
                                     // stores a whole SIMD::Float per component - confirm that
                                     // this is intended for non-interleaved storage.
1816                                 Pointer<SIMD::Float> dst = ptrBase;
1817                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1818                                 {
1819                                         Store(RValue<SIMD::Float>(src[i]), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1820                                 }
1821                         }
1822                 }
1823                 else
1824                 {
1825                         // Intermediate source data.
1826                         auto &src = routine->getIntermediate(objectId);
1827                         If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1828                         {
1829                                 // Divergent offsets or masked lanes.
1830                                 auto offsets = pointer.kind == Object::Kind::Value ?
1831                                                 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1832                                                 RValue<SIMD::Int>(SIMD::Int(0));
1833                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1834                                 {
1835                                         for (int j = 0; j < SIMD::Width; j++)
1836                                         {
                                                     // Only lanes set in the active lane mask are stored;
                                                     // lane j of component i is written as one float.
1837                                                 If(Extract(state->activeLaneMask(), j) != 0)
1838                                                 {
1839                                                         Int offset = Int(i) + Extract(offsets, j);
1840                                                         if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1841                                                         Store(Extract(src.Float(i), j), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1842                                                 }
1843                                         }
1844                                 }
1845                         }
1846                         Else
1847                         {
1848                                 // No divergent offsets or masked lanes.
1849                                 if (interleavedByLane)
1850                                 {
1851                                         // Lane-interleaved data.
                                             // Component i is written as one whole SIMD::Float.
1852                                         Pointer<SIMD::Float> dst = ptrBase;
1853                                         for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1854                                         {
1855                                                 Store(src.Float(i), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1856                                         }
1857                                 }
1858                                 else
1859                                 {
1860                                         // Intermediate source data. Non-interleaved data.
                                             // NOTE(review): this branch also indexes through a
                                             // Pointer<SIMD::Float>, whereas the non-interleaved load
                                             // in EmitLoad indexes the Float pointer directly -
                                             // confirm the addressing is intended.
1861                                         Pointer<SIMD::Float> dst = ptrBase;
1862                                         for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1863                                         {
1864                                                 Store<SIMD::Float>(SIMD::Float(src.Float(i)), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1865                                         }
1866                                 }
1867                         }
1868                 }
1869
1870                 return EmitResult::Continue;
1871         }
1872
1873         SpirvShader::EmitResult SpirvShader::EmitAccessChain(InsnIterator insn, EmitState *state) const
1874         {
1875                 auto routine = state->routine;
1876                 Type::ID typeId = insn.word(1);
1877                 Object::ID resultId = insn.word(2);
1878                 Object::ID baseId = insn.word(3);
1879                 uint32_t numIndexes = insn.wordCount() - 4;
1880                 const uint32_t *indexes = insn.wordPointer(4);
1881                 auto &type = getType(typeId);
1882                 ASSERT(type.sizeInComponents == 1);
1883                 ASSERT(getObject(baseId).pointerBase == getObject(resultId).pointerBase);
1884
1885                 auto &dst = routine->createIntermediate(resultId, type.sizeInComponents);
1886
1887                 if(type.storageClass == spv::StorageClassPushConstant ||
1888                    type.storageClass == spv::StorageClassUniform ||
1889                    type.storageClass == spv::StorageClassStorageBuffer)
1890                 {
1891                         dst.move(0, WalkExplicitLayoutAccessChain(baseId, numIndexes, indexes, routine));
1892                 }
1893                 else
1894                 {
1895                         dst.move(0, WalkAccessChain(baseId, numIndexes, indexes, routine));
1896                 }
1897
1898                 return EmitResult::Continue;
1899         }
1900
1901         SpirvShader::EmitResult SpirvShader::EmitCompositeConstruct(InsnIterator insn, EmitState *state) const
1902         {
1903                 auto routine = state->routine;
1904                 auto &type = getType(insn.word(1));
1905                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1906                 auto offset = 0u;
1907
1908                 for (auto i = 0u; i < insn.wordCount() - 3; i++)
1909                 {
1910                         Object::ID srcObjectId = insn.word(3u + i);
1911                         auto & srcObject = getObject(srcObjectId);
1912                         auto & srcObjectTy = getType(srcObject.type);
1913                         GenericValue srcObjectAccess(this, routine, srcObjectId);
1914
1915                         for (auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
1916                         {
1917                                 dst.move(offset++, srcObjectAccess.Float(j));
1918                         }
1919                 }
1920
1921                 return EmitResult::Continue;
1922         }
1923
1924         SpirvShader::EmitResult SpirvShader::EmitCompositeInsert(InsnIterator insn, EmitState *state) const
1925         {
1926                 auto routine = state->routine;
1927                 Type::ID resultTypeId = insn.word(1);
1928                 auto &type = getType(resultTypeId);
1929                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1930                 auto &newPartObject = getObject(insn.word(3));
1931                 auto &newPartObjectTy = getType(newPartObject.type);
1932                 auto firstNewComponent = WalkLiteralAccessChain(resultTypeId, insn.wordCount() - 5, insn.wordPointer(5));
1933
1934                 GenericValue srcObjectAccess(this, routine, insn.word(4));
1935                 GenericValue newPartObjectAccess(this, routine, insn.word(3));
1936
1937                 // old components before
1938                 for (auto i = 0u; i < firstNewComponent; i++)
1939                 {
1940                         dst.move(i, srcObjectAccess.Float(i));
1941                 }
1942                 // new part
1943                 for (auto i = 0u; i < newPartObjectTy.sizeInComponents; i++)
1944                 {
1945                         dst.move(firstNewComponent + i, newPartObjectAccess.Float(i));
1946                 }
1947                 // old components after
1948                 for (auto i = firstNewComponent + newPartObjectTy.sizeInComponents; i < type.sizeInComponents; i++)
1949                 {
1950                         dst.move(i, srcObjectAccess.Float(i));
1951                 }
1952
1953                 return EmitResult::Continue;
1954         }
1955
1956         SpirvShader::EmitResult SpirvShader::EmitCompositeExtract(InsnIterator insn, EmitState *state) const
1957         {
1958                 auto routine = state->routine;
1959                 auto &type = getType(insn.word(1));
1960                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1961                 auto &compositeObject = getObject(insn.word(3));
1962                 Type::ID compositeTypeId = compositeObject.definition.word(1);
1963                 auto firstComponent = WalkLiteralAccessChain(compositeTypeId, insn.wordCount() - 4, insn.wordPointer(4));
1964
1965                 GenericValue compositeObjectAccess(this, routine, insn.word(3));
1966                 for (auto i = 0u; i < type.sizeInComponents; i++)
1967                 {
1968                         dst.move(i, compositeObjectAccess.Float(firstComponent + i));
1969                 }
1970
1971                 return EmitResult::Continue;
1972         }
1973
1974         SpirvShader::EmitResult SpirvShader::EmitVectorShuffle(InsnIterator insn, EmitState *state) const
1975         {
1976                 auto routine = state->routine;
1977                 auto &type = getType(insn.word(1));
1978                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1979
1980                 // Note: number of components in result type, first half type, and second
1981                 // half type are all independent.
1982                 auto &firstHalfType = getType(getObject(insn.word(3)).type);
1983
1984                 GenericValue firstHalfAccess(this, routine, insn.word(3));
1985                 GenericValue secondHalfAccess(this, routine, insn.word(4));
1986
1987                 for (auto i = 0u; i < type.sizeInComponents; i++)
1988                 {
1989                         auto selector = insn.word(5 + i);
1990                         if (selector == static_cast<uint32_t>(-1))
1991                         {
1992                                 // Undefined value. Until we decide to do real undef values, zero is as good
1993                                 // a value as any
1994                                 dst.move(i, RValue<SIMD::Float>(0.0f));
1995                         }
1996                         else if (selector < firstHalfType.sizeInComponents)
1997                         {
1998                                 dst.move(i, firstHalfAccess.Float(selector));
1999                         }
2000                         else
2001                         {
2002                                 dst.move(i, secondHalfAccess.Float(selector - firstHalfType.sizeInComponents));
2003                         }
2004                 }
2005
2006                 return EmitResult::Continue;
2007         }
2008
2009         SpirvShader::EmitResult SpirvShader::EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const
2010         {
2011                 auto routine = state->routine;
2012                 auto &type = getType(insn.word(1));
2013                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2014                 auto &srcType = getType(getObject(insn.word(3)).type);
2015
2016                 GenericValue src(this, routine, insn.word(3));
2017                 GenericValue index(this, routine, insn.word(4));
2018
2019                 SIMD::UInt v = SIMD::UInt(0);
2020
2021                 for (auto i = 0u; i < srcType.sizeInComponents; i++)
2022                 {
2023                         v |= CmpEQ(index.UInt(0), SIMD::UInt(i)) & src.UInt(i);
2024                 }
2025
2026                 dst.move(0, v);
2027                 return EmitResult::Continue;
2028         }
2029
2030         SpirvShader::EmitResult SpirvShader::EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const
2031         {
2032                 auto routine = state->routine;
2033                 auto &type = getType(insn.word(1));
2034                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2035
2036                 GenericValue src(this, routine, insn.word(3));
2037                 GenericValue component(this, routine, insn.word(4));
2038                 GenericValue index(this, routine, insn.word(5));
2039
2040                 for (auto i = 0u; i < type.sizeInComponents; i++)
2041                 {
2042                         SIMD::UInt mask = CmpEQ(SIMD::UInt(i), index.UInt(0));
2043                         dst.move(i, (src.UInt(i) & ~mask) | (component.UInt(0) & mask));
2044                 }
2045                 return EmitResult::Continue;
2046         }
2047
2048         SpirvShader::EmitResult SpirvShader::EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const
2049         {
2050                 auto routine = state->routine;
2051                 auto &type = getType(insn.word(1));
2052                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2053                 auto lhs = GenericValue(this, routine, insn.word(3));
2054                 auto rhs = GenericValue(this, routine, insn.word(4));
2055
2056                 for (auto i = 0u; i < type.sizeInComponents; i++)
2057                 {
2058                         dst.move(i, lhs.Float(i) * rhs.Float(0));
2059                 }
2060
2061                 return EmitResult::Continue;
2062         }
2063
2064         SpirvShader::EmitResult SpirvShader::EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const
2065         {
2066                 auto routine = state->routine;
2067                 auto &type = getType(insn.word(1));
2068                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2069                 auto lhs = GenericValue(this, routine, insn.word(3));
2070                 auto rhs = GenericValue(this, routine, insn.word(4));
2071                 auto rhsType = getType(getObject(insn.word(4)).type);
2072
2073                 for (auto i = 0u; i < type.sizeInComponents; i++)
2074                 {
2075                         SIMD::Float v = lhs.Float(i) * rhs.Float(0);
2076                         for (auto j = 1u; j < rhsType.sizeInComponents; j++)
2077                         {
2078                                 v += lhs.Float(i + type.sizeInComponents * j) * rhs.Float(j);
2079                         }
2080                         dst.move(i, v);
2081                 }
2082
2083                 return EmitResult::Continue;
2084         }
2085
2086         SpirvShader::EmitResult SpirvShader::EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const
2087         {
2088                 auto routine = state->routine;
2089                 auto &type = getType(insn.word(1));
2090                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2091                 auto lhs = GenericValue(this, routine, insn.word(3));
2092                 auto rhs = GenericValue(this, routine, insn.word(4));
2093                 auto lhsType = getType(getObject(insn.word(3)).type);
2094
2095                 for (auto i = 0u; i < type.sizeInComponents; i++)
2096                 {
2097                         SIMD::Float v = lhs.Float(0) * rhs.Float(i * lhsType.sizeInComponents);
2098                         for (auto j = 1u; j < lhsType.sizeInComponents; j++)
2099                         {
2100                                 v += lhs.Float(j) * rhs.Float(i * lhsType.sizeInComponents + j);
2101                         }
2102                         dst.move(i, v);
2103                 }
2104
2105                 return EmitResult::Continue;
2106         }
2107
2108         SpirvShader::EmitResult SpirvShader::EmitUnaryOp(InsnIterator insn, EmitState *state) const
2109         {
2110                 auto routine = state->routine;
2111                 auto &type = getType(insn.word(1));
2112                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2113                 auto src = GenericValue(this, routine, insn.word(3));
2114
2115                 for (auto i = 0u; i < type.sizeInComponents; i++)
2116                 {
2117                         switch (insn.opcode())
2118                         {
2119                         case spv::OpNot:
2120                         case spv::OpLogicalNot:         // logical not == bitwise not due to all-bits boolean representation
2121                                 dst.move(i, ~src.UInt(i));
2122                                 break;
2123                         case spv::OpSNegate:
2124                                 dst.move(i, -src.Int(i));
2125                                 break;
2126                         case spv::OpFNegate:
2127                                 dst.move(i, -src.Float(i));
2128                                 break;
2129                         case spv::OpConvertFToU:
2130                                 dst.move(i, SIMD::UInt(src.Float(i)));
2131                                 break;
2132                         case spv::OpConvertFToS:
2133                                 dst.move(i, SIMD::Int(src.Float(i)));
2134                                 break;
2135                         case spv::OpConvertSToF:
2136                                 dst.move(i, SIMD::Float(src.Int(i)));
2137                                 break;
2138                         case spv::OpConvertUToF:
2139                                 dst.move(i, SIMD::Float(src.UInt(i)));
2140                                 break;
2141                         case spv::OpBitcast:
2142                                 dst.move(i, src.Float(i));
2143                                 break;
2144                         case spv::OpIsInf:
2145                                 dst.move(i, IsInf(src.Float(i)));
2146                                 break;
2147                         case spv::OpIsNan:
2148                                 dst.move(i, IsNan(src.Float(i)));
2149                                 break;
2150                         case spv::OpDPdx:
2151                         case spv::OpDPdxCoarse:
2152                                 // Derivative instructions: FS invocations are laid out like so:
2153                                 //    0 1
2154                                 //    2 3
2155                                 static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
2156                                 dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
2157                                 break;
2158                         case spv::OpDPdy:
2159                         case spv::OpDPdyCoarse:
2160                                 dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
2161                                 break;
2162                         case spv::OpFwidth:
2163                         case spv::OpFwidthCoarse:
2164                                 dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))
2165                                                         + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
2166                                 break;
2167                         case spv::OpDPdxFine:
2168                         {
2169                                 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
2170                                 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
2171                                 SIMD::Float v = SIMD::Float(firstRow);
2172                                 v = Insert(v, secondRow, 2);
2173                                 v = Insert(v, secondRow, 3);
2174                                 dst.move(i, v);
2175                                 break;
2176                         }
2177                         case spv::OpDPdyFine:
2178                         {
2179                                 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
2180                                 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
2181                                 SIMD::Float v = SIMD::Float(firstColumn);
2182                                 v = Insert(v, secondColumn, 1);
2183                                 v = Insert(v, secondColumn, 3);
2184                                 dst.move(i, v);
2185                                 break;
2186                         }
2187                         case spv::OpFwidthFine:
2188                         {
2189                                 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
2190                                 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
2191                                 SIMD::Float dpdx = SIMD::Float(firstRow);
2192                                 dpdx = Insert(dpdx, secondRow, 2);
2193                                 dpdx = Insert(dpdx, secondRow, 3);
2194                                 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
2195                                 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
2196                                 SIMD::Float dpdy = SIMD::Float(firstColumn);
2197                                 dpdy = Insert(dpdy, secondColumn, 1);
2198                                 dpdy = Insert(dpdy, secondColumn, 3);
2199                                 dst.move(i, Abs(dpdx) + Abs(dpdy));
2200                                 break;
2201                         }
2202                         default:
2203                                 UNIMPLEMENTED("Unhandled unary operator %s", OpcodeName(insn.opcode()).c_str());
2204                         }
2205                 }
2206
2207                 return EmitResult::Continue;
2208         }
2209
2210         SpirvShader::EmitResult SpirvShader::EmitBinaryOp(InsnIterator insn, EmitState *state) const
2211         {
2212                 auto routine = state->routine;
2213                 auto &type = getType(insn.word(1));
2214                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2215                 auto &lhsType = getType(getObject(insn.word(3)).type);
2216                 auto lhs = GenericValue(this, routine, insn.word(3));
2217                 auto rhs = GenericValue(this, routine, insn.word(4));
2218
2219                 for (auto i = 0u; i < lhsType.sizeInComponents; i++)
2220                 {
2221                         switch (insn.opcode())
2222                         {
2223                         case spv::OpIAdd:
2224                                 dst.move(i, lhs.Int(i) + rhs.Int(i));
2225                                 break;
2226                         case spv::OpISub:
2227                                 dst.move(i, lhs.Int(i) - rhs.Int(i));
2228                                 break;
2229                         case spv::OpIMul:
2230                                 dst.move(i, lhs.Int(i) * rhs.Int(i));
2231                                 break;
2232                         case spv::OpSDiv:
2233                         {
2234                                 SIMD::Int a = lhs.Int(i);
2235                                 SIMD::Int b = rhs.Int(i);
2236                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2237                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2238                                 dst.move(i, a / b);
2239                                 break;
2240                         }
2241                         case spv::OpUDiv:
2242                         {
2243                                 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2244                                 dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
2245                                 break;
2246                         }
2247                         case spv::OpSRem:
2248                         {
2249                                 SIMD::Int a = lhs.Int(i);
2250                                 SIMD::Int b = rhs.Int(i);
2251                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2252                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2253                                 dst.move(i, a % b);
2254                                 break;
2255                         }
2256                         case spv::OpSMod:
2257                         {
2258                                 SIMD::Int a = lhs.Int(i);
2259                                 SIMD::Int b = rhs.Int(i);
2260                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2261                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2262                                 auto mod = a % b;
2263                                 // If a and b have opposite signs, the remainder operation takes
2264                                 // the sign from a but OpSMod is supposed to take the sign of b.
2265                                 // Adding b will ensure that the result has the correct sign and
2266                                 // that it is still congruent to a modulo b.
2267                                 //
2268                                 // See also http://mathforum.org/library/drmath/view/52343.html
2269                                 auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
2270                                 auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
2271                                 dst.move(i, As<SIMD::Float>(fixedMod));
2272                                 break;
2273                         }
2274                         case spv::OpUMod:
2275                         {
2276                                 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2277                                 dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
2278                                 break;
2279                         }
2280                         case spv::OpIEqual:
2281                         case spv::OpLogicalEqual:
2282                                 dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
2283                                 break;
2284                         case spv::OpINotEqual:
2285                         case spv::OpLogicalNotEqual:
2286                                 dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
2287                                 break;
2288                         case spv::OpUGreaterThan:
2289                                 dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
2290                                 break;
2291                         case spv::OpSGreaterThan:
2292                                 dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i)));
2293                                 break;
2294                         case spv::OpUGreaterThanEqual:
2295                                 dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
2296                                 break;
2297                         case spv::OpSGreaterThanEqual:
2298                                 dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i)));
2299                                 break;
2300                         case spv::OpULessThan:
2301                                 dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
2302                                 break;
2303                         case spv::OpSLessThan:
2304                                 dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i)));
2305                                 break;
2306                         case spv::OpULessThanEqual:
2307                                 dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
2308                                 break;
2309                         case spv::OpSLessThanEqual:
2310                                 dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i)));
2311                                 break;
2312                         case spv::OpFAdd:
2313                                 dst.move(i, lhs.Float(i) + rhs.Float(i));
2314                                 break;
2315                         case spv::OpFSub:
2316                                 dst.move(i, lhs.Float(i) - rhs.Float(i));
2317                                 break;
2318                         case spv::OpFMul:
2319                                 dst.move(i, lhs.Float(i) * rhs.Float(i));
2320                                 break;
2321                         case spv::OpFDiv:
2322                                 dst.move(i, lhs.Float(i) / rhs.Float(i));
2323                                 break;
2324                         case spv::OpFMod:
2325                                 // TODO(b/126873455): inaccurate for values greater than 2^24
2326                                 dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
2327                                 break;
2328                         case spv::OpFRem:
2329                                 dst.move(i, lhs.Float(i) % rhs.Float(i));
2330                                 break;
2331                         case spv::OpFOrdEqual:
2332                                 dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
2333                                 break;
2334                         case spv::OpFUnordEqual:
2335                                 dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
2336                                 break;
2337                         case spv::OpFOrdNotEqual:
2338                                 dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
2339                                 break;
2340                         case spv::OpFUnordNotEqual:
2341                                 dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
2342                                 break;
2343                         case spv::OpFOrdLessThan:
2344                                 dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i)));
2345                                 break;
2346                         case spv::OpFUnordLessThan:
2347                                 dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i)));
2348                                 break;
2349                         case spv::OpFOrdGreaterThan:
2350                                 dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i)));
2351                                 break;
2352                         case spv::OpFUnordGreaterThan:
2353                                 dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
2354                                 break;
2355                         case spv::OpFOrdLessThanEqual:
2356                                 dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i)));
2357                                 break;
2358                         case spv::OpFUnordLessThanEqual:
2359                                 dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i)));
2360                                 break;
2361                         case spv::OpFOrdGreaterThanEqual:
2362                                 dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i)));
2363                                 break;
2364                         case spv::OpFUnordGreaterThanEqual:
2365                                 dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
2366                                 break;
2367                         case spv::OpShiftRightLogical:
2368                                 dst.move(i, lhs.UInt(i) >> rhs.UInt(i));
2369                                 break;
2370                         case spv::OpShiftRightArithmetic:
2371                                 dst.move(i, lhs.Int(i) >> rhs.Int(i));
2372                                 break;
2373                         case spv::OpShiftLeftLogical:
2374                                 dst.move(i, lhs.UInt(i) << rhs.UInt(i));
2375                                 break;
2376                         case spv::OpBitwiseOr:
2377                         case spv::OpLogicalOr:
2378                                 dst.move(i, lhs.UInt(i) | rhs.UInt(i));
2379                                 break;
2380                         case spv::OpBitwiseXor:
2381                                 dst.move(i, lhs.UInt(i) ^ rhs.UInt(i));
2382                                 break;
2383                         case spv::OpBitwiseAnd:
2384                         case spv::OpLogicalAnd:
2385                                 dst.move(i, lhs.UInt(i) & rhs.UInt(i));
2386                                 break;
2387                         case spv::OpSMulExtended:
2388                                 // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
2389                                 // In our flat view then, component i is the i'th component of the first member;
2390                                 // component i + N is the i'th component of the second member.
2391                                 dst.move(i, lhs.Int(i) * rhs.Int(i));
2392                                 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
2393                                 break;
2394                         case spv::OpUMulExtended:
2395                                 dst.move(i, lhs.UInt(i) * rhs.UInt(i));
2396                                 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
2397                                 break;
2398                         default:
2399                                 UNIMPLEMENTED("Unhandled binary operator %s", OpcodeName(insn.opcode()).c_str());
2400                         }
2401                 }
2402
2403                 return EmitResult::Continue;
2404         }
2405
2406         SpirvShader::EmitResult SpirvShader::EmitDot(InsnIterator insn, EmitState *state) const
2407         {
                     // Emits a dot product of the operands named by instruction words 3 and 4.
                     // Word 1 is the result type (asserted to be a single component) and
                     // word 2 is the result id the intermediate is created for.
2408                 auto routine = state->routine;
2409                 auto &type = getType(insn.word(1));
2410                 ASSERT(type.sizeInComponents == 1);
2411                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2412                 auto &lhsType = getType(getObject(insn.word(3)).type);
2413                 auto lhs = GenericValue(this, routine, insn.word(3));
2414                 auto rhs = GenericValue(this, routine, insn.word(4));
2415
                     // The number of components to accumulate comes from the lhs operand's type.
2416                 dst.move(0, Dot(lhsType.sizeInComponents, lhs, rhs));
2417                 return EmitResult::Continue;
2418         }
2419
2420         SpirvShader::EmitResult SpirvShader::EmitSelect(InsnIterator insn, EmitState *state) const
2421         {
                     // Emits a per-component select: word 3 is the condition, word 4 the value
                     // used where the condition bits are set, word 5 the value used elsewhere.
                     // Word 1 is the result type and word 2 the result id.
2422                 auto routine = state->routine;
2423                 auto &type = getType(insn.word(1));
2424                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2425                 auto cond = GenericValue(this, routine, insn.word(3));
2426                 auto lhs = GenericValue(this, routine, insn.word(4));
2427                 auto rhs = GenericValue(this, routine, insn.word(5));
2428
                     // The condition is indexed with the same component index as the operands.
                     // The blend is purely bitwise, so it only yields whole lhs/rhs values when
                     // each condition lane is all-ones or all-zeros.
                     // NOTE(review): presumably that is how booleans are represented here - confirm.
2429                 for (auto i = 0u; i < type.sizeInComponents; i++)
2430                 {
2431                         dst.move(i, (cond.Int(i) & lhs.Int(i)) | (~cond.Int(i) & rhs.Int(i)));   // FIXME: IfThenElse()
2432                 }
2433
2434                 return EmitResult::Continue;
2435         }
2436
2437         SpirvShader::EmitResult SpirvShader::EmitExtendedInstruction(InsnIterator insn, EmitState *state) const
2438         {
                     // Emits one GLSL.std.450 extended instruction.
                     // Word 1 is the result type, word 2 the result id, word 4 the extended
                     // instruction index, and the instruction's operands start at word 5.
                     // Instructions without a case below hit UNIMPLEMENTED.
2439                 auto routine = state->routine;
2440                 auto &type = getType(insn.word(1));
2441                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2442                 auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
2443
2444                 switch (extInstIndex)
2445                 {
2446                 case GLSLstd450FAbs:
2447                 {
2448                         auto src = GenericValue(this, routine, insn.word(5));
2449                         for (auto i = 0u; i < type.sizeInComponents; i++)
2450                         {
2451                                 dst.move(i, Abs(src.Float(i)));
2452                         }
2453                         break;
2454                 }
2455                 case GLSLstd450SAbs:
2456                 {
2457                         auto src = GenericValue(this, routine, insn.word(5));
2458                         for (auto i = 0u; i < type.sizeInComponents; i++)
2459                         {
2460                                 dst.move(i, Abs(src.Int(i)));
2461                         }
2462                         break;
2463                 }
2464                 case GLSLstd450Cross:
2465                 {
                             // Cross product; exactly components 0..2 are read and written,
                             // independent of type.sizeInComponents.
2466                         auto lhs = GenericValue(this, routine, insn.word(5));
2467                         auto rhs = GenericValue(this, routine, insn.word(6));
2468                         dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
2469                         dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
2470                         dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
2471                         break;
2472                 }
2473                 case GLSLstd450Floor:
2474                 {
2475                         auto src = GenericValue(this, routine, insn.word(5));
2476                         for (auto i = 0u; i < type.sizeInComponents; i++)
2477                         {
2478                                 dst.move(i, Floor(src.Float(i)));
2479                         }
2480                         break;
2481                 }
2482                 case GLSLstd450Trunc:
2483                 {
2484                         auto src = GenericValue(this, routine, insn.word(5));
2485                         for (auto i = 0u; i < type.sizeInComponents; i++)
2486                         {
2487                                 dst.move(i, Trunc(src.Float(i)));
2488                         }
2489                         break;
2490                 }
2491                 case GLSLstd450Ceil:
2492                 {
2493                         auto src = GenericValue(this, routine, insn.word(5));
2494                         for (auto i = 0u; i < type.sizeInComponents; i++)
2495                         {
2496                                 dst.move(i, Ceil(src.Float(i)));
2497                         }
2498                         break;
2499                 }
2500                 case GLSLstd450Fract:
2501                 {
2502                         auto src = GenericValue(this, routine, insn.word(5));
2503                         for (auto i = 0u; i < type.sizeInComponents; i++)
2504                         {
2505                                 dst.move(i, Frac(src.Float(i)));
2506                         }
2507                         break;
2508                 }
2509                 case GLSLstd450Round:
2510                 {
2511                         auto src = GenericValue(this, routine, insn.word(5));
2512                         for (auto i = 0u; i < type.sizeInComponents; i++)
2513                         {
2514                                 dst.move(i, Round(src.Float(i)));
2515                         }
2516                         break;
2517                 }
2518                 case GLSLstd450RoundEven:
2519                 {
2520                         auto src = GenericValue(this, routine, insn.word(5));
2521                         for (auto i = 0u; i < type.sizeInComponents; i++)
2522                         {
2523                                 auto x = Round(src.Float(i));
                                     // Starts from Round(src) and adds a correction of +1 or -1 that is
                                     // multiplied by two 0/1 factors, so it only applies when the
                                     // fractional part is exactly 0.5 and the rounded value is odd.
2524                                 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
2525                                 dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
2526                                                 SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
2527                         }
2528                         break;
2529                 }
2530                 case GLSLstd450FMin:
2531                 {
2532                         auto lhs = GenericValue(this, routine, insn.word(5));
2533                         auto rhs = GenericValue(this, routine, insn.word(6));
2534                         for (auto i = 0u; i < type.sizeInComponents; i++)
2535                         {
2536                                 dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
2537                         }
2538                         break;
2539                 }
2540                 case GLSLstd450FMax:
2541                 {
2542                         auto lhs = GenericValue(this, routine, insn.word(5));
2543                         auto rhs = GenericValue(this, routine, insn.word(6));
2544                         for (auto i = 0u; i < type.sizeInComponents; i++)
2545                         {
2546                                 dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
2547                         }
2548                         break;
2549                 }
2550                 case GLSLstd450SMin:
2551                 {
2552                         auto lhs = GenericValue(this, routine, insn.word(5));
2553                         auto rhs = GenericValue(this, routine, insn.word(6));
2554                         for (auto i = 0u; i < type.sizeInComponents; i++)
2555                         {
2556                                 dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
2557                         }
2558                         break;
2559                 }
2560                 case GLSLstd450SMax:
2561                 {
2562                         auto lhs = GenericValue(this, routine, insn.word(5));
2563                         auto rhs = GenericValue(this, routine, insn.word(6));
2564                         for (auto i = 0u; i < type.sizeInComponents; i++)
2565                         {
2566                                 dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
2567                         }
2568                         break;
2569                 }
2570                 case GLSLstd450UMin:
2571                 {
2572                         auto lhs = GenericValue(this, routine, insn.word(5));
2573                         auto rhs = GenericValue(this, routine, insn.word(6));
2574                         for (auto i = 0u; i < type.sizeInComponents; i++)
2575                         {
2576                                 dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
2577                         }
2578                         break;
2579                 }
2580                 case GLSLstd450UMax:
2581                 {
2582                         auto lhs = GenericValue(this, routine, insn.word(5));
2583                         auto rhs = GenericValue(this, routine, insn.word(6));
2584                         for (auto i = 0u; i < type.sizeInComponents; i++)
2585                         {
2586                                 dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
2587                         }
2588                         break;
2589                 }
2590                 case GLSLstd450Step:
2591                 {
2592                         auto edge = GenericValue(this, routine, insn.word(5));
2593                         auto x = GenericValue(this, routine, insn.word(6));
2594                         for (auto i = 0u; i < type.sizeInComponents; i++)
2595                         {
                                     // ANDs the "x not less than edge" comparison result with the bit
                                     // pattern of 1.0f.
                                     // NOTE(review): assumes the comparison yields an all-ones/all-zeros
                                     // lane mask, giving 1.0f or zero bits per lane - confirm.
2596                                 dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
2597                         }
2598                         break;
2599                 }
2600                 case GLSLstd450SmoothStep:
2601                 {
2602                         auto edge0 = GenericValue(this, routine, insn.word(5));
2603                         auto edge1 = GenericValue(this, routine, insn.word(6));
2604                         auto x = GenericValue(this, routine, insn.word(7));
2605                         for (auto i = 0u; i < type.sizeInComponents; i++)
2606                         {
                                     // tx = (x - edge0) / (edge1 - edge0), limited to [0, 1] by the
                                     // Max/Min pair; the result is tx * tx * (3 - 2 * tx).
2607                                 auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
2608                                                 (edge1.Float(i) - edge0.Float(i)), SIMD::Float(0.0f)), SIMD::Float(1.0f));
2609                                 dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
2610                         }
2611                         break;
2612                 }
2613                 case GLSLstd450FMix:
2614                 {
2615                         auto x = GenericValue(this, routine, insn.word(5));
2616                         auto y = GenericValue(this, routine, insn.word(6));
2617                         auto a = GenericValue(this, routine, insn.word(7));
2618                         for (auto i = 0u; i < type.sizeInComponents; i++)
2619                         {
                                     // Linear blend written as a * (y - x) + x.
2620                                 dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
2621                         }
2622                         break;
2623                 }
2624                 case GLSLstd450FClamp:
2625                 {
2626                         auto x = GenericValue(this, routine, insn.word(5));
2627                         auto minVal = GenericValue(this, routine, insn.word(6));
2628                         auto maxVal = GenericValue(this, routine, insn.word(7));
2629                         for (auto i = 0u; i < type.sizeInComponents; i++)
2630                         {
2631                                 dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
2632                         }
2633                         break;
2634                 }
2635                 case GLSLstd450SClamp:
2636                 {
2637                         auto x = GenericValue(this, routine, insn.word(5));
2638                         auto minVal = GenericValue(this, routine, insn.word(6));
2639                         auto maxVal = GenericValue(this, routine, insn.word(7));
2640                         for (auto i = 0u; i < type.sizeInComponents; i++)
2641                         {
2642                                 dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
2643                         }
2644                         break;
2645                 }
2646                 case GLSLstd450UClamp:
2647                 {
2648                         auto x = GenericValue(this, routine, insn.word(5));
2649                         auto minVal = GenericValue(this, routine, insn.word(6));
2650                         auto maxVal = GenericValue(this, routine, insn.word(7));
2651                         for (auto i = 0u; i < type.sizeInComponents; i++)
2652                         {
2653                                 dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
2654                         }
2655                         break;
2656                 }
2657                 case GLSLstd450FSign:
2658                 {
2659                         auto src = GenericValue(this, routine, insn.word(5));
2660                         for (auto i = 0u; i < type.sizeInComponents; i++)
2661                         {
                                     // neg carries the bits of -1.0f where src < -0.0f, pos carries the
                                     // bits of 1.0f where src is "not less-or-equal" to +0.0f; lanes
                                     // matching neither stay all-zero bits.
                                     // NOTE(review): a "not less-or-equal" test presumably also holds for
                                     // NaN inputs - confirm the intended result for NaN.
2662                                 auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
2663                                 auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
2664                                 dst.move(i, neg | pos);
2665                         }
2666                         break;
2667                 }
2668                 case GLSLstd450SSign:
2669                 {
2670                         auto src = GenericValue(this, routine, insn.word(5));
2671                         for (auto i = 0u; i < type.sizeInComponents; i++)
2672                         {
                                     // Integer counterpart of FSign: -1 where src < 0, 1 where src is
                                     // not <= 0, and 0 otherwise.
2673                                 auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
2674                                 auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
2675                                 dst.move(i, neg | pos);
2676                         }
2677                         break;
2678                 }
2679                 case GLSLstd450Reflect:
2680                 {
2681                         auto I = GenericValue(this, routine, insn.word(5));
2682                         auto N = GenericValue(this, routine, insn.word(6));
2683
2684                         SIMD::Float d = Dot(type.sizeInComponents, I, N);
2685
                             // result = I - 2 * dot(I, N) * N
2686                         for (auto i = 0u; i < type.sizeInComponents; i++)
2687                         {
2688                                 dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
2689                         }
2690                         break;
2691                 }
2692                 case GLSLstd450Refract:
2693                 {
2694                         auto I = GenericValue(this, routine, insn.word(5));
2695                         auto N = GenericValue(this, routine, insn.word(6));
2696                         auto eta = GenericValue(this, routine, insn.word(7));
2697
                             // k = 1 - eta^2 * (1 - dot(I, N)^2); only component 0 of eta is read.
                             // Lanes where k is less than zero are masked out below, which leaves
                             // all-zero bits in the result for those lanes.
2698                         SIMD::Float d = Dot(type.sizeInComponents, I, N);
2699                         SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
2700                         SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
2701                         SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
2702
2703                         for (auto i = 0u; i < type.sizeInComponents; i++)
2704                         {
2705                                 dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
2706                         }
2707                         break;
2708                 }
2709                 case GLSLstd450FaceForward:
2710                 {
2711                         auto N = GenericValue(this, routine, insn.word(5));
2712                         auto I = GenericValue(this, routine, insn.word(6));
2713                         auto Nref = GenericValue(this, routine, insn.word(7));
2714
2715                         SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
2716                         SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
2717
                             // Keeps N in lanes where dot(I, Nref) < 0 and uses -N in the others.
2718                         for (auto i = 0u; i < type.sizeInComponents; i++)
2719                         {
2720                                 auto n = N.Float(i);
2721                                 dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
2722                         }
2723                         break;
2724                 }
2725                 case GLSLstd450Length:
2726                 {
                             // sqrt(dot(x, x)); the component count comes from the operand's type
                             // because only component 0 of the result is written.
2727                         auto x = GenericValue(this, routine, insn.word(5));
2728                         SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2729
2730                         dst.move(0, Sqrt(d));
2731                         break;
2732                 }
2733                 case GLSLstd450Normalize:
2734                 {
                             // Scales every component of x by 1 / sqrt(dot(x, x)).
2735                         auto x = GenericValue(this, routine, insn.word(5));
2736                         SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2737                         SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
2738
2739                         for (auto i = 0u; i < type.sizeInComponents; i++)
2740                         {
2741                                 dst.move(i, invLength * x.Float(i));
2742                         }
2743                         break;
2744                 }
2745                 case GLSLstd450Distance:
2746                 {
2747                         auto p0 = GenericValue(this, routine, insn.word(5));
2748                         auto p1 = GenericValue(this, routine, insn.word(6));
                             // NOTE(review): p0Type is held by value here, whereas `type` above is
                             // bound by reference - confirm whether the copy is intentional.
2749                         auto p0Type = getType(getObject(insn.word(5)).type);
2750
2751                         // sqrt(dot(p0-p1, p0-p1))
2752                         SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
2753
2754                         for (auto i = 1u; i < p0Type.sizeInComponents; i++)
2755                         {
2756                                 d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
2757                         }
2758
2759                         dst.move(0, Sqrt(d));
2760                         break;
2761                 }
2762                 default:
2763                         UNIMPLEMENTED("Unhandled ExtInst %d", extInstIndex);
2764                 }
2765
2766                 return EmitResult::Continue;
2767         }
2768
2769         std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
2770         {
2771                 switch(memorySemantics)
2772                 {
2773                 case spv::MemorySemanticsMaskNone:                   return std::memory_order_relaxed;
2774                 case spv::MemorySemanticsAcquireMask:                return std::memory_order_acquire;
2775                 case spv::MemorySemanticsReleaseMask:                return std::memory_order_release;
2776                 case spv::MemorySemanticsAcquireReleaseMask:         return std::memory_order_acq_rel;
2777                 case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel;  // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
2778                 default:
2779                         UNREACHABLE("MemorySemanticsMask %x", memorySemantics);
2780                         return std::memory_order_acq_rel;
2781                 }
2782         }
2783
2784         SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const
2785         {
                     // Returns the sum of x.Float(i) * y.Float(i) for i in [0, numComponents).
                     // Component 0 is read unconditionally, so numComponents is expected to be
                     // at least 1.
2786                 SIMD::Float d = x.Float(0) * y.Float(0);
2787
2788                 for (auto i = 1u; i < numComponents; i++)
2789                 {
2790                         d += x.Float(i) * y.Float(i);
2791                 }
2792
2793                 return d;
2794         }
2795
2796         SpirvShader::EmitResult SpirvShader::EmitAny(InsnIterator insn, EmitState *state) const
2797         {
                     // Reduces the source operand (word 3) to a single component by OR-ing all of
                     // its components together. Word 1 is the result type (asserted to be a
                     // single component) and word 2 the result id.
                     // NOTE(review): the bitwise OR acts as a logical "any" only if each boolean
                     // component is an all-ones/all-zeros lane mask - confirm.
2798                 auto routine = state->routine;
2799                 auto &type = getType(insn.word(1));
2800                 ASSERT(type.sizeInComponents == 1);
2801                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2802                 auto &srcType = getType(getObject(insn.word(3)).type);
2803                 auto src = GenericValue(this, routine, insn.word(3));
2804
2805                 SIMD::UInt result = src.UInt(0);
2806
2807                 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2808                 {
2809                         result |= src.UInt(i);
2810                 }
2811
2812                 dst.move(0, result);
2813                 return EmitResult::Continue;
2814         }
2815
2816         SpirvShader::EmitResult SpirvShader::EmitAll(InsnIterator insn, EmitState *state) const
2817         {
                     // Reduces the source operand (word 3) to a single component by AND-ing all of
                     // its components together. Word 1 is the result type (asserted to be a
                     // single component) and word 2 the result id.
                     // NOTE(review): the bitwise AND acts as a logical "all" only if each boolean
                     // component is an all-ones/all-zeros lane mask - confirm.
2818                 auto routine = state->routine;
2819                 auto &type = getType(insn.word(1));
2820                 ASSERT(type.sizeInComponents == 1);
2821                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2822                 auto &srcType = getType(getObject(insn.word(3)).type);
2823                 auto src = GenericValue(this, routine, insn.word(3));
2824
2825                 SIMD::UInt result = src.UInt(0);
2826
2827                 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2828                 {
2829                         result &= src.UInt(i);
2830                 }
2831
2832                 dst.move(0, result);
2833                 return EmitResult::Continue;
2834         }
2835
2836         SpirvShader::EmitResult SpirvShader::EmitBranch(InsnIterator insn, EmitState *state) const
2837         {
                     // Unconditional branch: records the current active lane mask on the edge from
                     // the current block to the target block (word 1) and reports a terminator.
2838                 auto target = Block::ID(insn.word(1));
2839                 auto edge = Block::Edge{state->currentBlock, target};
2840                 state->edgeActiveLaneMasks.emplace(edge, state->activeLaneMask());
2841                 return EmitResult::Terminator;
2842         }
2843
2844         SpirvShader::EmitResult SpirvShader::EmitBranchConditional(InsnIterator insn, EmitState *state) const
2845         {
                     // Conditional branch: word 1 of the branch instruction is the condition,
                     // word 2 the block taken where it is set and word 3 the block taken elsewhere.
                     // NOTE(review): `block` is declared with plain `auto`, so it is a copy of
                     // whatever getBlock() returns - confirm whether a reference would suffice.
2846                 auto block = getBlock(state->currentBlock);
2847                 ASSERT(block.branchInstruction == insn);
2848
2849                 auto condId = Object::ID(block.branchInstruction.word(1));
2850                 auto trueBlockId = Block::ID(block.branchInstruction.word(2));
2851                 auto falseBlockId = Block::ID(block.branchInstruction.word(3));
2852
2853                 auto cond = GenericValue(this, state->routine, condId);
2854                 ASSERT_MSG(getType(getObject(condId).type).sizeInComponents == 1, "Condition must be a Boolean type scalar");
2855
2856                 // TODO: Optimize for case where all lanes take same path.
2857
                     // Both outgoing edges are always registered: the true edge with the condition
                     // and the false edge with its bitwise complement.
2858                 state->addOutputActiveLaneMaskEdge(trueBlockId, cond.Int(0));
2859                 state->addOutputActiveLaneMaskEdge(falseBlockId, ~cond.Int(0));
2860
2861                 return EmitResult::Terminator;
2862         }
2863
2864         SpirvShader::EmitResult SpirvShader::EmitSwitch(InsnIterator insn, EmitState *state) const
2865         {
2866                 auto block = getBlock(state->currentBlock);
2867                 ASSERT(block.branchInstruction == insn);
2868
2869                 auto selId = Object::ID(block.branchInstruction.word(1));
2870
2871                 auto sel = GenericValue(this, state->routine, selId);
2872                 ASSERT_MSG(getType(getObject(selId).type).sizeInComponents == 1, "Selector must be a scalar");
2873
2874                 auto numCases = (block.branchInstruction.wordCount() - 3) / 2;
2875
2876                 // TODO: Optimize for case where all lanes take same path.
2877
2878                 SIMD::Int defaultLaneMask = state->activeLaneMask();
2879
2880                 // Gather up the case label matches and calculate defaultLaneMask.
2881                 std::vector<RValue<SIMD::Int>> caseLabelMatches;
2882                 caseLabelMatches.reserve(numCases);
2883                 for (uint32_t i = 0; i < numCases; i++)
2884                 {
2885                         auto label = block.branchInstruction.word(i * 2 + 3);
2886                         auto caseBlockId = Block::ID(block.branchInstruction.word(i * 2 + 4));
2887                         auto caseLabelMatch = CmpEQ(sel.Int(0), SIMD::Int(label));
2888                         state->addOutputActiveLaneMaskEdge(caseBlockId, caseLabelMatch);
2889                         defaultLaneMask &= ~caseLabelMatch;
2890                 }
2891
2892                 auto defaultBlockId = Block::ID(block.branchInstruction.word(2));
2893                 state->addOutputActiveLaneMaskEdge(defaultBlockId, defaultLaneMask);
2894
2895                 return EmitResult::Terminator;
2896         }
2897
2898         SpirvShader::EmitResult SpirvShader::EmitUnreachable(InsnIterator insn, EmitState *state) const
2899         {
                     // Sets the active lane mask to zero and reports a terminator; the
                     // instruction itself is not inspected.
2900                 // TODO: Log something in this case?
2901                 state->setActiveLaneMask(SIMD::Int(0));
2902                 return EmitResult::Terminator;
2903         }
2904
2905         SpirvShader::EmitResult SpirvShader::EmitReturn(InsnIterator insn, EmitState *state) const
2906         {
                     // Sets the active lane mask to zero and reports a terminator; the
                     // instruction itself is not inspected.
2907                 state->setActiveLaneMask(SIMD::Int(0));
2908                 return EmitResult::Terminator;
2909         }
2910
2911         SpirvShader::EmitResult SpirvShader::EmitPhi(InsnIterator insn, EmitState *state) const
2912         {
2913                 auto routine = state->routine;
2914                 auto typeId = Type::ID(insn.word(1));
2915                 auto type = getType(typeId);
2916                 auto objectId = Object::ID(insn.word(2));
2917
2918                 auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
2919
2920                 bool first = true;
2921                 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
2922                 {
2923                         auto varId = Object::ID(insn.word(w + 0));
2924                         auto blockId = Block::ID(insn.word(w + 1));
2925
2926                         auto in = GenericValue(this, routine, varId);
2927                         auto mask = state->getActiveLaneMaskEdge(blockId, state->currentBlock);
2928
2929                         for (uint32_t i = 0; i < type.sizeInComponents; i++)
2930                         {
2931                                 auto inMasked = in.Int(i) & mask;
2932                                 dst.replace(i, first ? inMasked : (dst.Int(i) | inMasked));
2933                         }
2934                         first = false;
2935                 }
2936
2937                 return EmitResult::Continue;
2938         }
2939
2940         void SpirvShader::emitEpilog(SpirvRoutine *routine) const
2941         {
                     // Walks every instruction of the shader and, for each OpVariable that is an
                     // interface variable with Output storage class, copies its value into
                     // routine->outputs. All other instructions are ignored.
2942                 for (auto insn : *this)
2943                 {
2944                         switch (insn.opcode())
2945                         {
2946                         case spv::OpVariable:
2947                         {
2948                                 Object::ID resultId = insn.word(2);
2949                                 auto &object = getObject(resultId);
2950                                 auto &objectTy = getType(object.type);
2951                                 if (object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
2952                                 {
2953                                         auto &dst = routine->getValue(resultId);
                                             // offset advances by one per callback invocation, so the
                                             // variable's elements are consumed in visiting order.
2954                                         int offset = 0;
2955                                         VisitInterface(resultId,
2956                                                                    [&](Decorations const &d, AttribType type) {
                                                                            // Output slot index is (Location << 2) | Component.
2957                                                                            auto scalarSlot = d.Location << 2 | d.Component;
2958                                                                            routine->outputs[scalarSlot] = dst[offset++];
2959                                                                    });
2960                                 }
2961                                 break;
2962                         }
2963                         default:
2964                                 break;
2965                         }
2966                 }
2967         }
2968
2969         SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end)
2970         {
                     // Classifies the block from its last two instructions: the last one decides
                     // the branch instruction and the outgoing block ids (outs); the one before it,
                     // when it is a merge instruction, decides the structured kind, the merge
                     // block and (for loops) the continue target.
2971                 // Default to a Simple, this may change later.
2972                 kind = Block::Simple;
2973
2974                 // Walk the instructions to find the last two of the block.
                     // After the loop insns[1] is the last instruction and insns[0] the one before it.
                     // NOTE(review): for a block with fewer than two instructions insns[0] is never
                     // assigned from the block - confirm that calling opcode() on a
                     // default-constructed InsnIterator is safe.
2975                 InsnIterator insns[2];
2976                 for (auto insn : *this)
2977                 {
2978                         insns[0] = insns[1];
2979                         insns[1] = insn;
2980                 }
2981
2982                 switch (insns[1].opcode())
2983                 {
2984                         case spv::OpBranch:
                                     // Single successor in word 1.
2985                                 branchInstruction = insns[1];
2986                                 outs.emplace(Block::ID(branchInstruction.word(1)));
2987
2988                                 switch (insns[0].opcode())
2989                                 {
2990                                         case spv::OpLoopMerge:
2991                                                 kind = Loop;
2992                                                 mergeInstruction = insns[0];
2993                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
2994                                                 continueTarget = Block::ID(mergeInstruction.word(2));
2995                                                 break;
2996
2997                                         default:
2998                                                 kind = Block::Simple;
2999                                                 break;
3000                                 }
3001                                 break;
3002
3003                         case spv::OpBranchConditional:
                                     // Two successors, in words 2 and 3.
3004                                 branchInstruction = insns[1];
3005                                 outs.emplace(Block::ID(branchInstruction.word(2)));
3006                                 outs.emplace(Block::ID(branchInstruction.word(3)));
3007
3008                                 switch (insns[0].opcode())
3009                                 {
3010                                         case spv::OpSelectionMerge:
3011                                                 kind = StructuredBranchConditional;
3012                                                 mergeInstruction = insns[0];
3013                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
3014                                                 break;
3015
3016                                         case spv::OpLoopMerge:
3017                                                 kind = Loop;
3018                                                 mergeInstruction = insns[0];
3019                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
3020                                                 continueTarget = Block::ID(mergeInstruction.word(2));
3021                                                 break;
3022
3023                                         default:
3024                                                 kind = UnstructuredBranchConditional;
3025                                                 break;
3026                                 }
3027                                 break;
3028
3029                         case spv::OpSwitch:
                                     // Default target in word 2, then one case target in every second
                                     // word starting at word 4.
3030                                 branchInstruction = insns[1];
3031                                 outs.emplace(Block::ID(branchInstruction.word(2)));
3032                                 for (uint32_t w = 4; w < branchInstruction.wordCount(); w += 2)
3033                                 {
3034                                         outs.emplace(Block::ID(branchInstruction.word(w)));
3035                                 }
3036
3037                                 switch (insns[0].opcode())
3038                                 {
3039                                         case spv::OpSelectionMerge:
3040                                                 kind = StructuredSwitch;
3041                                                 mergeInstruction = insns[0];
3042                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
3043                                                 break;
3044
3045                                         default:
3046                                                 kind = UnstructuredSwitch;
3047                                                 break;
3048                                 }
3049                                 break;
3050
3051                         default:
                                     // Any other final instruction leaves the block Simple with no
                                     // branch instruction and no outs recorded.
3052                                 break;
3053                 }
3054         }
3055
3056         bool SpirvShader::existsPath(Block::ID from, Block::ID to) const
3057         {
3058                 // TODO: Optimize: This can be cached on the block.
3059                 Block::Set seen;
3060
3061                 std::queue<Block::ID> pending;
3062                 pending.emplace(from);
3063
3064                 while (pending.size() > 0)
3065                 {
3066                         auto id = pending.front();
3067                         pending.pop();
3068                         for (auto out : getBlock(id).outs)
3069                         {
3070                                 if (seen.count(out) != 0) { continue; }
3071                                 if (out == to) { return true; }
3072                                 pending.emplace(out);
3073                         }
3074                         seen.emplace(id);
3075                 }
3076
3077                 return false;
3078         }
3079
3080         void SpirvShader::EmitState::addOutputActiveLaneMaskEdge(Block::ID to, RValue<SIMD::Int> mask)
3081         {
3082                 addActiveLaneMaskEdge(currentBlock, to, mask & activeLaneMask());
3083         }
3084
3085         void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask)
3086         {
3087                 auto edge = Block::Edge{from, to};
3088                 auto it = edgeActiveLaneMasks.find(edge);
3089                 if (it == edgeActiveLaneMasks.end())
3090                 {
3091                         edgeActiveLaneMasks.emplace(edge, mask);
3092                 }
3093                 else
3094                 {
3095                         auto combined = it->second | mask;
3096                         edgeActiveLaneMasks.erase(edge);
3097                         edgeActiveLaneMasks.emplace(edge, combined);
3098                 }
3099         }
3100
3101         RValue<SIMD::Int> SpirvShader::EmitState::getActiveLaneMaskEdge(Block::ID from, Block::ID to)
3102         {
3103                 auto edge = Block::Edge{from, to};
3104                 auto it = edgeActiveLaneMasks.find(edge);
3105                 ASSERT_MSG(it != edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
3106                 return it->second;
3107         }
3108
3109         SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
3110                 pipelineLayout(pipelineLayout)
3111         {
3112         }
3113
3114 }