OSDN Git Service

Add support for OpMatrixTimesMatrix
[android-x86/external-swiftshader.git] / src / Pipeline / SpirvShader.cpp
1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <spirv/unified1/spirv.hpp>
16 #include <spirv/unified1/GLSL.std.450.h>
17 #include "SpirvShader.hpp"
18 #include "System/Math.hpp"
19 #include "Vulkan/VkBuffer.hpp"
20 #include "Vulkan/VkDebug.hpp"
21 #include "Vulkan/VkPipelineLayout.hpp"
22 #include "Device/Config.hpp"
23
24 #include <queue>
25
26 #ifdef Bool
27 #undef Bool // b/127920555
28 #endif
29
30 namespace
31 {
32         rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints)
33         {
34                 return rr::SignMask(ints) != 0;
35         }
36
37         rr::RValue<rr::Bool> AnyFalse(rr::RValue<sw::SIMD::Int> const &ints)
38         {
39                 return rr::SignMask(~ints) != 0;
40         }
41 }
42
43 namespace sw
44 {
45         volatile int SpirvShader::serialCounter = 1;    // Start at 1, 0 is invalid shader.
           // NOTE(review): volatile does not make the serialCounter++ in the constructor atomic;
           // confirm SpirvShader objects are never constructed concurrently.
46
47         SpirvShader::SpirvShader(InsnStore const &insns)
48                         : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
49                           outputs{MAX_INTERFACE_COMPONENTS},
50                           serialID{serialCounter++}, modes{}
51         {
                   // Analysis pass over the module's instructions. A single walk records
                   // decorations, member decorations, types, objects (variables, constants and
                   // intermediate values), the instruction range of every block and the
                   // execution modes. A second loop at the end derives each block's `ins`
                   // from the `outs` of the other blocks.
52                 ASSERT(insns.size() > 0);
53
54                 // Simplifying assumptions (to be satisfied by earlier transformations)
55                 // - There is exactly one entrypoint in the module, and it's the one we want
56                 // - The only input/output OpVariables present are those used by the entrypoint
57
                   // Block currently being scanned (ID value 0 while outside any block) and the
                   // iterator of the OpLabel that opened it.
58                 Block::ID currentBlock;
59                 InsnIterator blockStart;
60
61                 for (auto insn : *this)
62                 {
63                         switch (insn.opcode())
64                         {
65                         case spv::OpExecutionMode:
66                                 ProcessExecutionMode(insn);
67                                 break;
68
69                         case spv::OpDecorate:
70                         {
71                                 TypeOrObjectID targetId = insn.word(1);
72                                 auto decoration = static_cast<spv::Decoration>(insn.word(2));
                                   // Word 3 is the decoration's optional operand; 0 is passed when absent.
73                                 decorations[targetId].Apply(
74                                                 decoration,
75                                                 insn.wordCount() > 3 ? insn.word(3) : 0);
76
77                                 if (decoration == spv::DecorationCentroid)
78                                         modes.NeedsCentroid = true;
79                                 break;
80                         }
81
82                         case spv::OpMemberDecorate:
83                         {
84                                 Type::ID targetId = insn.word(1);
85                                 auto memberIndex = insn.word(2);
86                                 auto &d = memberDecorations[targetId];
87                                 if (memberIndex >= d.size())
88                                         d.resize(memberIndex + 1);    // on demand; exact size would require another pass...
89                                 auto decoration = static_cast<spv::Decoration>(insn.word(3));
                                   // Word 4 is the decoration's optional operand; 0 is passed when absent.
90                                 d[memberIndex].Apply(
91                                                 decoration,
92                                                 insn.wordCount() > 4 ? insn.word(4) : 0);
93
94                                 if (decoration == spv::DecorationCentroid)
95                                         modes.NeedsCentroid = true;
96                                 break;
97                         }
98
99                         case spv::OpDecorationGroup:
100                                 // Nothing to do here. We don't need to record the definition of the group; we'll just have
101                                 // the bundle of decorations float around. If we were to ever walk the decorations directly,
102                                 // we might think about introducing this as a real Object.
103                                 break;
104
105                         case spv::OpGroupDecorate:
106                         {
                                    // Word 1 is the group whose accumulated decorations are copied onto each target.
107                                 auto const &srcDecorations = decorations[insn.word(1)];
108                                 for (auto i = 2u; i < insn.wordCount(); i++)
109                                 {
110                                         // remaining operands are targets to apply the group to.
111                                         decorations[insn.word(i)].Apply(srcDecorations);
112                                 }
113                                 break;
114                         }
115
116                         case spv::OpGroupMemberDecorate:
117                         {
118                                 auto const &srcDecorations = decorations[insn.word(1)];
119                                 for (auto i = 2u; i < insn.wordCount(); i += 2)
120                                 {
121                                         // remaining operands are pairs of <id>, literal for members to apply to.
122                                         auto &d = memberDecorations[insn.word(i)];
123                                         auto memberIndex = insn.word(i + 1);
124                                         if (memberIndex >= d.size())
125                                                 d.resize(memberIndex + 1);    // on demand resize, see above...
126                                         d[memberIndex].Apply(srcDecorations);
127                                 }
128                                 break;
129                         }
130
131                         case spv::OpLabel:
132                         {
                                    // Opens a new block; the ASSERT rejects a label that appears before the
                                    // previous block has been closed by a terminator.
133                                 ASSERT(currentBlock.value() == 0);
134                                 currentBlock = Block::ID(insn.word(1));
135                                 blockStart = insn;
136                                 break;
137                         }
138
139                         // Branch Instructions (subset of Termination Instructions):
140                         case spv::OpBranch:
141                         case spv::OpBranchConditional:
142                         case spv::OpSwitch:
143                         case spv::OpReturn:
144                         // fallthrough
145
146                         // Termination instruction:
147                         case spv::OpKill:
148                         case spv::OpUnreachable:
149                         {
                                    // Closes the open block. Its range runs from the OpLabel up to, but not
                                    // including, the instruction after this terminator.
150                                 ASSERT(currentBlock.value() != 0);
151                                 auto blockEnd = insn; blockEnd++;
152                                 blocks[currentBlock] = Block(blockStart, blockEnd);
153                                 currentBlock = Block::ID(0);
154
155                                 if (insn.opcode() == spv::OpKill)
156                                 {
157                                         modes.ContainsKill = true;
158                                 }
159                                 break;
160                         }
161
162                         case spv::OpLoopMerge:
163                         case spv::OpSelectionMerge:
164                                 break; // Nothing to do in analysis pass.
165
166                         case spv::OpTypeVoid:
167                         case spv::OpTypeBool:
168                         case spv::OpTypeInt:
169                         case spv::OpTypeFloat:
170                         case spv::OpTypeVector:
171                         case spv::OpTypeMatrix:
172                         case spv::OpTypeImage:
173                         case spv::OpTypeSampler:
174                         case spv::OpTypeSampledImage:
175                         case spv::OpTypeArray:
176                         case spv::OpTypeRuntimeArray:
177                         case spv::OpTypeStruct:
178                         case spv::OpTypePointer:
179                         case spv::OpTypeFunction:
180                                 DeclareType(insn);
181                                 break;
182
183                         case spv::OpVariable:
184                         {
185                                 Type::ID typeId = insn.word(1);
186                                 Object::ID resultId = insn.word(2);
187                                 auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
                                    // A fifth word would be the optional initializer operand.
188                                 if (insn.wordCount() > 4)
189                                         UNIMPLEMENTED("Variable initializers not yet supported");
190
191                                 auto &object = defs[resultId];
192                                 object.kind = Object::Kind::Variable;
193                                 object.definition = insn;
194                                 object.type = typeId;
195                                 object.pointerBase = insn.word(2);      // base is itself
196
                                    // The storage class on the instruction must match the one recorded on its type.
197                                 ASSERT(getType(typeId).storageClass == storageClass);
198
199                                 switch (storageClass)
200                                 {
201                                 case spv::StorageClassInput:
202                                 case spv::StorageClassOutput:
203                                         ProcessInterfaceVariable(object);
204                                         break;
205                                 case spv::StorageClassUniform:
206                                 case spv::StorageClassStorageBuffer:
207                                 case spv::StorageClassPushConstant:
                                            // These variables are reclassified from Variable to PhysicalPointer.
208                                         object.kind = Object::Kind::PhysicalPointer;
209                                         break;
210
211                                 case spv::StorageClassPrivate:
212                                 case spv::StorageClassFunction:
213                                         break; // Correctly handled.
214
215                                 case spv::StorageClassUniformConstant:
216                                 case spv::StorageClassWorkgroup:
217                                 case spv::StorageClassCrossWorkgroup:
218                                 case spv::StorageClassGeneric:
219                                 case spv::StorageClassAtomicCounter:
220                                 case spv::StorageClassImage:
221                                         UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
222                                         break;
223
224                                 default:
225                                         UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
226                                         break;
227                                 }
228                                 break;
229                         }
230
231                         case spv::OpConstant:
                                    // Only word 3 is stored, as component 0 of the constant.
232                                 CreateConstant(insn).constantValue[0] = insn.word(3);
233                                 break;
234                         case spv::OpConstantFalse:
235                                 CreateConstant(insn).constantValue[0] = 0;              // represent boolean false as zero
236                                 break;
237                         case spv::OpConstantTrue:
238                                 CreateConstant(insn).constantValue[0] = ~0u;    // represent boolean true as all bits set
239                                 break;
240                         case spv::OpConstantNull:
241                         case spv::OpUndef:
242                         {
243                                 // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
244                                 // OpConstantNull forms a constant of arbitrary type, all zeros.
245                                 auto &object = CreateConstant(insn);
246                                 auto &objectTy = getType(object.type);
247                                 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
248                                 {
249                                         object.constantValue[i] = 0;
250                                 }
251                                 break;
252                         }
253                         case spv::OpConstantComposite:
254                         {
                                    // Flattens the constituents (words 3 onwards) into consecutive components
                                    // of the new constant, in operand order.
255                                 auto &object = CreateConstant(insn);
256                                 auto offset = 0u;
257                                 for (auto i = 0u; i < insn.wordCount() - 3; i++)
258                                 {
259                                         auto &constituent = getObject(insn.word(i + 3));
260                                         auto &constituentTy = getType(constituent.type);
261                                         for (auto j = 0u; j < constituentTy.sizeInComponents; j++)
262                                                 object.constantValue[offset++] = constituent.constantValue[j];
263                                 }
264
265                                 auto objectId = Object::ID(insn.word(2));
266                                 auto decorationsIt = decorations.find(objectId);
267                                 if (decorationsIt != decorations.end() &&
268                                         decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
269                                 {
270                                         // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
271                                         // Decorating an object with the WorkgroupSize built-in
272                                         // decoration will make that object contain the dimensions
273                                         // of a local workgroup. If an object is decorated with the
274                                         // WorkgroupSize decoration, this must take precedence over
275                                         // any execution mode set for LocalSize.
276                                         // The object decorated with WorkgroupSize must be declared
277                                         // as a three-component vector of 32-bit integers.
278                                         ASSERT(getType(object.type).sizeInComponents == 3);
279                                         modes.WorkgroupSizeX = object.constantValue[0];
280                                         modes.WorkgroupSizeY = object.constantValue[1];
281                                         modes.WorkgroupSizeZ = object.constantValue[2];
282                                 }
283                                 break;
284                         }
285
286                         case spv::OpCapability:
287                                 break; // Various capabilities will be declared, but none affect our code generation at this point.
288                         case spv::OpMemoryModel:
289                                 break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
290
291                         case spv::OpEntryPoint:
292                                 break;
293                         case spv::OpFunction:
294                                 ASSERT(mainBlockId.value() == 0); // Multiple functions found
295                                 // Scan forward to find the function's label.
296                                 for (auto it = insn; it != end() && mainBlockId.value() == 0; it++)
297                                 {
298                                         switch (it.opcode())
299                                         {
300                                         case spv::OpFunction:
301                                         case spv::OpFunctionParameter:
302                                                 break;
303                                         case spv::OpLabel:
                                                    // First label after OpFunction becomes the main block; this also ends the scan.
304                                                 mainBlockId = Block::ID(it.word(1));
305                                                 break;
306                                         default:
307                                                 WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str());
308                                         }
309                                 }
310                                 ASSERT(mainBlockId.value() != 0); // Function's OpLabel not found
311                                 break;
312                         case spv::OpFunctionEnd:
313                                 // Due to preprocessing, the entrypoint and its function provide no value.
314                                 break;
315                         case spv::OpExtInstImport:
316                                 // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
317                                 // Valid shaders will not attempt to import any other instruction sets.
318                                 if (0 != strcmp("GLSL.std.450", reinterpret_cast<char const *>(insn.wordPointer(2))))
319                                 {
320                                         UNIMPLEMENTED("Only GLSL extended instruction set is supported");
321                                 }
322                                 break;
323                         case spv::OpName:
324                         case spv::OpMemberName:
325                         case spv::OpSource:
326                         case spv::OpSourceContinued:
327                         case spv::OpSourceExtension:
328                         case spv::OpLine:
329                         case spv::OpNoLine:
330                         case spv::OpModuleProcessed:
331                         case spv::OpString:
332                                 // No semantic impact
333                                 break;
334
335                         case spv::OpFunctionParameter:
336                         case spv::OpFunctionCall:
337                         case spv::OpSpecConstant:
338                         case spv::OpSpecConstantComposite:
339                         case spv::OpSpecConstantFalse:
340                         case spv::OpSpecConstantOp:
341                         case spv::OpSpecConstantTrue:
342                                 // These should have all been removed by preprocessing passes. If we see them here,
343                                 // our assumptions are wrong and we will probably generate wrong code.
344                                 UNIMPLEMENTED("%s should have already been lowered.", OpcodeName(insn.opcode()).c_str());
345                                 break;
346
347                         case spv::OpFConvert:
348                         case spv::OpSConvert:
349                         case spv::OpUConvert:
350                                 UNIMPLEMENTED("No valid uses for Op*Convert until we support multiple bit widths");
351                                 break;
352
353                         case spv::OpLoad:
354                         case spv::OpAccessChain:
355                         case spv::OpInBoundsAccessChain:
356                         case spv::OpCompositeConstruct:
357                         case spv::OpCompositeInsert:
358                         case spv::OpCompositeExtract:
359                         case spv::OpVectorShuffle:
360                         case spv::OpVectorTimesScalar:
361                         case spv::OpMatrixTimesScalar:
362                         case spv::OpMatrixTimesVector:
363                         case spv::OpVectorTimesMatrix:
364                         case spv::OpMatrixTimesMatrix:
365                         case spv::OpVectorExtractDynamic:
366                         case spv::OpVectorInsertDynamic:
367                         case spv::OpNot: // Unary ops
368                         case spv::OpSNegate:
369                         case spv::OpFNegate:
370                         case spv::OpLogicalNot:
371                         case spv::OpIAdd: // Binary ops
372                         case spv::OpISub:
373                         case spv::OpIMul:
374                         case spv::OpSDiv:
375                         case spv::OpUDiv:
376                         case spv::OpFAdd:
377                         case spv::OpFSub:
378                         case spv::OpFMul:
379                         case spv::OpFDiv:
380                         case spv::OpFMod:
381                         case spv::OpFRem:
382                         case spv::OpFOrdEqual:
383                         case spv::OpFUnordEqual:
384                         case spv::OpFOrdNotEqual:
385                         case spv::OpFUnordNotEqual:
386                         case spv::OpFOrdLessThan:
387                         case spv::OpFUnordLessThan:
388                         case spv::OpFOrdGreaterThan:
389                         case spv::OpFUnordGreaterThan:
390                         case spv::OpFOrdLessThanEqual:
391                         case spv::OpFUnordLessThanEqual:
392                         case spv::OpFOrdGreaterThanEqual:
393                         case spv::OpFUnordGreaterThanEqual:
394                         case spv::OpSMod:
395                         case spv::OpSRem:
396                         case spv::OpUMod:
397                         case spv::OpIEqual:
398                         case spv::OpINotEqual:
399                         case spv::OpUGreaterThan:
400                         case spv::OpSGreaterThan:
401                         case spv::OpUGreaterThanEqual:
402                         case spv::OpSGreaterThanEqual:
403                         case spv::OpULessThan:
404                         case spv::OpSLessThan:
405                         case spv::OpULessThanEqual:
406                         case spv::OpSLessThanEqual:
407                         case spv::OpShiftRightLogical:
408                         case spv::OpShiftRightArithmetic:
409                         case spv::OpShiftLeftLogical:
410                         case spv::OpBitwiseOr:
411                         case spv::OpBitwiseXor:
412                         case spv::OpBitwiseAnd:
413                         case spv::OpLogicalOr:
414                         case spv::OpLogicalAnd:
415                         case spv::OpLogicalEqual:
416                         case spv::OpLogicalNotEqual:
417                         case spv::OpUMulExtended:
418                         case spv::OpSMulExtended:
419                         case spv::OpDot:
420                         case spv::OpConvertFToU:
421                         case spv::OpConvertFToS:
422                         case spv::OpConvertSToF:
423                         case spv::OpConvertUToF:
424                         case spv::OpBitcast:
425                         case spv::OpSelect:
426                         case spv::OpExtInst:
427                         case spv::OpIsInf:
428                         case spv::OpIsNan:
429                         case spv::OpAny:
430                         case spv::OpAll:
431                         case spv::OpDPdx:
432                         case spv::OpDPdxCoarse:
433                         case spv::OpDPdy:
434                         case spv::OpDPdyCoarse:
435                         case spv::OpFwidth:
436                         case spv::OpFwidthCoarse:
437                         case spv::OpDPdxFine:
438                         case spv::OpDPdyFine:
439                         case spv::OpFwidthFine:
440                         case spv::OpAtomicLoad:
441                         case spv::OpPhi:
442                                 // Instructions that yield an intermediate value
443                         {
                                    // Only the result's type, kind and defining instruction are recorded here.
444                                 Type::ID typeId = insn.word(1);
445                                 Object::ID resultId = insn.word(2);
446                                 auto &object = defs[resultId];
447                                 object.type = typeId;
448                                 object.kind = Object::Kind::Value;
449                                 object.definition = insn;
450
451                                 if (insn.opcode() == spv::OpAccessChain || insn.opcode() == spv::OpInBoundsAccessChain)
452                                 {
453                                         // interior ptr has two parts:
454                                         // - logical base ptr, common across all lanes and known at compile time
455                                         // - per-lane offset
                                            // The chain inherits the pointer base of the object it indexes into (word 3).
456                                         Object::ID baseId = insn.word(3);
457                                         object.pointerBase = getObject(baseId).pointerBase;
458                                 }
459                                 break;
460                         }
461
462                         case spv::OpStore:
463                         case spv::OpAtomicStore:
464                                 // Don't need to do anything during analysis pass
465                                 break;
466
467                         default:
468                                 UNIMPLEMENTED("%s", OpcodeName(insn.opcode()).c_str());
469                         }
470                 }
471
472                 // Assign all Block::ins
                    // For every edge block -> out, register `block` as a predecessor of `out`.
                    // NOTE(review): assumes Block::outs was filled in when each Block was
                    // constructed above - the Block constructor is not visible here; confirm.
473                 for (auto &it : blocks)
474                 {
475                         auto &blockId = it.first;
476                         auto &block = it.second;
477                         for (auto &outId : block.outs)
478                         {
479                                 auto outIt = blocks.find(outId);
480                                 ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
481                                 auto &out = outIt->second;
482                                 out.ins.emplace(blockId);
483                         }
484                 }
485         }
486
487         void SpirvShader::DeclareType(InsnIterator insn)
488         {
489                 Type::ID resultId = insn.word(1);
490
491                 auto &type = types[resultId];
492                 type.definition = insn;
493                 type.sizeInComponents = ComputeTypeSize(insn);
494
495                 // A structure is a builtin block if it has a builtin
496                 // member. All members of such a structure are builtins.
497                 switch (insn.opcode())
498                 {
499                 case spv::OpTypeStruct:
500                 {
501                         auto d = memberDecorations.find(resultId);
502                         if (d != memberDecorations.end())
503                         {
504                                 for (auto &m : d->second)
505                                 {
506                                         if (m.HasBuiltIn)
507                                         {
508                                                 type.isBuiltInBlock = true;
509                                                 break;
510                                         }
511                                 }
512                         }
513                         break;
514                 }
515                 case spv::OpTypePointer:
516                 {
517                         Type::ID elementTypeId = insn.word(3);
518                         type.element = elementTypeId;
519                         type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
520                         type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
521                         break;
522                 }
523                 case spv::OpTypeVector:
524                 case spv::OpTypeMatrix:
525                 case spv::OpTypeArray:
526                 case spv::OpTypeRuntimeArray:
527                 {
528                         Type::ID elementTypeId = insn.word(2);
529                         type.element = elementTypeId;
530                         break;
531                 }
532                 default:
533                         break;
534                 }
535         }
536
537         SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
538         {
539                 Type::ID typeId = insn.word(1);
540                 Object::ID resultId = insn.word(2);
541                 auto &object = defs[resultId];
542                 auto &objectTy = getType(typeId);
543                 object.type = typeId;
544                 object.kind = Object::Kind::Constant;
545                 object.definition = insn;
546                 object.constantValue = std::unique_ptr<uint32_t[]>(new uint32_t[objectTy.sizeInComponents]);
547                 return object;
548         }
549
550         void SpirvShader::ProcessInterfaceVariable(Object &object)
551         {
552                 auto &objectTy = getType(object.type);
553                 ASSERT(objectTy.storageClass == spv::StorageClassInput || objectTy.storageClass == spv::StorageClassOutput);
554
555                 ASSERT(objectTy.opcode() == spv::OpTypePointer);
556                 auto pointeeTy = getType(objectTy.element);
557
558                 auto &builtinInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputBuiltins : outputBuiltins;
559                 auto &userDefinedInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputs : outputs;
560
561                 ASSERT(object.opcode() == spv::OpVariable);
562                 Object::ID resultId = object.definition.word(2);
563
564                 if (objectTy.isBuiltInBlock)
565                 {
566                         // walk the builtin block, registering each of its members separately.
567                         auto m = memberDecorations.find(objectTy.element);
568                         ASSERT(m != memberDecorations.end());        // otherwise we wouldn't have marked the type chain
569                         auto &structType = pointeeTy.definition;
570                         auto offset = 0u;
571                         auto word = 2u;
572                         for (auto &member : m->second)
573                         {
574                                 auto &memberType = getType(structType.word(word));
575
576                                 if (member.HasBuiltIn)
577                                 {
578                                         builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
579                                 }
580
581                                 offset += memberType.sizeInComponents;
582                                 ++word;
583                         }
584                         return;
585                 }
586
587                 auto d = decorations.find(resultId);
588                 if (d != decorations.end() && d->second.HasBuiltIn)
589                 {
590                         builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
591                 }
592                 else
593                 {
594                         object.kind = Object::Kind::InterfaceVariable;
595                         VisitInterface(resultId,
596                                                    [&userDefinedInterface](Decorations const &d, AttribType type) {
597                                                            // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
598                                                            auto scalarSlot = (d.Location << 2) | d.Component;
599                                                            ASSERT(scalarSlot >= 0 &&
600                                                                           scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
601
602                                                            auto &slot = userDefinedInterface[scalarSlot];
603                                                            slot.Type = type;
604                                                            slot.Flat = d.Flat;
605                                                            slot.NoPerspective = d.NoPerspective;
606                                                            slot.Centroid = d.Centroid;
607                                                    });
608                 }
609         }
610
611         void SpirvShader::ProcessExecutionMode(InsnIterator insn)
612         {
613                 auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
614                 switch (mode)
615                 {
616                 case spv::ExecutionModeEarlyFragmentTests:
617                         modes.EarlyFragmentTests = true;
618                         break;
619                 case spv::ExecutionModeDepthReplacing:
620                         modes.DepthReplacing = true;
621                         break;
622                 case spv::ExecutionModeDepthGreater:
623                         modes.DepthGreater = true;
624                         break;
625                 case spv::ExecutionModeDepthLess:
626                         modes.DepthLess = true;
627                         break;
628                 case spv::ExecutionModeDepthUnchanged:
629                         modes.DepthUnchanged = true;
630                         break;
631                 case spv::ExecutionModeLocalSize:
632                         modes.WorkgroupSizeX = insn.word(3);
633                         modes.WorkgroupSizeY = insn.word(4);
634                         modes.WorkgroupSizeZ = insn.word(5);
635                         break;
636                 case spv::ExecutionModeOriginUpperLeft:
637                         // This is always the case for a Vulkan shader. Do nothing.
638                         break;
639                 default:
640                         UNIMPLEMENTED("No other execution modes are permitted");
641                 }
642         }
643
644         uint32_t SpirvShader::ComputeTypeSize(InsnIterator insn)
645         {
646                 // Types are always built from the bottom up (with the exception of forward ptrs, which
647                 // don't appear in Vulkan shaders. Therefore, we can always assume our component parts have
648                 // already been described (and so their sizes determined)
649                 switch (insn.opcode())
650                 {
651                 case spv::OpTypeVoid:
652                 case spv::OpTypeSampler:
653                 case spv::OpTypeImage:
654                 case spv::OpTypeSampledImage:
655                 case spv::OpTypeFunction:
656                 case spv::OpTypeRuntimeArray:
657                         // Objects that don't consume any space.
658                         // Descriptor-backed objects currently only need exist at compile-time.
659                         // Runtime arrays don't appear in places where their size would be interesting
660                         return 0;
661
662                 case spv::OpTypeBool:
663                 case spv::OpTypeFloat:
664                 case spv::OpTypeInt:
665                         // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
666                         // we might need to change this, but only 32 bit components are required for Vulkan 1.1.
667                         return 1;
668
669                 case spv::OpTypeVector:
670                 case spv::OpTypeMatrix:
671                         // Vectors and matrices both consume element count * element size.
672                         return getType(insn.word(2)).sizeInComponents * insn.word(3);
673
674                 case spv::OpTypeArray:
675                 {
676                         // Element count * element size. Array sizes come from constant ids.
677                         auto arraySize = GetConstantInt(insn.word(3));
678                         return getType(insn.word(2)).sizeInComponents * arraySize;
679                 }
680
681                 case spv::OpTypeStruct:
682                 {
683                         uint32_t size = 0;
684                         for (uint32_t i = 2u; i < insn.wordCount(); i++)
685                         {
686                                 size += getType(insn.word(i)).sizeInComponents;
687                         }
688                         return size;
689                 }
690
691                 case spv::OpTypePointer:
692                         // Runtime representation of a pointer is a per-lane index.
693                         // Note: clients are expected to look through the pointer if they want the pointee size instead.
694                         return 1;
695
696                 default:
697                         // Some other random insn.
698                         UNIMPLEMENTED("Only types are supported");
699                         return 0;
700                 }
701         }
702
703         bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
704         {
705                 switch (storageClass)
706                 {
707                 case spv::StorageClassUniform:
708                 case spv::StorageClassStorageBuffer:
709                 case spv::StorageClassPushConstant:
710                         return false;
711                 default:
712                         return true;
713                 }
714         }
715
716         template<typename F>
717         int SpirvShader::VisitInterfaceInner(Type::ID id, Decorations d, F f) const
718         {
719                 // Recursively walks variable definition and its type tree, taking into account
720                 // any explicit Location or Component decorations encountered; where explicit
721                 // Locations or Components are not specified, assigns them sequentially.
722                 // Collected decorations are carried down toward the leaves and across
723                 // siblings; Effect of decorations intentionally does not flow back up the tree.
724                 //
725                 // F is a functor to be called with the effective decoration set for every component.
726                 //
727                 // Returns the next available location, and calls f().
728
729                 // This covers the rules in Vulkan 1.1 spec, 14.1.4 Location Assignment.
730
731                 ApplyDecorationsForId(&d, id);
732
733                 auto const &obj = getType(id);
734                 switch(obj.opcode())
735                 {
736                 case spv::OpTypePointer:
737                         return VisitInterfaceInner<F>(obj.definition.word(3), d, f);
738                 case spv::OpTypeMatrix:
739                         for (auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
740                         {
741                                 // consumes same components of N consecutive locations
742                                 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
743                         }
744                         return d.Location;
745                 case spv::OpTypeVector:
746                         for (auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
747                         {
748                                 // consumes N consecutive components in the same location
749                                 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
750                         }
751                         return d.Location + 1;
752                 case spv::OpTypeFloat:
753                         f(d, ATTRIBTYPE_FLOAT);
754                         return d.Location + 1;
755                 case spv::OpTypeInt:
756                         f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
757                         return d.Location + 1;
758                 case spv::OpTypeBool:
759                         f(d, ATTRIBTYPE_UINT);
760                         return d.Location + 1;
761                 case spv::OpTypeStruct:
762                 {
763                         // iterate over members, which may themselves have Location/Component decorations
764                         for (auto i = 0u; i < obj.definition.wordCount() - 2; i++)
765                         {
766                                 ApplyDecorationsForIdMember(&d, id, i);
767                                 d.Location = VisitInterfaceInner<F>(obj.definition.word(i + 2), d, f);
768                                 d.Component = 0;    // Implicit locations always have component=0
769                         }
770                         return d.Location;
771                 }
772                 case spv::OpTypeArray:
773                 {
774                         auto arraySize = GetConstantInt(obj.definition.word(3));
775                         for (auto i = 0u; i < arraySize; i++)
776                         {
777                                 d.Location = VisitInterfaceInner<F>(obj.definition.word(2), d, f);
778                         }
779                         return d.Location;
780                 }
781                 default:
782                         // Intentionally partial; most opcodes do not participate in type hierarchies
783                         return 0;
784                 }
785         }
786
787         template<typename F>
788         void SpirvShader::VisitInterface(Object::ID id, F f) const
789         {
790                 // Walk a variable definition and call f for each component in it.
791                 Decorations d{};
792                 ApplyDecorationsForId(&d, id);
793
794                 auto def = getObject(id).definition;
795                 ASSERT(def.opcode() == spv::OpVariable);
796                 VisitInterfaceInner<F>(def.word(1), d, f);
797         }
798
799         SIMD::Int SpirvShader::WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
800         {
801                 // Produce a offset into external memory in sizeof(float) units
802
803                 int constantOffset = 0;
804                 SIMD::Int dynamicOffset = SIMD::Int(0);
805                 auto &baseObject = getObject(id);
806                 Type::ID typeId = getType(baseObject.type).element;
807                 Decorations d{};
808                 ApplyDecorationsForId(&d, baseObject.type);
809
810                 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
811                 // Start with its offset and build from there.
812                 if (baseObject.kind == Object::Kind::Value)
813                 {
814                         dynamicOffset += routine->getIntermediate(id).Int(0);
815                 }
816
817                 for (auto i = 0u; i < numIndexes; i++)
818                 {
819                         auto & type = getType(typeId);
820                         switch (type.definition.opcode())
821                         {
822                         case spv::OpTypeStruct:
823                         {
824                                 int memberIndex = GetConstantInt(indexIds[i]);
825                                 ApplyDecorationsForIdMember(&d, typeId, memberIndex);
826                                 ASSERT(d.HasOffset);
827                                 constantOffset += d.Offset / sizeof(float);
828                                 typeId = type.definition.word(2u + memberIndex);
829                                 break;
830                         }
831                         case spv::OpTypeArray:
832                         case spv::OpTypeRuntimeArray:
833                         {
834                                 // TODO: b/127950082: Check bounds.
835                                 ApplyDecorationsForId(&d, typeId);
836                                 ASSERT(d.HasArrayStride);
837                                 auto & obj = getObject(indexIds[i]);
838                                 if (obj.kind == Object::Kind::Constant)
839                                         constantOffset += d.ArrayStride/sizeof(float) * GetConstantInt(indexIds[i]);
840                                 else
841                                         dynamicOffset += SIMD::Int(d.ArrayStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
842                                 typeId = type.element;
843                                 break;
844                         }
845                         case spv::OpTypeMatrix:
846                         {
847                                 // TODO: b/127950082: Check bounds.
848                                 ApplyDecorationsForId(&d, typeId);
849                                 ASSERT(d.HasMatrixStride);
850                                 auto & obj = getObject(indexIds[i]);
851                                 if (obj.kind == Object::Kind::Constant)
852                                         constantOffset += d.MatrixStride/sizeof(float) * GetConstantInt(indexIds[i]);
853                                 else
854                                         dynamicOffset += SIMD::Int(d.MatrixStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
855                                 typeId = type.element;
856                                 break;
857                         }
858                         case spv::OpTypeVector:
859                         {
860                                 auto & obj = getObject(indexIds[i]);
861                                 if (obj.kind == Object::Kind::Constant)
862                                         constantOffset += GetConstantInt(indexIds[i]);
863                                 else
864                                         dynamicOffset += routine->getIntermediate(indexIds[i]).Int(0);
865                                 typeId = type.element;
866                                 break;
867                         }
868                         default:
869                                 UNIMPLEMENTED("Unexpected type '%s' in WalkExplicitLayoutAccessChain", OpcodeName(type.definition.opcode()).c_str());
870                         }
871                 }
872
873                 return dynamicOffset + SIMD::Int(constantOffset);
874         }
875
876         SIMD::Int SpirvShader::WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
877         {
878                 // TODO: avoid doing per-lane work in some cases if we can?
879                 // Produce a *component* offset into location-oriented memory
880
881                 int constantOffset = 0;
882                 SIMD::Int dynamicOffset = SIMD::Int(0);
883                 auto &baseObject = getObject(id);
884                 Type::ID typeId = getType(baseObject.type).element;
885
886                 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
887                 // Start with its offset and build from there.
888                 if (baseObject.kind == Object::Kind::Value)
889                 {
890                         dynamicOffset += routine->getIntermediate(id).Int(0);
891                 }
892
893                 for (auto i = 0u; i < numIndexes; i++)
894                 {
895                         auto & type = getType(typeId);
896                         switch(type.opcode())
897                         {
898                         case spv::OpTypeStruct:
899                         {
900                                 int memberIndex = GetConstantInt(indexIds[i]);
901                                 int offsetIntoStruct = 0;
902                                 for (auto j = 0; j < memberIndex; j++) {
903                                         auto memberType = type.definition.word(2u + j);
904                                         offsetIntoStruct += getType(memberType).sizeInComponents;
905                                 }
906                                 constantOffset += offsetIntoStruct;
907                                 typeId = type.definition.word(2u + memberIndex);
908                                 break;
909                         }
910
911                         case spv::OpTypeVector:
912                         case spv::OpTypeMatrix:
913                         case spv::OpTypeArray:
914                         case spv::OpTypeRuntimeArray:
915                         {
916                                 // TODO: b/127950082: Check bounds.
917                                 auto stride = getType(type.element).sizeInComponents;
918                                 auto & obj = getObject(indexIds[i]);
919                                 if (obj.kind == Object::Kind::Constant)
920                                         constantOffset += stride * GetConstantInt(indexIds[i]);
921                                 else
922                                         dynamicOffset += SIMD::Int(stride) * routine->getIntermediate(indexIds[i]).Int(0);
923                                 typeId = type.element;
924                                 break;
925                         }
926
927                         default:
928                                 UNIMPLEMENTED("Unexpected type '%s' in WalkAccessChain", OpcodeName(type.opcode()).c_str());
929                         }
930                 }
931
932                 return dynamicOffset + SIMD::Int(constantOffset);
933         }
934
935         uint32_t SpirvShader::WalkLiteralAccessChain(Type::ID typeId, uint32_t numIndexes, uint32_t const *indexes) const
936         {
937                 uint32_t constantOffset = 0;
938
939                 for (auto i = 0u; i < numIndexes; i++)
940                 {
941                         auto & type = getType(typeId);
942                         switch(type.opcode())
943                         {
944                         case spv::OpTypeStruct:
945                         {
946                                 int memberIndex = indexes[i];
947                                 int offsetIntoStruct = 0;
948                                 for (auto j = 0; j < memberIndex; j++) {
949                                         auto memberType = type.definition.word(2u + j);
950                                         offsetIntoStruct += getType(memberType).sizeInComponents;
951                                 }
952                                 constantOffset += offsetIntoStruct;
953                                 typeId = type.definition.word(2u + memberIndex);
954                                 break;
955                         }
956
957                         case spv::OpTypeVector:
958                         case spv::OpTypeMatrix:
959                         case spv::OpTypeArray:
960                         {
961                                 auto elementType = type.definition.word(2);
962                                 auto stride = getType(elementType).sizeInComponents;
963                                 constantOffset += stride * indexes[i];
964                                 typeId = elementType;
965                                 break;
966                         }
967
968                         default:
969                                 UNIMPLEMENTED("Unexpected type in WalkLiteralAccessChain");
970                         }
971                 }
972
973                 return constantOffset;
974         }
975
976         void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
977         {
978                 switch (decoration)
979                 {
980                 case spv::DecorationLocation:
981                         HasLocation = true;
982                         Location = static_cast<int32_t>(arg);
983                         break;
984                 case spv::DecorationComponent:
985                         HasComponent = true;
986                         Component = arg;
987                         break;
988                 case spv::DecorationDescriptorSet:
989                         HasDescriptorSet = true;
990                         DescriptorSet = arg;
991                         break;
992                 case spv::DecorationBinding:
993                         HasBinding = true;
994                         Binding = arg;
995                         break;
996                 case spv::DecorationBuiltIn:
997                         HasBuiltIn = true;
998                         BuiltIn = static_cast<spv::BuiltIn>(arg);
999                         break;
1000                 case spv::DecorationFlat:
1001                         Flat = true;
1002                         break;
1003                 case spv::DecorationNoPerspective:
1004                         NoPerspective = true;
1005                         break;
1006                 case spv::DecorationCentroid:
1007                         Centroid = true;
1008                         break;
1009                 case spv::DecorationBlock:
1010                         Block = true;
1011                         break;
1012                 case spv::DecorationBufferBlock:
1013                         BufferBlock = true;
1014                         break;
1015                 case spv::DecorationOffset:
1016                         HasOffset = true;
1017                         Offset = static_cast<int32_t>(arg);
1018                         break;
1019                 case spv::DecorationArrayStride:
1020                         HasArrayStride = true;
1021                         ArrayStride = static_cast<int32_t>(arg);
1022                         break;
1023                 case spv::DecorationMatrixStride:
1024                         HasMatrixStride = true;
1025                         MatrixStride = static_cast<int32_t>(arg);
1026                         break;
1027                 default:
1028                         // Intentionally partial, there are many decorations we just don't care about.
1029                         break;
1030                 }
1031         }
1032
1033         void SpirvShader::Decorations::Apply(const sw::SpirvShader::Decorations &src)
1034         {
1035                 // Apply a decoration group to this set of decorations
1036                 if (src.HasBuiltIn)
1037                 {
1038                         HasBuiltIn = true;
1039                         BuiltIn = src.BuiltIn;
1040                 }
1041
1042                 if (src.HasLocation)
1043                 {
1044                         HasLocation = true;
1045                         Location = src.Location;
1046                 }
1047
1048                 if (src.HasComponent)
1049                 {
1050                         HasComponent = true;
1051                         Component = src.Component;
1052                 }
1053
1054                 if (src.HasDescriptorSet)
1055                 {
1056                         HasDescriptorSet = true;
1057                         DescriptorSet = src.DescriptorSet;
1058                 }
1059
1060                 if (src.HasBinding)
1061                 {
1062                         HasBinding = true;
1063                         Binding = src.Binding;
1064                 }
1065
1066                 if (src.HasOffset)
1067                 {
1068                         HasOffset = true;
1069                         Offset = src.Offset;
1070                 }
1071
1072                 if (src.HasArrayStride)
1073                 {
1074                         HasArrayStride = true;
1075                         ArrayStride = src.ArrayStride;
1076                 }
1077
1078                 if (src.HasMatrixStride)
1079                 {
1080                         HasMatrixStride = true;
1081                         MatrixStride = src.MatrixStride;
1082                 }
1083
1084                 Flat |= src.Flat;
1085                 NoPerspective |= src.NoPerspective;
1086                 Centroid |= src.Centroid;
1087                 Block |= src.Block;
1088                 BufferBlock |= src.BufferBlock;
1089         }
1090
1091         void SpirvShader::ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const
1092         {
1093                 auto it = decorations.find(id);
1094                 if (it != decorations.end())
1095                         d->Apply(it->second);
1096         }
1097
1098         void SpirvShader::ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const
1099         {
1100                 auto it = memberDecorations.find(id);
1101                 if (it != memberDecorations.end() && member < it->second.size())
1102                 {
1103                         d->Apply(it->second[member]);
1104                 }
1105         }
1106
1107         uint32_t SpirvShader::GetConstantInt(Object::ID id) const
1108         {
1109                 // Slightly hackish access to constants very early in translation.
1110                 // General consumption of constants by other instructions should
1111                 // probably be just lowered to Reactor.
1112
1113                 // TODO: not encountered yet since we only use this for array sizes etc,
1114                 // but is possible to construct integer constant 0 via OpConstantNull.
1115                 auto insn = getObject(id).definition;
1116                 ASSERT(insn.opcode() == spv::OpConstant);
1117                 ASSERT(getType(insn.word(1)).opcode() == spv::OpTypeInt);
1118                 return insn.word(3);
1119         }
1120
1121         // emit-time
1122
1123         void SpirvShader::emitProlog(SpirvRoutine *routine) const
1124         {
1125                 for (auto insn : *this)
1126                 {
1127                         switch (insn.opcode())
1128                         {
1129                         case spv::OpVariable:
1130                         {
1131                                 Type::ID resultPointerTypeId = insn.word(1);
1132                                 auto resultPointerType = getType(resultPointerTypeId);
1133                                 auto pointeeType = getType(resultPointerType.element);
1134
1135                                 if(pointeeType.sizeInComponents > 0)  // TODO: what to do about zero-slot objects?
1136                                 {
1137                                         Object::ID resultId = insn.word(2);
1138                                         routine->createLvalue(resultId, pointeeType.sizeInComponents);
1139                                 }
1140                                 break;
1141                         }
1142                         default:
1143                                 // Nothing else produces interface variables, so can all be safely ignored.
1144                                 break;
1145                         }
1146                 }
1147         }
1148
1149         void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask) const
1150         {
1151                 EmitState state;
1152                 state.setActiveLaneMask(activeLaneMask);
1153                 state.routine = routine;
1154
1155                 // Emit everything up to the first label
1156                 // TODO: Separate out dispatch of block from non-block instructions?
1157                 for (auto insn : *this)
1158                 {
1159                         if (insn.opcode() == spv::OpLabel)
1160                         {
1161                                 break;
1162                         }
1163                         EmitInstruction(insn, &state);
1164                 }
1165
1166                 // Emit all the blocks in BFS order, starting with the main block.
1167                 std::queue<Block::ID> pending;
1168                 pending.push(mainBlockId);
1169                 while (pending.size() > 0)
1170                 {
1171                         auto id = pending.front();
1172                         pending.pop();
1173                         if (state.visited.count(id) == 0)
1174                         {
1175                                 EmitBlock(id, &state);
1176                                 for (auto it : getBlock(id).outs)
1177                                 {
1178                                         pending.push(it);
1179                                 }
1180                         }
1181                 }
1182         }
1183
1184         void SpirvShader::EmitBlock(Block::ID id, EmitState *state) const
1185         {
1186                 if (state->visited.count(id) > 0)
1187                 {
1188                         return; // Already processed this block.
1189                 }
1190
1191                 state->visited.emplace(id);
1192
1193                 auto &block = getBlock(id);
1194
1195                 switch (block.kind)
1196                 {
1197                         case Block::Simple:
1198                         case Block::StructuredBranchConditional:
1199                         case Block::UnstructuredBranchConditional:
1200                         case Block::StructuredSwitch:
1201                         case Block::UnstructuredSwitch:
1202                                 if (id != mainBlockId)
1203                                 {
1204                                         // Emit all preceding blocks and set the activeLaneMask.
1205                                         Intermediate activeLaneMask(1);
1206                                         activeLaneMask.move(0, SIMD::Int(0));
1207                                         for (auto in : block.ins)
1208                                         {
1209                                                 EmitBlock(in, state);
1210                                                 auto inMask = state->getActiveLaneMaskEdge(in, id);
1211                                                 activeLaneMask.replace(0, activeLaneMask.Int(0) | inMask);
1212                                         }
1213                                         state->setActiveLaneMask(activeLaneMask.Int(0));
1214                                 }
1215                                 state->currentBlock = id;
1216                                 EmitInstructions(block.begin(), block.end(), state);
1217                                 break;
1218
1219                         case Block::Loop:
1220                                 state->currentBlock = id;
1221                                 EmitLoop(state);
1222                                 break;
1223
1224                         default:
1225                                 UNREACHABLE("Unexpected Block Kind: %d", int(block.kind));
1226                 }
1227         }
1228
1229         void SpirvShader::EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const
1230         {
1231                 for (auto insn = begin; insn != end; insn++)
1232                 {
1233                         auto res = EmitInstruction(insn, state);
1234                         switch (res)
1235                         {
1236                         case EmitResult::Continue:
1237                                 continue;
1238                         case EmitResult::Terminator:
1239                                 break;
1240                         default:
1241                                 UNREACHABLE("Unexpected EmitResult %d", int(res));
1242                                 break;
1243                         }
1244                 }
1245         }
1246
1247         void SpirvShader::EmitLoop(EmitState *state) const
1248         {
1249                 auto blockId = state->currentBlock;
1250                 auto block = getBlock(blockId);
1251
1252                 // loopActiveLaneMask is the mask of lanes that are continuing to loop.
1253                 // This is initialized with the incoming active lane masks.
1254                 SIMD::Int loopActiveLaneMask = SIMD::Int(0);
1255                 for (auto in : block.ins)
1256                 {
1257                         if (!existsPath(blockId, in)) // if not a loop back edge
1258                         {
1259                                 EmitBlock(in, state);
1260                                 loopActiveLaneMask |= state->getActiveLaneMaskEdge(in, blockId);
1261                         }
1262                 }
1263
1264                 // Generate an alloca for each of the loop's phis.
1265                 // These will be primed with the incoming, non back edge Phi values
1266                 // before the loop, and then updated just before the loop jumps back to
1267                 // the block.
1268                 struct LoopPhi
1269                 {
1270                         Object::ID phiId; // The Phi identifier.
1271                         Object::ID continueValue; // The source merge value from the loop.
1272                         Array<SIMD::Int> storage; // The alloca.
1273                 };
1274
1275                 std::vector<LoopPhi> phis;
1276
1277                 // For each OpPhi between the block start and the merge instruction:
1278                 for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
1279                 {
1280                         if (insn.opcode() == spv::OpPhi)
1281                         {
1282                                 auto objectId = Object::ID(insn.word(2));
1283                                 auto &object = getObject(objectId);
1284                                 auto &type = getType(object.type);
1285
1286                                 LoopPhi phi;
1287                                 phi.phiId = Object::ID(insn.word(2));
1288                                 phi.storage = Array<SIMD::Int>(type.sizeInComponents);
1289
1290                                 // Start with the Phi set to 0.
1291                                 for (uint32_t i = 0; i < type.sizeInComponents; i++)
1292                                 {
1293                                         phi.storage[i] = SIMD::Int(0);
1294                                 }
1295
1296                                 // For each Phi source:
1297                                 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
1298                                 {
1299                                         auto varId = Object::ID(insn.word(w + 0));
1300                                         auto blockId = Block::ID(insn.word(w + 1));
1301                                         if (existsPath(state->currentBlock, blockId))
1302                                         {
1303                                                 // This source is from a loop back-edge.
1304                                                 ASSERT(phi.continueValue == 0 || phi.continueValue == varId);
1305                                                 phi.continueValue = varId;
1306                                         }
1307                                         else
1308                                         {
1309                                                 // This source is from a preceding block.
1310                                                 for (uint32_t i = 0; i < type.sizeInComponents; i++)
1311                                                 {
1312                                                         auto in = GenericValue(this, state->routine, varId);
1313                                                         auto mask = state->getActiveLaneMaskEdge(blockId, state->currentBlock);
1314                                                         phi.storage[i] = phi.storage[i] | (in.Int(i) & mask);
1315                                                 }
1316                                         }
1317                                 }
1318
1319                                 phis.push_back(phi);
1320                         }
1321                 }
1322
1323                 // Create the loop basic blocks
1324                 auto headerBasicBlock = Nucleus::createBasicBlock();
1325                 auto mergeBasicBlock = Nucleus::createBasicBlock();
1326
1327                 // Start emitting code inside the loop.
1328                 Nucleus::createBr(headerBasicBlock);
1329                 Nucleus::setInsertBlock(headerBasicBlock);
1330
1331                 // Load the Phi values from storage.
1332                 // This will load at the start of each loop.
1333                 for (auto &phi : phis)
1334                 {
1335                         auto &type = getType(getObject(phi.phiId).type);
1336                         auto &dst = state->routine->createIntermediate(phi.phiId, type.sizeInComponents);
1337                         for (unsigned int i = 0u; i < type.sizeInComponents; i++)
1338                         {
1339                                 dst.move(i, phi.storage[i]);
1340                         }
1341                 }
1342
1343                 // Load the active lane mask.
1344                 state->setActiveLaneMask(loopActiveLaneMask);
1345
1346                 // Emit all the non-phi instructions in this loop header block.
1347                 for (auto insn = block.begin(); insn != block.end(); insn++)
1348                 {
1349                         if (insn.opcode() != spv::OpPhi)
1350                         {
1351                                 EmitInstruction(insn, state);
1352                         }
1353                 }
1354
1355                 // Emit all the back-edge blocks and use their active lane masks to
1356                 // rebuild the loopActiveLaneMask.
1357                 loopActiveLaneMask = SIMD::Int(0);
1358                 for (auto in : block.ins)
1359                 {
1360                         if (existsPath(blockId, in))
1361                         {
1362                                 EmitBlock(in, state);
1363                                 loopActiveLaneMask |= state->getActiveLaneMaskEdge(in, blockId);
1364                         }
1365                 }
1366
1367                 // Update loop phi values
1368                 for (auto &phi : phis)
1369                 {
1370                         if (phi.continueValue != 0)
1371                         {
1372                                 auto val = GenericValue(this, state->routine, phi.continueValue);
1373                                 auto &type = getType(getObject(phi.phiId).type);
1374                                 for (unsigned int i = 0u; i < type.sizeInComponents; i++)
1375                                 {
1376                                         phi.storage[i] = val.Int(i);
1377                                 }
1378                         }
1379                 }
1380
1381                 // Loop body now done.
1382                 // If any lanes are still active, jump back to the loop header,
1383                 // otherwise jump to the merge block.
1384                 Nucleus::createCondBr(AnyTrue(loopActiveLaneMask).value, headerBasicBlock, mergeBasicBlock);
1385
1386                 // Emit the merge block, and we're done.
1387                 Nucleus::setInsertBlock(mergeBasicBlock);
1388                 EmitBlock(block.mergeBlock, state);
1389         }
1390
1391         SpirvShader::EmitResult SpirvShader::EmitInstruction(InsnIterator insn, EmitState *state) const
1392         {
1393                 switch (insn.opcode())
1394                 {
1395                 case spv::OpTypeVoid:
1396                 case spv::OpTypeInt:
1397                 case spv::OpTypeFloat:
1398                 case spv::OpTypeBool:
1399                 case spv::OpTypeVector:
1400                 case spv::OpTypeArray:
1401                 case spv::OpTypeRuntimeArray:
1402                 case spv::OpTypeMatrix:
1403                 case spv::OpTypeStruct:
1404                 case spv::OpTypePointer:
1405                 case spv::OpTypeFunction:
1406                 case spv::OpExecutionMode:
1407                 case spv::OpMemoryModel:
1408                 case spv::OpFunction:
1409                 case spv::OpFunctionEnd:
1410                 case spv::OpConstant:
1411                 case spv::OpConstantNull:
1412                 case spv::OpConstantTrue:
1413                 case spv::OpConstantFalse:
1414                 case spv::OpConstantComposite:
1415                 case spv::OpUndef:
1416                 case spv::OpExtension:
1417                 case spv::OpCapability:
1418                 case spv::OpEntryPoint:
1419                 case spv::OpExtInstImport:
1420                 case spv::OpDecorate:
1421                 case spv::OpMemberDecorate:
1422                 case spv::OpGroupDecorate:
1423                 case spv::OpGroupMemberDecorate:
1424                 case spv::OpDecorationGroup:
1425                 case spv::OpName:
1426                 case spv::OpMemberName:
1427                 case spv::OpSource:
1428                 case spv::OpSourceContinued:
1429                 case spv::OpSourceExtension:
1430                 case spv::OpLine:
1431                 case spv::OpNoLine:
1432                 case spv::OpModuleProcessed:
1433                 case spv::OpString:
1434                         // Nothing to do at emit time. These are either fully handled at analysis time,
1435                         // or don't require any work at all.
1436                         return EmitResult::Continue;
1437
1438                 case spv::OpLabel:
1439                         return EmitResult::Continue;
1440
1441                 case spv::OpVariable:
1442                         return EmitVariable(insn, state);
1443
1444                 case spv::OpLoad:
1445                 case spv::OpAtomicLoad:
1446                         return EmitLoad(insn, state);
1447
1448                 case spv::OpStore:
1449                 case spv::OpAtomicStore:
1450                         return EmitStore(insn, state);
1451
1452                 case spv::OpAccessChain:
1453                 case spv::OpInBoundsAccessChain:
1454                         return EmitAccessChain(insn, state);
1455
1456                 case spv::OpCompositeConstruct:
1457                         return EmitCompositeConstruct(insn, state);
1458
1459                 case spv::OpCompositeInsert:
1460                         return EmitCompositeInsert(insn, state);
1461
1462                 case spv::OpCompositeExtract:
1463                         return EmitCompositeExtract(insn, state);
1464
1465                 case spv::OpVectorShuffle:
1466                         return EmitVectorShuffle(insn, state);
1467
1468                 case spv::OpVectorExtractDynamic:
1469                         return EmitVectorExtractDynamic(insn, state);
1470
1471                 case spv::OpVectorInsertDynamic:
1472                         return EmitVectorInsertDynamic(insn, state);
1473
1474                 case spv::OpVectorTimesScalar:
1475                 case spv::OpMatrixTimesScalar:
1476                         return EmitVectorTimesScalar(insn, state);
1477
1478                 case spv::OpMatrixTimesVector:
1479                         return EmitMatrixTimesVector(insn, state);
1480
1481                 case spv::OpVectorTimesMatrix:
1482                         return EmitVectorTimesMatrix(insn, state);
1483
1484                 case spv::OpMatrixTimesMatrix:
1485                         return EmitMatrixTimesMatrix(insn, state);
1486
1487                 case spv::OpNot:
1488                 case spv::OpSNegate:
1489                 case spv::OpFNegate:
1490                 case spv::OpLogicalNot:
1491                 case spv::OpConvertFToU:
1492                 case spv::OpConvertFToS:
1493                 case spv::OpConvertSToF:
1494                 case spv::OpConvertUToF:
1495                 case spv::OpBitcast:
1496                 case spv::OpIsInf:
1497                 case spv::OpIsNan:
1498                 case spv::OpDPdx:
1499                 case spv::OpDPdxCoarse:
1500                 case spv::OpDPdy:
1501                 case spv::OpDPdyCoarse:
1502                 case spv::OpFwidth:
1503                 case spv::OpFwidthCoarse:
1504                 case spv::OpDPdxFine:
1505                 case spv::OpDPdyFine:
1506                 case spv::OpFwidthFine:
1507                         return EmitUnaryOp(insn, state);
1508
1509                 case spv::OpIAdd:
1510                 case spv::OpISub:
1511                 case spv::OpIMul:
1512                 case spv::OpSDiv:
1513                 case spv::OpUDiv:
1514                 case spv::OpFAdd:
1515                 case spv::OpFSub:
1516                 case spv::OpFMul:
1517                 case spv::OpFDiv:
1518                 case spv::OpFMod:
1519                 case spv::OpFRem:
1520                 case spv::OpFOrdEqual:
1521                 case spv::OpFUnordEqual:
1522                 case spv::OpFOrdNotEqual:
1523                 case spv::OpFUnordNotEqual:
1524                 case spv::OpFOrdLessThan:
1525                 case spv::OpFUnordLessThan:
1526                 case spv::OpFOrdGreaterThan:
1527                 case spv::OpFUnordGreaterThan:
1528                 case spv::OpFOrdLessThanEqual:
1529                 case spv::OpFUnordLessThanEqual:
1530                 case spv::OpFOrdGreaterThanEqual:
1531                 case spv::OpFUnordGreaterThanEqual:
1532                 case spv::OpSMod:
1533                 case spv::OpSRem:
1534                 case spv::OpUMod:
1535                 case spv::OpIEqual:
1536                 case spv::OpINotEqual:
1537                 case spv::OpUGreaterThan:
1538                 case spv::OpSGreaterThan:
1539                 case spv::OpUGreaterThanEqual:
1540                 case spv::OpSGreaterThanEqual:
1541                 case spv::OpULessThan:
1542                 case spv::OpSLessThan:
1543                 case spv::OpULessThanEqual:
1544                 case spv::OpSLessThanEqual:
1545                 case spv::OpShiftRightLogical:
1546                 case spv::OpShiftRightArithmetic:
1547                 case spv::OpShiftLeftLogical:
1548                 case spv::OpBitwiseOr:
1549                 case spv::OpBitwiseXor:
1550                 case spv::OpBitwiseAnd:
1551                 case spv::OpLogicalOr:
1552                 case spv::OpLogicalAnd:
1553                 case spv::OpLogicalEqual:
1554                 case spv::OpLogicalNotEqual:
1555                 case spv::OpUMulExtended:
1556                 case spv::OpSMulExtended:
1557                         return EmitBinaryOp(insn, state);
1558
1559                 case spv::OpDot:
1560                         return EmitDot(insn, state);
1561
1562                 case spv::OpSelect:
1563                         return EmitSelect(insn, state);
1564
1565                 case spv::OpExtInst:
1566                         return EmitExtendedInstruction(insn, state);
1567
1568                 case spv::OpAny:
1569                         return EmitAny(insn, state);
1570
1571                 case spv::OpAll:
1572                         return EmitAll(insn, state);
1573
1574                 case spv::OpBranch:
1575                         return EmitBranch(insn, state);
1576
1577                 case spv::OpPhi:
1578                         return EmitPhi(insn, state);
1579
1580                 case spv::OpSelectionMerge:
1581                 case spv::OpLoopMerge:
1582                         return EmitResult::Continue;
1583
1584                 case spv::OpBranchConditional:
1585                         return EmitBranchConditional(insn, state);
1586
1587                 case spv::OpSwitch:
1588                         return EmitSwitch(insn, state);
1589
1590                 case spv::OpUnreachable:
1591                         return EmitUnreachable(insn, state);
1592
1593                 case spv::OpReturn:
1594                         return EmitReturn(insn, state);
1595
1596                 default:
1597                         UNIMPLEMENTED("opcode: %s", OpcodeName(insn.opcode()).c_str());
1598                         break;
1599                 }
1600
1601                 return EmitResult::Continue;
1602         }
1603
1604         SpirvShader::EmitResult SpirvShader::EmitVariable(InsnIterator insn, EmitState *state) const
1605         {
1606                 auto routine = state->routine;
1607                 Object::ID resultId = insn.word(2);
1608                 auto &object = getObject(resultId);
1609                 auto &objectTy = getType(object.type);
1610                 switch (objectTy.storageClass)
1611                 {
1612                 case spv::StorageClassInput:
1613                 {
1614                         if (object.kind == Object::Kind::InterfaceVariable)
1615                         {
1616                                 auto &dst = routine->getValue(resultId);
1617                                 int offset = 0;
1618                                 VisitInterface(resultId,
1619                                                                 [&](Decorations const &d, AttribType type) {
1620                                                                         auto scalarSlot = d.Location << 2 | d.Component;
1621                                                                         dst[offset++] = routine->inputs[scalarSlot];
1622                                                                 });
1623                         }
1624                         break;
1625                 }
1626                 case spv::StorageClassUniform:
1627                 case spv::StorageClassStorageBuffer:
1628                 {
1629                         Decorations d{};
1630                         ApplyDecorationsForId(&d, resultId);
1631                         ASSERT(d.DescriptorSet >= 0);
1632                         ASSERT(d.Binding >= 0);
1633
1634                         size_t bindingOffset = routine->pipelineLayout->getBindingOffset(d.DescriptorSet, d.Binding);
1635
1636                         Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
1637                         Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // VkDescriptorBufferInfo*
1638                         Pointer<Byte> buffer = *Pointer<Pointer<Byte>>(binding + OFFSET(VkDescriptorBufferInfo, buffer)); // vk::Buffer*
1639                         Pointer<Byte> data = *Pointer<Pointer<Byte>>(buffer + vk::Buffer::DataOffset); // void*
1640                         Int offset = *Pointer<Int>(binding + OFFSET(VkDescriptorBufferInfo, offset));
1641                         Pointer<Byte> address = data + offset;
1642                         routine->physicalPointers[resultId] = address;
1643                         break;
1644                 }
1645                 case spv::StorageClassPushConstant:
1646                 {
1647                         routine->physicalPointers[resultId] = routine->pushConstants;
1648                         break;
1649                 }
1650                 default:
1651                         break;
1652                 }
1653
1654                 return EmitResult::Continue;
1655         }
1656
1657         SpirvShader::EmitResult SpirvShader::EmitLoad(InsnIterator insn, EmitState *state) const
1658         {
1659                 auto routine = state->routine;
1660                 bool atomic = (insn.opcode() == spv::OpAtomicLoad);
1661                 Object::ID resultId = insn.word(2);
1662                 Object::ID pointerId = insn.word(3);
1663                 auto &result = getObject(resultId);
1664                 auto &resultTy = getType(result.type);
1665                 auto &pointer = getObject(pointerId);
1666                 auto &pointerBase = getObject(pointer.pointerBase);
1667                 auto &pointerBaseTy = getType(pointerBase.type);
1668                 std::memory_order memoryOrder = std::memory_order_relaxed;
1669
1670                 if(atomic)
1671                 {
1672                         Object::ID semanticsId = insn.word(5);
1673                         auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1674                         memoryOrder = MemoryOrder(memorySemantics);
1675                 }
1676
1677                 ASSERT(getType(pointer.type).element == result.type);
1678                 ASSERT(Type::ID(insn.word(1)) == result.type);
1679                 ASSERT(!atomic || getType(getType(pointer.type).element).opcode() == spv::OpTypeInt);  // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1680
1681                 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1682                 {
1683                         UNIMPLEMENTED("StorageClassImage load not yet implemented");
1684                 }
1685
1686                 Pointer<Float> ptrBase;
1687                 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1688                 {
1689                         ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1690                 }
1691                 else
1692                 {
1693                         ptrBase = &routine->getValue(pointer.pointerBase)[0];
1694                 }
1695
1696                 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1697                 auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
1698
1699                 auto load = std::unique_ptr<SIMD::Float[]>(new SIMD::Float[resultTy.sizeInComponents]);
1700
1701                 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1702                 {
1703                         // Divergent offsets or masked lanes.
1704                         auto offsets = pointer.kind == Object::Kind::Value ?
1705                                         As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1706                                         RValue<SIMD::Int>(SIMD::Int(0));
1707                         for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1708                         {
1709                                 // i wish i had a Float,Float,Float,Float constructor here..
1710                                 for (int j = 0; j < SIMD::Width; j++)
1711                                 {
1712                                         If(Extract(state->activeLaneMask(), j) != 0)
1713                                         {
1714                                                 Int offset = Int(i) + Extract(offsets, j);
1715                                                 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1716                                                 load[i] = Insert(load[i], Load(&ptrBase[offset], sizeof(float), atomic, memoryOrder), j);
1717                                         }
1718                                 }
1719                         }
1720                 }
1721                 Else
1722                 {
1723                         // No divergent offsets or masked lanes.
1724                         if (interleavedByLane)
1725                         {
1726                                 // Lane-interleaved data.
1727                                 Pointer<SIMD::Float> src = ptrBase;
1728                                 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1729                                 {
1730                                         load[i] = Load(&src[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1731                                 }
1732                         }
1733                         else
1734                         {
1735                                 // Non-interleaved data.
1736                                 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1737                                 {
1738                                         load[i] = RValue<SIMD::Float>(Load(&ptrBase[i], sizeof(float), atomic, memoryOrder));  // TODO: optimize alignment
1739                                 }
1740                         }
1741                 }
1742
1743                 auto &dst = routine->createIntermediate(resultId, resultTy.sizeInComponents);
1744                 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1745                 {
1746                         dst.move(i, load[i]);
1747                 }
1748
1749                 return EmitResult::Continue;
1750         }
1751
1752         SpirvShader::EmitResult SpirvShader::EmitStore(InsnIterator insn, EmitState *state) const
1753         {
1754                 auto routine = state->routine;
1755                 bool atomic = (insn.opcode() == spv::OpAtomicStore);
1756                 Object::ID pointerId = insn.word(1);
1757                 Object::ID objectId = insn.word(atomic ? 4 : 2);
1758                 auto &object = getObject(objectId);
1759                 auto &pointer = getObject(pointerId);
1760                 auto &pointerTy = getType(pointer.type);
1761                 auto &elementTy = getType(pointerTy.element);
1762                 auto &pointerBase = getObject(pointer.pointerBase);
1763                 auto &pointerBaseTy = getType(pointerBase.type);
1764                 std::memory_order memoryOrder = std::memory_order_relaxed;
1765
1766                 if(atomic)
1767                 {
1768                         Object::ID semanticsId = insn.word(3);
1769                         auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1770                         memoryOrder = MemoryOrder(memorySemantics);
1771                 }
1772
1773                 ASSERT(!atomic || elementTy.opcode() == spv::OpTypeInt);  // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1774
1775                 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1776                 {
1777                         UNIMPLEMENTED("StorageClassImage store not yet implemented");
1778                 }
1779
1780                 Pointer<Float> ptrBase;
1781                 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1782                 {
1783                         ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1784                 }
1785                 else
1786                 {
1787                         ptrBase = &routine->getValue(pointer.pointerBase)[0];
1788                 }
1789
1790                 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1791                 auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
1792
1793                 if (object.kind == Object::Kind::Constant)
1794                 {
1795                         // Constant source data.
1796                         auto src = reinterpret_cast<float *>(object.constantValue.get());
1797                         If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1798                         {
1799                                 // Divergent offsets or masked lanes.
1800                                 auto offsets = pointer.kind == Object::Kind::Value ?
1801                                                 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1802                                                 RValue<SIMD::Int>(SIMD::Int(0));
1803                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1804                                 {
1805                                         for (int j = 0; j < SIMD::Width; j++)
1806                                         {
1807                                                 If(Extract(state->activeLaneMask(), j) != 0)
1808                                                 {
1809                                                         Int offset = Int(i) + Extract(offsets, j);
1810                                                         if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1811                                                         Store(RValue<Float>(src[i]), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1812                                                 }
1813                                         }
1814                                 }
1815                         }
1816                         Else
1817                         {
1818                                 // Constant source data.
1819                                 // No divergent offsets or masked lanes.
1820                                 Pointer<SIMD::Float> dst = ptrBase;
1821                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1822                                 {
1823                                         Store(RValue<SIMD::Float>(src[i]), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1824                                 }
1825                         }
1826                 }
1827                 else
1828                 {
1829                         // Intermediate source data.
1830                         auto &src = routine->getIntermediate(objectId);
1831                         If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1832                         {
1833                                 // Divergent offsets or masked lanes.
1834                                 auto offsets = pointer.kind == Object::Kind::Value ?
1835                                                 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1836                                                 RValue<SIMD::Int>(SIMD::Int(0));
1837                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1838                                 {
1839                                         for (int j = 0; j < SIMD::Width; j++)
1840                                         {
1841                                                 If(Extract(state->activeLaneMask(), j) != 0)
1842                                                 {
1843                                                         Int offset = Int(i) + Extract(offsets, j);
1844                                                         if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1845                                                         Store(Extract(src.Float(i), j), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1846                                                 }
1847                                         }
1848                                 }
1849                         }
1850                         Else
1851                         {
1852                                 // No divergent offsets or masked lanes.
1853                                 if (interleavedByLane)
1854                                 {
1855                                         // Lane-interleaved data.
1856                                         Pointer<SIMD::Float> dst = ptrBase;
1857                                         for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1858                                         {
1859                                                 Store(src.Float(i), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1860                                         }
1861                                 }
1862                                 else
1863                                 {
1864                                         // Intermediate source data. Non-interleaved data.
1865                                         Pointer<SIMD::Float> dst = ptrBase;
1866                                         for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1867                                         {
1868                                                 Store<SIMD::Float>(SIMD::Float(src.Float(i)), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1869                                         }
1870                                 }
1871                         }
1872                 }
1873
1874                 return EmitResult::Continue;
1875         }
1876
1877         SpirvShader::EmitResult SpirvShader::EmitAccessChain(InsnIterator insn, EmitState *state) const
1878         {
1879                 auto routine = state->routine;
1880                 Type::ID typeId = insn.word(1);
1881                 Object::ID resultId = insn.word(2);
1882                 Object::ID baseId = insn.word(3);
1883                 uint32_t numIndexes = insn.wordCount() - 4;
1884                 const uint32_t *indexes = insn.wordPointer(4);
1885                 auto &type = getType(typeId);
1886                 ASSERT(type.sizeInComponents == 1);
1887                 ASSERT(getObject(baseId).pointerBase == getObject(resultId).pointerBase);
1888
1889                 auto &dst = routine->createIntermediate(resultId, type.sizeInComponents);
1890
1891                 if(type.storageClass == spv::StorageClassPushConstant ||
1892                    type.storageClass == spv::StorageClassUniform ||
1893                    type.storageClass == spv::StorageClassStorageBuffer)
1894                 {
1895                         dst.move(0, WalkExplicitLayoutAccessChain(baseId, numIndexes, indexes, routine));
1896                 }
1897                 else
1898                 {
1899                         dst.move(0, WalkAccessChain(baseId, numIndexes, indexes, routine));
1900                 }
1901
1902                 return EmitResult::Continue;
1903         }
1904
1905         SpirvShader::EmitResult SpirvShader::EmitCompositeConstruct(InsnIterator insn, EmitState *state) const
1906         {
1907                 auto routine = state->routine;
1908                 auto &type = getType(insn.word(1));
1909                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1910                 auto offset = 0u;
1911
1912                 for (auto i = 0u; i < insn.wordCount() - 3; i++)
1913                 {
1914                         Object::ID srcObjectId = insn.word(3u + i);
1915                         auto & srcObject = getObject(srcObjectId);
1916                         auto & srcObjectTy = getType(srcObject.type);
1917                         GenericValue srcObjectAccess(this, routine, srcObjectId);
1918
1919                         for (auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
1920                         {
1921                                 dst.move(offset++, srcObjectAccess.Float(j));
1922                         }
1923                 }
1924
1925                 return EmitResult::Continue;
1926         }
1927
1928         SpirvShader::EmitResult SpirvShader::EmitCompositeInsert(InsnIterator insn, EmitState *state) const
1929         {
1930                 auto routine = state->routine;
1931                 Type::ID resultTypeId = insn.word(1);
1932                 auto &type = getType(resultTypeId);
1933                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1934                 auto &newPartObject = getObject(insn.word(3));
1935                 auto &newPartObjectTy = getType(newPartObject.type);
1936                 auto firstNewComponent = WalkLiteralAccessChain(resultTypeId, insn.wordCount() - 5, insn.wordPointer(5));
1937
1938                 GenericValue srcObjectAccess(this, routine, insn.word(4));
1939                 GenericValue newPartObjectAccess(this, routine, insn.word(3));
1940
1941                 // old components before
1942                 for (auto i = 0u; i < firstNewComponent; i++)
1943                 {
1944                         dst.move(i, srcObjectAccess.Float(i));
1945                 }
1946                 // new part
1947                 for (auto i = 0u; i < newPartObjectTy.sizeInComponents; i++)
1948                 {
1949                         dst.move(firstNewComponent + i, newPartObjectAccess.Float(i));
1950                 }
1951                 // old components after
1952                 for (auto i = firstNewComponent + newPartObjectTy.sizeInComponents; i < type.sizeInComponents; i++)
1953                 {
1954                         dst.move(i, srcObjectAccess.Float(i));
1955                 }
1956
1957                 return EmitResult::Continue;
1958         }
1959
1960         SpirvShader::EmitResult SpirvShader::EmitCompositeExtract(InsnIterator insn, EmitState *state) const
1961         {
1962                 auto routine = state->routine;
1963                 auto &type = getType(insn.word(1));
1964                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1965                 auto &compositeObject = getObject(insn.word(3));
1966                 Type::ID compositeTypeId = compositeObject.definition.word(1);
1967                 auto firstComponent = WalkLiteralAccessChain(compositeTypeId, insn.wordCount() - 4, insn.wordPointer(4));
1968
1969                 GenericValue compositeObjectAccess(this, routine, insn.word(3));
1970                 for (auto i = 0u; i < type.sizeInComponents; i++)
1971                 {
1972                         dst.move(i, compositeObjectAccess.Float(firstComponent + i));
1973                 }
1974
1975                 return EmitResult::Continue;
1976         }
1977
1978         SpirvShader::EmitResult SpirvShader::EmitVectorShuffle(InsnIterator insn, EmitState *state) const
1979         {
1980                 auto routine = state->routine;
1981                 auto &type = getType(insn.word(1));
1982                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1983
1984                 // Note: number of components in result type, first half type, and second
1985                 // half type are all independent.
1986                 auto &firstHalfType = getType(getObject(insn.word(3)).type);
1987
1988                 GenericValue firstHalfAccess(this, routine, insn.word(3));
1989                 GenericValue secondHalfAccess(this, routine, insn.word(4));
1990
1991                 for (auto i = 0u; i < type.sizeInComponents; i++)
1992                 {
1993                         auto selector = insn.word(5 + i);
1994                         if (selector == static_cast<uint32_t>(-1))
1995                         {
1996                                 // Undefined value. Until we decide to do real undef values, zero is as good
1997                                 // a value as any
1998                                 dst.move(i, RValue<SIMD::Float>(0.0f));
1999                         }
2000                         else if (selector < firstHalfType.sizeInComponents)
2001                         {
2002                                 dst.move(i, firstHalfAccess.Float(selector));
2003                         }
2004                         else
2005                         {
2006                                 dst.move(i, secondHalfAccess.Float(selector - firstHalfType.sizeInComponents));
2007                         }
2008                 }
2009
2010                 return EmitResult::Continue;
2011         }
2012
2013         SpirvShader::EmitResult SpirvShader::EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const
2014         {
2015                 auto routine = state->routine;
2016                 auto &type = getType(insn.word(1));
2017                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2018                 auto &srcType = getType(getObject(insn.word(3)).type);
2019
2020                 GenericValue src(this, routine, insn.word(3));
2021                 GenericValue index(this, routine, insn.word(4));
2022
2023                 SIMD::UInt v = SIMD::UInt(0);
2024
2025                 for (auto i = 0u; i < srcType.sizeInComponents; i++)
2026                 {
2027                         v |= CmpEQ(index.UInt(0), SIMD::UInt(i)) & src.UInt(i);
2028                 }
2029
2030                 dst.move(0, v);
2031                 return EmitResult::Continue;
2032         }
2033
2034         SpirvShader::EmitResult SpirvShader::EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const
2035         {
2036                 auto routine = state->routine;
2037                 auto &type = getType(insn.word(1));
2038                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2039
2040                 GenericValue src(this, routine, insn.word(3));
2041                 GenericValue component(this, routine, insn.word(4));
2042                 GenericValue index(this, routine, insn.word(5));
2043
2044                 for (auto i = 0u; i < type.sizeInComponents; i++)
2045                 {
2046                         SIMD::UInt mask = CmpEQ(SIMD::UInt(i), index.UInt(0));
2047                         dst.move(i, (src.UInt(i) & ~mask) | (component.UInt(0) & mask));
2048                 }
2049                 return EmitResult::Continue;
2050         }
2051
2052         SpirvShader::EmitResult SpirvShader::EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const
2053         {
2054                 auto routine = state->routine;
2055                 auto &type = getType(insn.word(1));
2056                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2057                 auto lhs = GenericValue(this, routine, insn.word(3));
2058                 auto rhs = GenericValue(this, routine, insn.word(4));
2059
2060                 for (auto i = 0u; i < type.sizeInComponents; i++)
2061                 {
2062                         dst.move(i, lhs.Float(i) * rhs.Float(0));
2063                 }
2064
2065                 return EmitResult::Continue;
2066         }
2067
2068         SpirvShader::EmitResult SpirvShader::EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const
2069         {
2070                 auto routine = state->routine;
2071                 auto &type = getType(insn.word(1));
2072                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2073                 auto lhs = GenericValue(this, routine, insn.word(3));
2074                 auto rhs = GenericValue(this, routine, insn.word(4));
2075                 auto rhsType = getType(getObject(insn.word(4)).type);
2076
2077                 for (auto i = 0u; i < type.sizeInComponents; i++)
2078                 {
2079                         SIMD::Float v = lhs.Float(i) * rhs.Float(0);
2080                         for (auto j = 1u; j < rhsType.sizeInComponents; j++)
2081                         {
2082                                 v += lhs.Float(i + type.sizeInComponents * j) * rhs.Float(j);
2083                         }
2084                         dst.move(i, v);
2085                 }
2086
2087                 return EmitResult::Continue;
2088         }
2089
2090         SpirvShader::EmitResult SpirvShader::EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const
2091         {
2092                 auto routine = state->routine;
2093                 auto &type = getType(insn.word(1));
2094                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2095                 auto lhs = GenericValue(this, routine, insn.word(3));
2096                 auto rhs = GenericValue(this, routine, insn.word(4));
2097                 auto lhsType = getType(getObject(insn.word(3)).type);
2098
2099                 for (auto i = 0u; i < type.sizeInComponents; i++)
2100                 {
2101                         SIMD::Float v = lhs.Float(0) * rhs.Float(i * lhsType.sizeInComponents);
2102                         for (auto j = 1u; j < lhsType.sizeInComponents; j++)
2103                         {
2104                                 v += lhs.Float(j) * rhs.Float(i * lhsType.sizeInComponents + j);
2105                         }
2106                         dst.move(i, v);
2107                 }
2108
2109                 return EmitResult::Continue;
2110         }
2111
2112         SpirvShader::EmitResult SpirvShader::EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const
2113         {
2114                 auto routine = state->routine;
2115                 auto &type = getType(insn.word(1));
2116                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2117                 auto lhs = GenericValue(this, routine, insn.word(3));
2118                 auto rhs = GenericValue(this, routine, insn.word(4));
2119
2120                 auto numColumns = type.definition.word(3);
2121                 auto numRows = getType(type.definition.word(2)).definition.word(3);
2122                 auto numAdds = getType(getObject(insn.word(3)).type).definition.word(3);
2123
2124                 for (auto row = 0u; row < numRows; row++)
2125                 {
2126                         for (auto col = 0u; col < numColumns; col++)
2127                         {
2128                                 SIMD::Float v = SIMD::Float(0);
2129                                 for (auto i = 0u; i < numAdds; i++)
2130                                 {
2131                                         v += lhs.Float(i * numRows + row) * rhs.Float(col * numAdds + i);
2132                                 }
2133                                 dst.move(numRows * col + row, v);
2134                         }
2135                 }
2136
2137                 return EmitResult::Continue;
2138         }
2139
2140         SpirvShader::EmitResult SpirvShader::EmitUnaryOp(InsnIterator insn, EmitState *state) const
2141         {
2142                 auto routine = state->routine;
2143                 auto &type = getType(insn.word(1));
2144                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2145                 auto src = GenericValue(this, routine, insn.word(3));
2146
2147                 for (auto i = 0u; i < type.sizeInComponents; i++)
2148                 {
2149                         switch (insn.opcode())
2150                         {
2151                         case spv::OpNot:
2152                         case spv::OpLogicalNot:         // logical not == bitwise not due to all-bits boolean representation
2153                                 dst.move(i, ~src.UInt(i));
2154                                 break;
2155                         case spv::OpSNegate:
2156                                 dst.move(i, -src.Int(i));
2157                                 break;
2158                         case spv::OpFNegate:
2159                                 dst.move(i, -src.Float(i));
2160                                 break;
2161                         case spv::OpConvertFToU:
2162                                 dst.move(i, SIMD::UInt(src.Float(i)));
2163                                 break;
2164                         case spv::OpConvertFToS:
2165                                 dst.move(i, SIMD::Int(src.Float(i)));
2166                                 break;
2167                         case spv::OpConvertSToF:
2168                                 dst.move(i, SIMD::Float(src.Int(i)));
2169                                 break;
2170                         case spv::OpConvertUToF:
2171                                 dst.move(i, SIMD::Float(src.UInt(i)));
2172                                 break;
2173                         case spv::OpBitcast:
2174                                 dst.move(i, src.Float(i));
2175                                 break;
2176                         case spv::OpIsInf:
2177                                 dst.move(i, IsInf(src.Float(i)));
2178                                 break;
2179                         case spv::OpIsNan:
2180                                 dst.move(i, IsNan(src.Float(i)));
2181                                 break;
2182                         case spv::OpDPdx:
2183                         case spv::OpDPdxCoarse:
2184                                 // Derivative instructions: FS invocations are laid out like so:
2185                                 //    0 1
2186                                 //    2 3
2187                                 static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
2188                                 dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
2189                                 break;
2190                         case spv::OpDPdy:
2191                         case spv::OpDPdyCoarse:
2192                                 dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
2193                                 break;
2194                         case spv::OpFwidth:
2195                         case spv::OpFwidthCoarse:
2196                                 dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))
2197                                                         + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
2198                                 break;
2199                         case spv::OpDPdxFine:
2200                         {
2201                                 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
2202                                 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
2203                                 SIMD::Float v = SIMD::Float(firstRow);
2204                                 v = Insert(v, secondRow, 2);
2205                                 v = Insert(v, secondRow, 3);
2206                                 dst.move(i, v);
2207                                 break;
2208                         }
2209                         case spv::OpDPdyFine:
2210                         {
2211                                 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
2212                                 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
2213                                 SIMD::Float v = SIMD::Float(firstColumn);
2214                                 v = Insert(v, secondColumn, 1);
2215                                 v = Insert(v, secondColumn, 3);
2216                                 dst.move(i, v);
2217                                 break;
2218                         }
2219                         case spv::OpFwidthFine:
2220                         {
2221                                 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
2222                                 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
2223                                 SIMD::Float dpdx = SIMD::Float(firstRow);
2224                                 dpdx = Insert(dpdx, secondRow, 2);
2225                                 dpdx = Insert(dpdx, secondRow, 3);
2226                                 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
2227                                 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
2228                                 SIMD::Float dpdy = SIMD::Float(firstColumn);
2229                                 dpdy = Insert(dpdy, secondColumn, 1);
2230                                 dpdy = Insert(dpdy, secondColumn, 3);
2231                                 dst.move(i, Abs(dpdx) + Abs(dpdy));
2232                                 break;
2233                         }
2234                         default:
2235                                 UNIMPLEMENTED("Unhandled unary operator %s", OpcodeName(insn.opcode()).c_str());
2236                         }
2237                 }
2238
2239                 return EmitResult::Continue;
2240         }
2241
2242         SpirvShader::EmitResult SpirvShader::EmitBinaryOp(InsnIterator insn, EmitState *state) const
2243         {
2244                 auto routine = state->routine;
2245                 auto &type = getType(insn.word(1));
2246                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2247                 auto &lhsType = getType(getObject(insn.word(3)).type);
2248                 auto lhs = GenericValue(this, routine, insn.word(3));
2249                 auto rhs = GenericValue(this, routine, insn.word(4));
2250
2251                 for (auto i = 0u; i < lhsType.sizeInComponents; i++)
2252                 {
2253                         switch (insn.opcode())
2254                         {
2255                         case spv::OpIAdd:
2256                                 dst.move(i, lhs.Int(i) + rhs.Int(i));
2257                                 break;
2258                         case spv::OpISub:
2259                                 dst.move(i, lhs.Int(i) - rhs.Int(i));
2260                                 break;
2261                         case spv::OpIMul:
2262                                 dst.move(i, lhs.Int(i) * rhs.Int(i));
2263                                 break;
2264                         case spv::OpSDiv:
2265                         {
2266                                 SIMD::Int a = lhs.Int(i);
2267                                 SIMD::Int b = rhs.Int(i);
2268                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2269                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2270                                 dst.move(i, a / b);
2271                                 break;
2272                         }
2273                         case spv::OpUDiv:
2274                         {
2275                                 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2276                                 dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
2277                                 break;
2278                         }
2279                         case spv::OpSRem:
2280                         {
2281                                 SIMD::Int a = lhs.Int(i);
2282                                 SIMD::Int b = rhs.Int(i);
2283                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2284                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2285                                 dst.move(i, a % b);
2286                                 break;
2287                         }
2288                         case spv::OpSMod:
2289                         {
2290                                 SIMD::Int a = lhs.Int(i);
2291                                 SIMD::Int b = rhs.Int(i);
2292                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2293                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2294                                 auto mod = a % b;
2295                                 // If a and b have opposite signs, the remainder operation takes
2296                                 // the sign from a but OpSMod is supposed to take the sign of b.
2297                                 // Adding b will ensure that the result has the correct sign and
2298                                 // that it is still congruent to a modulo b.
2299                                 //
2300                                 // See also http://mathforum.org/library/drmath/view/52343.html
2301                                 auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
2302                                 auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
2303                                 dst.move(i, As<SIMD::Float>(fixedMod));
2304                                 break;
2305                         }
2306                         case spv::OpUMod:
2307                         {
2308                                 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2309                                 dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
2310                                 break;
2311                         }
2312                         case spv::OpIEqual:
2313                         case spv::OpLogicalEqual:
2314                                 dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
2315                                 break;
2316                         case spv::OpINotEqual:
2317                         case spv::OpLogicalNotEqual:
2318                                 dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
2319                                 break;
2320                         case spv::OpUGreaterThan:
2321                                 dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
2322                                 break;
2323                         case spv::OpSGreaterThan:
2324                                 dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i)));
2325                                 break;
2326                         case spv::OpUGreaterThanEqual:
2327                                 dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
2328                                 break;
2329                         case spv::OpSGreaterThanEqual:
2330                                 dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i)));
2331                                 break;
2332                         case spv::OpULessThan:
2333                                 dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
2334                                 break;
2335                         case spv::OpSLessThan:
2336                                 dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i)));
2337                                 break;
2338                         case spv::OpULessThanEqual:
2339                                 dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
2340                                 break;
2341                         case spv::OpSLessThanEqual:
2342                                 dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i)));
2343                                 break;
2344                         case spv::OpFAdd:
2345                                 dst.move(i, lhs.Float(i) + rhs.Float(i));
2346                                 break;
2347                         case spv::OpFSub:
2348                                 dst.move(i, lhs.Float(i) - rhs.Float(i));
2349                                 break;
2350                         case spv::OpFMul:
2351                                 dst.move(i, lhs.Float(i) * rhs.Float(i));
2352                                 break;
2353                         case spv::OpFDiv:
2354                                 dst.move(i, lhs.Float(i) / rhs.Float(i));
2355                                 break;
2356                         case spv::OpFMod:
2357                                 // TODO(b/126873455): inaccurate for values greater than 2^24
2358                                 dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
2359                                 break;
2360                         case spv::OpFRem:
2361                                 dst.move(i, lhs.Float(i) % rhs.Float(i));
2362                                 break;
2363                         case spv::OpFOrdEqual:
2364                                 dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
2365                                 break;
2366                         case spv::OpFUnordEqual:
2367                                 dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
2368                                 break;
2369                         case spv::OpFOrdNotEqual:
2370                                 dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
2371                                 break;
2372                         case spv::OpFUnordNotEqual:
2373                                 dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
2374                                 break;
2375                         case spv::OpFOrdLessThan:
2376                                 dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i)));
2377                                 break;
2378                         case spv::OpFUnordLessThan:
2379                                 dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i)));
2380                                 break;
2381                         case spv::OpFOrdGreaterThan:
2382                                 dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i)));
2383                                 break;
2384                         case spv::OpFUnordGreaterThan:
2385                                 dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
2386                                 break;
2387                         case spv::OpFOrdLessThanEqual:
2388                                 dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i)));
2389                                 break;
2390                         case spv::OpFUnordLessThanEqual:
2391                                 dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i)));
2392                                 break;
2393                         case spv::OpFOrdGreaterThanEqual:
2394                                 dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i)));
2395                                 break;
2396                         case spv::OpFUnordGreaterThanEqual:
2397                                 dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
2398                                 break;
2399                         case spv::OpShiftRightLogical:
2400                                 dst.move(i, lhs.UInt(i) >> rhs.UInt(i));
2401                                 break;
2402                         case spv::OpShiftRightArithmetic:
2403                                 dst.move(i, lhs.Int(i) >> rhs.Int(i));
2404                                 break;
2405                         case spv::OpShiftLeftLogical:
2406                                 dst.move(i, lhs.UInt(i) << rhs.UInt(i));
2407                                 break;
2408                         case spv::OpBitwiseOr:
2409                         case spv::OpLogicalOr:
2410                                 dst.move(i, lhs.UInt(i) | rhs.UInt(i));
2411                                 break;
2412                         case spv::OpBitwiseXor:
2413                                 dst.move(i, lhs.UInt(i) ^ rhs.UInt(i));
2414                                 break;
2415                         case spv::OpBitwiseAnd:
2416                         case spv::OpLogicalAnd:
2417                                 dst.move(i, lhs.UInt(i) & rhs.UInt(i));
2418                                 break;
2419                         case spv::OpSMulExtended:
2420                                 // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
2421                                 // In our flat view then, component i is the i'th component of the first member;
2422                                 // component i + N is the i'th component of the second member.
2423                                 dst.move(i, lhs.Int(i) * rhs.Int(i));
2424                                 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
2425                                 break;
2426                         case spv::OpUMulExtended:
2427                                 dst.move(i, lhs.UInt(i) * rhs.UInt(i));
2428                                 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
2429                                 break;
2430                         default:
2431                                 UNIMPLEMENTED("Unhandled binary operator %s", OpcodeName(insn.opcode()).c_str());
2432                         }
2433                 }
2434
2435                 return EmitResult::Continue;
2436         }
2437
2438         SpirvShader::EmitResult SpirvShader::EmitDot(InsnIterator insn, EmitState *state) const
2439         {
2440                 auto routine = state->routine;
2441                 auto &type = getType(insn.word(1));
2442                 ASSERT(type.sizeInComponents == 1);
2443                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2444                 auto &lhsType = getType(getObject(insn.word(3)).type);
2445                 auto lhs = GenericValue(this, routine, insn.word(3));
2446                 auto rhs = GenericValue(this, routine, insn.word(4));
2447
2448                 dst.move(0, Dot(lhsType.sizeInComponents, lhs, rhs));
2449                 return EmitResult::Continue;
2450         }
2451
2452         SpirvShader::EmitResult SpirvShader::EmitSelect(InsnIterator insn, EmitState *state) const
2453         {
2454                 auto routine = state->routine;
2455                 auto &type = getType(insn.word(1));
2456                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2457                 auto cond = GenericValue(this, routine, insn.word(3));
2458                 auto lhs = GenericValue(this, routine, insn.word(4));
2459                 auto rhs = GenericValue(this, routine, insn.word(5));
2460
2461                 for (auto i = 0u; i < type.sizeInComponents; i++)
2462                 {
2463                         dst.move(i, (cond.Int(i) & lhs.Int(i)) | (~cond.Int(i) & rhs.Int(i)));   // FIXME: IfThenElse()
2464                 }
2465
2466                 return EmitResult::Continue;
2467         }
2468
2469         SpirvShader::EmitResult SpirvShader::EmitExtendedInstruction(InsnIterator insn, EmitState *state) const
2470         {
2471                 auto routine = state->routine;
2472                 auto &type = getType(insn.word(1));
2473                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2474                 auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
2475
2476                 switch (extInstIndex)
2477                 {
2478                 case GLSLstd450FAbs:
2479                 {
2480                         auto src = GenericValue(this, routine, insn.word(5));
2481                         for (auto i = 0u; i < type.sizeInComponents; i++)
2482                         {
2483                                 dst.move(i, Abs(src.Float(i)));
2484                         }
2485                         break;
2486                 }
2487                 case GLSLstd450SAbs:
2488                 {
2489                         auto src = GenericValue(this, routine, insn.word(5));
2490                         for (auto i = 0u; i < type.sizeInComponents; i++)
2491                         {
2492                                 dst.move(i, Abs(src.Int(i)));
2493                         }
2494                         break;
2495                 }
2496                 case GLSLstd450Cross:
2497                 {
2498                         auto lhs = GenericValue(this, routine, insn.word(5));
2499                         auto rhs = GenericValue(this, routine, insn.word(6));
2500                         dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
2501                         dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
2502                         dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
2503                         break;
2504                 }
2505                 case GLSLstd450Floor:
2506                 {
2507                         auto src = GenericValue(this, routine, insn.word(5));
2508                         for (auto i = 0u; i < type.sizeInComponents; i++)
2509                         {
2510                                 dst.move(i, Floor(src.Float(i)));
2511                         }
2512                         break;
2513                 }
2514                 case GLSLstd450Trunc:
2515                 {
2516                         auto src = GenericValue(this, routine, insn.word(5));
2517                         for (auto i = 0u; i < type.sizeInComponents; i++)
2518                         {
2519                                 dst.move(i, Trunc(src.Float(i)));
2520                         }
2521                         break;
2522                 }
2523                 case GLSLstd450Ceil:
2524                 {
2525                         auto src = GenericValue(this, routine, insn.word(5));
2526                         for (auto i = 0u; i < type.sizeInComponents; i++)
2527                         {
2528                                 dst.move(i, Ceil(src.Float(i)));
2529                         }
2530                         break;
2531                 }
2532                 case GLSLstd450Fract:
2533                 {
2534                         auto src = GenericValue(this, routine, insn.word(5));
2535                         for (auto i = 0u; i < type.sizeInComponents; i++)
2536                         {
2537                                 dst.move(i, Frac(src.Float(i)));
2538                         }
2539                         break;
2540                 }
2541                 case GLSLstd450Round:
2542                 {
2543                         auto src = GenericValue(this, routine, insn.word(5));
2544                         for (auto i = 0u; i < type.sizeInComponents; i++)
2545                         {
2546                                 dst.move(i, Round(src.Float(i)));
2547                         }
2548                         break;
2549                 }
2550                 case GLSLstd450RoundEven:
2551                 {
2552                         auto src = GenericValue(this, routine, insn.word(5));
2553                         for (auto i = 0u; i < type.sizeInComponents; i++)
2554                         {
2555                                 auto x = Round(src.Float(i));
2556                                 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
2557                                 dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
2558                                                 SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
2559                         }
2560                         break;
2561                 }
2562                 case GLSLstd450FMin:
2563                 {
2564                         auto lhs = GenericValue(this, routine, insn.word(5));
2565                         auto rhs = GenericValue(this, routine, insn.word(6));
2566                         for (auto i = 0u; i < type.sizeInComponents; i++)
2567                         {
2568                                 dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
2569                         }
2570                         break;
2571                 }
2572                 case GLSLstd450FMax:
2573                 {
2574                         auto lhs = GenericValue(this, routine, insn.word(5));
2575                         auto rhs = GenericValue(this, routine, insn.word(6));
2576                         for (auto i = 0u; i < type.sizeInComponents; i++)
2577                         {
2578                                 dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
2579                         }
2580                         break;
2581                 }
2582                 case GLSLstd450SMin:
2583                 {
2584                         auto lhs = GenericValue(this, routine, insn.word(5));
2585                         auto rhs = GenericValue(this, routine, insn.word(6));
2586                         for (auto i = 0u; i < type.sizeInComponents; i++)
2587                         {
2588                                 dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
2589                         }
2590                         break;
2591                 }
2592                 case GLSLstd450SMax:
2593                 {
2594                         auto lhs = GenericValue(this, routine, insn.word(5));
2595                         auto rhs = GenericValue(this, routine, insn.word(6));
2596                         for (auto i = 0u; i < type.sizeInComponents; i++)
2597                         {
2598                                 dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
2599                         }
2600                         break;
2601                 }
2602                 case GLSLstd450UMin:
2603                 {
2604                         auto lhs = GenericValue(this, routine, insn.word(5));
2605                         auto rhs = GenericValue(this, routine, insn.word(6));
2606                         for (auto i = 0u; i < type.sizeInComponents; i++)
2607                         {
2608                                 dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
2609                         }
2610                         break;
2611                 }
2612                 case GLSLstd450UMax:
2613                 {
2614                         auto lhs = GenericValue(this, routine, insn.word(5));
2615                         auto rhs = GenericValue(this, routine, insn.word(6));
2616                         for (auto i = 0u; i < type.sizeInComponents; i++)
2617                         {
2618                                 dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
2619                         }
2620                         break;
2621                 }
2622                 case GLSLstd450Step:
2623                 {
2624                         auto edge = GenericValue(this, routine, insn.word(5));
2625                         auto x = GenericValue(this, routine, insn.word(6));
2626                         for (auto i = 0u; i < type.sizeInComponents; i++)
2627                         {
2628                                 dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
2629                         }
2630                         break;
2631                 }
2632                 case GLSLstd450SmoothStep:
2633                 {
2634                         auto edge0 = GenericValue(this, routine, insn.word(5));
2635                         auto edge1 = GenericValue(this, routine, insn.word(6));
2636                         auto x = GenericValue(this, routine, insn.word(7));
2637                         for (auto i = 0u; i < type.sizeInComponents; i++)
2638                         {
2639                                 auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
2640                                                 (edge1.Float(i) - edge0.Float(i)), SIMD::Float(0.0f)), SIMD::Float(1.0f));
2641                                 dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
2642                         }
2643                         break;
2644                 }
2645                 case GLSLstd450FMix:
2646                 {
2647                         auto x = GenericValue(this, routine, insn.word(5));
2648                         auto y = GenericValue(this, routine, insn.word(6));
2649                         auto a = GenericValue(this, routine, insn.word(7));
2650                         for (auto i = 0u; i < type.sizeInComponents; i++)
2651                         {
2652                                 dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
2653                         }
2654                         break;
2655                 }
2656                 case GLSLstd450FClamp:
2657                 {
2658                         auto x = GenericValue(this, routine, insn.word(5));
2659                         auto minVal = GenericValue(this, routine, insn.word(6));
2660                         auto maxVal = GenericValue(this, routine, insn.word(7));
2661                         for (auto i = 0u; i < type.sizeInComponents; i++)
2662                         {
2663                                 dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
2664                         }
2665                         break;
2666                 }
2667                 case GLSLstd450SClamp:
2668                 {
2669                         auto x = GenericValue(this, routine, insn.word(5));
2670                         auto minVal = GenericValue(this, routine, insn.word(6));
2671                         auto maxVal = GenericValue(this, routine, insn.word(7));
2672                         for (auto i = 0u; i < type.sizeInComponents; i++)
2673                         {
2674                                 dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
2675                         }
2676                         break;
2677                 }
2678                 case GLSLstd450UClamp:
2679                 {
2680                         auto x = GenericValue(this, routine, insn.word(5));
2681                         auto minVal = GenericValue(this, routine, insn.word(6));
2682                         auto maxVal = GenericValue(this, routine, insn.word(7));
2683                         for (auto i = 0u; i < type.sizeInComponents; i++)
2684                         {
2685                                 dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
2686                         }
2687                         break;
2688                 }
2689                 case GLSLstd450FSign:
2690                 {
2691                         auto src = GenericValue(this, routine, insn.word(5));
2692                         for (auto i = 0u; i < type.sizeInComponents; i++)
2693                         {
2694                                 auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
2695                                 auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
2696                                 dst.move(i, neg | pos);
2697                         }
2698                         break;
2699                 }
2700                 case GLSLstd450SSign:
2701                 {
2702                         auto src = GenericValue(this, routine, insn.word(5));
2703                         for (auto i = 0u; i < type.sizeInComponents; i++)
2704                         {
2705                                 auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
2706                                 auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
2707                                 dst.move(i, neg | pos);
2708                         }
2709                         break;
2710                 }
2711                 case GLSLstd450Reflect:
2712                 {
2713                         auto I = GenericValue(this, routine, insn.word(5));
2714                         auto N = GenericValue(this, routine, insn.word(6));
2715
2716                         SIMD::Float d = Dot(type.sizeInComponents, I, N);
2717
2718                         for (auto i = 0u; i < type.sizeInComponents; i++)
2719                         {
2720                                 dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
2721                         }
2722                         break;
2723                 }
2724                 case GLSLstd450Refract:
2725                 {
2726                         auto I = GenericValue(this, routine, insn.word(5));
2727                         auto N = GenericValue(this, routine, insn.word(6));
2728                         auto eta = GenericValue(this, routine, insn.word(7));
2729
2730                         SIMD::Float d = Dot(type.sizeInComponents, I, N);
2731                         SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
2732                         SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
2733                         SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
2734
2735                         for (auto i = 0u; i < type.sizeInComponents; i++)
2736                         {
2737                                 dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
2738                         }
2739                         break;
2740                 }
2741                 case GLSLstd450FaceForward:
2742                 {
2743                         auto N = GenericValue(this, routine, insn.word(5));
2744                         auto I = GenericValue(this, routine, insn.word(6));
2745                         auto Nref = GenericValue(this, routine, insn.word(7));
2746
2747                         SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
2748                         SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
2749
2750                         for (auto i = 0u; i < type.sizeInComponents; i++)
2751                         {
2752                                 auto n = N.Float(i);
2753                                 dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
2754                         }
2755                         break;
2756                 }
2757                 case GLSLstd450Length:
2758                 {
2759                         auto x = GenericValue(this, routine, insn.word(5));
2760                         SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2761
2762                         dst.move(0, Sqrt(d));
2763                         break;
2764                 }
2765                 case GLSLstd450Normalize:
2766                 {
2767                         auto x = GenericValue(this, routine, insn.word(5));
2768                         SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2769                         SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
2770
2771                         for (auto i = 0u; i < type.sizeInComponents; i++)
2772                         {
2773                                 dst.move(i, invLength * x.Float(i));
2774                         }
2775                         break;
2776                 }
2777                 case GLSLstd450Distance:
2778                 {
2779                         auto p0 = GenericValue(this, routine, insn.word(5));
2780                         auto p1 = GenericValue(this, routine, insn.word(6));
2781                         auto p0Type = getType(getObject(insn.word(5)).type);
2782
2783                         // sqrt(dot(p0-p1, p0-p1))
2784                         SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
2785
2786                         for (auto i = 1u; i < p0Type.sizeInComponents; i++)
2787                         {
2788                                 d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
2789                         }
2790
2791                         dst.move(0, Sqrt(d));
2792                         break;
2793                 }
2794                 default:
2795                         UNIMPLEMENTED("Unhandled ExtInst %d", extInstIndex);
2796                 }
2797
2798                 return EmitResult::Continue;
2799         }
2800
2801         std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
2802         {
2803                 switch(memorySemantics)
2804                 {
2805                 case spv::MemorySemanticsMaskNone:                   return std::memory_order_relaxed;
2806                 case spv::MemorySemanticsAcquireMask:                return std::memory_order_acquire;
2807                 case spv::MemorySemanticsReleaseMask:                return std::memory_order_release;
2808                 case spv::MemorySemanticsAcquireReleaseMask:         return std::memory_order_acq_rel;
2809                 case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel;  // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
2810                 default:
2811                         UNREACHABLE("MemorySemanticsMask %x", memorySemantics);
2812                         return std::memory_order_acq_rel;
2813                 }
2814         }
2815
2816         SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const
2817         {
2818                 SIMD::Float d = x.Float(0) * y.Float(0);
2819
2820                 for (auto i = 1u; i < numComponents; i++)
2821                 {
2822                         d += x.Float(i) * y.Float(i);
2823                 }
2824
2825                 return d;
2826         }
2827
2828         SpirvShader::EmitResult SpirvShader::EmitAny(InsnIterator insn, EmitState *state) const
2829         {
2830                 auto routine = state->routine;
2831                 auto &type = getType(insn.word(1));
2832                 ASSERT(type.sizeInComponents == 1);
2833                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2834                 auto &srcType = getType(getObject(insn.word(3)).type);
2835                 auto src = GenericValue(this, routine, insn.word(3));
2836
2837                 SIMD::UInt result = src.UInt(0);
2838
2839                 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2840                 {
2841                         result |= src.UInt(i);
2842                 }
2843
2844                 dst.move(0, result);
2845                 return EmitResult::Continue;
2846         }
2847
2848         SpirvShader::EmitResult SpirvShader::EmitAll(InsnIterator insn, EmitState *state) const
2849         {
2850                 auto routine = state->routine;
2851                 auto &type = getType(insn.word(1));
2852                 ASSERT(type.sizeInComponents == 1);
2853                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2854                 auto &srcType = getType(getObject(insn.word(3)).type);
2855                 auto src = GenericValue(this, routine, insn.word(3));
2856
2857                 SIMD::UInt result = src.UInt(0);
2858
2859                 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2860                 {
2861                         result &= src.UInt(i);
2862                 }
2863
2864                 dst.move(0, result);
2865                 return EmitResult::Continue;
2866         }
2867
2868         SpirvShader::EmitResult SpirvShader::EmitBranch(InsnIterator insn, EmitState *state) const
2869         {
2870                 auto target = Block::ID(insn.word(1));
2871                 auto edge = Block::Edge{state->currentBlock, target};
2872                 state->edgeActiveLaneMasks.emplace(edge, state->activeLaneMask());
2873                 return EmitResult::Terminator;
2874         }
2875
2876         SpirvShader::EmitResult SpirvShader::EmitBranchConditional(InsnIterator insn, EmitState *state) const
2877         {
2878                 auto block = getBlock(state->currentBlock);
2879                 ASSERT(block.branchInstruction == insn);
2880
2881                 auto condId = Object::ID(block.branchInstruction.word(1));
2882                 auto trueBlockId = Block::ID(block.branchInstruction.word(2));
2883                 auto falseBlockId = Block::ID(block.branchInstruction.word(3));
2884
2885                 auto cond = GenericValue(this, state->routine, condId);
2886                 ASSERT_MSG(getType(getObject(condId).type).sizeInComponents == 1, "Condition must be a Boolean type scalar");
2887
2888                 // TODO: Optimize for case where all lanes take same path.
2889
2890                 state->addOutputActiveLaneMaskEdge(trueBlockId, cond.Int(0));
2891                 state->addOutputActiveLaneMaskEdge(falseBlockId, ~cond.Int(0));
2892
2893                 return EmitResult::Terminator;
2894         }
2895
2896         SpirvShader::EmitResult SpirvShader::EmitSwitch(InsnIterator insn, EmitState *state) const
2897         {
2898                 auto block = getBlock(state->currentBlock);
2899                 ASSERT(block.branchInstruction == insn);
2900
2901                 auto selId = Object::ID(block.branchInstruction.word(1));
2902
2903                 auto sel = GenericValue(this, state->routine, selId);
2904                 ASSERT_MSG(getType(getObject(selId).type).sizeInComponents == 1, "Selector must be a scalar");
2905
2906                 auto numCases = (block.branchInstruction.wordCount() - 3) / 2;
2907
2908                 // TODO: Optimize for case where all lanes take same path.
2909
2910                 SIMD::Int defaultLaneMask = state->activeLaneMask();
2911
2912                 // Gather up the case label matches and calculate defaultLaneMask.
2913                 std::vector<RValue<SIMD::Int>> caseLabelMatches;
2914                 caseLabelMatches.reserve(numCases);
2915                 for (uint32_t i = 0; i < numCases; i++)
2916                 {
2917                         auto label = block.branchInstruction.word(i * 2 + 3);
2918                         auto caseBlockId = Block::ID(block.branchInstruction.word(i * 2 + 4));
2919                         auto caseLabelMatch = CmpEQ(sel.Int(0), SIMD::Int(label));
2920                         state->addOutputActiveLaneMaskEdge(caseBlockId, caseLabelMatch);
2921                         defaultLaneMask &= ~caseLabelMatch;
2922                 }
2923
2924                 auto defaultBlockId = Block::ID(block.branchInstruction.word(2));
2925                 state->addOutputActiveLaneMaskEdge(defaultBlockId, defaultLaneMask);
2926
2927                 return EmitResult::Terminator;
2928         }
2929
2930         SpirvShader::EmitResult SpirvShader::EmitUnreachable(InsnIterator insn, EmitState *state) const
2931         {
2932                 // TODO: Log something in this case?
2933                 state->setActiveLaneMask(SIMD::Int(0));
2934                 return EmitResult::Terminator;
2935         }
2936
2937         SpirvShader::EmitResult SpirvShader::EmitReturn(InsnIterator insn, EmitState *state) const
2938         {
2939                 state->setActiveLaneMask(SIMD::Int(0));
2940                 return EmitResult::Terminator;
2941         }
2942
2943         SpirvShader::EmitResult SpirvShader::EmitPhi(InsnIterator insn, EmitState *state) const
2944         {
2945                 auto routine = state->routine;
2946                 auto typeId = Type::ID(insn.word(1));
2947                 auto type = getType(typeId);
2948                 auto objectId = Object::ID(insn.word(2));
2949
2950                 auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
2951
2952                 bool first = true;
2953                 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
2954                 {
2955                         auto varId = Object::ID(insn.word(w + 0));
2956                         auto blockId = Block::ID(insn.word(w + 1));
2957
2958                         auto in = GenericValue(this, routine, varId);
2959                         auto mask = state->getActiveLaneMaskEdge(blockId, state->currentBlock);
2960
2961                         for (uint32_t i = 0; i < type.sizeInComponents; i++)
2962                         {
2963                                 auto inMasked = in.Int(i) & mask;
2964                                 dst.replace(i, first ? inMasked : (dst.Int(i) | inMasked));
2965                         }
2966                         first = false;
2967                 }
2968
2969                 return EmitResult::Continue;
2970         }
2971
2972         void SpirvShader::emitEpilog(SpirvRoutine *routine) const
2973         {
2974                 for (auto insn : *this)
2975                 {
2976                         switch (insn.opcode())
2977                         {
2978                         case spv::OpVariable:
2979                         {
2980                                 Object::ID resultId = insn.word(2);
2981                                 auto &object = getObject(resultId);
2982                                 auto &objectTy = getType(object.type);
2983                                 if (object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
2984                                 {
2985                                         auto &dst = routine->getValue(resultId);
2986                                         int offset = 0;
2987                                         VisitInterface(resultId,
2988                                                                    [&](Decorations const &d, AttribType type) {
2989                                                                            auto scalarSlot = d.Location << 2 | d.Component;
2990                                                                            routine->outputs[scalarSlot] = dst[offset++];
2991                                                                    });
2992                                 }
2993                                 break;
2994                         }
2995                         default:
2996                                 break;
2997                         }
2998                 }
2999         }
3000
3001         SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end)
3002         {
3003                 // Default to a Simple, this may change later.
3004                 kind = Block::Simple;
3005
3006                 // Walk the instructions to find the last two of the block.
3007                 InsnIterator insns[2];
3008                 for (auto insn : *this)
3009                 {
3010                         insns[0] = insns[1];
3011                         insns[1] = insn;
3012                 }
3013
3014                 switch (insns[1].opcode())
3015                 {
3016                         case spv::OpBranch:
3017                                 branchInstruction = insns[1];
3018                                 outs.emplace(Block::ID(branchInstruction.word(1)));
3019
3020                                 switch (insns[0].opcode())
3021                                 {
3022                                         case spv::OpLoopMerge:
3023                                                 kind = Loop;
3024                                                 mergeInstruction = insns[0];
3025                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
3026                                                 continueTarget = Block::ID(mergeInstruction.word(2));
3027                                                 break;
3028
3029                                         default:
3030                                                 kind = Block::Simple;
3031                                                 break;
3032                                 }
3033                                 break;
3034
3035                         case spv::OpBranchConditional:
3036                                 branchInstruction = insns[1];
3037                                 outs.emplace(Block::ID(branchInstruction.word(2)));
3038                                 outs.emplace(Block::ID(branchInstruction.word(3)));
3039
3040                                 switch (insns[0].opcode())
3041                                 {
3042                                         case spv::OpSelectionMerge:
3043                                                 kind = StructuredBranchConditional;
3044                                                 mergeInstruction = insns[0];
3045                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
3046                                                 break;
3047
3048                                         case spv::OpLoopMerge:
3049                                                 kind = Loop;
3050                                                 mergeInstruction = insns[0];
3051                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
3052                                                 continueTarget = Block::ID(mergeInstruction.word(2));
3053                                                 break;
3054
3055                                         default:
3056                                                 kind = UnstructuredBranchConditional;
3057                                                 break;
3058                                 }
3059                                 break;
3060
3061                         case spv::OpSwitch:
3062                                 branchInstruction = insns[1];
3063                                 outs.emplace(Block::ID(branchInstruction.word(2)));
3064                                 for (uint32_t w = 4; w < branchInstruction.wordCount(); w += 2)
3065                                 {
3066                                         outs.emplace(Block::ID(branchInstruction.word(w)));
3067                                 }
3068
3069                                 switch (insns[0].opcode())
3070                                 {
3071                                         case spv::OpSelectionMerge:
3072                                                 kind = StructuredSwitch;
3073                                                 mergeInstruction = insns[0];
3074                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
3075                                                 break;
3076
3077                                         default:
3078                                                 kind = UnstructuredSwitch;
3079                                                 break;
3080                                 }
3081                                 break;
3082
3083                         default:
3084                                 break;
3085                 }
3086         }
3087
3088         bool SpirvShader::existsPath(Block::ID from, Block::ID to) const
3089         {
3090                 // TODO: Optimize: This can be cached on the block.
3091                 Block::Set seen;
3092
3093                 std::queue<Block::ID> pending;
3094                 pending.emplace(from);
3095
3096                 while (pending.size() > 0)
3097                 {
3098                         auto id = pending.front();
3099                         pending.pop();
3100                         for (auto out : getBlock(id).outs)
3101                         {
3102                                 if (seen.count(out) != 0) { continue; }
3103                                 if (out == to) { return true; }
3104                                 pending.emplace(out);
3105                         }
3106                         seen.emplace(id);
3107                 }
3108
3109                 return false;
3110         }
3111
3112         void SpirvShader::EmitState::addOutputActiveLaneMaskEdge(Block::ID to, RValue<SIMD::Int> mask)
3113         {
3114                 addActiveLaneMaskEdge(currentBlock, to, mask & activeLaneMask());
3115         }
3116
3117         void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask)
3118         {
3119                 auto edge = Block::Edge{from, to};
3120                 auto it = edgeActiveLaneMasks.find(edge);
3121                 if (it == edgeActiveLaneMasks.end())
3122                 {
3123                         edgeActiveLaneMasks.emplace(edge, mask);
3124                 }
3125                 else
3126                 {
3127                         auto combined = it->second | mask;
3128                         edgeActiveLaneMasks.erase(edge);
3129                         edgeActiveLaneMasks.emplace(edge, combined);
3130                 }
3131         }
3132
3133         RValue<SIMD::Int> SpirvShader::EmitState::getActiveLaneMaskEdge(Block::ID from, Block::ID to)
3134         {
3135                 auto edge = Block::Edge{from, to};
3136                 auto it = edgeActiveLaneMasks.find(edge);
3137                 ASSERT_MSG(it != edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
3138                 return it->second;
3139         }
3140
3141         SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
3142                 pipelineLayout(pipelineLayout)
3143         {
3144         }
3145
3146 }