OSDN Git Service

SpirvShader: Fixes for complex loops.
[android-x86/external-swiftshader.git] / src / Pipeline / SpirvShader.cpp
1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <spirv/unified1/spirv.hpp>
16 #include <spirv/unified1/GLSL.std.450.h>
17 #include "SpirvShader.hpp"
18 #include "System/Math.hpp"
19 #include "Vulkan/VkBuffer.hpp"
20 #include "Vulkan/VkDebug.hpp"
21 #include "Vulkan/VkPipelineLayout.hpp"
22 #include "Device/Config.hpp"
23
24 #ifdef Bool
25 #undef Bool // b/127920555
26 #endif
27
28 namespace
29 {
           // Returns true when rr::SignMask(ints) is non-zero.
           // NOTE(review): presumably SignMask gathers the sign (top) bit of every
           // SIMD lane, so this reads as "at least one lane is true" for lanes that
           // are all-ones / all-zeros masks -- confirm against the Reactor API.
30         rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints)
31         {
32                 return rr::SignMask(ints) != 0;
33         }
34
           // Returns true when rr::SignMask of the bitwise complement of ints is
           // non-zero, i.e. the same test as AnyTrue applied to ~ints.
35         rr::RValue<rr::Bool> AnyFalse(rr::RValue<sw::SIMD::Int> const &ints)
36         {
37                 return rr::SignMask(~ints) != 0;
38         }
39 }
40
41 namespace sw
42 {
43         volatile int SpirvShader::serialCounter = 1;    // Start at 1, 0 is invalid shader.
           // NOTE(review): the constructor assigns IDs with a plain serialCounter++.
           // 'volatile' does not make that read-modify-write atomic; if shaders can be
           // constructed from several threads (not visible here) this needs
           // std::atomic or external locking -- confirm with the callers.
44
45         SpirvShader::SpirvShader(InsnStore const &insns)
46                         : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
47                           outputs{MAX_INTERFACE_COMPONENTS},
48                           serialID{serialCounter++}, modes{}
49         {
                   // Analysis pass over the SPIR-V module held in 'insns'.
                   // Every instruction is visited once, in order, and the switch below
                   // records what later stages look up by id: decorations and member
                   // decorations, types, constants, variables, intermediate results, the
                   // basic-block boundaries and the entry block (mainBlockId). Opcodes
                   // that are not listed hit UNIMPLEMENTED. The pass ends by calling
                   // AssignBlockIns().
50                 ASSERT(insns.size() > 0);
51
52                 // Simplifying assumptions (to be satisfied by earlier transformations)
53                 // - There is exactly one entrypoint in the module, and it's the one we want
54                 // - The only input/output OpVariables present are those used by the entrypoint
55
                   // currentBlock is the id of the block being scanned; the ASSERTs below
                   // treat value() == 0 as "not inside a block" (this relies on a
                   // default-constructed Block::ID having value 0 -- not visible here).
                   // blockStart is the iterator of the OpLabel that opened that block.
56                 Block::ID currentBlock;
57                 InsnIterator blockStart;
58
59                 for (auto insn : *this)
60                 {
61                         switch (insn.opcode())
62                         {
63                         case spv::OpExecutionMode:
64                                 ProcessExecutionMode(insn);
65                                 break;
66
67                         case spv::OpDecorate:
68                         {
69                                 TypeOrObjectID targetId = insn.word(1);
70                                 auto decoration = static_cast<spv::Decoration>(insn.word(2));
                                   // word(3) is used as the decoration's argument when the
                                   // instruction is long enough; otherwise 0 is passed.
71                                 decorations[targetId].Apply(
72                                                 decoration,
73                                                 insn.wordCount() > 3 ? insn.word(3) : 0);
74
75                                 if (decoration == spv::DecorationCentroid)
76                                         modes.NeedsCentroid = true;
77                                 break;
78                         }
79
80                         case spv::OpMemberDecorate:
81                         {
82                                 Type::ID targetId = insn.word(1);
83                                 auto memberIndex = insn.word(2);
84                                 auto &d = memberDecorations[targetId];
85                                 if (memberIndex >= d.size())
86                                         d.resize(memberIndex + 1);    // on demand; exact size would require another pass...
87                                 auto decoration = static_cast<spv::Decoration>(insn.word(3));
                                   // Same optional-argument handling as OpDecorate, shifted by
                                   // one word because of the member index.
88                                 d[memberIndex].Apply(
89                                                 decoration,
90                                                 insn.wordCount() > 4 ? insn.word(4) : 0);
91
92                                 if (decoration == spv::DecorationCentroid)
93                                         modes.NeedsCentroid = true;
94                                 break;
95                         }
96
97                         case spv::OpDecorationGroup:
98                                 // Nothing to do here. We don't need to record the definition of the group; we'll just have
99                                 // the bundle of decorations float around. If we were to ever walk the decorations directly,
100                                 // we might think about introducing this as a real Object.
101                                 break;
102
103                         case spv::OpGroupDecorate:
104                         {
                                    // word(1) is the id whose accumulated decorations are copied
                                    // onto every target listed in the remaining words.
105                                 auto const &srcDecorations = decorations[insn.word(1)];
106                                 for (auto i = 2u; i < insn.wordCount(); i++)
107                                 {
108                                         // remaining operands are targets to apply the group to.
109                                         decorations[insn.word(i)].Apply(srcDecorations);
110                                 }
111                                 break;
112                         }
113
114                         case spv::OpGroupMemberDecorate:
115                         {
116                                 auto const &srcDecorations = decorations[insn.word(1)];
117                                 for (auto i = 2u; i < insn.wordCount(); i += 2)
118                                 {
119                                         // remaining operands are pairs of <id>, literal for members to apply to.
120                                         auto &d = memberDecorations[insn.word(i)];
121                                         auto memberIndex = insn.word(i + 1);
122                                         if (memberIndex >= d.size())
123                                                 d.resize(memberIndex + 1);    // on demand resize, see above...
124                                         d[memberIndex].Apply(srcDecorations);
125                                 }
126                                 break;
127                         }
128
129                         case spv::OpLabel:
130                         {
                                    // A label may only appear once the previous block has been
                                    // closed by one of the terminators handled below.
131                                 ASSERT(currentBlock.value() == 0);
132                                 currentBlock = Block::ID(insn.word(1));
133                                 blockStart = insn;
134                                 break;
135                         }
136
137                         // Branch Instructions (subset of Termination Instructions):
138                         case spv::OpBranch:
139                         case spv::OpBranchConditional:
140                         case spv::OpSwitch:
141                         case spv::OpReturn:
142                         // fallthrough
143
144                         // Termination instruction:
145                         case spv::OpKill:
146                         case spv::OpUnreachable:
147                         {
148                                 ASSERT(currentBlock.value() != 0);
                                    // The block is built from the label iterator and the iterator
                                    // one past this terminator, then the "current block" marker is
                                    // cleared so the next OpLabel can open a new block.
149                                 auto blockEnd = insn; blockEnd++;
150                                 blocks[currentBlock] = Block(blockStart, blockEnd);
151                                 currentBlock = Block::ID(0);
152
153                                 if (insn.opcode() == spv::OpKill)
154                                 {
155                                         modes.ContainsKill = true;
156                                 }
157                                 break;
158                         }
159
160                         case spv::OpLoopMerge:
161                         case spv::OpSelectionMerge:
162                                 break; // Nothing to do in analysis pass.
163
164                         case spv::OpTypeVoid:
165                         case spv::OpTypeBool:
166                         case spv::OpTypeInt:
167                         case spv::OpTypeFloat:
168                         case spv::OpTypeVector:
169                         case spv::OpTypeMatrix:
170                         case spv::OpTypeImage:
171                         case spv::OpTypeSampler:
172                         case spv::OpTypeSampledImage:
173                         case spv::OpTypeArray:
174                         case spv::OpTypeRuntimeArray:
175                         case spv::OpTypeStruct:
176                         case spv::OpTypePointer:
177                         case spv::OpTypeFunction:
178                                 DeclareType(insn);
179                                 break;
180
181                         case spv::OpVariable:
182                         {
183                                 Type::ID typeId = insn.word(1);
184                                 Object::ID resultId = insn.word(2);
185                                 auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
186                                 if (insn.wordCount() > 4)
187                                         UNIMPLEMENTED("Variable initializers not yet supported");
188
                                    // Registered as a plain Variable first; the storage-class
                                    // switch below may change object.kind (directly, or inside
                                    // ProcessInterfaceVariable).
189                                 auto &object = defs[resultId];
190                                 object.kind = Object::Kind::Variable;
191                                 object.definition = insn;
192                                 object.type = typeId;
193                                 object.pointerBase = insn.word(2);      // base is itself
194
                                    // The storage class on the variable must agree with the one
                                    // recorded on its (pointer) type by DeclareType.
195                                 ASSERT(getType(typeId).storageClass == storageClass);
196
197                                 switch (storageClass)
198                                 {
199                                 case spv::StorageClassInput:
200                                 case spv::StorageClassOutput:
201                                         ProcessInterfaceVariable(object);
202                                         break;
203                                 case spv::StorageClassUniform:
204                                 case spv::StorageClassStorageBuffer:
205                                 case spv::StorageClassPushConstant:
206                                         object.kind = Object::Kind::PhysicalPointer;
207                                         break;
208
209                                 case spv::StorageClassPrivate:
210                                 case spv::StorageClassFunction:
211                                         break; // Correctly handled.
212
213                                 case spv::StorageClassUniformConstant:
214                                 case spv::StorageClassWorkgroup:
215                                 case spv::StorageClassCrossWorkgroup:
216                                 case spv::StorageClassGeneric:
217                                 case spv::StorageClassAtomicCounter:
218                                 case spv::StorageClassImage:
219                                         UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
220                                         break;
221
222                                 default:
223                                         UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
224                                         break;
225                                 }
226                                 break;
227                         }
228
229                         case spv::OpConstant:
                                    // Only the first literal word (word 3) is stored, in component 0.
230                                 CreateConstant(insn).constantValue[0] = insn.word(3);
231                                 break;
232                         case spv::OpConstantFalse:
233                                 CreateConstant(insn).constantValue[0] = 0;              // represent boolean false as zero
234                                 break;
235                         case spv::OpConstantTrue:
236                                 CreateConstant(insn).constantValue[0] = ~0u;    // represent boolean true as all bits set
237                                 break;
238                         case spv::OpConstantNull:
239                         case spv::OpUndef:
240                         {
241                                 // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
242                                 // OpConstantNull forms a constant of arbitrary type, all zeros.
243                                 auto &object = CreateConstant(insn);
244                                 auto &objectTy = getType(object.type);
245                                 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
246                                 {
247                                         object.constantValue[i] = 0;
248                                 }
249                                 break;
250                         }
251                         case spv::OpConstantComposite:
252                         {
253                                 auto &object = CreateConstant(insn);
254                                 auto offset = 0u;
                                    // Words from index 3 onward are constituent ids; each
                                    // constituent's components are appended in operand order, so
                                    // the composite ends up as one flat array of scalars.
255                                 for (auto i = 0u; i < insn.wordCount() - 3; i++)
256                                 {
257                                         auto &constituent = getObject(insn.word(i + 3));
258                                         auto &constituentTy = getType(constituent.type);
259                                         for (auto j = 0u; j < constituentTy.sizeInComponents; j++)
260                                                 object.constantValue[offset++] = constituent.constantValue[j];
261                                 }
262
263                                 auto objectId = Object::ID(insn.word(2));
264                                 auto decorationsIt = decorations.find(objectId);
265                                 if (decorationsIt != decorations.end() &&
266                                         decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
267                                 {
268                                         // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
269                                         // Decorating an object with the WorkgroupSize built-in
270                                         // decoration will make that object contain the dimensions
271                                         // of a local workgroup. If an object is decorated with the
272                                         // WorkgroupSize decoration, this must take precedence over
273                                         // any execution mode set for LocalSize.
274                                         // The object decorated with WorkgroupSize must be declared
275                                         // as a three-component vector of 32-bit integers.
276                                         ASSERT(getType(object.type).sizeInComponents == 3);
277                                         modes.WorkgroupSizeX = object.constantValue[0];
278                                         modes.WorkgroupSizeY = object.constantValue[1];
279                                         modes.WorkgroupSizeZ = object.constantValue[2];
280                                 }
281                                 break;
282                         }
283
284                         case spv::OpCapability:
285                                 break; // Various capabilities will be declared, but none affect our code generation at this point.
286                         case spv::OpMemoryModel:
287                                 break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
288
289                         case spv::OpEntryPoint:
290                                 break;
291                         case spv::OpFunction:
292                                 ASSERT(mainBlockId.value() == 0); // Multiple functions found
293                                 // Scan forward to find the function's label.
                                    // The loop condition stops the scan as soon as mainBlockId has
                                    // been set by the first OpLabel; any other opcode met before
                                    // that only produces a warning.
294                                 for (auto it = insn; it != end() && mainBlockId.value() == 0; it++)
295                                 {
296                                         switch (it.opcode())
297                                         {
298                                         case spv::OpFunction:
299                                         case spv::OpFunctionParameter:
300                                                 break;
301                                         case spv::OpLabel:
302                                                 mainBlockId = Block::ID(it.word(1));
303                                                 break;
304                                         default:
305                                                 WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str());
306                                         }
307                                 }
308                                 ASSERT(mainBlockId.value() != 0); // Function's OpLabel not found
309                                 break;
310                         case spv::OpFunctionEnd:
311                                 // Due to preprocessing, the entrypoint and its function provide no value.
312                                 break;
313                         case spv::OpExtInstImport:
314                                 // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
315                                 // Valid shaders will not attempt to import any other instruction sets.
                                    // The set name is read as a C string starting at word 2.
316                                 if (0 != strcmp("GLSL.std.450", reinterpret_cast<char const *>(insn.wordPointer(2))))
317                                 {
318                                         UNIMPLEMENTED("Only GLSL extended instruction set is supported");
319                                 }
320                                 break;
321                         case spv::OpName:
322                         case spv::OpMemberName:
323                         case spv::OpSource:
324                         case spv::OpSourceContinued:
325                         case spv::OpSourceExtension:
326                         case spv::OpLine:
327                         case spv::OpNoLine:
328                         case spv::OpModuleProcessed:
329                         case spv::OpString:
330                                 // No semantic impact
331                                 break;
332
333                         case spv::OpFunctionParameter:
334                         case spv::OpFunctionCall:
335                         case spv::OpSpecConstant:
336                         case spv::OpSpecConstantComposite:
337                         case spv::OpSpecConstantFalse:
338                         case spv::OpSpecConstantOp:
339                         case spv::OpSpecConstantTrue:
340                                 // These should have all been removed by preprocessing passes. If we see them here,
341                                 // our assumptions are wrong and we will probably generate wrong code.
342                                 UNIMPLEMENTED("%s should have already been lowered.", OpcodeName(insn.opcode()).c_str());
343                                 break;
344
345                         case spv::OpFConvert:
346                         case spv::OpSConvert:
347                         case spv::OpUConvert:
348                                 UNIMPLEMENTED("No valid uses for Op*Convert until we support multiple bit widths");
349                                 break;
350
351                         case spv::OpLoad:
352                         case spv::OpAccessChain:
353                         case spv::OpInBoundsAccessChain:
354                         case spv::OpCompositeConstruct:
355                         case spv::OpCompositeInsert:
356                         case spv::OpCompositeExtract:
357                         case spv::OpVectorShuffle:
358                         case spv::OpVectorTimesScalar:
359                         case spv::OpMatrixTimesScalar:
360                         case spv::OpMatrixTimesVector:
361                         case spv::OpVectorTimesMatrix:
362                         case spv::OpMatrixTimesMatrix:
363                         case spv::OpVectorExtractDynamic:
364                         case spv::OpVectorInsertDynamic:
365                         case spv::OpNot: // Unary ops
366                         case spv::OpSNegate:
367                         case spv::OpFNegate:
368                         case spv::OpLogicalNot:
369                         case spv::OpIAdd: // Binary ops
370                         case spv::OpISub:
371                         case spv::OpIMul:
372                         case spv::OpSDiv:
373                         case spv::OpUDiv:
374                         case spv::OpFAdd:
375                         case spv::OpFSub:
376                         case spv::OpFMul:
377                         case spv::OpFDiv:
378                         case spv::OpFMod:
379                         case spv::OpFRem:
380                         case spv::OpFOrdEqual:
381                         case spv::OpFUnordEqual:
382                         case spv::OpFOrdNotEqual:
383                         case spv::OpFUnordNotEqual:
384                         case spv::OpFOrdLessThan:
385                         case spv::OpFUnordLessThan:
386                         case spv::OpFOrdGreaterThan:
387                         case spv::OpFUnordGreaterThan:
388                         case spv::OpFOrdLessThanEqual:
389                         case spv::OpFUnordLessThanEqual:
390                         case spv::OpFOrdGreaterThanEqual:
391                         case spv::OpFUnordGreaterThanEqual:
392                         case spv::OpSMod:
393                         case spv::OpSRem:
394                         case spv::OpUMod:
395                         case spv::OpIEqual:
396                         case spv::OpINotEqual:
397                         case spv::OpUGreaterThan:
398                         case spv::OpSGreaterThan:
399                         case spv::OpUGreaterThanEqual:
400                         case spv::OpSGreaterThanEqual:
401                         case spv::OpULessThan:
402                         case spv::OpSLessThan:
403                         case spv::OpULessThanEqual:
404                         case spv::OpSLessThanEqual:
405                         case spv::OpShiftRightLogical:
406                         case spv::OpShiftRightArithmetic:
407                         case spv::OpShiftLeftLogical:
408                         case spv::OpBitwiseOr:
409                         case spv::OpBitwiseXor:
410                         case spv::OpBitwiseAnd:
411                         case spv::OpLogicalOr:
412                         case spv::OpLogicalAnd:
413                         case spv::OpLogicalEqual:
414                         case spv::OpLogicalNotEqual:
415                         case spv::OpUMulExtended:
416                         case spv::OpSMulExtended:
417                         case spv::OpDot:
418                         case spv::OpConvertFToU:
419                         case spv::OpConvertFToS:
420                         case spv::OpConvertSToF:
421                         case spv::OpConvertUToF:
422                         case spv::OpBitcast:
423                         case spv::OpSelect:
424                         case spv::OpExtInst:
425                         case spv::OpIsInf:
426                         case spv::OpIsNan:
427                         case spv::OpAny:
428                         case spv::OpAll:
429                         case spv::OpDPdx:
430                         case spv::OpDPdxCoarse:
431                         case spv::OpDPdy:
432                         case spv::OpDPdyCoarse:
433                         case spv::OpFwidth:
434                         case spv::OpFwidthCoarse:
435                         case spv::OpDPdxFine:
436                         case spv::OpDPdyFine:
437                         case spv::OpFwidthFine:
438                         case spv::OpAtomicLoad:
439                         case spv::OpPhi:
440                                 // Instructions that yield an intermediate value
441                         {
                                    // All of these carry the result type in word 1 and the result
                                    // id in word 2; only that much is recorded here.
442                                 Type::ID typeId = insn.word(1);
443                                 Object::ID resultId = insn.word(2);
444                                 auto &object = defs[resultId];
445                                 object.type = typeId;
446                                 object.kind = Object::Kind::Value;
447                                 object.definition = insn;
448
449                                 if (insn.opcode() == spv::OpAccessChain || insn.opcode() == spv::OpInBoundsAccessChain)
450                                 {
451                                         // interior ptr has two parts:
452                                         // - logical base ptr, common across all lanes and known at compile time
453                                         // - per-lane offset
                                            // The chain inherits the pointerBase of the object it
                                            // indexes into (word 3), so chains of chains still refer
                                            // to the originating variable.
454                                         Object::ID baseId = insn.word(3);
455                                         object.pointerBase = getObject(baseId).pointerBase;
456                                 }
457                                 break;
458                         }
459
460                         case spv::OpStore:
461                         case spv::OpAtomicStore:
462                                 // Don't need to do anything during analysis pass
463                                 break;
464
465                         default:
466                                 UNIMPLEMENTED("%s", OpcodeName(insn.opcode()).c_str());
467                         }
468                 }
469
                    // With every block recorded, fill in each block's 'ins' set from
                    // the 'outs' of the blocks reachable from mainBlockId.
470                 AssignBlockIns();
471         }
472
473         void SpirvShader::TraverseReachableBlocks(Block::ID id, SpirvShader::Block::Set& reachable)
474         {
                    // Adds 'id' to 'reachable' and then recurses into every id listed in
                    // that block's 'outs'. An id that is already in the set is ignored,
                    // which is what stops the recursion on cyclic control flow (loops).
                    //
                    // id        - block to start from.
                    // reachable - in/out set; on return it additionally contains 'id'
                    //             and everything reached from it through 'outs'.
475                 if (reachable.count(id) == 0)
476                 {
                            // Insert before recursing so a back-edge to 'id' terminates.
477                         reachable.emplace(id);
478                         for (auto out : getBlock(id).outs)
479                         {
480                                 TraverseReachableBlocks(out, reachable);
481                         }
482                 }
483         }
484
485         void SpirvShader::AssignBlockIns()
486         {
                    // Fills in the 'ins' set of every block: for each block that is
                    // reachable from mainBlockId, its id is added to the 'ins' of each
                    // block named in its 'outs'. Blocks that are not reachable are
                    // skipped, so they never show up in another block's 'ins'.
487                 Block::Set reachable;
488                 TraverseReachableBlocks(mainBlockId, reachable);
489
490                 for (auto &it : blocks)
491                 {
492                         auto &blockId = it.first;
493                         if (reachable.count(blockId) > 0)
494                         {
495                                 for (auto &outId : it.second.outs)
496                                 {
                                            // Every out must name a block recorded in 'blocks';
                                            // anything else trips the assert.
497                                         auto outIt = blocks.find(outId);
498                                         ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
499                                         auto &out = outIt->second;
500                                         out.ins.emplace(blockId);
501                                 }
502                         }
503                 }
504         }
505
506         void SpirvShader::DeclareType(InsnIterator insn)
507         {
                    // Records the type declared by 'insn' in types[word(1)]: its defining
                    // instruction and its size in components (from ComputeTypeSize).
                    // Depending on the opcode it additionally stores:
                    // - OpTypeStruct:  isBuiltInBlock, if any member decoration recorded
                    //                  so far for this struct has HasBuiltIn set.
                    // - OpTypePointer: element type (word 3), storage class (word 2) and
                    //                  isBuiltInBlock copied from the element type.
                    // - OpTypeVector / Matrix / Array / RuntimeArray: element type (word 2).
                    // Other type opcodes get no extra fields.
508                 Type::ID resultId = insn.word(1);
509
510                 auto &type = types[resultId];
511                 type.definition = insn;
512                 type.sizeInComponents = ComputeTypeSize(insn);
513
514                 // A structure is a builtin block if it has a builtin
515                 // member. All members of such a structure are builtins.
516                 switch (insn.opcode())
517                 {
518                 case spv::OpTypeStruct:
519                 {
                            // Only member decorations already present in memberDecorations
                            // at this point are taken into account.
520                         auto d = memberDecorations.find(resultId);
521                         if (d != memberDecorations.end())
522                         {
523                                 for (auto &m : d->second)
524                                 {
525                                         if (m.HasBuiltIn)
526                                         {
527                                                 type.isBuiltInBlock = true;
528                                                 break;
529                                         }
530                                 }
531                         }
532                         break;
533                 }
534                 case spv::OpTypePointer:
535                 {
536                         Type::ID elementTypeId = insn.word(3);
537                         type.element = elementTypeId;
                            // A pointer to a builtin block is itself flagged as one.
538                         type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
539                         type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
540                         break;
541                 }
542                 case spv::OpTypeVector:
543                 case spv::OpTypeMatrix:
544                 case spv::OpTypeArray:
545                 case spv::OpTypeRuntimeArray:
546                 {
547                         Type::ID elementTypeId = insn.word(2);
548                         type.element = elementTypeId;
549                         break;
550                 }
551                 default:
552                         break;
553                 }
554         }
555
556         SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
557         {
                    // Registers defs[word(2)] as a Constant whose type is word(1) and whose
                    // definition is 'insn', and allocates constantValue with one uint32_t
                    // per component of that type.
                    // The array comes from 'new uint32_t[n]' without an initializer, so its
                    // contents are not zeroed here; the callers write the components they
                    // need after this returns.
                    // Returns a reference to the entry stored in 'defs'.
558                 Type::ID typeId = insn.word(1);
559                 Object::ID resultId = insn.word(2);
560                 auto &object = defs[resultId];
561                 auto &objectTy = getType(typeId);
562                 object.type = typeId;
563                 object.kind = Object::Kind::Constant;
564                 object.definition = insn;
565                 object.constantValue = std::unique_ptr<uint32_t[]>(new uint32_t[objectTy.sizeInComponents]);
566                 return object;
567         }
568
569         void SpirvShader::ProcessInterfaceVariable(Object &object)
570         {
571                 auto &objectTy = getType(object.type);
572                 ASSERT(objectTy.storageClass == spv::StorageClassInput || objectTy.storageClass == spv::StorageClassOutput);
573
574                 ASSERT(objectTy.opcode() == spv::OpTypePointer);
575                 auto pointeeTy = getType(objectTy.element);
576
577                 auto &builtinInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputBuiltins : outputBuiltins;
578                 auto &userDefinedInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputs : outputs;
579
580                 ASSERT(object.opcode() == spv::OpVariable);
581                 Object::ID resultId = object.definition.word(2);
582
583                 if (objectTy.isBuiltInBlock)
584                 {
585                         // walk the builtin block, registering each of its members separately.
586                         auto m = memberDecorations.find(objectTy.element);
587                         ASSERT(m != memberDecorations.end());        // otherwise we wouldn't have marked the type chain
588                         auto &structType = pointeeTy.definition;
589                         auto offset = 0u;
590                         auto word = 2u;
591                         for (auto &member : m->second)
592                         {
593                                 auto &memberType = getType(structType.word(word));
594
595                                 if (member.HasBuiltIn)
596                                 {
597                                         builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
598                                 }
599
600                                 offset += memberType.sizeInComponents;
601                                 ++word;
602                         }
603                         return;
604                 }
605
606                 auto d = decorations.find(resultId);
607                 if (d != decorations.end() && d->second.HasBuiltIn)
608                 {
609                         builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
610                 }
611                 else
612                 {
613                         object.kind = Object::Kind::InterfaceVariable;
614                         VisitInterface(resultId,
615                                                    [&userDefinedInterface](Decorations const &d, AttribType type) {
616                                                            // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
617                                                            auto scalarSlot = (d.Location << 2) | d.Component;
618                                                            ASSERT(scalarSlot >= 0 &&
619                                                                           scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
620
621                                                            auto &slot = userDefinedInterface[scalarSlot];
622                                                            slot.Type = type;
623                                                            slot.Flat = d.Flat;
624                                                            slot.NoPerspective = d.NoPerspective;
625                                                            slot.Centroid = d.Centroid;
626                                                    });
627                 }
628         }
629
630         void SpirvShader::ProcessExecutionMode(InsnIterator insn)
631         {
632                 auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
633                 switch (mode)
634                 {
635                 case spv::ExecutionModeEarlyFragmentTests:
636                         modes.EarlyFragmentTests = true;
637                         break;
638                 case spv::ExecutionModeDepthReplacing:
639                         modes.DepthReplacing = true;
640                         break;
641                 case spv::ExecutionModeDepthGreater:
642                         modes.DepthGreater = true;
643                         break;
644                 case spv::ExecutionModeDepthLess:
645                         modes.DepthLess = true;
646                         break;
647                 case spv::ExecutionModeDepthUnchanged:
648                         modes.DepthUnchanged = true;
649                         break;
650                 case spv::ExecutionModeLocalSize:
651                         modes.WorkgroupSizeX = insn.word(3);
652                         modes.WorkgroupSizeY = insn.word(4);
653                         modes.WorkgroupSizeZ = insn.word(5);
654                         break;
655                 case spv::ExecutionModeOriginUpperLeft:
656                         // This is always the case for a Vulkan shader. Do nothing.
657                         break;
658                 default:
659                         UNIMPLEMENTED("No other execution modes are permitted");
660                 }
661         }
662
663         uint32_t SpirvShader::ComputeTypeSize(InsnIterator insn)
664         {
665                 // Types are always built from the bottom up (with the exception of forward ptrs, which
666                 // don't appear in Vulkan shaders. Therefore, we can always assume our component parts have
667                 // already been described (and so their sizes determined)
668                 switch (insn.opcode())
669                 {
670                 case spv::OpTypeVoid:
671                 case spv::OpTypeSampler:
672                 case spv::OpTypeImage:
673                 case spv::OpTypeSampledImage:
674                 case spv::OpTypeFunction:
675                 case spv::OpTypeRuntimeArray:
676                         // Objects that don't consume any space.
677                         // Descriptor-backed objects currently only need exist at compile-time.
678                         // Runtime arrays don't appear in places where their size would be interesting
679                         return 0;
680
681                 case spv::OpTypeBool:
682                 case spv::OpTypeFloat:
683                 case spv::OpTypeInt:
684                         // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
685                         // we might need to change this, but only 32 bit components are required for Vulkan 1.1.
686                         return 1;
687
688                 case spv::OpTypeVector:
689                 case spv::OpTypeMatrix:
690                         // Vectors and matrices both consume element count * element size.
691                         return getType(insn.word(2)).sizeInComponents * insn.word(3);
692
693                 case spv::OpTypeArray:
694                 {
695                         // Element count * element size. Array sizes come from constant ids.
696                         auto arraySize = GetConstantInt(insn.word(3));
697                         return getType(insn.word(2)).sizeInComponents * arraySize;
698                 }
699
700                 case spv::OpTypeStruct:
701                 {
702                         uint32_t size = 0;
703                         for (uint32_t i = 2u; i < insn.wordCount(); i++)
704                         {
705                                 size += getType(insn.word(i)).sizeInComponents;
706                         }
707                         return size;
708                 }
709
710                 case spv::OpTypePointer:
711                         // Runtime representation of a pointer is a per-lane index.
712                         // Note: clients are expected to look through the pointer if they want the pointee size instead.
713                         return 1;
714
715                 default:
716                         // Some other random insn.
717                         UNIMPLEMENTED("Only types are supported");
718                         return 0;
719                 }
720         }
721
722         bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
723         {
724                 switch (storageClass)
725                 {
726                 case spv::StorageClassUniform:
727                 case spv::StorageClassStorageBuffer:
728                 case spv::StorageClassPushConstant:
729                         return false;
730                 default:
731                         return true;
732                 }
733         }
734
735         template<typename F>
736         int SpirvShader::VisitInterfaceInner(Type::ID id, Decorations d, F f) const
737         {
738                 // Recursively walks variable definition and its type tree, taking into account
739                 // any explicit Location or Component decorations encountered; where explicit
740                 // Locations or Components are not specified, assigns them sequentially.
741                 // Collected decorations are carried down toward the leaves and across
742                 // siblings; Effect of decorations intentionally does not flow back up the tree.
743                 //
744                 // F is a functor to be called with the effective decoration set for every component.
745                 //
746                 // Returns the next available location, and calls f().
747
748                 // This covers the rules in Vulkan 1.1 spec, 14.1.4 Location Assignment.
749
750                 ApplyDecorationsForId(&d, id);
751
752                 auto const &obj = getType(id);
753                 switch(obj.opcode())
754                 {
755                 case spv::OpTypePointer:
756                         return VisitInterfaceInner<F>(obj.definition.word(3), d, f);
757                 case spv::OpTypeMatrix:
758                         for (auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
759                         {
760                                 // consumes same components of N consecutive locations
761                                 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
762                         }
763                         return d.Location;
764                 case spv::OpTypeVector:
765                         for (auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
766                         {
767                                 // consumes N consecutive components in the same location
768                                 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
769                         }
770                         return d.Location + 1;
771                 case spv::OpTypeFloat:
772                         f(d, ATTRIBTYPE_FLOAT);
773                         return d.Location + 1;
774                 case spv::OpTypeInt:
775                         f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
776                         return d.Location + 1;
777                 case spv::OpTypeBool:
778                         f(d, ATTRIBTYPE_UINT);
779                         return d.Location + 1;
780                 case spv::OpTypeStruct:
781                 {
782                         // iterate over members, which may themselves have Location/Component decorations
783                         for (auto i = 0u; i < obj.definition.wordCount() - 2; i++)
784                         {
785                                 ApplyDecorationsForIdMember(&d, id, i);
786                                 d.Location = VisitInterfaceInner<F>(obj.definition.word(i + 2), d, f);
787                                 d.Component = 0;    // Implicit locations always have component=0
788                         }
789                         return d.Location;
790                 }
791                 case spv::OpTypeArray:
792                 {
793                         auto arraySize = GetConstantInt(obj.definition.word(3));
794                         for (auto i = 0u; i < arraySize; i++)
795                         {
796                                 d.Location = VisitInterfaceInner<F>(obj.definition.word(2), d, f);
797                         }
798                         return d.Location;
799                 }
800                 default:
801                         // Intentionally partial; most opcodes do not participate in type hierarchies
802                         return 0;
803                 }
804         }
805
806         template<typename F>
807         void SpirvShader::VisitInterface(Object::ID id, F f) const
808         {
809                 // Walk a variable definition and call f for each component in it.
810                 Decorations d{};
811                 ApplyDecorationsForId(&d, id);
812
813                 auto def = getObject(id).definition;
814                 ASSERT(def.opcode() == spv::OpVariable);
815                 VisitInterfaceInner<F>(def.word(1), d, f);
816         }
817
818         SIMD::Int SpirvShader::WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
819         {
820                 // Produce a offset into external memory in sizeof(float) units
821
822                 int constantOffset = 0;
823                 SIMD::Int dynamicOffset = SIMD::Int(0);
824                 auto &baseObject = getObject(id);
825                 Type::ID typeId = getType(baseObject.type).element;
826                 Decorations d{};
827                 ApplyDecorationsForId(&d, baseObject.type);
828
829                 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
830                 // Start with its offset and build from there.
831                 if (baseObject.kind == Object::Kind::Value)
832                 {
833                         dynamicOffset += routine->getIntermediate(id).Int(0);
834                 }
835
836                 for (auto i = 0u; i < numIndexes; i++)
837                 {
838                         auto & type = getType(typeId);
839                         switch (type.definition.opcode())
840                         {
841                         case spv::OpTypeStruct:
842                         {
843                                 int memberIndex = GetConstantInt(indexIds[i]);
844                                 ApplyDecorationsForIdMember(&d, typeId, memberIndex);
845                                 ASSERT(d.HasOffset);
846                                 constantOffset += d.Offset / sizeof(float);
847                                 typeId = type.definition.word(2u + memberIndex);
848                                 break;
849                         }
850                         case spv::OpTypeArray:
851                         case spv::OpTypeRuntimeArray:
852                         {
853                                 // TODO: b/127950082: Check bounds.
854                                 ApplyDecorationsForId(&d, typeId);
855                                 ASSERT(d.HasArrayStride);
856                                 auto & obj = getObject(indexIds[i]);
857                                 if (obj.kind == Object::Kind::Constant)
858                                         constantOffset += d.ArrayStride/sizeof(float) * GetConstantInt(indexIds[i]);
859                                 else
860                                         dynamicOffset += SIMD::Int(d.ArrayStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
861                                 typeId = type.element;
862                                 break;
863                         }
864                         case spv::OpTypeMatrix:
865                         {
866                                 // TODO: b/127950082: Check bounds.
867                                 ApplyDecorationsForId(&d, typeId);
868                                 ASSERT(d.HasMatrixStride);
869                                 auto & obj = getObject(indexIds[i]);
870                                 if (obj.kind == Object::Kind::Constant)
871                                         constantOffset += d.MatrixStride/sizeof(float) * GetConstantInt(indexIds[i]);
872                                 else
873                                         dynamicOffset += SIMD::Int(d.MatrixStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
874                                 typeId = type.element;
875                                 break;
876                         }
877                         case spv::OpTypeVector:
878                         {
879                                 auto & obj = getObject(indexIds[i]);
880                                 if (obj.kind == Object::Kind::Constant)
881                                         constantOffset += GetConstantInt(indexIds[i]);
882                                 else
883                                         dynamicOffset += routine->getIntermediate(indexIds[i]).Int(0);
884                                 typeId = type.element;
885                                 break;
886                         }
887                         default:
888                                 UNIMPLEMENTED("Unexpected type '%s' in WalkExplicitLayoutAccessChain", OpcodeName(type.definition.opcode()).c_str());
889                         }
890                 }
891
892                 return dynamicOffset + SIMD::Int(constantOffset);
893         }
894
895         SIMD::Int SpirvShader::WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
896         {
897                 // TODO: avoid doing per-lane work in some cases if we can?
898                 // Produce a *component* offset into location-oriented memory
899
900                 int constantOffset = 0;
901                 SIMD::Int dynamicOffset = SIMD::Int(0);
902                 auto &baseObject = getObject(id);
903                 Type::ID typeId = getType(baseObject.type).element;
904
905                 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
906                 // Start with its offset and build from there.
907                 if (baseObject.kind == Object::Kind::Value)
908                 {
909                         dynamicOffset += routine->getIntermediate(id).Int(0);
910                 }
911
912                 for (auto i = 0u; i < numIndexes; i++)
913                 {
914                         auto & type = getType(typeId);
915                         switch(type.opcode())
916                         {
917                         case spv::OpTypeStruct:
918                         {
919                                 int memberIndex = GetConstantInt(indexIds[i]);
920                                 int offsetIntoStruct = 0;
921                                 for (auto j = 0; j < memberIndex; j++) {
922                                         auto memberType = type.definition.word(2u + j);
923                                         offsetIntoStruct += getType(memberType).sizeInComponents;
924                                 }
925                                 constantOffset += offsetIntoStruct;
926                                 typeId = type.definition.word(2u + memberIndex);
927                                 break;
928                         }
929
930                         case spv::OpTypeVector:
931                         case spv::OpTypeMatrix:
932                         case spv::OpTypeArray:
933                         case spv::OpTypeRuntimeArray:
934                         {
935                                 // TODO: b/127950082: Check bounds.
936                                 auto stride = getType(type.element).sizeInComponents;
937                                 auto & obj = getObject(indexIds[i]);
938                                 if (obj.kind == Object::Kind::Constant)
939                                         constantOffset += stride * GetConstantInt(indexIds[i]);
940                                 else
941                                         dynamicOffset += SIMD::Int(stride) * routine->getIntermediate(indexIds[i]).Int(0);
942                                 typeId = type.element;
943                                 break;
944                         }
945
946                         default:
947                                 UNIMPLEMENTED("Unexpected type '%s' in WalkAccessChain", OpcodeName(type.opcode()).c_str());
948                         }
949                 }
950
951                 return dynamicOffset + SIMD::Int(constantOffset);
952         }
953
954         uint32_t SpirvShader::WalkLiteralAccessChain(Type::ID typeId, uint32_t numIndexes, uint32_t const *indexes) const
955         {
956                 uint32_t constantOffset = 0;
957
958                 for (auto i = 0u; i < numIndexes; i++)
959                 {
960                         auto & type = getType(typeId);
961                         switch(type.opcode())
962                         {
963                         case spv::OpTypeStruct:
964                         {
965                                 int memberIndex = indexes[i];
966                                 int offsetIntoStruct = 0;
967                                 for (auto j = 0; j < memberIndex; j++) {
968                                         auto memberType = type.definition.word(2u + j);
969                                         offsetIntoStruct += getType(memberType).sizeInComponents;
970                                 }
971                                 constantOffset += offsetIntoStruct;
972                                 typeId = type.definition.word(2u + memberIndex);
973                                 break;
974                         }
975
976                         case spv::OpTypeVector:
977                         case spv::OpTypeMatrix:
978                         case spv::OpTypeArray:
979                         {
980                                 auto elementType = type.definition.word(2);
981                                 auto stride = getType(elementType).sizeInComponents;
982                                 constantOffset += stride * indexes[i];
983                                 typeId = elementType;
984                                 break;
985                         }
986
987                         default:
988                                 UNIMPLEMENTED("Unexpected type in WalkLiteralAccessChain");
989                         }
990                 }
991
992                 return constantOffset;
993         }
994
995         void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
996         {
997                 switch (decoration)
998                 {
999                 case spv::DecorationLocation:
1000                         HasLocation = true;
1001                         Location = static_cast<int32_t>(arg);
1002                         break;
1003                 case spv::DecorationComponent:
1004                         HasComponent = true;
1005                         Component = arg;
1006                         break;
1007                 case spv::DecorationDescriptorSet:
1008                         HasDescriptorSet = true;
1009                         DescriptorSet = arg;
1010                         break;
1011                 case spv::DecorationBinding:
1012                         HasBinding = true;
1013                         Binding = arg;
1014                         break;
1015                 case spv::DecorationBuiltIn:
1016                         HasBuiltIn = true;
1017                         BuiltIn = static_cast<spv::BuiltIn>(arg);
1018                         break;
1019                 case spv::DecorationFlat:
1020                         Flat = true;
1021                         break;
1022                 case spv::DecorationNoPerspective:
1023                         NoPerspective = true;
1024                         break;
1025                 case spv::DecorationCentroid:
1026                         Centroid = true;
1027                         break;
1028                 case spv::DecorationBlock:
1029                         Block = true;
1030                         break;
1031                 case spv::DecorationBufferBlock:
1032                         BufferBlock = true;
1033                         break;
1034                 case spv::DecorationOffset:
1035                         HasOffset = true;
1036                         Offset = static_cast<int32_t>(arg);
1037                         break;
1038                 case spv::DecorationArrayStride:
1039                         HasArrayStride = true;
1040                         ArrayStride = static_cast<int32_t>(arg);
1041                         break;
1042                 case spv::DecorationMatrixStride:
1043                         HasMatrixStride = true;
1044                         MatrixStride = static_cast<int32_t>(arg);
1045                         break;
1046                 default:
1047                         // Intentionally partial, there are many decorations we just don't care about.
1048                         break;
1049                 }
1050         }
1051
1052         void SpirvShader::Decorations::Apply(const sw::SpirvShader::Decorations &src)
1053         {
1054                 // Apply a decoration group to this set of decorations
1055                 if (src.HasBuiltIn)
1056                 {
1057                         HasBuiltIn = true;
1058                         BuiltIn = src.BuiltIn;
1059                 }
1060
1061                 if (src.HasLocation)
1062                 {
1063                         HasLocation = true;
1064                         Location = src.Location;
1065                 }
1066
1067                 if (src.HasComponent)
1068                 {
1069                         HasComponent = true;
1070                         Component = src.Component;
1071                 }
1072
1073                 if (src.HasDescriptorSet)
1074                 {
1075                         HasDescriptorSet = true;
1076                         DescriptorSet = src.DescriptorSet;
1077                 }
1078
1079                 if (src.HasBinding)
1080                 {
1081                         HasBinding = true;
1082                         Binding = src.Binding;
1083                 }
1084
1085                 if (src.HasOffset)
1086                 {
1087                         HasOffset = true;
1088                         Offset = src.Offset;
1089                 }
1090
1091                 if (src.HasArrayStride)
1092                 {
1093                         HasArrayStride = true;
1094                         ArrayStride = src.ArrayStride;
1095                 }
1096
1097                 if (src.HasMatrixStride)
1098                 {
1099                         HasMatrixStride = true;
1100                         MatrixStride = src.MatrixStride;
1101                 }
1102
1103                 Flat |= src.Flat;
1104                 NoPerspective |= src.NoPerspective;
1105                 Centroid |= src.Centroid;
1106                 Block |= src.Block;
1107                 BufferBlock |= src.BufferBlock;
1108         }
1109
1110         void SpirvShader::ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const
1111         {
1112                 auto it = decorations.find(id);
1113                 if (it != decorations.end())
1114                         d->Apply(it->second);
1115         }
1116
1117         void SpirvShader::ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const
1118         {
1119                 auto it = memberDecorations.find(id);
1120                 if (it != memberDecorations.end() && member < it->second.size())
1121                 {
1122                         d->Apply(it->second[member]);
1123                 }
1124         }
1125
1126         uint32_t SpirvShader::GetConstantInt(Object::ID id) const
1127         {
1128                 // Slightly hackish access to constants very early in translation.
1129                 // General consumption of constants by other instructions should
1130                 // probably be just lowered to Reactor.
1131
1132                 // TODO: not encountered yet since we only use this for array sizes etc,
1133                 // but is possible to construct integer constant 0 via OpConstantNull.
1134                 auto insn = getObject(id).definition;
1135                 ASSERT(insn.opcode() == spv::OpConstant);
1136                 ASSERT(getType(insn.word(1)).opcode() == spv::OpTypeInt);
1137                 return insn.word(3);
1138         }
1139
1140         // emit-time
1141
1142         void SpirvShader::emitProlog(SpirvRoutine *routine) const
1143         {
1144                 for (auto insn : *this)
1145                 {
1146                         switch (insn.opcode())
1147                         {
1148                         case spv::OpVariable:
1149                         {
1150                                 Type::ID resultPointerTypeId = insn.word(1);
1151                                 auto resultPointerType = getType(resultPointerTypeId);
1152                                 auto pointeeType = getType(resultPointerType.element);
1153
1154                                 if(pointeeType.sizeInComponents > 0)  // TODO: what to do about zero-slot objects?
1155                                 {
1156                                         Object::ID resultId = insn.word(2);
1157                                         routine->createLvalue(resultId, pointeeType.sizeInComponents);
1158                                 }
1159                                 break;
1160                         }
1161                         default:
1162                                 // Nothing else produces interface variables, so can all be safely ignored.
1163                                 break;
1164                         }
1165                 }
1166         }
1167
1168         void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask) const
1169         {
1170                 EmitState state;
1171                 state.setActiveLaneMask(activeLaneMask);
1172                 state.routine = routine;
1173
1174                 // Emit everything up to the first label
1175                 // TODO: Separate out dispatch of block from non-block instructions?
1176                 for (auto insn : *this)
1177                 {
1178                         if (insn.opcode() == spv::OpLabel)
1179                         {
1180                                 break;
1181                         }
1182                         EmitInstruction(insn, &state);
1183                 }
1184
1185                 // Emit all the blocks starting from mainBlockId.
1186                 EmitBlocks(mainBlockId, &state);
1187         }
1188
1189         void SpirvShader::EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore /* = 0 */) const
1190         {
1191                 auto oldPending = state->pending;
1192
1193                 std::queue<Block::ID> pending;
1194                 state->pending = &pending;
1195                 pending.push(id);
1196                 while (pending.size() > 0)
1197                 {
1198                         auto id = pending.front();
1199                         pending.pop();
1200
1201                         auto const &block = getBlock(id);
1202                         if (id == ignore)
1203                         {
1204                                 continue;
1205                         }
1206
1207                         state->currentBlock = id;
1208
1209                         switch (block.kind)
1210                         {
1211                                 case Block::Simple:
1212                                 case Block::StructuredBranchConditional:
1213                                 case Block::UnstructuredBranchConditional:
1214                                 case Block::StructuredSwitch:
1215                                 case Block::UnstructuredSwitch:
1216                                         EmitNonLoop(state);
1217                                         break;
1218
1219                                 case Block::Loop:
1220                                         EmitLoop(state);
1221                                         break;
1222
1223                                 default:
1224                                         UNREACHABLE("Unexpected Block Kind: %d", int(block.kind));
1225                         }
1226                 }
1227
1228                 state->pending = oldPending;
1229         }
1230
1231         void SpirvShader::EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const
1232         {
1233                 for (auto insn = begin; insn != end; insn++)
1234                 {
1235                         auto res = EmitInstruction(insn, state);
1236                         switch (res)
1237                         {
1238                         case EmitResult::Continue:
1239                                 continue;
1240                         case EmitResult::Terminator:
1241                                 break;
1242                         default:
1243                                 UNREACHABLE("Unexpected EmitResult %d", int(res));
1244                                 break;
1245                         }
1246                 }
1247         }
1248
1249         void SpirvShader::EmitNonLoop(EmitState *state) const
1250         {
1251                 auto blockId = state->currentBlock;
1252                 auto block = getBlock(blockId);
1253
1254                 // Ensure all incoming blocks have been generated.
1255                 auto depsDone = true;
1256                 for (auto in : block.ins)
1257                 {
1258                         if (state->visited.count(in) == 0)
1259                         {
1260                                 state->pending->emplace(in);
1261                                 depsDone = false;
1262                         }
1263                 }
1264
1265                 if (!depsDone)
1266                 {
1267                         // come back to this once the dependencies have been generated
1268                         state->pending->emplace(blockId);
1269                         return;
1270                 }
1271
1272                 if (!state->visited.emplace(blockId).second)
1273                 {
1274                         return; // Already generated this block.
1275                 }
1276
1277                 if (blockId != mainBlockId)
1278                 {
1279                         // Set the activeLaneMask.
1280                         Intermediate activeLaneMask(1);
1281                         activeLaneMask.move(0, SIMD::Int(0));
1282                         for (auto in : block.ins)
1283                         {
1284                                 auto inMask = GetActiveLaneMaskEdge(state, in, blockId);
1285                                 activeLaneMask.replace(0, activeLaneMask.Int(0) | inMask);
1286                         }
1287                         state->setActiveLaneMask(activeLaneMask.Int(0));
1288                 }
1289
1290                 EmitInstructions(block.begin(), block.end(), state);
1291
1292                 for (auto out : block.outs)
1293                 {
1294                         state->pending->emplace(out);
1295                 }
1296         }
1297
1298         void SpirvShader::EmitLoop(EmitState *state) const
1299         {
1300                 auto blockId = state->currentBlock;
1301                 auto block = getBlock(blockId);
1302
1303                 // Ensure all incoming non-back edge blocks have been generated.
1304                 auto depsDone = true;
1305                 for (auto in : block.ins)
1306                 {
1307                         if (state->visited.count(in) == 0)
1308                         {
1309                                 if (!existsPath(blockId, in, block.mergeBlock)) // if not a loop back edge
1310                                 {
1311                                         state->pending->emplace(in);
1312                                         depsDone = false;
1313                                 }
1314                         }
1315                 }
1316
1317                 if (!depsDone)
1318                 {
1319                         // come back to this once the dependencies have been generated
1320                         state->pending->emplace(blockId);
1321                         return;
1322                 }
1323
1324                 if (!state->visited.emplace(blockId).second)
1325                 {
1326                         return; // Already emitted this loop.
1327                 }
1328
1329                 // loopActiveLaneMask is the mask of lanes that are continuing to loop.
1330                 // This is initialized with the incoming active lane masks.
1331                 SIMD::Int loopActiveLaneMask = SIMD::Int(0);
1332                 for (auto in : block.ins)
1333                 {
1334                         if (!existsPath(blockId, in, block.mergeBlock)) // if not a loop back edge
1335                         {
1336                                 loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
1337                         }
1338                 }
1339
1340                 // Generate an alloca for each of the loop's phis.
1341                 // These will be primed with the incoming, non back edge Phi values
1342                 // before the loop, and then updated just before the loop jumps back to
1343                 // the block.
1344                 struct LoopPhi
1345                 {
1346                         Object::ID phiId; // The Phi identifier.
1347                         Object::ID continueValue; // The source merge value from the loop.
1348                         Array<SIMD::Int> storage; // The alloca.
1349                 };
1350
1351                 std::vector<LoopPhi> phis;
1352
1353                 // For each OpPhi between the block start and the merge instruction:
1354                 for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
1355                 {
1356                         if (insn.opcode() == spv::OpPhi)
1357                         {
1358                                 auto objectId = Object::ID(insn.word(2));
1359                                 auto &object = getObject(objectId);
1360                                 auto &type = getType(object.type);
1361
1362                                 LoopPhi phi;
1363                                 phi.phiId = Object::ID(insn.word(2));
1364                                 phi.storage = Array<SIMD::Int>(type.sizeInComponents);
1365
1366                                 // Start with the Phi set to 0.
1367                                 for (uint32_t i = 0; i < type.sizeInComponents; i++)
1368                                 {
1369                                         phi.storage[i] = SIMD::Int(0);
1370                                 }
1371
1372                                 // For each Phi source:
1373                                 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
1374                                 {
1375                                         auto varId = Object::ID(insn.word(w + 0));
1376                                         auto blockId = Block::ID(insn.word(w + 1));
1377                                         if (existsPath(state->currentBlock, blockId, block.mergeBlock))
1378                                         {
1379                                                 // This source is from a loop back-edge.
1380                                                 ASSERT(phi.continueValue == 0 || phi.continueValue == varId);
1381                                                 phi.continueValue = varId;
1382                                         }
1383                                         else
1384                                         {
1385                                                 // This source is from a preceding block.
1386                                                 for (uint32_t i = 0; i < type.sizeInComponents; i++)
1387                                                 {
1388                                                         auto in = GenericValue(this, state->routine, varId);
1389                                                         auto mask = GetActiveLaneMaskEdge(state, blockId, state->currentBlock);
1390                                                         phi.storage[i] = phi.storage[i] | (in.Int(i) & mask);
1391                                                 }
1392                                         }
1393                                 }
1394
1395                                 phis.push_back(phi);
1396                         }
1397                 }
1398
1399                 // Create the loop basic blocks
1400                 auto headerBasicBlock = Nucleus::createBasicBlock();
1401                 auto mergeBasicBlock = Nucleus::createBasicBlock();
1402
1403                 // Start emitting code inside the loop.
1404                 Nucleus::createBr(headerBasicBlock);
1405                 Nucleus::setInsertBlock(headerBasicBlock);
1406
1407                 // Load the Phi values from storage.
1408                 // This will load at the start of each loop.
1409                 for (auto &phi : phis)
1410                 {
1411                         auto &type = getType(getObject(phi.phiId).type);
1412                         auto &dst = state->routine->createIntermediate(phi.phiId, type.sizeInComponents);
1413                         for (unsigned int i = 0u; i < type.sizeInComponents; i++)
1414                         {
1415                                 dst.move(i, phi.storage[i]);
1416                         }
1417                 }
1418
1419                 // Load the active lane mask.
1420                 state->setActiveLaneMask(loopActiveLaneMask);
1421
1422                 // Emit all the non-phi instructions in this loop header block.
1423                 for (auto insn = block.begin(); insn != block.end(); insn++)
1424                 {
1425                         if (insn.opcode() != spv::OpPhi)
1426                         {
1427                                 EmitInstruction(insn, state);
1428                         }
1429                 }
1430
1431                 // Emit all loop blocks, but don't emit the merge block yet.
1432                 for (auto out : block.outs)
1433                 {
1434                         if (existsPath(out, blockId, block.mergeBlock))
1435                         {
1436                                 EmitBlocks(out, state, block.mergeBlock);
1437                         }
1438                 }
1439
1440                 // Rebuild the loopActiveLaneMask from the loop back edges.
1441                 loopActiveLaneMask = SIMD::Int(0);
1442                 for (auto in : block.ins)
1443                 {
1444                         if (existsPath(blockId, in, block.mergeBlock))
1445                         {
1446                                 loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
1447                         }
1448                 }
1449
1450                 // Update loop phi values
1451                 for (auto &phi : phis)
1452                 {
1453                         if (phi.continueValue != 0)
1454                         {
1455                                 auto val = GenericValue(this, state->routine, phi.continueValue);
1456                                 auto &type = getType(getObject(phi.phiId).type);
1457                                 for (unsigned int i = 0u; i < type.sizeInComponents; i++)
1458                                 {
1459                                         phi.storage[i] = val.Int(i);
1460                                 }
1461                         }
1462                 }
1463
1464                 // Loop body now done.
1465                 // If any lanes are still active, jump back to the loop header,
1466                 // otherwise jump to the merge block.
1467                 Nucleus::createCondBr(AnyTrue(loopActiveLaneMask).value, headerBasicBlock, mergeBasicBlock);
1468
1469                 // Continue emitting from the merge block.
1470                 Nucleus::setInsertBlock(mergeBasicBlock);
1471                 state->pending->emplace(block.mergeBlock);
1472         }
1473
1474         SpirvShader::EmitResult SpirvShader::EmitInstruction(InsnIterator insn, EmitState *state) const
1475         {
1476                 switch (insn.opcode())
1477                 {
1478                 case spv::OpTypeVoid:
1479                 case spv::OpTypeInt:
1480                 case spv::OpTypeFloat:
1481                 case spv::OpTypeBool:
1482                 case spv::OpTypeVector:
1483                 case spv::OpTypeArray:
1484                 case spv::OpTypeRuntimeArray:
1485                 case spv::OpTypeMatrix:
1486                 case spv::OpTypeStruct:
1487                 case spv::OpTypePointer:
1488                 case spv::OpTypeFunction:
1489                 case spv::OpExecutionMode:
1490                 case spv::OpMemoryModel:
1491                 case spv::OpFunction:
1492                 case spv::OpFunctionEnd:
1493                 case spv::OpConstant:
1494                 case spv::OpConstantNull:
1495                 case spv::OpConstantTrue:
1496                 case spv::OpConstantFalse:
1497                 case spv::OpConstantComposite:
1498                 case spv::OpUndef:
1499                 case spv::OpExtension:
1500                 case spv::OpCapability:
1501                 case spv::OpEntryPoint:
1502                 case spv::OpExtInstImport:
1503                 case spv::OpDecorate:
1504                 case spv::OpMemberDecorate:
1505                 case spv::OpGroupDecorate:
1506                 case spv::OpGroupMemberDecorate:
1507                 case spv::OpDecorationGroup:
1508                 case spv::OpName:
1509                 case spv::OpMemberName:
1510                 case spv::OpSource:
1511                 case spv::OpSourceContinued:
1512                 case spv::OpSourceExtension:
1513                 case spv::OpLine:
1514                 case spv::OpNoLine:
1515                 case spv::OpModuleProcessed:
1516                 case spv::OpString:
1517                         // Nothing to do at emit time. These are either fully handled at analysis time,
1518                         // or don't require any work at all.
1519                         return EmitResult::Continue;
1520
1521                 case spv::OpLabel:
1522                         return EmitResult::Continue;
1523
1524                 case spv::OpVariable:
1525                         return EmitVariable(insn, state);
1526
1527                 case spv::OpLoad:
1528                 case spv::OpAtomicLoad:
1529                         return EmitLoad(insn, state);
1530
1531                 case spv::OpStore:
1532                 case spv::OpAtomicStore:
1533                         return EmitStore(insn, state);
1534
1535                 case spv::OpAccessChain:
1536                 case spv::OpInBoundsAccessChain:
1537                         return EmitAccessChain(insn, state);
1538
1539                 case spv::OpCompositeConstruct:
1540                         return EmitCompositeConstruct(insn, state);
1541
1542                 case spv::OpCompositeInsert:
1543                         return EmitCompositeInsert(insn, state);
1544
1545                 case spv::OpCompositeExtract:
1546                         return EmitCompositeExtract(insn, state);
1547
1548                 case spv::OpVectorShuffle:
1549                         return EmitVectorShuffle(insn, state);
1550
1551                 case spv::OpVectorExtractDynamic:
1552                         return EmitVectorExtractDynamic(insn, state);
1553
1554                 case spv::OpVectorInsertDynamic:
1555                         return EmitVectorInsertDynamic(insn, state);
1556
1557                 case spv::OpVectorTimesScalar:
1558                 case spv::OpMatrixTimesScalar:
1559                         return EmitVectorTimesScalar(insn, state);
1560
1561                 case spv::OpMatrixTimesVector:
1562                         return EmitMatrixTimesVector(insn, state);
1563
1564                 case spv::OpVectorTimesMatrix:
1565                         return EmitVectorTimesMatrix(insn, state);
1566
1567                 case spv::OpMatrixTimesMatrix:
1568                         return EmitMatrixTimesMatrix(insn, state);
1569
1570                 case spv::OpNot:
1571                 case spv::OpSNegate:
1572                 case spv::OpFNegate:
1573                 case spv::OpLogicalNot:
1574                 case spv::OpConvertFToU:
1575                 case spv::OpConvertFToS:
1576                 case spv::OpConvertSToF:
1577                 case spv::OpConvertUToF:
1578                 case spv::OpBitcast:
1579                 case spv::OpIsInf:
1580                 case spv::OpIsNan:
1581                 case spv::OpDPdx:
1582                 case spv::OpDPdxCoarse:
1583                 case spv::OpDPdy:
1584                 case spv::OpDPdyCoarse:
1585                 case spv::OpFwidth:
1586                 case spv::OpFwidthCoarse:
1587                 case spv::OpDPdxFine:
1588                 case spv::OpDPdyFine:
1589                 case spv::OpFwidthFine:
1590                         return EmitUnaryOp(insn, state);
1591
1592                 case spv::OpIAdd:
1593                 case spv::OpISub:
1594                 case spv::OpIMul:
1595                 case spv::OpSDiv:
1596                 case spv::OpUDiv:
1597                 case spv::OpFAdd:
1598                 case spv::OpFSub:
1599                 case spv::OpFMul:
1600                 case spv::OpFDiv:
1601                 case spv::OpFMod:
1602                 case spv::OpFRem:
1603                 case spv::OpFOrdEqual:
1604                 case spv::OpFUnordEqual:
1605                 case spv::OpFOrdNotEqual:
1606                 case spv::OpFUnordNotEqual:
1607                 case spv::OpFOrdLessThan:
1608                 case spv::OpFUnordLessThan:
1609                 case spv::OpFOrdGreaterThan:
1610                 case spv::OpFUnordGreaterThan:
1611                 case spv::OpFOrdLessThanEqual:
1612                 case spv::OpFUnordLessThanEqual:
1613                 case spv::OpFOrdGreaterThanEqual:
1614                 case spv::OpFUnordGreaterThanEqual:
1615                 case spv::OpSMod:
1616                 case spv::OpSRem:
1617                 case spv::OpUMod:
1618                 case spv::OpIEqual:
1619                 case spv::OpINotEqual:
1620                 case spv::OpUGreaterThan:
1621                 case spv::OpSGreaterThan:
1622                 case spv::OpUGreaterThanEqual:
1623                 case spv::OpSGreaterThanEqual:
1624                 case spv::OpULessThan:
1625                 case spv::OpSLessThan:
1626                 case spv::OpULessThanEqual:
1627                 case spv::OpSLessThanEqual:
1628                 case spv::OpShiftRightLogical:
1629                 case spv::OpShiftRightArithmetic:
1630                 case spv::OpShiftLeftLogical:
1631                 case spv::OpBitwiseOr:
1632                 case spv::OpBitwiseXor:
1633                 case spv::OpBitwiseAnd:
1634                 case spv::OpLogicalOr:
1635                 case spv::OpLogicalAnd:
1636                 case spv::OpLogicalEqual:
1637                 case spv::OpLogicalNotEqual:
1638                 case spv::OpUMulExtended:
1639                 case spv::OpSMulExtended:
1640                         return EmitBinaryOp(insn, state);
1641
1642                 case spv::OpDot:
1643                         return EmitDot(insn, state);
1644
1645                 case spv::OpSelect:
1646                         return EmitSelect(insn, state);
1647
1648                 case spv::OpExtInst:
1649                         return EmitExtendedInstruction(insn, state);
1650
1651                 case spv::OpAny:
1652                         return EmitAny(insn, state);
1653
1654                 case spv::OpAll:
1655                         return EmitAll(insn, state);
1656
1657                 case spv::OpBranch:
1658                         return EmitBranch(insn, state);
1659
1660                 case spv::OpPhi:
1661                         return EmitPhi(insn, state);
1662
1663                 case spv::OpSelectionMerge:
1664                 case spv::OpLoopMerge:
1665                         return EmitResult::Continue;
1666
1667                 case spv::OpBranchConditional:
1668                         return EmitBranchConditional(insn, state);
1669
1670                 case spv::OpSwitch:
1671                         return EmitSwitch(insn, state);
1672
1673                 case spv::OpUnreachable:
1674                         return EmitUnreachable(insn, state);
1675
1676                 case spv::OpReturn:
1677                         return EmitReturn(insn, state);
1678
1679                 default:
1680                         UNIMPLEMENTED("opcode: %s", OpcodeName(insn.opcode()).c_str());
1681                         break;
1682                 }
1683
1684                 return EmitResult::Continue;
1685         }
1686
1687         SpirvShader::EmitResult SpirvShader::EmitVariable(InsnIterator insn, EmitState *state) const
1688         {
1689                 auto routine = state->routine;
1690                 Object::ID resultId = insn.word(2);
1691                 auto &object = getObject(resultId);
1692                 auto &objectTy = getType(object.type);
1693                 switch (objectTy.storageClass)
1694                 {
1695                 case spv::StorageClassInput:
1696                 {
1697                         if (object.kind == Object::Kind::InterfaceVariable)
1698                         {
1699                                 auto &dst = routine->getValue(resultId);
1700                                 int offset = 0;
1701                                 VisitInterface(resultId,
1702                                                                 [&](Decorations const &d, AttribType type) {
1703                                                                         auto scalarSlot = d.Location << 2 | d.Component;
1704                                                                         dst[offset++] = routine->inputs[scalarSlot];
1705                                                                 });
1706                         }
1707                         break;
1708                 }
1709                 case spv::StorageClassUniform:
1710                 case spv::StorageClassStorageBuffer:
1711                 {
1712                         Decorations d{};
1713                         ApplyDecorationsForId(&d, resultId);
1714                         ASSERT(d.DescriptorSet >= 0);
1715                         ASSERT(d.Binding >= 0);
1716
1717                         size_t bindingOffset = routine->pipelineLayout->getBindingOffset(d.DescriptorSet, d.Binding);
1718
1719                         Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
1720                         Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // VkDescriptorBufferInfo*
1721                         Pointer<Byte> buffer = *Pointer<Pointer<Byte>>(binding + OFFSET(VkDescriptorBufferInfo, buffer)); // vk::Buffer*
1722                         Pointer<Byte> data = *Pointer<Pointer<Byte>>(buffer + vk::Buffer::DataOffset); // void*
1723                         Int offset = *Pointer<Int>(binding + OFFSET(VkDescriptorBufferInfo, offset));
1724                         Pointer<Byte> address = data + offset;
1725                         routine->physicalPointers[resultId] = address;
1726                         break;
1727                 }
1728                 case spv::StorageClassPushConstant:
1729                 {
1730                         routine->physicalPointers[resultId] = routine->pushConstants;
1731                         break;
1732                 }
1733                 default:
1734                         break;
1735                 }
1736
1737                 return EmitResult::Continue;
1738         }
1739
1740         SpirvShader::EmitResult SpirvShader::EmitLoad(InsnIterator insn, EmitState *state) const
1741         {
1742                 auto routine = state->routine;
1743                 bool atomic = (insn.opcode() == spv::OpAtomicLoad);
1744                 Object::ID resultId = insn.word(2);
1745                 Object::ID pointerId = insn.word(3);
1746                 auto &result = getObject(resultId);
1747                 auto &resultTy = getType(result.type);
1748                 auto &pointer = getObject(pointerId);
1749                 auto &pointerBase = getObject(pointer.pointerBase);
1750                 auto &pointerBaseTy = getType(pointerBase.type);
1751                 std::memory_order memoryOrder = std::memory_order_relaxed;
1752
1753                 if(atomic)
1754                 {
1755                         Object::ID semanticsId = insn.word(5);
1756                         auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1757                         memoryOrder = MemoryOrder(memorySemantics);
1758                 }
1759
1760                 ASSERT(getType(pointer.type).element == result.type);
1761                 ASSERT(Type::ID(insn.word(1)) == result.type);
1762                 ASSERT(!atomic || getType(getType(pointer.type).element).opcode() == spv::OpTypeInt);  // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1763
1764                 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1765                 {
1766                         UNIMPLEMENTED("StorageClassImage load not yet implemented");
1767                 }
1768
1769                 Pointer<Float> ptrBase;
1770                 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1771                 {
1772                         ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1773                 }
1774                 else
1775                 {
1776                         ptrBase = &routine->getValue(pointer.pointerBase)[0];
1777                 }
1778
1779                 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1780                 auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
1781
1782                 auto load = std::unique_ptr<SIMD::Float[]>(new SIMD::Float[resultTy.sizeInComponents]);
1783
1784                 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1785                 {
1786                         // Divergent offsets or masked lanes.
1787                         auto offsets = pointer.kind == Object::Kind::Value ?
1788                                         As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1789                                         RValue<SIMD::Int>(SIMD::Int(0));
1790                         for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1791                         {
1792                                 // i wish i had a Float,Float,Float,Float constructor here..
1793                                 for (int j = 0; j < SIMD::Width; j++)
1794                                 {
1795                                         If(Extract(state->activeLaneMask(), j) != 0)
1796                                         {
1797                                                 Int offset = Int(i) + Extract(offsets, j);
1798                                                 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1799                                                 load[i] = Insert(load[i], Load(&ptrBase[offset], sizeof(float), atomic, memoryOrder), j);
1800                                         }
1801                                 }
1802                         }
1803                 }
1804                 Else
1805                 {
1806                         // No divergent offsets or masked lanes.
1807                         if (interleavedByLane)
1808                         {
1809                                 // Lane-interleaved data.
1810                                 Pointer<SIMD::Float> src = ptrBase;
1811                                 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1812                                 {
1813                                         load[i] = Load(&src[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1814                                 }
1815                         }
1816                         else
1817                         {
1818                                 // Non-interleaved data.
1819                                 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1820                                 {
1821                                         load[i] = RValue<SIMD::Float>(Load(&ptrBase[i], sizeof(float), atomic, memoryOrder));  // TODO: optimize alignment
1822                                 }
1823                         }
1824                 }
1825
1826                 auto &dst = routine->createIntermediate(resultId, resultTy.sizeInComponents);
1827                 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1828                 {
1829                         dst.move(i, load[i]);
1830                 }
1831
1832                 return EmitResult::Continue;
1833         }
1834
1835         SpirvShader::EmitResult SpirvShader::EmitStore(InsnIterator insn, EmitState *state) const
1836         {
1837                 auto routine = state->routine;
1838                 bool atomic = (insn.opcode() == spv::OpAtomicStore);
1839                 Object::ID pointerId = insn.word(1);
1840                 Object::ID objectId = insn.word(atomic ? 4 : 2);
1841                 auto &object = getObject(objectId);
1842                 auto &pointer = getObject(pointerId);
1843                 auto &pointerTy = getType(pointer.type);
1844                 auto &elementTy = getType(pointerTy.element);
1845                 auto &pointerBase = getObject(pointer.pointerBase);
1846                 auto &pointerBaseTy = getType(pointerBase.type);
1847                 std::memory_order memoryOrder = std::memory_order_relaxed;
1848
1849                 if(atomic)
1850                 {
1851                         Object::ID semanticsId = insn.word(3);
1852                         auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1853                         memoryOrder = MemoryOrder(memorySemantics);
1854                 }
1855
1856                 ASSERT(!atomic || elementTy.opcode() == spv::OpTypeInt);  // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1857
1858                 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1859                 {
1860                         UNIMPLEMENTED("StorageClassImage store not yet implemented");
1861                 }
1862
1863                 Pointer<Float> ptrBase;
1864                 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1865                 {
1866                         ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1867                 }
1868                 else
1869                 {
1870                         ptrBase = &routine->getValue(pointer.pointerBase)[0];
1871                 }
1872
1873                 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1874                 auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
1875
1876                 if (object.kind == Object::Kind::Constant)
1877                 {
1878                         // Constant source data.
1879                         auto src = reinterpret_cast<float *>(object.constantValue.get());
1880                         If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1881                         {
1882                                 // Divergent offsets or masked lanes.
1883                                 auto offsets = pointer.kind == Object::Kind::Value ?
1884                                                 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1885                                                 RValue<SIMD::Int>(SIMD::Int(0));
1886                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1887                                 {
1888                                         for (int j = 0; j < SIMD::Width; j++)
1889                                         {
1890                                                 If(Extract(state->activeLaneMask(), j) != 0)
1891                                                 {
1892                                                         Int offset = Int(i) + Extract(offsets, j);
1893                                                         if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1894                                                         Store(RValue<Float>(src[i]), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1895                                                 }
1896                                         }
1897                                 }
1898                         }
1899                         Else
1900                         {
1901                                 // Constant source data.
1902                                 // No divergent offsets or masked lanes.
1903                                 Pointer<SIMD::Float> dst = ptrBase;
1904                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1905                                 {
1906                                         Store(RValue<SIMD::Float>(src[i]), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1907                                 }
1908                         }
1909                 }
1910                 else
1911                 {
1912                         // Intermediate source data.
1913                         auto &src = routine->getIntermediate(objectId);
1914                         If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1915                         {
1916                                 // Divergent offsets or masked lanes.
1917                                 auto offsets = pointer.kind == Object::Kind::Value ?
1918                                                 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1919                                                 RValue<SIMD::Int>(SIMD::Int(0));
1920                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1921                                 {
1922                                         for (int j = 0; j < SIMD::Width; j++)
1923                                         {
1924                                                 If(Extract(state->activeLaneMask(), j) != 0)
1925                                                 {
1926                                                         Int offset = Int(i) + Extract(offsets, j);
1927                                                         if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1928                                                         Store(Extract(src.Float(i), j), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1929                                                 }
1930                                         }
1931                                 }
1932                         }
1933                         Else
1934                         {
1935                                 // No divergent offsets or masked lanes.
1936                                 if (interleavedByLane)
1937                                 {
1938                                         // Lane-interleaved data.
1939                                         Pointer<SIMD::Float> dst = ptrBase;
1940                                         for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1941                                         {
1942                                                 Store(src.Float(i), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1943                                         }
1944                                 }
1945                                 else
1946                                 {
1947                                         // Intermediate source data. Non-interleaved data.
1948                                         Pointer<SIMD::Float> dst = ptrBase;
1949                                         for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1950                                         {
1951                                                 Store<SIMD::Float>(SIMD::Float(src.Float(i)), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1952                                         }
1953                                 }
1954                         }
1955                 }
1956
1957                 return EmitResult::Continue;
1958         }
1959
1960         SpirvShader::EmitResult SpirvShader::EmitAccessChain(InsnIterator insn, EmitState *state) const
1961         {
1962                 auto routine = state->routine;
1963                 Type::ID typeId = insn.word(1);
1964                 Object::ID resultId = insn.word(2);
1965                 Object::ID baseId = insn.word(3);
1966                 uint32_t numIndexes = insn.wordCount() - 4;
1967                 const uint32_t *indexes = insn.wordPointer(4);
1968                 auto &type = getType(typeId);
1969                 ASSERT(type.sizeInComponents == 1);
1970                 ASSERT(getObject(baseId).pointerBase == getObject(resultId).pointerBase);
1971
1972                 auto &dst = routine->createIntermediate(resultId, type.sizeInComponents);
1973
1974                 if(type.storageClass == spv::StorageClassPushConstant ||
1975                    type.storageClass == spv::StorageClassUniform ||
1976                    type.storageClass == spv::StorageClassStorageBuffer)
1977                 {
1978                         dst.move(0, WalkExplicitLayoutAccessChain(baseId, numIndexes, indexes, routine));
1979                 }
1980                 else
1981                 {
1982                         dst.move(0, WalkAccessChain(baseId, numIndexes, indexes, routine));
1983                 }
1984
1985                 return EmitResult::Continue;
1986         }
1987
1988         SpirvShader::EmitResult SpirvShader::EmitCompositeConstruct(InsnIterator insn, EmitState *state) const
1989         {
1990                 auto routine = state->routine;
1991                 auto &type = getType(insn.word(1));
1992                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1993                 auto offset = 0u;
1994
1995                 for (auto i = 0u; i < insn.wordCount() - 3; i++)
1996                 {
1997                         Object::ID srcObjectId = insn.word(3u + i);
1998                         auto & srcObject = getObject(srcObjectId);
1999                         auto & srcObjectTy = getType(srcObject.type);
2000                         GenericValue srcObjectAccess(this, routine, srcObjectId);
2001
2002                         for (auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
2003                         {
2004                                 dst.move(offset++, srcObjectAccess.Float(j));
2005                         }
2006                 }
2007
2008                 return EmitResult::Continue;
2009         }
2010
2011         SpirvShader::EmitResult SpirvShader::EmitCompositeInsert(InsnIterator insn, EmitState *state) const
2012         {
2013                 auto routine = state->routine;
2014                 Type::ID resultTypeId = insn.word(1);
2015                 auto &type = getType(resultTypeId);
2016                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2017                 auto &newPartObject = getObject(insn.word(3));
2018                 auto &newPartObjectTy = getType(newPartObject.type);
2019                 auto firstNewComponent = WalkLiteralAccessChain(resultTypeId, insn.wordCount() - 5, insn.wordPointer(5));
2020
2021                 GenericValue srcObjectAccess(this, routine, insn.word(4));
2022                 GenericValue newPartObjectAccess(this, routine, insn.word(3));
2023
2024                 // old components before
2025                 for (auto i = 0u; i < firstNewComponent; i++)
2026                 {
2027                         dst.move(i, srcObjectAccess.Float(i));
2028                 }
2029                 // new part
2030                 for (auto i = 0u; i < newPartObjectTy.sizeInComponents; i++)
2031                 {
2032                         dst.move(firstNewComponent + i, newPartObjectAccess.Float(i));
2033                 }
2034                 // old components after
2035                 for (auto i = firstNewComponent + newPartObjectTy.sizeInComponents; i < type.sizeInComponents; i++)
2036                 {
2037                         dst.move(i, srcObjectAccess.Float(i));
2038                 }
2039
2040                 return EmitResult::Continue;
2041         }
2042
2043         SpirvShader::EmitResult SpirvShader::EmitCompositeExtract(InsnIterator insn, EmitState *state) const
2044         {
2045                 auto routine = state->routine;
2046                 auto &type = getType(insn.word(1));
2047                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2048                 auto &compositeObject = getObject(insn.word(3));
2049                 Type::ID compositeTypeId = compositeObject.definition.word(1);
2050                 auto firstComponent = WalkLiteralAccessChain(compositeTypeId, insn.wordCount() - 4, insn.wordPointer(4));
2051
2052                 GenericValue compositeObjectAccess(this, routine, insn.word(3));
2053                 for (auto i = 0u; i < type.sizeInComponents; i++)
2054                 {
2055                         dst.move(i, compositeObjectAccess.Float(firstComponent + i));
2056                 }
2057
2058                 return EmitResult::Continue;
2059         }
2060
2061         SpirvShader::EmitResult SpirvShader::EmitVectorShuffle(InsnIterator insn, EmitState *state) const
2062         {
2063                 auto routine = state->routine;
2064                 auto &type = getType(insn.word(1));
2065                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2066
2067                 // Note: number of components in result type, first half type, and second
2068                 // half type are all independent.
2069                 auto &firstHalfType = getType(getObject(insn.word(3)).type);
2070
2071                 GenericValue firstHalfAccess(this, routine, insn.word(3));
2072                 GenericValue secondHalfAccess(this, routine, insn.word(4));
2073
2074                 for (auto i = 0u; i < type.sizeInComponents; i++)
2075                 {
2076                         auto selector = insn.word(5 + i);
2077                         if (selector == static_cast<uint32_t>(-1))
2078                         {
2079                                 // Undefined value. Until we decide to do real undef values, zero is as good
2080                                 // a value as any
2081                                 dst.move(i, RValue<SIMD::Float>(0.0f));
2082                         }
2083                         else if (selector < firstHalfType.sizeInComponents)
2084                         {
2085                                 dst.move(i, firstHalfAccess.Float(selector));
2086                         }
2087                         else
2088                         {
2089                                 dst.move(i, secondHalfAccess.Float(selector - firstHalfType.sizeInComponents));
2090                         }
2091                 }
2092
2093                 return EmitResult::Continue;
2094         }
2095
2096         SpirvShader::EmitResult SpirvShader::EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const
2097         {
2098                 auto routine = state->routine;
2099                 auto &type = getType(insn.word(1));
2100                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2101                 auto &srcType = getType(getObject(insn.word(3)).type);
2102
2103                 GenericValue src(this, routine, insn.word(3));
2104                 GenericValue index(this, routine, insn.word(4));
2105
2106                 SIMD::UInt v = SIMD::UInt(0);
2107
2108                 for (auto i = 0u; i < srcType.sizeInComponents; i++)
2109                 {
2110                         v |= CmpEQ(index.UInt(0), SIMD::UInt(i)) & src.UInt(i);
2111                 }
2112
2113                 dst.move(0, v);
2114                 return EmitResult::Continue;
2115         }
2116
2117         SpirvShader::EmitResult SpirvShader::EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const
2118         {
2119                 auto routine = state->routine;
2120                 auto &type = getType(insn.word(1));
2121                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2122
2123                 GenericValue src(this, routine, insn.word(3));
2124                 GenericValue component(this, routine, insn.word(4));
2125                 GenericValue index(this, routine, insn.word(5));
2126
2127                 for (auto i = 0u; i < type.sizeInComponents; i++)
2128                 {
2129                         SIMD::UInt mask = CmpEQ(SIMD::UInt(i), index.UInt(0));
2130                         dst.move(i, (src.UInt(i) & ~mask) | (component.UInt(0) & mask));
2131                 }
2132                 return EmitResult::Continue;
2133         }
2134
2135         SpirvShader::EmitResult SpirvShader::EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const
2136         {
2137                 auto routine = state->routine;
2138                 auto &type = getType(insn.word(1));
2139                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2140                 auto lhs = GenericValue(this, routine, insn.word(3));
2141                 auto rhs = GenericValue(this, routine, insn.word(4));
2142
2143                 for (auto i = 0u; i < type.sizeInComponents; i++)
2144                 {
2145                         dst.move(i, lhs.Float(i) * rhs.Float(0));
2146                 }
2147
2148                 return EmitResult::Continue;
2149         }
2150
2151         SpirvShader::EmitResult SpirvShader::EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const
2152         {
2153                 auto routine = state->routine;
2154                 auto &type = getType(insn.word(1));
2155                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2156                 auto lhs = GenericValue(this, routine, insn.word(3));
2157                 auto rhs = GenericValue(this, routine, insn.word(4));
2158                 auto rhsType = getType(getObject(insn.word(4)).type);
2159
2160                 for (auto i = 0u; i < type.sizeInComponents; i++)
2161                 {
2162                         SIMD::Float v = lhs.Float(i) * rhs.Float(0);
2163                         for (auto j = 1u; j < rhsType.sizeInComponents; j++)
2164                         {
2165                                 v += lhs.Float(i + type.sizeInComponents * j) * rhs.Float(j);
2166                         }
2167                         dst.move(i, v);
2168                 }
2169
2170                 return EmitResult::Continue;
2171         }
2172
2173         SpirvShader::EmitResult SpirvShader::EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const
2174         {
2175                 auto routine = state->routine;
2176                 auto &type = getType(insn.word(1));
2177                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2178                 auto lhs = GenericValue(this, routine, insn.word(3));
2179                 auto rhs = GenericValue(this, routine, insn.word(4));
2180                 auto lhsType = getType(getObject(insn.word(3)).type);
2181
2182                 for (auto i = 0u; i < type.sizeInComponents; i++)
2183                 {
2184                         SIMD::Float v = lhs.Float(0) * rhs.Float(i * lhsType.sizeInComponents);
2185                         for (auto j = 1u; j < lhsType.sizeInComponents; j++)
2186                         {
2187                                 v += lhs.Float(j) * rhs.Float(i * lhsType.sizeInComponents + j);
2188                         }
2189                         dst.move(i, v);
2190                 }
2191
2192                 return EmitResult::Continue;
2193         }
2194
2195         SpirvShader::EmitResult SpirvShader::EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const
2196         {
2197                 auto routine = state->routine;
2198                 auto &type = getType(insn.word(1));
2199                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2200                 auto lhs = GenericValue(this, routine, insn.word(3));
2201                 auto rhs = GenericValue(this, routine, insn.word(4));
2202
2203                 auto numColumns = type.definition.word(3);
2204                 auto numRows = getType(type.definition.word(2)).definition.word(3);
2205                 auto numAdds = getType(getObject(insn.word(3)).type).definition.word(3);
2206
2207                 for (auto row = 0u; row < numRows; row++)
2208                 {
2209                         for (auto col = 0u; col < numColumns; col++)
2210                         {
2211                                 SIMD::Float v = SIMD::Float(0);
2212                                 for (auto i = 0u; i < numAdds; i++)
2213                                 {
2214                                         v += lhs.Float(i * numRows + row) * rhs.Float(col * numAdds + i);
2215                                 }
2216                                 dst.move(numRows * col + row, v);
2217                         }
2218                 }
2219
2220                 return EmitResult::Continue;
2221         }
2222
2223         SpirvShader::EmitResult SpirvShader::EmitUnaryOp(InsnIterator insn, EmitState *state) const
2224         {
2225                 auto routine = state->routine;
2226                 auto &type = getType(insn.word(1));
2227                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2228                 auto src = GenericValue(this, routine, insn.word(3));
2229
2230                 for (auto i = 0u; i < type.sizeInComponents; i++)
2231                 {
2232                         switch (insn.opcode())
2233                         {
2234                         case spv::OpNot:
2235                         case spv::OpLogicalNot:         // logical not == bitwise not due to all-bits boolean representation
2236                                 dst.move(i, ~src.UInt(i));
2237                                 break;
2238                         case spv::OpSNegate:
2239                                 dst.move(i, -src.Int(i));
2240                                 break;
2241                         case spv::OpFNegate:
2242                                 dst.move(i, -src.Float(i));
2243                                 break;
2244                         case spv::OpConvertFToU:
2245                                 dst.move(i, SIMD::UInt(src.Float(i)));
2246                                 break;
2247                         case spv::OpConvertFToS:
2248                                 dst.move(i, SIMD::Int(src.Float(i)));
2249                                 break;
2250                         case spv::OpConvertSToF:
2251                                 dst.move(i, SIMD::Float(src.Int(i)));
2252                                 break;
2253                         case spv::OpConvertUToF:
2254                                 dst.move(i, SIMD::Float(src.UInt(i)));
2255                                 break;
2256                         case spv::OpBitcast:
2257                                 dst.move(i, src.Float(i));
2258                                 break;
2259                         case spv::OpIsInf:
2260                                 dst.move(i, IsInf(src.Float(i)));
2261                                 break;
2262                         case spv::OpIsNan:
2263                                 dst.move(i, IsNan(src.Float(i)));
2264                                 break;
2265                         case spv::OpDPdx:
2266                         case spv::OpDPdxCoarse:
2267                                 // Derivative instructions: FS invocations are laid out like so:
2268                                 //    0 1
2269                                 //    2 3
2270                                 static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
2271                                 dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
2272                                 break;
2273                         case spv::OpDPdy:
2274                         case spv::OpDPdyCoarse:
2275                                 dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
2276                                 break;
2277                         case spv::OpFwidth:
2278                         case spv::OpFwidthCoarse:
2279                                 dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))
2280                                                         + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
2281                                 break;
2282                         case spv::OpDPdxFine:
2283                         {
2284                                 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
2285                                 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
2286                                 SIMD::Float v = SIMD::Float(firstRow);
2287                                 v = Insert(v, secondRow, 2);
2288                                 v = Insert(v, secondRow, 3);
2289                                 dst.move(i, v);
2290                                 break;
2291                         }
2292                         case spv::OpDPdyFine:
2293                         {
2294                                 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
2295                                 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
2296                                 SIMD::Float v = SIMD::Float(firstColumn);
2297                                 v = Insert(v, secondColumn, 1);
2298                                 v = Insert(v, secondColumn, 3);
2299                                 dst.move(i, v);
2300                                 break;
2301                         }
2302                         case spv::OpFwidthFine:
2303                         {
2304                                 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
2305                                 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
2306                                 SIMD::Float dpdx = SIMD::Float(firstRow);
2307                                 dpdx = Insert(dpdx, secondRow, 2);
2308                                 dpdx = Insert(dpdx, secondRow, 3);
2309                                 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
2310                                 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
2311                                 SIMD::Float dpdy = SIMD::Float(firstColumn);
2312                                 dpdy = Insert(dpdy, secondColumn, 1);
2313                                 dpdy = Insert(dpdy, secondColumn, 3);
2314                                 dst.move(i, Abs(dpdx) + Abs(dpdy));
2315                                 break;
2316                         }
2317                         default:
2318                                 UNIMPLEMENTED("Unhandled unary operator %s", OpcodeName(insn.opcode()).c_str());
2319                         }
2320                 }
2321
2322                 return EmitResult::Continue;
2323         }
2324
2325         SpirvShader::EmitResult SpirvShader::EmitBinaryOp(InsnIterator insn, EmitState *state) const
2326         {
2327                 auto routine = state->routine;
2328                 auto &type = getType(insn.word(1));
2329                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2330                 auto &lhsType = getType(getObject(insn.word(3)).type);
2331                 auto lhs = GenericValue(this, routine, insn.word(3));
2332                 auto rhs = GenericValue(this, routine, insn.word(4));
2333
2334                 for (auto i = 0u; i < lhsType.sizeInComponents; i++)
2335                 {
2336                         switch (insn.opcode())
2337                         {
2338                         case spv::OpIAdd:
2339                                 dst.move(i, lhs.Int(i) + rhs.Int(i));
2340                                 break;
2341                         case spv::OpISub:
2342                                 dst.move(i, lhs.Int(i) - rhs.Int(i));
2343                                 break;
2344                         case spv::OpIMul:
2345                                 dst.move(i, lhs.Int(i) * rhs.Int(i));
2346                                 break;
2347                         case spv::OpSDiv:
2348                         {
2349                                 SIMD::Int a = lhs.Int(i);
2350                                 SIMD::Int b = rhs.Int(i);
2351                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2352                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2353                                 dst.move(i, a / b);
2354                                 break;
2355                         }
2356                         case spv::OpUDiv:
2357                         {
2358                                 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2359                                 dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
2360                                 break;
2361                         }
2362                         case spv::OpSRem:
2363                         {
2364                                 SIMD::Int a = lhs.Int(i);
2365                                 SIMD::Int b = rhs.Int(i);
2366                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2367                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2368                                 dst.move(i, a % b);
2369                                 break;
2370                         }
2371                         case spv::OpSMod:
2372                         {
2373                                 SIMD::Int a = lhs.Int(i);
2374                                 SIMD::Int b = rhs.Int(i);
2375                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2376                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2377                                 auto mod = a % b;
2378                                 // If a and b have opposite signs, the remainder operation takes
2379                                 // the sign from a but OpSMod is supposed to take the sign of b.
2380                                 // Adding b will ensure that the result has the correct sign and
2381                                 // that it is still congruent to a modulo b.
2382                                 //
2383                                 // See also http://mathforum.org/library/drmath/view/52343.html
2384                                 auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
2385                                 auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
2386                                 dst.move(i, As<SIMD::Float>(fixedMod));
2387                                 break;
2388                         }
2389                         case spv::OpUMod:
2390                         {
2391                                 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2392                                 dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
2393                                 break;
2394                         }
2395                         case spv::OpIEqual:
2396                         case spv::OpLogicalEqual:
2397                                 dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
2398                                 break;
2399                         case spv::OpINotEqual:
2400                         case spv::OpLogicalNotEqual:
2401                                 dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
2402                                 break;
2403                         case spv::OpUGreaterThan:
2404                                 dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
2405                                 break;
2406                         case spv::OpSGreaterThan:
2407                                 dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i)));
2408                                 break;
2409                         case spv::OpUGreaterThanEqual:
2410                                 dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
2411                                 break;
2412                         case spv::OpSGreaterThanEqual:
2413                                 dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i)));
2414                                 break;
2415                         case spv::OpULessThan:
2416                                 dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
2417                                 break;
2418                         case spv::OpSLessThan:
2419                                 dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i)));
2420                                 break;
2421                         case spv::OpULessThanEqual:
2422                                 dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
2423                                 break;
2424                         case spv::OpSLessThanEqual:
2425                                 dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i)));
2426                                 break;
2427                         case spv::OpFAdd:
2428                                 dst.move(i, lhs.Float(i) + rhs.Float(i));
2429                                 break;
2430                         case spv::OpFSub:
2431                                 dst.move(i, lhs.Float(i) - rhs.Float(i));
2432                                 break;
2433                         case spv::OpFMul:
2434                                 dst.move(i, lhs.Float(i) * rhs.Float(i));
2435                                 break;
2436                         case spv::OpFDiv:
2437                                 dst.move(i, lhs.Float(i) / rhs.Float(i));
2438                                 break;
2439                         case spv::OpFMod:
2440                                 // TODO(b/126873455): inaccurate for values greater than 2^24
2441                                 dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
2442                                 break;
2443                         case spv::OpFRem:
2444                                 dst.move(i, lhs.Float(i) % rhs.Float(i));
2445                                 break;
2446                         case spv::OpFOrdEqual:
2447                                 dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
2448                                 break;
2449                         case spv::OpFUnordEqual:
2450                                 dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
2451                                 break;
2452                         case spv::OpFOrdNotEqual:
2453                                 dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
2454                                 break;
2455                         case spv::OpFUnordNotEqual:
2456                                 dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
2457                                 break;
2458                         case spv::OpFOrdLessThan:
2459                                 dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i)));
2460                                 break;
2461                         case spv::OpFUnordLessThan:
2462                                 dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i)));
2463                                 break;
2464                         case spv::OpFOrdGreaterThan:
2465                                 dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i)));
2466                                 break;
2467                         case spv::OpFUnordGreaterThan:
2468                                 dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
2469                                 break;
2470                         case spv::OpFOrdLessThanEqual:
2471                                 dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i)));
2472                                 break;
2473                         case spv::OpFUnordLessThanEqual:
2474                                 dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i)));
2475                                 break;
2476                         case spv::OpFOrdGreaterThanEqual:
2477                                 dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i)));
2478                                 break;
2479                         case spv::OpFUnordGreaterThanEqual:
2480                                 dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
2481                                 break;
2482                         case spv::OpShiftRightLogical:
2483                                 dst.move(i, lhs.UInt(i) >> rhs.UInt(i));
2484                                 break;
2485                         case spv::OpShiftRightArithmetic:
2486                                 dst.move(i, lhs.Int(i) >> rhs.Int(i));
2487                                 break;
2488                         case spv::OpShiftLeftLogical:
2489                                 dst.move(i, lhs.UInt(i) << rhs.UInt(i));
2490                                 break;
2491                         case spv::OpBitwiseOr:
2492                         case spv::OpLogicalOr:
2493                                 dst.move(i, lhs.UInt(i) | rhs.UInt(i));
2494                                 break;
2495                         case spv::OpBitwiseXor:
2496                                 dst.move(i, lhs.UInt(i) ^ rhs.UInt(i));
2497                                 break;
2498                         case spv::OpBitwiseAnd:
2499                         case spv::OpLogicalAnd:
2500                                 dst.move(i, lhs.UInt(i) & rhs.UInt(i));
2501                                 break;
2502                         case spv::OpSMulExtended:
2503                                 // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
2504                                 // In our flat view then, component i is the i'th component of the first member;
2505                                 // component i + N is the i'th component of the second member.
2506                                 dst.move(i, lhs.Int(i) * rhs.Int(i));
2507                                 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
2508                                 break;
2509                         case spv::OpUMulExtended:
2510                                 dst.move(i, lhs.UInt(i) * rhs.UInt(i));
2511                                 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
2512                                 break;
2513                         default:
2514                                 UNIMPLEMENTED("Unhandled binary operator %s", OpcodeName(insn.opcode()).c_str());
2515                         }
2516                 }
2517
2518                 return EmitResult::Continue;
2519         }
2520
2521         SpirvShader::EmitResult SpirvShader::EmitDot(InsnIterator insn, EmitState *state) const
2522         {
2523                 auto routine = state->routine;
2524                 auto &type = getType(insn.word(1));
2525                 ASSERT(type.sizeInComponents == 1);
2526                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2527                 auto &lhsType = getType(getObject(insn.word(3)).type);
2528                 auto lhs = GenericValue(this, routine, insn.word(3));
2529                 auto rhs = GenericValue(this, routine, insn.word(4));
2530
2531                 dst.move(0, Dot(lhsType.sizeInComponents, lhs, rhs));
2532                 return EmitResult::Continue;
2533         }
2534
2535         SpirvShader::EmitResult SpirvShader::EmitSelect(InsnIterator insn, EmitState *state) const
2536         {
2537                 auto routine = state->routine;
2538                 auto &type = getType(insn.word(1));
2539                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2540                 auto cond = GenericValue(this, routine, insn.word(3));
2541                 auto lhs = GenericValue(this, routine, insn.word(4));
2542                 auto rhs = GenericValue(this, routine, insn.word(5));
2543
2544                 for (auto i = 0u; i < type.sizeInComponents; i++)
2545                 {
2546                         dst.move(i, (cond.Int(i) & lhs.Int(i)) | (~cond.Int(i) & rhs.Int(i)));   // FIXME: IfThenElse()
2547                 }
2548
2549                 return EmitResult::Continue;
2550         }
2551
2552         SpirvShader::EmitResult SpirvShader::EmitExtendedInstruction(InsnIterator insn, EmitState *state) const
2553         {
2554                 auto routine = state->routine;
2555                 auto &type = getType(insn.word(1));
2556                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2557                 auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
2558
2559                 switch (extInstIndex)
2560                 {
2561                 case GLSLstd450FAbs:
2562                 {
2563                         auto src = GenericValue(this, routine, insn.word(5));
2564                         for (auto i = 0u; i < type.sizeInComponents; i++)
2565                         {
2566                                 dst.move(i, Abs(src.Float(i)));
2567                         }
2568                         break;
2569                 }
2570                 case GLSLstd450SAbs:
2571                 {
2572                         auto src = GenericValue(this, routine, insn.word(5));
2573                         for (auto i = 0u; i < type.sizeInComponents; i++)
2574                         {
2575                                 dst.move(i, Abs(src.Int(i)));
2576                         }
2577                         break;
2578                 }
2579                 case GLSLstd450Cross:
2580                 {
2581                         auto lhs = GenericValue(this, routine, insn.word(5));
2582                         auto rhs = GenericValue(this, routine, insn.word(6));
2583                         dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
2584                         dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
2585                         dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
2586                         break;
2587                 }
2588                 case GLSLstd450Floor:
2589                 {
2590                         auto src = GenericValue(this, routine, insn.word(5));
2591                         for (auto i = 0u; i < type.sizeInComponents; i++)
2592                         {
2593                                 dst.move(i, Floor(src.Float(i)));
2594                         }
2595                         break;
2596                 }
2597                 case GLSLstd450Trunc:
2598                 {
2599                         auto src = GenericValue(this, routine, insn.word(5));
2600                         for (auto i = 0u; i < type.sizeInComponents; i++)
2601                         {
2602                                 dst.move(i, Trunc(src.Float(i)));
2603                         }
2604                         break;
2605                 }
2606                 case GLSLstd450Ceil:
2607                 {
2608                         auto src = GenericValue(this, routine, insn.word(5));
2609                         for (auto i = 0u; i < type.sizeInComponents; i++)
2610                         {
2611                                 dst.move(i, Ceil(src.Float(i)));
2612                         }
2613                         break;
2614                 }
2615                 case GLSLstd450Fract:
2616                 {
2617                         auto src = GenericValue(this, routine, insn.word(5));
2618                         for (auto i = 0u; i < type.sizeInComponents; i++)
2619                         {
2620                                 dst.move(i, Frac(src.Float(i)));
2621                         }
2622                         break;
2623                 }
2624                 case GLSLstd450Round:
2625                 {
2626                         auto src = GenericValue(this, routine, insn.word(5));
2627                         for (auto i = 0u; i < type.sizeInComponents; i++)
2628                         {
2629                                 dst.move(i, Round(src.Float(i)));
2630                         }
2631                         break;
2632                 }
2633                 case GLSLstd450RoundEven:
2634                 {
2635                         auto src = GenericValue(this, routine, insn.word(5));
2636                         for (auto i = 0u; i < type.sizeInComponents; i++)
2637                         {
2638                                 auto x = Round(src.Float(i));
2639                                 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
2640                                 dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
2641                                                 SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
2642                         }
2643                         break;
2644                 }
2645                 case GLSLstd450FMin:
2646                 {
2647                         auto lhs = GenericValue(this, routine, insn.word(5));
2648                         auto rhs = GenericValue(this, routine, insn.word(6));
2649                         for (auto i = 0u; i < type.sizeInComponents; i++)
2650                         {
2651                                 dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
2652                         }
2653                         break;
2654                 }
2655                 case GLSLstd450FMax:
2656                 {
2657                         auto lhs = GenericValue(this, routine, insn.word(5));
2658                         auto rhs = GenericValue(this, routine, insn.word(6));
2659                         for (auto i = 0u; i < type.sizeInComponents; i++)
2660                         {
2661                                 dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
2662                         }
2663                         break;
2664                 }
2665                 case GLSLstd450SMin:
2666                 {
2667                         auto lhs = GenericValue(this, routine, insn.word(5));
2668                         auto rhs = GenericValue(this, routine, insn.word(6));
2669                         for (auto i = 0u; i < type.sizeInComponents; i++)
2670                         {
2671                                 dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
2672                         }
2673                         break;
2674                 }
2675                 case GLSLstd450SMax:
2676                 {
2677                         auto lhs = GenericValue(this, routine, insn.word(5));
2678                         auto rhs = GenericValue(this, routine, insn.word(6));
2679                         for (auto i = 0u; i < type.sizeInComponents; i++)
2680                         {
2681                                 dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
2682                         }
2683                         break;
2684                 }
2685                 case GLSLstd450UMin:
2686                 {
2687                         auto lhs = GenericValue(this, routine, insn.word(5));
2688                         auto rhs = GenericValue(this, routine, insn.word(6));
2689                         for (auto i = 0u; i < type.sizeInComponents; i++)
2690                         {
2691                                 dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
2692                         }
2693                         break;
2694                 }
2695                 case GLSLstd450UMax:
2696                 {
2697                         auto lhs = GenericValue(this, routine, insn.word(5));
2698                         auto rhs = GenericValue(this, routine, insn.word(6));
2699                         for (auto i = 0u; i < type.sizeInComponents; i++)
2700                         {
2701                                 dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
2702                         }
2703                         break;
2704                 }
2705                 case GLSLstd450Step:
2706                 {
2707                         auto edge = GenericValue(this, routine, insn.word(5));
2708                         auto x = GenericValue(this, routine, insn.word(6));
2709                         for (auto i = 0u; i < type.sizeInComponents; i++)
2710                         {
2711                                 dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
2712                         }
2713                         break;
2714                 }
2715                 case GLSLstd450SmoothStep:
2716                 {
2717                         auto edge0 = GenericValue(this, routine, insn.word(5));
2718                         auto edge1 = GenericValue(this, routine, insn.word(6));
2719                         auto x = GenericValue(this, routine, insn.word(7));
2720                         for (auto i = 0u; i < type.sizeInComponents; i++)
2721                         {
2722                                 auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
2723                                                 (edge1.Float(i) - edge0.Float(i)), SIMD::Float(0.0f)), SIMD::Float(1.0f));
2724                                 dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
2725                         }
2726                         break;
2727                 }
2728                 case GLSLstd450FMix:
2729                 {
2730                         auto x = GenericValue(this, routine, insn.word(5));
2731                         auto y = GenericValue(this, routine, insn.word(6));
2732                         auto a = GenericValue(this, routine, insn.word(7));
2733                         for (auto i = 0u; i < type.sizeInComponents; i++)
2734                         {
2735                                 dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
2736                         }
2737                         break;
2738                 }
2739                 case GLSLstd450FClamp:
2740                 {
2741                         auto x = GenericValue(this, routine, insn.word(5));
2742                         auto minVal = GenericValue(this, routine, insn.word(6));
2743                         auto maxVal = GenericValue(this, routine, insn.word(7));
2744                         for (auto i = 0u; i < type.sizeInComponents; i++)
2745                         {
2746                                 dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
2747                         }
2748                         break;
2749                 }
2750                 case GLSLstd450SClamp:
2751                 {
2752                         auto x = GenericValue(this, routine, insn.word(5));
2753                         auto minVal = GenericValue(this, routine, insn.word(6));
2754                         auto maxVal = GenericValue(this, routine, insn.word(7));
2755                         for (auto i = 0u; i < type.sizeInComponents; i++)
2756                         {
2757                                 dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
2758                         }
2759                         break;
2760                 }
2761                 case GLSLstd450UClamp:
2762                 {
2763                         auto x = GenericValue(this, routine, insn.word(5));
2764                         auto minVal = GenericValue(this, routine, insn.word(6));
2765                         auto maxVal = GenericValue(this, routine, insn.word(7));
2766                         for (auto i = 0u; i < type.sizeInComponents; i++)
2767                         {
2768                                 dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
2769                         }
2770                         break;
2771                 }
2772                 case GLSLstd450FSign:
2773                 {
2774                         auto src = GenericValue(this, routine, insn.word(5));
2775                         for (auto i = 0u; i < type.sizeInComponents; i++)
2776                         {
2777                                 auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
2778                                 auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
2779                                 dst.move(i, neg | pos);
2780                         }
2781                         break;
2782                 }
2783                 case GLSLstd450SSign:
2784                 {
2785                         auto src = GenericValue(this, routine, insn.word(5));
2786                         for (auto i = 0u; i < type.sizeInComponents; i++)
2787                         {
2788                                 auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
2789                                 auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
2790                                 dst.move(i, neg | pos);
2791                         }
2792                         break;
2793                 }
2794                 case GLSLstd450Reflect:
2795                 {
2796                         auto I = GenericValue(this, routine, insn.word(5));
2797                         auto N = GenericValue(this, routine, insn.word(6));
2798
2799                         SIMD::Float d = Dot(type.sizeInComponents, I, N);
2800
2801                         for (auto i = 0u; i < type.sizeInComponents; i++)
2802                         {
2803                                 dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
2804                         }
2805                         break;
2806                 }
2807                 case GLSLstd450Refract:
2808                 {
2809                         auto I = GenericValue(this, routine, insn.word(5));
2810                         auto N = GenericValue(this, routine, insn.word(6));
2811                         auto eta = GenericValue(this, routine, insn.word(7));
2812
2813                         SIMD::Float d = Dot(type.sizeInComponents, I, N);
2814                         SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
2815                         SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
2816                         SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
2817
2818                         for (auto i = 0u; i < type.sizeInComponents; i++)
2819                         {
2820                                 dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
2821                         }
2822                         break;
2823                 }
2824                 case GLSLstd450FaceForward:
2825                 {
2826                         auto N = GenericValue(this, routine, insn.word(5));
2827                         auto I = GenericValue(this, routine, insn.word(6));
2828                         auto Nref = GenericValue(this, routine, insn.word(7));
2829
2830                         SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
2831                         SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
2832
2833                         for (auto i = 0u; i < type.sizeInComponents; i++)
2834                         {
2835                                 auto n = N.Float(i);
2836                                 dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
2837                         }
2838                         break;
2839                 }
2840                 case GLSLstd450Length:
2841                 {
2842                         auto x = GenericValue(this, routine, insn.word(5));
2843                         SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2844
2845                         dst.move(0, Sqrt(d));
2846                         break;
2847                 }
2848                 case GLSLstd450Normalize:
2849                 {
2850                         auto x = GenericValue(this, routine, insn.word(5));
2851                         SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2852                         SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
2853
2854                         for (auto i = 0u; i < type.sizeInComponents; i++)
2855                         {
2856                                 dst.move(i, invLength * x.Float(i));
2857                         }
2858                         break;
2859                 }
2860                 case GLSLstd450Distance:
2861                 {
2862                         auto p0 = GenericValue(this, routine, insn.word(5));
2863                         auto p1 = GenericValue(this, routine, insn.word(6));
2864                         auto p0Type = getType(getObject(insn.word(5)).type);
2865
2866                         // sqrt(dot(p0-p1, p0-p1))
2867                         SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
2868
2869                         for (auto i = 1u; i < p0Type.sizeInComponents; i++)
2870                         {
2871                                 d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
2872                         }
2873
2874                         dst.move(0, Sqrt(d));
2875                         break;
2876                 }
2877                 default:
2878                         UNIMPLEMENTED("Unhandled ExtInst %d", extInstIndex);
2879                 }
2880
2881                 return EmitResult::Continue;
2882         }
2883
2884         std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
2885         {
2886                 switch(memorySemantics)
2887                 {
2888                 case spv::MemorySemanticsMaskNone:                   return std::memory_order_relaxed;
2889                 case spv::MemorySemanticsAcquireMask:                return std::memory_order_acquire;
2890                 case spv::MemorySemanticsReleaseMask:                return std::memory_order_release;
2891                 case spv::MemorySemanticsAcquireReleaseMask:         return std::memory_order_acq_rel;
2892                 case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel;  // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
2893                 default:
2894                         UNREACHABLE("MemorySemanticsMask %x", memorySemantics);
2895                         return std::memory_order_acq_rel;
2896                 }
2897         }
2898
2899         SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const
2900         {
2901                 SIMD::Float d = x.Float(0) * y.Float(0);
2902
2903                 for (auto i = 1u; i < numComponents; i++)
2904                 {
2905                         d += x.Float(i) * y.Float(i);
2906                 }
2907
2908                 return d;
2909         }
2910
2911         SpirvShader::EmitResult SpirvShader::EmitAny(InsnIterator insn, EmitState *state) const
2912         {
2913                 auto routine = state->routine;
2914                 auto &type = getType(insn.word(1));
2915                 ASSERT(type.sizeInComponents == 1);
2916                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2917                 auto &srcType = getType(getObject(insn.word(3)).type);
2918                 auto src = GenericValue(this, routine, insn.word(3));
2919
2920                 SIMD::UInt result = src.UInt(0);
2921
2922                 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2923                 {
2924                         result |= src.UInt(i);
2925                 }
2926
2927                 dst.move(0, result);
2928                 return EmitResult::Continue;
2929         }
2930
2931         SpirvShader::EmitResult SpirvShader::EmitAll(InsnIterator insn, EmitState *state) const
2932         {
2933                 auto routine = state->routine;
2934                 auto &type = getType(insn.word(1));
2935                 ASSERT(type.sizeInComponents == 1);
2936                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2937                 auto &srcType = getType(getObject(insn.word(3)).type);
2938                 auto src = GenericValue(this, routine, insn.word(3));
2939
2940                 SIMD::UInt result = src.UInt(0);
2941
2942                 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2943                 {
2944                         result &= src.UInt(i);
2945                 }
2946
2947                 dst.move(0, result);
2948                 return EmitResult::Continue;
2949         }
2950
2951         SpirvShader::EmitResult SpirvShader::EmitBranch(InsnIterator insn, EmitState *state) const
2952         {
2953                 auto target = Block::ID(insn.word(1));
2954                 auto edge = Block::Edge{state->currentBlock, target};
2955                 state->edgeActiveLaneMasks.emplace(edge, state->activeLaneMask());
2956                 return EmitResult::Terminator;
2957         }
2958
2959         SpirvShader::EmitResult SpirvShader::EmitBranchConditional(InsnIterator insn, EmitState *state) const
2960         {
2961                 auto block = getBlock(state->currentBlock);
2962                 ASSERT(block.branchInstruction == insn);
2963
2964                 auto condId = Object::ID(block.branchInstruction.word(1));
2965                 auto trueBlockId = Block::ID(block.branchInstruction.word(2));
2966                 auto falseBlockId = Block::ID(block.branchInstruction.word(3));
2967
2968                 auto cond = GenericValue(this, state->routine, condId);
2969                 ASSERT_MSG(getType(getObject(condId).type).sizeInComponents == 1, "Condition must be a Boolean type scalar");
2970
2971                 // TODO: Optimize for case where all lanes take same path.
2972
2973                 state->addOutputActiveLaneMaskEdge(trueBlockId, cond.Int(0));
2974                 state->addOutputActiveLaneMaskEdge(falseBlockId, ~cond.Int(0));
2975
2976                 return EmitResult::Terminator;
2977         }
2978
2979         SpirvShader::EmitResult SpirvShader::EmitSwitch(InsnIterator insn, EmitState *state) const
2980         {
2981                 auto block = getBlock(state->currentBlock);
2982                 ASSERT(block.branchInstruction == insn);
2983
2984                 auto selId = Object::ID(block.branchInstruction.word(1));
2985
2986                 auto sel = GenericValue(this, state->routine, selId);
2987                 ASSERT_MSG(getType(getObject(selId).type).sizeInComponents == 1, "Selector must be a scalar");
2988
2989                 auto numCases = (block.branchInstruction.wordCount() - 3) / 2;
2990
2991                 // TODO: Optimize for case where all lanes take same path.
2992
2993                 SIMD::Int defaultLaneMask = state->activeLaneMask();
2994
2995                 // Gather up the case label matches and calculate defaultLaneMask.
2996                 std::vector<RValue<SIMD::Int>> caseLabelMatches;
2997                 caseLabelMatches.reserve(numCases);
2998                 for (uint32_t i = 0; i < numCases; i++)
2999                 {
3000                         auto label = block.branchInstruction.word(i * 2 + 3);
3001                         auto caseBlockId = Block::ID(block.branchInstruction.word(i * 2 + 4));
3002                         auto caseLabelMatch = CmpEQ(sel.Int(0), SIMD::Int(label));
3003                         state->addOutputActiveLaneMaskEdge(caseBlockId, caseLabelMatch);
3004                         defaultLaneMask &= ~caseLabelMatch;
3005                 }
3006
3007                 auto defaultBlockId = Block::ID(block.branchInstruction.word(2));
3008                 state->addOutputActiveLaneMaskEdge(defaultBlockId, defaultLaneMask);
3009
3010                 return EmitResult::Terminator;
3011         }
3012
3013         SpirvShader::EmitResult SpirvShader::EmitUnreachable(InsnIterator insn, EmitState *state) const
3014         {
3015                 // TODO: Log something in this case?
3016                 state->setActiveLaneMask(SIMD::Int(0));
3017                 return EmitResult::Terminator;
3018         }
3019
3020         SpirvShader::EmitResult SpirvShader::EmitReturn(InsnIterator insn, EmitState *state) const
3021         {
3022                 state->setActiveLaneMask(SIMD::Int(0));
3023                 return EmitResult::Terminator;
3024         }
3025
3026         SpirvShader::EmitResult SpirvShader::EmitPhi(InsnIterator insn, EmitState *state) const
3027         {
3028                 auto routine = state->routine;
3029                 auto typeId = Type::ID(insn.word(1));
3030                 auto type = getType(typeId);
3031                 auto objectId = Object::ID(insn.word(2));
3032
3033                 auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
3034
3035                 bool first = true;
3036                 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
3037                 {
3038                         auto varId = Object::ID(insn.word(w + 0));
3039                         auto blockId = Block::ID(insn.word(w + 1));
3040
3041                         auto in = GenericValue(this, routine, varId);
3042                         auto mask = GetActiveLaneMaskEdge(state, blockId, state->currentBlock);
3043
3044                         for (uint32_t i = 0; i < type.sizeInComponents; i++)
3045                         {
3046                                 auto inMasked = in.Int(i) & mask;
3047                                 dst.replace(i, first ? inMasked : (dst.Int(i) | inMasked));
3048                         }
3049                         first = false;
3050                 }
3051
3052                 return EmitResult::Continue;
3053         }
3054
3055         void SpirvShader::emitEpilog(SpirvRoutine *routine) const
3056         {
3057                 for (auto insn : *this)
3058                 {
3059                         switch (insn.opcode())
3060                         {
3061                         case spv::OpVariable:
3062                         {
3063                                 Object::ID resultId = insn.word(2);
3064                                 auto &object = getObject(resultId);
3065                                 auto &objectTy = getType(object.type);
3066                                 if (object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
3067                                 {
3068                                         auto &dst = routine->getValue(resultId);
3069                                         int offset = 0;
3070                                         VisitInterface(resultId,
3071                                                                    [&](Decorations const &d, AttribType type) {
3072                                                                            auto scalarSlot = d.Location << 2 | d.Component;
3073                                                                            routine->outputs[scalarSlot] = dst[offset++];
3074                                                                    });
3075                                 }
3076                                 break;
3077                         }
3078                         default:
3079                                 break;
3080                         }
3081                 }
3082         }
3083
3084         SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end)
3085         {
3086                 // Default to a Simple, this may change later.
3087                 kind = Block::Simple;
3088
3089                 // Walk the instructions to find the last two of the block.
3090                 InsnIterator insns[2];
3091                 for (auto insn : *this)
3092                 {
3093                         insns[0] = insns[1];
3094                         insns[1] = insn;
3095                 }
3096
3097                 switch (insns[1].opcode())
3098                 {
3099                         case spv::OpBranch:
3100                                 branchInstruction = insns[1];
3101                                 outs.emplace(Block::ID(branchInstruction.word(1)));
3102
3103                                 switch (insns[0].opcode())
3104                                 {
3105                                         case spv::OpLoopMerge:
3106                                                 kind = Loop;
3107                                                 mergeInstruction = insns[0];
3108                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
3109                                                 continueTarget = Block::ID(mergeInstruction.word(2));
3110                                                 break;
3111
3112                                         default:
3113                                                 kind = Block::Simple;
3114                                                 break;
3115                                 }
3116                                 break;
3117
3118                         case spv::OpBranchConditional:
3119                                 branchInstruction = insns[1];
3120                                 outs.emplace(Block::ID(branchInstruction.word(2)));
3121                                 outs.emplace(Block::ID(branchInstruction.word(3)));
3122
3123                                 switch (insns[0].opcode())
3124                                 {
3125                                         case spv::OpSelectionMerge:
3126                                                 kind = StructuredBranchConditional;
3127                                                 mergeInstruction = insns[0];
3128                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
3129                                                 break;
3130
3131                                         case spv::OpLoopMerge:
3132                                                 kind = Loop;
3133                                                 mergeInstruction = insns[0];
3134                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
3135                                                 continueTarget = Block::ID(mergeInstruction.word(2));
3136                                                 break;
3137
3138                                         default:
3139                                                 kind = UnstructuredBranchConditional;
3140                                                 break;
3141                                 }
3142                                 break;
3143
3144                         case spv::OpSwitch:
3145                                 branchInstruction = insns[1];
3146                                 outs.emplace(Block::ID(branchInstruction.word(2)));
3147                                 for (uint32_t w = 4; w < branchInstruction.wordCount(); w += 2)
3148                                 {
3149                                         outs.emplace(Block::ID(branchInstruction.word(w)));
3150                                 }
3151
3152                                 switch (insns[0].opcode())
3153                                 {
3154                                         case spv::OpSelectionMerge:
3155                                                 kind = StructuredSwitch;
3156                                                 mergeInstruction = insns[0];
3157                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
3158                                                 break;
3159
3160                                         default:
3161                                                 kind = UnstructuredSwitch;
3162                                                 break;
3163                                 }
3164                                 break;
3165
3166                         default:
3167                                 break;
3168                 }
3169         }
3170
3171         bool SpirvShader::existsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const
3172         {
3173                 // TODO: Optimize: This can be cached on the block.
3174                 Block::Set seen;
3175                 seen.emplace(notPassingThrough);
3176
3177                 std::queue<Block::ID> pending;
3178                 pending.emplace(from);
3179
3180                 while (pending.size() > 0)
3181                 {
3182                         auto id = pending.front();
3183                         pending.pop();
3184                         for (auto out : getBlock(id).outs)
3185                         {
3186                                 if (seen.count(out) != 0) { continue; }
3187                                 if (out == to) { return true; }
3188                                 pending.emplace(out);
3189                         }
3190                         seen.emplace(id);
3191                 }
3192
3193                 return false;
3194         }
3195
3196         void SpirvShader::EmitState::addOutputActiveLaneMaskEdge(Block::ID to, RValue<SIMD::Int> mask)
3197         {
3198                 addActiveLaneMaskEdge(currentBlock, to, mask & activeLaneMask());
3199         }
3200
3201         void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask)
3202         {
3203                 auto edge = Block::Edge{from, to};
3204                 auto it = edgeActiveLaneMasks.find(edge);
3205                 if (it == edgeActiveLaneMasks.end())
3206                 {
3207                         edgeActiveLaneMasks.emplace(edge, mask);
3208                 }
3209                 else
3210                 {
3211                         auto combined = it->second | mask;
3212                         edgeActiveLaneMasks.erase(edge);
3213                         edgeActiveLaneMasks.emplace(edge, combined);
3214                 }
3215         }
3216
3217         RValue<SIMD::Int> SpirvShader::GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const
3218         {
3219                 auto edge = Block::Edge{from, to};
3220                 auto it = state->edgeActiveLaneMasks.find(edge);
3221                 ASSERT_MSG(it != state->edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
3222                 return it->second;
3223         }
3224
3225         SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
3226                 pipelineLayout(pipelineLayout)
3227         {
3228         }
3229
3230 }