1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include <spirv/unified1/spirv.hpp>
16 #include <spirv/unified1/GLSL.std.450.h>
17 #include "SpirvShader.hpp"
18 #include "System/Math.hpp"
19 #include "Vulkan/VkBuffer.hpp"
20 #include "Vulkan/VkDebug.hpp"
21 #include "Vulkan/VkPipelineLayout.hpp"
22 #include "Device/Config.hpp"
27 #undef Bool // b/127920555
// Returns a scalar true if ANY SIMD lane is "true".
// Lane truth is encoded in the sign (MSB) bit; SignMask gathers the MSBs of
// all lanes into a scalar, so a non-zero mask means at least one lane is set.
32 rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints)
34 return rr::SignMask(ints) != 0;
// Returns a scalar true if ANY SIMD lane is "false".
// Complements the lanes first, so a lane whose MSB was clear contributes a
// set bit to the resulting sign mask.
37 rr::RValue<rr::Bool> AnyFalse(rr::RValue<sw::SIMD::Int> const &ints)
39 return rr::SignMask(~ints) != 0;
45 volatile int SpirvShader::serialCounter = 1; // Start at 1, 0 is invalid shader.
// Analysis pass over the whole SPIR-V module, run once at construction.
// Walks every instruction and records: decorations (per-id and per-member),
// execution modes, type/object definitions, constants, basic-block
// boundaries (OpLabel .. terminator), and the entry function's first block
// (mainBlockId). Finally back-fills each Block's 'ins' set from the
// recorded 'outs'.
// NOTE(review): this listing appears decimated — intervening source lines
// (braces, break statements, some case labels) are missing between the
// numbered lines; code below is kept byte-identical to the listing.
47 SpirvShader::SpirvShader(InsnStore const &insns)
48 : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
49 outputs{MAX_INTERFACE_COMPONENTS},
50 serialID{serialCounter++}, modes{}
52 ASSERT(insns.size() > 0);
54 // Simplifying assumptions (to be satisfied by earlier transformations)
55 // - There is exactly one entrypoint in the module, and it's the one we want
56 // - The only input/output OpVariables present are those used by the entrypoint
58 Block::ID currentBlock;
59 InsnIterator blockStart;
61 for (auto insn : *this)
63 switch (insn.opcode())
65 case spv::OpExecutionMode:
66 ProcessExecutionMode(insn);
// (OpDecorate handling) — target id, decoration kind, optional literal arg.
71 TypeOrObjectID targetId = insn.word(1);
72 auto decoration = static_cast<spv::Decoration>(insn.word(2));
73 decorations[targetId].Apply(
75 insn.wordCount() > 3 ? insn.word(3) : 0);
77 if (decoration == spv::DecorationCentroid)
78 modes.NeedsCentroid = true;
82 case spv::OpMemberDecorate:
84 Type::ID targetId = insn.word(1);
85 auto memberIndex = insn.word(2);
86 auto &d = memberDecorations[targetId];
87 if (memberIndex >= d.size())
88 d.resize(memberIndex + 1);  // on demand; exact size would require another pass...
89 auto decoration = static_cast<spv::Decoration>(insn.word(3));
92 insn.wordCount() > 4 ? insn.word(4) : 0);
94 if (decoration == spv::DecorationCentroid)
95 modes.NeedsCentroid = true;
99 case spv::OpDecorationGroup:
100 // Nothing to do here. We don't need to record the definition of the group; we'll just have
101 // the bundle of decorations float around. If we were to ever walk the decorations directly,
102 // we might think about introducing this as a real Object.
105 case spv::OpGroupDecorate:
107 auto const &srcDecorations = decorations[insn.word(1)];
108 for (auto i = 2u; i < insn.wordCount(); i++)
110 // remaining operands are targets to apply the group to.
111 decorations[insn.word(i)].Apply(srcDecorations);
116 case spv::OpGroupMemberDecorate:
118 auto const &srcDecorations = decorations[insn.word(1)];
119 for (auto i = 2u; i < insn.wordCount(); i += 2)
121 // remaining operands are pairs of <id>, literal for members to apply to.
122 auto &d = memberDecorations[insn.word(i)];
123 auto memberIndex = insn.word(i + 1);
124 if (memberIndex >= d.size())
125 d.resize(memberIndex + 1);  // on demand resize, see above...
126 d[memberIndex].Apply(srcDecorations);
// (OpLabel handling) — opens a new basic block; nested blocks are not
// expected, hence the assert that no block is currently open.
133 ASSERT(currentBlock.value() == 0);
134 currentBlock = Block::ID(insn.word(1));
139 // Branch Instructions (subset of Termination Instructions):
141 case spv::OpBranchConditional:
146 // Termination instruction:
148 case spv::OpUnreachable:
// A terminator closes the open block: record its [blockStart, blockEnd)
// instruction range and reset currentBlock to the invalid id 0.
150 ASSERT(currentBlock.value() != 0);
151 auto blockEnd = insn; blockEnd++;
152 blocks[currentBlock] = Block(blockStart, blockEnd);
153 currentBlock = Block::ID(0);
155 if (insn.opcode() == spv::OpKill)
157 modes.ContainsKill = true;
162 case spv::OpLoopMerge:
163 case spv::OpSelectionMerge:
164 break; // Nothing to do in analysis pass.
166 case spv::OpTypeVoid:
167 case spv::OpTypeBool:
169 case spv::OpTypeFloat:
170 case spv::OpTypeVector:
171 case spv::OpTypeMatrix:
172 case spv::OpTypeImage:
173 case spv::OpTypeSampler:
174 case spv::OpTypeSampledImage:
175 case spv::OpTypeArray:
176 case spv::OpTypeRuntimeArray:
177 case spv::OpTypeStruct:
178 case spv::OpTypePointer:
179 case spv::OpTypeFunction:
183 case spv::OpVariable:
185 Type::ID typeId = insn.word(1);
186 Object::ID resultId = insn.word(2);
187 auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
188 if (insn.wordCount() > 4)
189 UNIMPLEMENTED("Variable initializers not yet supported");
191 auto &object = defs[resultId];
192 object.kind = Object::Kind::Variable;
193 object.definition = insn;
194 object.type = typeId;
195 object.pointerBase = insn.word(2);	// base is itself
// The pointer type's declared storage class must agree with the variable's.
197 ASSERT(getType(typeId).storageClass == storageClass);
199 switch (storageClass)
201 case spv::StorageClassInput:
202 case spv::StorageClassOutput:
203 ProcessInterfaceVariable(object);
205 case spv::StorageClassUniform:
206 case spv::StorageClassStorageBuffer:
207 case spv::StorageClassPushConstant:
// Descriptor/push-constant backed variables live in external memory,
// addressed through a physical pointer rather than a per-lane lvalue.
208 object.kind = Object::Kind::PhysicalPointer;
211 case spv::StorageClassPrivate:
212 case spv::StorageClassFunction:
213 break; // Correctly handled.
215 case spv::StorageClassUniformConstant:
216 case spv::StorageClassWorkgroup:
217 case spv::StorageClassCrossWorkgroup:
218 case spv::StorageClassGeneric:
219 case spv::StorageClassAtomicCounter:
220 case spv::StorageClassImage:
221 UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
225 UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
231 case spv::OpConstant:
232 CreateConstant(insn).constantValue[0] = insn.word(3);
234 case spv::OpConstantFalse:
235 CreateConstant(insn).constantValue[0] = 0;		// represent boolean false as zero
237 case spv::OpConstantTrue:
238 CreateConstant(insn).constantValue[0] = ~0u;	// represent boolean true as all bits set
240 case spv::OpConstantNull:
243 // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
244 // OpConstantNull forms a constant of arbitrary type, all zeros.
245 auto &object = CreateConstant(insn);
246 auto &objectTy = getType(object.type);
247 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
249 object.constantValue[i] = 0;
253 case spv::OpConstantComposite:
// Flatten the constituents' component values into this composite's
// constantValue array, in declaration order.
255 auto &object = CreateConstant(insn);
257 for (auto i = 0u; i < insn.wordCount() - 3; i++)
259 auto &constituent = getObject(insn.word(i + 3));
260 auto &constituentTy = getType(constituent.type);
261 for (auto j = 0u; j < constituentTy.sizeInComponents; j++)
262 object.constantValue[offset++] = constituent.constantValue[j];
265 auto objectId = Object::ID(insn.word(2));
266 auto decorationsIt = decorations.find(objectId);
267 if (decorationsIt != decorations.end() &&
268 decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
270 // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
271 // Decorating an object with the WorkgroupSize built-in
272 // decoration will make that object contain the dimensions
273 // of a local workgroup. If an object is decorated with the
274 // WorkgroupSize decoration, this must take precedence over
275 // any execution mode set for LocalSize.
276 // The object decorated with WorkgroupSize must be declared
277 // as a three-component vector of 32-bit integers.
278 ASSERT(getType(object.type).sizeInComponents == 3);
279 modes.WorkgroupSizeX = object.constantValue[0];
280 modes.WorkgroupSizeY = object.constantValue[1];
281 modes.WorkgroupSizeZ = object.constantValue[2];
286 case spv::OpCapability:
287 break; // Various capabilities will be declared, but none affect our code generation at this point.
288 case spv::OpMemoryModel:
289 break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
291 case spv::OpEntryPoint:
293 case spv::OpFunction:
294 ASSERT(mainBlockId.value() == 0); // Multiple functions found
295 // Scan forward to find the function's label.
296 for (auto it = insn; it != end() && mainBlockId.value() == 0; it++)
300 case spv::OpFunction:
301 case spv::OpFunctionParameter:
304 mainBlockId = Block::ID(it.word(1));
307 WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str());
310 ASSERT(mainBlockId.value() != 0); // Function's OpLabel not found
312 case spv::OpFunctionEnd:
313 // Due to preprocessing, the entrypoint and its function provide no value.
315 case spv::OpExtInstImport:
316 // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
317 // Valid shaders will not attempt to import any other instruction sets.
318 if (0 != strcmp("GLSL.std.450", reinterpret_cast<char const *>(insn.wordPointer(2))))
320 UNIMPLEMENTED("Only GLSL extended instruction set is supported");
324 case spv::OpMemberName:
326 case spv::OpSourceContinued:
327 case spv::OpSourceExtension:
330 case spv::OpModuleProcessed:
332 // No semantic impact
335 case spv::OpFunctionParameter:
336 case spv::OpFunctionCall:
337 case spv::OpSpecConstant:
338 case spv::OpSpecConstantComposite:
339 case spv::OpSpecConstantFalse:
340 case spv::OpSpecConstantOp:
341 case spv::OpSpecConstantTrue:
342 // These should have all been removed by preprocessing passes. If we see them here,
343 // our assumptions are wrong and we will probably generate wrong code.
344 UNIMPLEMENTED("%s should have already been lowered.", OpcodeName(insn.opcode()).c_str());
347 case spv::OpFConvert:
348 case spv::OpSConvert:
349 case spv::OpUConvert:
350 UNIMPLEMENTED("No valid uses for Op*Convert until we support multiple bit widths");
354 case spv::OpAccessChain:
355 case spv::OpInBoundsAccessChain:
356 case spv::OpCompositeConstruct:
357 case spv::OpCompositeInsert:
358 case spv::OpCompositeExtract:
359 case spv::OpVectorShuffle:
360 case spv::OpVectorTimesScalar:
361 case spv::OpMatrixTimesScalar:
362 case spv::OpVectorExtractDynamic:
363 case spv::OpVectorInsertDynamic:
364 case spv::OpNot: // Unary ops
367 case spv::OpLogicalNot:
368 case spv::OpIAdd: // Binary ops
379 case spv::OpFOrdEqual:
380 case spv::OpFUnordEqual:
381 case spv::OpFOrdNotEqual:
382 case spv::OpFUnordNotEqual:
383 case spv::OpFOrdLessThan:
384 case spv::OpFUnordLessThan:
385 case spv::OpFOrdGreaterThan:
386 case spv::OpFUnordGreaterThan:
387 case spv::OpFOrdLessThanEqual:
388 case spv::OpFUnordLessThanEqual:
389 case spv::OpFOrdGreaterThanEqual:
390 case spv::OpFUnordGreaterThanEqual:
395 case spv::OpINotEqual:
396 case spv::OpUGreaterThan:
397 case spv::OpSGreaterThan:
398 case spv::OpUGreaterThanEqual:
399 case spv::OpSGreaterThanEqual:
400 case spv::OpULessThan:
401 case spv::OpSLessThan:
402 case spv::OpULessThanEqual:
403 case spv::OpSLessThanEqual:
404 case spv::OpShiftRightLogical:
405 case spv::OpShiftRightArithmetic:
406 case spv::OpShiftLeftLogical:
407 case spv::OpBitwiseOr:
408 case spv::OpBitwiseXor:
409 case spv::OpBitwiseAnd:
410 case spv::OpLogicalOr:
411 case spv::OpLogicalAnd:
412 case spv::OpLogicalEqual:
413 case spv::OpLogicalNotEqual:
414 case spv::OpUMulExtended:
415 case spv::OpSMulExtended:
417 case spv::OpConvertFToU:
418 case spv::OpConvertFToS:
419 case spv::OpConvertSToF:
420 case spv::OpConvertUToF:
429 case spv::OpDPdxCoarse:
431 case spv::OpDPdyCoarse:
433 case spv::OpFwidthCoarse:
434 case spv::OpDPdxFine:
435 case spv::OpDPdyFine:
436 case spv::OpFwidthFine:
437 case spv::OpAtomicLoad:
439 // Instructions that yield an intermediate value
441 Type::ID typeId = insn.word(1);
442 Object::ID resultId = insn.word(2);
443 auto &object = defs[resultId];
444 object.type = typeId;
445 object.kind = Object::Kind::Value;
446 object.definition = insn;
448 if (insn.opcode() == spv::OpAccessChain || insn.opcode() == spv::OpInBoundsAccessChain)
450 // interior ptr has two parts:
451 // - logical base ptr, common across all lanes and known at compile time
// Access chains propagate their base object's pointerBase so loads/stores
// through the chain know which underlying variable they address.
453 Object::ID baseId = insn.word(3);
454 object.pointerBase = getObject(baseId).pointerBase;
460 case spv::OpAtomicStore:
461 // Don't need to do anything during analysis pass
465 UNIMPLEMENTED("%s", OpcodeName(insn.opcode()).c_str());
469 // Assign all Block::ins
470 for (auto &it : blocks)
472 auto &blockId = it.first;
473 auto &block = it.second;
474 for (auto &outId : block.outs)
476 auto outIt = blocks.find(outId);
477 ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
478 auto &out = outIt->second;
479 out.ins.emplace(blockId);
// Records a type definition from an OpType* instruction: stores the
// instruction, computes the type's flattened component size, and fills in
// element/storage-class/builtin-block metadata for aggregate and pointer
// types. (Listing is decimated; some brace/break lines are missing.)
484 void SpirvShader::DeclareType(InsnIterator insn)
486 Type::ID resultId = insn.word(1);
488 auto &type = types[resultId];
489 type.definition = insn;
490 type.sizeInComponents = ComputeTypeSize(insn);
492 // A structure is a builtin block if it has a builtin
493 // member. All members of such a structure are builtins.
494 switch (insn.opcode())
496 case spv::OpTypeStruct:
498 auto d = memberDecorations.find(resultId);
499 if (d != memberDecorations.end())
501 for (auto &m : d->second)
505 type.isBuiltInBlock = true;
// Pointers inherit the builtin-block flag from their pointee and record
// the storage class from operand 2 (pointee type is operand 3).
512 case spv::OpTypePointer:
514 Type::ID elementTypeId = insn.word(3);
515 type.element = elementTypeId;
516 type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
517 type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
520 case spv::OpTypeVector:
521 case spv::OpTypeMatrix:
522 case spv::OpTypeArray:
523 case spv::OpTypeRuntimeArray:
525 Type::ID elementTypeId = insn.word(2);
526 type.element = elementTypeId;
// Registers a constant-producing instruction (OpConstant* family) as an
// Object of kind Constant and allocates its constantValue storage sized to
// the result type's component count. The caller fills in the value(s).
534 SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
536 Type::ID typeId = insn.word(1);
537 Object::ID resultId = insn.word(2);
538 auto &object = defs[resultId];
539 auto &objectTy = getType(typeId);
540 object.type = typeId;
541 object.kind = Object::Kind::Constant;
542 object.definition = insn;
543 object.constantValue = std::unique_ptr<uint32_t[]>(new uint32_t[objectTy.sizeInComponents]);
// Registers an Input/Output OpVariable with the shader's interface maps.
// Builtin-block variables have each struct member registered individually
// in inputBuiltins/outputBuiltins; plain builtin variables get a single
// entry; everything else is walked component-by-component into the
// user-defined inputs/outputs arrays.
547 void SpirvShader::ProcessInterfaceVariable(Object &object)
549 auto &objectTy = getType(object.type);
550 ASSERT(objectTy.storageClass == spv::StorageClassInput || objectTy.storageClass == spv::StorageClassOutput);
552 ASSERT(objectTy.opcode() == spv::OpTypePointer);
553 auto pointeeTy = getType(objectTy.element);
555 auto &builtinInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputBuiltins : outputBuiltins;
556 auto &userDefinedInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputs : outputs;
558 ASSERT(object.opcode() == spv::OpVariable);
559 Object::ID resultId = object.definition.word(2);
561 if (objectTy.isBuiltInBlock)
563 // walk the builtin block, registering each of its members separately.
564 auto m = memberDecorations.find(objectTy.element);
565 ASSERT(m != memberDecorations.end());		// otherwise we wouldn't have marked the type chain
566 auto &structType = pointeeTy.definition;
569 for (auto &member : m->second)
571 auto &memberType = getType(structType.word(word));
573 if (member.HasBuiltIn)
575 builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
// Members advance the running component offset whether or not they
// are builtins, so later members land at the right offset.
578 offset += memberType.sizeInComponents;
584 auto d = decorations.find(resultId);
585 if (d != decorations.end() && d->second.HasBuiltIn)
587 builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
591 object.kind = Object::Kind::InterfaceVariable;
592 VisitInterface(resultId,
593 [&userDefinedInterface](Decorations const &d, AttribType type) {
594 // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
// Slot index packs Location (4 components per location) with Component.
595 auto scalarSlot = (d.Location << 2) | d.Component;
596 ASSERT(scalarSlot >= 0 &&
597 scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
599 auto &slot = userDefinedInterface[scalarSlot];
602 slot.NoPerspective = d.NoPerspective;
603 slot.Centroid = d.Centroid;
// Records one OpExecutionMode instruction into the 'modes' summary.
// Depth/early-test modes set boolean flags; LocalSize captures the
// workgroup dimensions from operands 3..5 (note: a WorkgroupSize builtin
// constant, handled in the constructor, overrides these).
608 void SpirvShader::ProcessExecutionMode(InsnIterator insn)
610 auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
613 case spv::ExecutionModeEarlyFragmentTests:
614 modes.EarlyFragmentTests = true;
616 case spv::ExecutionModeDepthReplacing:
617 modes.DepthReplacing = true;
619 case spv::ExecutionModeDepthGreater:
620 modes.DepthGreater = true;
622 case spv::ExecutionModeDepthLess:
623 modes.DepthLess = true;
625 case spv::ExecutionModeDepthUnchanged:
626 modes.DepthUnchanged = true;
628 case spv::ExecutionModeLocalSize:
629 modes.WorkgroupSizeX = insn.word(3);
630 modes.WorkgroupSizeY = insn.word(4);
631 modes.WorkgroupSizeZ = insn.word(5);
633 case spv::ExecutionModeOriginUpperLeft:
634 // This is always the case for a Vulkan shader. Do nothing.
637 UNIMPLEMENTED("No other execution modes are permitted");
// Computes the flattened size, in 32-bit components, of the type declared
// by an OpType* instruction. Relies on element types having been declared
// (and sized) earlier in the module.
641 uint32_t SpirvShader::ComputeTypeSize(InsnIterator insn)
643 // Types are always built from the bottom up (with the exception of forward ptrs, which
644 // don't appear in Vulkan shaders. Therefore, we can always assume our component parts have
645 // already been described (and so their sizes determined)
646 switch (insn.opcode())
648 case spv::OpTypeVoid:
649 case spv::OpTypeSampler:
650 case spv::OpTypeImage:
651 case spv::OpTypeSampledImage:
652 case spv::OpTypeFunction:
653 case spv::OpTypeRuntimeArray:
654 // Objects that don't consume any space.
655 // Descriptor-backed objects currently only need exist at compile-time.
656 // Runtime arrays don't appear in places where their size would be interesting
659 case spv::OpTypeBool:
660 case spv::OpTypeFloat:
662 // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
663 // we might need to change this, but only 32 bit components are required for Vulkan 1.1.
666 case spv::OpTypeVector:
667 case spv::OpTypeMatrix:
668 // Vectors and matrices both consume element count * element size.
669 return getType(insn.word(2)).sizeInComponents * insn.word(3);
671 case spv::OpTypeArray:
673 // Element count * element size. Array sizes come from constant ids.
674 auto arraySize = GetConstantInt(insn.word(3));
675 return getType(insn.word(2)).sizeInComponents * arraySize;
678 case spv::OpTypeStruct:
// Struct size is the sum of its member sizes (member type ids start at
// operand word 2).
681 for (uint32_t i = 2u; i < insn.wordCount(); i++)
683 size += getType(insn.word(i)).sizeInComponents;
688 case spv::OpTypePointer:
689 // Runtime representation of a pointer is a per-lane index.
690 // Note: clients are expected to look through the pointer if they want the pointee size instead.
694 // Some other random insn.
695 UNIMPLEMENTED("Only types are supported");
// Classifies a storage class by its in-memory layout. The listed classes
// (Uniform, StorageBuffer, PushConstant) are externally-laid-out memory;
// NOTE(review): the return statements for each branch are missing from
// this decimated listing — presumably these classes are the non-interleaved
// ones, with remaining classes laid out interleaved by SIMD lane; confirm
// against the full source.
700 bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
702 switch (storageClass)
704 case spv::StorageClassUniform:
705 case spv::StorageClassStorageBuffer:
706 case spv::StorageClassPushConstant:
// Recursive worker for VisitInterface: walks a type tree, calling f(d, type)
// once per scalar component with the effective decorations, and returns the
// next free location. (Template header line is missing from this listing;
// F is the callback functor type.)
714 int SpirvShader::VisitInterfaceInner(Type::ID id, Decorations d, F f) const
716 // Recursively walks variable definition and its type tree, taking into account
717 // any explicit Location or Component decorations encountered; where explicit
718 // Locations or Components are not specified, assigns them sequentially.
719 // Collected decorations are carried down toward the leaves and across
720 // siblings; Effect of decorations intentionally does not flow back up the tree.
722 // F is a functor to be called with the effective decoration set for every component.
724 // Returns the next available location, and calls f().
726 // This covers the rules in Vulkan 1.1 spec, 14.1.4 Location Assignment.
728 ApplyDecorationsForId(&d, id);
730 auto const &obj = getType(id);
733 case spv::OpTypePointer:
734 return VisitInterfaceInner<F>(obj.definition.word(3), d, f);
735 case spv::OpTypeMatrix:
736 for (auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
738 // consumes same components of N consecutive locations
739 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
742 case spv::OpTypeVector:
743 for (auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
745 // consumes N consecutive components in the same location
746 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
748 return d.Location + 1;
749 case spv::OpTypeFloat:
750 f(d, ATTRIBTYPE_FLOAT);
751 return d.Location + 1;
// (OpTypeInt case) — word(3) is the signedness flag: 1 = signed.
753 f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
754 return d.Location + 1;
755 case spv::OpTypeBool:
756 f(d, ATTRIBTYPE_UINT);
757 return d.Location + 1;
758 case spv::OpTypeStruct:
760 // iterate over members, which may themselves have Location/Component decorations
761 for (auto i = 0u; i < obj.definition.wordCount() - 2; i++)
763 ApplyDecorationsForIdMember(&d, id, i);
764 d.Location = VisitInterfaceInner<F>(obj.definition.word(i + 2), d, f);
765 d.Component = 0;    // Implicit locations always have component=0
769 case spv::OpTypeArray:
771 auto arraySize = GetConstantInt(obj.definition.word(3));
772 for (auto i = 0u; i < arraySize; i++)
774 d.Location = VisitInterfaceInner<F>(obj.definition.word(2), d, f);
779 // Intentionally partial; most opcodes do not participate in type hierarchies
// Entry point for interface walking: applies the variable's own decorations
// and recurses into its pointer type (OpVariable word 1), invoking f once
// per scalar component. (Template header line is missing from this listing.)
785 void SpirvShader::VisitInterface(Object::ID id, F f) const
787 // Walk a variable definition and call f for each component in it.
789 ApplyDecorationsForId(&d, id);
791 auto def = getObject(id).definition;
792 ASSERT(def.opcode() == spv::OpVariable);
793 VisitInterfaceInner<F>(def.word(1), d, f);
// Resolves an access chain against explicitly-laid-out (Offset/ArrayStride/
// MatrixStride decorated) external memory, returning a per-lane offset in
// sizeof(float) units. Constant indexes accumulate into a scalar offset;
// dynamic indexes contribute per-lane via Reactor intermediates; the two
// are combined at the end.
796 SIMD::Int SpirvShader::WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
798 // Produce a offset into external memory in sizeof(float) units
800 int constantOffset = 0;
801 SIMD::Int dynamicOffset = SIMD::Int(0);
802 auto &baseObject = getObject(id);
803 Type::ID typeId = getType(baseObject.type).element;
805 ApplyDecorationsForId(&d, baseObject.type);
807 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
808 // Start with its offset and build from there.
809 if (baseObject.kind == Object::Kind::Value)
811 dynamicOffset += routine->getIntermediate(id).Int(0);
814 for (auto i = 0u; i < numIndexes; i++)
816 auto & type = getType(typeId);
817 switch (type.definition.opcode())
819 case spv::OpTypeStruct:
// Struct members use the Offset decoration (bytes -> float units).
821 int memberIndex = GetConstantInt(indexIds[i]);
822 ApplyDecorationsForIdMember(&d, typeId, memberIndex);
824 constantOffset += d.Offset / sizeof(float);
825 typeId = type.definition.word(2u + memberIndex);
828 case spv::OpTypeArray:
829 case spv::OpTypeRuntimeArray:
831 // TODO: b/127950082: Check bounds.
832 ApplyDecorationsForId(&d, typeId);
833 ASSERT(d.HasArrayStride);
834 auto & obj = getObject(indexIds[i]);
835 if (obj.kind == Object::Kind::Constant)
836 constantOffset += d.ArrayStride/sizeof(float) * GetConstantInt(indexIds[i]);
838 dynamicOffset += SIMD::Int(d.ArrayStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
839 typeId = type.element;
842 case spv::OpTypeMatrix:
844 // TODO: b/127950082: Check bounds.
845 ApplyDecorationsForId(&d, typeId);
846 ASSERT(d.HasMatrixStride);
847 auto & obj = getObject(indexIds[i]);
848 if (obj.kind == Object::Kind::Constant)
849 constantOffset += d.MatrixStride/sizeof(float) * GetConstantInt(indexIds[i]);
851 dynamicOffset += SIMD::Int(d.MatrixStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
852 typeId = type.element;
855 case spv::OpTypeVector:
// Vector components are tightly packed: index is the component offset.
857 auto & obj = getObject(indexIds[i]);
858 if (obj.kind == Object::Kind::Constant)
859 constantOffset += GetConstantInt(indexIds[i]);
861 dynamicOffset += routine->getIntermediate(indexIds[i]).Int(0);
862 typeId = type.element;
866 UNIMPLEMENTED("Unexpected type '%s' in WalkExplicitLayoutAccessChain", OpcodeName(type.definition.opcode()).c_str());
870 return dynamicOffset + SIMD::Int(constantOffset);
// Resolves an access chain against internally-laid-out (location-oriented)
// memory, returning a per-lane *component* offset. Unlike the explicit-
// layout walk, strides here come from the flattened sizeInComponents of
// each element type rather than from decorations.
873 SIMD::Int SpirvShader::WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
875 // TODO: avoid doing per-lane work in some cases if we can?
876 // Produce a *component* offset into location-oriented memory
878 int constantOffset = 0;
879 SIMD::Int dynamicOffset = SIMD::Int(0);
880 auto &baseObject = getObject(id);
881 Type::ID typeId = getType(baseObject.type).element;
883 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
884 // Start with its offset and build from there.
885 if (baseObject.kind == Object::Kind::Value)
887 dynamicOffset += routine->getIntermediate(id).Int(0);
890 for (auto i = 0u; i < numIndexes; i++)
892 auto & type = getType(typeId);
893 switch(type.opcode())
895 case spv::OpTypeStruct:
// Member offset is the sum of the sizes of all preceding members.
897 int memberIndex = GetConstantInt(indexIds[i]);
898 int offsetIntoStruct = 0;
899 for (auto j = 0; j < memberIndex; j++) {
900 auto memberType = type.definition.word(2u + j);
901 offsetIntoStruct += getType(memberType).sizeInComponents;
903 constantOffset += offsetIntoStruct;
904 typeId = type.definition.word(2u + memberIndex);
908 case spv::OpTypeVector:
909 case spv::OpTypeMatrix:
910 case spv::OpTypeArray:
911 case spv::OpTypeRuntimeArray:
913 // TODO: b/127950082: Check bounds.
914 auto stride = getType(type.element).sizeInComponents;
915 auto & obj = getObject(indexIds[i]);
916 if (obj.kind == Object::Kind::Constant)
917 constantOffset += stride * GetConstantInt(indexIds[i]);
919 dynamicOffset += SIMD::Int(stride) * routine->getIntermediate(indexIds[i]).Int(0);
920 typeId = type.element;
925 UNIMPLEMENTED("Unexpected type '%s' in WalkAccessChain", OpcodeName(type.opcode()).c_str());
929 return dynamicOffset + SIMD::Int(constantOffset);
// Compile-time-only variant of WalkAccessChain: all indexes are literal
// integers (as in OpCompositeExtract/Insert), so the result is a plain
// scalar component offset with no per-lane work.
932 uint32_t SpirvShader::WalkLiteralAccessChain(Type::ID typeId, uint32_t numIndexes, uint32_t const *indexes) const
934 uint32_t constantOffset = 0;
936 for (auto i = 0u; i < numIndexes; i++)
938 auto & type = getType(typeId);
939 switch(type.opcode())
941 case spv::OpTypeStruct:
943 int memberIndex = indexes[i];
944 int offsetIntoStruct = 0;
945 for (auto j = 0; j < memberIndex; j++) {
946 auto memberType = type.definition.word(2u + j);
947 offsetIntoStruct += getType(memberType).sizeInComponents;
949 constantOffset += offsetIntoStruct;
950 typeId = type.definition.word(2u + memberIndex);
954 case spv::OpTypeVector:
955 case spv::OpTypeMatrix:
956 case spv::OpTypeArray:
// Homogeneous element types: offset is element size times the index.
958 auto elementType = type.definition.word(2);
959 auto stride = getType(elementType).sizeInComponents;
960 constantOffset += stride * indexes[i];
961 typeId = elementType;
966 UNIMPLEMENTED("Unexpected type in WalkLiteralAccessChain");
970 return constantOffset;
// Applies a single decoration (with optional literal argument) to this
// Decorations record: stores the argument for value-carrying decorations
// (Location, Offset, strides, BuiltIn, ...) and sets boolean flags for the
// flag-like ones. Unrecognized decorations are deliberately ignored.
973 void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
977 case spv::DecorationLocation:
979 Location = static_cast<int32_t>(arg);
981 case spv::DecorationComponent:
985 case spv::DecorationDescriptorSet:
986 HasDescriptorSet = true;
989 case spv::DecorationBinding:
993 case spv::DecorationBuiltIn:
995 BuiltIn = static_cast<spv::BuiltIn>(arg);
997 case spv::DecorationFlat:
1000 case spv::DecorationNoPerspective:
1001 NoPerspective = true;
1003 case spv::DecorationCentroid:
1006 case spv::DecorationBlock:
1009 case spv::DecorationBufferBlock:
1012 case spv::DecorationOffset:
1014 Offset = static_cast<int32_t>(arg);
1016 case spv::DecorationArrayStride:
1017 HasArrayStride = true;
1018 ArrayStride = static_cast<int32_t>(arg);
1020 case spv::DecorationMatrixStride:
1021 HasMatrixStride = true;
1022 MatrixStride = static_cast<int32_t>(arg);
1025 // Intentionally partial, there are many decorations we just don't care about.
// Merges another Decorations record (e.g. a decoration group) into this
// one: value-carrying fields are copied only when the source has them set
// (Has* flags), while boolean flags are OR-combined.
1030 void SpirvShader::Decorations::Apply(const sw::SpirvShader::Decorations &src)
1032 // Apply a decoration group to this set of decorations
1036 BuiltIn = src.BuiltIn;
1039 if (src.HasLocation)
1042 Location = src.Location;
1045 if (src.HasComponent)
1047 HasComponent = true;
1048 Component = src.Component;
1051 if (src.HasDescriptorSet)
1053 HasDescriptorSet = true;
1054 DescriptorSet = src.DescriptorSet;
1060 Binding = src.Binding;
1066 Offset = src.Offset;
1069 if (src.HasArrayStride)
1071 HasArrayStride = true;
1072 ArrayStride = src.ArrayStride;
1075 if (src.HasMatrixStride)
1077 HasMatrixStride = true;
1078 MatrixStride = src.MatrixStride;
1082 NoPerspective |= src.NoPerspective;
1083 Centroid |= src.Centroid;
1085 BufferBlock |= src.BufferBlock;
// Folds any decorations recorded for the given type/object id into *d;
// a no-op if the id has no decorations.
1088 void SpirvShader::ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const
1090 auto it = decorations.find(id);
1091 if (it != decorations.end())
1092 d->Apply(it->second);
// Folds any member decorations recorded for struct type 'id', member index
// 'member', into *d. The bounds check guards the on-demand-resized
// memberDecorations vector (see the constructor's OpMemberDecorate case).
1095 void SpirvShader::ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const
1097 auto it = memberDecorations.find(id);
1098 if (it != memberDecorations.end() && member < it->second.size())
1100 d->Apply(it->second[member]);
// Fetches the raw 32-bit value of an integer OpConstant (used for array
// sizes and literal indexes during analysis, before Reactor lowering).
// Asserts the object really is an OpConstant of OpTypeInt.
1104 uint32_t SpirvShader::GetConstantInt(Object::ID id) const
1106 // Slightly hackish access to constants very early in translation.
1107 // General consumption of constants by other instructions should
1108 // probably be just lowered to Reactor.
1110 // TODO: not encountered yet since we only use this for array sizes etc,
1111 // but is possible to construct integer constant 0 via OpConstantNull.
1112 auto insn = getObject(id).definition;
1113 ASSERT(insn.opcode() == spv::OpConstant);
1114 ASSERT(getType(insn.word(1)).opcode() == spv::OpTypeInt);
1115 return insn.word(3);
// Emission prolog: allocates an lvalue in the routine for every OpVariable
// whose pointee occupies at least one component, so later loads/stores
// have storage to address. Other instruction kinds are ignored here.
1120 void SpirvShader::emitProlog(SpirvRoutine *routine) const
1122 for (auto insn : *this)
1124 switch (insn.opcode())
1126 case spv::OpVariable:
1128 Type::ID resultPointerTypeId = insn.word(1);
1129 auto resultPointerType = getType(resultPointerTypeId);
1130 auto pointeeType = getType(resultPointerType.element);
1132 if(pointeeType.sizeInComponents > 0)   // TODO: what to do about zero-slot objects?
1134 Object::ID resultId = insn.word(2);
1135 routine->createLvalue(resultId, pointeeType.sizeInComponents);
1140 // Nothing else produces interface variables, so can all be safely ignored.
// Main emission entry point: seeds the EmitState with the caller's active
// lane mask, emits all pre-block (module-level) instructions up to the
// first OpLabel, then emits every reachable block in BFS order starting
// from mainBlockId, using 'visited' to avoid re-emission.
1146 void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask) const
1149 state.setActiveLaneMask(activeLaneMask);
1150 state.routine = routine;
1152 // Emit everything up to the first label
1153 // TODO: Separate out dispatch of block from non-block instructions?
1154 for (auto insn : *this)
1156 if (insn.opcode() == spv::OpLabel)
1160 EmitInstruction(insn, &state);
1163 // Emit all the blocks in BFS order, starting with the main block.
1164 std::queue<Block::ID> pending;
1165 pending.push(mainBlockId);
1166 while (pending.size() > 0)
1168 auto id = pending.front();
1170 if (state.visited.count(id) == 0)
1172 EmitBlock(id, &state);
1173 for (auto it : getBlock(id).outs)
// Emits a single basic block exactly once (guarded by state->visited).
// For branch/switch block kinds (other than the entry block), first emits
// all predecessor blocks and ORs their edge lane masks together to form
// this block's active lane mask, then emits the block's instructions.
1181 void SpirvShader::EmitBlock(Block::ID id, EmitState *state) const
1183 if (state->visited.count(id) > 0)
1185 return; // Already processed this block.
1188 state->visited.emplace(id);
1190 auto &block = getBlock(id);
1195 case Block::StructuredBranchConditional:
1196 case Block::UnstructuredBranchConditional:
1197 case Block::StructuredSwitch:
1198 case Block::UnstructuredSwitch:
1199 if (id != mainBlockId)
1201 // Emit all preceding blocks and set the activeLaneMask.
1202 Intermediate activeLaneMask(1);
1203 activeLaneMask.move(0, SIMD::Int(0));
1204 for (auto in : block.ins)
1206 EmitBlock(in, state);
1207 auto inMask = state->getActiveLaneMaskEdge(in, id);
1208 activeLaneMask.replace(0, activeLaneMask.Int(0) | inMask);
1210 state->setActiveLaneMask(activeLaneMask.Int(0));
1212 state->currentBlock = id;
1213 EmitInstructions(block.begin(), block.end(), state);
1217 state->currentBlock = id;
1222 UNREACHABLE("Unexpected Block Kind: %d", int(block.kind));
// Emits each instruction in [begin, end) in order. EmitResult::Continue
// proceeds to the next instruction; Terminator ends the block (visible
// control flow for the early-out is in lines elided from this listing).
1226 void SpirvShader::EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const
1228 for (auto insn = begin; insn != end; insn++)
1230 auto res = EmitInstruction(insn, state);
1233 case EmitResult::Continue:
1235 case EmitResult::Terminator:
1238 UNREACHABLE("Unexpected EmitResult %d", int(res));
// Emits a structured loop whose header is state->currentBlock.
// Strategy: prime the loop's OpPhi values and the active lane mask from the
// non-back-edge predecessors, emit the loop body inside a dedicated Reactor
// basic block, update the phis/mask from the back-edge blocks, and branch
// back to the header while any lane remains active.
1244 void SpirvShader::EmitLoop(EmitState *state) const
1246 auto blockId = state->currentBlock;
// NOTE(review): 'block' is captured by value here (auto, not auto&), unlike
// the other emitters — confirm whether the copy is intentional.
1247 auto block = getBlock(blockId);
1249 // loopActiveLaneMask is the mask of lanes that are continuing to loop.
1250 // This is initialized with the incoming active lane masks.
1251 SIMD::Int loopActiveLaneMask = SIMD::Int(0);
1252 for (auto in : block.ins)
// existsPath(blockId, in) identifies back edges: a predecessor reachable
// from the loop header itself must loop back to it.
1254 if (!existsPath(blockId, in)) // if not a loop back edge
1256 EmitBlock(in, state);
1257 loopActiveLaneMask |= state->getActiveLaneMaskEdge(in, blockId);
1261 // Generate an alloca for each of the loop's phis.
1262 // These will be primed with the incoming, non back edge Phi values
1263 // before the loop, and then updated just before the loop jumps back to
1267 Object::ID phiId; // The Phi identifier.
1268 Object::ID continueValue; // The source merge value from the loop.
1269 Array<SIMD::Int> storage; // The alloca.
1272 std::vector<LoopPhi> phis;
1274 // For each OpPhi between the block start and the merge instruction:
1275 for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
1277 if (insn.opcode() == spv::OpPhi)
1279 auto objectId = Object::ID(insn.word(2));
1280 auto &object = getObject(objectId);
1281 auto &type = getType(object.type);
1284 phi.phiId = Object::ID(insn.word(2));
1285 phi.storage = Array<SIMD::Int>(type.sizeInComponents);
1287 // Start with the Phi set to 0.
1288 for (uint32_t i = 0; i < type.sizeInComponents; i++)
1290 phi.storage[i] = SIMD::Int(0);
1293 // For each Phi source:
// Phi operands come in (value, predecessor-block) pairs starting at word 3.
1294 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
1296 auto varId = Object::ID(insn.word(w + 0));
1297 auto blockId = Block::ID(insn.word(w + 1));
1298 if (existsPath(state->currentBlock, blockId))
1300 // This source is from a loop back-edge.
1301 ASSERT(phi.continueValue == 0 || phi.continueValue == varId);
1302 phi.continueValue = varId;
1306 // This source is from a preceding block.
1307 for (uint32_t i = 0; i < type.sizeInComponents; i++)
// Lane-select: merge this predecessor's value for only the lanes that
// arrive via its edge.
1309 auto in = GenericValue(this, state->routine, varId);
1310 auto mask = state->getActiveLaneMaskEdge(blockId, state->currentBlock);
1311 phi.storage[i] = phi.storage[i] | (in.Int(i) & mask);
1316 phis.push_back(phi);
1320 // Create the loop basic blocks
1321 auto headerBasicBlock = Nucleus::createBasicBlock();
1322 auto mergeBasicBlock = Nucleus::createBasicBlock();
1324 // Start emitting code inside the loop.
1325 Nucleus::createBr(headerBasicBlock);
1326 Nucleus::setInsertBlock(headerBasicBlock);
1328 // Load the Phi values from storage.
1329 // This will load at the start of each loop.
1330 for (auto &phi : phis)
1332 auto &type = getType(getObject(phi.phiId).type);
1333 auto &dst = state->routine->createIntermediate(phi.phiId, type.sizeInComponents);
1334 for (unsigned int i = 0u; i < type.sizeInComponents; i++)
1336 dst.move(i, phi.storage[i]);
1340 // Load the active lane mask.
1341 state->setActiveLaneMask(loopActiveLaneMask);
1343 // Emit all the non-phi instructions in this loop header block.
1344 for (auto insn = block.begin(); insn != block.end(); insn++)
1346 if (insn.opcode() != spv::OpPhi)
1348 EmitInstruction(insn, state);
1352 // Emit all the back-edge blocks and use their active lane masks to
1353 // rebuild the loopActiveLaneMask.
1354 loopActiveLaneMask = SIMD::Int(0);
1355 for (auto in : block.ins)
1357 if (existsPath(blockId, in))
1359 EmitBlock(in, state);
1360 loopActiveLaneMask |= state->getActiveLaneMaskEdge(in, blockId);
1364 // Update loop phi values
1365 for (auto &phi : phis)
1367 if (phi.continueValue != 0)
1369 auto val = GenericValue(this, state->routine, phi.continueValue);
1370 auto &type = getType(getObject(phi.phiId).type);
1371 for (unsigned int i = 0u; i < type.sizeInComponents; i++)
1373 phi.storage[i] = val.Int(i);
1378 // Loop body now done.
1379 // If any lanes are still active, jump back to the loop header,
1380 // otherwise jump to the merge block.
1381 Nucleus::createCondBr(AnyTrue(loopActiveLaneMask).value, headerBasicBlock, mergeBasicBlock);
1383 // Emit the merge block, and we're done.
1384 Nucleus::setInsertBlock(mergeBasicBlock);
1385 EmitBlock(block.mergeBlock, state);
// Emits code for a single SPIR-V instruction, dispatching to the opcode's
// dedicated emitter. Returns EmitResult::Continue for ordinary instructions;
// block terminators return EmitResult::Terminator (handled by the emitters
// themselves, e.g. EmitBranch/EmitReturn).
// NOTE(review): a number of case labels are elided in this view (e.g. the
// plain OpLoad/OpStore companions to the Atomic cases) — confirm against
// the full source before editing.
1388 SpirvShader::EmitResult SpirvShader::EmitInstruction(InsnIterator insn, EmitState *state) const
1390 switch (insn.opcode())
// Types, constants, decorations and module metadata were fully consumed
// during analysis; they produce no code at emit time.
1392 case spv::OpTypeVoid:
1393 case spv::OpTypeInt:
1394 case spv::OpTypeFloat:
1395 case spv::OpTypeBool:
1396 case spv::OpTypeVector:
1397 case spv::OpTypeArray:
1398 case spv::OpTypeRuntimeArray:
1399 case spv::OpTypeMatrix:
1400 case spv::OpTypeStruct:
1401 case spv::OpTypePointer:
1402 case spv::OpTypeFunction:
1403 case spv::OpExecutionMode:
1404 case spv::OpMemoryModel:
1405 case spv::OpFunction:
1406 case spv::OpFunctionEnd:
1407 case spv::OpConstant:
1408 case spv::OpConstantNull:
1409 case spv::OpConstantTrue:
1410 case spv::OpConstantFalse:
1411 case spv::OpConstantComposite:
1413 case spv::OpExtension:
1414 case spv::OpCapability:
1415 case spv::OpEntryPoint:
1416 case spv::OpExtInstImport:
1417 case spv::OpDecorate:
1418 case spv::OpMemberDecorate:
1419 case spv::OpGroupDecorate:
1420 case spv::OpGroupMemberDecorate:
1421 case spv::OpDecorationGroup:
1423 case spv::OpMemberName:
1425 case spv::OpSourceContinued:
1426 case spv::OpSourceExtension:
1429 case spv::OpModuleProcessed:
1431 // Nothing to do at emit time. These are either fully handled at analysis time,
1432 // or don't require any work at all.
1433 return EmitResult::Continue;
// NOTE(review): the case label(s) guarding this second return are elided
// in this view.
1436 return EmitResult::Continue;
1438 case spv::OpVariable:
1439 return EmitVariable(insn, state);
// EmitLoad/EmitStore inspect insn.opcode() themselves to apply atomic
// memory-order semantics when required.
1442 case spv::OpAtomicLoad:
1443 return EmitLoad(insn, state);
1446 case spv::OpAtomicStore:
1447 return EmitStore(insn, state);
1449 case spv::OpAccessChain:
1450 case spv::OpInBoundsAccessChain:
1451 return EmitAccessChain(insn, state);
1453 case spv::OpCompositeConstruct:
1454 return EmitCompositeConstruct(insn, state);
1456 case spv::OpCompositeInsert:
1457 return EmitCompositeInsert(insn, state);
1459 case spv::OpCompositeExtract:
1460 return EmitCompositeExtract(insn, state);
1462 case spv::OpVectorShuffle:
1463 return EmitVectorShuffle(insn, state);
1465 case spv::OpVectorExtractDynamic:
1466 return EmitVectorExtractDynamic(insn, state);
1468 case spv::OpVectorInsertDynamic:
1469 return EmitVectorInsertDynamic(insn, state);
// OpMatrixTimesScalar shares the vector emitter: the flattened component
// loop covers all matrix components.
1471 case spv::OpVectorTimesScalar:
1472 case spv::OpMatrixTimesScalar:
1473 return EmitVectorTimesScalar(insn, state);
1476 case spv::OpSNegate:
1477 case spv::OpFNegate:
1478 case spv::OpLogicalNot:
1479 case spv::OpConvertFToU:
1480 case spv::OpConvertFToS:
1481 case spv::OpConvertSToF:
1482 case spv::OpConvertUToF:
1483 case spv::OpBitcast:
1487 case spv::OpDPdxCoarse:
1489 case spv::OpDPdyCoarse:
1491 case spv::OpFwidthCoarse:
1492 case spv::OpDPdxFine:
1493 case spv::OpDPdyFine:
1494 case spv::OpFwidthFine:
1495 return EmitUnaryOp(insn, state);
1508 case spv::OpFOrdEqual:
1509 case spv::OpFUnordEqual:
1510 case spv::OpFOrdNotEqual:
1511 case spv::OpFUnordNotEqual:
1512 case spv::OpFOrdLessThan:
1513 case spv::OpFUnordLessThan:
1514 case spv::OpFOrdGreaterThan:
1515 case spv::OpFUnordGreaterThan:
1516 case spv::OpFOrdLessThanEqual:
1517 case spv::OpFUnordLessThanEqual:
1518 case spv::OpFOrdGreaterThanEqual:
1519 case spv::OpFUnordGreaterThanEqual:
1524 case spv::OpINotEqual:
1525 case spv::OpUGreaterThan:
1526 case spv::OpSGreaterThan:
1527 case spv::OpUGreaterThanEqual:
1528 case spv::OpSGreaterThanEqual:
1529 case spv::OpULessThan:
1530 case spv::OpSLessThan:
1531 case spv::OpULessThanEqual:
1532 case spv::OpSLessThanEqual:
1533 case spv::OpShiftRightLogical:
1534 case spv::OpShiftRightArithmetic:
1535 case spv::OpShiftLeftLogical:
1536 case spv::OpBitwiseOr:
1537 case spv::OpBitwiseXor:
1538 case spv::OpBitwiseAnd:
1539 case spv::OpLogicalOr:
1540 case spv::OpLogicalAnd:
1541 case spv::OpLogicalEqual:
1542 case spv::OpLogicalNotEqual:
1543 case spv::OpUMulExtended:
1544 case spv::OpSMulExtended:
1545 return EmitBinaryOp(insn, state);
1548 return EmitDot(insn, state);
1551 return EmitSelect(insn, state);
1553 case spv::OpExtInst:
1554 return EmitExtendedInstruction(insn, state);
1557 return EmitAny(insn, state);
1560 return EmitAll(insn, state);
1563 return EmitBranch(insn, state);
1566 return EmitPhi(insn, state);
// Merge instructions carry structured control-flow metadata only; the
// blocks they reference are scheduled by EmitBlock/EmitLoop.
1568 case spv::OpSelectionMerge:
1569 case spv::OpLoopMerge:
1570 return EmitResult::Continue;
1572 case spv::OpBranchConditional:
1573 return EmitBranchConditional(insn, state);
1576 return EmitSwitch(insn, state);
1578 case spv::OpUnreachable:
1579 return EmitUnreachable(insn, state);
1582 return EmitReturn(insn, state);
1585 UNIMPLEMENTED("opcode: %s", OpcodeName(insn.opcode()).c_str());
1589 return EmitResult::Continue;
// Emits an OpVariable: materializes the variable's backing storage pointer
// according to its storage class (copying interface inputs, resolving
// descriptor-backed buffers, or pointing at push constants).
1592 SpirvShader::EmitResult SpirvShader::EmitVariable(InsnIterator insn, EmitState *state) const
1594 auto routine = state->routine;
1595 Object::ID resultId = insn.word(2);
1596 auto &object = getObject(resultId);
1597 auto &objectTy = getType(object.type);
1598 switch (objectTy.storageClass)
1600 case spv::StorageClassInput:
1602 if (object.kind == Object::Kind::InterfaceVariable)
1604 auto &dst = routine->getValue(resultId);
// Copy each scalar of the interface variable from the routine's flat
// input array into the variable's storage.
// NOTE(review): the declaration of 'offset' is elided in this view —
// presumably a zero-initialized counter declared just above.
1606 VisitInterface(resultId,
1607 [&](Decorations const &d, AttribType type) {
// Inputs are laid out 4 scalar slots per location, plus the component.
1608 auto scalarSlot = d.Location << 2 | d.Component;
1609 dst[offset++] = routine->inputs[scalarSlot];
1614 case spv::StorageClassUniform:
1615 case spv::StorageClassStorageBuffer:
// NOTE(review): the declaration of 'd' (Decorations) is elided in this view.
1618 ApplyDecorationsForId(&d, resultId);
1619 ASSERT(d.DescriptorSet >= 0);
1620 ASSERT(d.Binding >= 0);
1622 size_t bindingOffset = routine->pipelineLayout->getBindingOffset(d.DescriptorSet, d.Binding);
// Chase the descriptor indirections down to the raw buffer data pointer,
// then apply the descriptor's byte offset.
1624 Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
1625 Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // VkDescriptorBufferInfo*
1626 Pointer<Byte> buffer = *Pointer<Pointer<Byte>>(binding + OFFSET(VkDescriptorBufferInfo, buffer)); // vk::Buffer*
1627 Pointer<Byte> data = *Pointer<Pointer<Byte>>(buffer + vk::Buffer::DataOffset); // void*
1628 Int offset = *Pointer<Int>(binding + OFFSET(VkDescriptorBufferInfo, offset));
1629 Pointer<Byte> address = data + offset;
1630 routine->physicalPointers[resultId] = address;
1633 case spv::StorageClassPushConstant:
1635 routine->physicalPointers[resultId] = routine->pushConstants;
1642 return EmitResult::Continue;
// Emits an OpLoad or OpAtomicLoad: reads sizeInComponents scalars through
// the pointer into a new intermediate. Takes the slow per-lane path when
// offsets diverge across lanes or some lanes are inactive; otherwise does
// whole-SIMD-register loads.
1645 SpirvShader::EmitResult SpirvShader::EmitLoad(InsnIterator insn, EmitState *state) const
1647 auto routine = state->routine;
1648 bool atomic = (insn.opcode() == spv::OpAtomicLoad);
1649 Object::ID resultId = insn.word(2);
1650 Object::ID pointerId = insn.word(3);
1651 auto &result = getObject(resultId);
1652 auto &resultTy = getType(result.type);
1653 auto &pointer = getObject(pointerId);
1654 auto &pointerBase = getObject(pointer.pointerBase);
1655 auto &pointerBaseTy = getType(pointerBase.type);
1656 std::memory_order memoryOrder = std::memory_order_relaxed;
// For atomic loads, word 5 holds the memory-semantics operand; translate
// it to a std::memory_order for the Reactor Load.
1660 Object::ID semanticsId = insn.word(5);
1661 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1662 memoryOrder = MemoryOrder(memorySemantics);
1665 ASSERT(getType(pointer.type).element == result.type);
1666 ASSERT(Type::ID(insn.word(1)) == result.type);
1667 ASSERT(!atomic || getType(getType(pointer.type).element).opcode() == spv::OpTypeInt); // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1669 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1671 UNIMPLEMENTED("StorageClassImage load not yet implemented");
// Resolve the base address: physical pointers come from descriptors/push
// constants; otherwise it's routine-local storage.
1674 Pointer<Float> ptrBase;
1675 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1677 ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1681 ptrBase = &routine->getValue(pointer.pointerBase)[0];
1684 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1685 auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
1687 auto load = std::unique_ptr<SIMD::Float[]>(new SIMD::Float[resultTy.sizeInComponents]);
1689 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1691 // Divergent offsets or masked lanes.
1692 auto offsets = pointer.kind == Object::Kind::Value ?
1693 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1694 RValue<SIMD::Int>(SIMD::Int(0));
1695 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1697 // i wish i had a Float,Float,Float,Float constructor here..
1698 for (int j = 0; j < SIMD::Width; j++)
// Only touch memory for active lanes.
// NOTE(review): load[i] is only Inserted into for active lanes, so
// inactive lanes keep whatever default SIMD::Float holds — confirm
// downstream code never reads inactive-lane components.
1700 If(Extract(state->activeLaneMask(), j) != 0)
1702 Int offset = Int(i) + Extract(offsets, j);
1703 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1704 load[i] = Insert(load[i], Load(&ptrBase[offset], sizeof(float), atomic, memoryOrder), j);
1711 // No divergent offsets or masked lanes.
1712 if (interleavedByLane)
1714 // Lane-interleaved data.
1715 Pointer<SIMD::Float> src = ptrBase;
1716 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1718 load[i] = Load(&src[i], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
1723 // Non-interleaved data.
1724 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1726 load[i] = RValue<SIMD::Float>(Load(&ptrBase[i], sizeof(float), atomic, memoryOrder)); // TODO: optimize alignment
// Publish the loaded components as the instruction's result intermediate.
1731 auto &dst = routine->createIntermediate(resultId, resultTy.sizeInComponents);
1732 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1734 dst.move(i, load[i]);
1737 return EmitResult::Continue;
// Emits an OpStore or OpAtomicStore: writes the source object's components
// through the pointer. Splits four ways on (constant vs intermediate
// source) x (divergent/masked per-lane path vs uniform whole-register path).
1740 SpirvShader::EmitResult SpirvShader::EmitStore(InsnIterator insn, EmitState *state) const
1742 auto routine = state->routine;
1743 bool atomic = (insn.opcode() == spv::OpAtomicStore);
// OpAtomicStore's value operand is word 4 (words 2/3 are scope/semantics);
// plain OpStore's is word 2.
1744 Object::ID pointerId = insn.word(1);
1745 Object::ID objectId = insn.word(atomic ? 4 : 2);
1746 auto &object = getObject(objectId);
1747 auto &pointer = getObject(pointerId);
1748 auto &pointerTy = getType(pointer.type);
1749 auto &elementTy = getType(pointerTy.element);
1750 auto &pointerBase = getObject(pointer.pointerBase);
1751 auto &pointerBaseTy = getType(pointerBase.type);
1752 std::memory_order memoryOrder = std::memory_order_relaxed;
// For atomic stores, word 3 holds the memory-semantics operand.
1756 Object::ID semanticsId = insn.word(3);
1757 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1758 memoryOrder = MemoryOrder(memorySemantics);
1761 ASSERT(!atomic || elementTy.opcode() == spv::OpTypeInt); // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1763 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1765 UNIMPLEMENTED("StorageClassImage store not yet implemented");
// Resolve the base address (descriptor-backed vs routine-local), mirroring
// EmitLoad.
1768 Pointer<Float> ptrBase;
1769 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1771 ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1775 ptrBase = &routine->getValue(pointer.pointerBase)[0];
1778 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1779 auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
1781 if (object.kind == Object::Kind::Constant)
1783 // Constant source data.
1784 auto src = reinterpret_cast<float *>(object.constantValue.get());
1785 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1787 // Divergent offsets or masked lanes.
1788 auto offsets = pointer.kind == Object::Kind::Value ?
1789 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1790 RValue<SIMD::Int>(SIMD::Int(0));
1791 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1793 for (int j = 0; j < SIMD::Width; j++)
// Only active lanes write.
1795 If(Extract(state->activeLaneMask(), j) != 0)
1797 Int offset = Int(i) + Extract(offsets, j);
1798 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1799 Store(RValue<Float>(src[i]), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1806 // Constant source data.
1807 // No divergent offsets or masked lanes.
1808 Pointer<SIMD::Float> dst = ptrBase;
1809 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1811 Store(RValue<SIMD::Float>(src[i]), &dst[i], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
1817 // Intermediate source data.
1818 auto &src = routine->getIntermediate(objectId);
1819 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1821 // Divergent offsets or masked lanes.
1822 auto offsets = pointer.kind == Object::Kind::Value ?
1823 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1824 RValue<SIMD::Int>(SIMD::Int(0));
1825 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1827 for (int j = 0; j < SIMD::Width; j++)
1829 If(Extract(state->activeLaneMask(), j) != 0)
1831 Int offset = Int(i) + Extract(offsets, j);
1832 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1833 Store(Extract(src.Float(i), j), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1840 // No divergent offsets or masked lanes.
1841 if (interleavedByLane)
1843 // Lane-interleaved data.
1844 Pointer<SIMD::Float> dst = ptrBase;
1845 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1847 Store(src.Float(i), &dst[i], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
1852 // Intermediate source data. Non-interleaved data.
1853 Pointer<SIMD::Float> dst = ptrBase;
1854 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1856 Store<SIMD::Float>(SIMD::Float(src.Float(i)), &dst[i], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
1862 return EmitResult::Continue;
// Emits an OpAccessChain / OpInBoundsAccessChain: computes a per-lane
// component offset from the base pointer through the given indexes. The
// result is a single-component pointer intermediate sharing the base's
// pointerBase.
1865 SpirvShader::EmitResult SpirvShader::EmitAccessChain(InsnIterator insn, EmitState *state) const
1867 auto routine = state->routine;
1868 Type::ID typeId = insn.word(1);
1869 Object::ID resultId = insn.word(2);
1870 Object::ID baseId = insn.word(3);
// Indexes start at word 4 and run to the end of the instruction.
1871 uint32_t numIndexes = insn.wordCount() - 4;
1872 const uint32_t *indexes = insn.wordPointer(4);
1873 auto &type = getType(typeId);
1874 ASSERT(type.sizeInComponents == 1);
1875 ASSERT(getObject(baseId).pointerBase == getObject(resultId).pointerBase);
1877 auto &dst = routine->createIntermediate(resultId, type.sizeInComponents);
// Buffer-like storage classes carry explicit (std140/std430-style) layout
// decorations, so their offsets must be walked using those decorations.
1879 if(type.storageClass == spv::StorageClassPushConstant ||
1880 type.storageClass == spv::StorageClassUniform ||
1881 type.storageClass == spv::StorageClassStorageBuffer)
1883 dst.move(0, WalkExplicitLayoutAccessChain(baseId, numIndexes, indexes, routine));
1887 dst.move(0, WalkAccessChain(baseId, numIndexes, indexes, routine));
1890 return EmitResult::Continue;
// Emits an OpCompositeConstruct: concatenates the flattened components of
// each constituent operand (words 3..N) into the result intermediate.
1893 SpirvShader::EmitResult SpirvShader::EmitCompositeConstruct(InsnIterator insn, EmitState *state) const
1895 auto routine = state->routine;
1896 auto &type = getType(insn.word(1));
1897 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
// NOTE(review): the declaration of 'offset' (the running destination
// component index) is elided in this view — presumably initialized to 0.
1900 for (auto i = 0u; i < insn.wordCount() - 3; i++)
1902 Object::ID srcObjectId = insn.word(3u + i);
1903 auto & srcObject = getObject(srcObjectId);
1904 auto & srcObjectTy = getType(srcObject.type);
1905 GenericValue srcObjectAccess(this, routine, srcObjectId);
// Append every component of this constituent to the destination.
1907 for (auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
1909 dst.move(offset++, srcObjectAccess.Float(j));
1913 return EmitResult::Continue;
// Emits an OpCompositeInsert: copies the source composite (word 4) into the
// result, overwriting the components selected by the literal index chain
// (words 5..N) with the new object (word 3).
1916 SpirvShader::EmitResult SpirvShader::EmitCompositeInsert(InsnIterator insn, EmitState *state) const
1918 auto routine = state->routine;
1919 Type::ID resultTypeId = insn.word(1);
1920 auto &type = getType(resultTypeId);
1921 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1922 auto &newPartObject = getObject(insn.word(3));
1923 auto &newPartObjectTy = getType(newPartObject.type);
// Flatten the literal index chain into the first overwritten component.
1924 auto firstNewComponent = WalkLiteralAccessChain(resultTypeId, insn.wordCount() - 5, insn.wordPointer(5));
1926 GenericValue srcObjectAccess(this, routine, insn.word(4));
1927 GenericValue newPartObjectAccess(this, routine, insn.word(3));
1929 // old components before
1930 for (auto i = 0u; i < firstNewComponent; i++)
1932 dst.move(i, srcObjectAccess.Float(i));
// new part replaces [firstNewComponent, firstNewComponent + size)
1935 for (auto i = 0u; i < newPartObjectTy.sizeInComponents; i++)
1937 dst.move(firstNewComponent + i, newPartObjectAccess.Float(i));
1939 // old components after
1940 for (auto i = firstNewComponent + newPartObjectTy.sizeInComponents; i < type.sizeInComponents; i++)
1942 dst.move(i, srcObjectAccess.Float(i));
1945 return EmitResult::Continue;
1948 SpirvShader::EmitResult SpirvShader::EmitCompositeExtract(InsnIterator insn, EmitState *state) const
1950 auto routine = state->routine;
1951 auto &type = getType(insn.word(1));
1952 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1953 auto &compositeObject = getObject(insn.word(3));
1954 Type::ID compositeTypeId = compositeObject.definition.word(1);
1955 auto firstComponent = WalkLiteralAccessChain(compositeTypeId, insn.wordCount() - 4, insn.wordPointer(4));
1957 GenericValue compositeObjectAccess(this, routine, insn.word(3));
1958 for (auto i = 0u; i < type.sizeInComponents; i++)
1960 dst.move(i, compositeObjectAccess.Float(firstComponent + i));
1963 return EmitResult::Continue;
// Emits an OpVectorShuffle: builds the result from components selected out
// of the two operand vectors. Selector literals index the concatenation of
// the first then second operand; 0xFFFFFFFF means "undefined component".
1966 SpirvShader::EmitResult SpirvShader::EmitVectorShuffle(InsnIterator insn, EmitState *state) const
1968 auto routine = state->routine;
1969 auto &type = getType(insn.word(1));
1970 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1972 // Note: number of components in result type, first half type, and second
1973 // half type are all independent.
1974 auto &firstHalfType = getType(getObject(insn.word(3)).type);
1976 GenericValue firstHalfAccess(this, routine, insn.word(3));
1977 GenericValue secondHalfAccess(this, routine, insn.word(4));
1979 for (auto i = 0u; i < type.sizeInComponents; i++)
// Selector literals start at word 5, one per result component.
1981 auto selector = insn.word(5 + i);
1982 if (selector == static_cast<uint32_t>(-1))
1984 // Undefined value. Until we decide to do real undef values, zero is as good
1986 dst.move(i, RValue<SIMD::Float>(0.0f));
1988 else if (selector < firstHalfType.sizeInComponents)
1990 dst.move(i, firstHalfAccess.Float(selector));
// Otherwise the selector indexes into the second operand, offset by the
// first operand's component count.
1994 dst.move(i, secondHalfAccess.Float(selector - firstHalfType.sizeInComponents));
1998 return EmitResult::Continue;
// Emits an OpVectorExtractDynamic: selects one component of the source
// vector using a runtime index, branchlessly, by OR-accumulating each
// component masked by a per-lane index-equality compare.
2001 SpirvShader::EmitResult SpirvShader::EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const
2003 auto routine = state->routine;
2004 auto &type = getType(insn.word(1));
2005 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2006 auto &srcType = getType(getObject(insn.word(3)).type);
2008 GenericValue src(this, routine, insn.word(3));
2009 GenericValue index(this, routine, insn.word(4));
2011 SIMD::UInt v = SIMD::UInt(0);
2013 for (auto i = 0u; i < srcType.sizeInComponents; i++)
// CmpEQ yields all-ones lanes where the runtime index equals i, so exactly
// one component survives the AND per lane.
2015 v |= CmpEQ(index.UInt(0), SIMD::UInt(i)) & src.UInt(i);
// NOTE(review): the write of 'v' into 'dst' is elided in this view —
// confirm a dst.move(0, v) (or equivalent) exists in the full source.
2019 return EmitResult::Continue;
2022 SpirvShader::EmitResult SpirvShader::EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const
2024 auto routine = state->routine;
2025 auto &type = getType(insn.word(1));
2026 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2028 GenericValue src(this, routine, insn.word(3));
2029 GenericValue component(this, routine, insn.word(4));
2030 GenericValue index(this, routine, insn.word(5));
2032 for (auto i = 0u; i < type.sizeInComponents; i++)
2034 SIMD::UInt mask = CmpEQ(SIMD::UInt(i), index.UInt(0));
2035 dst.move(i, (src.UInt(i) & ~mask) | (component.UInt(0) & mask));
2037 return EmitResult::Continue;
2040 SpirvShader::EmitResult SpirvShader::EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const
2042 auto routine = state->routine;
2043 auto &type = getType(insn.word(1));
2044 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2045 auto lhs = GenericValue(this, routine, insn.word(3));
2046 auto rhs = GenericValue(this, routine, insn.word(4));
2048 for (auto i = 0u; i < type.sizeInComponents; i++)
2050 dst.move(i, lhs.Float(i) * rhs.Float(0));
2053 return EmitResult::Continue;
// Emits a single-operand instruction (negation, logical not, conversions,
// bitcast, and the derivative family), applied componentwise.
// NOTE(review): some case labels (e.g. for the IsInf/IsNan moves below) are
// elided in this view — confirm against the full source before editing.
2056 SpirvShader::EmitResult SpirvShader::EmitUnaryOp(InsnIterator insn, EmitState *state) const
2058 auto routine = state->routine;
2059 auto &type = getType(insn.word(1));
2060 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2061 auto src = GenericValue(this, routine, insn.word(3));
2063 for (auto i = 0u; i < type.sizeInComponents; i++)
2065 switch (insn.opcode())
2068 case spv::OpLogicalNot: // logical not == bitwise not due to all-bits boolean representation
2069 dst.move(i, ~src.UInt(i));
2071 case spv::OpSNegate:
2072 dst.move(i, -src.Int(i));
2074 case spv::OpFNegate:
2075 dst.move(i, -src.Float(i));
2077 case spv::OpConvertFToU:
2078 dst.move(i, SIMD::UInt(src.Float(i)));
2080 case spv::OpConvertFToS:
2081 dst.move(i, SIMD::Int(src.Float(i)));
2083 case spv::OpConvertSToF:
2084 dst.move(i, SIMD::Float(src.Int(i)));
2086 case spv::OpConvertUToF:
2087 dst.move(i, SIMD::Float(src.UInt(i)));
2089 case spv::OpBitcast:
// Bitcast is a no-op here: components are stored as raw 32-bit values.
2090 dst.move(i, src.Float(i));
2093 dst.move(i, IsInf(src.Float(i)));
2096 dst.move(i, IsNan(src.Float(i)));
2099 case spv::OpDPdxCoarse:
2100 // Derivative instructions: FS invocations are laid out like so:
// Coarse x-derivative: difference across the first quad row, broadcast to
// all four lanes.
2103 static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
2104 dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
2107 case spv::OpDPdyCoarse:
2108 dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
2111 case spv::OpFwidthCoarse:
// fwidth = |dPdx| + |dPdy|.
2112 dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))
2113 + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
2115 case spv::OpDPdxFine:
// Fine derivatives: compute per-row (x) / per-column (y) differences and
// place each in the lanes of its own row/column.
2117 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
2118 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
2119 SIMD::Float v = SIMD::Float(firstRow);
2120 v = Insert(v, secondRow, 2);
2121 v = Insert(v, secondRow, 3);
2125 case spv::OpDPdyFine:
2127 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
2128 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
2129 SIMD::Float v = SIMD::Float(firstColumn);
2130 v = Insert(v, secondColumn, 1);
2131 v = Insert(v, secondColumn, 3);
2135 case spv::OpFwidthFine:
2137 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
2138 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
2139 SIMD::Float dpdx = SIMD::Float(firstRow);
2140 dpdx = Insert(dpdx, secondRow, 2);
2141 dpdx = Insert(dpdx, secondRow, 3);
2142 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
2143 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
2144 SIMD::Float dpdy = SIMD::Float(firstColumn);
2145 dpdy = Insert(dpdy, secondColumn, 1);
2146 dpdy = Insert(dpdy, secondColumn, 3);
2147 dst.move(i, Abs(dpdx) + Abs(dpdy));
2151 UNIMPLEMENTED("Unhandled unary operator %s", OpcodeName(insn.opcode()).c_str());
2155 return EmitResult::Continue;
// Emits a two-operand instruction (integer/float arithmetic, comparisons,
// shifts, bitwise/logical ops, extended multiplies), applied componentwise.
// The loop bound is the *operand* type's component count, not the result's:
// the extended-multiply ops write two members (2x the operand width) into
// the flattened result (see the comment at OpSMulExtended).
// NOTE(review): several dst.move(...) result writes and case labels are
// elided in this view (e.g. the SDiv/SRem quotient/remainder stores) —
// confirm against the full source before editing.
2158 SpirvShader::EmitResult SpirvShader::EmitBinaryOp(InsnIterator insn, EmitState *state) const
2160 auto routine = state->routine;
2161 auto &type = getType(insn.word(1));
2162 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2163 auto &lhsType = getType(getObject(insn.word(3)).type);
2164 auto lhs = GenericValue(this, routine, insn.word(3));
2165 auto rhs = GenericValue(this, routine, insn.word(4));
2167 for (auto i = 0u; i < lhsType.sizeInComponents; i++)
2169 switch (insn.opcode())
2172 dst.move(i, lhs.Int(i) + rhs.Int(i));
2175 dst.move(i, lhs.Int(i) - rhs.Int(i));
2178 dst.move(i, lhs.Int(i) * rhs.Int(i));
// Signed division: clamp the operands so neither divide-by-zero nor
// INT_MIN / -1 can fault; undefined results are acceptable per SPIR-V.
2182 SIMD::Int a = lhs.Int(i);
2183 SIMD::Int b = rhs.Int(i);
2184 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2185 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
// Unsigned division: force a zero divisor to all-ones instead of faulting.
2191 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2192 dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
2197 SIMD::Int a = lhs.Int(i);
2198 SIMD::Int b = rhs.Int(i);
2199 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2200 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2206 SIMD::Int a = lhs.Int(i);
2207 SIMD::Int b = rhs.Int(i);
2208 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2209 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2211 // If a and b have opposite signs, the remainder operation takes
2212 // the sign from a but OpSMod is supposed to take the sign of b.
2213 // Adding b will ensure that the result has the correct sign and
2214 // that it is still congruent to a modulo b.
2216 // See also http://mathforum.org/library/drmath/view/52343.html
2217 auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
2218 auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
2219 dst.move(i, As<SIMD::Float>(fixedMod));
2224 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2225 dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
// Logical equal/not-equal reuse the integer compares: booleans are
// represented as all-bits values.
2229 case spv::OpLogicalEqual:
2230 dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
2232 case spv::OpINotEqual:
2233 case spv::OpLogicalNotEqual:
2234 dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
2236 case spv::OpUGreaterThan:
2237 dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
2239 case spv::OpSGreaterThan:
2240 dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i)));
2242 case spv::OpUGreaterThanEqual:
2243 dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
2245 case spv::OpSGreaterThanEqual:
2246 dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i)));
2248 case spv::OpULessThan:
2249 dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
2251 case spv::OpSLessThan:
2252 dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i)));
2254 case spv::OpULessThanEqual:
2255 dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
2257 case spv::OpSLessThanEqual:
2258 dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i)));
2261 dst.move(i, lhs.Float(i) + rhs.Float(i));
2264 dst.move(i, lhs.Float(i) - rhs.Float(i));
2267 dst.move(i, lhs.Float(i) * rhs.Float(i));
2270 dst.move(i, lhs.Float(i) / rhs.Float(i));
2273 // TODO(b/126873455): inaccurate for values greater than 2^24
2274 dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
2277 dst.move(i, lhs.Float(i) % rhs.Float(i));
// Ordered compares are false if either operand is NaN; unordered compares
// are true if either operand is NaN.
2279 case spv::OpFOrdEqual:
2280 dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
2282 case spv::OpFUnordEqual:
2283 dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
2285 case spv::OpFOrdNotEqual:
2286 dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
2288 case spv::OpFUnordNotEqual:
2289 dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
2291 case spv::OpFOrdLessThan:
2292 dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i)));
2294 case spv::OpFUnordLessThan:
2295 dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i)));
2297 case spv::OpFOrdGreaterThan:
2298 dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i)));
2300 case spv::OpFUnordGreaterThan:
2301 dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
2303 case spv::OpFOrdLessThanEqual:
2304 dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i)));
2306 case spv::OpFUnordLessThanEqual:
2307 dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i)));
2309 case spv::OpFOrdGreaterThanEqual:
2310 dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i)));
2312 case spv::OpFUnordGreaterThanEqual:
2313 dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
2315 case spv::OpShiftRightLogical:
2316 dst.move(i, lhs.UInt(i) >> rhs.UInt(i));
2318 case spv::OpShiftRightArithmetic:
2319 dst.move(i, lhs.Int(i) >> rhs.Int(i));
2321 case spv::OpShiftLeftLogical:
2322 dst.move(i, lhs.UInt(i) << rhs.UInt(i));
// Logical or/and map to bitwise or/and under the all-bits boolean
// representation.
2324 case spv::OpBitwiseOr:
2325 case spv::OpLogicalOr:
2326 dst.move(i, lhs.UInt(i) | rhs.UInt(i));
2328 case spv::OpBitwiseXor:
2329 dst.move(i, lhs.UInt(i) ^ rhs.UInt(i));
2331 case spv::OpBitwiseAnd:
2332 case spv::OpLogicalAnd:
2333 dst.move(i, lhs.UInt(i) & rhs.UInt(i));
2335 case spv::OpSMulExtended:
2336 // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
2337 // In our flat view then, component i is the i'th component of the first member;
2338 // component i + N is the i'th component of the second member.
2339 dst.move(i, lhs.Int(i) * rhs.Int(i));
2340 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
2342 case spv::OpUMulExtended:
2343 dst.move(i, lhs.UInt(i) * rhs.UInt(i));
2344 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
2347 UNIMPLEMENTED("Unhandled binary operator %s", OpcodeName(insn.opcode()).c_str());
2351 return EmitResult::Continue;
2354 SpirvShader::EmitResult SpirvShader::EmitDot(InsnIterator insn, EmitState *state) const
2356 auto routine = state->routine;
2357 auto &type = getType(insn.word(1));
2358 ASSERT(type.sizeInComponents == 1);
2359 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2360 auto &lhsType = getType(getObject(insn.word(3)).type);
2361 auto lhs = GenericValue(this, routine, insn.word(3));
2362 auto rhs = GenericValue(this, routine, insn.word(4));
2364 dst.move(0, Dot(lhsType.sizeInComponents, lhs, rhs));
2365 return EmitResult::Continue;
2368 SpirvShader::EmitResult SpirvShader::EmitSelect(InsnIterator insn, EmitState *state) const
2370 auto routine = state->routine;
2371 auto &type = getType(insn.word(1));
2372 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2373 auto cond = GenericValue(this, routine, insn.word(3));
2374 auto lhs = GenericValue(this, routine, insn.word(4));
2375 auto rhs = GenericValue(this, routine, insn.word(5));
2377 for (auto i = 0u; i < type.sizeInComponents; i++)
2379 dst.move(i, (cond.Int(i) & lhs.Int(i)) | (~cond.Int(i) & rhs.Int(i))); // FIXME: IfThenElse()
2382 return EmitResult::Continue;
// Emits code for an OpExtInst instruction from the GLSL.std.450 extended
// instruction set. Word layout: word(1) = result type id, word(2) = result id,
// word(4) = GLSLstd450 opcode, word(5)... = operand ids.
// NOTE(review): this listing appears to be missing brace/break lines between
// cases — verify structure against upstream before editing.
2385 SpirvShader::EmitResult SpirvShader::EmitExtendedInstruction(InsnIterator insn, EmitState *state) const
2387 auto routine = state->routine;
2388 auto &type = getType(insn.word(1));
2389 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2390 auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
2392 switch (extInstIndex)
// Componentwise absolute value — float and signed-integer variants.
2394 case GLSLstd450FAbs:
2396 auto src = GenericValue(this, routine, insn.word(5));
2397 for (auto i = 0u; i < type.sizeInComponents; i++)
2399 dst.move(i, Abs(src.Float(i)));
2403 case GLSLstd450SAbs:
2405 auto src = GenericValue(this, routine, insn.word(5));
2406 for (auto i = 0u; i < type.sizeInComponents; i++)
2408 dst.move(i, Abs(src.Int(i)));
// 3-component cross product, expanded per output component.
2412 case GLSLstd450Cross:
2414 auto lhs = GenericValue(this, routine, insn.word(5));
2415 auto rhs = GenericValue(this, routine, insn.word(6));
2416 dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
2417 dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
2418 dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
// Componentwise rounding family, mapped onto Reactor intrinsics.
2421 case GLSLstd450Floor:
2423 auto src = GenericValue(this, routine, insn.word(5));
2424 for (auto i = 0u; i < type.sizeInComponents; i++)
2426 dst.move(i, Floor(src.Float(i)));
2430 case GLSLstd450Trunc:
2432 auto src = GenericValue(this, routine, insn.word(5));
2433 for (auto i = 0u; i < type.sizeInComponents; i++)
2435 dst.move(i, Trunc(src.Float(i)));
2439 case GLSLstd450Ceil:
2441 auto src = GenericValue(this, routine, insn.word(5));
2442 for (auto i = 0u; i < type.sizeInComponents; i++)
2444 dst.move(i, Ceil(src.Float(i)));
2448 case GLSLstd450Fract:
2450 auto src = GenericValue(this, routine, insn.word(5));
2451 for (auto i = 0u; i < type.sizeInComponents; i++)
2453 dst.move(i, Frac(src.Float(i)));
2457 case GLSLstd450Round:
2459 auto src = GenericValue(this, routine, insn.word(5));
2460 for (auto i = 0u; i < type.sizeInComponents; i++)
2462 dst.move(i, Round(src.Float(i)));
// Round-half-to-even: adjust Round()'s result by +/-1 only when the
// fraction is exactly 0.5 and the rounded value is odd.
2466 case GLSLstd450RoundEven:
2468 auto src = GenericValue(this, routine, insn.word(5));
2469 for (auto i = 0u; i < type.sizeInComponents; i++)
2471 auto x = Round(src.Float(i));
2472 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
2473 dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
2474 SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
// Componentwise min/max — float, signed and unsigned integer variants.
2478 case GLSLstd450FMin:
2480 auto lhs = GenericValue(this, routine, insn.word(5));
2481 auto rhs = GenericValue(this, routine, insn.word(6));
2482 for (auto i = 0u; i < type.sizeInComponents; i++)
2484 dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
2488 case GLSLstd450FMax:
2490 auto lhs = GenericValue(this, routine, insn.word(5));
2491 auto rhs = GenericValue(this, routine, insn.word(6));
2492 for (auto i = 0u; i < type.sizeInComponents; i++)
2494 dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
2498 case GLSLstd450SMin:
2500 auto lhs = GenericValue(this, routine, insn.word(5));
2501 auto rhs = GenericValue(this, routine, insn.word(6));
2502 for (auto i = 0u; i < type.sizeInComponents; i++)
2504 dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
2508 case GLSLstd450SMax:
2510 auto lhs = GenericValue(this, routine, insn.word(5));
2511 auto rhs = GenericValue(this, routine, insn.word(6));
2512 for (auto i = 0u; i < type.sizeInComponents; i++)
2514 dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
2518 case GLSLstd450UMin:
2520 auto lhs = GenericValue(this, routine, insn.word(5));
2521 auto rhs = GenericValue(this, routine, insn.word(6));
2522 for (auto i = 0u; i < type.sizeInComponents; i++)
2524 dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
2528 case GLSLstd450UMax:
2530 auto lhs = GenericValue(this, routine, insn.word(5));
2531 auto rhs = GenericValue(this, routine, insn.word(6));
2532 for (auto i = 0u; i < type.sizeInComponents; i++)
2534 dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
// step(edge, x): 1.0 where x >= edge, else 0.0. The comparison mask is
// ANDed with the bit pattern of 1.0f to produce the float result.
2538 case GLSLstd450Step:
2540 auto edge = GenericValue(this, routine, insn.word(5));
2541 auto x = GenericValue(this, routine, insn.word(6));
2542 for (auto i = 0u; i < type.sizeInComponents; i++)
2544 dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
// smoothstep: clamp t to [0,1], then apply the Hermite cubic 3t^2 - 2t^3.
2548 case GLSLstd450SmoothStep:
2550 auto edge0 = GenericValue(this, routine, insn.word(5));
2551 auto edge1 = GenericValue(this, routine, insn.word(6));
2552 auto x = GenericValue(this, routine, insn.word(7));
2553 for (auto i = 0u; i < type.sizeInComponents; i++)
2555 auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
2556 (edge1.Float(i) - edge0.Float(i)), SIMD::Float(0.0f)), SIMD::Float(1.0f));
2557 dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
// mix(x, y, a) = x + a * (y - x): linear interpolation.
2561 case GLSLstd450FMix:
2563 auto x = GenericValue(this, routine, insn.word(5));
2564 auto y = GenericValue(this, routine, insn.word(6));
2565 auto a = GenericValue(this, routine, insn.word(7));
2566 for (auto i = 0u; i < type.sizeInComponents; i++)
2568 dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
// clamp(x, minVal, maxVal) — float, signed and unsigned variants.
2572 case GLSLstd450FClamp:
2574 auto x = GenericValue(this, routine, insn.word(5));
2575 auto minVal = GenericValue(this, routine, insn.word(6));
2576 auto maxVal = GenericValue(this, routine, insn.word(7));
2577 for (auto i = 0u; i < type.sizeInComponents; i++)
2579 dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
2583 case GLSLstd450SClamp:
2585 auto x = GenericValue(this, routine, insn.word(5));
2586 auto minVal = GenericValue(this, routine, insn.word(6));
2587 auto maxVal = GenericValue(this, routine, insn.word(7));
2588 for (auto i = 0u; i < type.sizeInComponents; i++)
2590 dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
2594 case GLSLstd450UClamp:
2596 auto x = GenericValue(this, routine, insn.word(5));
2597 auto minVal = GenericValue(this, routine, insn.word(6));
2598 auto maxVal = GenericValue(this, routine, insn.word(7));
2599 for (auto i = 0u; i < type.sizeInComponents; i++)
2601 dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
// sign(x): bit-select -1.0 where x < -0.0, +1.0 where x > +0.0, else 0.
2605 case GLSLstd450FSign:
2607 auto src = GenericValue(this, routine, insn.word(5));
2608 for (auto i = 0u; i < type.sizeInComponents; i++)
2610 auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
2611 auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
2612 dst.move(i, neg | pos);
2616 case GLSLstd450SSign:
2618 auto src = GenericValue(this, routine, insn.word(5));
2619 for (auto i = 0u; i < type.sizeInComponents; i++)
2621 auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
2622 auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
2623 dst.move(i, neg | pos);
// reflect(I, N) = I - 2 * dot(N, I) * N.
2627 case GLSLstd450Reflect:
2629 auto I = GenericValue(this, routine, insn.word(5));
2630 auto N = GenericValue(this, routine, insn.word(6));
2632 SIMD::Float d = Dot(type.sizeInComponents, I, N);
2634 for (auto i = 0u; i < type.sizeInComponents; i++)
2636 dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
// refract(I, N, eta): where k < 0 (total internal reflection) the pos
// mask is zero and the result component is 0.0.
2640 case GLSLstd450Refract:
2642 auto I = GenericValue(this, routine, insn.word(5));
2643 auto N = GenericValue(this, routine, insn.word(6));
2644 auto eta = GenericValue(this, routine, insn.word(7));
2646 SIMD::Float d = Dot(type.sizeInComponents, I, N);
2647 SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
2648 SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
2649 SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
2651 for (auto i = 0u; i < type.sizeInComponents; i++)
2653 dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
// faceforward(N, I, Nref): N where dot(Nref, I) < 0, otherwise -N.
2657 case GLSLstd450FaceForward:
2659 auto N = GenericValue(this, routine, insn.word(5));
2660 auto I = GenericValue(this, routine, insn.word(6));
2661 auto Nref = GenericValue(this, routine, insn.word(7));
2663 SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
2664 SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
2666 for (auto i = 0u; i < type.sizeInComponents; i++)
2668 auto n = N.Float(i);
2669 dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
// Length/Normalize/Distance produce scalar-or-vector results from vector
// operands, so the operand's own type supplies the component count.
2673 case GLSLstd450Length:
2675 auto x = GenericValue(this, routine, insn.word(5));
2676 SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2678 dst.move(0, Sqrt(d));
2681 case GLSLstd450Normalize:
2683 auto x = GenericValue(this, routine, insn.word(5));
2684 SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2685 SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
2687 for (auto i = 0u; i < type.sizeInComponents; i++)
2689 dst.move(i, invLength * x.Float(i));
2693 case GLSLstd450Distance:
2695 auto p0 = GenericValue(this, routine, insn.word(5));
2696 auto p1 = GenericValue(this, routine, insn.word(6));
2697 auto p0Type = getType(getObject(insn.word(5)).type);
2699 // sqrt(dot(p0-p1, p0-p1))
2700 SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
2702 for (auto i = 1u; i < p0Type.sizeInComponents; i++)
2704 d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
2707 dst.move(0, Sqrt(d));
2711 UNIMPLEMENTED("Unhandled ExtInst %d", extInstIndex);
2714 return EmitResult::Continue;
2717 std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
2719 switch(memorySemantics)
2721 case spv::MemorySemanticsMaskNone: return std::memory_order_relaxed;
2722 case spv::MemorySemanticsAcquireMask: return std::memory_order_acquire;
2723 case spv::MemorySemanticsReleaseMask: return std::memory_order_release;
2724 case spv::MemorySemanticsAcquireReleaseMask: return std::memory_order_acq_rel;
2725 case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel; // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
2727 UNREACHABLE("MemorySemanticsMask %x", memorySemantics);
2728 return std::memory_order_acq_rel;
2732 SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const
2734 SIMD::Float d = x.Float(0) * y.Float(0);
2736 for (auto i = 1u; i < numComponents; i++)
2738 d += x.Float(i) * y.Float(i);
2744 SpirvShader::EmitResult SpirvShader::EmitAny(InsnIterator insn, EmitState *state) const
2746 auto routine = state->routine;
2747 auto &type = getType(insn.word(1));
2748 ASSERT(type.sizeInComponents == 1);
2749 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2750 auto &srcType = getType(getObject(insn.word(3)).type);
2751 auto src = GenericValue(this, routine, insn.word(3));
2753 SIMD::UInt result = src.UInt(0);
2755 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2757 result |= src.UInt(i);
2760 dst.move(0, result);
2761 return EmitResult::Continue;
2764 SpirvShader::EmitResult SpirvShader::EmitAll(InsnIterator insn, EmitState *state) const
2766 auto routine = state->routine;
2767 auto &type = getType(insn.word(1));
2768 ASSERT(type.sizeInComponents == 1);
2769 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2770 auto &srcType = getType(getObject(insn.word(3)).type);
2771 auto src = GenericValue(this, routine, insn.word(3));
2773 SIMD::UInt result = src.UInt(0);
2775 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2777 result &= src.UInt(i);
2780 dst.move(0, result);
2781 return EmitResult::Continue;
2784 SpirvShader::EmitResult SpirvShader::EmitBranch(InsnIterator insn, EmitState *state) const
2786 auto target = Block::ID(insn.word(1));
2787 auto edge = Block::Edge{state->currentBlock, target};
2788 state->edgeActiveLaneMasks.emplace(edge, state->activeLaneMask());
2789 return EmitResult::Terminator;
2792 SpirvShader::EmitResult SpirvShader::EmitBranchConditional(InsnIterator insn, EmitState *state) const
2794 auto block = getBlock(state->currentBlock);
2795 ASSERT(block.branchInstruction == insn);
2797 auto condId = Object::ID(block.branchInstruction.word(1));
2798 auto trueBlockId = Block::ID(block.branchInstruction.word(2));
2799 auto falseBlockId = Block::ID(block.branchInstruction.word(3));
2801 auto cond = GenericValue(this, state->routine, condId);
2802 ASSERT_MSG(getType(getObject(condId).type).sizeInComponents == 1, "Condition must be a Boolean type scalar");
2804 // TODO: Optimize for case where all lanes take same path.
2806 state->addOutputActiveLaneMaskEdge(trueBlockId, cond.Int(0));
2807 state->addOutputActiveLaneMaskEdge(falseBlockId, ~cond.Int(0));
2809 return EmitResult::Terminator;
2812 SpirvShader::EmitResult SpirvShader::EmitSwitch(InsnIterator insn, EmitState *state) const
2814 auto block = getBlock(state->currentBlock);
2815 ASSERT(block.branchInstruction == insn);
2817 auto selId = Object::ID(block.branchInstruction.word(1));
2819 auto sel = GenericValue(this, state->routine, selId);
2820 ASSERT_MSG(getType(getObject(selId).type).sizeInComponents == 1, "Selector must be a scalar");
2822 auto numCases = (block.branchInstruction.wordCount() - 3) / 2;
2824 // TODO: Optimize for case where all lanes take same path.
2826 SIMD::Int defaultLaneMask = state->activeLaneMask();
2828 // Gather up the case label matches and calculate defaultLaneMask.
2829 std::vector<RValue<SIMD::Int>> caseLabelMatches;
2830 caseLabelMatches.reserve(numCases);
2831 for (uint32_t i = 0; i < numCases; i++)
2833 auto label = block.branchInstruction.word(i * 2 + 3);
2834 auto caseBlockId = Block::ID(block.branchInstruction.word(i * 2 + 4));
2835 auto caseLabelMatch = CmpEQ(sel.Int(0), SIMD::Int(label));
2836 state->addOutputActiveLaneMaskEdge(caseBlockId, caseLabelMatch);
2837 defaultLaneMask &= ~caseLabelMatch;
2840 auto defaultBlockId = Block::ID(block.branchInstruction.word(2));
2841 state->addOutputActiveLaneMaskEdge(defaultBlockId, defaultLaneMask);
2843 return EmitResult::Terminator;
2846 SpirvShader::EmitResult SpirvShader::EmitUnreachable(InsnIterator insn, EmitState *state) const
2848 // TODO: Log something in this case?
2849 state->setActiveLaneMask(SIMD::Int(0));
2850 return EmitResult::Terminator;
2853 SpirvShader::EmitResult SpirvShader::EmitReturn(InsnIterator insn, EmitState *state) const
2855 state->setActiveLaneMask(SIMD::Int(0));
2856 return EmitResult::Terminator;
2859 SpirvShader::EmitResult SpirvShader::EmitPhi(InsnIterator insn, EmitState *state) const
2861 auto routine = state->routine;
2862 auto typeId = Type::ID(insn.word(1));
2863 auto type = getType(typeId);
2864 auto objectId = Object::ID(insn.word(2));
2866 auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
2869 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
2871 auto varId = Object::ID(insn.word(w + 0));
2872 auto blockId = Block::ID(insn.word(w + 1));
2874 auto in = GenericValue(this, routine, varId);
2875 auto mask = state->getActiveLaneMaskEdge(blockId, state->currentBlock);
2877 for (uint32_t i = 0; i < type.sizeInComponents; i++)
2879 auto inMasked = in.Int(i) & mask;
2880 dst.replace(i, first ? inMasked : (dst.Int(i) | inMasked));
2885 return EmitResult::Continue;
2888 void SpirvShader::emitEpilog(SpirvRoutine *routine) const
2890 for (auto insn : *this)
2892 switch (insn.opcode())
2894 case spv::OpVariable:
2896 Object::ID resultId = insn.word(2);
2897 auto &object = getObject(resultId);
2898 auto &objectTy = getType(object.type);
2899 if (object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
2901 auto &dst = routine->getValue(resultId);
2903 VisitInterface(resultId,
2904 [&](Decorations const &d, AttribType type) {
2905 auto scalarSlot = d.Location << 2 | d.Component;
2906 routine->outputs[scalarSlot] = dst[offset++];
// Classifies a basic block by inspecting its final two instructions: the
// terminator (branch/conditional/switch) and the optional merge instruction
// immediately before it, recording successor blocks in 'outs' and any
// merge/continue targets.
// NOTE(review): several 'case' labels and 'break's (e.g. for spv::OpBranch
// and spv::OpSwitch) appear to be missing from this listing — verify the
// switch structure against upstream before modifying.
2917 SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end)
2919 // Default to a Simple, this may change later.
2920 kind = Block::Simple;
2922 // Walk the instructions to find the last two of the block.
2923 InsnIterator insns[2];
2924 for (auto insn : *this)
2926 insns[0] = insns[1];
// insns[1] is the block terminator; insns[0] the instruction before it.
2930 switch (insns[1].opcode())
// Unconditional branch: single successor at word(1).
2933 branchInstruction = insns[1];
2934 outs.emplace(Block::ID(branchInstruction.word(1)));
2936 switch (insns[0].opcode())
2938 case spv::OpLoopMerge:
2940 mergeInstruction = insns[0];
2941 mergeBlock = Block::ID(mergeInstruction.word(1));
2942 continueTarget = Block::ID(mergeInstruction.word(2));
2946 kind = Block::Simple;
// Conditional branch: true successor at word(2), false at word(3).
2951 case spv::OpBranchConditional:
2952 branchInstruction = insns[1];
2953 outs.emplace(Block::ID(branchInstruction.word(2)));
2954 outs.emplace(Block::ID(branchInstruction.word(3)));
// The preceding merge instruction (if any) determines structuredness.
2956 switch (insns[0].opcode())
2958 case spv::OpSelectionMerge:
2959 kind = StructuredBranchConditional;
2960 mergeInstruction = insns[0];
2961 mergeBlock = Block::ID(mergeInstruction.word(1));
2964 case spv::OpLoopMerge:
2966 mergeInstruction = insns[0];
2967 mergeBlock = Block::ID(mergeInstruction.word(1));
2968 continueTarget = Block::ID(mergeInstruction.word(2));
2972 kind = UnstructuredBranchConditional;
// Switch: default successor at word(2), case labels at words 4, 6, ...
2978 branchInstruction = insns[1];
2979 outs.emplace(Block::ID(branchInstruction.word(2)));
2980 for (uint32_t w = 4; w < branchInstruction.wordCount(); w += 2)
2982 outs.emplace(Block::ID(branchInstruction.word(w)));
2985 switch (insns[0].opcode())
2987 case spv::OpSelectionMerge:
2988 kind = StructuredSwitch;
2989 mergeInstruction = insns[0];
2990 mergeBlock = Block::ID(mergeInstruction.word(1));
2994 kind = UnstructuredSwitch;
3004 bool SpirvShader::existsPath(Block::ID from, Block::ID to) const
3006 // TODO: Optimize: This can be cached on the block.
3009 std::queue<Block::ID> pending;
3010 pending.emplace(from);
3012 while (pending.size() > 0)
3014 auto id = pending.front();
3016 for (auto out : getBlock(id).outs)
3018 if (seen.count(out) != 0) { continue; }
3019 if (out == to) { return true; }
3020 pending.emplace(out);
3028 void SpirvShader::EmitState::addOutputActiveLaneMaskEdge(Block::ID to, RValue<SIMD::Int> mask)
3030 addActiveLaneMaskEdge(currentBlock, to, mask & activeLaneMask());
3033 void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask)
3035 auto edge = Block::Edge{from, to};
3036 auto it = edgeActiveLaneMasks.find(edge);
3037 if (it == edgeActiveLaneMasks.end())
3039 edgeActiveLaneMasks.emplace(edge, mask);
3043 auto combined = it->second | mask;
3044 edgeActiveLaneMasks.erase(edge);
3045 edgeActiveLaneMasks.emplace(edge, combined);
3049 RValue<SIMD::Int> SpirvShader::EmitState::getActiveLaneMaskEdge(Block::ID from, Block::ID to)
3051 auto edge = Block::Edge{from, to};
3052 auto it = edgeActiveLaneMasks.find(edge);
3053 ASSERT_MSG(it != edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
3057 SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
3058 pipelineLayout(pipelineLayout)