1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include <spirv/unified1/spirv.hpp>
16 #include <spirv/unified1/GLSL.std.450.h>
17 #include "SpirvShader.hpp"
18 #include "System/Math.hpp"
19 #include "Vulkan/VkBuffer.hpp"
20 #include "Vulkan/VkDebug.hpp"
21 #include "Vulkan/VkPipelineLayout.hpp"
22 #include "Device/Config.hpp"
27 #undef Bool // b/127920555
32 rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints)
34 return rr::SignMask(ints) != 0;
37 rr::RValue<rr::Bool> AnyFalse(rr::RValue<sw::SIMD::Int> const &ints)
39 return rr::SignMask(~ints) != 0;
// Monotonically increasing ID source for shader instances.
// NOTE(review): 'volatile' does not make the post-increment in the
// constructor atomic — if SpirvShader objects can be constructed from
// multiple threads this should be std::atomic<int>; confirm the
// threading model with callers.
45 volatile int SpirvShader::serialCounter = 1; // Start at 1, 0 is invalid shader.
// Single analysis pass over the SPIR-V instruction stream. Records
// decorations, execution modes, type sizes, constants, block boundaries
// (for control flow), and the main function's first label. No code is
// emitted here; emission happens later in emit()/EmitBlock().
47 SpirvShader::SpirvShader(InsnStore const &insns)
48 : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
49 outputs{MAX_INTERFACE_COMPONENTS},
50 serialID{serialCounter++}, modes{}
52 ASSERT(insns.size() > 0);
54 // Simplifying assumptions (to be satisfied by earlier transformations)
55 // - There is exactly one entrypoint in the module, and it's the one we want
56 // - The only input/output OpVariables present are those used by the entrypoint
// currentBlock tracks the block currently being scanned; ID 0 is used as
// the "not inside a block" sentinel (valid SPIR-V result ids start at 1).
58 Block::ID currentBlock;
59 InsnIterator blockStart;
61 for (auto insn : *this)
63 switch (insn.opcode())
65 case spv::OpExecutionMode:
66 ProcessExecutionMode(insn);
// Single-target decoration: word(3) is an optional literal argument
// (0 when absent, e.g. for argument-less decorations like Flat).
71 TypeOrObjectID targetId = insn.word(1);
72 auto decoration = static_cast<spv::Decoration>(insn.word(2));
73 decorations[targetId].Apply(
75 insn.wordCount() > 3 ? insn.word(3) : 0);
77 if (decoration == spv::DecorationCentroid)
78 modes.NeedsCentroid = true;
82 case spv::OpMemberDecorate:
84 Type::ID targetId = insn.word(1);
85 auto memberIndex = insn.word(2);
86 auto &d = memberDecorations[targetId];
87 if (memberIndex >= d.size())
88 d.resize(memberIndex + 1); // on demand; exact size would require another pass...
89 auto decoration = static_cast<spv::Decoration>(insn.word(3));
92 insn.wordCount() > 4 ? insn.word(4) : 0);
94 if (decoration == spv::DecorationCentroid)
95 modes.NeedsCentroid = true;
99 case spv::OpDecorationGroup:
100 // Nothing to do here. We don't need to record the definition of the group; we'll just have
101 // the bundle of decorations float around. If we were to ever walk the decorations directly,
102 // we might think about introducing this as a real Object.
105 case spv::OpGroupDecorate:
107 auto const &srcDecorations = decorations[insn.word(1)];
108 for (auto i = 2u; i < insn.wordCount(); i++)
110 // remaining operands are targets to apply the group to.
111 decorations[insn.word(i)].Apply(srcDecorations);
116 case spv::OpGroupMemberDecorate:
118 auto const &srcDecorations = decorations[insn.word(1)];
119 for (auto i = 2u; i < insn.wordCount(); i += 2)
121 // remaining operands are pairs of <id>, literal for members to apply to.
122 auto &d = memberDecorations[insn.word(i)];
123 auto memberIndex = insn.word(i + 1);
124 if (memberIndex >= d.size())
125 d.resize(memberIndex + 1); // on demand resize, see above...
126 d[memberIndex].Apply(srcDecorations);
// OpLabel opens a new basic block; we must not already be inside one.
133 ASSERT(currentBlock.value() == 0);
134 currentBlock = Block::ID(insn.word(1));
139 // Branch Instructions (subset of Termination Instructions):
141 case spv::OpBranchConditional:
146 // Termination instruction:
148 case spv::OpUnreachable:
// A terminator closes the current block: record its [start, end)
// iterator range and reset the sentinel.
150 ASSERT(currentBlock.value() != 0);
151 auto blockEnd = insn; blockEnd++;
152 blocks[currentBlock] = Block(blockStart, blockEnd);
153 currentBlock = Block::ID(0);
155 if (insn.opcode() == spv::OpKill)
157 modes.ContainsKill = true;
162 case spv::OpLoopMerge:
163 case spv::OpSelectionMerge:
164 break; // Nothing to do in analysis pass.
166 case spv::OpTypeVoid:
167 case spv::OpTypeBool:
169 case spv::OpTypeFloat:
170 case spv::OpTypeVector:
171 case spv::OpTypeMatrix:
172 case spv::OpTypeImage:
173 case spv::OpTypeSampler:
174 case spv::OpTypeSampledImage:
175 case spv::OpTypeArray:
176 case spv::OpTypeRuntimeArray:
177 case spv::OpTypeStruct:
178 case spv::OpTypePointer:
179 case spv::OpTypeFunction:
183 case spv::OpVariable:
185 Type::ID typeId = insn.word(1);
186 Object::ID resultId = insn.word(2);
187 auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
188 if (insn.wordCount() > 4)
189 UNIMPLEMENTED("Variable initializers not yet supported");
191 auto &object = defs[resultId];
192 object.kind = Object::Kind::Variable;
193 object.definition = insn;
194 object.type = typeId;
195 object.pointerBase = insn.word(2); // base is itself
197 ASSERT(getType(typeId).storageClass == storageClass);
// The storage class determines how the variable is materialized:
// interface variables are registered for attribute/builtin routing,
// buffer-backed classes become physical pointers into descriptor memory.
199 switch (storageClass)
201 case spv::StorageClassInput:
202 case spv::StorageClassOutput:
203 ProcessInterfaceVariable(object);
205 case spv::StorageClassUniform:
206 case spv::StorageClassStorageBuffer:
207 case spv::StorageClassPushConstant:
208 object.kind = Object::Kind::PhysicalPointer;
211 case spv::StorageClassPrivate:
212 case spv::StorageClassFunction:
213 break; // Correctly handled.
215 case spv::StorageClassUniformConstant:
216 case spv::StorageClassWorkgroup:
217 case spv::StorageClassCrossWorkgroup:
218 case spv::StorageClassGeneric:
219 case spv::StorageClassAtomicCounter:
220 case spv::StorageClassImage:
221 UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
225 UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
231 case spv::OpConstant:
232 CreateConstant(insn).constantValue[0] = insn.word(3);
234 case spv::OpConstantFalse:
235 CreateConstant(insn).constantValue[0] = 0; // represent boolean false as zero
237 case spv::OpConstantTrue:
238 CreateConstant(insn).constantValue[0] = ~0u; // represent boolean true as all bits set
240 case spv::OpConstantNull:
243 // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
244 // OpConstantNull forms a constant of arbitrary type, all zeros.
245 auto &object = CreateConstant(insn);
246 auto &objectTy = getType(object.type);
247 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
249 object.constantValue[i] = 0;
253 case spv::OpConstantComposite:
// Flatten the constituents' component values into this constant's
// flat constantValue array.
255 auto &object = CreateConstant(insn);
257 for (auto i = 0u; i < insn.wordCount() - 3; i++)
259 auto &constituent = getObject(insn.word(i + 3));
260 auto &constituentTy = getType(constituent.type);
261 for (auto j = 0u; j < constituentTy.sizeInComponents; j++)
262 object.constantValue[offset++] = constituent.constantValue[j];
265 auto objectId = Object::ID(insn.word(2));
266 auto decorationsIt = decorations.find(objectId);
267 if (decorationsIt != decorations.end() &&
268 decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
270 // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
271 // Decorating an object with the WorkgroupSize built-in
272 // decoration will make that object contain the dimensions
273 // of a local workgroup. If an object is decorated with the
274 // WorkgroupSize decoration, this must take precedence over
275 // any execution mode set for LocalSize.
276 // The object decorated with WorkgroupSize must be declared
277 // as a three-component vector of 32-bit integers.
278 ASSERT(getType(object.type).sizeInComponents == 3);
279 modes.WorkgroupSizeX = object.constantValue[0];
280 modes.WorkgroupSizeY = object.constantValue[1];
281 modes.WorkgroupSizeZ = object.constantValue[2];
286 case spv::OpCapability:
287 break; // Various capabilities will be declared, but none affect our code generation at this point.
288 case spv::OpMemoryModel:
289 break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
291 case spv::OpEntryPoint:
293 case spv::OpFunction:
294 ASSERT(mainBlockId.value() == 0); // Multiple functions found
295 // Scan forward to find the function's label.
296 for (auto it = insn; it != end() && mainBlockId.value() == 0; it++)
300 case spv::OpFunction:
301 case spv::OpFunctionParameter:
304 mainBlockId = Block::ID(it.word(1));
307 WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str())
310 ASSERT(mainBlockId.value() != 0); // Function's OpLabel not found
312 case spv::OpFunctionEnd:
313 // Due to preprocessing, the entrypoint and its function provide no value.
315 case spv::OpExtInstImport:
316 // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
317 // Valid shaders will not attempt to import any other instruction sets.
318 if (0 != strcmp("GLSL.std.450", reinterpret_cast<char const *>(insn.wordPointer(2))))
320 UNIMPLEMENTED("Only GLSL extended instruction set is supported");
324 case spv::OpMemberName:
326 case spv::OpSourceContinued:
327 case spv::OpSourceExtension:
330 case spv::OpModuleProcessed:
332 // No semantic impact
335 case spv::OpFunctionParameter:
336 case spv::OpFunctionCall:
337 case spv::OpSpecConstant:
338 case spv::OpSpecConstantComposite:
339 case spv::OpSpecConstantFalse:
340 case spv::OpSpecConstantOp:
341 case spv::OpSpecConstantTrue:
342 // These should have all been removed by preprocessing passes. If we see them here,
343 // our assumptions are wrong and we will probably generate wrong code.
344 UNIMPLEMENTED("%s should have already been lowered.", OpcodeName(insn.opcode()).c_str());
347 case spv::OpFConvert:
348 case spv::OpSConvert:
349 case spv::OpUConvert:
350 UNIMPLEMENTED("No valid uses for Op*Convert until we support multiple bit widths");
354 case spv::OpAccessChain:
355 case spv::OpInBoundsAccessChain:
356 case spv::OpCompositeConstruct:
357 case spv::OpCompositeInsert:
358 case spv::OpCompositeExtract:
359 case spv::OpVectorShuffle:
360 case spv::OpVectorTimesScalar:
361 case spv::OpMatrixTimesScalar:
362 case spv::OpMatrixTimesVector:
363 case spv::OpVectorTimesMatrix:
364 case spv::OpVectorExtractDynamic:
365 case spv::OpVectorInsertDynamic:
366 case spv::OpNot: // Unary ops
369 case spv::OpLogicalNot:
370 case spv::OpIAdd: // Binary ops
381 case spv::OpFOrdEqual:
382 case spv::OpFUnordEqual:
383 case spv::OpFOrdNotEqual:
384 case spv::OpFUnordNotEqual:
385 case spv::OpFOrdLessThan:
386 case spv::OpFUnordLessThan:
387 case spv::OpFOrdGreaterThan:
388 case spv::OpFUnordGreaterThan:
389 case spv::OpFOrdLessThanEqual:
390 case spv::OpFUnordLessThanEqual:
391 case spv::OpFOrdGreaterThanEqual:
392 case spv::OpFUnordGreaterThanEqual:
397 case spv::OpINotEqual:
398 case spv::OpUGreaterThan:
399 case spv::OpSGreaterThan:
400 case spv::OpUGreaterThanEqual:
401 case spv::OpSGreaterThanEqual:
402 case spv::OpULessThan:
403 case spv::OpSLessThan:
404 case spv::OpULessThanEqual:
405 case spv::OpSLessThanEqual:
406 case spv::OpShiftRightLogical:
407 case spv::OpShiftRightArithmetic:
408 case spv::OpShiftLeftLogical:
409 case spv::OpBitwiseOr:
410 case spv::OpBitwiseXor:
411 case spv::OpBitwiseAnd:
412 case spv::OpLogicalOr:
413 case spv::OpLogicalAnd:
414 case spv::OpLogicalEqual:
415 case spv::OpLogicalNotEqual:
416 case spv::OpUMulExtended:
417 case spv::OpSMulExtended:
419 case spv::OpConvertFToU:
420 case spv::OpConvertFToS:
421 case spv::OpConvertSToF:
422 case spv::OpConvertUToF:
431 case spv::OpDPdxCoarse:
433 case spv::OpDPdyCoarse:
435 case spv::OpFwidthCoarse:
436 case spv::OpDPdxFine:
437 case spv::OpDPdyFine:
438 case spv::OpFwidthFine:
439 case spv::OpAtomicLoad:
441 // Instructions that yield an intermediate value
443 Type::ID typeId = insn.word(1);
444 Object::ID resultId = insn.word(2);
445 auto &object = defs[resultId];
446 object.type = typeId;
447 object.kind = Object::Kind::Value;
448 object.definition = insn;
450 if (insn.opcode() == spv::OpAccessChain || insn.opcode() == spv::OpInBoundsAccessChain)
452 // interior ptr has two parts:
453 // - logical base ptr, common across all lanes and known at compile time
// Propagate the chain's pointerBase from its <base> operand so later
// loads/stores can find the underlying variable.
455 Object::ID baseId = insn.word(3);
456 object.pointerBase = getObject(baseId).pointerBase;
462 case spv::OpAtomicStore:
463 // Don't need to do anything during analysis pass
467 UNIMPLEMENTED("%s", OpcodeName(insn.opcode()).c_str());
471 // Assign all Block::ins
// Second pass over the recorded blocks: invert the 'outs' edges so each
// block also knows its predecessors ('ins'), needed for lane-mask merging.
472 for (auto &it : blocks)
474 auto &blockId = it.first;
475 auto &block = it.second;
476 for (auto &outId : block.outs)
478 auto outIt = blocks.find(outId);
479 ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
480 auto &out = outIt->second;
481 out.ins.emplace(blockId);
// Records a type definition: stores its instruction, computes its flat
// component size, and — for aggregate/pointer types — its element type.
// Also marks "builtin block" structs (a struct with any builtin member)
// and propagates that flag through pointer types.
486 void SpirvShader::DeclareType(InsnIterator insn)
488 Type::ID resultId = insn.word(1);
490 auto &type = types[resultId];
491 type.definition = insn;
492 type.sizeInComponents = ComputeTypeSize(insn);
494 // A structure is a builtin block if it has a builtin
495 // member. All members of such a structure are builtins.
496 switch (insn.opcode())
498 case spv::OpTypeStruct:
500 auto d = memberDecorations.find(resultId);
501 if (d != memberDecorations.end())
503 for (auto &m : d->second)
507 type.isBuiltInBlock = true;
514 case spv::OpTypePointer:
// OpTypePointer: word(2) is the storage class, word(3) the pointee type.
516 Type::ID elementTypeId = insn.word(3);
517 type.element = elementTypeId;
518 type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
519 type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
522 case spv::OpTypeVector:
523 case spv::OpTypeMatrix:
524 case spv::OpTypeArray:
525 case spv::OpTypeRuntimeArray:
// For these aggregates the element type is operand word(2).
527 Type::ID elementTypeId = insn.word(2);
528 type.element = elementTypeId;
// Common setup for all OpConstant* handlers: registers the result object,
// marks it as a Constant, and allocates flat storage for its component
// values (one uint32_t per component). The caller fills constantValue.
536 SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
538 Type::ID typeId = insn.word(1);
539 Object::ID resultId = insn.word(2);
540 auto &object = defs[resultId];
541 auto &objectTy = getType(typeId);
542 object.type = typeId;
543 object.kind = Object::Kind::Constant;
544 object.definition = insn;
545 object.constantValue = std::unique_ptr<uint32_t[]>(new uint32_t[objectTy.sizeInComponents]);
// Registers an Input/Output OpVariable in the appropriate interface maps:
// builtins (either as a builtin block's members or a directly-decorated
// variable) go into input/outputBuiltins; user-defined locations are
// flattened into the inputs/outputs scalar-slot arrays via VisitInterface.
549 void SpirvShader::ProcessInterfaceVariable(Object &object)
551 auto &objectTy = getType(object.type);
552 ASSERT(objectTy.storageClass == spv::StorageClassInput || objectTy.storageClass == spv::StorageClassOutput);
554 ASSERT(objectTy.opcode() == spv::OpTypePointer);
555 auto pointeeTy = getType(objectTy.element);
557 auto &builtinInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputBuiltins : outputBuiltins;
558 auto &userDefinedInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputs : outputs;
560 ASSERT(object.opcode() == spv::OpVariable);
561 Object::ID resultId = object.definition.word(2);
563 if (objectTy.isBuiltInBlock)
565 // walk the builtin block, registering each of its members separately.
566 auto m = memberDecorations.find(objectTy.element);
567 ASSERT(m != memberDecorations.end()); // otherwise we wouldn't have marked the type chain
568 auto &structType = pointeeTy.definition;
// Walk struct members in declaration order, accumulating each member's
// component offset within the block.
571 for (auto &member : m->second)
573 auto &memberType = getType(structType.word(word));
575 if (member.HasBuiltIn)
577 builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
580 offset += memberType.sizeInComponents;
// Not a builtin block: the variable itself may be decorated as a builtin.
586 auto d = decorations.find(resultId);
587 if (d != decorations.end() && d->second.HasBuiltIn)
589 builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
593 object.kind = Object::Kind::InterfaceVariable;
594 VisitInterface(resultId,
595 [&userDefinedInterface](Decorations const &d, AttribType type) {
596 // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
// Slot index packs Location (4 components per location) plus Component.
597 auto scalarSlot = (d.Location << 2) | d.Component;
598 ASSERT(scalarSlot >= 0 &&
599 scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
601 auto &slot = userDefinedInterface[scalarSlot];
604 slot.NoPerspective = d.NoPerspective;
605 slot.Centroid = d.Centroid;
// Records an OpExecutionMode instruction's effect in this->modes.
// Note: LocalSize here can later be overridden by a constant decorated
// with BuiltInWorkgroupSize (see the constructor's OpConstantComposite
// handling).
610 void SpirvShader::ProcessExecutionMode(InsnIterator insn)
612 auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
615 case spv::ExecutionModeEarlyFragmentTests:
616 modes.EarlyFragmentTests = true;
618 case spv::ExecutionModeDepthReplacing:
619 modes.DepthReplacing = true;
621 case spv::ExecutionModeDepthGreater:
622 modes.DepthGreater = true;
624 case spv::ExecutionModeDepthLess:
625 modes.DepthLess = true;
627 case spv::ExecutionModeDepthUnchanged:
628 modes.DepthUnchanged = true;
630 case spv::ExecutionModeLocalSize:
631 modes.WorkgroupSizeX = insn.word(3);
632 modes.WorkgroupSizeY = insn.word(4);
633 modes.WorkgroupSizeZ = insn.word(5);
635 case spv::ExecutionModeOriginUpperLeft:
636 // This is always the case for a Vulkan shader. Do nothing.
639 UNIMPLEMENTED("No other execution modes are permitted");
// Computes the flattened size of a type in 32-bit components. Relies on
// SPIR-V's bottom-up type declaration order so element sizes are already
// known when an aggregate is declared.
643 uint32_t SpirvShader::ComputeTypeSize(InsnIterator insn)
645 // Types are always built from the bottom up (with the exception of forward ptrs, which
646 // don't appear in Vulkan shaders. Therefore, we can always assume our component parts have
647 // already been described (and so their sizes determined)
648 switch (insn.opcode())
650 case spv::OpTypeVoid:
651 case spv::OpTypeSampler:
652 case spv::OpTypeImage:
653 case spv::OpTypeSampledImage:
654 case spv::OpTypeFunction:
655 case spv::OpTypeRuntimeArray:
656 // Objects that don't consume any space.
657 // Descriptor-backed objects currently only need exist at compile-time.
658 // Runtime arrays don't appear in places where their size would be interesting
661 case spv::OpTypeBool:
662 case spv::OpTypeFloat:
664 // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
665 // we might need to change this, but only 32 bit components are required for Vulkan 1.1.
668 case spv::OpTypeVector:
669 case spv::OpTypeMatrix:
670 // Vectors and matrices both consume element count * element size.
671 return getType(insn.word(2)).sizeInComponents * insn.word(3);
673 case spv::OpTypeArray:
675 // Element count * element size. Array sizes come from constant ids.
676 auto arraySize = GetConstantInt(insn.word(3));
677 return getType(insn.word(2)).sizeInComponents * arraySize;
680 case spv::OpTypeStruct:
// Struct size is the sum of its member sizes (no padding in the
// flattened component representation).
683 for (uint32_t i = 2u; i < insn.wordCount(); i++)
685 size += getType(insn.word(i)).sizeInComponents;
690 case spv::OpTypePointer:
691 // Runtime representation of a pointer is a per-lane index.
692 // Note: clients are expected to look through the pointer if they want the pointee size instead.
696 // Some other random insn.
697 UNIMPLEMENTED("Only types are supported");
// Classifies a storage class by memory layout. Uniform / StorageBuffer /
// PushConstant variables live in externally-laid-out buffer memory and
// are handled as a special case here; other storage classes fall through
// to the default. (NOTE(review): the return values are outside this view
// — presumably buffer-backed classes are NOT interleaved by lane while
// everything else is; confirm against the full function.)
702 bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
704 switch (storageClass)
706 case spv::StorageClassUniform:
707 case spv::StorageClassStorageBuffer:
708 case spv::StorageClassPushConstant:
// Recursive worker for VisitInterface: walks a type tree, assigning
// Location/Component values and invoking f once per scalar component.
716 int SpirvShader::VisitInterfaceInner(Type::ID id, Decorations d, F f) const
718 // Recursively walks variable definition and its type tree, taking into account
719 // any explicit Location or Component decorations encountered; where explicit
720 // Locations or Components are not specified, assigns them sequentially.
721 // Collected decorations are carried down toward the leaves and across
722 // siblings; Effect of decorations intentionally does not flow back up the tree.
724 // F is a functor to be called with the effective decoration set for every component.
726 // Returns the next available location, and calls f().
728 // This covers the rules in Vulkan 1.1 spec, 14.1.4 Location Assignment.
730 ApplyDecorationsForId(&d, id);
732 auto const &obj = getType(id);
735 case spv::OpTypePointer:
// Look through the pointer: word(3) is the pointee type.
736 return VisitInterfaceInner<F>(obj.definition.word(3), d, f);
737 case spv::OpTypeMatrix:
738 for (auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
740 // consumes same components of N consecutive locations
741 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
744 case spv::OpTypeVector:
745 for (auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
747 // consumes N consecutive components in the same location
748 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
750 return d.Location + 1;
751 case spv::OpTypeFloat:
752 f(d, ATTRIBTYPE_FLOAT);
753 return d.Location + 1;
// OpTypeInt: word(3) is the signedness flag (1 = signed).
755 f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
756 return d.Location + 1;
757 case spv::OpTypeBool:
758 f(d, ATTRIBTYPE_UINT);
759 return d.Location + 1;
760 case spv::OpTypeStruct:
762 // iterate over members, which may themselves have Location/Component decorations
763 for (auto i = 0u; i < obj.definition.wordCount() - 2; i++)
765 ApplyDecorationsForIdMember(&d, id, i);
766 d.Location = VisitInterfaceInner<F>(obj.definition.word(i + 2), d, f);
767 d.Component = 0; // Implicit locations always have component=0
771 case spv::OpTypeArray:
773 auto arraySize = GetConstantInt(obj.definition.word(3));
774 for (auto i = 0u; i < arraySize; i++)
776 d.Location = VisitInterfaceInner<F>(obj.definition.word(2), d, f);
781 // Intentionally partial; most opcodes do not participate in type hierarchies
// Entry point for interface flattening: applies the variable's own
// decorations, then recurses into its pointer type (definition word(1))
// calling f for each scalar component.
787 void SpirvShader::VisitInterface(Object::ID id, F f) const
789 // Walk a variable definition and call f for each component in it.
791 ApplyDecorationsForId(&d, id);
793 auto def = getObject(id).definition;
794 ASSERT(def.opcode() == spv::OpVariable);
795 VisitInterfaceInner<F>(def.word(1), d, f);
// Computes a per-lane byte-derived offset (in sizeof(float) units) into
// externally-laid-out memory (Uniform/StorageBuffer/PushConstant) for an
// access chain. Constant indices accumulate into a scalar offset; dynamic
// indices accumulate into the SIMD offset. Uses Offset/ArrayStride/
// MatrixStride decorations for explicit layout.
798 SIMD::Int SpirvShader::WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
800 // Produce a offset into external memory in sizeof(float) units
802 int constantOffset = 0;
803 SIMD::Int dynamicOffset = SIMD::Int(0);
804 auto &baseObject = getObject(id);
805 Type::ID typeId = getType(baseObject.type).element;
807 ApplyDecorationsForId(&d, baseObject.type);
809 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
810 // Start with its offset and build from there.
811 if (baseObject.kind == Object::Kind::Value)
813 dynamicOffset += routine->getIntermediate(id).Int(0);
816 for (auto i = 0u; i < numIndexes; i++)
818 auto & type = getType(typeId);
819 switch (type.definition.opcode())
821 case spv::OpTypeStruct:
// Struct member: index must be a compile-time constant; use the
// member's Offset decoration.
823 int memberIndex = GetConstantInt(indexIds[i]);
824 ApplyDecorationsForIdMember(&d, typeId, memberIndex);
826 constantOffset += d.Offset / sizeof(float);
827 typeId = type.definition.word(2u + memberIndex);
830 case spv::OpTypeArray:
831 case spv::OpTypeRuntimeArray:
833 // TODO: b/127950082: Check bounds.
834 ApplyDecorationsForId(&d, typeId);
835 ASSERT(d.HasArrayStride);
836 auto & obj = getObject(indexIds[i]);
837 if (obj.kind == Object::Kind::Constant)
838 constantOffset += d.ArrayStride/sizeof(float) * GetConstantInt(indexIds[i]);
840 dynamicOffset += SIMD::Int(d.ArrayStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
841 typeId = type.element;
844 case spv::OpTypeMatrix:
846 // TODO: b/127950082: Check bounds.
847 ApplyDecorationsForId(&d, typeId);
848 ASSERT(d.HasMatrixStride);
849 auto & obj = getObject(indexIds[i]);
850 if (obj.kind == Object::Kind::Constant)
851 constantOffset += d.MatrixStride/sizeof(float) * GetConstantInt(indexIds[i]);
853 dynamicOffset += SIMD::Int(d.MatrixStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
854 typeId = type.element;
857 case spv::OpTypeVector:
// Vector components are tightly packed: stride is one float.
859 auto & obj = getObject(indexIds[i]);
860 if (obj.kind == Object::Kind::Constant)
861 constantOffset += GetConstantInt(indexIds[i]);
863 dynamicOffset += routine->getIntermediate(indexIds[i]).Int(0);
864 typeId = type.element;
868 UNIMPLEMENTED("Unexpected type '%s' in WalkExplicitLayoutAccessChain", OpcodeName(type.definition.opcode()).c_str());
872 return dynamicOffset + SIMD::Int(constantOffset);
// Computes a per-lane *component* offset into location-oriented (flattened,
// tightly packed) memory for an access chain — the counterpart of
// WalkExplicitLayoutAccessChain for non-buffer storage. Strides come from
// sizeInComponents rather than layout decorations.
875 SIMD::Int SpirvShader::WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
877 // TODO: avoid doing per-lane work in some cases if we can?
878 // Produce a *component* offset into location-oriented memory
880 int constantOffset = 0;
881 SIMD::Int dynamicOffset = SIMD::Int(0);
882 auto &baseObject = getObject(id);
883 Type::ID typeId = getType(baseObject.type).element;
885 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
886 // Start with its offset and build from there.
887 if (baseObject.kind == Object::Kind::Value)
889 dynamicOffset += routine->getIntermediate(id).Int(0);
892 for (auto i = 0u; i < numIndexes; i++)
894 auto & type = getType(typeId);
895 switch(type.opcode())
897 case spv::OpTypeStruct:
// Offset of a struct member is the sum of the sizes of the members
// that precede it (flat packing, no padding).
899 int memberIndex = GetConstantInt(indexIds[i]);
900 int offsetIntoStruct = 0;
901 for (auto j = 0; j < memberIndex; j++) {
902 auto memberType = type.definition.word(2u + j);
903 offsetIntoStruct += getType(memberType).sizeInComponents;
905 constantOffset += offsetIntoStruct;
906 typeId = type.definition.word(2u + memberIndex);
910 case spv::OpTypeVector:
911 case spv::OpTypeMatrix:
912 case spv::OpTypeArray:
913 case spv::OpTypeRuntimeArray:
915 // TODO: b/127950082: Check bounds.
916 auto stride = getType(type.element).sizeInComponents;
917 auto & obj = getObject(indexIds[i]);
918 if (obj.kind == Object::Kind::Constant)
919 constantOffset += stride * GetConstantInt(indexIds[i]);
921 dynamicOffset += SIMD::Int(stride) * routine->getIntermediate(indexIds[i]).Int(0);
922 typeId = type.element;
927 UNIMPLEMENTED("Unexpected type '%s' in WalkAccessChain", OpcodeName(type.opcode()).c_str());
931 return dynamicOffset + SIMD::Int(constantOffset);
// Like WalkAccessChain, but all indexes are literal values (as used by
// OpCompositeExtract/Insert), so the result is a plain compile-time
// component offset. Note RuntimeArray is absent here: literal chains
// index into composites of statically known size.
934 uint32_t SpirvShader::WalkLiteralAccessChain(Type::ID typeId, uint32_t numIndexes, uint32_t const *indexes) const
936 uint32_t constantOffset = 0;
938 for (auto i = 0u; i < numIndexes; i++)
940 auto & type = getType(typeId);
941 switch(type.opcode())
943 case spv::OpTypeStruct:
945 int memberIndex = indexes[i];
946 int offsetIntoStruct = 0;
947 for (auto j = 0; j < memberIndex; j++) {
948 auto memberType = type.definition.word(2u + j);
949 offsetIntoStruct += getType(memberType).sizeInComponents;
951 constantOffset += offsetIntoStruct;
952 typeId = type.definition.word(2u + memberIndex);
956 case spv::OpTypeVector:
957 case spv::OpTypeMatrix:
958 case spv::OpTypeArray:
960 auto elementType = type.definition.word(2);
961 auto stride = getType(elementType).sizeInComponents;
962 constantOffset += stride * indexes[i];
963 typeId = elementType;
968 UNIMPLEMENTED("Unexpected type in WalkLiteralAccessChain");
972 return constantOffset;
// Records a single decoration and its optional literal argument on this
// decoration set. Value-carrying decorations store the argument; flag
// decorations just set their boolean.
975 void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
979 case spv::DecorationLocation:
981 Location = static_cast<int32_t>(arg);
983 case spv::DecorationComponent:
987 case spv::DecorationDescriptorSet:
988 HasDescriptorSet = true;
991 case spv::DecorationBinding:
995 case spv::DecorationBuiltIn:
997 BuiltIn = static_cast<spv::BuiltIn>(arg);
999 case spv::DecorationFlat:
1002 case spv::DecorationNoPerspective:
1003 NoPerspective = true;
1005 case spv::DecorationCentroid:
1008 case spv::DecorationBlock:
1011 case spv::DecorationBufferBlock:
1014 case spv::DecorationOffset:
1016 Offset = static_cast<int32_t>(arg);
1018 case spv::DecorationArrayStride:
1019 HasArrayStride = true;
1020 ArrayStride = static_cast<int32_t>(arg);
1022 case spv::DecorationMatrixStride:
1023 HasMatrixStride = true;
1024 MatrixStride = static_cast<int32_t>(arg);
1027 // Intentionally partial, there are many decorations we just don't care about.
// Merges another decoration set (e.g. a decoration group) into this one.
// Value-carrying fields are copied only when present in src (guarded by
// the Has* flags); boolean flags are OR'd in.
1032 void SpirvShader::Decorations::Apply(const sw::SpirvShader::Decorations &src)
1034 // Apply a decoration group to this set of decorations
1038 BuiltIn = src.BuiltIn;
1041 if (src.HasLocation)
1044 Location = src.Location;
1047 if (src.HasComponent)
1049 HasComponent = true;
1050 Component = src.Component;
1053 if (src.HasDescriptorSet)
1055 HasDescriptorSet = true;
1056 DescriptorSet = src.DescriptorSet;
1062 Binding = src.Binding;
1068 Offset = src.Offset;
1071 if (src.HasArrayStride)
1073 HasArrayStride = true;
1074 ArrayStride = src.ArrayStride;
1077 if (src.HasMatrixStride)
1079 HasMatrixStride = true;
1080 MatrixStride = src.MatrixStride;
1084 NoPerspective |= src.NoPerspective;
1085 Centroid |= src.Centroid;
1087 BufferBlock |= src.BufferBlock;
1090 void SpirvShader::ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const
1092 auto it = decorations.find(id);
1093 if (it != decorations.end())
1094 d->Apply(it->second);
1097 void SpirvShader::ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const
1099 auto it = memberDecorations.find(id);
1100 if (it != memberDecorations.end() && member < it->second.size())
1102 d->Apply(it->second[member]);
1106 uint32_t SpirvShader::GetConstantInt(Object::ID id) const
1108 // Slightly hackish access to constants very early in translation.
1109 // General consumption of constants by other instructions should
1110 // probably be just lowered to Reactor.
1112 // TODO: not encountered yet since we only use this for array sizes etc,
1113 // but is possible to construct integer constant 0 via OpConstantNull.
1114 auto insn = getObject(id).definition;
1115 ASSERT(insn.opcode() == spv::OpConstant);
1116 ASSERT(getType(insn.word(1)).opcode() == spv::OpTypeInt);
1117 return insn.word(3);
// Pre-pass before emission: allocates an lvalue (register-file slot) in
// the routine for every OpVariable with a nonzero flattened size.
1122 void SpirvShader::emitProlog(SpirvRoutine *routine) const
1124 for (auto insn : *this)
1126 switch (insn.opcode())
1128 case spv::OpVariable:
1130 Type::ID resultPointerTypeId = insn.word(1);
1131 auto resultPointerType = getType(resultPointerTypeId);
1132 auto pointeeType = getType(resultPointerType.element);
1134 if(pointeeType.sizeInComponents > 0) // TODO: what to do about zero-slot objects?
1136 Object::ID resultId = insn.word(2);
1137 routine->createLvalue(resultId, pointeeType.sizeInComponents);
1142 // Nothing else produces interface variables, so can all be safely ignored.
// Main emission entry point: emits the module's pre-block instructions
// (everything up to the first OpLabel), then walks the CFG from the main
// block in BFS order, emitting each reachable block once.
1148 void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask) const
1151 state.setActiveLaneMask(activeLaneMask);
1152 state.routine = routine;
1154 // Emit everything up to the first label
1155 // TODO: Separate out dispatch of block from non-block instructions?
1156 for (auto insn : *this)
1158 if (insn.opcode() == spv::OpLabel)
1162 EmitInstruction(insn, &state);
1165 // Emit all the blocks in BFS order, starting with the main block.
1166 std::queue<Block::ID> pending;
1167 pending.push(mainBlockId);
1168 while (pending.size() > 0)
1170 auto id = pending.front();
// Skip blocks already emitted (EmitBlock may have visited predecessors
// recursively); otherwise emit and enqueue the block's successors.
1172 if (state.visited.count(id) == 0)
1174 EmitBlock(id, &state);
1175 for (auto it : getBlock(id).outs)
// Emits one basic block (at most once — guarded by state->visited).
// For blocks reached by branches, first emits all predecessor blocks and
// ORs their edge lane-masks together to form this block's active lane
// mask; the main block keeps the mask it was given.
1183 void SpirvShader::EmitBlock(Block::ID id, EmitState *state) const
1185 if (state->visited.count(id) > 0)
1187 return; // Already processed this block.
1190 state->visited.emplace(id);
1192 auto &block = getBlock(id);
1197 case Block::StructuredBranchConditional:
1198 case Block::UnstructuredBranchConditional:
1199 case Block::StructuredSwitch:
1200 case Block::UnstructuredSwitch:
1201 if (id != mainBlockId)
1203 // Emit all preceding blocks and set the activeLaneMask.
1204 Intermediate activeLaneMask(1);
1205 activeLaneMask.move(0, SIMD::Int(0));
1206 for (auto in : block.ins)
1208 EmitBlock(in, state);
// Accumulate: a lane is active here if it was active on ANY
// incoming edge.
1209 auto inMask = state->getActiveLaneMaskEdge(in, id);
1210 activeLaneMask.replace(0, activeLaneMask.Int(0) | inMask);
1212 state->setActiveLaneMask(activeLaneMask.Int(0));
1214 state->currentBlock = id;
1215 EmitInstructions(block.begin(), block.end(), state);
1219 state->currentBlock = id;
1224 UNREACHABLE("Unexpected Block Kind: %d", int(block.kind));
// Emits each instruction in [begin, end) in order, dispatching on the
// per-instruction EmitResult (Continue keeps going; Terminator ends the
// block's instruction stream).
1228 void SpirvShader::EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const
1230 for (auto insn = begin; insn != end; insn++)
1232 auto res = EmitInstruction(insn, state);
1235 case EmitResult::Continue:
1237 case EmitResult::Terminator:
1240 UNREACHABLE("Unexpected EmitResult %d", int(res));
1246 void SpirvShader::EmitLoop(EmitState *state) const
// Emits a SPIR-V structured loop whose header is state->currentBlock.
// Strategy: keep a per-lane "still looping" mask, back each OpPhi with an
// alloca (Array) primed from the non-back-edge inputs, run the body, then
// conditionally branch back to the header while any lane remains active.
1248 auto blockId = state->currentBlock;
1249 auto block = getBlock(blockId);
1251 // loopActiveLaneMask is the mask of lanes that are continuing to loop.
1252 // This is initialized with the incoming active lane masks.
1253 SIMD::Int loopActiveLaneMask = SIMD::Int(0);
1254 for (auto in : block.ins)
1256 if (!existsPath(blockId, in)) // if not a loop back edge
1258 EmitBlock(in, state);
1259 loopActiveLaneMask |= state->getActiveLaneMaskEdge(in, blockId);
1263 // Generate an alloca for each of the loop's phis.
1264 // These will be primed with the incoming, non back edge Phi values
1265 // before the loop, and then updated just before the loop jumps back to
// (continuation of this comment, and the 'struct LoopPhi' opener, fall in
// dropped lines)
1269 Object::ID phiId; // The Phi identifier.
1270 Object::ID continueValue; // The source merge value from the loop.
1271 Array<SIMD::Int> storage; // The alloca.
1274 std::vector<LoopPhi> phis;
1276 // For each OpPhi between the block start and the merge instruction:
1277 for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
1279 if (insn.opcode() == spv::OpPhi)
1281 auto objectId = Object::ID(insn.word(2));
1282 auto &object = getObject(objectId);
1283 auto &type = getType(object.type);
// (the 'LoopPhi phi;' declaration falls in a dropped line)
1286 phi.phiId = Object::ID(insn.word(2));
1287 phi.storage = Array<SIMD::Int>(type.sizeInComponents);
1289 // Start with the Phi set to 0.
1290 for (uint32_t i = 0; i < type.sizeInComponents; i++)
1292 phi.storage[i] = SIMD::Int(0);
1295 // For each Phi source:
1296 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
1298 auto varId = Object::ID(insn.word(w + 0));
1299 auto blockId = Block::ID(insn.word(w + 1));
// A source block reachable from the loop header is a back edge; its value
// is deferred until the bottom of the loop (phi.continueValue).
1300 if (existsPath(state->currentBlock, blockId))
1302 // This source is from a loop back-edge.
1303 ASSERT(phi.continueValue == 0 || phi.continueValue == varId);
1304 phi.continueValue = varId;
1308 // This source is from a preceding block.
// Merge under the edge mask: only lanes entering via this edge
// contribute this source's value.
1309 for (uint32_t i = 0; i < type.sizeInComponents; i++)
1311 auto in = GenericValue(this, state->routine, varId);
1312 auto mask = state->getActiveLaneMaskEdge(blockId, state->currentBlock);
1313 phi.storage[i] = phi.storage[i] | (in.Int(i) & mask);
1318 phis.push_back(phi);
1322 // Create the loop basic blocks
1323 auto headerBasicBlock = Nucleus::createBasicBlock();
1324 auto mergeBasicBlock = Nucleus::createBasicBlock();
1326 // Start emitting code inside the loop.
1327 Nucleus::createBr(headerBasicBlock);
1328 Nucleus::setInsertBlock(headerBasicBlock);
1330 // Load the Phi values from storage.
1331 // This will load at the start of each loop.
1332 for (auto &phi : phis)
1334 auto &type = getType(getObject(phi.phiId).type);
1335 auto &dst = state->routine->createIntermediate(phi.phiId, type.sizeInComponents);
1336 for (unsigned int i = 0u; i < type.sizeInComponents; i++)
1338 dst.move(i, phi.storage[i]);
1342 // Load the active lane mask.
1343 state->setActiveLaneMask(loopActiveLaneMask);
1345 // Emit all the non-phi instructions in this loop header block.
1346 for (auto insn = block.begin(); insn != block.end(); insn++)
1348 if (insn.opcode() != spv::OpPhi)
1350 EmitInstruction(insn, state);
1354 // Emit all the back-edge blocks and use their active lane masks to
1355 // rebuild the loopActiveLaneMask.
1356 loopActiveLaneMask = SIMD::Int(0);
1357 for (auto in : block.ins)
1359 if (existsPath(blockId, in))
1361 EmitBlock(in, state);
1362 loopActiveLaneMask |= state->getActiveLaneMaskEdge(in, blockId);
1366 // Update loop phi values
1367 for (auto &phi : phis)
1369 if (phi.continueValue != 0)
1371 auto val = GenericValue(this, state->routine, phi.continueValue);
1372 auto &type = getType(getObject(phi.phiId).type);
1373 for (unsigned int i = 0u; i < type.sizeInComponents; i++)
1375 phi.storage[i] = val.Int(i);
1380 // Loop body now done.
1381 // If any lanes are still active, jump back to the loop header,
1382 // otherwise jump to the merge block.
1383 Nucleus::createCondBr(AnyTrue(loopActiveLaneMask).value, headerBasicBlock, mergeBasicBlock);
1385 // Emit the merge block, and we're done.
1386 Nucleus::setInsertBlock(mergeBasicBlock);
1387 EmitBlock(block.mergeBlock, state);
1390 SpirvShader::EmitResult SpirvShader::EmitInstruction(InsnIterator insn, EmitState *state) const
// Central dispatch: routes a single SPIR-V instruction to its emitter.
// NOTE(review): numerous 'case' labels and 'break'/'return' lines fall in
// dropped lines throughout this switch (e.g. the labels preceding the
// Emit* calls near the bottom, such as OpLoad before OpAtomicLoad and
// OpStore before OpAtomicStore, per the gaps in the original numbering).
1392 switch (insn.opcode())
// Declarations, types and constants were fully processed at analysis
// time; nothing to emit for them here.
1394 case spv::OpTypeVoid:
1395 case spv::OpTypeInt:
1396 case spv::OpTypeFloat:
1397 case spv::OpTypeBool:
1398 case spv::OpTypeVector:
1399 case spv::OpTypeArray:
1400 case spv::OpTypeRuntimeArray:
1401 case spv::OpTypeMatrix:
1402 case spv::OpTypeStruct:
1403 case spv::OpTypePointer:
1404 case spv::OpTypeFunction:
1405 case spv::OpExecutionMode:
1406 case spv::OpMemoryModel:
1407 case spv::OpFunction:
1408 case spv::OpFunctionEnd:
1409 case spv::OpConstant:
1410 case spv::OpConstantNull:
1411 case spv::OpConstantTrue:
1412 case spv::OpConstantFalse:
1413 case spv::OpConstantComposite:
1415 case spv::OpExtension:
1416 case spv::OpCapability:
1417 case spv::OpEntryPoint:
1418 case spv::OpExtInstImport:
1419 case spv::OpDecorate:
1420 case spv::OpMemberDecorate:
1421 case spv::OpGroupDecorate:
1422 case spv::OpGroupMemberDecorate:
1423 case spv::OpDecorationGroup:
1425 case spv::OpMemberName:
1427 case spv::OpSourceContinued:
1428 case spv::OpSourceExtension:
1431 case spv::OpModuleProcessed:
1433 // Nothing to do at emit time. These are either fully handled at analysis time,
1434 // or don't require any work at all.
1435 return EmitResult::Continue;
// (a separate case label falls in a dropped line above this return)
1438 return EmitResult::Continue;
1440 case spv::OpVariable:
1441 return EmitVariable(insn, state);
1444 case spv::OpAtomicLoad:
1445 return EmitLoad(insn, state);
1448 case spv::OpAtomicStore:
1449 return EmitStore(insn, state);
1451 case spv::OpAccessChain:
1452 case spv::OpInBoundsAccessChain:
1453 return EmitAccessChain(insn, state);
1455 case spv::OpCompositeConstruct:
1456 return EmitCompositeConstruct(insn, state);
1458 case spv::OpCompositeInsert:
1459 return EmitCompositeInsert(insn, state);
1461 case spv::OpCompositeExtract:
1462 return EmitCompositeExtract(insn, state);
1464 case spv::OpVectorShuffle:
1465 return EmitVectorShuffle(insn, state);
1467 case spv::OpVectorExtractDynamic:
1468 return EmitVectorExtractDynamic(insn, state);
1470 case spv::OpVectorInsertDynamic:
1471 return EmitVectorInsertDynamic(insn, state);
1473 case spv::OpVectorTimesScalar:
1474 case spv::OpMatrixTimesScalar:
1475 return EmitVectorTimesScalar(insn, state);
1477 case spv::OpMatrixTimesVector:
1478 return EmitMatrixTimesVector(insn, state);
1480 case spv::OpVectorTimesMatrix:
1481 return EmitVectorTimesMatrix(insn, state);
1484 case spv::OpSNegate:
1485 case spv::OpFNegate:
1486 case spv::OpLogicalNot:
1487 case spv::OpConvertFToU:
1488 case spv::OpConvertFToS:
1489 case spv::OpConvertSToF:
1490 case spv::OpConvertUToF:
1491 case spv::OpBitcast:
// (more unary opcodes — e.g. IsInf/IsNan per EmitUnaryOp — fall in the
// dropped lines here)
1495 case spv::OpDPdxCoarse:
1497 case spv::OpDPdyCoarse:
1499 case spv::OpFwidthCoarse:
1500 case spv::OpDPdxFine:
1501 case spv::OpDPdyFine:
1502 case spv::OpFwidthFine:
1503 return EmitUnaryOp(insn, state);
// (arithmetic binary opcodes — IAdd/ISub/IMul/divides/mods etc. — fall in
// the dropped lines here; see EmitBinaryOp)
1516 case spv::OpFOrdEqual:
1517 case spv::OpFUnordEqual:
1518 case spv::OpFOrdNotEqual:
1519 case spv::OpFUnordNotEqual:
1520 case spv::OpFOrdLessThan:
1521 case spv::OpFUnordLessThan:
1522 case spv::OpFOrdGreaterThan:
1523 case spv::OpFUnordGreaterThan:
1524 case spv::OpFOrdLessThanEqual:
1525 case spv::OpFUnordLessThanEqual:
1526 case spv::OpFOrdGreaterThanEqual:
1527 case spv::OpFUnordGreaterThanEqual:
1532 case spv::OpINotEqual:
1533 case spv::OpUGreaterThan:
1534 case spv::OpSGreaterThan:
1535 case spv::OpUGreaterThanEqual:
1536 case spv::OpSGreaterThanEqual:
1537 case spv::OpULessThan:
1538 case spv::OpSLessThan:
1539 case spv::OpULessThanEqual:
1540 case spv::OpSLessThanEqual:
1541 case spv::OpShiftRightLogical:
1542 case spv::OpShiftRightArithmetic:
1543 case spv::OpShiftLeftLogical:
1544 case spv::OpBitwiseOr:
1545 case spv::OpBitwiseXor:
1546 case spv::OpBitwiseAnd:
1547 case spv::OpLogicalOr:
1548 case spv::OpLogicalAnd:
1549 case spv::OpLogicalEqual:
1550 case spv::OpLogicalNotEqual:
1551 case spv::OpUMulExtended:
1552 case spv::OpSMulExtended:
1553 return EmitBinaryOp(insn, state);
// (each of the following returns is preceded by its case label in a
// dropped line: OpDot, OpSelect, OpAny, OpAll, OpBranch, OpPhi, OpSwitch,
// OpReturn — inferred from the called emitter's name; confirm)
1556 return EmitDot(insn, state);
1559 return EmitSelect(insn, state);
1561 case spv::OpExtInst:
1562 return EmitExtendedInstruction(insn, state);
1565 return EmitAny(insn, state);
1568 return EmitAll(insn, state);
1571 return EmitBranch(insn, state);
1574 return EmitPhi(insn, state);
1576 case spv::OpSelectionMerge:
1577 case spv::OpLoopMerge:
1578 return EmitResult::Continue;
1580 case spv::OpBranchConditional:
1581 return EmitBranchConditional(insn, state);
1584 return EmitSwitch(insn, state);
1586 case spv::OpUnreachable:
1587 return EmitUnreachable(insn, state);
1590 return EmitReturn(insn, state);
// Anything not handled above is a shader feature we don't support yet.
1593 UNIMPLEMENTED("opcode: %s", OpcodeName(insn.opcode()).c_str());
1597 return EmitResult::Continue;
1600 SpirvShader::EmitResult SpirvShader::EmitVariable(InsnIterator insn, EmitState *state) const
// Emits an OpVariable: wires the variable's storage to its backing
// resource depending on storage class (inputs, descriptor-backed buffers,
// push constants). Other storage classes fall through with no emit work.
1602 auto routine = state->routine;
1603 Object::ID resultId = insn.word(2);
1604 auto &object = getObject(resultId);
1605 auto &objectTy = getType(object.type);
1606 switch (objectTy.storageClass)
1608 case spv::StorageClassInput:
1610 if (object.kind == Object::Kind::InterfaceVariable)
1612 auto &dst = routine->getValue(resultId);
// Copy each interface scalar from the routine's input slots; slot index
// packs Location (x4) with Component. (The 'offset' counter's declaration
// falls in a dropped line.)
1614 VisitInterface(resultId,
1615 [&](Decorations const &d, AttribType type) {
1616 auto scalarSlot = d.Location << 2 | d.Component;
1617 dst[offset++] = routine->inputs[scalarSlot];
1622 case spv::StorageClassUniform:
1623 case spv::StorageClassStorageBuffer:
// Resolve descriptor set + binding to the buffer's device memory and
// publish it as a physical pointer. (The 'Decorations d{};' declaration
// falls in a dropped line.)
1626 ApplyDecorationsForId(&d, resultId);
1627 ASSERT(d.DescriptorSet >= 0);
1628 ASSERT(d.Binding >= 0);
1630 size_t bindingOffset = routine->pipelineLayout->getBindingOffset(d.DescriptorSet, d.Binding);
1632 Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
1633 Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // VkDescriptorBufferInfo*
1634 Pointer<Byte> buffer = *Pointer<Pointer<Byte>>(binding + OFFSET(VkDescriptorBufferInfo, buffer)); // vk::Buffer*
1635 Pointer<Byte> data = *Pointer<Pointer<Byte>>(buffer + vk::Buffer::DataOffset); // void*
1636 Int offset = *Pointer<Int>(binding + OFFSET(VkDescriptorBufferInfo, offset));
1637 Pointer<Byte> address = data + offset;
1638 routine->physicalPointers[resultId] = address;
1641 case spv::StorageClassPushConstant:
// Push constants live in a single routine-owned buffer.
1643 routine->physicalPointers[resultId] = routine->pushConstants;
1650 return EmitResult::Continue;
1653 SpirvShader::EmitResult SpirvShader::EmitLoad(InsnIterator insn, EmitState *state) const
// Emits OpLoad / OpAtomicLoad. Three code paths: per-lane scalar loads
// when offsets diverge or some lanes are masked off, and two vectorized
// fast paths (lane-interleaved vs. linear memory layout).
// NOTE(review): the Reactor 'Else' lines and the atomic-guard 'if' around
// the memory-semantics read fall in dropped lines.
1655 auto routine = state->routine;
1656 bool atomic = (insn.opcode() == spv::OpAtomicLoad);
1657 Object::ID resultId = insn.word(2);
1658 Object::ID pointerId = insn.word(3);
1659 auto &result = getObject(resultId);
1660 auto &resultTy = getType(result.type);
1661 auto &pointer = getObject(pointerId);
1662 auto &pointerBase = getObject(pointer.pointerBase);
1663 auto &pointerBaseTy = getType(pointerBase.type);
1664 std::memory_order memoryOrder = std::memory_order_relaxed;
// Atomic loads carry an explicit memory-semantics operand (word 5).
1668 Object::ID semanticsId = insn.word(5);
1669 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1670 memoryOrder = MemoryOrder(memorySemantics);
1673 ASSERT(getType(pointer.type).element == result.type);
1674 ASSERT(Type::ID(insn.word(1)) == result.type);
1675 ASSERT(!atomic || getType(getType(pointer.type).element).opcode() == spv::OpTypeInt); // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1677 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1679 UNIMPLEMENTED("StorageClassImage load not yet implemented");
// Resolve the base address: device memory (physical pointer) or a
// routine-local value.
1682 Pointer<Float> ptrBase;
1683 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1685 ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1689 ptrBase = &routine->getValue(pointer.pointerBase)[0];
1692 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1693 auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
1695 auto load = std::unique_ptr<SIMD::Float[]>(new SIMD::Float[resultTy.sizeInComponents]);
1697 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1699 // Divergent offsets or masked lanes.
1700 auto offsets = pointer.kind == Object::Kind::Value ?
1701 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1702 RValue<SIMD::Int>(SIMD::Int(0));
1703 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1705 // i wish i had a Float,Float,Float,Float constructor here..
1706 for (int j = 0; j < SIMD::Width; j++)
// Scalar load per active lane only, so masked lanes never touch memory.
1708 If(Extract(state->activeLaneMask(), j) != 0)
1710 Int offset = Int(i) + Extract(offsets, j);
1711 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1712 load[i] = Insert(load[i], Load(&ptrBase[offset], sizeof(float), atomic, memoryOrder), j);
// (the Reactor 'Else' separating the divergent and uniform paths falls in
// dropped lines here)
1719 // No divergent offsets or masked lanes.
1720 if (interleavedByLane)
1722 // Lane-interleaved data.
1723 Pointer<SIMD::Float> src = ptrBase;
1724 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1726 load[i] = Load(&src[i], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
1731 // Non-interleaved data.
1732 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1734 load[i] = RValue<SIMD::Float>(Load(&ptrBase[i], sizeof(float), atomic, memoryOrder)); // TODO: optimize alignment
// Publish the loaded components as the result intermediate.
1739 auto &dst = routine->createIntermediate(resultId, resultTy.sizeInComponents);
1740 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1742 dst.move(i, load[i]);
1745 return EmitResult::Continue;
1748 SpirvShader::EmitResult SpirvShader::EmitStore(InsnIterator insn, EmitState *state) const
// Emits OpStore / OpAtomicStore. Mirrors EmitLoad: constant vs.
// intermediate source, and within each, a masked per-lane scalar path
// plus vectorized fast paths for interleaved and linear layouts.
// NOTE(review): Reactor 'Else' lines and the atomic guard around the
// memory-semantics read fall in dropped lines.
1750 auto routine = state->routine;
1751 bool atomic = (insn.opcode() == spv::OpAtomicStore);
// OpAtomicStore's value operand is word 4; OpStore's is word 2.
1752 Object::ID pointerId = insn.word(1);
1753 Object::ID objectId = insn.word(atomic ? 4 : 2);
1754 auto &object = getObject(objectId);
1755 auto &pointer = getObject(pointerId);
1756 auto &pointerTy = getType(pointer.type);
1757 auto &elementTy = getType(pointerTy.element);
1758 auto &pointerBase = getObject(pointer.pointerBase);
1759 auto &pointerBaseTy = getType(pointerBase.type);
1760 std::memory_order memoryOrder = std::memory_order_relaxed;
1764 Object::ID semanticsId = insn.word(3);
1765 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1766 memoryOrder = MemoryOrder(memorySemantics);
1769 ASSERT(!atomic || elementTy.opcode() == spv::OpTypeInt); // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1771 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1773 UNIMPLEMENTED("StorageClassImage store not yet implemented");
1776 Pointer<Float> ptrBase;
1777 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1779 ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1783 ptrBase = &routine->getValue(pointer.pointerBase)[0];
1786 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1787 auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
1789 if (object.kind == Object::Kind::Constant)
1791 // Constant source data.
1792 auto src = reinterpret_cast<float *>(object.constantValue.get());
1793 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1795 // Divergent offsets or masked lanes.
1796 auto offsets = pointer.kind == Object::Kind::Value ?
1797 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1798 RValue<SIMD::Int>(SIMD::Int(0));
1799 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1801 for (int j = 0; j < SIMD::Width; j++)
// Store per active lane only, so masked lanes never write memory.
1803 If(Extract(state->activeLaneMask(), j) != 0)
1805 Int offset = Int(i) + Extract(offsets, j);
1806 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1807 Store(RValue<Float>(src[i]), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1814 // Constant source data.
1815 // No divergent offsets or masked lanes.
1816 Pointer<SIMD::Float> dst = ptrBase;
1817 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1819 Store(RValue<SIMD::Float>(src[i]), &dst[i], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
1825 // Intermediate source data.
1826 auto &src = routine->getIntermediate(objectId);
1827 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1829 // Divergent offsets or masked lanes.
1830 auto offsets = pointer.kind == Object::Kind::Value ?
1831 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1832 RValue<SIMD::Int>(SIMD::Int(0));
1833 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1835 for (int j = 0; j < SIMD::Width; j++)
1837 If(Extract(state->activeLaneMask(), j) != 0)
1839 Int offset = Int(i) + Extract(offsets, j);
1840 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1841 Store(Extract(src.Float(i), j), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1848 // No divergent offsets or masked lanes.
1849 if (interleavedByLane)
1851 // Lane-interleaved data.
1852 Pointer<SIMD::Float> dst = ptrBase;
1853 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1855 Store(src.Float(i), &dst[i], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
1860 // Intermediate source data. Non-interleaved data.
1861 Pointer<SIMD::Float> dst = ptrBase;
1862 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1864 Store<SIMD::Float>(SIMD::Float(src.Float(i)), &dst[i], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
1870 return EmitResult::Continue;
1873 SpirvShader::EmitResult SpirvShader::EmitAccessChain(InsnIterator insn, EmitState *state) const
1875 auto routine = state->routine;
1876 Type::ID typeId = insn.word(1);
1877 Object::ID resultId = insn.word(2);
1878 Object::ID baseId = insn.word(3);
1879 uint32_t numIndexes = insn.wordCount() - 4;
1880 const uint32_t *indexes = insn.wordPointer(4);
1881 auto &type = getType(typeId);
1882 ASSERT(type.sizeInComponents == 1);
1883 ASSERT(getObject(baseId).pointerBase == getObject(resultId).pointerBase);
1885 auto &dst = routine->createIntermediate(resultId, type.sizeInComponents);
1887 if(type.storageClass == spv::StorageClassPushConstant ||
1888 type.storageClass == spv::StorageClassUniform ||
1889 type.storageClass == spv::StorageClassStorageBuffer)
1891 dst.move(0, WalkExplicitLayoutAccessChain(baseId, numIndexes, indexes, routine));
1895 dst.move(0, WalkAccessChain(baseId, numIndexes, indexes, routine));
1898 return EmitResult::Continue;
1901 SpirvShader::EmitResult SpirvShader::EmitCompositeConstruct(InsnIterator insn, EmitState *state) const
1903 auto routine = state->routine;
1904 auto &type = getType(insn.word(1));
1905 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1908 for (auto i = 0u; i < insn.wordCount() - 3; i++)
1910 Object::ID srcObjectId = insn.word(3u + i);
1911 auto & srcObject = getObject(srcObjectId);
1912 auto & srcObjectTy = getType(srcObject.type);
1913 GenericValue srcObjectAccess(this, routine, srcObjectId);
1915 for (auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
1917 dst.move(offset++, srcObjectAccess.Float(j));
1921 return EmitResult::Continue;
1924 SpirvShader::EmitResult SpirvShader::EmitCompositeInsert(InsnIterator insn, EmitState *state) const
1926 auto routine = state->routine;
1927 Type::ID resultTypeId = insn.word(1);
1928 auto &type = getType(resultTypeId);
1929 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1930 auto &newPartObject = getObject(insn.word(3));
1931 auto &newPartObjectTy = getType(newPartObject.type);
1932 auto firstNewComponent = WalkLiteralAccessChain(resultTypeId, insn.wordCount() - 5, insn.wordPointer(5));
1934 GenericValue srcObjectAccess(this, routine, insn.word(4));
1935 GenericValue newPartObjectAccess(this, routine, insn.word(3));
1937 // old components before
1938 for (auto i = 0u; i < firstNewComponent; i++)
1940 dst.move(i, srcObjectAccess.Float(i));
1943 for (auto i = 0u; i < newPartObjectTy.sizeInComponents; i++)
1945 dst.move(firstNewComponent + i, newPartObjectAccess.Float(i));
1947 // old components after
1948 for (auto i = firstNewComponent + newPartObjectTy.sizeInComponents; i < type.sizeInComponents; i++)
1950 dst.move(i, srcObjectAccess.Float(i));
1953 return EmitResult::Continue;
1956 SpirvShader::EmitResult SpirvShader::EmitCompositeExtract(InsnIterator insn, EmitState *state) const
1958 auto routine = state->routine;
1959 auto &type = getType(insn.word(1));
1960 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1961 auto &compositeObject = getObject(insn.word(3));
1962 Type::ID compositeTypeId = compositeObject.definition.word(1);
1963 auto firstComponent = WalkLiteralAccessChain(compositeTypeId, insn.wordCount() - 4, insn.wordPointer(4));
1965 GenericValue compositeObjectAccess(this, routine, insn.word(3));
1966 for (auto i = 0u; i < type.sizeInComponents; i++)
1968 dst.move(i, compositeObjectAccess.Float(firstComponent + i));
1971 return EmitResult::Continue;
1974 SpirvShader::EmitResult SpirvShader::EmitVectorShuffle(InsnIterator insn, EmitState *state) const
1976 auto routine = state->routine;
1977 auto &type = getType(insn.word(1));
1978 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1980 // Note: number of components in result type, first half type, and second
1981 // half type are all independent.
1982 auto &firstHalfType = getType(getObject(insn.word(3)).type);
1984 GenericValue firstHalfAccess(this, routine, insn.word(3));
1985 GenericValue secondHalfAccess(this, routine, insn.word(4));
1987 for (auto i = 0u; i < type.sizeInComponents; i++)
1989 auto selector = insn.word(5 + i);
1990 if (selector == static_cast<uint32_t>(-1))
1992 // Undefined value. Until we decide to do real undef values, zero is as good
1994 dst.move(i, RValue<SIMD::Float>(0.0f));
1996 else if (selector < firstHalfType.sizeInComponents)
1998 dst.move(i, firstHalfAccess.Float(selector));
2002 dst.move(i, secondHalfAccess.Float(selector - firstHalfType.sizeInComponents));
2006 return EmitResult::Continue;
2009 SpirvShader::EmitResult SpirvShader::EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const
2011 auto routine = state->routine;
2012 auto &type = getType(insn.word(1));
2013 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2014 auto &srcType = getType(getObject(insn.word(3)).type);
2016 GenericValue src(this, routine, insn.word(3));
2017 GenericValue index(this, routine, insn.word(4));
2019 SIMD::UInt v = SIMD::UInt(0);
2021 for (auto i = 0u; i < srcType.sizeInComponents; i++)
2023 v |= CmpEQ(index.UInt(0), SIMD::UInt(i)) & src.UInt(i);
2027 return EmitResult::Continue;
2030 SpirvShader::EmitResult SpirvShader::EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const
2032 auto routine = state->routine;
2033 auto &type = getType(insn.word(1));
2034 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2036 GenericValue src(this, routine, insn.word(3));
2037 GenericValue component(this, routine, insn.word(4));
2038 GenericValue index(this, routine, insn.word(5));
2040 for (auto i = 0u; i < type.sizeInComponents; i++)
2042 SIMD::UInt mask = CmpEQ(SIMD::UInt(i), index.UInt(0));
2043 dst.move(i, (src.UInt(i) & ~mask) | (component.UInt(0) & mask));
2045 return EmitResult::Continue;
2048 SpirvShader::EmitResult SpirvShader::EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const
2050 auto routine = state->routine;
2051 auto &type = getType(insn.word(1));
2052 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2053 auto lhs = GenericValue(this, routine, insn.word(3));
2054 auto rhs = GenericValue(this, routine, insn.word(4));
2056 for (auto i = 0u; i < type.sizeInComponents; i++)
2058 dst.move(i, lhs.Float(i) * rhs.Float(0));
2061 return EmitResult::Continue;
2064 SpirvShader::EmitResult SpirvShader::EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const
2066 auto routine = state->routine;
2067 auto &type = getType(insn.word(1));
2068 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2069 auto lhs = GenericValue(this, routine, insn.word(3));
2070 auto rhs = GenericValue(this, routine, insn.word(4));
2071 auto rhsType = getType(getObject(insn.word(4)).type);
2073 for (auto i = 0u; i < type.sizeInComponents; i++)
2075 SIMD::Float v = lhs.Float(i) * rhs.Float(0);
2076 for (auto j = 1u; j < rhsType.sizeInComponents; j++)
2078 v += lhs.Float(i + type.sizeInComponents * j) * rhs.Float(j);
2083 return EmitResult::Continue;
2086 SpirvShader::EmitResult SpirvShader::EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const
2088 auto routine = state->routine;
2089 auto &type = getType(insn.word(1));
2090 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2091 auto lhs = GenericValue(this, routine, insn.word(3));
2092 auto rhs = GenericValue(this, routine, insn.word(4));
2093 auto lhsType = getType(getObject(insn.word(3)).type);
2095 for (auto i = 0u; i < type.sizeInComponents; i++)
2097 SIMD::Float v = lhs.Float(0) * rhs.Float(i * lhsType.sizeInComponents);
2098 for (auto j = 1u; j < lhsType.sizeInComponents; j++)
2100 v += lhs.Float(j) * rhs.Float(i * lhsType.sizeInComponents + j);
2105 return EmitResult::Continue;
2108 SpirvShader::EmitResult SpirvShader::EmitUnaryOp(InsnIterator insn, EmitState *state) const
// Emits a component-wise unary operation (negation, conversions, bitcast,
// derivatives). Derivative opcodes exploit the 2x2 fragment-quad lane
// layout and therefore assume SIMD::Width == 4.
// NOTE(review): 'break;' lines after each case and some 'case' labels
// (e.g. before the IsInf/IsNan moves) fall in dropped lines.
2110 auto routine = state->routine;
2111 auto &type = getType(insn.word(1));
2112 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2113 auto src = GenericValue(this, routine, insn.word(3));
2115 for (auto i = 0u; i < type.sizeInComponents; i++)
2117 switch (insn.opcode())
2120 case spv::OpLogicalNot: // logical not == bitwise not due to all-bits boolean representation
2121 dst.move(i, ~src.UInt(i));
2123 case spv::OpSNegate:
2124 dst.move(i, -src.Int(i));
2126 case spv::OpFNegate:
2127 dst.move(i, -src.Float(i));
2129 case spv::OpConvertFToU:
2130 dst.move(i, SIMD::UInt(src.Float(i)));
2132 case spv::OpConvertFToS:
2133 dst.move(i, SIMD::Int(src.Float(i)));
2135 case spv::OpConvertSToF:
2136 dst.move(i, SIMD::Float(src.Int(i)));
2138 case spv::OpConvertUToF:
2139 dst.move(i, SIMD::Float(src.UInt(i)));
2141 case spv::OpBitcast:
2142 dst.move(i, src.Float(i));
// (the case labels for the next two moves — IsInf / IsNan checks — fall
// in dropped lines)
2145 dst.move(i, IsInf(src.Float(i)));
2148 dst.move(i, IsNan(src.Float(i)));
2151 case spv::OpDPdxCoarse:
2152 // Derivative instructions: FS invocations are laid out like so:
// Coarse ddx: difference across the quad's first row, broadcast to all
// four lanes.
2155 static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
2156 dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
2159 case spv::OpDPdyCoarse:
2160 dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
2163 case spv::OpFwidthCoarse:
2164 dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))
2165 + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
2167 case spv::OpDPdxFine:
// Fine ddx: each row of the quad gets its own horizontal difference.
2169 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
2170 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
2171 SIMD::Float v = SIMD::Float(firstRow);
2172 v = Insert(v, secondRow, 2);
2173 v = Insert(v, secondRow, 3);
// (the dst.move(i, v) for this case falls in a dropped line)
2177 case spv::OpDPdyFine:
// Fine ddy: each column of the quad gets its own vertical difference.
2179 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
2180 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
2181 SIMD::Float v = SIMD::Float(firstColumn);
2182 v = Insert(v, secondColumn, 1);
2183 v = Insert(v, secondColumn, 3);
2187 case spv::OpFwidthFine:
// Fine fwidth: |ddx_fine| + |ddy_fine|, built from both row and column
// differences.
2189 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
2190 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
2191 SIMD::Float dpdx = SIMD::Float(firstRow);
2192 dpdx = Insert(dpdx, secondRow, 2);
2193 dpdx = Insert(dpdx, secondRow, 3);
2194 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
2195 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
2196 SIMD::Float dpdy = SIMD::Float(firstColumn);
2197 dpdy = Insert(dpdy, secondColumn, 1);
2198 dpdy = Insert(dpdy, secondColumn, 3);
2199 dst.move(i, Abs(dpdx) + Abs(dpdy));
2203 UNIMPLEMENTED("Unhandled unary operator %s", OpcodeName(insn.opcode()).c_str());
2207 return EmitResult::Continue;
2210 SpirvShader::EmitResult SpirvShader::EmitBinaryOp(InsnIterator insn, EmitState *state) const
// Emits a component-wise binary operation. The loop runs over the LHS
// type's component count (for the *MulExtended ops the result struct is
// twice that wide: lo parts first, hi parts at i + lhs width).
// NOTE(review): 'break;' lines and several 'case' labels (the arithmetic
// opcodes preceding the first moves) fall in dropped lines.
2212 auto routine = state->routine;
2213 auto &type = getType(insn.word(1));
2214 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2215 auto &lhsType = getType(getObject(insn.word(3)).type);
2216 auto lhs = GenericValue(this, routine, insn.word(3));
2217 auto rhs = GenericValue(this, routine, insn.word(4));
2219 for (auto i = 0u; i < lhsType.sizeInComponents; i++)
2221 switch (insn.opcode())
// (integer add/sub/mul case labels fall in dropped lines)
2224 dst.move(i, lhs.Int(i) + rhs.Int(i));
2227 dst.move(i, lhs.Int(i) - rhs.Int(i));
2230 dst.move(i, lhs.Int(i) * rhs.Int(i));
// Signed division: clamp the two UB-producing cases (divide by zero, and
// INT_MIN / -1 overflow) to defined behavior before dividing.
2234 SIMD::Int a = lhs.Int(i);
2235 SIMD::Int b = rhs.Int(i);
2236 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2237 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
// (the division itself and closing lines fall in dropped lines)
2243 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2244 dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
// Signed remainder, same divide-by-zero/overflow clamping as above.
2249 SIMD::Int a = lhs.Int(i);
2250 SIMD::Int b = rhs.Int(i);
2251 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2252 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
// Signed modulo (OpSMod): remainder adjusted to take the divisor's sign.
2258 SIMD::Int a = lhs.Int(i);
2259 SIMD::Int b = rhs.Int(i);
2260 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2261 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2263 // If a and b have opposite signs, the remainder operation takes
2264 // the sign from a but OpSMod is supposed to take the sign of b.
2265 // Adding b will ensure that the result has the correct sign and
2266 // that it is still congruent to a modulo b.
2268 // See also http://mathforum.org/library/drmath/view/52343.html
2269 auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
2270 auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
2271 dst.move(i, As<SIMD::Float>(fixedMod));
// Unsigned modulo; OR-ing the zero mask into the divisor avoids the trap.
2276 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2277 dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
// (the OpIEqual label falls in a dropped line before OpLogicalEqual)
2281 case spv::OpLogicalEqual:
2282 dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
2284 case spv::OpINotEqual:
2285 case spv::OpLogicalNotEqual:
2286 dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
2288 case spv::OpUGreaterThan:
2289 dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
2291 case spv::OpSGreaterThan:
2292 dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i)));
2294 case spv::OpUGreaterThanEqual:
2295 dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
2297 case spv::OpSGreaterThanEqual:
2298 dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i)));
2300 case spv::OpULessThan:
2301 dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
2303 case spv::OpSLessThan:
2304 dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i)));
2306 case spv::OpULessThanEqual:
2307 dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
2309 case spv::OpSLessThanEqual:
2310 dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i)));
// (float add/sub/mul/div case labels fall in dropped lines)
2313 dst.move(i, lhs.Float(i) + rhs.Float(i));
2316 dst.move(i, lhs.Float(i) - rhs.Float(i));
2319 dst.move(i, lhs.Float(i) * rhs.Float(i));
2322 dst.move(i, lhs.Float(i) / rhs.Float(i));
2325 // TODO(b/126873455): inaccurate for values greater than 2^24
2326 dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
2329 dst.move(i, lhs.Float(i) % rhs.Float(i));
2331 case spv::OpFOrdEqual:
2332 dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
2334 case spv::OpFUnordEqual:
2335 dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
2337 case spv::OpFOrdNotEqual:
2338 dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
2340 case spv::OpFUnordNotEqual:
2341 dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
2343 case spv::OpFOrdLessThan:
2344 dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i)));
2346 case spv::OpFUnordLessThan:
2347 dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i)));
2349 case spv::OpFOrdGreaterThan:
2350 dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i)));
2352 case spv::OpFUnordGreaterThan:
2353 dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
2355 case spv::OpFOrdLessThanEqual:
2356 dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i)));
2358 case spv::OpFUnordLessThanEqual:
2359 dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i)));
2361 case spv::OpFOrdGreaterThanEqual:
2362 dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i)));
2364 case spv::OpFUnordGreaterThanEqual:
2365 dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
2367 case spv::OpShiftRightLogical:
2368 dst.move(i, lhs.UInt(i) >> rhs.UInt(i));
2370 case spv::OpShiftRightArithmetic:
2371 dst.move(i, lhs.Int(i) >> rhs.Int(i));
2373 case spv::OpShiftLeftLogical:
2374 dst.move(i, lhs.UInt(i) << rhs.UInt(i));
2376 case spv::OpBitwiseOr:
2377 case spv::OpLogicalOr:
2378 dst.move(i, lhs.UInt(i) | rhs.UInt(i));
2380 case spv::OpBitwiseXor:
2381 dst.move(i, lhs.UInt(i) ^ rhs.UInt(i));
2383 case spv::OpBitwiseAnd:
2384 case spv::OpLogicalAnd:
2385 dst.move(i, lhs.UInt(i) & rhs.UInt(i));
2387 case spv::OpSMulExtended:
2388 // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
2389 // In our flat view then, component i is the i'th component of the first member;
2390 // component i + N is the i'th component of the second member.
2391 dst.move(i, lhs.Int(i) * rhs.Int(i));
2392 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
2394 case spv::OpUMulExtended:
2395 dst.move(i, lhs.UInt(i) * rhs.UInt(i));
2396 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
2399 UNIMPLEMENTED("Unhandled binary operator %s", OpcodeName(insn.opcode()).c_str());
2403 return EmitResult::Continue;
2406 SpirvShader::EmitResult SpirvShader::EmitDot(InsnIterator insn, EmitState *state) const
2408 auto routine = state->routine;
2409 auto &type = getType(insn.word(1));
2410 ASSERT(type.sizeInComponents == 1);
2411 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2412 auto &lhsType = getType(getObject(insn.word(3)).type);
2413 auto lhs = GenericValue(this, routine, insn.word(3));
2414 auto rhs = GenericValue(this, routine, insn.word(4));
2416 dst.move(0, Dot(lhsType.sizeInComponents, lhs, rhs));
2417 return EmitResult::Continue;
2420 SpirvShader::EmitResult SpirvShader::EmitSelect(InsnIterator insn, EmitState *state) const
2422 auto routine = state->routine;
2423 auto &type = getType(insn.word(1));
2424 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2425 auto cond = GenericValue(this, routine, insn.word(3));
2426 auto lhs = GenericValue(this, routine, insn.word(4));
2427 auto rhs = GenericValue(this, routine, insn.word(5));
2429 for (auto i = 0u; i < type.sizeInComponents; i++)
2431 dst.move(i, (cond.Int(i) & lhs.Int(i)) | (~cond.Int(i) & rhs.Int(i))); // FIXME: IfThenElse()
2434 return EmitResult::Continue;
// Emits code for a GLSL.std.450 extended instruction.
// insn layout: word(1) = result type id, word(2) = result id,
// word(4) = GLSLstd450 opcode, word(5) onward = operand ids.
// NOTE(review): brace/break lines are elided in this view; statements are
// grouped below by their case labels.
SpirvShader::EmitResult SpirvShader::EmitExtendedInstruction(InsnIterator insn, EmitState *state) const
	auto routine = state->routine;
	auto &type = getType(insn.word(1));
	auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
	auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));

	switch (extInstIndex)
	case GLSLstd450FAbs:
		// Component-wise floating-point absolute value.
		auto src = GenericValue(this, routine, insn.word(5));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, Abs(src.Float(i)));
	case GLSLstd450SAbs:
		// Component-wise signed integer absolute value.
		auto src = GenericValue(this, routine, insn.word(5));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, Abs(src.Int(i)));
	case GLSLstd450Cross:
		// 3-component cross product: dst = lhs x rhs.
		auto lhs = GenericValue(this, routine, insn.word(5));
		auto rhs = GenericValue(this, routine, insn.word(6));
		dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
		dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
		dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
	case GLSLstd450Floor:
		auto src = GenericValue(this, routine, insn.word(5));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, Floor(src.Float(i)));
	case GLSLstd450Trunc:
		auto src = GenericValue(this, routine, insn.word(5));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, Trunc(src.Float(i)));
	case GLSLstd450Ceil:
		auto src = GenericValue(this, routine, insn.word(5));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, Ceil(src.Float(i)));
	case GLSLstd450Fract:
		auto src = GenericValue(this, routine, insn.word(5));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, Frac(src.Float(i)));
	case GLSLstd450Round:
		auto src = GenericValue(this, routine, insn.word(5));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, Round(src.Float(i)));
	case GLSLstd450RoundEven:
		auto src = GenericValue(this, routine, insn.word(5));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			auto x = Round(src.Float(i));
			// dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
			// i.e. nudge Round()'s result toward the even neighbour, but only
			// for inputs exactly halfway between two integers.
			dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
					SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
	case GLSLstd450FMin:
		auto lhs = GenericValue(this, routine, insn.word(5));
		auto rhs = GenericValue(this, routine, insn.word(6));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
	case GLSLstd450FMax:
		auto lhs = GenericValue(this, routine, insn.word(5));
		auto rhs = GenericValue(this, routine, insn.word(6));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
	case GLSLstd450SMin:
		auto lhs = GenericValue(this, routine, insn.word(5));
		auto rhs = GenericValue(this, routine, insn.word(6));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
	case GLSLstd450SMax:
		auto lhs = GenericValue(this, routine, insn.word(5));
		auto rhs = GenericValue(this, routine, insn.word(6));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
	case GLSLstd450UMin:
		auto lhs = GenericValue(this, routine, insn.word(5));
		auto rhs = GenericValue(this, routine, insn.word(6));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
	case GLSLstd450UMax:
		auto lhs = GenericValue(this, routine, insn.word(5));
		auto rhs = GenericValue(this, routine, insn.word(6));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
	case GLSLstd450Step:
		// 0.0 where x < edge, else 1.0: the CmpNLT mask gates the bit
		// pattern of the constant 1.0f.
		auto edge = GenericValue(this, routine, insn.word(5));
		auto x = GenericValue(this, routine, insn.word(6));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)))
	case GLSLstd450SmoothStep:
		// Hermite interpolation: t = clamp((x-e0)/(e1-e0), 0, 1); t*t*(3-2t).
		auto edge0 = GenericValue(this, routine, insn.word(5));
		auto edge1 = GenericValue(this, routine, insn.word(6));
		auto x = GenericValue(this, routine, insn.word(7));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
					(edge1.Float(i) - edge0.Float(i)), SIMD::Float(0.0f)), SIMD::Float(1.0f));
			dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
	case GLSLstd450FMix:
		// Linear blend: x + a * (y - x).
		auto x = GenericValue(this, routine, insn.word(5));
		auto y = GenericValue(this, routine, insn.word(6));
		auto a = GenericValue(this, routine, insn.word(7));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
	case GLSLstd450FClamp:
		auto x = GenericValue(this, routine, insn.word(5));
		auto minVal = GenericValue(this, routine, insn.word(6));
		auto maxVal = GenericValue(this, routine, insn.word(7));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
	case GLSLstd450SClamp:
		auto x = GenericValue(this, routine, insn.word(5));
		auto minVal = GenericValue(this, routine, insn.word(6));
		auto maxVal = GenericValue(this, routine, insn.word(7));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
	case GLSLstd450UClamp:
		auto x = GenericValue(this, routine, insn.word(5));
		auto minVal = GenericValue(this, routine, insn.word(6));
		auto maxVal = GenericValue(this, routine, insn.word(7));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
	case GLSLstd450FSign:
		// Selects -1.0 / +1.0 via comparison masks over the constants'
		// bit patterns; lanes matching neither comparison produce 0.0.
		auto src = GenericValue(this, routine, insn.word(5));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
			auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
			dst.move(i, neg | pos);
	case GLSLstd450SSign:
		// Integer counterpart of FSign: -1, 0 or +1 per component.
		auto src = GenericValue(this, routine, insn.word(5));
		for (auto i = 0u; i < type.sizeInComponents; i++)
			auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
			auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
			dst.move(i, neg | pos);
	case GLSLstd450Reflect:
		// I - 2 * dot(I, N) * N.
		auto I = GenericValue(this, routine, insn.word(5));
		auto N = GenericValue(this, routine, insn.word(6));

		SIMD::Float d = Dot(type.sizeInComponents, I, N);

		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
	case GLSLstd450Refract:
		// Refraction vector; lanes where k < 0 (total internal reflection)
		// are zeroed by the 'pos' mask.
		auto I = GenericValue(this, routine, insn.word(5));
		auto N = GenericValue(this, routine, insn.word(6));
		auto eta = GenericValue(this, routine, insn.word(7));

		SIMD::Float d = Dot(type.sizeInComponents, I, N);
		SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
		SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
		SIMD::Float t = (eta.Float(0) * d + Sqrt(k));

		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
	case GLSLstd450FaceForward:
		// N where dot(I, Nref) < 0, otherwise -N, selected per lane.
		auto N = GenericValue(this, routine, insn.word(5));
		auto I = GenericValue(this, routine, insn.word(6));
		auto Nref = GenericValue(this, routine, insn.word(7));

		SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
		SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));

		for (auto i = 0u; i < type.sizeInComponents; i++)
			auto n = N.Float(i);
			dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
	case GLSLstd450Length:
		// sqrt(dot(x, x)); operand width taken from the operand's own type,
		// since the result is scalar.
		auto x = GenericValue(this, routine, insn.word(5));
		SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);

		dst.move(0, Sqrt(d));
	case GLSLstd450Normalize:
		// x / length(x), computed as x * (1 / sqrt(dot(x, x))).
		auto x = GenericValue(this, routine, insn.word(5));
		SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
		SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);

		for (auto i = 0u; i < type.sizeInComponents; i++)
			dst.move(i, invLength * x.Float(i));
	case GLSLstd450Distance:
		auto p0 = GenericValue(this, routine, insn.word(5));
		auto p1 = GenericValue(this, routine, insn.word(6));
		auto p0Type = getType(getObject(insn.word(5)).type);

		// sqrt(dot(p0-p1, p0-p1))
		SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));

		for (auto i = 1u; i < p0Type.sizeInComponents; i++)
			d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));

		dst.move(0, Sqrt(d));

		UNIMPLEMENTED("Unhandled ExtInst %d", extInstIndex);

	return EmitResult::Continue;
2769 std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
2771 switch(memorySemantics)
2773 case spv::MemorySemanticsMaskNone: return std::memory_order_relaxed;
2774 case spv::MemorySemanticsAcquireMask: return std::memory_order_acquire;
2775 case spv::MemorySemanticsReleaseMask: return std::memory_order_release;
2776 case spv::MemorySemanticsAcquireReleaseMask: return std::memory_order_acq_rel;
2777 case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel; // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
2779 UNREACHABLE("MemorySemanticsMask %x", memorySemantics);
2780 return std::memory_order_acq_rel;
2784 SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const
2786 SIMD::Float d = x.Float(0) * y.Float(0);
2788 for (auto i = 1u; i < numComponents; i++)
2790 d += x.Float(i) * y.Float(i);
2796 SpirvShader::EmitResult SpirvShader::EmitAny(InsnIterator insn, EmitState *state) const
2798 auto routine = state->routine;
2799 auto &type = getType(insn.word(1));
2800 ASSERT(type.sizeInComponents == 1);
2801 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2802 auto &srcType = getType(getObject(insn.word(3)).type);
2803 auto src = GenericValue(this, routine, insn.word(3));
2805 SIMD::UInt result = src.UInt(0);
2807 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2809 result |= src.UInt(i);
2812 dst.move(0, result);
2813 return EmitResult::Continue;
2816 SpirvShader::EmitResult SpirvShader::EmitAll(InsnIterator insn, EmitState *state) const
2818 auto routine = state->routine;
2819 auto &type = getType(insn.word(1));
2820 ASSERT(type.sizeInComponents == 1);
2821 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2822 auto &srcType = getType(getObject(insn.word(3)).type);
2823 auto src = GenericValue(this, routine, insn.word(3));
2825 SIMD::UInt result = src.UInt(0);
2827 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2829 result &= src.UInt(i);
2832 dst.move(0, result);
2833 return EmitResult::Continue;
2836 SpirvShader::EmitResult SpirvShader::EmitBranch(InsnIterator insn, EmitState *state) const
2838 auto target = Block::ID(insn.word(1));
2839 auto edge = Block::Edge{state->currentBlock, target};
2840 state->edgeActiveLaneMasks.emplace(edge, state->activeLaneMask());
2841 return EmitResult::Terminator;
2844 SpirvShader::EmitResult SpirvShader::EmitBranchConditional(InsnIterator insn, EmitState *state) const
2846 auto block = getBlock(state->currentBlock);
2847 ASSERT(block.branchInstruction == insn);
2849 auto condId = Object::ID(block.branchInstruction.word(1));
2850 auto trueBlockId = Block::ID(block.branchInstruction.word(2));
2851 auto falseBlockId = Block::ID(block.branchInstruction.word(3));
2853 auto cond = GenericValue(this, state->routine, condId);
2854 ASSERT_MSG(getType(getObject(condId).type).sizeInComponents == 1, "Condition must be a Boolean type scalar");
2856 // TODO: Optimize for case where all lanes take same path.
2858 state->addOutputActiveLaneMaskEdge(trueBlockId, cond.Int(0));
2859 state->addOutputActiveLaneMaskEdge(falseBlockId, ~cond.Int(0));
2861 return EmitResult::Terminator;
2864 SpirvShader::EmitResult SpirvShader::EmitSwitch(InsnIterator insn, EmitState *state) const
2866 auto block = getBlock(state->currentBlock);
2867 ASSERT(block.branchInstruction == insn);
2869 auto selId = Object::ID(block.branchInstruction.word(1));
2871 auto sel = GenericValue(this, state->routine, selId);
2872 ASSERT_MSG(getType(getObject(selId).type).sizeInComponents == 1, "Selector must be a scalar");
2874 auto numCases = (block.branchInstruction.wordCount() - 3) / 2;
2876 // TODO: Optimize for case where all lanes take same path.
2878 SIMD::Int defaultLaneMask = state->activeLaneMask();
2880 // Gather up the case label matches and calculate defaultLaneMask.
2881 std::vector<RValue<SIMD::Int>> caseLabelMatches;
2882 caseLabelMatches.reserve(numCases);
2883 for (uint32_t i = 0; i < numCases; i++)
2885 auto label = block.branchInstruction.word(i * 2 + 3);
2886 auto caseBlockId = Block::ID(block.branchInstruction.word(i * 2 + 4));
2887 auto caseLabelMatch = CmpEQ(sel.Int(0), SIMD::Int(label));
2888 state->addOutputActiveLaneMaskEdge(caseBlockId, caseLabelMatch);
2889 defaultLaneMask &= ~caseLabelMatch;
2892 auto defaultBlockId = Block::ID(block.branchInstruction.word(2));
2893 state->addOutputActiveLaneMaskEdge(defaultBlockId, defaultLaneMask);
2895 return EmitResult::Terminator;
2898 SpirvShader::EmitResult SpirvShader::EmitUnreachable(InsnIterator insn, EmitState *state) const
2900 // TODO: Log something in this case?
2901 state->setActiveLaneMask(SIMD::Int(0));
2902 return EmitResult::Terminator;
2905 SpirvShader::EmitResult SpirvShader::EmitReturn(InsnIterator insn, EmitState *state) const
2907 state->setActiveLaneMask(SIMD::Int(0));
2908 return EmitResult::Terminator;
// Emits an OpPhi: for each lane, selects the incoming value belonging to
// the predecessor block that lane arrived from, using the per-edge active
// lane masks recorded by the branch emitters.
SpirvShader::EmitResult SpirvShader::EmitPhi(InsnIterator insn, EmitState *state) const
	auto routine = state->routine;
	auto typeId = Type::ID(insn.word(1));
	auto type = getType(typeId);
	auto objectId = Object::ID(insn.word(2));

	auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);

	// Operands come in pairs: word(w) = incoming value id,
	// word(w + 1) = the predecessor block it flows in from.
	for (uint32_t w = 3; w < insn.wordCount(); w += 2)
		auto varId = Object::ID(insn.word(w + 0));
		auto blockId = Block::ID(insn.word(w + 1));

		auto in = GenericValue(this, routine, varId);
		auto mask = state->getActiveLaneMaskEdge(blockId, state->currentBlock);

		for (uint32_t i = 0; i < type.sizeInComponents; i++)
			// Mask the incoming value down to the lanes that used this edge,
			// then OR it into the accumulated result.
			auto inMasked = in.Int(i) & mask;
			// NOTE(review): 'first' is declared/updated on lines elided from
			// this view — presumably true only for the initial operand pair,
			// so the first write replaces rather than ORs. TODO confirm.
			dst.replace(i, first ? inMasked : (dst.Int(i) | inMasked));

	return EmitResult::Continue;
// After the shader body has run, copies each output interface variable's
// value into the routine's flat 'outputs' array, addressed by the
// variable's Location/Component decorations.
void SpirvShader::emitEpilog(SpirvRoutine *routine) const
	for (auto insn : *this)
		switch (insn.opcode())
		case spv::OpVariable:
			Object::ID resultId = insn.word(2);
			auto &object = getObject(resultId);
			auto &objectTy = getType(object.type);
			// Only variables on the output interface are copied out here.
			if (object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
				auto &dst = routine->getValue(resultId);
				VisitInterface(resultId,
						[&](Decorations const &d, AttribType type) {
							// Each location spans four scalar slots; Component
							// selects within the location.
							auto scalarSlot = d.Location << 2 | d.Component;
							// NOTE(review): 'offset' is declared on a line elided
							// from this view — presumably a running component
							// index into dst. TODO confirm.
							routine->outputs[scalarSlot] = dst[offset++];
// Constructs a Block over the instruction range [begin, end): captures the
// terminator ('branchInstruction'), the successor block ids ('outs'), any
// merge/continue targets, and classifies the block's kind from the
// terminator plus the preceding merge instruction.
// NOTE(review): several case labels, 'break's and kind assignments are on
// lines elided from this view; the grouping below follows the visible code.
SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end)
	// Default to a Simple, this may change later.
	kind = Block::Simple;

	// Walk the instructions to find the last two of the block.
	InsnIterator insns[2];
	for (auto insn : *this)
		insns[0] = insns[1];

	switch (insns[1].opcode())
		// (Unconditional branch terminator: single successor in word(1).)
		branchInstruction = insns[1];
		outs.emplace(Block::ID(branchInstruction.word(1)));

		switch (insns[0].opcode())
		case spv::OpLoopMerge:
			// Loop header: word(1) = merge block, word(2) = continue target.
			mergeInstruction = insns[0];
			mergeBlock = Block::ID(mergeInstruction.word(1));
			continueTarget = Block::ID(mergeInstruction.word(2));

			// (Presumably the no-merge default path — TODO confirm.)
			kind = Block::Simple;

	case spv::OpBranchConditional:
		branchInstruction = insns[1];
		outs.emplace(Block::ID(branchInstruction.word(2)));  // true target
		outs.emplace(Block::ID(branchInstruction.word(3)));  // false target

		switch (insns[0].opcode())
		case spv::OpSelectionMerge:
			kind = StructuredBranchConditional;
			mergeInstruction = insns[0];
			mergeBlock = Block::ID(mergeInstruction.word(1));

		case spv::OpLoopMerge:
			mergeInstruction = insns[0];
			mergeBlock = Block::ID(mergeInstruction.word(1));
			continueTarget = Block::ID(mergeInstruction.word(2));

			// No preceding merge instruction: unstructured control flow.
			kind = UnstructuredBranchConditional;

		// (Switch terminator: word(2) = default target, then
		// (literal, target) pairs starting at word(4).)
		branchInstruction = insns[1];
		outs.emplace(Block::ID(branchInstruction.word(2)));
		for (uint32_t w = 4; w < branchInstruction.wordCount(); w += 2)
			outs.emplace(Block::ID(branchInstruction.word(w)));

		switch (insns[0].opcode())
		case spv::OpSelectionMerge:
			kind = StructuredSwitch;
			mergeInstruction = insns[0];
			mergeBlock = Block::ID(mergeInstruction.word(1));

			// No preceding selection merge: unstructured switch.
			kind = UnstructuredSwitch;
// Returns whether any directed path of CFG edges leads from block 'from'
// to block 'to', via a breadth-first walk of each block's successor set.
bool SpirvShader::existsPath(Block::ID from, Block::ID to) const
	// TODO: Optimize: This can be cached on the block.
	std::queue<Block::ID> pending;
	pending.emplace(from);
	while (pending.size() > 0)
		auto id = pending.front();
		for (auto out : getBlock(id).outs)
			// NOTE(review): 'seen' is declared on a line elided from this
			// view — presumably the set of already-queued/visited block ids,
			// preventing revisits in cyclic graphs. TODO confirm.
			if (seen.count(out) != 0) { continue; }
			if (out == to) { return true; }
			pending.emplace(out);
3080 void SpirvShader::EmitState::addOutputActiveLaneMaskEdge(Block::ID to, RValue<SIMD::Int> mask)
3082 addActiveLaneMaskEdge(currentBlock, to, mask & activeLaneMask());
3085 void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask)
3087 auto edge = Block::Edge{from, to};
3088 auto it = edgeActiveLaneMasks.find(edge);
3089 if (it == edgeActiveLaneMasks.end())
3091 edgeActiveLaneMasks.emplace(edge, mask);
3095 auto combined = it->second | mask;
3096 edgeActiveLaneMasks.erase(edge);
3097 edgeActiveLaneMasks.emplace(edge, combined);
3101 RValue<SIMD::Int> SpirvShader::EmitState::getActiveLaneMaskEdge(Block::ID from, Block::ID to)
3103 auto edge = Block::Edge{from, to};
3104 auto it = edgeActiveLaneMasks.find(edge);
3105 ASSERT_MSG(it != edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
3109 SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
3110 pipelineLayout(pipelineLayout)