1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include <spirv/unified1/spirv.hpp>
16 #include <spirv/unified1/GLSL.std.450.h>
17 #include "SpirvShader.hpp"
18 #include "System/Math.hpp"
19 #include "Vulkan/VkBuffer.hpp"
20 #include "Vulkan/VkDebug.hpp"
21 #include "Vulkan/VkPipelineLayout.hpp"
22 #include "Device/Config.hpp"
27 #undef Bool // b/127920555
32 rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints)
34 return rr::SignMask(ints) != 0;
37 rr::RValue<rr::Bool> AnyFalse(rr::RValue<sw::SIMD::Int> const &ints)
39 return rr::SignMask(~ints) != 0;
45 volatile int SpirvShader::serialCounter = 1; // Start at 1, 0 is invalid shader.
// Analysis pass: walks the whole SPIR-V instruction stream exactly once,
// recording types, objects, decorations, execution modes, basic-block
// boundaries and the entrypoint's first label, all consumed later by emit().
// NOTE(review): this listing appears to have lines elided (several braces,
// 'break's and case labels are missing between statements); the code text
// below is preserved verbatim — only comments were added.
SpirvShader::SpirvShader(InsnStore const &insns)
    : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
    outputs{MAX_INTERFACE_COMPONENTS},
    serialID{serialCounter++}, modes{}
    // An empty instruction store is malformed input.
    ASSERT(insns.size() > 0);
    // Simplifying assumptions (to be satisfied by earlier transformations)
    // - There is exactly one entrypoint in the module, and it's the one we want
    // - The only input/output OpVariables present are those used by the entrypoint
    // Block tracking: currentBlock is 0 (invalid) while not inside a block;
    // blockStart remembers the first instruction of the open block.
    Block::ID currentBlock;
    InsnIterator blockStart;
    for (auto insn : *this)
    switch (insn.opcode())
    case spv::OpExecutionMode:
    ProcessExecutionMode(insn);
    // (OpDecorate handling — case label elided in this listing.)
    // Record the decoration against its target id; decorations may carry
    // zero or one literal operand.
    TypeOrObjectID targetId = insn.word(1);
    auto decoration = static_cast<spv::Decoration>(insn.word(2));
    decorations[targetId].Apply(
    insn.wordCount() > 3 ? insn.word(3) : 0);
    if (decoration == spv::DecorationCentroid)
    modes.NeedsCentroid = true;
    case spv::OpMemberDecorate:
    // Same as OpDecorate but per struct member; member decoration storage
    // grows lazily to the highest member index seen.
    Type::ID targetId = insn.word(1);
    auto memberIndex = insn.word(2);
    auto &d = memberDecorations[targetId];
    if (memberIndex >= d.size())
    d.resize(memberIndex + 1); // on demand; exact size would require another pass...
    auto decoration = static_cast<spv::Decoration>(insn.word(3));
    insn.wordCount() > 4 ? insn.word(4) : 0);
    if (decoration == spv::DecorationCentroid)
    modes.NeedsCentroid = true;
    case spv::OpDecorationGroup:
    // Nothing to do here. We don't need to record the definition of the group; we'll just have
    // the bundle of decorations float around. If we were to ever walk the decorations directly,
    // we might think about introducing this as a real Object.
    case spv::OpGroupDecorate:
    auto const &srcDecorations = decorations[insn.word(1)];
    for (auto i = 2u; i < insn.wordCount(); i++)
    // remaining operands are targets to apply the group to.
    decorations[insn.word(i)].Apply(srcDecorations);
    case spv::OpGroupMemberDecorate:
    auto const &srcDecorations = decorations[insn.word(1)];
    for (auto i = 2u; i < insn.wordCount(); i += 2)
    // remaining operands are pairs of <id>, literal for members to apply to.
    auto &d = memberDecorations[insn.word(i)];
    auto memberIndex = insn.word(i + 1);
    if (memberIndex >= d.size())
    d.resize(memberIndex + 1); // on demand resize, see above...
    d[memberIndex].Apply(srcDecorations);
    // (OpLabel handling — case label elided.) A label opens a new block;
    // nested blocks are not possible, hence the assert.
    ASSERT(currentBlock.value() == 0);
    currentBlock = Block::ID(insn.word(1));
    // Branch Instructions (subset of Termination Instructions):
    case spv::OpBranchConditional:
    // Termination instruction:
    case spv::OpUnreachable:
    // Any terminator closes the current block, which spans from its label
    // up to and including this instruction.
    ASSERT(currentBlock.value() != 0);
    auto blockEnd = insn; blockEnd++;
    blocks[currentBlock] = Block(blockStart, blockEnd);
    currentBlock = Block::ID(0);
    if (insn.opcode() == spv::OpKill)
    modes.ContainsKill = true;
    case spv::OpLoopMerge:
    case spv::OpSelectionMerge:
    break; // Nothing to do in analysis pass.
    case spv::OpTypeVoid:
    case spv::OpTypeBool:
    case spv::OpTypeFloat:
    case spv::OpTypeVector:
    case spv::OpTypeMatrix:
    case spv::OpTypeImage:
    case spv::OpTypeSampler:
    case spv::OpTypeSampledImage:
    case spv::OpTypeArray:
    case spv::OpTypeRuntimeArray:
    case spv::OpTypeStruct:
    case spv::OpTypePointer:
    case spv::OpTypeFunction:
    // (Type declaration handling — presumably DeclareType(insn) — elided here.)
    case spv::OpVariable:
    Type::ID typeId = insn.word(1);
    Object::ID resultId = insn.word(2);
    auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
    if (insn.wordCount() > 4)
    UNIMPLEMENTED("Variable initializers not yet supported");
    auto &object = defs[resultId];
    object.kind = Object::Kind::Variable;
    object.definition = insn;
    object.type = typeId;
    object.pointerBase = insn.word(2); // base is itself
    ASSERT(getType(typeId).storageClass == storageClass);
    switch (storageClass)
    case spv::StorageClassInput:
    case spv::StorageClassOutput:
    ProcessInterfaceVariable(object);
    case spv::StorageClassUniform:
    case spv::StorageClassStorageBuffer:
    case spv::StorageClassPushConstant:
    // Descriptor- or push-constant-backed memory: addressed as real memory.
    object.kind = Object::Kind::PhysicalPointer;
    case spv::StorageClassPrivate:
    case spv::StorageClassFunction:
    break; // Correctly handled.
    case spv::StorageClassUniformConstant:
    case spv::StorageClassWorkgroup:
    case spv::StorageClassCrossWorkgroup:
    case spv::StorageClassGeneric:
    case spv::StorageClassAtomicCounter:
    case spv::StorageClassImage:
    UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
    UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
    case spv::OpConstant:
    CreateConstant(insn).constantValue[0] = insn.word(3);
    case spv::OpConstantFalse:
    CreateConstant(insn).constantValue[0] = 0; // represent boolean false as zero
    case spv::OpConstantTrue:
    CreateConstant(insn).constantValue[0] = ~0u; // represent boolean true as all bits set
    case spv::OpConstantNull:
    // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
    // OpConstantNull forms a constant of arbitrary type, all zeros.
    auto &object = CreateConstant(insn);
    auto &objectTy = getType(object.type);
    for (auto i = 0u; i < objectTy.sizeInComponents; i++)
    object.constantValue[i] = 0;
    case spv::OpConstantComposite:
    // Flatten the constituents' component values into this constant.
    auto &object = CreateConstant(insn);
    for (auto i = 0u; i < insn.wordCount() - 3; i++)
    auto &constituent = getObject(insn.word(i + 3));
    auto &constituentTy = getType(constituent.type);
    for (auto j = 0u; j < constituentTy.sizeInComponents; j++)
    object.constantValue[offset++] = constituent.constantValue[j];
    auto objectId = Object::ID(insn.word(2));
    auto decorationsIt = decorations.find(objectId);
    if (decorationsIt != decorations.end() &&
    decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
    // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
    // Decorating an object with the WorkgroupSize built-in
    // decoration will make that object contain the dimensions
    // of a local workgroup. If an object is decorated with the
    // WorkgroupSize decoration, this must take precedence over
    // any execution mode set for LocalSize.
    // The object decorated with WorkgroupSize must be declared
    // as a three-component vector of 32-bit integers.
    ASSERT(getType(object.type).sizeInComponents == 3);
    modes.WorkgroupSizeX = object.constantValue[0];
    modes.WorkgroupSizeY = object.constantValue[1];
    modes.WorkgroupSizeZ = object.constantValue[2];
    case spv::OpCapability:
    break; // Various capabilities will be declared, but none affect our code generation at this point.
    case spv::OpMemoryModel:
    break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
    case spv::OpEntryPoint:
    case spv::OpFunction:
    ASSERT(mainBlockId.value() == 0); // Multiple functions found
    // Scan forward to find the function's label.
    for (auto it = insn; it != end() && mainBlockId.value() == 0; it++)
    case spv::OpFunction:
    case spv::OpFunctionParameter:
    // (OpLabel case elided — this records the entry function's first block.)
    mainBlockId = Block::ID(it.word(1));
    WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str());
    ASSERT(mainBlockId.value() != 0); // Function's OpLabel not found
    case spv::OpFunctionEnd:
    // Due to preprocessing, the entrypoint and its function provide no value.
    case spv::OpExtInstImport:
    // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
    // Valid shaders will not attempt to import any other instruction sets.
    if (0 != strcmp("GLSL.std.450", reinterpret_cast<char const *>(insn.wordPointer(2))))
    UNIMPLEMENTED("Only GLSL extended instruction set is supported");
    case spv::OpMemberName:
    case spv::OpSourceContinued:
    case spv::OpSourceExtension:
    case spv::OpModuleProcessed:
    // No semantic impact
    case spv::OpFunctionParameter:
    case spv::OpFunctionCall:
    case spv::OpSpecConstant:
    case spv::OpSpecConstantComposite:
    case spv::OpSpecConstantFalse:
    case spv::OpSpecConstantOp:
    case spv::OpSpecConstantTrue:
    // These should have all been removed by preprocessing passes. If we see them here,
    // our assumptions are wrong and we will probably generate wrong code.
    UNIMPLEMENTED("%s should have already been lowered.", OpcodeName(insn.opcode()).c_str());
    case spv::OpFConvert:
    case spv::OpSConvert:
    case spv::OpUConvert:
    UNIMPLEMENTED("No valid uses for Op*Convert until we support multiple bit widths");
    case spv::OpAccessChain:
    case spv::OpInBoundsAccessChain:
    case spv::OpCompositeConstruct:
    case spv::OpCompositeInsert:
    case spv::OpCompositeExtract:
    case spv::OpVectorShuffle:
    case spv::OpVectorTimesScalar:
    case spv::OpMatrixTimesScalar:
    case spv::OpMatrixTimesVector:
    case spv::OpVectorTimesMatrix:
    case spv::OpMatrixTimesMatrix:
    case spv::OpVectorExtractDynamic:
    case spv::OpVectorInsertDynamic:
    case spv::OpNot: // Unary ops
    case spv::OpLogicalNot:
    case spv::OpIAdd: // Binary ops
    case spv::OpFOrdEqual:
    case spv::OpFUnordEqual:
    case spv::OpFOrdNotEqual:
    case spv::OpFUnordNotEqual:
    case spv::OpFOrdLessThan:
    case spv::OpFUnordLessThan:
    case spv::OpFOrdGreaterThan:
    case spv::OpFUnordGreaterThan:
    case spv::OpFOrdLessThanEqual:
    case spv::OpFUnordLessThanEqual:
    case spv::OpFOrdGreaterThanEqual:
    case spv::OpFUnordGreaterThanEqual:
    case spv::OpINotEqual:
    case spv::OpUGreaterThan:
    case spv::OpSGreaterThan:
    case spv::OpUGreaterThanEqual:
    case spv::OpSGreaterThanEqual:
    case spv::OpULessThan:
    case spv::OpSLessThan:
    case spv::OpULessThanEqual:
    case spv::OpSLessThanEqual:
    case spv::OpShiftRightLogical:
    case spv::OpShiftRightArithmetic:
    case spv::OpShiftLeftLogical:
    case spv::OpBitwiseOr:
    case spv::OpBitwiseXor:
    case spv::OpBitwiseAnd:
    case spv::OpLogicalOr:
    case spv::OpLogicalAnd:
    case spv::OpLogicalEqual:
    case spv::OpLogicalNotEqual:
    case spv::OpUMulExtended:
    case spv::OpSMulExtended:
    case spv::OpConvertFToU:
    case spv::OpConvertFToS:
    case spv::OpConvertSToF:
    case spv::OpConvertUToF:
    case spv::OpDPdxCoarse:
    case spv::OpDPdyCoarse:
    case spv::OpFwidthCoarse:
    case spv::OpDPdxFine:
    case spv::OpDPdyFine:
    case spv::OpFwidthFine:
    case spv::OpAtomicLoad:
    // Instructions that yield an intermediate value
    Type::ID typeId = insn.word(1);
    Object::ID resultId = insn.word(2);
    auto &object = defs[resultId];
    object.type = typeId;
    object.kind = Object::Kind::Value;
    object.definition = insn;
    if (insn.opcode() == spv::OpAccessChain || insn.opcode() == spv::OpInBoundsAccessChain)
    // interior ptr has two parts:
    // - logical base ptr, common across all lanes and known at compile time
    Object::ID baseId = insn.word(3);
    object.pointerBase = getObject(baseId).pointerBase;
    case spv::OpAtomicStore:
    // Don't need to do anything during analysis pass
    UNIMPLEMENTED("%s", OpcodeName(insn.opcode()).c_str());
    // Assign all Block::ins
    // Second pass over the discovered blocks: invert the 'outs' edges so every
    // block also knows its predecessors.
    for (auto &it : blocks)
    auto &blockId = it.first;
    auto &block = it.second;
    for (auto &outId : block.outs)
    auto outIt = blocks.find(outId);
    ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
    auto &out = outIt->second;
    out.ins.emplace(blockId);
487 void SpirvShader::DeclareType(InsnIterator insn)
489 Type::ID resultId = insn.word(1);
491 auto &type = types[resultId];
492 type.definition = insn;
493 type.sizeInComponents = ComputeTypeSize(insn);
495 // A structure is a builtin block if it has a builtin
496 // member. All members of such a structure are builtins.
497 switch (insn.opcode())
499 case spv::OpTypeStruct:
501 auto d = memberDecorations.find(resultId);
502 if (d != memberDecorations.end())
504 for (auto &m : d->second)
508 type.isBuiltInBlock = true;
515 case spv::OpTypePointer:
517 Type::ID elementTypeId = insn.word(3);
518 type.element = elementTypeId;
519 type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
520 type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
523 case spv::OpTypeVector:
524 case spv::OpTypeMatrix:
525 case spv::OpTypeArray:
526 case spv::OpTypeRuntimeArray:
528 Type::ID elementTypeId = insn.word(2);
529 type.element = elementTypeId;
537 SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
539 Type::ID typeId = insn.word(1);
540 Object::ID resultId = insn.word(2);
541 auto &object = defs[resultId];
542 auto &objectTy = getType(typeId);
543 object.type = typeId;
544 object.kind = Object::Kind::Constant;
545 object.definition = insn;
546 object.constantValue = std::unique_ptr<uint32_t[]>(new uint32_t[objectTy.sizeInComponents]);
550 void SpirvShader::ProcessInterfaceVariable(Object &object)
552 auto &objectTy = getType(object.type);
553 ASSERT(objectTy.storageClass == spv::StorageClassInput || objectTy.storageClass == spv::StorageClassOutput);
555 ASSERT(objectTy.opcode() == spv::OpTypePointer);
556 auto pointeeTy = getType(objectTy.element);
558 auto &builtinInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputBuiltins : outputBuiltins;
559 auto &userDefinedInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputs : outputs;
561 ASSERT(object.opcode() == spv::OpVariable);
562 Object::ID resultId = object.definition.word(2);
564 if (objectTy.isBuiltInBlock)
566 // walk the builtin block, registering each of its members separately.
567 auto m = memberDecorations.find(objectTy.element);
568 ASSERT(m != memberDecorations.end()); // otherwise we wouldn't have marked the type chain
569 auto &structType = pointeeTy.definition;
572 for (auto &member : m->second)
574 auto &memberType = getType(structType.word(word));
576 if (member.HasBuiltIn)
578 builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
581 offset += memberType.sizeInComponents;
587 auto d = decorations.find(resultId);
588 if (d != decorations.end() && d->second.HasBuiltIn)
590 builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
594 object.kind = Object::Kind::InterfaceVariable;
595 VisitInterface(resultId,
596 [&userDefinedInterface](Decorations const &d, AttribType type) {
597 // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
598 auto scalarSlot = (d.Location << 2) | d.Component;
599 ASSERT(scalarSlot >= 0 &&
600 scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
602 auto &slot = userDefinedInterface[scalarSlot];
605 slot.NoPerspective = d.NoPerspective;
606 slot.Centroid = d.Centroid;
611 void SpirvShader::ProcessExecutionMode(InsnIterator insn)
613 auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
616 case spv::ExecutionModeEarlyFragmentTests:
617 modes.EarlyFragmentTests = true;
619 case spv::ExecutionModeDepthReplacing:
620 modes.DepthReplacing = true;
622 case spv::ExecutionModeDepthGreater:
623 modes.DepthGreater = true;
625 case spv::ExecutionModeDepthLess:
626 modes.DepthLess = true;
628 case spv::ExecutionModeDepthUnchanged:
629 modes.DepthUnchanged = true;
631 case spv::ExecutionModeLocalSize:
632 modes.WorkgroupSizeX = insn.word(3);
633 modes.WorkgroupSizeY = insn.word(4);
634 modes.WorkgroupSizeZ = insn.word(5);
636 case spv::ExecutionModeOriginUpperLeft:
637 // This is always the case for a Vulkan shader. Do nothing.
640 UNIMPLEMENTED("No other execution modes are permitted");
644 uint32_t SpirvShader::ComputeTypeSize(InsnIterator insn)
646 // Types are always built from the bottom up (with the exception of forward ptrs, which
647 // don't appear in Vulkan shaders. Therefore, we can always assume our component parts have
648 // already been described (and so their sizes determined)
649 switch (insn.opcode())
651 case spv::OpTypeVoid:
652 case spv::OpTypeSampler:
653 case spv::OpTypeImage:
654 case spv::OpTypeSampledImage:
655 case spv::OpTypeFunction:
656 case spv::OpTypeRuntimeArray:
657 // Objects that don't consume any space.
658 // Descriptor-backed objects currently only need exist at compile-time.
659 // Runtime arrays don't appear in places where their size would be interesting
662 case spv::OpTypeBool:
663 case spv::OpTypeFloat:
665 // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
666 // we might need to change this, but only 32 bit components are required for Vulkan 1.1.
669 case spv::OpTypeVector:
670 case spv::OpTypeMatrix:
671 // Vectors and matrices both consume element count * element size.
672 return getType(insn.word(2)).sizeInComponents * insn.word(3);
674 case spv::OpTypeArray:
676 // Element count * element size. Array sizes come from constant ids.
677 auto arraySize = GetConstantInt(insn.word(3));
678 return getType(insn.word(2)).sizeInComponents * arraySize;
681 case spv::OpTypeStruct:
684 for (uint32_t i = 2u; i < insn.wordCount(); i++)
686 size += getType(insn.word(i)).sizeInComponents;
691 case spv::OpTypePointer:
692 // Runtime representation of a pointer is a per-lane index.
693 // Note: clients are expected to look through the pointer if they want the pointee size instead.
697 // Some other random insn.
698 UNIMPLEMENTED("Only types are supported");
703 bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
705 switch (storageClass)
707 case spv::StorageClassUniform:
708 case spv::StorageClassStorageBuffer:
709 case spv::StorageClassPushConstant:
717 int SpirvShader::VisitInterfaceInner(Type::ID id, Decorations d, F f) const
719 // Recursively walks variable definition and its type tree, taking into account
720 // any explicit Location or Component decorations encountered; where explicit
721 // Locations or Components are not specified, assigns them sequentially.
722 // Collected decorations are carried down toward the leaves and across
723 // siblings; Effect of decorations intentionally does not flow back up the tree.
725 // F is a functor to be called with the effective decoration set for every component.
727 // Returns the next available location, and calls f().
729 // This covers the rules in Vulkan 1.1 spec, 14.1.4 Location Assignment.
731 ApplyDecorationsForId(&d, id);
733 auto const &obj = getType(id);
736 case spv::OpTypePointer:
737 return VisitInterfaceInner<F>(obj.definition.word(3), d, f);
738 case spv::OpTypeMatrix:
739 for (auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
741 // consumes same components of N consecutive locations
742 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
745 case spv::OpTypeVector:
746 for (auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
748 // consumes N consecutive components in the same location
749 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
751 return d.Location + 1;
752 case spv::OpTypeFloat:
753 f(d, ATTRIBTYPE_FLOAT);
754 return d.Location + 1;
756 f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
757 return d.Location + 1;
758 case spv::OpTypeBool:
759 f(d, ATTRIBTYPE_UINT);
760 return d.Location + 1;
761 case spv::OpTypeStruct:
763 // iterate over members, which may themselves have Location/Component decorations
764 for (auto i = 0u; i < obj.definition.wordCount() - 2; i++)
766 ApplyDecorationsForIdMember(&d, id, i);
767 d.Location = VisitInterfaceInner<F>(obj.definition.word(i + 2), d, f);
768 d.Component = 0; // Implicit locations always have component=0
772 case spv::OpTypeArray:
774 auto arraySize = GetConstantInt(obj.definition.word(3));
775 for (auto i = 0u; i < arraySize; i++)
777 d.Location = VisitInterfaceInner<F>(obj.definition.word(2), d, f);
782 // Intentionally partial; most opcodes do not participate in type hierarchies
788 void SpirvShader::VisitInterface(Object::ID id, F f) const
790 // Walk a variable definition and call f for each component in it.
792 ApplyDecorationsForId(&d, id);
794 auto def = getObject(id).definition;
795 ASSERT(def.opcode() == spv::OpVariable);
796 VisitInterfaceInner<F>(def.word(1), d, f);
799 SIMD::Int SpirvShader::WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
801 // Produce a offset into external memory in sizeof(float) units
803 int constantOffset = 0;
804 SIMD::Int dynamicOffset = SIMD::Int(0);
805 auto &baseObject = getObject(id);
806 Type::ID typeId = getType(baseObject.type).element;
808 ApplyDecorationsForId(&d, baseObject.type);
810 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
811 // Start with its offset and build from there.
812 if (baseObject.kind == Object::Kind::Value)
814 dynamicOffset += routine->getIntermediate(id).Int(0);
817 for (auto i = 0u; i < numIndexes; i++)
819 auto & type = getType(typeId);
820 switch (type.definition.opcode())
822 case spv::OpTypeStruct:
824 int memberIndex = GetConstantInt(indexIds[i]);
825 ApplyDecorationsForIdMember(&d, typeId, memberIndex);
827 constantOffset += d.Offset / sizeof(float);
828 typeId = type.definition.word(2u + memberIndex);
831 case spv::OpTypeArray:
832 case spv::OpTypeRuntimeArray:
834 // TODO: b/127950082: Check bounds.
835 ApplyDecorationsForId(&d, typeId);
836 ASSERT(d.HasArrayStride);
837 auto & obj = getObject(indexIds[i]);
838 if (obj.kind == Object::Kind::Constant)
839 constantOffset += d.ArrayStride/sizeof(float) * GetConstantInt(indexIds[i]);
841 dynamicOffset += SIMD::Int(d.ArrayStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
842 typeId = type.element;
845 case spv::OpTypeMatrix:
847 // TODO: b/127950082: Check bounds.
848 ApplyDecorationsForId(&d, typeId);
849 ASSERT(d.HasMatrixStride);
850 auto & obj = getObject(indexIds[i]);
851 if (obj.kind == Object::Kind::Constant)
852 constantOffset += d.MatrixStride/sizeof(float) * GetConstantInt(indexIds[i]);
854 dynamicOffset += SIMD::Int(d.MatrixStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
855 typeId = type.element;
858 case spv::OpTypeVector:
860 auto & obj = getObject(indexIds[i]);
861 if (obj.kind == Object::Kind::Constant)
862 constantOffset += GetConstantInt(indexIds[i]);
864 dynamicOffset += routine->getIntermediate(indexIds[i]).Int(0);
865 typeId = type.element;
869 UNIMPLEMENTED("Unexpected type '%s' in WalkExplicitLayoutAccessChain", OpcodeName(type.definition.opcode()).c_str());
873 return dynamicOffset + SIMD::Int(constantOffset);
876 SIMD::Int SpirvShader::WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
878 // TODO: avoid doing per-lane work in some cases if we can?
879 // Produce a *component* offset into location-oriented memory
881 int constantOffset = 0;
882 SIMD::Int dynamicOffset = SIMD::Int(0);
883 auto &baseObject = getObject(id);
884 Type::ID typeId = getType(baseObject.type).element;
886 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
887 // Start with its offset and build from there.
888 if (baseObject.kind == Object::Kind::Value)
890 dynamicOffset += routine->getIntermediate(id).Int(0);
893 for (auto i = 0u; i < numIndexes; i++)
895 auto & type = getType(typeId);
896 switch(type.opcode())
898 case spv::OpTypeStruct:
900 int memberIndex = GetConstantInt(indexIds[i]);
901 int offsetIntoStruct = 0;
902 for (auto j = 0; j < memberIndex; j++) {
903 auto memberType = type.definition.word(2u + j);
904 offsetIntoStruct += getType(memberType).sizeInComponents;
906 constantOffset += offsetIntoStruct;
907 typeId = type.definition.word(2u + memberIndex);
911 case spv::OpTypeVector:
912 case spv::OpTypeMatrix:
913 case spv::OpTypeArray:
914 case spv::OpTypeRuntimeArray:
916 // TODO: b/127950082: Check bounds.
917 auto stride = getType(type.element).sizeInComponents;
918 auto & obj = getObject(indexIds[i]);
919 if (obj.kind == Object::Kind::Constant)
920 constantOffset += stride * GetConstantInt(indexIds[i]);
922 dynamicOffset += SIMD::Int(stride) * routine->getIntermediate(indexIds[i]).Int(0);
923 typeId = type.element;
928 UNIMPLEMENTED("Unexpected type '%s' in WalkAccessChain", OpcodeName(type.opcode()).c_str());
932 return dynamicOffset + SIMD::Int(constantOffset);
935 uint32_t SpirvShader::WalkLiteralAccessChain(Type::ID typeId, uint32_t numIndexes, uint32_t const *indexes) const
937 uint32_t constantOffset = 0;
939 for (auto i = 0u; i < numIndexes; i++)
941 auto & type = getType(typeId);
942 switch(type.opcode())
944 case spv::OpTypeStruct:
946 int memberIndex = indexes[i];
947 int offsetIntoStruct = 0;
948 for (auto j = 0; j < memberIndex; j++) {
949 auto memberType = type.definition.word(2u + j);
950 offsetIntoStruct += getType(memberType).sizeInComponents;
952 constantOffset += offsetIntoStruct;
953 typeId = type.definition.word(2u + memberIndex);
957 case spv::OpTypeVector:
958 case spv::OpTypeMatrix:
959 case spv::OpTypeArray:
961 auto elementType = type.definition.word(2);
962 auto stride = getType(elementType).sizeInComponents;
963 constantOffset += stride * indexes[i];
964 typeId = elementType;
969 UNIMPLEMENTED("Unexpected type in WalkLiteralAccessChain");
973 return constantOffset;
976 void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
980 case spv::DecorationLocation:
982 Location = static_cast<int32_t>(arg);
984 case spv::DecorationComponent:
988 case spv::DecorationDescriptorSet:
989 HasDescriptorSet = true;
992 case spv::DecorationBinding:
996 case spv::DecorationBuiltIn:
998 BuiltIn = static_cast<spv::BuiltIn>(arg);
1000 case spv::DecorationFlat:
1003 case spv::DecorationNoPerspective:
1004 NoPerspective = true;
1006 case spv::DecorationCentroid:
1009 case spv::DecorationBlock:
1012 case spv::DecorationBufferBlock:
1015 case spv::DecorationOffset:
1017 Offset = static_cast<int32_t>(arg);
1019 case spv::DecorationArrayStride:
1020 HasArrayStride = true;
1021 ArrayStride = static_cast<int32_t>(arg);
1023 case spv::DecorationMatrixStride:
1024 HasMatrixStride = true;
1025 MatrixStride = static_cast<int32_t>(arg);
1028 // Intentionally partial, there are many decorations we just don't care about.
1033 void SpirvShader::Decorations::Apply(const sw::SpirvShader::Decorations &src)
1035 // Apply a decoration group to this set of decorations
1039 BuiltIn = src.BuiltIn;
1042 if (src.HasLocation)
1045 Location = src.Location;
1048 if (src.HasComponent)
1050 HasComponent = true;
1051 Component = src.Component;
1054 if (src.HasDescriptorSet)
1056 HasDescriptorSet = true;
1057 DescriptorSet = src.DescriptorSet;
1063 Binding = src.Binding;
1069 Offset = src.Offset;
1072 if (src.HasArrayStride)
1074 HasArrayStride = true;
1075 ArrayStride = src.ArrayStride;
1078 if (src.HasMatrixStride)
1080 HasMatrixStride = true;
1081 MatrixStride = src.MatrixStride;
1085 NoPerspective |= src.NoPerspective;
1086 Centroid |= src.Centroid;
1088 BufferBlock |= src.BufferBlock;
1091 void SpirvShader::ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const
1093 auto it = decorations.find(id);
1094 if (it != decorations.end())
1095 d->Apply(it->second);
1098 void SpirvShader::ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const
1100 auto it = memberDecorations.find(id);
1101 if (it != memberDecorations.end() && member < it->second.size())
1103 d->Apply(it->second[member]);
1107 uint32_t SpirvShader::GetConstantInt(Object::ID id) const
1109 // Slightly hackish access to constants very early in translation.
1110 // General consumption of constants by other instructions should
1111 // probably be just lowered to Reactor.
1113 // TODO: not encountered yet since we only use this for array sizes etc,
1114 // but is possible to construct integer constant 0 via OpConstantNull.
1115 auto insn = getObject(id).definition;
1116 ASSERT(insn.opcode() == spv::OpConstant);
1117 ASSERT(getType(insn.word(1)).opcode() == spv::OpTypeInt);
1118 return insn.word(3);
1123 void SpirvShader::emitProlog(SpirvRoutine *routine) const
1125 for (auto insn : *this)
1127 switch (insn.opcode())
1129 case spv::OpVariable:
1131 Type::ID resultPointerTypeId = insn.word(1);
1132 auto resultPointerType = getType(resultPointerTypeId);
1133 auto pointeeType = getType(resultPointerType.element);
1135 if(pointeeType.sizeInComponents > 0) // TODO: what to do about zero-slot objects?
1137 Object::ID resultId = insn.word(2);
1138 routine->createLvalue(resultId, pointeeType.sizeInComponents);
1143 // Nothing else produces interface variables, so can all be safely ignored.
1149 void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask) const
1152 state.setActiveLaneMask(activeLaneMask);
1153 state.routine = routine;
1155 // Emit everything up to the first label
1156 // TODO: Separate out dispatch of block from non-block instructions?
1157 for (auto insn : *this)
1159 if (insn.opcode() == spv::OpLabel)
1163 EmitInstruction(insn, &state);
1166 // Emit all the blocks in BFS order, starting with the main block.
1167 std::queue<Block::ID> pending;
1168 pending.push(mainBlockId);
1169 while (pending.size() > 0)
1171 auto id = pending.front();
1173 if (state.visited.count(id) == 0)
1175 EmitBlock(id, &state);
1176 for (auto it : getBlock(id).outs)
1184 void SpirvShader::EmitBlock(Block::ID id, EmitState *state) const
1186 if (state->visited.count(id) > 0)
1188 return; // Already processed this block.
1191 state->visited.emplace(id);
1193 auto &block = getBlock(id);
1198 case Block::StructuredBranchConditional:
1199 case Block::UnstructuredBranchConditional:
1200 case Block::StructuredSwitch:
1201 case Block::UnstructuredSwitch:
1202 if (id != mainBlockId)
1204 // Emit all preceding blocks and set the activeLaneMask.
1205 Intermediate activeLaneMask(1);
1206 activeLaneMask.move(0, SIMD::Int(0));
1207 for (auto in : block.ins)
1209 EmitBlock(in, state);
1210 auto inMask = state->getActiveLaneMaskEdge(in, id);
1211 activeLaneMask.replace(0, activeLaneMask.Int(0) | inMask);
1213 state->setActiveLaneMask(activeLaneMask.Int(0));
1215 state->currentBlock = id;
1216 EmitInstructions(block.begin(), block.end(), state);
1220 state->currentBlock = id;
1225 UNREACHABLE("Unexpected Block Kind: %d", int(block.kind));
1229 void SpirvShader::EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const
1231 for (auto insn = begin; insn != end; insn++)
1233 auto res = EmitInstruction(insn, state);
1236 case EmitResult::Continue:
1238 case EmitResult::Terminator:
1241 UNREACHABLE("Unexpected EmitResult %d", int(res));
// Emits a SPIR-V structured loop whose header is state->currentBlock.
// Strategy:
//   1. Emit all non-back-edge predecessors and OR their edge masks into
//      loopActiveLaneMask (the set of lanes entering the loop).
//   2. For each OpPhi in the header, allocate backing storage (an alloca)
//      primed with the masked incoming (pre-loop) values; remember which
//      operand is the back-edge ("continue") value.
//   3. Build header/merge Reactor basic blocks, then inside the header:
//      reload phi values and the lane mask, emit the non-phi instructions,
//      emit the back-edge blocks to recompute loopActiveLaneMask, and
//      store the continue values back into phi storage.
//   4. Branch back to the header while any lane is still active, else to
//      the merge block, which is then emitted.
1247 void SpirvShader::EmitLoop(EmitState *state) const
1249 auto blockId = state->currentBlock;
1250 auto block = getBlock(blockId);
1252 // loopActiveLaneMask is the mask of lanes that are continuing to loop.
1253 // This is initialized with the incoming active lane masks.
1254 SIMD::Int loopActiveLaneMask = SIMD::Int(0);
1255 for (auto in : block.ins)
// existsPath(blockId, in) identifies back edges: a predecessor reachable
// from the loop header is part of the loop body.
1257 if (!existsPath(blockId, in)) // if not a loop back edge
1259 EmitBlock(in, state);
1260 loopActiveLaneMask |= state->getActiveLaneMaskEdge(in, blockId);
1264 // Generate an alloca for each of the loop's phis.
1265 // These will be primed with the incoming, non back edge Phi values
1266 // before the loop, and then updated just before the loop jumps back to
// Per-phi bookkeeping for the loop.
1270 Object::ID phiId; // The Phi identifier.
1271 Object::ID continueValue; // The source merge value from the loop.
1272 Array<SIMD::Int> storage; // The alloca.
1275 std::vector<LoopPhi> phis;
1277 // For each OpPhi between the block start and the merge instruction:
1278 for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
1280 if (insn.opcode() == spv::OpPhi)
1282 auto objectId = Object::ID(insn.word(2));
1283 auto &object = getObject(objectId);
1284 auto &type = getType(object.type);
1287 phi.phiId = Object::ID(insn.word(2));
1288 phi.storage = Array<SIMD::Int>(type.sizeInComponents);
1290 // Start with the Phi set to 0.
1291 for (uint32_t i = 0; i < type.sizeInComponents; i++)
1293 phi.storage[i] = SIMD::Int(0);
1296 // For each Phi source:
// OpPhi operands come in (value, parent-block) pairs starting at word 3.
1297 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
1299 auto varId = Object::ID(insn.word(w + 0));
1300 auto blockId = Block::ID(insn.word(w + 1));
1301 if (existsPath(state->currentBlock, blockId))
1303 // This source is from a loop back-edge.
// A phi is expected to have at most one distinct continue value.
1304 ASSERT(phi.continueValue == 0 || phi.continueValue == varId);
1305 phi.continueValue = varId;
1309 // This source is from a preceding block.
// Merge the incoming value under the mask of lanes arriving via that edge.
1310 for (uint32_t i = 0; i < type.sizeInComponents; i++)
1312 auto in = GenericValue(this, state->routine, varId);
1313 auto mask = state->getActiveLaneMaskEdge(blockId, state->currentBlock);
1314 phi.storage[i] = phi.storage[i] | (in.Int(i) & mask);
1319 phis.push_back(phi);
1323 // Create the loop basic blocks
1324 auto headerBasicBlock = Nucleus::createBasicBlock();
1325 auto mergeBasicBlock = Nucleus::createBasicBlock();
1327 // Start emitting code inside the loop.
1328 Nucleus::createBr(headerBasicBlock);
1329 Nucleus::setInsertBlock(headerBasicBlock);
1331 // Load the Phi values from storage.
1332 // This will load at the start of each loop.
1333 for (auto &phi : phis)
1335 auto &type = getType(getObject(phi.phiId).type);
1336 auto &dst = state->routine->createIntermediate(phi.phiId, type.sizeInComponents);
1337 for (unsigned int i = 0u; i < type.sizeInComponents; i++)
1339 dst.move(i, phi.storage[i]);
1343 // Load the active lane mask.
1344 state->setActiveLaneMask(loopActiveLaneMask);
1346 // Emit all the non-phi instructions in this loop header block.
// Phis were materialized above from storage, so they are skipped here.
1347 for (auto insn = block.begin(); insn != block.end(); insn++)
1349 if (insn.opcode() != spv::OpPhi)
1351 EmitInstruction(insn, state);
1355 // Emit all the back-edge blocks and use their active lane masks to
1356 // rebuild the loopActiveLaneMask.
1357 loopActiveLaneMask = SIMD::Int(0);
1358 for (auto in : block.ins)
1360 if (existsPath(blockId, in))
1362 EmitBlock(in, state);
1363 loopActiveLaneMask |= state->getActiveLaneMaskEdge(in, blockId);
1367 // Update loop phi values
1368 for (auto &phi : phis)
1370 if (phi.continueValue != 0)
1372 auto val = GenericValue(this, state->routine, phi.continueValue);
1373 auto &type = getType(getObject(phi.phiId).type);
1374 for (unsigned int i = 0u; i < type.sizeInComponents; i++)
1376 phi.storage[i] = val.Int(i);
1381 // Loop body now done.
1382 // If any lanes are still active, jump back to the loop header,
1383 // otherwise jump to the merge block.
1384 Nucleus::createCondBr(AnyTrue(loopActiveLaneMask).value, headerBasicBlock, mergeBasicBlock);
1386 // Emit the merge block, and we're done.
1387 Nucleus::setInsertBlock(mergeBasicBlock);
1388 EmitBlock(block.mergeBlock, state);
// Dispatches a single SPIR-V instruction to its Emit* handler.
// Declarative opcodes (types, constants, decorations, debug info, ...) are
// fully handled at analysis time and are no-ops here; anything unhandled
// hits UNIMPLEMENTED. Returns EmitResult::Continue for non-terminators.
// NOTE(review): some case labels (e.g. OpLoad/OpStore, OpDot, OpSelect,
// OpAny/OpAll, OpBranch, OpPhi, OpSwitch, OpReturn) appear elided from
// this extract; the dispatch targets below imply their presence.
1391 SpirvShader::EmitResult SpirvShader::EmitInstruction(InsnIterator insn, EmitState *state) const
1393 switch (insn.opcode())
// -- Declarative opcodes: no work at emit time. --
1395 case spv::OpTypeVoid:
1396 case spv::OpTypeInt:
1397 case spv::OpTypeFloat:
1398 case spv::OpTypeBool:
1399 case spv::OpTypeVector:
1400 case spv::OpTypeArray:
1401 case spv::OpTypeRuntimeArray:
1402 case spv::OpTypeMatrix:
1403 case spv::OpTypeStruct:
1404 case spv::OpTypePointer:
1405 case spv::OpTypeFunction:
1406 case spv::OpExecutionMode:
1407 case spv::OpMemoryModel:
1408 case spv::OpFunction:
1409 case spv::OpFunctionEnd:
1410 case spv::OpConstant:
1411 case spv::OpConstantNull:
1412 case spv::OpConstantTrue:
1413 case spv::OpConstantFalse:
1414 case spv::OpConstantComposite:
1416 case spv::OpExtension:
1417 case spv::OpCapability:
1418 case spv::OpEntryPoint:
1419 case spv::OpExtInstImport:
1420 case spv::OpDecorate:
1421 case spv::OpMemberDecorate:
1422 case spv::OpGroupDecorate:
1423 case spv::OpGroupMemberDecorate:
1424 case spv::OpDecorationGroup:
1426 case spv::OpMemberName:
1428 case spv::OpSourceContinued:
1429 case spv::OpSourceExtension:
1432 case spv::OpModuleProcessed:
1434 // Nothing to do at emit time. These are either fully handled at analysis time,
1435 // or don't require any work at all.
1436 return EmitResult::Continue;
1439 return EmitResult::Continue;
// -- Memory and object access. --
1441 case spv::OpVariable:
1442 return EmitVariable(insn, state);
1445 case spv::OpAtomicLoad:
1446 return EmitLoad(insn, state);
1449 case spv::OpAtomicStore:
1450 return EmitStore(insn, state);
1452 case spv::OpAccessChain:
1453 case spv::OpInBoundsAccessChain:
1454 return EmitAccessChain(insn, state);
// -- Composite and vector manipulation. --
1456 case spv::OpCompositeConstruct:
1457 return EmitCompositeConstruct(insn, state);
1459 case spv::OpCompositeInsert:
1460 return EmitCompositeInsert(insn, state);
1462 case spv::OpCompositeExtract:
1463 return EmitCompositeExtract(insn, state);
1465 case spv::OpVectorShuffle:
1466 return EmitVectorShuffle(insn, state);
1468 case spv::OpVectorExtractDynamic:
1469 return EmitVectorExtractDynamic(insn, state);
1471 case spv::OpVectorInsertDynamic:
1472 return EmitVectorInsertDynamic(insn, state);
// -- Matrix/vector arithmetic. --
1474 case spv::OpVectorTimesScalar:
1475 case spv::OpMatrixTimesScalar:
1476 return EmitVectorTimesScalar(insn, state);
1478 case spv::OpMatrixTimesVector:
1479 return EmitMatrixTimesVector(insn, state);
1481 case spv::OpVectorTimesMatrix:
1482 return EmitVectorTimesMatrix(insn, state);
1484 case spv::OpMatrixTimesMatrix:
1485 return EmitMatrixTimesMatrix(insn, state);
// -- Unary operators (including derivatives). --
1488 case spv::OpSNegate:
1489 case spv::OpFNegate:
1490 case spv::OpLogicalNot:
1491 case spv::OpConvertFToU:
1492 case spv::OpConvertFToS:
1493 case spv::OpConvertSToF:
1494 case spv::OpConvertUToF:
1495 case spv::OpBitcast:
1499 case spv::OpDPdxCoarse:
1501 case spv::OpDPdyCoarse:
1503 case spv::OpFwidthCoarse:
1504 case spv::OpDPdxFine:
1505 case spv::OpDPdyFine:
1506 case spv::OpFwidthFine:
1507 return EmitUnaryOp(insn, state);
// -- Binary operators (arithmetic, comparison, logic, shifts). --
1520 case spv::OpFOrdEqual:
1521 case spv::OpFUnordEqual:
1522 case spv::OpFOrdNotEqual:
1523 case spv::OpFUnordNotEqual:
1524 case spv::OpFOrdLessThan:
1525 case spv::OpFUnordLessThan:
1526 case spv::OpFOrdGreaterThan:
1527 case spv::OpFUnordGreaterThan:
1528 case spv::OpFOrdLessThanEqual:
1529 case spv::OpFUnordLessThanEqual:
1530 case spv::OpFOrdGreaterThanEqual:
1531 case spv::OpFUnordGreaterThanEqual:
1536 case spv::OpINotEqual:
1537 case spv::OpUGreaterThan:
1538 case spv::OpSGreaterThan:
1539 case spv::OpUGreaterThanEqual:
1540 case spv::OpSGreaterThanEqual:
1541 case spv::OpULessThan:
1542 case spv::OpSLessThan:
1543 case spv::OpULessThanEqual:
1544 case spv::OpSLessThanEqual:
1545 case spv::OpShiftRightLogical:
1546 case spv::OpShiftRightArithmetic:
1547 case spv::OpShiftLeftLogical:
1548 case spv::OpBitwiseOr:
1549 case spv::OpBitwiseXor:
1550 case spv::OpBitwiseAnd:
1551 case spv::OpLogicalOr:
1552 case spv::OpLogicalAnd:
1553 case spv::OpLogicalEqual:
1554 case spv::OpLogicalNotEqual:
1555 case spv::OpUMulExtended:
1556 case spv::OpSMulExtended:
1557 return EmitBinaryOp(insn, state);
1560 return EmitDot(insn, state);
1563 return EmitSelect(insn, state);
1565 case spv::OpExtInst:
1566 return EmitExtendedInstruction(insn, state);
1569 return EmitAny(insn, state);
1572 return EmitAll(insn, state);
// -- Control flow. --
1575 return EmitBranch(insn, state);
1578 return EmitPhi(insn, state);
// Merge instructions carry no emit-time work; the block kind handling in
// EmitBlock/EmitLoop consumes them.
1580 case spv::OpSelectionMerge:
1581 case spv::OpLoopMerge:
1582 return EmitResult::Continue;
1584 case spv::OpBranchConditional:
1585 return EmitBranchConditional(insn, state);
1588 return EmitSwitch(insn, state);
1590 case spv::OpUnreachable:
1591 return EmitUnreachable(insn, state);
1594 return EmitReturn(insn, state);
1597 UNIMPLEMENTED("opcode: %s", OpcodeName(insn.opcode()).c_str());
1601 return EmitResult::Continue;
// Emits an OpVariable: performs storage-class-specific setup of the
// variable's backing storage in the routine.
//  - Input: copies interface inputs into the variable's value slots.
//  - Uniform/StorageBuffer: resolves descriptor set + binding to a raw
//    device address and records it as a physical pointer.
//  - PushConstant: points directly at the routine's push-constant block.
// Other storage classes appear elided from this extract.
1604 SpirvShader::EmitResult SpirvShader::EmitVariable(InsnIterator insn, EmitState *state) const
1606 auto routine = state->routine;
1607 Object::ID resultId = insn.word(2);
1608 auto &object = getObject(resultId);
1609 auto &objectTy = getType(object.type);
1610 switch (objectTy.storageClass)
1612 case spv::StorageClassInput:
1614 if (object.kind == Object::Kind::InterfaceVariable)
1616 auto &dst = routine->getValue(resultId);
// Walk the interface layout, copying each scalar input into place.
// Location/Component decorations select the source scalar slot.
1618 VisitInterface(resultId,
1619 [&](Decorations const &d, AttribType type) {
1620 auto scalarSlot = d.Location << 2 | d.Component;
1621 dst[offset++] = routine->inputs[scalarSlot];
1626 case spv::StorageClassUniform:
1627 case spv::StorageClassStorageBuffer:
1630 ApplyDecorationsForId(&d, resultId);
1631 ASSERT(d.DescriptorSet >= 0);
1632 ASSERT(d.Binding >= 0);
1634 size_t bindingOffset = routine->pipelineLayout->getBindingOffset(d.DescriptorSet, d.Binding);
// Chase descriptor-set -> binding -> VkDescriptorBufferInfo -> buffer data,
// then apply the binding's offset to get the final address.
1636 Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
1637 Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // VkDescriptorBufferInfo*
1638 Pointer<Byte> buffer = *Pointer<Pointer<Byte>>(binding + OFFSET(VkDescriptorBufferInfo, buffer)); // vk::Buffer*
1639 Pointer<Byte> data = *Pointer<Pointer<Byte>>(buffer + vk::Buffer::DataOffset); // void*
1640 Int offset = *Pointer<Int>(binding + OFFSET(VkDescriptorBufferInfo, offset));
1641 Pointer<Byte> address = data + offset;
1642 routine->physicalPointers[resultId] = address;
1645 case spv::StorageClassPushConstant:
1647 routine->physicalPointers[resultId] = routine->pushConstants;
1654 return EmitResult::Continue;
// Emits OpLoad / OpAtomicLoad. Resolves the pointer's base (physical
// pointer for buffer storage, routine value otherwise), then loads each
// component of the result:
//  - If per-lane offsets diverge (pointer is a Value) or any lane is
//    inactive, loads scalar-by-scalar per active lane.
//  - Otherwise performs whole-SIMD-vector loads, honoring whether the
//    storage is lane-interleaved.
// Atomic loads carry a memory order derived from the SPIR-V semantics.
1657 SpirvShader::EmitResult SpirvShader::EmitLoad(InsnIterator insn, EmitState *state) const
1659 auto routine = state->routine;
1660 bool atomic = (insn.opcode() == spv::OpAtomicLoad);
1661 Object::ID resultId = insn.word(2);
1662 Object::ID pointerId = insn.word(3);
1663 auto &result = getObject(resultId);
1664 auto &resultTy = getType(result.type);
1665 auto &pointer = getObject(pointerId);
1666 auto &pointerBase = getObject(pointer.pointerBase);
1667 auto &pointerBaseTy = getType(pointerBase.type);
1668 std::memory_order memoryOrder = std::memory_order_relaxed;
// For OpAtomicLoad, word 5 is the memory-semantics operand (a constant).
1672 Object::ID semanticsId = insn.word(5);
1673 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1674 memoryOrder = MemoryOrder(memorySemantics);
1677 ASSERT(getType(pointer.type).element == result.type);
1678 ASSERT(Type::ID(insn.word(1)) == result.type);
1679 ASSERT(!atomic || getType(getType(pointer.type).element).opcode() == spv::OpTypeInt); // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1681 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1683 UNIMPLEMENTED("StorageClassImage load not yet implemented");
1686 Pointer<Float> ptrBase;
1687 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1689 ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1693 ptrBase = &routine->getValue(pointer.pointerBase)[0];
1696 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1697 auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
// Temporary per-component buffer the loaded values are gathered into.
1699 auto load = std::unique_ptr<SIMD::Float[]>(new SIMD::Float[resultTy.sizeInComponents]);
1701 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1703 // Divergent offsets or masked lanes.
1704 auto offsets = pointer.kind == Object::Kind::Value ?
1705 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1706 RValue<SIMD::Int>(SIMD::Int(0));
1707 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1709 // i wish i had a Float,Float,Float,Float constructor here..
// Only active lanes perform a load; inactive lanes leave their element
// of load[i] untouched.
1710 for (int j = 0; j < SIMD::Width; j++)
1712 If(Extract(state->activeLaneMask(), j) != 0)
1714 Int offset = Int(i) + Extract(offsets, j);
1715 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1716 load[i] = Insert(load[i], Load(&ptrBase[offset], sizeof(float), atomic, memoryOrder), j);
1723 // No divergent offsets or masked lanes.
1724 if (interleavedByLane)
1726 // Lane-interleaved data.
1727 Pointer<SIMD::Float> src = ptrBase;
1728 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1730 load[i] = Load(&src[i], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
1735 // Non-interleaved data.
// Each scalar is splat-loaded across all SIMD lanes.
1736 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1738 load[i] = RValue<SIMD::Float>(Load(&ptrBase[i], sizeof(float), atomic, memoryOrder)); // TODO: optimize alignment
// Publish the loaded components as the result intermediate.
1743 auto &dst = routine->createIntermediate(resultId, resultTy.sizeInComponents);
1744 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1746 dst.move(i, load[i]);
1749 return EmitResult::Continue;
// Emits OpStore / OpAtomicStore. Mirrors EmitLoad's structure, with two
// source paths (constant object vs. intermediate) each split into:
//  - Divergent/masked path: per-lane scalar stores guarded by the active
//    lane mask.
//  - Uniform path: whole-SIMD-vector stores, interleaved or not.
// Atomic stores carry a memory order derived from the SPIR-V semantics
// operand (word 3 of OpAtomicStore).
1752 SpirvShader::EmitResult SpirvShader::EmitStore(InsnIterator insn, EmitState *state) const
1754 auto routine = state->routine;
1755 bool atomic = (insn.opcode() == spv::OpAtomicStore);
// OpStore: value is word 2. OpAtomicStore: value is word 4.
1756 Object::ID pointerId = insn.word(1);
1757 Object::ID objectId = insn.word(atomic ? 4 : 2);
1758 auto &object = getObject(objectId);
1759 auto &pointer = getObject(pointerId);
1760 auto &pointerTy = getType(pointer.type);
1761 auto &elementTy = getType(pointerTy.element);
1762 auto &pointerBase = getObject(pointer.pointerBase);
1763 auto &pointerBaseTy = getType(pointerBase.type);
1764 std::memory_order memoryOrder = std::memory_order_relaxed;
1768 Object::ID semanticsId = insn.word(3);
1769 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1770 memoryOrder = MemoryOrder(memorySemantics);
1773 ASSERT(!atomic || elementTy.opcode() == spv::OpTypeInt); // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1775 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1777 UNIMPLEMENTED("StorageClassImage store not yet implemented");
1780 Pointer<Float> ptrBase;
1781 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1783 ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1787 ptrBase = &routine->getValue(pointer.pointerBase)[0];
1790 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1791 auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
1793 if (object.kind == Object::Kind::Constant)
1795 // Constant source data.
// Reinterpret the raw constant words as floats for uniform store code.
1796 auto src = reinterpret_cast<float *>(object.constantValue.get());
1797 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1799 // Divergent offsets or masked lanes.
1800 auto offsets = pointer.kind == Object::Kind::Value ?
1801 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1802 RValue<SIMD::Int>(SIMD::Int(0));
1803 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1805 for (int j = 0; j < SIMD::Width; j++)
// Only active lanes write.
1807 If(Extract(state->activeLaneMask(), j) != 0)
1809 Int offset = Int(i) + Extract(offsets, j);
1810 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1811 Store(RValue<Float>(src[i]), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1818 // Constant source data.
1819 // No divergent offsets or masked lanes.
1820 Pointer<SIMD::Float> dst = ptrBase;
1821 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1823 Store(RValue<SIMD::Float>(src[i]), &dst[i], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
1829 // Intermediate source data.
1830 auto &src = routine->getIntermediate(objectId);
1831 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1833 // Divergent offsets or masked lanes.
1834 auto offsets = pointer.kind == Object::Kind::Value ?
1835 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1836 RValue<SIMD::Int>(SIMD::Int(0));
1837 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1839 for (int j = 0; j < SIMD::Width; j++)
1841 If(Extract(state->activeLaneMask(), j) != 0)
1843 Int offset = Int(i) + Extract(offsets, j);
1844 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1845 Store(Extract(src.Float(i), j), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1852 // No divergent offsets or masked lanes.
1853 if (interleavedByLane)
1855 // Lane-interleaved data.
1856 Pointer<SIMD::Float> dst = ptrBase;
1857 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1859 Store(src.Float(i), &dst[i], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
1864 // Intermediate source data. Non-interleaved data.
1865 Pointer<SIMD::Float> dst = ptrBase;
1866 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1868 Store<SIMD::Float>(SIMD::Float(src.Float(i)), &dst[i], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
1874 return EmitResult::Continue;
// Emits OpAccessChain / OpInBoundsAccessChain: computes a per-lane element
// offset from the base pointer. Explicit-layout storage classes (push
// constant, uniform, storage buffer) use decorated offsets/strides via
// WalkExplicitLayoutAccessChain; everything else uses the implicit layout
// walk. The result is a single-component intermediate holding the offset.
1877 SpirvShader::EmitResult SpirvShader::EmitAccessChain(InsnIterator insn, EmitState *state) const
1879 auto routine = state->routine;
1880 Type::ID typeId = insn.word(1);
1881 Object::ID resultId = insn.word(2);
1882 Object::ID baseId = insn.word(3);
// Indexes start at word 4; everything after base is an index operand.
1883 uint32_t numIndexes = insn.wordCount() - 4;
1884 const uint32_t *indexes = insn.wordPointer(4);
1885 auto &type = getType(typeId);
1886 ASSERT(type.sizeInComponents == 1);
// The chain must not change which root object the pointer refers to.
1887 ASSERT(getObject(baseId).pointerBase == getObject(resultId).pointerBase);
1889 auto &dst = routine->createIntermediate(resultId, type.sizeInComponents);
1891 if(type.storageClass == spv::StorageClassPushConstant ||
1892 type.storageClass == spv::StorageClassUniform ||
1893 type.storageClass == spv::StorageClassStorageBuffer)
1895 dst.move(0, WalkExplicitLayoutAccessChain(baseId, numIndexes, indexes, routine));
1899 dst.move(0, WalkAccessChain(baseId, numIndexes, indexes, routine));
1902 return EmitResult::Continue;
// Emits OpCompositeConstruct: concatenates the components of all source
// operands (words 3..) into the destination composite, in order.
1905 SpirvShader::EmitResult SpirvShader::EmitCompositeConstruct(InsnIterator insn, EmitState *state) const
1907 auto routine = state->routine;
1908 auto &type = getType(insn.word(1));
1909 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
// 'offset' tracks the running write position in dst (its declaration
// appears elided from this extract).
1912 for (auto i = 0u; i < insn.wordCount() - 3; i++)
1914 Object::ID srcObjectId = insn.word(3u + i);
1915 auto & srcObject = getObject(srcObjectId);
1916 auto & srcObjectTy = getType(srcObject.type);
1917 GenericValue srcObjectAccess(this, routine, srcObjectId);
// Copy every component of this operand into the next dst slots.
1919 for (auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
1921 dst.move(offset++, srcObjectAccess.Float(j));
1925 return EmitResult::Continue;
// Emits OpCompositeInsert: copies the source composite (word 4) into the
// result, replacing the sub-object addressed by the literal index chain
// (words 5..) with the new part (word 3).
1928 SpirvShader::EmitResult SpirvShader::EmitCompositeInsert(InsnIterator insn, EmitState *state) const
1930 auto routine = state->routine;
1931 Type::ID resultTypeId = insn.word(1);
1932 auto &type = getType(resultTypeId);
1933 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1934 auto &newPartObject = getObject(insn.word(3));
1935 auto &newPartObjectTy = getType(newPartObject.type);
// Resolve the literal index chain to the first flattened component index
// being replaced.
1936 auto firstNewComponent = WalkLiteralAccessChain(resultTypeId, insn.wordCount() - 5, insn.wordPointer(5));
1938 GenericValue srcObjectAccess(this, routine, insn.word(4));
1939 GenericValue newPartObjectAccess(this, routine, insn.word(3));
1941 // old components before
1942 for (auto i = 0u; i < firstNewComponent; i++)
1944 dst.move(i, srcObjectAccess.Float(i));
// new part
1947 for (auto i = 0u; i < newPartObjectTy.sizeInComponents; i++)
1949 dst.move(firstNewComponent + i, newPartObjectAccess.Float(i));
1951 // old components after
1952 for (auto i = firstNewComponent + newPartObjectTy.sizeInComponents; i < type.sizeInComponents; i++)
1954 dst.move(i, srcObjectAccess.Float(i));
1957 return EmitResult::Continue;
// Emits OpCompositeExtract: copies the sub-object addressed by the literal
// index chain (words 4..) out of the composite (word 3) into the result.
1960 SpirvShader::EmitResult SpirvShader::EmitCompositeExtract(InsnIterator insn, EmitState *state) const
1962 auto routine = state->routine;
1963 auto &type = getType(insn.word(1));
1964 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1965 auto &compositeObject = getObject(insn.word(3));
// The composite's type id is taken from its defining instruction.
1966 Type::ID compositeTypeId = compositeObject.definition.word(1);
1967 auto firstComponent = WalkLiteralAccessChain(compositeTypeId, insn.wordCount() - 4, insn.wordPointer(4));
1969 GenericValue compositeObjectAccess(this, routine, insn.word(3));
1970 for (auto i = 0u; i < type.sizeInComponents; i++)
1972 dst.move(i, compositeObjectAccess.Float(firstComponent + i));
1975 return EmitResult::Continue;
// Emits OpVectorShuffle: builds the result vector by selecting components
// from two source vectors (words 3 and 4) per the literal selectors
// (words 5..). Selector 0xFFFFFFFF means "undefined" and yields zero.
1978 SpirvShader::EmitResult SpirvShader::EmitVectorShuffle(InsnIterator insn, EmitState *state) const
1980 auto routine = state->routine;
1981 auto &type = getType(insn.word(1));
1982 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1984 // Note: number of components in result type, first half type, and second
1985 // half type are all independent.
1986 auto &firstHalfType = getType(getObject(insn.word(3)).type);
1988 GenericValue firstHalfAccess(this, routine, insn.word(3));
1989 GenericValue secondHalfAccess(this, routine, insn.word(4));
1991 for (auto i = 0u; i < type.sizeInComponents; i++)
1993 auto selector = insn.word(5 + i);
1994 if (selector == static_cast<uint32_t>(-1))
1996 // Undefined value. Until we decide to do real undef values, zero is as good
1998 dst.move(i, RValue<SIMD::Float>(0.0f));
// Selectors index the concatenation of the two sources: values below
// the first vector's size pick from it, the rest from the second.
2000 else if (selector < firstHalfType.sizeInComponents)
2002 dst.move(i, firstHalfAccess.Float(selector));
2006 dst.move(i, secondHalfAccess.Float(selector - firstHalfType.sizeInComponents));
2010 return EmitResult::Continue;
// Emits OpVectorExtractDynamic: selects one component of the source vector
// per-lane using a runtime index. Implemented branchlessly by masking each
// component with a per-lane equality test and OR-folding the results.
2013 SpirvShader::EmitResult SpirvShader::EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const
2015 auto routine = state->routine;
2016 auto &type = getType(insn.word(1));
2017 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2018 auto &srcType = getType(getObject(insn.word(3)).type);
2020 GenericValue src(this, routine, insn.word(3));
2021 GenericValue index(this, routine, insn.word(4));
2023 SIMD::UInt v = SIMD::UInt(0);
2025 for (auto i = 0u; i < srcType.sizeInComponents; i++)
// Lanes whose index equals i contribute component i; others contribute 0.
2027 v |= CmpEQ(index.UInt(0), SIMD::UInt(i)) & src.UInt(i);
// (The move of v into dst appears elided from this extract.)
2031 return EmitResult::Continue;
// Emits OpVectorInsertDynamic: copies the source vector, replacing, per
// lane, the component selected by the runtime index with the new scalar.
// Branchless: each output component blends source and replacement under a
// per-lane equality mask.
2034 SpirvShader::EmitResult SpirvShader::EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const
2036 auto routine = state->routine;
2037 auto &type = getType(insn.word(1));
2038 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2040 GenericValue src(this, routine, insn.word(3));
2041 GenericValue component(this, routine, insn.word(4));
2042 GenericValue index(this, routine, insn.word(5));
2044 for (auto i = 0u; i < type.sizeInComponents; i++)
// mask is all-ones in lanes where this is the selected component.
2046 SIMD::UInt mask = CmpEQ(SIMD::UInt(i), index.UInt(0));
2047 dst.move(i, (src.UInt(i) & ~mask) | (component.UInt(0) & mask));
2049 return EmitResult::Continue;
// Emits OpVectorTimesScalar / OpMatrixTimesScalar: multiplies every
// component of the lhs (vector, or column-major flattened matrix) by the
// scalar rhs.
2052 SpirvShader::EmitResult SpirvShader::EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const
2054 auto routine = state->routine;
2055 auto &type = getType(insn.word(1));
2056 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2057 auto lhs = GenericValue(this, routine, insn.word(3));
2058 auto rhs = GenericValue(this, routine, insn.word(4));
2060 for (auto i = 0u; i < type.sizeInComponents; i++)
2062 dst.move(i, lhs.Float(i) * rhs.Float(0));
2065 return EmitResult::Continue;
// Emits OpMatrixTimesVector: result[i] = sum_j M[i][j] * v[j], with the
// matrix flattened column-major (component i + resultSize * j is row i of
// column j).
2068 SpirvShader::EmitResult SpirvShader::EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const
2070 auto routine = state->routine;
2071 auto &type = getType(insn.word(1));
2072 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2073 auto lhs = GenericValue(this, routine, insn.word(3));
2074 auto rhs = GenericValue(this, routine, insn.word(4));
2075 auto rhsType = getType(getObject(insn.word(4)).type);
2077 for (auto i = 0u; i < type.sizeInComponents; i++)
// Initialize with column 0's contribution, then accumulate columns 1..n-1.
2079 SIMD::Float v = lhs.Float(i) * rhs.Float(0);
2080 for (auto j = 1u; j < rhsType.sizeInComponents; j++)
2082 v += lhs.Float(i + type.sizeInComponents * j) * rhs.Float(j);
// (The move of v into dst appears elided from this extract.)
2087 return EmitResult::Continue;
// Emits OpVectorTimesMatrix: result[i] = dot(v, column i of M), with the
// matrix flattened column-major (column i starts at i * vectorSize).
2090 SpirvShader::EmitResult SpirvShader::EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const
2092 auto routine = state->routine;
2093 auto &type = getType(insn.word(1));
2094 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2095 auto lhs = GenericValue(this, routine, insn.word(3));
2096 auto rhs = GenericValue(this, routine, insn.word(4));
2097 auto lhsType = getType(getObject(insn.word(3)).type);
2099 for (auto i = 0u; i < type.sizeInComponents; i++)
// Initialize with row 0's term, then accumulate the remaining rows.
2101 SIMD::Float v = lhs.Float(0) * rhs.Float(i * lhsType.sizeInComponents);
2102 for (auto j = 1u; j < lhsType.sizeInComponents; j++)
2104 v += lhs.Float(j) * rhs.Float(i * lhsType.sizeInComponents + j);
// (The move of v into dst appears elided from this extract.)
2109 return EmitResult::Continue;
// Emits OpMatrixTimesMatrix: standard dense multiply over column-major
// flattened matrices. Dimensions are pulled from the type definitions:
// result column count, result row count, and the inner (reduction) size.
2112 SpirvShader::EmitResult SpirvShader::EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const
2114 auto routine = state->routine;
2115 auto &type = getType(insn.word(1));
2116 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2117 auto lhs = GenericValue(this, routine, insn.word(3));
2118 auto rhs = GenericValue(this, routine, insn.word(4));
// OpTypeMatrix word 3 = column count; its column type's word 3 = row count.
2120 auto numColumns = type.definition.word(3);
2121 auto numRows = getType(type.definition.word(2)).definition.word(3);
2122 auto numAdds = getType(getObject(insn.word(3)).type).definition.word(3);
2124 for (auto row = 0u; row < numRows; row++)
2126 for (auto col = 0u; col < numColumns; col++)
2128 SIMD::Float v = SIMD::Float(0);
2129 for (auto i = 0u; i < numAdds; i++)
2131 v += lhs.Float(i * numRows + row) * rhs.Float(col * numAdds + i);
2133 dst.move(numRows * col + row, v);
2137 return EmitResult::Continue;
// Emits component-wise unary operators: negation, logical not, float/int
// conversions, bitcast, IsInf/IsNan, and the derivative instructions
// (DPdx/DPdy/Fwidth, coarse and fine), which operate across the 2x2 quad
// laid out over the four SIMD lanes.
2140 SpirvShader::EmitResult SpirvShader::EmitUnaryOp(InsnIterator insn, EmitState *state) const
2142 auto routine = state->routine;
2143 auto &type = getType(insn.word(1));
2144 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2145 auto src = GenericValue(this, routine, insn.word(3));
2147 for (auto i = 0u; i < type.sizeInComponents; i++)
2149 switch (insn.opcode())
2152 case spv::OpLogicalNot: // logical not == bitwise not due to all-bits boolean representation
2153 dst.move(i, ~src.UInt(i));
2155 case spv::OpSNegate:
2156 dst.move(i, -src.Int(i));
2158 case spv::OpFNegate:
2159 dst.move(i, -src.Float(i));
2161 case spv::OpConvertFToU:
2162 dst.move(i, SIMD::UInt(src.Float(i)));
2164 case spv::OpConvertFToS:
2165 dst.move(i, SIMD::Int(src.Float(i)));
2167 case spv::OpConvertSToF:
2168 dst.move(i, SIMD::Float(src.Int(i)));
2170 case spv::OpConvertUToF:
2171 dst.move(i, SIMD::Float(src.UInt(i)));
2173 case spv::OpBitcast:
// Bitcast is a no-op reinterpretation; components are moved as-is.
2174 dst.move(i, src.Float(i));
2177 dst.move(i, IsInf(src.Float(i)));
2180 dst.move(i, IsNan(src.Float(i)));
2183 case spv::OpDPdxCoarse:
2184 // Derivative instructions: FS invocations are laid out like so:
// (The quad-layout diagram lines appear elided from this extract; lanes
// 0..3 cover a 2x2 pixel quad.)
2187 static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
// Coarse dPdx: one horizontal difference, broadcast to all four lanes.
2188 dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
2191 case spv::OpDPdyCoarse:
2192 dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
2195 case spv::OpFwidthCoarse:
2196 dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))
2197 + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
2199 case spv::OpDPdxFine:
// Fine dPdx: per-row horizontal differences; the second row's value is
// inserted into lanes 2 and 3.
2201 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
2202 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
2203 SIMD::Float v = SIMD::Float(firstRow);
2204 v = Insert(v, secondRow, 2);
2205 v = Insert(v, secondRow, 3);
2209 case spv::OpDPdyFine:
// Fine dPdy: per-column vertical differences; the second column's value
// is inserted into lanes 1 and 3.
2211 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
2212 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
2213 SIMD::Float v = SIMD::Float(firstColumn);
2214 v = Insert(v, secondColumn, 1);
2215 v = Insert(v, secondColumn, 3);
2219 case spv::OpFwidthFine:
// Fine fwidth = |dPdxFine| + |dPdyFine|, built from the same per-row /
// per-column differences as above.
2221 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
2222 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
2223 SIMD::Float dpdx = SIMD::Float(firstRow);
2224 dpdx = Insert(dpdx, secondRow, 2);
2225 dpdx = Insert(dpdx, secondRow, 3);
2226 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
2227 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
2228 SIMD::Float dpdy = SIMD::Float(firstColumn);
2229 dpdy = Insert(dpdy, secondColumn, 1);
2230 dpdy = Insert(dpdy, secondColumn, 3);
2231 dst.move(i, Abs(dpdx) + Abs(dpdy));
2235 UNIMPLEMENTED("Unhandled unary operator %s", OpcodeName(insn.opcode()).c_str());
2239 return EmitResult::Continue;
2242 SpirvShader::EmitResult SpirvShader::EmitBinaryOp(InsnIterator insn, EmitState *state) const
2244 auto routine = state->routine;
2245 auto &type = getType(insn.word(1));
2246 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2247 auto &lhsType = getType(getObject(insn.word(3)).type);
2248 auto lhs = GenericValue(this, routine, insn.word(3));
2249 auto rhs = GenericValue(this, routine, insn.word(4));
2251 for (auto i = 0u; i < lhsType.sizeInComponents; i++)
2253 switch (insn.opcode())
2256 dst.move(i, lhs.Int(i) + rhs.Int(i));
2259 dst.move(i, lhs.Int(i) - rhs.Int(i));
2262 dst.move(i, lhs.Int(i) * rhs.Int(i));
2266 SIMD::Int a = lhs.Int(i);
2267 SIMD::Int b = rhs.Int(i);
2268 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2269 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2275 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2276 dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
2281 SIMD::Int a = lhs.Int(i);
2282 SIMD::Int b = rhs.Int(i);
2283 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2284 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2290 SIMD::Int a = lhs.Int(i);
2291 SIMD::Int b = rhs.Int(i);
2292 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2293 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2295 // If a and b have opposite signs, the remainder operation takes
2296 // the sign from a but OpSMod is supposed to take the sign of b.
2297 // Adding b will ensure that the result has the correct sign and
2298 // that it is still congruent to a modulo b.
2300 // See also http://mathforum.org/library/drmath/view/52343.html
2301 auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
2302 auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
2303 dst.move(i, As<SIMD::Float>(fixedMod));
2308 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2309 dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
2313 case spv::OpLogicalEqual:
2314 dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
2316 case spv::OpINotEqual:
2317 case spv::OpLogicalNotEqual:
2318 dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
2320 case spv::OpUGreaterThan:
2321 dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
2323 case spv::OpSGreaterThan:
2324 dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i)));
2326 case spv::OpUGreaterThanEqual:
2327 dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
2329 case spv::OpSGreaterThanEqual:
2330 dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i)));
2332 case spv::OpULessThan:
2333 dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
2335 case spv::OpSLessThan:
2336 dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i)));
2338 case spv::OpULessThanEqual:
2339 dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
2341 case spv::OpSLessThanEqual:
2342 dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i)));
2345 dst.move(i, lhs.Float(i) + rhs.Float(i));
2348 dst.move(i, lhs.Float(i) - rhs.Float(i));
2351 dst.move(i, lhs.Float(i) * rhs.Float(i));
2354 dst.move(i, lhs.Float(i) / rhs.Float(i));
2357 // TODO(b/126873455): inaccurate for values greater than 2^24
2358 dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
2361 dst.move(i, lhs.Float(i) % rhs.Float(i));
2363 case spv::OpFOrdEqual:
2364 dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
2366 case spv::OpFUnordEqual:
2367 dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
2369 case spv::OpFOrdNotEqual:
2370 dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
2372 case spv::OpFUnordNotEqual:
2373 dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
2375 case spv::OpFOrdLessThan:
2376 dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i)));
2378 case spv::OpFUnordLessThan:
2379 dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i)));
2381 case spv::OpFOrdGreaterThan:
2382 dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i)));
2384 case spv::OpFUnordGreaterThan:
2385 dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
2387 case spv::OpFOrdLessThanEqual:
2388 dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i)));
2390 case spv::OpFUnordLessThanEqual:
2391 dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i)));
2393 case spv::OpFOrdGreaterThanEqual:
2394 dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i)));
2396 case spv::OpFUnordGreaterThanEqual:
2397 dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
2399 case spv::OpShiftRightLogical:
2400 dst.move(i, lhs.UInt(i) >> rhs.UInt(i));
2402 case spv::OpShiftRightArithmetic:
2403 dst.move(i, lhs.Int(i) >> rhs.Int(i));
2405 case spv::OpShiftLeftLogical:
2406 dst.move(i, lhs.UInt(i) << rhs.UInt(i));
2408 case spv::OpBitwiseOr:
2409 case spv::OpLogicalOr:
2410 dst.move(i, lhs.UInt(i) | rhs.UInt(i));
2412 case spv::OpBitwiseXor:
2413 dst.move(i, lhs.UInt(i) ^ rhs.UInt(i));
2415 case spv::OpBitwiseAnd:
2416 case spv::OpLogicalAnd:
2417 dst.move(i, lhs.UInt(i) & rhs.UInt(i));
2419 case spv::OpSMulExtended:
2420 // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
2421 // In our flat view then, component i is the i'th component of the first member;
2422 // component i + N is the i'th component of the second member.
2423 dst.move(i, lhs.Int(i) * rhs.Int(i));
2424 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
2426 case spv::OpUMulExtended:
2427 dst.move(i, lhs.UInt(i) * rhs.UInt(i));
2428 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
2431 UNIMPLEMENTED("Unhandled binary operator %s", OpcodeName(insn.opcode()).c_str());
2435 return EmitResult::Continue;
2438 SpirvShader::EmitResult SpirvShader::EmitDot(InsnIterator insn, EmitState *state) const
2440 auto routine = state->routine;
2441 auto &type = getType(insn.word(1));
2442 ASSERT(type.sizeInComponents == 1);
2443 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2444 auto &lhsType = getType(getObject(insn.word(3)).type);
2445 auto lhs = GenericValue(this, routine, insn.word(3));
2446 auto rhs = GenericValue(this, routine, insn.word(4));
2448 dst.move(0, Dot(lhsType.sizeInComponents, lhs, rhs));
2449 return EmitResult::Continue;
2452 SpirvShader::EmitResult SpirvShader::EmitSelect(InsnIterator insn, EmitState *state) const
2454 auto routine = state->routine;
2455 auto &type = getType(insn.word(1));
2456 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2457 auto cond = GenericValue(this, routine, insn.word(3));
2458 auto lhs = GenericValue(this, routine, insn.word(4));
2459 auto rhs = GenericValue(this, routine, insn.word(5));
2461 for (auto i = 0u; i < type.sizeInComponents; i++)
2463 dst.move(i, (cond.Int(i) & lhs.Int(i)) | (~cond.Int(i) & rhs.Int(i))); // FIXME: IfThenElse()
2466 return EmitResult::Continue;
2469 SpirvShader::EmitResult SpirvShader::EmitExtendedInstruction(InsnIterator insn, EmitState *state) const
2471 auto routine = state->routine;
2472 auto &type = getType(insn.word(1));
2473 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2474 auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
2476 switch (extInstIndex)
2478 case GLSLstd450FAbs:
2480 auto src = GenericValue(this, routine, insn.word(5));
2481 for (auto i = 0u; i < type.sizeInComponents; i++)
2483 dst.move(i, Abs(src.Float(i)));
2487 case GLSLstd450SAbs:
2489 auto src = GenericValue(this, routine, insn.word(5));
2490 for (auto i = 0u; i < type.sizeInComponents; i++)
2492 dst.move(i, Abs(src.Int(i)));
2496 case GLSLstd450Cross:
2498 auto lhs = GenericValue(this, routine, insn.word(5));
2499 auto rhs = GenericValue(this, routine, insn.word(6));
2500 dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
2501 dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
2502 dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
2505 case GLSLstd450Floor:
2507 auto src = GenericValue(this, routine, insn.word(5));
2508 for (auto i = 0u; i < type.sizeInComponents; i++)
2510 dst.move(i, Floor(src.Float(i)));
2514 case GLSLstd450Trunc:
2516 auto src = GenericValue(this, routine, insn.word(5));
2517 for (auto i = 0u; i < type.sizeInComponents; i++)
2519 dst.move(i, Trunc(src.Float(i)));
2523 case GLSLstd450Ceil:
2525 auto src = GenericValue(this, routine, insn.word(5));
2526 for (auto i = 0u; i < type.sizeInComponents; i++)
2528 dst.move(i, Ceil(src.Float(i)));
2532 case GLSLstd450Fract:
2534 auto src = GenericValue(this, routine, insn.word(5));
2535 for (auto i = 0u; i < type.sizeInComponents; i++)
2537 dst.move(i, Frac(src.Float(i)));
2541 case GLSLstd450Round:
2543 auto src = GenericValue(this, routine, insn.word(5));
2544 for (auto i = 0u; i < type.sizeInComponents; i++)
2546 dst.move(i, Round(src.Float(i)));
2550 case GLSLstd450RoundEven:
2552 auto src = GenericValue(this, routine, insn.word(5));
2553 for (auto i = 0u; i < type.sizeInComponents; i++)
2555 auto x = Round(src.Float(i));
2556 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
2557 dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
2558 SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
2562 case GLSLstd450FMin:
2564 auto lhs = GenericValue(this, routine, insn.word(5));
2565 auto rhs = GenericValue(this, routine, insn.word(6));
2566 for (auto i = 0u; i < type.sizeInComponents; i++)
2568 dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
2572 case GLSLstd450FMax:
2574 auto lhs = GenericValue(this, routine, insn.word(5));
2575 auto rhs = GenericValue(this, routine, insn.word(6));
2576 for (auto i = 0u; i < type.sizeInComponents; i++)
2578 dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
2582 case GLSLstd450SMin:
2584 auto lhs = GenericValue(this, routine, insn.word(5));
2585 auto rhs = GenericValue(this, routine, insn.word(6));
2586 for (auto i = 0u; i < type.sizeInComponents; i++)
2588 dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
2592 case GLSLstd450SMax:
2594 auto lhs = GenericValue(this, routine, insn.word(5));
2595 auto rhs = GenericValue(this, routine, insn.word(6));
2596 for (auto i = 0u; i < type.sizeInComponents; i++)
2598 dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
2602 case GLSLstd450UMin:
2604 auto lhs = GenericValue(this, routine, insn.word(5));
2605 auto rhs = GenericValue(this, routine, insn.word(6));
2606 for (auto i = 0u; i < type.sizeInComponents; i++)
2608 dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
2612 case GLSLstd450UMax:
2614 auto lhs = GenericValue(this, routine, insn.word(5));
2615 auto rhs = GenericValue(this, routine, insn.word(6));
2616 for (auto i = 0u; i < type.sizeInComponents; i++)
2618 dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
2622 case GLSLstd450Step:
2624 auto edge = GenericValue(this, routine, insn.word(5));
2625 auto x = GenericValue(this, routine, insn.word(6));
2626 for (auto i = 0u; i < type.sizeInComponents; i++)
2628 dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
2632 case GLSLstd450SmoothStep:
2634 auto edge0 = GenericValue(this, routine, insn.word(5));
2635 auto edge1 = GenericValue(this, routine, insn.word(6));
2636 auto x = GenericValue(this, routine, insn.word(7));
2637 for (auto i = 0u; i < type.sizeInComponents; i++)
2639 auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
2640 (edge1.Float(i) - edge0.Float(i)), SIMD::Float(0.0f)), SIMD::Float(1.0f));
2641 dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
2645 case GLSLstd450FMix:
2647 auto x = GenericValue(this, routine, insn.word(5));
2648 auto y = GenericValue(this, routine, insn.word(6));
2649 auto a = GenericValue(this, routine, insn.word(7));
2650 for (auto i = 0u; i < type.sizeInComponents; i++)
2652 dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
2656 case GLSLstd450FClamp:
2658 auto x = GenericValue(this, routine, insn.word(5));
2659 auto minVal = GenericValue(this, routine, insn.word(6));
2660 auto maxVal = GenericValue(this, routine, insn.word(7));
2661 for (auto i = 0u; i < type.sizeInComponents; i++)
2663 dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
2667 case GLSLstd450SClamp:
2669 auto x = GenericValue(this, routine, insn.word(5));
2670 auto minVal = GenericValue(this, routine, insn.word(6));
2671 auto maxVal = GenericValue(this, routine, insn.word(7));
2672 for (auto i = 0u; i < type.sizeInComponents; i++)
2674 dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
2678 case GLSLstd450UClamp:
2680 auto x = GenericValue(this, routine, insn.word(5));
2681 auto minVal = GenericValue(this, routine, insn.word(6));
2682 auto maxVal = GenericValue(this, routine, insn.word(7));
2683 for (auto i = 0u; i < type.sizeInComponents; i++)
2685 dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
2689 case GLSLstd450FSign:
2691 auto src = GenericValue(this, routine, insn.word(5));
2692 for (auto i = 0u; i < type.sizeInComponents; i++)
2694 auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
2695 auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
2696 dst.move(i, neg | pos);
2700 case GLSLstd450SSign:
2702 auto src = GenericValue(this, routine, insn.word(5));
2703 for (auto i = 0u; i < type.sizeInComponents; i++)
2705 auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
2706 auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
2707 dst.move(i, neg | pos);
2711 case GLSLstd450Reflect:
2713 auto I = GenericValue(this, routine, insn.word(5));
2714 auto N = GenericValue(this, routine, insn.word(6));
2716 SIMD::Float d = Dot(type.sizeInComponents, I, N);
2718 for (auto i = 0u; i < type.sizeInComponents; i++)
2720 dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
2724 case GLSLstd450Refract:
2726 auto I = GenericValue(this, routine, insn.word(5));
2727 auto N = GenericValue(this, routine, insn.word(6));
2728 auto eta = GenericValue(this, routine, insn.word(7));
2730 SIMD::Float d = Dot(type.sizeInComponents, I, N);
2731 SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
2732 SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
2733 SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
2735 for (auto i = 0u; i < type.sizeInComponents; i++)
2737 dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
2741 case GLSLstd450FaceForward:
2743 auto N = GenericValue(this, routine, insn.word(5));
2744 auto I = GenericValue(this, routine, insn.word(6));
2745 auto Nref = GenericValue(this, routine, insn.word(7));
2747 SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
2748 SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
2750 for (auto i = 0u; i < type.sizeInComponents; i++)
2752 auto n = N.Float(i);
2753 dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
2757 case GLSLstd450Length:
2759 auto x = GenericValue(this, routine, insn.word(5));
2760 SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2762 dst.move(0, Sqrt(d));
2765 case GLSLstd450Normalize:
2767 auto x = GenericValue(this, routine, insn.word(5));
2768 SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2769 SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
2771 for (auto i = 0u; i < type.sizeInComponents; i++)
2773 dst.move(i, invLength * x.Float(i));
2777 case GLSLstd450Distance:
2779 auto p0 = GenericValue(this, routine, insn.word(5));
2780 auto p1 = GenericValue(this, routine, insn.word(6));
2781 auto p0Type = getType(getObject(insn.word(5)).type);
2783 // sqrt(dot(p0-p1, p0-p1))
2784 SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
2786 for (auto i = 1u; i < p0Type.sizeInComponents; i++)
2788 d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
2791 dst.move(0, Sqrt(d));
2795 UNIMPLEMENTED("Unhandled ExtInst %d", extInstIndex);
2798 return EmitResult::Continue;
2801 std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
2803 switch(memorySemantics)
2805 case spv::MemorySemanticsMaskNone: return std::memory_order_relaxed;
2806 case spv::MemorySemanticsAcquireMask: return std::memory_order_acquire;
2807 case spv::MemorySemanticsReleaseMask: return std::memory_order_release;
2808 case spv::MemorySemanticsAcquireReleaseMask: return std::memory_order_acq_rel;
2809 case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel; // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
2811 UNREACHABLE("MemorySemanticsMask %x", memorySemantics);
2812 return std::memory_order_acq_rel;
2816 SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const
2818 SIMD::Float d = x.Float(0) * y.Float(0);
2820 for (auto i = 1u; i < numComponents; i++)
2822 d += x.Float(i) * y.Float(i);
2828 SpirvShader::EmitResult SpirvShader::EmitAny(InsnIterator insn, EmitState *state) const
2830 auto routine = state->routine;
2831 auto &type = getType(insn.word(1));
2832 ASSERT(type.sizeInComponents == 1);
2833 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2834 auto &srcType = getType(getObject(insn.word(3)).type);
2835 auto src = GenericValue(this, routine, insn.word(3));
2837 SIMD::UInt result = src.UInt(0);
2839 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2841 result |= src.UInt(i);
2844 dst.move(0, result);
2845 return EmitResult::Continue;
2848 SpirvShader::EmitResult SpirvShader::EmitAll(InsnIterator insn, EmitState *state) const
2850 auto routine = state->routine;
2851 auto &type = getType(insn.word(1));
2852 ASSERT(type.sizeInComponents == 1);
2853 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2854 auto &srcType = getType(getObject(insn.word(3)).type);
2855 auto src = GenericValue(this, routine, insn.word(3));
2857 SIMD::UInt result = src.UInt(0);
2859 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2861 result &= src.UInt(i);
2864 dst.move(0, result);
2865 return EmitResult::Continue;
2868 SpirvShader::EmitResult SpirvShader::EmitBranch(InsnIterator insn, EmitState *state) const
2870 auto target = Block::ID(insn.word(1));
2871 auto edge = Block::Edge{state->currentBlock, target};
2872 state->edgeActiveLaneMasks.emplace(edge, state->activeLaneMask());
2873 return EmitResult::Terminator;
2876 SpirvShader::EmitResult SpirvShader::EmitBranchConditional(InsnIterator insn, EmitState *state) const
2878 auto block = getBlock(state->currentBlock);
2879 ASSERT(block.branchInstruction == insn);
2881 auto condId = Object::ID(block.branchInstruction.word(1));
2882 auto trueBlockId = Block::ID(block.branchInstruction.word(2));
2883 auto falseBlockId = Block::ID(block.branchInstruction.word(3));
2885 auto cond = GenericValue(this, state->routine, condId);
2886 ASSERT_MSG(getType(getObject(condId).type).sizeInComponents == 1, "Condition must be a Boolean type scalar");
2888 // TODO: Optimize for case where all lanes take same path.
2890 state->addOutputActiveLaneMaskEdge(trueBlockId, cond.Int(0));
2891 state->addOutputActiveLaneMaskEdge(falseBlockId, ~cond.Int(0));
2893 return EmitResult::Terminator;
2896 SpirvShader::EmitResult SpirvShader::EmitSwitch(InsnIterator insn, EmitState *state) const
2898 auto block = getBlock(state->currentBlock);
2899 ASSERT(block.branchInstruction == insn);
2901 auto selId = Object::ID(block.branchInstruction.word(1));
2903 auto sel = GenericValue(this, state->routine, selId);
2904 ASSERT_MSG(getType(getObject(selId).type).sizeInComponents == 1, "Selector must be a scalar");
2906 auto numCases = (block.branchInstruction.wordCount() - 3) / 2;
2908 // TODO: Optimize for case where all lanes take same path.
2910 SIMD::Int defaultLaneMask = state->activeLaneMask();
2912 // Gather up the case label matches and calculate defaultLaneMask.
2913 std::vector<RValue<SIMD::Int>> caseLabelMatches;
2914 caseLabelMatches.reserve(numCases);
2915 for (uint32_t i = 0; i < numCases; i++)
2917 auto label = block.branchInstruction.word(i * 2 + 3);
2918 auto caseBlockId = Block::ID(block.branchInstruction.word(i * 2 + 4));
2919 auto caseLabelMatch = CmpEQ(sel.Int(0), SIMD::Int(label));
2920 state->addOutputActiveLaneMaskEdge(caseBlockId, caseLabelMatch);
2921 defaultLaneMask &= ~caseLabelMatch;
2924 auto defaultBlockId = Block::ID(block.branchInstruction.word(2));
2925 state->addOutputActiveLaneMaskEdge(defaultBlockId, defaultLaneMask);
2927 return EmitResult::Terminator;
2930 SpirvShader::EmitResult SpirvShader::EmitUnreachable(InsnIterator insn, EmitState *state) const
2932 // TODO: Log something in this case?
2933 state->setActiveLaneMask(SIMD::Int(0));
2934 return EmitResult::Terminator;
2937 SpirvShader::EmitResult SpirvShader::EmitReturn(InsnIterator insn, EmitState *state) const
2939 state->setActiveLaneMask(SIMD::Int(0));
2940 return EmitResult::Terminator;
2943 SpirvShader::EmitResult SpirvShader::EmitPhi(InsnIterator insn, EmitState *state) const
2945 auto routine = state->routine;
2946 auto typeId = Type::ID(insn.word(1));
2947 auto type = getType(typeId);
2948 auto objectId = Object::ID(insn.word(2));
2950 auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
2953 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
2955 auto varId = Object::ID(insn.word(w + 0));
2956 auto blockId = Block::ID(insn.word(w + 1));
2958 auto in = GenericValue(this, routine, varId);
2959 auto mask = state->getActiveLaneMaskEdge(blockId, state->currentBlock);
2961 for (uint32_t i = 0; i < type.sizeInComponents; i++)
2963 auto inMasked = in.Int(i) & mask;
2964 dst.replace(i, first ? inMasked : (dst.Int(i) | inMasked));
2969 return EmitResult::Continue;
2972 void SpirvShader::emitEpilog(SpirvRoutine *routine) const
2974 for (auto insn : *this)
2976 switch (insn.opcode())
2978 case spv::OpVariable:
2980 Object::ID resultId = insn.word(2);
2981 auto &object = getObject(resultId);
2982 auto &objectTy = getType(object.type);
2983 if (object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
2985 auto &dst = routine->getValue(resultId);
2987 VisitInterface(resultId,
2988 [&](Decorations const &d, AttribType type) {
2989 auto scalarSlot = d.Location << 2 | d.Component;
2990 routine->outputs[scalarSlot] = dst[offset++];
3001 SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end)
3003 // Default to a Simple, this may change later.
3004 kind = Block::Simple;
3006 // Walk the instructions to find the last two of the block.
3007 InsnIterator insns[2];
3008 for (auto insn : *this)
3010 insns[0] = insns[1];
3014 switch (insns[1].opcode())
3017 branchInstruction = insns[1];
3018 outs.emplace(Block::ID(branchInstruction.word(1)));
3020 switch (insns[0].opcode())
3022 case spv::OpLoopMerge:
3024 mergeInstruction = insns[0];
3025 mergeBlock = Block::ID(mergeInstruction.word(1));
3026 continueTarget = Block::ID(mergeInstruction.word(2));
3030 kind = Block::Simple;
3035 case spv::OpBranchConditional:
3036 branchInstruction = insns[1];
3037 outs.emplace(Block::ID(branchInstruction.word(2)));
3038 outs.emplace(Block::ID(branchInstruction.word(3)));
3040 switch (insns[0].opcode())
3042 case spv::OpSelectionMerge:
3043 kind = StructuredBranchConditional;
3044 mergeInstruction = insns[0];
3045 mergeBlock = Block::ID(mergeInstruction.word(1));
3048 case spv::OpLoopMerge:
3050 mergeInstruction = insns[0];
3051 mergeBlock = Block::ID(mergeInstruction.word(1));
3052 continueTarget = Block::ID(mergeInstruction.word(2));
3056 kind = UnstructuredBranchConditional;
3062 branchInstruction = insns[1];
3063 outs.emplace(Block::ID(branchInstruction.word(2)));
3064 for (uint32_t w = 4; w < branchInstruction.wordCount(); w += 2)
3066 outs.emplace(Block::ID(branchInstruction.word(w)));
3069 switch (insns[0].opcode())
3071 case spv::OpSelectionMerge:
3072 kind = StructuredSwitch;
3073 mergeInstruction = insns[0];
3074 mergeBlock = Block::ID(mergeInstruction.word(1));
3078 kind = UnstructuredSwitch;
3088 bool SpirvShader::existsPath(Block::ID from, Block::ID to) const
3090 // TODO: Optimize: This can be cached on the block.
3093 std::queue<Block::ID> pending;
3094 pending.emplace(from);
3096 while (pending.size() > 0)
3098 auto id = pending.front();
3100 for (auto out : getBlock(id).outs)
3102 if (seen.count(out) != 0) { continue; }
3103 if (out == to) { return true; }
3104 pending.emplace(out);
3112 void SpirvShader::EmitState::addOutputActiveLaneMaskEdge(Block::ID to, RValue<SIMD::Int> mask)
3114 addActiveLaneMaskEdge(currentBlock, to, mask & activeLaneMask());
3117 void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask)
3119 auto edge = Block::Edge{from, to};
3120 auto it = edgeActiveLaneMasks.find(edge);
3121 if (it == edgeActiveLaneMasks.end())
3123 edgeActiveLaneMasks.emplace(edge, mask);
3127 auto combined = it->second | mask;
3128 edgeActiveLaneMasks.erase(edge);
3129 edgeActiveLaneMasks.emplace(edge, combined);
3133 RValue<SIMD::Int> SpirvShader::EmitState::getActiveLaneMaskEdge(Block::ID from, Block::ID to)
3135 auto edge = Block::Edge{from, to};
3136 auto it = edgeActiveLaneMasks.find(edge);
3137 ASSERT_MSG(it != edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
3141 SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
3142 pipelineLayout(pipelineLayout)