1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include <spirv/unified1/spirv.hpp>
16 #include <spirv/unified1/GLSL.std.450.h>
17 #include "SpirvShader.hpp"
18 #include "System/Math.hpp"
19 #include "Vulkan/VkBuffer.hpp"
20 #include "Vulkan/VkDebug.hpp"
21 #include "Vulkan/VkPipelineLayout.hpp"
22 #include "Device/Config.hpp"
// Definition of the static counter from which the constructor takes each shader's
// serialID (it is post-incremented in the constructor's initializer list).
// NOTE(review): `volatile` does not make `serialCounter++` atomic. If shaders can be
// constructed from more than one thread this would need std::atomic<int> -- confirm
// against the callers and the declaration in the header.
26 volatile int SpirvShader::serialCounter = 1; // Start at 1, 0 is invalid shader.
// Analysis pass over the whole module, run at construction time.
// Walks every instruction of the shader and switches on its opcode to populate the
// tables consulted later by the emit functions: `decorations` / `memberDecorations`,
// `defs` (variables, constants and intermediate values) and `modes`.
// Each instance takes its serialID from the static serialCounter (post-incremented).
//   insns: the SPIR-V instruction words; asserted to be non-empty.
28 SpirvShader::SpirvShader(InsnStore const &insns)
29 : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
30 outputs{MAX_INTERFACE_COMPONENTS},
31 serialID{serialCounter++}, modes{}
33 ASSERT(insns.size() > 0);
35 // Simplifying assumptions (to be satisfied by earlier transformations)
36 // - There is exactly one entrypoint in the module, and it's the one we want
37 // - The only input/output OpVariables present are those used by the entrypoint
39 // TODO: Add real support for control flow. For now, track whether we've seen
40 // a label or a return already (if so, the shader does things we will mishandle).
41 // We expect there to be one of each in a simple shader -- the first and last instruction
42 // of the entrypoint function.
43 bool seenLabel = false;
44 bool seenReturn = false;
46 for (auto insn : *this)
48 switch (insn.opcode())
50 case spv::OpExecutionMode:
51 ProcessExecutionMode(insn);
// Operands as read below: word(1) = target id, word(2) = decoration,
// word(3) = optional literal argument (0 is passed when it is absent).
56 TypeOrObjectID targetId = insn.word(1);
57 auto decoration = static_cast<spv::Decoration>(insn.word(2));
58 decorations[targetId].Apply(
60 insn.wordCount() > 3 ? insn.word(3) : 0);
// A Centroid decoration anywhere in the module sets the module-wide NeedsCentroid flag.
62 if (decoration == spv::DecorationCentroid)
63 modes.NeedsCentroid = true;
67 case spv::OpMemberDecorate:
// Operands as read below: word(1) = target type id, word(2) = member index,
// word(3) = decoration, word(4) = optional literal argument (0 when absent).
69 TypeID targetId = insn.word(1);
70 auto memberIndex = insn.word(2);
71 auto &d = memberDecorations[targetId];
72 if (memberIndex >= d.size())
73 d.resize(memberIndex + 1); // on demand; exact size would require another pass...
74 auto decoration = static_cast<spv::Decoration>(insn.word(3));
77 insn.wordCount() > 4 ? insn.word(4) : 0);
79 if (decoration == spv::DecorationCentroid)
80 modes.NeedsCentroid = true;
84 case spv::OpDecorationGroup:
85 // Nothing to do here. We don't need to record the definition of the group; we'll just have
86 // the bundle of decorations float around. If we were to ever walk the decorations directly,
87 // we might think about introducing this as a real Object.
90 case spv::OpGroupDecorate:
// word(1) names the decoration group whose accumulated decorations are copied to each target.
92 auto const &srcDecorations = decorations[insn.word(1)];
93 for (auto i = 2u; i < insn.wordCount(); i++)
95 // remaining operands are targets to apply the group to.
96 decorations[insn.word(i)].Apply(srcDecorations);
101 case spv::OpGroupMemberDecorate:
103 auto const &srcDecorations = decorations[insn.word(1)];
104 for (auto i = 2u; i < insn.wordCount(); i += 2)
106 // remaining operands are pairs of <id>, literal for members to apply to.
107 auto &d = memberDecorations[insn.word(i)];
108 auto memberIndex = insn.word(i + 1);
109 if (memberIndex >= d.size())
110 d.resize(memberIndex + 1); // on demand resize, see above...
111 d[memberIndex].Apply(srcDecorations);
118 UNIMPLEMENTED("Shader contains multiple labels, has control flow");
124 UNIMPLEMENTED("Shader contains multiple returns, has control flow");
// Type-declaring opcodes share a single case body.
128 case spv::OpTypeVoid:
129 case spv::OpTypeBool:
131 case spv::OpTypeFloat:
132 case spv::OpTypeVector:
133 case spv::OpTypeMatrix:
134 case spv::OpTypeImage:
135 case spv::OpTypeSampler:
136 case spv::OpTypeSampledImage:
137 case spv::OpTypeArray:
138 case spv::OpTypeRuntimeArray:
139 case spv::OpTypeStruct:
140 case spv::OpTypePointer:
141 case spv::OpTypeFunction:
145 case spv::OpVariable:
// Operands as read below: word(1) = result (pointer) type, word(2) = result id,
// word(3) = storage class; any further word is an initializer, which is not supported.
147 TypeID typeId = insn.word(1);
148 ObjectID resultId = insn.word(2);
149 auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
150 if (insn.wordCount() > 4)
151 UNIMPLEMENTED("Variable initializers not yet supported");
153 auto &object = defs[resultId];
154 object.kind = Object::Kind::Variable;
155 object.definition = insn;
156 object.type = typeId;
157 object.pointerBase = insn.word(2); // base is itself
// The storage class on the variable must agree with the one recorded for its pointer type.
159 ASSERT(getType(typeId).storageClass == storageClass);
161 switch (storageClass)
163 case spv::StorageClassInput:
164 case spv::StorageClassOutput:
165 ProcessInterfaceVariable(object);
167 case spv::StorageClassUniform:
168 case spv::StorageClassStorageBuffer:
// Buffer-backed variables are re-tagged as PhysicalPointer; EmitLoad/EmitStore test for
// this kind to fetch the base address from the routine's physical pointers.
169 object.kind = Object::Kind::PhysicalPointer;
172 case spv::StorageClassPrivate:
173 case spv::StorageClassFunction:
174 break; // Correctly handled.
176 case spv::StorageClassUniformConstant:
177 case spv::StorageClassWorkgroup:
178 case spv::StorageClassCrossWorkgroup:
179 case spv::StorageClassGeneric:
180 case spv::StorageClassPushConstant:
181 case spv::StorageClassAtomicCounter:
182 case spv::StorageClassImage:
183 UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
187 UNREACHABLE("Unexpected StorageClass"); // See Appendix A of the Vulkan spec.
// Scalar constants: the single literal word (or the boolean encoding) goes into component 0.
193 case spv::OpConstant:
194 CreateConstant(insn).constantValue[0] = insn.word(3);
196 case spv::OpConstantFalse:
197 CreateConstant(insn).constantValue[0] = 0; // represent boolean false as zero
199 case spv::OpConstantTrue:
200 CreateConstant(insn).constantValue[0] = ~0u; // represent boolean true as all bits set
202 case spv::OpConstantNull:
204 // OpConstantNull forms a constant of arbitrary type, all zeros.
205 auto &object = CreateConstant(insn);
206 auto &objectTy = getType(object.type);
207 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
209 object.constantValue[i] = 0;
213 case spv::OpConstantComposite:
215 auto &object = CreateConstant(insn);
// Words 3 onward are constituent ids; each constituent's components are appended,
// in order, to this constant's storage.
217 for (auto i = 0u; i < insn.wordCount() - 3; i++)
219 auto &constituent = getObject(insn.word(i + 3));
220 auto &constituentTy = getType(constituent.type);
221 for (auto j = 0u; j < constituentTy.sizeInComponents; j++)
222 object.constantValue[offset++] = constituent.constantValue[j];
227 case spv::OpCapability:
228 // Various capabilities will be declared, but none affect our code generation at this point.
229 case spv::OpMemoryModel:
230 // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
231 case spv::OpEntryPoint:
232 case spv::OpFunction:
233 case spv::OpFunctionEnd:
234 // Due to preprocessing, the entrypoint and its function provide no value.
236 case spv::OpExtInstImport:
237 // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
238 // Valid shaders will not attempt to import any other instruction sets.
// The imported set's name is read as a C string starting at word 2 of the instruction.
239 if (0 != strcmp("GLSL.std.450", reinterpret_cast<char const *>(insn.wordPointer(2))))
241 UNIMPLEMENTED("Only GLSL extended instruction set is supported");
245 case spv::OpMemberName:
247 case spv::OpSourceContinued:
248 case spv::OpSourceExtension:
251 case spv::OpModuleProcessed:
253 // No semantic impact
256 case spv::OpFunctionParameter:
257 case spv::OpFunctionCall:
258 case spv::OpSpecConstant:
259 case spv::OpSpecConstantComposite:
260 case spv::OpSpecConstantFalse:
261 case spv::OpSpecConstantOp:
262 case spv::OpSpecConstantTrue:
263 // These should have all been removed by preprocessing passes. If we see them here,
264 // our assumptions are wrong and we will probably generate wrong code.
265 UNIMPLEMENTED("These instructions should have already been lowered.");
268 case spv::OpFConvert:
269 case spv::OpSConvert:
270 case spv::OpUConvert:
271 UNIMPLEMENTED("No valid uses for Op*Convert until we support multiple bit widths");
275 case spv::OpAccessChain:
276 case spv::OpCompositeConstruct:
277 case spv::OpCompositeInsert:
278 case spv::OpCompositeExtract:
279 case spv::OpVectorShuffle:
280 case spv::OpNot: // Unary ops
283 case spv::OpLogicalNot:
284 case spv::OpIAdd: // Binary ops
293 case spv::OpFOrdEqual:
294 case spv::OpFUnordEqual:
295 case spv::OpFOrdNotEqual:
296 case spv::OpFUnordNotEqual:
297 case spv::OpFOrdLessThan:
298 case spv::OpFUnordLessThan:
299 case spv::OpFOrdGreaterThan:
300 case spv::OpFUnordGreaterThan:
301 case spv::OpFOrdLessThanEqual:
302 case spv::OpFUnordLessThanEqual:
303 case spv::OpFOrdGreaterThanEqual:
304 case spv::OpFUnordGreaterThanEqual:
307 case spv::OpINotEqual:
308 case spv::OpUGreaterThan:
309 case spv::OpSGreaterThan:
310 case spv::OpUGreaterThanEqual:
311 case spv::OpSGreaterThanEqual:
312 case spv::OpULessThan:
313 case spv::OpSLessThan:
314 case spv::OpULessThanEqual:
315 case spv::OpSLessThanEqual:
316 case spv::OpShiftRightLogical:
317 case spv::OpShiftRightArithmetic:
318 case spv::OpShiftLeftLogical:
319 case spv::OpBitwiseOr:
320 case spv::OpBitwiseXor:
321 case spv::OpBitwiseAnd:
322 case spv::OpLogicalOr:
323 case spv::OpLogicalAnd:
324 case spv::OpLogicalEqual:
325 case spv::OpLogicalNotEqual:
326 case spv::OpUMulExtended:
327 case spv::OpSMulExtended:
329 case spv::OpConvertFToU:
330 case spv::OpConvertFToS:
331 case spv::OpConvertSToF:
332 case spv::OpConvertUToF:
340 // Instructions that yield an intermediate value
// word(1) = result type, word(2) = result id; the result is recorded in `defs` as a Value.
342 TypeID typeId = insn.word(1);
343 ObjectID resultId = insn.word(2);
344 auto &object = defs[resultId];
345 object.type = typeId;
346 object.kind = Object::Kind::Value;
347 object.definition = insn;
349 if (insn.opcode() == spv::OpAccessChain)
351 // interior ptr has two parts:
352 // - logical base ptr, common across all lanes and known at compile time
// The logical base is inherited from the object being indexed (word 3), so an access
// chain built on another access chain still refers to the same base object.
354 ObjectID baseId = insn.word(3);
355 object.pointerBase = getObject(baseId).pointerBase;
361 // Don't need to do anything during analysis pass
365 modes.ContainsKill = true;
369 UNIMPLEMENTED(OpcodeName(insn.opcode()).c_str());
// Records the type declared by `insn` in `types`, keyed by its result id (word 1).
// Stores the defining instruction and the size computed by ComputeTypeSize, then fills
// in opcode-specific fields:
//   - OpTypeStruct:  isBuiltInBlock, derived from the struct's member decorations;
//   - OpTypePointer: element (word 3), storageClass (word 2), and isBuiltInBlock
//                    inherited from the pointee type;
//   - vector / matrix / array / runtime array: element (word 2).
374 void SpirvShader::DeclareType(InsnIterator insn)
376 TypeID resultId = insn.word(1);
378 auto &type = types[resultId];
379 type.definition = insn;
380 type.sizeInComponents = ComputeTypeSize(insn);
382 // A structure is a builtin block if it has a builtin
383 // member. All members of such a structure are builtins.
384 switch (insn.opcode())
386 case spv::OpTypeStruct:
// Member decorations are looked up with find() so that a struct without any
// does not get an empty entry inserted into the map.
388 auto d = memberDecorations.find(resultId);
389 if (d != memberDecorations.end())
391 for (auto &m : d->second)
395 type.isBuiltInBlock = true;
402 case spv::OpTypePointer:
404 TypeID elementTypeId = insn.word(3);
405 type.element = elementTypeId;
// Propagate the built-in-block marker from the pointee to the pointer type;
// ProcessInterfaceVariable tests it on the variable's pointer type.
406 type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
407 type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
410 case spv::OpTypeVector:
411 case spv::OpTypeMatrix:
412 case spv::OpTypeArray:
413 case spv::OpTypeRuntimeArray:
415 TypeID elementTypeId = insn.word(2);
416 type.element = elementTypeId;
// Registers the result of a constant-defining instruction in `defs`.
// word(1) is the constant's type id and word(2) its result id. The object is marked as
// Kind::Constant and given freshly allocated storage of sizeInComponents 32-bit words.
// The storage is not initialised here; the constructor fills it in through the
// Object reference that this function is declared to return.
424 SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
426 TypeID typeId = insn.word(1);
427 ObjectID resultId = insn.word(2);
428 auto &object = defs[resultId];
429 auto &objectTy = getType(typeId);
430 object.type = typeId;
431 object.kind = Object::Kind::Constant;
432 object.definition = insn;
433 object.constantValue = std::unique_ptr<uint32_t[]>(new uint32_t[objectTy.sizeInComponents]);
// Registers an Input or Output OpVariable in the shader's interface tables.
//   object: the variable's entry in `defs`; its type must be an OpTypePointer with
//           storage class Input or Output (both asserted).
// Built-ins are recorded in inputBuiltins / outputBuiltins as
// {variable id, first component offset, size in components}: one entry per built-in
// member for a built-in block, or a single entry for a variable decorated BuiltIn.
// User-defined variables are marked Kind::InterfaceVariable and every scalar component
// is written into `inputs` / `outputs` via VisitInterface.
437 void SpirvShader::ProcessInterfaceVariable(Object &object)
439 auto &objectTy = getType(object.type);
440 ASSERT(objectTy.storageClass == spv::StorageClassInput || objectTy.storageClass == spv::StorageClassOutput);
442 ASSERT(objectTy.definition.opcode() == spv::OpTypePointer);
// NOTE(review): `auto` without `&` copies the Type here, unlike the reference taken for
// objectTy above; `structType` below then refers into this local copy. Looks harmless
// within this function, but confirm a reference was not intended.
443 auto pointeeTy = getType(objectTy.element);
// Select the table pair that matches the variable's direction.
445 auto &builtinInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputBuiltins : outputBuiltins;
446 auto &userDefinedInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputs : outputs;
448 ASSERT(object.definition.opcode() == spv::OpVariable);
449 ObjectID resultId = object.definition.word(2);
451 if (objectTy.isBuiltInBlock)
453 // walk the builtin block, registering each of its members separately.
454 auto m = memberDecorations.find(objectTy.element);
455 ASSERT(m != memberDecorations.end()); // otherwise we wouldn't have marked the type chain
456 auto &structType = pointeeTy.definition;
459 for (auto &member : m->second)
461 auto &memberType = getType(structType.word(word));
463 if (member.HasBuiltIn)
465 builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
// The running offset advances past every member, built-in or not.
468 offset += memberType.sizeInComponents;
474 auto d = decorations.find(resultId);
475 if (d != decorations.end() && d->second.HasBuiltIn)
477 builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
481 object.kind = Object::Kind::InterfaceVariable;
482 VisitInterface(resultId,
483 [&userDefinedInterface](Decorations const &d, AttribType type) {
484 // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
// Slot index = Location * 4 + Component (four scalar slots per location).
485 auto scalarSlot = (d.Location << 2) | d.Component;
486 ASSERT(scalarSlot >= 0 &&
487 scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
489 auto &slot = userDefinedInterface[scalarSlot];
492 slot.NoPerspective = d.NoPerspective;
493 slot.Centroid = d.Centroid;
// Records the effect of one OpExecutionMode instruction in `modes`.
// The mode is read from word(2). Modes with no case here are reported through
// UNIMPLEMENTED.
498 void SpirvShader::ProcessExecutionMode(InsnIterator insn)
500 auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
503 case spv::ExecutionModeEarlyFragmentTests:
504 modes.EarlyFragmentTests = true;
506 case spv::ExecutionModeDepthReplacing:
507 modes.DepthReplacing = true;
509 case spv::ExecutionModeDepthGreater:
510 modes.DepthGreater = true;
512 case spv::ExecutionModeDepthLess:
513 modes.DepthLess = true;
515 case spv::ExecutionModeDepthUnchanged:
516 modes.DepthUnchanged = true;
518 case spv::ExecutionModeLocalSize:
// Three literal operands follow the mode: X = word(3), Y = word(4), Z = word(5).
// The assignments are written in X, Z, Y order, but each reads the matching word.
519 modes.LocalSizeX = insn.word(3);
520 modes.LocalSizeZ = insn.word(5);
521 modes.LocalSizeY = insn.word(4);
523 case spv::ExecutionModeOriginUpperLeft:
524 // This is always the case for a Vulkan shader. Do nothing.
527 UNIMPLEMENTED("No other execution modes are permitted");
// Computes the size of the type declared by `insn`, measured in 32-bit components.
//   insn: a type-declaring instruction; any other opcode is reported via UNIMPLEMENTED.
// Composite sizes are derived from the already-recorded sizes of their parts:
// vectors and matrices multiply the element size (type id in word 2) by the count in
// word 3, arrays multiply it by the integer constant named by word 3, and structs sum
// the sizes of all member types (words 2 onward).
531 uint32_t SpirvShader::ComputeTypeSize(sw::SpirvShader::InsnIterator insn)
533 // Types are always built from the bottom up (with the exception of forward ptrs, which
534 // don't appear in Vulkan shaders). Therefore, we can always assume our component parts have
535 // already been described (and so their sizes determined)
536 switch (insn.opcode())
538 case spv::OpTypeVoid:
539 case spv::OpTypeSampler:
540 case spv::OpTypeImage:
541 case spv::OpTypeSampledImage:
542 case spv::OpTypeFunction:
543 case spv::OpTypeRuntimeArray:
544 // Objects that don't consume any space.
545 // Descriptor-backed objects currently only need exist at compile-time.
546 // Runtime arrays don't appear in places where their size would be interesting
549 case spv::OpTypeBool:
550 case spv::OpTypeFloat:
552 // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
553 // we might need to change this, but only 32 bit components are required for Vulkan 1.1.
556 case spv::OpTypeVector:
557 case spv::OpTypeMatrix:
558 // Vectors and matrices both consume element count * element size.
559 return getType(insn.word(2)).sizeInComponents * insn.word(3);
561 case spv::OpTypeArray:
563 // Element count * element size. Array sizes come from constant ids.
564 auto arraySize = GetConstantInt(insn.word(3));
565 return getType(insn.word(2)).sizeInComponents * arraySize;
568 case spv::OpTypeStruct:
// Member type ids start at word 2; the struct's size is the sum of its members' sizes.
571 for (uint32_t i = 2u; i < insn.wordCount(); i++)
573 size += getType(insn.word(i)).sizeInComponents;
578 case spv::OpTypePointer:
579 // Runtime representation of a pointer is a per-lane index.
580 // Note: clients are expected to look through the pointer if they want the pointee size instead.
584 // Some other random insn.
585 UNIMPLEMENTED("Only types are supported");
// Classifies a storage class for the addressing done in EmitLoad / EmitStore: when the
// result is true, those callers scale each component offset by SIMD::Width and add the
// lane index. Uniform and StorageBuffer share a single case below.
590 bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
592 switch (storageClass)
594 case spv::StorageClassUniform:
595 case spv::StorageClassStorageBuffer:
// Recursive worker for VisitInterface.
//   id: the type to walk.
//   d:  decorations collected so far. Taken by value, so changes made while descending
//       into this subtree are not seen by the caller.
//   f:  functor invoked as f(decorations, AttribType) for every scalar component reached.
603 int SpirvShader::VisitInterfaceInner(TypeID id, Decorations d, F f) const
605 // Recursively walks variable definition and its type tree, taking into account
606 // any explicit Location or Component decorations encountered; where explicit
607 // Locations or Components are not specified, assigns them sequentially.
608 // Collected decorations are carried down toward the leaves and across
609 // siblings; Effect of decorations intentionally does not flow back up the tree.
611 // F is a functor to be called with the effective decoration set for every component.
613 // Returns the next available location, and calls f().
615 // This covers the rules in Vulkan 1.1 spec, 14.1.4 Location Assignment.
617 ApplyDecorationsForId(&d, id);
619 auto const &obj = getType(id);
620 switch (obj.definition.opcode())
622 case spv::OpTypePointer:
// Look through the pointer: word(3) of OpTypePointer is the pointee type id.
623 return VisitInterfaceInner<F>(obj.definition.word(3), d, f);
624 case spv::OpTypeMatrix:
// word(2) is the column type and word(3) the column count; each column takes one location.
625 for (auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
627 // consumes same components of N consecutive locations
628 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
631 case spv::OpTypeVector:
632 for (auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
634 // consumes N consecutive components in the same location
635 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
637 return d.Location + 1;
638 case spv::OpTypeFloat:
639 f(d, ATTRIBTYPE_FLOAT);
640 return d.Location + 1;
// A nonzero word(3) selects ATTRIBTYPE_INT, zero selects ATTRIBTYPE_UINT.
642 f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
643 return d.Location + 1;
644 case spv::OpTypeBool:
// Booleans are exposed to f as unsigned integers.
645 f(d, ATTRIBTYPE_UINT);
646 return d.Location + 1;
647 case spv::OpTypeStruct:
649 // iterate over members, which may themselves have Location/Component decorations
650 for (auto i = 0u; i < obj.definition.wordCount() - 2; i++)
652 ApplyDecorationsForIdMember(&d, id, i);
// Each member starts at the location following the one the previous member ended on.
653 d.Location = VisitInterfaceInner<F>(obj.definition.word(i + 2), d, f);
654 d.Component = 0; // Implicit locations always have component=0
658 case spv::OpTypeArray:
// The array length is the integer constant named by word(3); elements occupy
// consecutive locations.
660 auto arraySize = GetConstantInt(obj.definition.word(3));
661 for (auto i = 0u; i < arraySize; i++)
663 d.Location = VisitInterfaceInner<F>(obj.definition.word(2), d, f);
668 // Intentionally partial; most opcodes do not participate in type hierarchies
// Entry point for walking an interface variable.
//   id: result id of an OpVariable (asserted).
//   f:  functor called once per scalar component with the effective decorations and the
//       component's AttribType.
// Applies the variable's own decorations first, then walks its type (word 1 of the
// OpVariable) with VisitInterfaceInner. The location returned by the walk is discarded.
674 void SpirvShader::VisitInterface(ObjectID id, F f) const
676 // Walk a variable definition and call f for each component in it.
678 ApplyDecorationsForId(&d, id);
680 auto def = getObject(id).definition;
681 ASSERT(def.opcode() == spv::OpVariable);
682 VisitInterfaceInner<F>(def.word(1), d, f);
// Computes the per-lane offset, in components, selected by an access chain.
//   id:         the base object being indexed (a variable or a previous access chain).
//   numIndexes: number of entries in indexIds.
//   indexIds:   ids of the index operands, outermost first.
//   routine:    supplies the runtime (intermediate) values of non-constant indices.
// Returns the sum of a compile-time part (constantOffset) and a runtime part
// (dynamicOffset), broadcast/combined as a SIMD::Int.
685 SIMD::Int SpirvShader::WalkAccessChain(ObjectID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
687 // TODO: think about explicit layout (UBO/SSBO) storage classes
688 // TODO: avoid doing per-lane work in some cases if we can?
690 int constantOffset = 0;
691 SIMD::Int dynamicOffset = SIMD::Int(0);
692 auto &baseObject = getObject(id);
// Start from the pointee type of the base pointer; typeId is narrowed at every index.
693 TypeID typeId = getType(baseObject.type).element;
695 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
696 // Start with its offset and build from there.
697 if (baseObject.kind == Object::Kind::Value)
698 dynamicOffset += As<SIMD::Int>(routine->getIntermediate(id)[0]);
700 for (auto i = 0u; i < numIndexes; i++)
702 auto & type = getType(typeId);
703 switch (type.definition.opcode())
705 case spv::OpTypeStruct:
// Struct member indices are read with GetConstantInt, which asserts they are integer
// constants. The member's offset is the sum of the sizes of all preceding members.
707 int memberIndex = GetConstantInt(indexIds[i]);
708 int offsetIntoStruct = 0;
709 for (auto j = 0; j < memberIndex; j++) {
710 auto memberType = type.definition.word(2u + j);
711 offsetIntoStruct += getType(memberType).sizeInComponents;
713 constantOffset += offsetIntoStruct;
714 typeId = type.definition.word(2u + memberIndex);
718 case spv::OpTypeVector:
719 case spv::OpTypeMatrix:
720 case spv::OpTypeArray:
// Element index times element size. A constant index folds into constantOffset;
// any other index contributes its runtime value to dynamicOffset.
722 auto stride = getType(type.element).sizeInComponents;
723 auto & obj = getObject(indexIds[i]);
724 if (obj.kind == Object::Kind::Constant)
725 constantOffset += stride * GetConstantInt(indexIds[i]);
727 dynamicOffset += SIMD::Int(stride) * As<SIMD::Int>(routine->getIntermediate(indexIds[i])[0]);
728 typeId = type.element;
733 UNIMPLEMENTED("Unexpected type '%s' in WalkAccessChain", OpcodeName(type.definition.opcode()).c_str());
737 return dynamicOffset + SIMD::Int(constantOffset);
// Computes the component offset selected by a chain of literal indices.
//   typeId:     the composite type being indexed.
//   numIndexes: number of entries in indexes.
//   indexes:    the index values themselves (not ids), outermost first.
// Returns the offset in components from the start of the composite.
// Same walk as WalkAccessChain, but with no runtime part because every index is a literal.
740 uint32_t SpirvShader::WalkLiteralAccessChain(TypeID typeId, uint32_t numIndexes, uint32_t const *indexes) const
742 uint32_t constantOffset = 0;
744 for (auto i = 0u; i < numIndexes; i++)
746 auto & type = getType(typeId);
747 switch (type.definition.opcode())
749 case spv::OpTypeStruct:
// Offset of a member = sum of the sizes of the members that precede it.
751 int memberIndex = indexes[i];
752 int offsetIntoStruct = 0;
753 for (auto j = 0; j < memberIndex; j++) {
754 auto memberType = type.definition.word(2u + j);
755 offsetIntoStruct += getType(memberType).sizeInComponents;
757 constantOffset += offsetIntoStruct;
758 typeId = type.definition.word(2u + memberIndex);
762 case spv::OpTypeVector:
763 case spv::OpTypeMatrix:
764 case spv::OpTypeArray:
// Element type id is word(2) of the type declaration; offset = element size * index.
766 auto elementType = type.definition.word(2);
767 auto stride = getType(elementType).sizeInComponents;
768 constantOffset += stride * indexes[i];
769 typeId = elementType;
774 UNIMPLEMENTED("Unexpected type in WalkLiteralAccessChain");
778 return constantOffset;
// Records a single decoration on this decoration set.
//   decoration: the decoration kind.
//   arg:        the decoration's literal operand; callers in the constructor pass 0 when
//               the instruction carries none.
// Decorations that carry a value store `arg` (cast to the field's type) alongside a
// Has* flag where one is shown; flag-style decorations set a boolean. Decorations with
// no case here are ignored.
781 void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
785 case spv::DecorationLocation:
787 Location = static_cast<int32_t>(arg);
789 case spv::DecorationComponent:
793 case spv::DecorationDescriptorSet:
794 HasDescriptorSet = true;
797 case spv::DecorationBinding:
801 case spv::DecorationBuiltIn:
803 BuiltIn = static_cast<spv::BuiltIn>(arg);
805 case spv::DecorationFlat:
808 case spv::DecorationNoPerspective:
809 NoPerspective = true;
811 case spv::DecorationCentroid:
814 case spv::DecorationBlock:
817 case spv::DecorationBufferBlock:
820 case spv::DecorationOffset:
822 Offset = static_cast<int32_t>(arg);
824 case spv::DecorationArrayStride:
825 HasArrayStride = true;
826 ArrayStride = static_cast<int32_t>(arg);
828 case spv::DecorationMatrixStride:
829 HasMatrixStride = true;
830 MatrixStride = static_cast<int32_t>(arg);
833 // Intentionally partial, there are many decorations we just don't care about.
// Merges another decoration set into this one.
//   src: the decorations to merge in (e.g. those of a decoration group).
// Valued decorations are guarded by src's Has* flag, so a value src does not carry
// leaves the current value untouched. Boolean decorations are OR-ed in, so a flag that
// is already set here is never cleared.
838 void SpirvShader::Decorations::Apply(const sw::SpirvShader::Decorations &src)
840 // Apply a decoration group to this set of decorations
844 BuiltIn = src.BuiltIn;
850 Location = src.Location;
853 if (src.HasComponent)
856 Component = src.Component;
859 if (src.HasDescriptorSet)
861 HasDescriptorSet = true;
862 DescriptorSet = src.DescriptorSet;
868 Binding = src.Binding;
877 if (src.HasArrayStride)
879 HasArrayStride = true;
880 ArrayStride = src.ArrayStride;
883 if (src.HasMatrixStride)
885 HasMatrixStride = true;
886 MatrixStride = src.MatrixStride;
890 NoPerspective |= src.NoPerspective;
891 Centroid |= src.Centroid;
893 BufferBlock |= src.BufferBlock;
// Merges the decorations recorded for `id` (a type or an object) into *d.
// Does nothing when no decorations were recorded for that id. The lookup uses find(),
// so the `decorations` map is not modified.
896 void SpirvShader::ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const
898 auto it = decorations.find(id);
899 if (it != decorations.end())
900 d->Apply(it->second);
// Merges the decorations recorded for member `member` of type `id` into *d.
// Does nothing when the type has no member decorations, or when `member` lies beyond the
// entries recorded for it (the per-type vector is only grown on demand by the constructor).
903 void SpirvShader::ApplyDecorationsForIdMember(Decorations *d, TypeID id, uint32_t member) const
905 auto it = memberDecorations.find(id);
906 if (it != memberDecorations.end() && member < it->second.size())
908 d->Apply(it->second[member]);
// Reads an integer constant at analysis time.
//   id: must name an object defined by OpConstant whose type (word 1 of that
//       instruction) is an OpTypeInt; both conditions are asserted.
// Used in this file for array lengths and constant access-chain indices.
912 uint32_t SpirvShader::GetConstantInt(ObjectID id) const
914 // Slightly hackish access to constants very early in translation.
915 // General consumption of constants by other instructions should
916 // probably be just lowered to Reactor.
918 // TODO: not encountered yet since we only use this for array sizes etc,
919 // but is possible to construct integer constant 0 via OpConstantNull.
920 auto insn = getObject(id).definition;
921 ASSERT(insn.opcode() == spv::OpConstant);
922 ASSERT(getType(insn.word(1)).definition.opcode() == spv::OpTypeInt);
// Prolog of code generation: walks all instructions and, for every OpVariable whose
// pointee type occupies at least one component, creates an lvalue of that many
// components in the routine, keyed by the variable's result id (word 2).
928 void SpirvShader::emitProlog(SpirvRoutine *routine) const
930 for (auto insn : *this)
932 switch (insn.opcode())
934 case spv::OpVariable:
936 ObjectID resultId = insn.word(2);
937 auto &object = getObject(resultId);
938 auto &objectTy = getType(object.type);
// The variable's type is a pointer; storage is sized by what it points to.
939 auto &pointeeTy = getType(objectTy.element);
940 // TODO: what to do about zero-slot objects?
941 if (pointeeTy.sizeInComponents > 0)
943 routine->createLvalue(insn.word(2), pointeeTy.sizeInComponents);
948 // Nothing else produces interface variables, so can all be safely ignored.
// Emit pass: walks every instruction again and dispatches on its opcode to the matching
// Emit* helper. The first group of cases (types, constants, decorations, module-level and
// debug instructions) needs no work here. Opcodes without a case are reported via
// UNIMPLEMENTED with the opcode's name.
//   routine: the routine being built; passed through to every Emit* helper.
954 void SpirvShader::emit(SpirvRoutine *routine) const
956 for (auto insn : *this)
958 switch (insn.opcode())
960 case spv::OpTypeVoid:
962 case spv::OpTypeFloat:
963 case spv::OpTypeBool:
964 case spv::OpTypeVector:
965 case spv::OpTypeArray:
966 case spv::OpTypeRuntimeArray:
967 case spv::OpTypeMatrix:
968 case spv::OpTypeStruct:
969 case spv::OpTypePointer:
970 case spv::OpTypeFunction:
971 case spv::OpExecutionMode:
972 case spv::OpMemoryModel:
973 case spv::OpFunction:
974 case spv::OpFunctionEnd:
975 case spv::OpConstant:
976 case spv::OpConstantNull:
977 case spv::OpConstantTrue:
978 case spv::OpConstantFalse:
979 case spv::OpConstantComposite:
980 case spv::OpExtension:
981 case spv::OpCapability:
982 case spv::OpEntryPoint:
983 case spv::OpExtInstImport:
984 case spv::OpDecorate:
985 case spv::OpMemberDecorate:
986 case spv::OpGroupDecorate:
987 case spv::OpGroupMemberDecorate:
988 case spv::OpDecorationGroup:
990 case spv::OpMemberName:
992 case spv::OpSourceContinued:
993 case spv::OpSourceExtension:
996 case spv::OpModuleProcessed:
998 // Nothing to do at emit time. These are either fully handled at analysis time,
999 // or don't require any work at all.
1004 // TODO: when we do control flow, will need to do some work here.
1005 // Until then, there is nothing to do -- we expect there to be an initial OpLabel
1006 // in the entrypoint function, for which we do nothing; and a final OpReturn at the
1007 // end of the entrypoint function, for which we do nothing.
// Memory and composite instructions each have a dedicated helper.
1010 case spv::OpVariable:
1011 EmitVariable(insn, routine);
1015 EmitLoad(insn, routine);
1019 EmitStore(insn, routine);
1022 case spv::OpAccessChain:
1023 EmitAccessChain(insn, routine);
1026 case spv::OpCompositeConstruct:
1027 EmitCompositeConstruct(insn, routine);
1030 case spv::OpCompositeInsert:
1031 EmitCompositeInsert(insn, routine);
1034 case spv::OpCompositeExtract:
1035 EmitCompositeExtract(insn, routine);
1038 case spv::OpVectorShuffle:
1039 EmitVectorShuffle(insn, routine);
// Unary operations share EmitUnaryOp.
1043 case spv::OpSNegate:
1044 case spv::OpFNegate:
1045 case spv::OpLogicalNot:
1046 case spv::OpConvertFToU:
1047 case spv::OpConvertFToS:
1048 case spv::OpConvertSToF:
1049 case spv::OpConvertUToF:
1050 case spv::OpBitcast:
1053 EmitUnaryOp(insn, routine);
// Binary operations share EmitBinaryOp.
1065 case spv::OpFOrdEqual:
1066 case spv::OpFUnordEqual:
1067 case spv::OpFOrdNotEqual:
1068 case spv::OpFUnordNotEqual:
1069 case spv::OpFOrdLessThan:
1070 case spv::OpFUnordLessThan:
1071 case spv::OpFOrdGreaterThan:
1072 case spv::OpFUnordGreaterThan:
1073 case spv::OpFOrdLessThanEqual:
1074 case spv::OpFUnordLessThanEqual:
1075 case spv::OpFOrdGreaterThanEqual:
1076 case spv::OpFUnordGreaterThanEqual:
1079 case spv::OpINotEqual:
1080 case spv::OpUGreaterThan:
1081 case spv::OpSGreaterThan:
1082 case spv::OpUGreaterThanEqual:
1083 case spv::OpSGreaterThanEqual:
1084 case spv::OpULessThan:
1085 case spv::OpSLessThan:
1086 case spv::OpULessThanEqual:
1087 case spv::OpSLessThanEqual:
1088 case spv::OpShiftRightLogical:
1089 case spv::OpShiftRightArithmetic:
1090 case spv::OpShiftLeftLogical:
1091 case spv::OpBitwiseOr:
1092 case spv::OpBitwiseXor:
1093 case spv::OpBitwiseAnd:
1094 case spv::OpLogicalOr:
1095 case spv::OpLogicalAnd:
1096 case spv::OpLogicalEqual:
1097 case spv::OpLogicalNotEqual:
1098 case spv::OpUMulExtended:
1099 case spv::OpSMulExtended:
1100 EmitBinaryOp(insn, routine);
1104 EmitDot(insn, routine);
1108 EmitSelect(insn, routine);
1111 case spv::OpExtInst:
1112 EmitExtendedInstruction(insn, routine);
1116 EmitAny(insn, routine);
1120 EmitAll(insn, routine);
1124 UNIMPLEMENTED(OpcodeName(insn.opcode()).c_str());
// Emits the setup code for one OpVariable, depending on its storage class.
//   insn:    the OpVariable instruction; word(2) is its result id.
//   routine: the routine being built.
// Input interface variables are filled from routine->inputs, one scalar slot per
// component. Uniform / StorageBuffer variables have the address of their backing buffer
// data computed from the descriptor set and stored in routine->physicalPointers.
1130 void SpirvShader::EmitVariable(InsnIterator insn, SpirvRoutine *routine) const
1132 ObjectID resultId = insn.word(2);
1133 auto &object = getObject(resultId);
1134 auto &objectTy = getType(object.type);
1135 switch (objectTy.storageClass)
1137 case spv::StorageClassInput:
1139 if (object.kind == Object::Kind::InterfaceVariable)
1141 auto &dst = routine->getValue(resultId);
1143 VisitInterface(resultId,
1144 [&](Decorations const &d, AttribType type) {
// Same slot numbering as ProcessInterfaceVariable: Location * 4 + Component.
1145 auto scalarSlot = d.Location << 2 | d.Component;
1146 dst[offset++] = routine->inputs[scalarSlot];
1151 case spv::StorageClassUniform:
1152 case spv::StorageClassStorageBuffer:
1155 ApplyDecorationsForId(&d, resultId);
1156 ASSERT(d.DescriptorSet >= 0);
1157 ASSERT(d.Binding >= 0);
// bindingOffset is computed on the host while the routine is built; the loads below
// are Reactor operations that become part of the generated routine.
1159 size_t bindingOffset = routine->pipelineLayout->getBindingOffset(d.DescriptorSet, d.Binding);
1161 Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
1162 Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // VkDescriptorBufferInfo*
1163 Pointer<Byte> buffer = *Pointer<Pointer<Byte>>(binding + OFFSET(VkDescriptorBufferInfo, buffer)); // vk::Buffer*
1164 Pointer<Byte> data = *Pointer<Pointer<Byte>>(buffer + vk::Buffer::DataOffset); // void*
// NOTE(review): `offset` is read as a 32-bit Int; VkDescriptorBufferInfo::offset is a
// VkDeviceSize in the Vulkan headers -- confirm truncation is acceptable here.
1165 Int offset = *Pointer<Int>(binding + OFFSET(VkDescriptorBufferInfo, offset));
1166 Pointer<Byte> address = data + offset;
1167 routine->physicalPointers[resultId] = address;
// Emits code for one load instruction.
//   insn:    word(1) = result type, word(2) = result id, word(3) = pointer id.
//   routine: the routine being built; receives the result as a new intermediate.
// The base address comes from the pointer's base object: a physical pointer for
// PhysicalPointer objects, otherwise the start of the base variable's value storage.
// Three addressing cases are handled: per-lane (divergent) offsets produced by an access
// chain, lane-interleaved storage without offsets, and non-interleaved storage without
// offsets.
1175 void SpirvShader::EmitLoad(InsnIterator insn, SpirvRoutine *routine) const
1177 ObjectID objectId = insn.word(2);
1178 ObjectID pointerId = insn.word(3);
1179 auto &object = getObject(objectId);
1180 auto &objectTy = getType(object.type);
1181 auto &pointer = getObject(pointerId);
1182 auto &pointerBase = getObject(pointer.pointerBase);
1183 auto &pointerBaseTy = getType(pointerBase.type);
// Sanity checks: the pointer's pointee type and the instruction's result type must both
// match the type recorded for the result object.
1185 ASSERT(getType(pointer.type).element == object.type);
1186 ASSERT(TypeID(insn.word(1)) == object.type);
1188 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1190 UNIMPLEMENTED("StorageClassImage load not yet implemented");
1193 Pointer<Float> ptrBase;
1194 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1196 ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1200 ptrBase = &routine->getValue(pointer.pointerBase)[0];
1203 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1205 auto &dst = routine->createIntermediate(objectId, objectTy.sizeInComponents);
// A pointer of kind Value was produced by an access chain; its intermediate holds the
// per-lane component offsets.
1207 if (pointer.kind == Object::Kind::Value)
1209 // Divergent offsets.
1210 auto offsets = As<SIMD::Int>(routine->getIntermediate(pointerId)[0]);
1211 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
1213 // i wish i had a Float,Float,Float,Float constructor here..
1215 for (int j = 0; j < SIMD::Width; j++)
1217 Int offset = Int(i) + Extract(offsets, j);
// In lane-interleaved storage, component k of lane j lives at k * SIMD::Width + j.
1218 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1219 v = Insert(v, ptrBase[offset], j);
1224 else if (interleavedByLane)
1226 // Lane-interleaved data. No divergent offsets.
1227 Pointer<SIMD::Float> src = ptrBase;
1228 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
1230 dst.emplace(i, src[i]);
1235 // Non-interleaved data. No divergent offsets.
1236 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
1238 dst.emplace(i, RValue<SIMD::Float>(ptrBase[i]));
// Emits code for one OpAccessChain.
//   insn:    word(1) = result type, word(2) = result id, word(3) = base id,
//            words 4 onward = index ids.
//   routine: the routine being built.
// The result is a single-component intermediate holding the per-lane component offset
// computed by WalkAccessChain, stored bit-cast to SIMD::Float. EmitLoad and
// WalkAccessChain read it back with As<SIMD::Int>.
1243 void SpirvShader::EmitAccessChain(InsnIterator insn, SpirvRoutine *routine) const
1245 TypeID typeId = insn.word(1);
1246 ObjectID objectId = insn.word(2);
1247 ObjectID baseId = insn.word(3);
1248 auto &object = getObject(objectId);
1249 auto &type = getType(typeId);
// The result type is a pointer, which is asserted to occupy exactly one component.
1250 ASSERT(type.sizeInComponents == 1);
// The analysis pass made the result inherit the base's pointerBase; verify that here.
1251 ASSERT(getObject(baseId).pointerBase == object.pointerBase);
1253 auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
1254 dst.emplace(0, As<SIMD::Float>(WalkAccessChain(baseId, insn.wordCount() - 4, insn.wordPointer(4), routine)));
// Emits the code for a store: writes the object named by word 2 through the
// pointer named by word 1.
// The source is either a constant or a previously emitted intermediate, and
// the destination is addressed either with per-lane (divergent) offsets or
// directly from the base pointer.
1257 void SpirvShader::EmitStore(InsnIterator insn, SpirvRoutine *routine) const
1259 ObjectID pointerId = insn.word(1);
1260 ObjectID objectId = insn.word(2);
1261 auto &object = getObject(objectId);
1262 auto &pointer = getObject(pointerId);
1263 auto &pointerTy = getType(pointer.type);
1264 auto &elementTy = getType(pointerTy.element);
1265 auto &pointerBase = getObject(pointer.pointerBase);
1266 auto &pointerBaseTy = getType(pointerBase.type);
// The storage class is taken from the base object's type, not from the pointer's own type.
1268 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1270 UNIMPLEMENTED("StorageClassImage store not yet implemented");
// Resolve the base address: either a physical pointer registered on the
// routine, or the address of element 0 of the routine's value for the base object.
1273 Pointer<Float> ptrBase;
1274 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1276 ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1280 ptrBase = &routine->getValue(pointer.pointerBase)[0];
1283 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1285 if (object.kind == Object::Kind::Constant)
// The constant's storage is reinterpreted as an array of floats, one per component.
1287 auto src = reinterpret_cast<float *>(object.constantValue.get());
// A pointer of kind Value carries its offsets in an intermediate (component 0),
// one offset per SIMD lane.
1289 if (pointer.kind == Object::Kind::Value)
1291 // Constant source data. Divergent offsets.
1292 auto offsets = As<SIMD::Int>(routine->getIntermediate(pointerId)[0]);
1293 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1295 for (int j = 0; j < SIMD::Width; j++)
1297 Int offset = Int(i) + Extract(offsets, j);
// Lane-interleaved storage: lane j of element `offset` is at offset * SIMD::Width + j.
1298 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1299 ptrBase[offset] = RValue<Float>(src[i]);
1305 // Constant source data. No divergent offsets.
1306 Pointer<SIMD::Float> dst = ptrBase;
1307 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
// The scalar constant is converted to a SIMD::Float before being stored.
1309 dst[i] = RValue<SIMD::Float>(src[i]);
1315 auto &src = routine->getIntermediate(objectId);
1317 if (pointer.kind == Object::Kind::Value)
1319 // Intermediate source data. Divergent offsets.
1320 auto offsets = As<SIMD::Int>(routine->getIntermediate(pointerId)[0]);
1321 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1323 for (int j = 0; j < SIMD::Width; j++)
1325 Int offset = Int(i) + Extract(offsets, j);
1326 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
// Scatter: lane j of component i goes to that lane's own offset.
1327 ptrBase[offset] = Extract(src[i], j);
1331 else if (interleavedByLane)
1333 // Intermediate source data. Lane-interleaved data. No divergent offsets.
1334 Pointer<SIMD::Float> dst = ptrBase;
1335 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1342 // Intermediate source data. Non-interleaved data. No divergent offsets.
1343 Pointer<SIMD::Float> dst = ptrBase;
1344 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1346 dst[i] = SIMD::Float(src[i]);
// Emits the code for a composite construction.
//   word 1: result type id
//   word 2: result id
//   word 3 onwards: ids of the constituent objects
// The components of each constituent are appended, in operand order, to the
// flat component list of the result intermediate.
1352 void SpirvShader::EmitCompositeConstruct(InsnIterator insn, SpirvRoutine *routine) const
1354 auto &type = getType(insn.word(1));
1355 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
// One iteration per constituent operand (everything after the first three words).
1358 for (auto i = 0u; i < insn.wordCount() - 3; i++)
1360 ObjectID srcObjectId = insn.word(3u + i);
1361 auto & srcObject = getObject(srcObjectId);
1362 auto & srcObjectTy = getType(srcObject.type);
1363 GenericValue srcObjectAccess(this, routine, srcObjectId);
// `offset` is the running write position in dst; it advances across constituents.
// NOTE(review): presumably initialised to 0 ahead of the outer loop - confirm.
1365 for (auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
1366 dst.emplace(offset++, srcObjectAccess[j]);
1370 void SpirvShader::EmitCompositeInsert(InsnIterator insn, SpirvRoutine *routine) const
1372 TypeID resultTypeId = insn.word(1);
1373 auto &type = getType(resultTypeId);
1374 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1375 auto &newPartObject = getObject(insn.word(3));
1376 auto &newPartObjectTy = getType(newPartObject.type);
1377 auto firstNewComponent = WalkLiteralAccessChain(resultTypeId, insn.wordCount() - 5, insn.wordPointer(5));
1379 GenericValue srcObjectAccess(this, routine, insn.word(4));
1380 GenericValue newPartObjectAccess(this, routine, insn.word(3));
1382 // old components before
1383 for (auto i = 0u; i < firstNewComponent; i++)
1385 dst.emplace(i, srcObjectAccess[i]);
1388 for (auto i = 0u; i < newPartObjectTy.sizeInComponents; i++)
1390 dst.emplace(firstNewComponent + i, newPartObjectAccess[i]);
1392 // old components after
1393 for (auto i = firstNewComponent + newPartObjectTy.sizeInComponents; i < type.sizeInComponents; i++)
1395 dst.emplace(i, srcObjectAccess[i]);
1399 void SpirvShader::EmitCompositeExtract(InsnIterator insn, SpirvRoutine *routine) const
1401 auto &type = getType(insn.word(1));
1402 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1403 auto &compositeObject = getObject(insn.word(3));
1404 TypeID compositeTypeId = compositeObject.definition.word(1);
1405 auto firstComponent = WalkLiteralAccessChain(compositeTypeId, insn.wordCount() - 4, insn.wordPointer(4));
1407 GenericValue compositeObjectAccess(this, routine, insn.word(3));
1408 for (auto i = 0u; i < type.sizeInComponents; i++)
1410 dst.emplace(i, compositeObjectAccess[firstComponent + i]);
1414 void SpirvShader::EmitVectorShuffle(InsnIterator insn, SpirvRoutine *routine) const
1416 auto &type = getType(insn.word(1));
1417 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1419 // Note: number of components in result type, first half type, and second
1420 // half type are all independent.
1421 auto &firstHalfType = getType(getObject(insn.word(3)).type);
1423 GenericValue firstHalfAccess(this, routine, insn.word(3));
1424 GenericValue secondHalfAccess(this, routine, insn.word(4));
1426 for (auto i = 0u; i < type.sizeInComponents; i++)
1428 auto selector = insn.word(5 + i);
1429 if (selector == static_cast<uint32_t>(-1))
1431 // Undefined value. Until we decide to do real undef values, zero is as good
1433 dst.emplace(i, RValue<SIMD::Float>(0.0f));
1435 else if (selector < firstHalfType.sizeInComponents)
1437 dst.emplace(i, firstHalfAccess[selector]);
1441 dst.emplace(i, secondHalfAccess[selector - firstHalfType.sizeInComponents]);
// Emits the code for a one-operand instruction, applied per component.
//   word 1: result type id
//   word 2: result id
//   word 3: operand id
// Intermediates are stored as SIMD::Float, so integer and boolean results are
// bit-cast with As<SIMD::Float>() before being stored.
1446 void SpirvShader::EmitUnaryOp(InsnIterator insn, SpirvRoutine *routine) const
1448 auto &type = getType(insn.word(1));
1449 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1450 auto src = GenericValue(this, routine, insn.word(3));
1452 for (auto i = 0u; i < type.sizeInComponents; i++)
1456 switch (insn.opcode())
1459 case spv::OpLogicalNot: // logical not == bitwise not due to all-bits boolean representation
1460 dst.emplace(i, As<SIMD::Float>(~As<SIMD::UInt>(val)));
1462 case spv::OpSNegate:
// Integer negate: operate on the bits as SIMD::Int, then bit-cast back.
1463 dst.emplace(i, As<SIMD::Float>(-As<SIMD::Int>(val)));
1465 case spv::OpFNegate:
1466 dst.emplace(i, -val);
1468 case spv::OpConvertFToU:
// Value conversion (constructor), then a bit-cast for storage.
1469 dst.emplace(i, As<SIMD::Float>(SIMD::UInt(val)));
1471 case spv::OpConvertFToS:
1472 dst.emplace(i, As<SIMD::Float>(SIMD::Int(val)));
1474 case spv::OpConvertSToF:
// Reinterpret the stored bits as an integer, then convert its value to float.
1475 dst.emplace(i, SIMD::Float(As<SIMD::Int>(val)));
1477 case spv::OpConvertUToF:
1478 dst.emplace(i, SIMD::Float(As<SIMD::UInt>(val)));
1480 case spv::OpBitcast:
// The bits pass through unchanged.
1481 dst.emplace(i, val);
// The results of IsInf() / IsNan() are bit-cast for storage.
1484 dst.emplace(i, As<SIMD::Float>(IsInf(val)));
1487 dst.emplace(i, As<SIMD::Float>(IsNan(val)));
1490 UNIMPLEMENTED("Unhandled unary operator %s", OpcodeName(insn.opcode()).c_str());
// Emits the code for a two-operand instruction, applied per component.
//   word 1: result type id
//   word 2: result id
//   word 3 / word 4: ids of the left and right operands
// Intermediates are stored as SIMD::Float, so integer, boolean and comparison
// results are bit-cast with As<SIMD::Float>() before being stored.
1495 void SpirvShader::EmitBinaryOp(InsnIterator insn, SpirvRoutine *routine) const
1497 auto &type = getType(insn.word(1));
1498 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1499 auto &lhsType = getType(getObject(insn.word(3)).type);
1500 auto srcLHS = GenericValue(this, routine, insn.word(3));
1501 auto srcRHS = GenericValue(this, routine, insn.word(4));
// The loop runs over the left operand's component count rather than the
// result's: the extended multiplies below write two result components per
// operand component.
1503 for (auto i = 0u; i < lhsType.sizeInComponents; i++)
1505 auto lhs = srcLHS[i];
1506 auto rhs = srcRHS[i];
1508 switch (insn.opcode())
// Integer arithmetic on the reinterpreted bits: add, subtract, multiply and
// divide as SIMD::Int; divide and remainder as SIMD::UInt.
1511 dst.emplace(i, As<SIMD::Float>(As<SIMD::Int>(lhs) + As<SIMD::Int>(rhs)));
1514 dst.emplace(i, As<SIMD::Float>(As<SIMD::Int>(lhs) - As<SIMD::Int>(rhs)));
1517 dst.emplace(i, As<SIMD::Float>(As<SIMD::Int>(lhs) * As<SIMD::Int>(rhs)));
1520 dst.emplace(i, As<SIMD::Float>(As<SIMD::Int>(lhs) / As<SIMD::Int>(rhs)));
1523 dst.emplace(i, As<SIMD::Float>(As<SIMD::UInt>(lhs) / As<SIMD::UInt>(rhs)));
1526 dst.emplace(i, As<SIMD::Float>(As<SIMD::UInt>(lhs) % As<SIMD::UInt>(rhs)));
// Integer and boolean comparisons. Logical (boolean) equality shares the
// integer compare.
1529 case spv::OpLogicalEqual:
1530 dst.emplace(i, As<SIMD::Float>(CmpEQ(As<SIMD::Int>(lhs), As<SIMD::Int>(rhs))));
1532 case spv::OpINotEqual:
1533 case spv::OpLogicalNotEqual:
1534 dst.emplace(i, As<SIMD::Float>(CmpNEQ(As<SIMD::Int>(lhs), As<SIMD::Int>(rhs))));
// Ordered integer comparisons: the U variants compare as SIMD::UInt, the S
// variants as SIMD::Int.
1536 case spv::OpUGreaterThan:
1537 dst.emplace(i, As<SIMD::Float>(CmpGT(As<SIMD::UInt>(lhs), As<SIMD::UInt>(rhs))));
1539 case spv::OpSGreaterThan:
1540 dst.emplace(i, As<SIMD::Float>(CmpGT(As<SIMD::Int>(lhs), As<SIMD::Int>(rhs))));
1542 case spv::OpUGreaterThanEqual:
1543 dst.emplace(i, As<SIMD::Float>(CmpGE(As<SIMD::UInt>(lhs), As<SIMD::UInt>(rhs))));
1545 case spv::OpSGreaterThanEqual:
1546 dst.emplace(i, As<SIMD::Float>(CmpGE(As<SIMD::Int>(lhs), As<SIMD::Int>(rhs))));
1548 case spv::OpULessThan:
1549 dst.emplace(i, As<SIMD::Float>(CmpLT(As<SIMD::UInt>(lhs), As<SIMD::UInt>(rhs))));
1551 case spv::OpSLessThan:
1552 dst.emplace(i, As<SIMD::Float>(CmpLT(As<SIMD::Int>(lhs), As<SIMD::Int>(rhs))));
1554 case spv::OpULessThanEqual:
1555 dst.emplace(i, As<SIMD::Float>(CmpLE(As<SIMD::UInt>(lhs), As<SIMD::UInt>(rhs))));
1557 case spv::OpSLessThanEqual:
1558 dst.emplace(i, As<SIMD::Float>(CmpLE(As<SIMD::Int>(lhs), As<SIMD::Int>(rhs))));
// Floating-point arithmetic, applied directly to the stored values.
1561 dst.emplace(i, lhs + rhs);
1564 dst.emplace(i, lhs - rhs);
1567 dst.emplace(i, lhs * rhs);
1570 dst.emplace(i, lhs / rhs);
// Floating-point comparisons. The Ord opcodes use the plain Cmp* helpers and
// the Unord opcodes use the CmpU* helpers.
1572 case spv::OpFOrdEqual:
1573 dst.emplace(i, As<SIMD::Float>(CmpEQ(lhs, rhs)));
1575 case spv::OpFUnordEqual:
1576 dst.emplace(i, As<SIMD::Float>(CmpUEQ(lhs, rhs)));
1578 case spv::OpFOrdNotEqual:
1579 dst.emplace(i, As<SIMD::Float>(CmpNEQ(lhs, rhs)));
1581 case spv::OpFUnordNotEqual:
1582 dst.emplace(i, As<SIMD::Float>(CmpUNEQ(lhs, rhs)));
1584 case spv::OpFOrdLessThan:
1585 dst.emplace(i, As<SIMD::Float>(CmpLT(lhs, rhs)));
1587 case spv::OpFUnordLessThan:
1588 dst.emplace(i, As<SIMD::Float>(CmpULT(lhs, rhs)));
1590 case spv::OpFOrdGreaterThan:
1591 dst.emplace(i, As<SIMD::Float>(CmpGT(lhs, rhs)));
1593 case spv::OpFUnordGreaterThan:
1594 dst.emplace(i, As<SIMD::Float>(CmpUGT(lhs, rhs)));
1596 case spv::OpFOrdLessThanEqual:
1597 dst.emplace(i, As<SIMD::Float>(CmpLE(lhs, rhs)));
1599 case spv::OpFUnordLessThanEqual:
1600 dst.emplace(i, As<SIMD::Float>(CmpULE(lhs, rhs)));
1602 case spv::OpFOrdGreaterThanEqual:
1603 dst.emplace(i, As<SIMD::Float>(CmpGE(lhs, rhs)));
1605 case spv::OpFUnordGreaterThanEqual:
1606 dst.emplace(i, As<SIMD::Float>(CmpUGE(lhs, rhs)));
// Shifts: the logical shifts operate on SIMD::UInt, the arithmetic right
// shift on SIMD::Int.
1608 case spv::OpShiftRightLogical:
1609 dst.emplace(i, As<SIMD::Float>(As<SIMD::UInt>(lhs) >> As<SIMD::UInt>(rhs)));
1611 case spv::OpShiftRightArithmetic:
1612 dst.emplace(i, As<SIMD::Float>(As<SIMD::Int>(lhs) >> As<SIMD::Int>(rhs)));
1614 case spv::OpShiftLeftLogical:
1615 dst.emplace(i, As<SIMD::Float>(As<SIMD::UInt>(lhs) << As<SIMD::UInt>(rhs)));
// Bitwise operations. The logical (boolean) or/and share the bitwise code.
1617 case spv::OpBitwiseOr:
1618 case spv::OpLogicalOr:
1619 dst.emplace(i, As<SIMD::Float>(As<SIMD::UInt>(lhs) | As<SIMD::UInt>(rhs)));
1621 case spv::OpBitwiseXor:
1622 dst.emplace(i, As<SIMD::Float>(As<SIMD::UInt>(lhs) ^ As<SIMD::UInt>(rhs)));
1624 case spv::OpBitwiseAnd:
1625 case spv::OpLogicalAnd:
1626 dst.emplace(i, As<SIMD::Float>(As<SIMD::UInt>(lhs) & As<SIMD::UInt>(rhs)));
1628 case spv::OpSMulExtended:
1629 // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
1630 // In our flat view then, component i is the i'th component of the first member;
1631 // component i + N is the i'th component of the second member.
// First member: the ordinary product. Second member: the MulHigh() result.
1632 dst.emplace(i, As<SIMD::Float>(As<SIMD::Int>(lhs) * As<SIMD::Int>(rhs)));
1633 dst.emplace(i + lhsType.sizeInComponents, As<SIMD::Float>(MulHigh(As<SIMD::Int>(lhs), As<SIMD::Int>(rhs))));
1635 case spv::OpUMulExtended:
1636 dst.emplace(i, As<SIMD::Float>(As<SIMD::UInt>(lhs) * As<SIMD::UInt>(rhs)));
1637 dst.emplace(i + lhsType.sizeInComponents, As<SIMD::Float>(MulHigh(As<SIMD::UInt>(lhs), As<SIMD::UInt>(rhs))));
1640 UNIMPLEMENTED("Unhandled binary operator %s", OpcodeName(insn.opcode()).c_str());
1645 void SpirvShader::EmitDot(InsnIterator insn, SpirvRoutine *routine) const
1647 auto &type = getType(insn.word(1));
1648 assert(type.sizeInComponents == 1);
1649 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1650 auto &lhsType = getType(getObject(insn.word(3)).type);
1651 auto srcLHS = GenericValue(this, routine, insn.word(3));
1652 auto srcRHS = GenericValue(this, routine, insn.word(4));
1654 SIMD::Float result = srcLHS[0] * srcRHS[0];
1656 for (auto i = 1u; i < lhsType.sizeInComponents; i++)
1658 result += srcLHS[i] * srcRHS[i];
1661 dst.emplace(0, result);
1664 void SpirvShader::EmitSelect(InsnIterator insn, SpirvRoutine *routine) const
1666 auto &type = getType(insn.word(1));
1667 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1668 auto srcCond = GenericValue(this, routine, insn.word(3));
1669 auto srcLHS = GenericValue(this, routine, insn.word(4));
1670 auto srcRHS = GenericValue(this, routine, insn.word(5));
1672 for (auto i = 0u; i < type.sizeInComponents; i++)
1674 auto cond = As<SIMD::Int>(srcCond[i]);
1675 auto lhs = srcLHS[i];
1676 auto rhs = srcRHS[i];
1677 auto out = (cond & As<Int4>(lhs)) | (~cond & As<Int4>(rhs)); // FIXME: IfThenElse()
1678 dst.emplace(i, As<SIMD::Float>(out));
1682 void SpirvShader::EmitExtendedInstruction(InsnIterator insn, SpirvRoutine *routine) const
1684 auto &type = getType(insn.word(1));
1685 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1686 auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
1688 switch (extInstIndex)
1690 case GLSLstd450FAbs:
1692 auto src = GenericValue(this, routine, insn.word(5));
1693 for (auto i = 0u; i < type.sizeInComponents; i++)
1695 dst.emplace(i, Abs(src[i]));
1699 case GLSLstd450SAbs:
1701 auto src = GenericValue(this, routine, insn.word(5));
1702 for (auto i = 0u; i < type.sizeInComponents; i++)
1704 dst.emplace(i, As<SIMD::Float>(Abs(As<SIMD::Int>(src[i]))));
1708 case GLSLstd450Cross:
1710 auto lhs = GenericValue(this, routine, insn.word(5));
1711 auto rhs = GenericValue(this, routine, insn.word(6));
1712 dst.emplace(0, lhs[1] * rhs[2] - rhs[1] * lhs[2]);
1713 dst.emplace(1, lhs[2] * rhs[0] - rhs[2] * lhs[0]);
1714 dst.emplace(2, lhs[0] * rhs[1] - rhs[0] * lhs[1]);
1717 case GLSLstd450Floor:
1719 auto src = GenericValue(this, routine, insn.word(5));
1720 for (auto i = 0u; i < type.sizeInComponents; i++)
1722 dst.emplace(i, Floor(src[i]));
1726 case GLSLstd450Trunc:
1728 auto src = GenericValue(this, routine, insn.word(5));
1729 for (auto i = 0u; i < type.sizeInComponents; i++)
1731 dst.emplace(i, Trunc(src[i]));
1735 case GLSLstd450Ceil:
1737 auto src = GenericValue(this, routine, insn.word(5));
1738 for (auto i = 0u; i < type.sizeInComponents; i++)
1740 dst.emplace(i, Ceil(src[i]));
1744 case GLSLstd450Fract:
1746 auto src = GenericValue(this, routine, insn.word(5));
1747 for (auto i = 0u; i < type.sizeInComponents; i++)
1749 dst.emplace(i, Frac(src[i]));
1753 case GLSLstd450Round:
1755 auto src = GenericValue(this, routine, insn.word(5));
1756 for (auto i = 0u; i < type.sizeInComponents; i++)
1758 dst.emplace(i, Round(src[i]));
1762 case GLSLstd450RoundEven:
1764 auto src = GenericValue(this, routine, insn.word(5));
1765 for (auto i = 0u; i < type.sizeInComponents; i++)
1767 auto x = Round(src[i]);
1768 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
1769 dst.emplace(i, x + ((SIMD::Float(CmpLT(x, src[i]) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
1770 SIMD::Float(CmpEQ(Frac(src[i]), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
1774 case GLSLstd450FMin:
1776 auto lhs = GenericValue(this, routine, insn.word(5));
1777 auto rhs = GenericValue(this, routine, insn.word(6));
1778 for (auto i = 0u; i < type.sizeInComponents; i++)
1780 dst.emplace(i, Min(lhs[i], rhs[i]));
1784 case GLSLstd450FMax:
1786 auto lhs = GenericValue(this, routine, insn.word(5));
1787 auto rhs = GenericValue(this, routine, insn.word(6));
1788 for (auto i = 0u; i < type.sizeInComponents; i++)
1790 dst.emplace(i, Max(lhs[i], rhs[i]));
1794 case GLSLstd450SMin:
1796 auto lhs = GenericValue(this, routine, insn.word(5));
1797 auto rhs = GenericValue(this, routine, insn.word(6));
1798 for (auto i = 0u; i < type.sizeInComponents; i++)
1800 dst.emplace(i, As<SIMD::Float>(Min(As<SIMD::Int>(lhs[i]), As<SIMD::Int>(rhs[i]))));
1804 case GLSLstd450SMax:
1806 auto lhs = GenericValue(this, routine, insn.word(5));
1807 auto rhs = GenericValue(this, routine, insn.word(6));
1808 for (auto i = 0u; i < type.sizeInComponents; i++)
1810 dst.emplace(i, As<SIMD::Float>(Max(As<SIMD::Int>(lhs[i]), As<SIMD::Int>(rhs[i]))));
1814 case GLSLstd450UMin:
1816 auto lhs = GenericValue(this, routine, insn.word(5));
1817 auto rhs = GenericValue(this, routine, insn.word(6));
1818 for (auto i = 0u; i < type.sizeInComponents; i++)
1820 dst.emplace(i, As<SIMD::Float>(Min(As<SIMD::UInt>(lhs[i]), As<SIMD::UInt>(rhs[i]))));
1824 case GLSLstd450UMax:
1826 auto lhs = GenericValue(this, routine, insn.word(5));
1827 auto rhs = GenericValue(this, routine, insn.word(6));
1828 for (auto i = 0u; i < type.sizeInComponents; i++)
1830 dst.emplace(i, As<SIMD::Float>(Max(As<SIMD::UInt>(lhs[i]), As<SIMD::UInt>(rhs[i]))));
1834 case GLSLstd450Step:
1836 auto edge = GenericValue(this, routine, insn.word(5));
1837 auto x = GenericValue(this, routine, insn.word(6));
1838 for (auto i = 0u; i < type.sizeInComponents; i++)
1840 dst.emplace(i, As<SIMD::Float>(CmpNLT(x[i], edge[i]) & As<SIMD::Int>(SIMD::Float(1.0f))));
1844 case GLSLstd450SmoothStep:
1846 auto edge0 = GenericValue(this, routine, insn.word(5));
1847 auto edge1 = GenericValue(this, routine, insn.word(6));
1848 auto x = GenericValue(this, routine, insn.word(7));
1849 for (auto i = 0u; i < type.sizeInComponents; i++)
1851 auto tx = Min(Max((x[i] - edge0[i]) / (edge1[i] - edge0[i]), SIMD::Float(0.0f)), SIMD::Float(1.0f));
1852 dst.emplace(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
1856 case GLSLstd450FMix:
1858 auto x = GenericValue(this, routine, insn.word(5));
1859 auto y = GenericValue(this, routine, insn.word(6));
1860 auto a = GenericValue(this, routine, insn.word(7));
1861 for (auto i = 0u; i < type.sizeInComponents; i++)
1863 dst.emplace(i, a[i] * (y[i] - x[i]) + x[i]);
1867 case GLSLstd450FClamp:
1869 auto x = GenericValue(this, routine, insn.word(5));
1870 auto minVal = GenericValue(this, routine, insn.word(6));
1871 auto maxVal = GenericValue(this, routine, insn.word(7));
1872 for (auto i = 0u; i < type.sizeInComponents; i++)
1874 dst.emplace(i, Min(Max(x[i], minVal[i]), maxVal[i]));
1878 case GLSLstd450SClamp:
1880 auto x = GenericValue(this, routine, insn.word(5));
1881 auto minVal = GenericValue(this, routine, insn.word(6));
1882 auto maxVal = GenericValue(this, routine, insn.word(7));
1883 for (auto i = 0u; i < type.sizeInComponents; i++)
1885 dst.emplace(i, As<SIMD::Float>(Min(Max(As<SIMD::Int>(x[i]), As<SIMD::Int>(minVal[i])), As<SIMD::Int>(maxVal[i]))));
1889 case GLSLstd450UClamp:
1891 auto x = GenericValue(this, routine, insn.word(5));
1892 auto minVal = GenericValue(this, routine, insn.word(6));
1893 auto maxVal = GenericValue(this, routine, insn.word(7));
1894 for (auto i = 0u; i < type.sizeInComponents; i++)
1896 dst.emplace(i, As<SIMD::Float>(Min(Max(As<SIMD::UInt>(x[i]), As<SIMD::UInt>(minVal[i])), As<SIMD::UInt>(maxVal[i]))));
1900 case GLSLstd450FSign:
1902 auto src = GenericValue(this, routine, insn.word(5));
1903 for (auto i = 0u; i < type.sizeInComponents; i++)
1905 auto neg = As<SIMD::Int>(CmpLT(src[i], SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
1906 auto pos = As<SIMD::Int>(CmpNLE(src[i], SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
1907 dst.emplace(i, As<SIMD::Float>(neg | pos));
1911 case GLSLstd450SSign:
1913 auto src = GenericValue(this, routine, insn.word(5));
1914 for (auto i = 0u; i < type.sizeInComponents; i++)
1916 auto neg = CmpLT(As<SIMD::Int>(src[i]), SIMD::Int(0)) & SIMD::Int(-1);
1917 auto pos = CmpNLE(As<SIMD::Int>(src[i]), SIMD::Int(0)) & SIMD::Int(1);
1918 dst.emplace(i, As<SIMD::Float>(neg | pos));
1923 UNIMPLEMENTED("Unhandled ExtInst %d", extInstIndex);
1927 void SpirvShader::EmitAny(InsnIterator insn, SpirvRoutine *routine) const
1929 auto &type = getType(insn.word(1));
1930 assert(type.sizeInComponents == 1);
1931 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1932 auto &srcType = getType(getObject(insn.word(3)).type);
1933 auto src = GenericValue(this, routine, insn.word(3));
1935 SIMD::UInt result = As<SIMD::UInt>(src[0]);
1937 for (auto i = 1u; i < srcType.sizeInComponents; i++)
1939 result |= As<SIMD::UInt>(src[i]);
1942 dst.emplace(0, As<SIMD::Float>(result));
1945 void SpirvShader::EmitAll(InsnIterator insn, SpirvRoutine *routine) const
1947 auto &type = getType(insn.word(1));
1948 assert(type.sizeInComponents == 1);
1949 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1950 auto &srcType = getType(getObject(insn.word(3)).type);
1951 auto src = GenericValue(this, routine, insn.word(3));
1953 SIMD::UInt result = As<SIMD::UInt>(src[0]);
1955 for (auto i = 1u; i < srcType.sizeInComponents; i++)
1957 result &= As<SIMD::UInt>(src[i]);
1960 dst.emplace(0, As<SIMD::Float>(result));
// Walks every instruction of the shader and, for each OpVariable that is an
// interface variable in the Output storage class, copies its components from
// the routine's per-variable storage into routine->outputs.
1963 void SpirvShader::emitEpilog(SpirvRoutine *routine) const
1965 for (auto insn : *this)
1967 switch (insn.opcode())
1969 case spv::OpVariable:
1971 ObjectID resultId = insn.word(2);
1972 auto &object = getObject(resultId);
1973 auto &objectTy = getType(object.type);
// Only output interface variables are copied out; every other variable is left alone.
1974 if (object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
1976 auto &dst = routine->getValue(resultId);
// The callback runs with the decorations of each piece of the interface
// variable that VisitInterface reports.
1978 VisitInterface(resultId,
1979 [&](Decorations const &d, AttribType type) {
// Output slot index: Location in the upper bits, Component in the low two bits.
1980 auto scalarSlot = d.Location << 2 | d.Component;
// `offset` steps through the variable's flat storage, one entry per callback.
// NOTE(review): presumably initialised to 0 just before this VisitInterface call - confirm.
1981 routine->outputs[scalarSlot] = dst[offset++];
1992 SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
1993 pipelineLayout(pipelineLayout)