1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include <spirv/unified1/spirv.hpp>
16 #include <spirv/unified1/GLSL.std.450.h>
17 #include "SpirvShader.hpp"
18 #include "System/Math.hpp"
19 #include "Vulkan/VkBuffer.hpp"
20 #include "Vulkan/VkDebug.hpp"
21 #include "Vulkan/VkPipelineLayout.hpp"
22 #include "Device/Config.hpp"
27 #undef Bool // b/127920555
// Monotonic source of shader serial IDs, incremented once per constructed shader.
// NOTE(review): 'volatile' does not make the '++' below (in the constructor's
// initializer list) atomic — if shaders can be constructed concurrently from
// multiple threads, IDs could collide. Consider std::atomic<int>; confirm the
// threading model with the header before changing the declaration.
32 volatile int SpirvShader::serialCounter = 1; // Start at 1, 0 is invalid shader.
// Constructor: a single analysis pass over the whole SPIR-V instruction stream.
// It populates the type/object tables ('types'/'defs'), decoration maps,
// execution modes, and the basic-block structure ('blocks', 'mainBlockId').
// No code is generated here — that happens later in emit().
// NOTE(review): this listing is a sampled view; some lines (braces, 'break's,
// case labels) of the original are not visible here.
34 SpirvShader::SpirvShader(InsnStore const &insns)
35 : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
36 outputs{MAX_INTERFACE_COMPONENTS},
37 serialID{serialCounter++}, modes{}
39 ASSERT(insns.size() > 0);
41 // Simplifying assumptions (to be satisfied by earlier transformations)
42 // - There is exactly one entrypoint in the module, and it's the one we want
43 // - The only input/output OpVariables present are those used by the entrypoint
45 Block::ID currentBlock;
46 InsnIterator blockStart;
48 for (auto insn : *this)
50 switch (insn.opcode())
52 case spv::OpExecutionMode:
53 ProcessExecutionMode(insn);
// OpDecorate: attach a decoration to a type or object id. Centroid is the
// only decoration that feeds back into the execution modes here.
58 TypeOrObjectID targetId = insn.word(1);
59 auto decoration = static_cast<spv::Decoration>(insn.word(2));
60 decorations[targetId].Apply(
62 insn.wordCount() > 3 ? insn.word(3) : 0);
64 if (decoration == spv::DecorationCentroid)
65 modes.NeedsCentroid = true;
69 case spv::OpMemberDecorate:
71 Type::ID targetId = insn.word(1);
72 auto memberIndex = insn.word(2);
73 auto &d = memberDecorations[targetId];
74 if (memberIndex >= d.size())
75 d.resize(memberIndex + 1); // on demand; exact size would require another pass...
76 auto decoration = static_cast<spv::Decoration>(insn.word(3));
79 insn.wordCount() > 4 ? insn.word(4) : 0);
81 if (decoration == spv::DecorationCentroid)
82 modes.NeedsCentroid = true;
86 case spv::OpDecorationGroup:
87 // Nothing to do here. We don't need to record the definition of the group; we'll just have
88 // the bundle of decorations float around. If we were to ever walk the decorations directly,
89 // we might think about introducing this as a real Object.
92 case spv::OpGroupDecorate:
94 auto const &srcDecorations = decorations[insn.word(1)];
95 for (auto i = 2u; i < insn.wordCount(); i++)
97 // remaining operands are targets to apply the group to.
98 decorations[insn.word(i)].Apply(srcDecorations);
103 case spv::OpGroupMemberDecorate:
105 auto const &srcDecorations = decorations[insn.word(1)];
106 for (auto i = 2u; i < insn.wordCount(); i += 2)
108 // remaining operands are pairs of <id>, literal for members to apply to.
109 auto &d = memberDecorations[insn.word(i)];
110 auto memberIndex = insn.word(i + 1);
111 if (memberIndex >= d.size())
112 d.resize(memberIndex + 1); // on demand resize, see above...
113 d[memberIndex].Apply(srcDecorations);
// Start of a basic block: exactly one block may be open at a time
// (Block::ID 0 is used as the "no block open" sentinel).
120 ASSERT(currentBlock.value() == 0);
121 currentBlock = Block::ID(insn.word(1));
126 // Branch Instructions (subset of Termination Instructions):
128 case spv::OpBranchConditional:
133 // Termination instruction:
135 case spv::OpUnreachable:
// A terminator closes the currently-open block: record its [start, end)
// instruction range and reset the sentinel.
137 ASSERT(currentBlock.value() != 0);
138 auto blockEnd = insn; blockEnd++;
139 blocks[currentBlock] = Block(blockStart, blockEnd);
140 currentBlock = Block::ID(0);
142 if (insn.opcode() == spv::OpKill)
144 modes.ContainsKill = true;
149 case spv::OpSelectionMerge:
150 break; // Nothing to do in analysis pass.
152 case spv::OpTypeVoid:
153 case spv::OpTypeBool:
155 case spv::OpTypeFloat:
156 case spv::OpTypeVector:
157 case spv::OpTypeMatrix:
158 case spv::OpTypeImage:
159 case spv::OpTypeSampler:
160 case spv::OpTypeSampledImage:
161 case spv::OpTypeArray:
162 case spv::OpTypeRuntimeArray:
163 case spv::OpTypeStruct:
164 case spv::OpTypePointer:
165 case spv::OpTypeFunction:
// OpVariable: record the variable object; its 'pointerBase' is itself,
// and the interesting work depends on the storage class below.
169 case spv::OpVariable:
171 Type::ID typeId = insn.word(1);
172 Object::ID resultId = insn.word(2);
173 auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
174 if (insn.wordCount() > 4)
175 UNIMPLEMENTED("Variable initializers not yet supported");
177 auto &object = defs[resultId];
178 object.kind = Object::Kind::Variable;
179 object.definition = insn;
180 object.type = typeId;
181 object.pointerBase = insn.word(2); // base is itself
183 ASSERT(getType(typeId).storageClass == storageClass);
185 switch (storageClass)
187 case spv::StorageClassInput:
188 case spv::StorageClassOutput:
189 ProcessInterfaceVariable(object);
191 case spv::StorageClassUniform:
192 case spv::StorageClassStorageBuffer:
193 case spv::StorageClassPushConstant:
194 object.kind = Object::Kind::PhysicalPointer;
197 case spv::StorageClassPrivate:
198 case spv::StorageClassFunction:
199 break; // Correctly handled.
201 case spv::StorageClassUniformConstant:
202 case spv::StorageClassWorkgroup:
203 case spv::StorageClassCrossWorkgroup:
204 case spv::StorageClassGeneric:
205 case spv::StorageClassAtomicCounter:
206 case spv::StorageClassImage:
207 UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
211 UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
// Constants: CreateConstant() allocates the object and its value storage;
// the cases below fill in the component values.
217 case spv::OpConstant:
218 CreateConstant(insn).constantValue[0] = insn.word(3);
220 case spv::OpConstantFalse:
221 CreateConstant(insn).constantValue[0] = 0; // represent boolean false as zero
223 case spv::OpConstantTrue:
224 CreateConstant(insn).constantValue[0] = ~0u; // represent boolean true as all bits set
226 case spv::OpConstantNull:
229 // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
230 // OpConstantNull forms a constant of arbitrary type, all zeros.
231 auto &object = CreateConstant(insn);
232 auto &objectTy = getType(object.type);
233 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
235 object.constantValue[i] = 0;
239 case spv::OpConstantComposite:
241 auto &object = CreateConstant(insn);
// Flatten the constituents' component values into this composite's storage.
243 for (auto i = 0u; i < insn.wordCount() - 3; i++)
245 auto &constituent = getObject(insn.word(i + 3));
246 auto &constituentTy = getType(constituent.type);
247 for (auto j = 0u; j < constituentTy.sizeInComponents; j++)
248 object.constantValue[offset++] = constituent.constantValue[j];
// Special case: a composite constant decorated WorkgroupSize overrides the
// LocalSize execution mode (see spec quote below).
251 auto objectId = Object::ID(insn.word(2));
252 auto decorationsIt = decorations.find(objectId);
253 if (decorationsIt != decorations.end() &&
254 decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
256 // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
257 // Decorating an object with the WorkgroupSize built-in
258 // decoration will make that object contain the dimensions
259 // of a local workgroup. If an object is decorated with the
260 // WorkgroupSize decoration, this must take precedence over
261 // any execution mode set for LocalSize.
262 // The object decorated with WorkgroupSize must be declared
263 // as a three-component vector of 32-bit integers.
264 ASSERT(getType(object.type).sizeInComponents == 3);
265 modes.WorkgroupSizeX = object.constantValue[0];
266 modes.WorkgroupSizeY = object.constantValue[1];
267 modes.WorkgroupSizeZ = object.constantValue[2];
272 case spv::OpCapability:
273 break; // Various capabilities will be declared, but none affect our code generation at this point.
274 case spv::OpMemoryModel:
275 break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
277 case spv::OpEntryPoint:
// OpFunction: the single function's first OpLabel becomes mainBlockId.
279 case spv::OpFunction:
280 ASSERT(mainBlockId.value() == 0); // Multiple functions found
281 // Scan forward to find the function's label.
282 for (auto it = insn; it != end() && mainBlockId.value() == 0; it++)
286 case spv::OpFunction:
287 case spv::OpFunctionParameter:
290 mainBlockId = Block::ID(it.word(1));
293 WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str());
296 ASSERT(mainBlockId.value() != 0); // Function's OpLabel not found
298 case spv::OpFunctionEnd:
299 // Due to preprocessing, the entrypoint and its function provide no value.
301 case spv::OpExtInstImport:
302 // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
303 // Valid shaders will not attempt to import any other instruction sets.
304 if (0 != strcmp("GLSL.std.450", reinterpret_cast<char const *>(insn.wordPointer(2))))
306 UNIMPLEMENTED("Only GLSL extended instruction set is supported");
310 case spv::OpMemberName:
312 case spv::OpSourceContinued:
313 case spv::OpSourceExtension:
316 case spv::OpModuleProcessed:
318 // No semantic impact
321 case spv::OpFunctionParameter:
322 case spv::OpFunctionCall:
323 case spv::OpSpecConstant:
324 case spv::OpSpecConstantComposite:
325 case spv::OpSpecConstantFalse:
326 case spv::OpSpecConstantOp:
327 case spv::OpSpecConstantTrue:
328 // These should have all been removed by preprocessing passes. If we see them here,
329 // our assumptions are wrong and we will probably generate wrong code.
330 UNIMPLEMENTED("%s should have already been lowered.", OpcodeName(insn.opcode()).c_str());
333 case spv::OpFConvert:
334 case spv::OpSConvert:
335 case spv::OpUConvert:
336 UNIMPLEMENTED("No valid uses for Op*Convert until we support multiple bit widths");
// Value-producing instructions: all share the bookkeeping at the bottom of
// this case group — record an Object::Kind::Value for the result id.
340 case spv::OpAccessChain:
341 case spv::OpInBoundsAccessChain:
342 case spv::OpCompositeConstruct:
343 case spv::OpCompositeInsert:
344 case spv::OpCompositeExtract:
345 case spv::OpVectorShuffle:
346 case spv::OpVectorTimesScalar:
347 case spv::OpVectorExtractDynamic:
348 case spv::OpVectorInsertDynamic:
349 case spv::OpNot: // Unary ops
352 case spv::OpLogicalNot:
353 case spv::OpIAdd: // Binary ops
364 case spv::OpFOrdEqual:
365 case spv::OpFUnordEqual:
366 case spv::OpFOrdNotEqual:
367 case spv::OpFUnordNotEqual:
368 case spv::OpFOrdLessThan:
369 case spv::OpFUnordLessThan:
370 case spv::OpFOrdGreaterThan:
371 case spv::OpFUnordGreaterThan:
372 case spv::OpFOrdLessThanEqual:
373 case spv::OpFUnordLessThanEqual:
374 case spv::OpFOrdGreaterThanEqual:
375 case spv::OpFUnordGreaterThanEqual:
380 case spv::OpINotEqual:
381 case spv::OpUGreaterThan:
382 case spv::OpSGreaterThan:
383 case spv::OpUGreaterThanEqual:
384 case spv::OpSGreaterThanEqual:
385 case spv::OpULessThan:
386 case spv::OpSLessThan:
387 case spv::OpULessThanEqual:
388 case spv::OpSLessThanEqual:
389 case spv::OpShiftRightLogical:
390 case spv::OpShiftRightArithmetic:
391 case spv::OpShiftLeftLogical:
392 case spv::OpBitwiseOr:
393 case spv::OpBitwiseXor:
394 case spv::OpBitwiseAnd:
395 case spv::OpLogicalOr:
396 case spv::OpLogicalAnd:
397 case spv::OpLogicalEqual:
398 case spv::OpLogicalNotEqual:
399 case spv::OpUMulExtended:
400 case spv::OpSMulExtended:
402 case spv::OpConvertFToU:
403 case spv::OpConvertFToS:
404 case spv::OpConvertSToF:
405 case spv::OpConvertUToF:
414 case spv::OpDPdxCoarse:
416 case spv::OpDPdyCoarse:
418 case spv::OpFwidthCoarse:
419 case spv::OpDPdxFine:
420 case spv::OpDPdyFine:
421 case spv::OpFwidthFine:
422 case spv::OpAtomicLoad:
424 // Instructions that yield an intermediate value
426 Type::ID typeId = insn.word(1);
427 Object::ID resultId = insn.word(2);
428 auto &object = defs[resultId];
429 object.type = typeId;
430 object.kind = Object::Kind::Value;
431 object.definition = insn;
433 if (insn.opcode() == spv::OpAccessChain || insn.opcode() == spv::OpInBoundsAccessChain)
435 // interior ptr has two parts:
436 // - logical base ptr, common across all lanes and known at compile time
// Access chains inherit the pointerBase of the object they chain from,
// so the ultimate base is always a plain OpVariable.
438 Object::ID baseId = insn.word(3);
439 object.pointerBase = getObject(baseId).pointerBase;
445 case spv::OpAtomicStore:
446 // Don't need to do anything during analysis pass
450 UNIMPLEMENTED("%s", OpcodeName(insn.opcode()).c_str());
454 // Assign all Block::ins
// Post-pass: invert the successor ('outs') edges collected above into
// predecessor ('ins') sets on each block.
455 for (auto &it : blocks)
457 auto &blockId = it.first;
458 auto &block = it.second;
459 for (auto &outId : block.outs)
461 auto outIt = blocks.find(outId);
462 ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
463 auto &out = outIt->second;
464 out.ins.emplace(blockId);
// Records a type-declaring instruction in the 'types' table: stores the
// defining instruction, computes the flattened component count, and — for
// structs/pointers — propagates the "builtin block" property used when
// registering interface variables. Relies on SPIR-V's bottom-up type
// declaration order (element types are already in the table).
469 void SpirvShader::DeclareType(InsnIterator insn)
471 Type::ID resultId = insn.word(1);
473 auto &type = types[resultId];
474 type.definition = insn;
475 type.sizeInComponents = ComputeTypeSize(insn);
477 // A structure is a builtin block if it has a builtin
478 // member. All members of such a structure are builtins.
479 switch (insn.opcode())
481 case spv::OpTypeStruct:
483 auto d = memberDecorations.find(resultId);
484 if (d != memberDecorations.end())
486 for (auto &m : d->second)
490 type.isBuiltInBlock = true;
// Pointer types: word(2) is the storage class, word(3) the pointee type;
// a pointer to a builtin block is itself treated as a builtin block.
497 case spv::OpTypePointer:
499 Type::ID elementTypeId = insn.word(3);
500 type.element = elementTypeId;
501 type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
502 type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
// Aggregates: remember the element type id for later walks.
505 case spv::OpTypeVector:
506 case spv::OpTypeMatrix:
507 case spv::OpTypeArray:
508 case spv::OpTypeRuntimeArray:
510 Type::ID elementTypeId = insn.word(2);
511 type.element = elementTypeId;
// Creates the Object for a constant-defining instruction (OpConstant*),
// wiring up its type/kind/definition and allocating storage for one 32-bit
// word per flattened component. Callers fill in constantValue afterwards.
// Returns a reference into 'defs', valid as long as the map entry lives.
519 SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
521 Type::ID typeId = insn.word(1);
522 Object::ID resultId = insn.word(2);
523 auto &object = defs[resultId];
524 auto &objectTy = getType(typeId);
525 object.type = typeId;
526 object.kind = Object::Kind::Constant;
527 object.definition = insn;
528 object.constantValue = std::unique_ptr<uint32_t[]>(new uint32_t[objectTy.sizeInComponents]);
// Registers an Input/Output OpVariable in the appropriate interface table:
// builtins go into inputBuiltins/outputBuiltins (per-member for builtin
// blocks), user-defined variables are flattened into scalar slots of the
// 'inputs'/'outputs' arrays via VisitInterface.
532 void SpirvShader::ProcessInterfaceVariable(Object &object)
534 auto &objectTy = getType(object.type);
535 ASSERT(objectTy.storageClass == spv::StorageClassInput || objectTy.storageClass == spv::StorageClassOutput);
537 ASSERT(objectTy.opcode() == spv::OpTypePointer);
538 auto pointeeTy = getType(objectTy.element);
540 auto &builtinInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputBuiltins : outputBuiltins;
541 auto &userDefinedInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputs : outputs;
543 ASSERT(object.opcode() == spv::OpVariable);
544 Object::ID resultId = object.definition.word(2);
546 if (objectTy.isBuiltInBlock)
548 // walk the builtin block, registering each of its members separately.
549 auto m = memberDecorations.find(objectTy.element);
550 ASSERT(m != memberDecorations.end()); // otherwise we wouldn't have marked the type chain
551 auto &structType = pointeeTy.definition;
// Members are laid out consecutively; 'offset' accumulates the flattened
// component offset of each member within the block.
554 for (auto &member : m->second)
556 auto &memberType = getType(structType.word(word));
558 if (member.HasBuiltIn)
560 builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
563 offset += memberType.sizeInComponents;
// Non-block case: a variable decorated directly as a builtin covers the
// whole pointee, starting at offset 0.
569 auto d = decorations.find(resultId);
570 if (d != decorations.end() && d->second.HasBuiltIn)
572 builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
576 object.kind = Object::Kind::InterfaceVariable;
577 VisitInterface(resultId,
578 [&userDefinedInterface](Decorations const &d, AttribType type) {
579 // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
// Slot index packs Location (4 components per location) and Component.
580 auto scalarSlot = (d.Location << 2) | d.Component;
581 ASSERT(scalarSlot >= 0 &&
582 scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
584 auto &slot = userDefinedInterface[scalarSlot];
587 slot.NoPerspective = d.NoPerspective;
588 slot.Centroid = d.Centroid;
// Translates an OpExecutionMode instruction into flags/values on 'modes'.
// Unsupported modes hit UNIMPLEMENTED rather than being silently ignored.
593 void SpirvShader::ProcessExecutionMode(InsnIterator insn)
595 auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
598 case spv::ExecutionModeEarlyFragmentTests:
599 modes.EarlyFragmentTests = true;
601 case spv::ExecutionModeDepthReplacing:
602 modes.DepthReplacing = true;
604 case spv::ExecutionModeDepthGreater:
605 modes.DepthGreater = true;
607 case spv::ExecutionModeDepthLess:
608 modes.DepthLess = true;
610 case spv::ExecutionModeDepthUnchanged:
611 modes.DepthUnchanged = true;
// LocalSize: literal X/Y/Z workgroup dimensions (may later be overridden
// by a constant decorated with the WorkgroupSize builtin — see constructor).
613 case spv::ExecutionModeLocalSize:
614 modes.WorkgroupSizeX = insn.word(3);
615 modes.WorkgroupSizeY = insn.word(4);
616 modes.WorkgroupSizeZ = insn.word(5);
618 case spv::ExecutionModeOriginUpperLeft:
619 // This is always the case for a Vulkan shader. Do nothing.
622 UNIMPLEMENTED("No other execution modes are permitted");
// Returns the flattened size of a type in 32-bit components. Opaque /
// descriptor-backed types and runtime arrays report 0; pointers count as a
// single per-lane index component. Assumes element types were declared
// earlier in the module (SPIR-V bottom-up ordering).
626 uint32_t SpirvShader::ComputeTypeSize(InsnIterator insn)
628 // Types are always built from the bottom up (with the exception of forward ptrs, which
629 // don't appear in Vulkan shaders. Therefore, we can always assume our component parts have
630 // already been described (and so their sizes determined)
631 switch (insn.opcode())
633 case spv::OpTypeVoid:
634 case spv::OpTypeSampler:
635 case spv::OpTypeImage:
636 case spv::OpTypeSampledImage:
637 case spv::OpTypeFunction:
638 case spv::OpTypeRuntimeArray:
639 // Objects that don't consume any space.
640 // Descriptor-backed objects currently only need exist at compile-time.
641 // Runtime arrays don't appear in places where their size would be interesting
644 case spv::OpTypeBool:
645 case spv::OpTypeFloat:
647 // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
648 // we might need to change this, but only 32 bit components are required for Vulkan 1.1.
651 case spv::OpTypeVector:
652 case spv::OpTypeMatrix:
653 // Vectors and matrices both consume element count * element size.
654 return getType(insn.word(2)).sizeInComponents * insn.word(3);
656 case spv::OpTypeArray:
658 // Element count * element size. Array sizes come from constant ids.
659 auto arraySize = GetConstantInt(insn.word(3));
660 return getType(insn.word(2)).sizeInComponents * arraySize;
663 case spv::OpTypeStruct:
// Struct size is the sum of its member sizes (words 2..N are member types).
666 for (uint32_t i = 2u; i < insn.wordCount(); i++)
668 size += getType(insn.word(i)).sizeInComponents;
673 case spv::OpTypePointer:
674 // Runtime representation of a pointer is a per-lane index.
675 // Note: clients are expected to look through the pointer if they want the pointee size instead.
679 // Some other random insn.
680 UNIMPLEMENTED("Only types are supported")
// Returns whether data in the given storage class is laid out per-lane
// (interleaved) or shared across lanes. The buffer-like classes listed
// here use external (explicit-layout) memory; the return statements for
// each branch are not visible in this view — presumably buffer-like
// classes return false and everything else true; confirm against the
// full source.
685 bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
687 switch (storageClass)
689 case spv::StorageClassUniform:
690 case spv::StorageClassStorageBuffer:
691 case spv::StorageClassPushConstant:
// Recursive worker for VisitInterface: walks a type tree, carrying the
// accumulated Decorations 'd' down to the leaves, and calls f(d, attribType)
// for every scalar component. Returns the next free Location so siblings
// can be assigned sequentially when no explicit Location is given.
699 int SpirvShader::VisitInterfaceInner(Type::ID id, Decorations d, F f) const
701 // Recursively walks variable definition and its type tree, taking into account
702 // any explicit Location or Component decorations encountered; where explicit
703 // Locations or Components are not specified, assigns them sequentially.
704 // Collected decorations are carried down toward the leaves and across
705 // siblings; Effect of decorations intentionally does not flow back up the tree.
707 // F is a functor to be called with the effective decoration set for every component.
709 // Returns the next available location, and calls f().
711 // This covers the rules in Vulkan 1.1 spec, 14.1.4 Location Assignment.
713 ApplyDecorationsForId(&d, id);
715 auto const &obj = getType(id);
718 case spv::OpTypePointer:
719 return VisitInterfaceInner<F>(obj.definition.word(3), d, f);
720 case spv::OpTypeMatrix:
721 for (auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
723 // consumes same components of N consecutive locations
724 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
727 case spv::OpTypeVector:
728 for (auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
730 // consumes N consecutive components in the same location
731 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
733 return d.Location + 1;
734 case spv::OpTypeFloat:
735 f(d, ATTRIBTYPE_FLOAT);
736 return d.Location + 1;
// (OpTypeInt case) word(3) is the signedness flag: 1 = signed, 0 = unsigned.
738 f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
739 return d.Location + 1;
740 case spv::OpTypeBool:
741 f(d, ATTRIBTYPE_UINT);
742 return d.Location + 1;
743 case spv::OpTypeStruct:
745 // iterate over members, which may themselves have Location/Component decorations
746 for (auto i = 0u; i < obj.definition.wordCount() - 2; i++)
748 ApplyDecorationsForIdMember(&d, id, i);
749 d.Location = VisitInterfaceInner<F>(obj.definition.word(i + 2), d, f);
750 d.Component = 0; // Implicit locations always have component=0
754 case spv::OpTypeArray:
756 auto arraySize = GetConstantInt(obj.definition.word(3));
757 for (auto i = 0u; i < arraySize; i++)
759 d.Location = VisitInterfaceInner<F>(obj.definition.word(2), d, f);
764 // Intentionally partial; most opcodes do not participate in type hierarchies
// Entry point for flattening an interface OpVariable: seeds the decoration
// set from the variable's own decorations, then recurses through its type
// (word(1) of the OpVariable is the pointer type), calling f per component.
770 void SpirvShader::VisitInterface(Object::ID id, F f) const
772 // Walk a variable definition and call f for each component in it.
774 ApplyDecorationsForId(&d, id);
776 auto def = getObject(id).definition;
777 ASSERT(def.opcode() == spv::OpVariable);
778 VisitInterfaceInner<F>(def.word(1), d, f);
// Walks an access chain over explicitly-laid-out (buffer) memory, producing
// a per-lane offset in sizeof(float) units. Uses Offset / ArrayStride /
// MatrixStride decorations rather than flattened component sizes.
// Constant indices accumulate into 'constantOffset'; dynamic indices fold
// into the SIMD 'dynamicOffset'; the sum is returned.
781 SIMD::Int SpirvShader::WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
783 // Produce a offset into external memory in sizeof(float) units
785 int constantOffset = 0;
786 SIMD::Int dynamicOffset = SIMD::Int(0);
787 auto &baseObject = getObject(id);
788 Type::ID typeId = getType(baseObject.type).element;
790 ApplyDecorationsForId(&d, baseObject.type);
792 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
793 // Start with its offset and build from there.
794 if (baseObject.kind == Object::Kind::Value)
796 dynamicOffset += routine->getIntermediate(id).Int(0);
799 for (auto i = 0u; i < numIndexes; i++)
801 auto & type = getType(typeId);
802 switch (type.definition.opcode())
804 case spv::OpTypeStruct:
// Struct members must carry an Offset decoration (explicit layout).
806 int memberIndex = GetConstantInt(indexIds[i]);
807 ApplyDecorationsForIdMember(&d, typeId, memberIndex);
809 constantOffset += d.Offset / sizeof(float);
810 typeId = type.definition.word(2u + memberIndex);
813 case spv::OpTypeArray:
814 case spv::OpTypeRuntimeArray:
816 // TODO: b/127950082: Check bounds.
817 ApplyDecorationsForId(&d, typeId);
818 ASSERT(d.HasArrayStride);
819 auto & obj = getObject(indexIds[i]);
820 if (obj.kind == Object::Kind::Constant)
821 constantOffset += d.ArrayStride/sizeof(float) * GetConstantInt(indexIds[i]);
823 dynamicOffset += SIMD::Int(d.ArrayStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
824 typeId = type.element;
827 case spv::OpTypeMatrix:
829 // TODO: b/127950082: Check bounds.
830 ApplyDecorationsForId(&d, typeId);
831 ASSERT(d.HasMatrixStride);
832 auto & obj = getObject(indexIds[i]);
833 if (obj.kind == Object::Kind::Constant)
834 constantOffset += d.MatrixStride/sizeof(float) * GetConstantInt(indexIds[i]);
836 dynamicOffset += SIMD::Int(d.MatrixStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
837 typeId = type.element;
// Vector components are tightly packed: index is the offset directly.
840 case spv::OpTypeVector:
842 auto & obj = getObject(indexIds[i]);
843 if (obj.kind == Object::Kind::Constant)
844 constantOffset += GetConstantInt(indexIds[i]);
846 dynamicOffset += routine->getIntermediate(indexIds[i]).Int(0);
847 typeId = type.element;
851 UNIMPLEMENTED("Unexpected type '%s' in WalkExplicitLayoutAccessChain", OpcodeName(type.definition.opcode()).c_str());
855 return dynamicOffset + SIMD::Int(constantOffset);
// Walks an access chain over location-oriented (flattened-component) memory,
// producing a per-lane *component* offset. Unlike the explicit-layout
// variant, strides come from sizeInComponents rather than decorations.
858 SIMD::Int SpirvShader::WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
860 // TODO: avoid doing per-lane work in some cases if we can?
861 // Produce a *component* offset into location-oriented memory
863 int constantOffset = 0;
864 SIMD::Int dynamicOffset = SIMD::Int(0);
865 auto &baseObject = getObject(id);
866 Type::ID typeId = getType(baseObject.type).element;
868 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
869 // Start with its offset and build from there.
870 if (baseObject.kind == Object::Kind::Value)
872 dynamicOffset += routine->getIntermediate(id).Int(0);
875 for (auto i = 0u; i < numIndexes; i++)
877 auto & type = getType(typeId);
878 switch(type.opcode())
880 case spv::OpTypeStruct:
// Struct member offset is the sum of the sizes of preceding members;
// struct indices must be compile-time constants per the SPIR-V spec.
882 int memberIndex = GetConstantInt(indexIds[i]);
883 int offsetIntoStruct = 0;
884 for (auto j = 0; j < memberIndex; j++) {
885 auto memberType = type.definition.word(2u + j);
886 offsetIntoStruct += getType(memberType).sizeInComponents;
888 constantOffset += offsetIntoStruct;
889 typeId = type.definition.word(2u + memberIndex);
// Homogeneous aggregates: stride = element size; constant indices fold
// into constantOffset, dynamic ones into the SIMD offset.
893 case spv::OpTypeVector:
894 case spv::OpTypeMatrix:
895 case spv::OpTypeArray:
896 case spv::OpTypeRuntimeArray:
898 // TODO: b/127950082: Check bounds.
899 auto stride = getType(type.element).sizeInComponents;
900 auto & obj = getObject(indexIds[i]);
901 if (obj.kind == Object::Kind::Constant)
902 constantOffset += stride * GetConstantInt(indexIds[i]);
904 dynamicOffset += SIMD::Int(stride) * routine->getIntermediate(indexIds[i]).Int(0);
905 typeId = type.element;
910 UNIMPLEMENTED("Unexpected type '%s' in WalkAccessChain", OpcodeName(type.opcode()).c_str());
914 return dynamicOffset + SIMD::Int(constantOffset);
// Like WalkAccessChain, but all indexes are literal words (as used by
// OpCompositeExtract/Insert), so the result is a plain compile-time
// component offset — no SIMD work and no routine needed.
917 uint32_t SpirvShader::WalkLiteralAccessChain(Type::ID typeId, uint32_t numIndexes, uint32_t const *indexes) const
919 uint32_t constantOffset = 0;
921 for (auto i = 0u; i < numIndexes; i++)
923 auto & type = getType(typeId);
924 switch(type.opcode())
926 case spv::OpTypeStruct:
928 int memberIndex = indexes[i];
929 int offsetIntoStruct = 0;
// Sum the flattened sizes of the members preceding the indexed one.
930 for (auto j = 0; j < memberIndex; j++) {
931 auto memberType = type.definition.word(2u + j);
932 offsetIntoStruct += getType(memberType).sizeInComponents;
934 constantOffset += offsetIntoStruct;
935 typeId = type.definition.word(2u + memberIndex);
939 case spv::OpTypeVector:
940 case spv::OpTypeMatrix:
941 case spv::OpTypeArray:
943 auto elementType = type.definition.word(2);
944 auto stride = getType(elementType).sizeInComponents;
945 constantOffset += stride * indexes[i];
946 typeId = elementType;
951 UNIMPLEMENTED("Unexpected type in WalkLiteralAccessChain");
955 return constantOffset;
// Applies a single decoration (with its optional literal argument) to this
// Decorations set, recording both the value and a Has* flag where one
// exists. Unknown decorations fall through to the (not visible here)
// default and are ignored.
958 void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
962 case spv::DecorationLocation:
964 Location = static_cast<int32_t>(arg);
966 case spv::DecorationComponent:
970 case spv::DecorationDescriptorSet:
971 HasDescriptorSet = true;
974 case spv::DecorationBinding:
978 case spv::DecorationBuiltIn:
980 BuiltIn = static_cast<spv::BuiltIn>(arg);
982 case spv::DecorationFlat:
985 case spv::DecorationNoPerspective:
986 NoPerspective = true;
988 case spv::DecorationCentroid:
991 case spv::DecorationBlock:
994 case spv::DecorationBufferBlock:
997 case spv::DecorationOffset:
999 Offset = static_cast<int32_t>(arg);
1001 case spv::DecorationArrayStride:
1002 HasArrayStride = true;
1003 ArrayStride = static_cast<int32_t>(arg);
1005 case spv::DecorationMatrixStride:
1006 HasMatrixStride = true;
1007 MatrixStride = static_cast<int32_t>(arg);
1010 // Intentionally partial, there are many decorations we just don't care about.
// Merges another Decorations set (e.g. a decoration group) into this one.
// Valued fields are copied only when the source has them set (guarded by
// the Has* flags); pure boolean flags are OR-merged at the bottom.
1015 void SpirvShader::Decorations::Apply(const sw::SpirvShader::Decorations &src)
1017 // Apply a decoration group to this set of decorations
1021 BuiltIn = src.BuiltIn;
1024 if (src.HasLocation)
1027 Location = src.Location;
1030 if (src.HasComponent)
1032 HasComponent = true;
1033 Component = src.Component;
1036 if (src.HasDescriptorSet)
1038 HasDescriptorSet = true;
1039 DescriptorSet = src.DescriptorSet;
1045 Binding = src.Binding;
1051 Offset = src.Offset;
1054 if (src.HasArrayStride)
1056 HasArrayStride = true;
1057 ArrayStride = src.ArrayStride;
1060 if (src.HasMatrixStride)
1062 HasMatrixStride = true;
1063 MatrixStride = src.MatrixStride;
// Flag-only decorations merge by OR: once set anywhere, they stick.
1067 NoPerspective |= src.NoPerspective;
1068 Centroid |= src.Centroid;
1070 BufferBlock |= src.BufferBlock;
// Folds any decorations recorded for 'id' into *d; a no-op when the id has
// no decorations.
1073 void SpirvShader::ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const
1075 auto it = decorations.find(id);
1076 if (it != decorations.end())
1077 d->Apply(it->second);
// Folds decorations for struct member 'member' of type 'id' into *d.
// The bounds check matters: member vectors are resized on demand, so a
// member may legitimately have no entry.
1080 void SpirvShader::ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const
1082 auto it = memberDecorations.find(id);
1083 if (it != memberDecorations.end() && member < it->second.size())
1085 d->Apply(it->second[member]);
// Extracts the scalar integer value of a constant object during analysis
// (used for array sizes, struct member indices, etc.). Only plain OpConstant
// of integer type is accepted — asserted below.
1089 uint32_t SpirvShader::GetConstantInt(Object::ID id) const
1091 // Slightly hackish access to constants very early in translation.
1092 // General consumption of constants by other instructions should
1093 // probably be just lowered to Reactor.
1095 // TODO: not encountered yet since we only use this for array sizes etc,
1096 // but is possible to construct integer constant 0 via OpConstantNull.
1097 auto insn = getObject(id).definition;
1098 ASSERT(insn.opcode() == spv::OpConstant);
1099 ASSERT(getType(insn.word(1)).opcode() == spv::OpTypeInt);
1100 return insn.word(3);
// Prolog pass run before emit(): allocates an lvalue in the routine for
// every OpVariable whose pointee occupies at least one component, sized by
// the pointee's flattened component count.
1105 void SpirvShader::emitProlog(SpirvRoutine *routine) const
1107 for (auto insn : *this)
1109 switch (insn.opcode())
1111 case spv::OpVariable:
1113 Type::ID resultPointerTypeId = insn.word(1);
1114 auto resultPointerType = getType(resultPointerTypeId);
1115 auto pointeeType = getType(resultPointerType.element);
1117 if(pointeeType.sizeInComponents > 0) // TODO: what to do about zero-slot objects?
1119 Object::ID resultId = insn.word(2);
1120 routine->createLvalue(resultId, pointeeType.sizeInComponents);
1125 // Nothing else produces interface variables, so can all be safely ignored.
// Main code-generation entry point: emits the pre-label (module-level)
// instructions first, then walks the block graph breadth-first starting at
// mainBlockId, emitting each block once (EmitBlock tracks 'visited').
1131 void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask) const
1134 state.setActiveLaneMask(activeLaneMask);
1135 state.routine = routine;
1137 // Emit everything up to the first label
1138 // TODO: Separate out dispatch of block from non-block instructions?
1139 for (auto insn : *this)
1141 if (insn.opcode() == spv::OpLabel)
1145 EmitInstruction(insn, &state);
1148 // Emit all the blocks in BFS order, starting with the main block.
1149 std::queue<Block::ID> pending;
1150 pending.push(mainBlockId);
1151 while (pending.size() > 0)
1153 auto id = pending.front();
1155 if (state.visited.count(id) == 0)
1157 EmitBlock(id, &state);
// Enqueue successors so the traversal reaches every connected block.
1158 for (auto it : getBlock(id).outs)
// Emits a single basic block (idempotent — returns early if already done).
// For blocks reached by conditional control flow, the active lane mask is
// reconstructed as the OR of the masks on all incoming edges, after first
// ensuring every predecessor has been emitted.
1166 void SpirvShader::EmitBlock(Block::ID id, EmitState *state) const
1168 if (state->visited.count(id) > 0)
1170 return; // Already processed this block.
1173 state->visited.emplace(id);
1175 auto &block = getBlock(id);
1180 case Block::StructuredBranchConditional:
1181 case Block::UnstructuredBranchConditional:
1182 case Block::StructuredSwitch:
1183 case Block::UnstructuredSwitch:
1184 if (id != mainBlockId)
1186 // Emit all preceeding blocks and set the activeLaneMask.
1187 Intermediate activeLaneMask(1);
1188 activeLaneMask.move(0, SIMD::Int(0));
1189 for (auto in : block.ins)
1191 EmitBlock(in, state);
1192 auto inMask = state->getActiveLaneMaskEdge(in, id);
1193 activeLaneMask.replace(0, activeLaneMask.Int(0) | inMask);
1195 state->setActiveLaneMask(activeLaneMask.Int(0));
1197 state->currentBlock = id;
1198 EmitInstructions(block.begin(), block.end(), state);
1202 UNIMPLEMENTED("Unhandled Block Kind: %d", int(block.kind));
// Emits an instruction range [begin, end), dispatching each through
// EmitInstruction; a Terminator result ends the block early, Continue
// proceeds, anything else is a logic error.
1206 void SpirvShader::EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const
1208 for (auto insn = begin; insn != end; insn++)
1210 auto res = EmitInstruction(insn, state);
1213 case EmitResult::Continue:
1215 case EmitResult::Terminator:
1218 UNREACHABLE("Unexpected EmitResult %d", int(res));
// Central opcode dispatcher: routes one SPIR-V instruction to its Emit*
// handler, or returns Continue for instructions fully handled during the
// analysis passes (types, constants, decorations, debug info, ...).
1224 SpirvShader::EmitResult SpirvShader::EmitInstruction(InsnIterator insn, EmitState *state) const
1226 switch (insn.opcode())
// --- No-ops at emit time: declarations and metadata consumed at analysis time.
1228 case spv::OpTypeVoid:
1229 case spv::OpTypeInt:
1230 case spv::OpTypeFloat:
1231 case spv::OpTypeBool:
1232 case spv::OpTypeVector:
1233 case spv::OpTypeArray:
1234 case spv::OpTypeRuntimeArray:
1235 case spv::OpTypeMatrix:
1236 case spv::OpTypeStruct:
1237 case spv::OpTypePointer:
1238 case spv::OpTypeFunction:
1239 case spv::OpExecutionMode:
1240 case spv::OpMemoryModel:
1241 case spv::OpFunction:
1242 case spv::OpFunctionEnd:
1243 case spv::OpConstant:
1244 case spv::OpConstantNull:
1245 case spv::OpConstantTrue:
1246 case spv::OpConstantFalse:
1247 case spv::OpConstantComposite:
1249 case spv::OpExtension:
1250 case spv::OpCapability:
1251 case spv::OpEntryPoint:
1252 case spv::OpExtInstImport:
1253 case spv::OpDecorate:
1254 case spv::OpMemberDecorate:
1255 case spv::OpGroupDecorate:
1256 case spv::OpGroupMemberDecorate:
1257 case spv::OpDecorationGroup:
1259 case spv::OpMemberName:
1261 case spv::OpSourceContinued:
1262 case spv::OpSourceExtension:
1265 case spv::OpModuleProcessed:
1267 // Nothing to do at emit time. These are either fully handled at analysis time,
1268 // or don't require any work at all.
1269 return EmitResult::Continue;
1272 return EmitResult::Continue;
// --- Instructions that generate code, one handler each.
1274 case spv::OpVariable:
1275 return EmitVariable(insn, state);
1278 case spv::OpAtomicLoad:
1279 return EmitLoad(insn, state);
1282 case spv::OpAtomicStore:
1283 return EmitStore(insn, state);
1285 case spv::OpAccessChain:
1286 case spv::OpInBoundsAccessChain:
1287 return EmitAccessChain(insn, state);
1289 case spv::OpCompositeConstruct:
1290 return EmitCompositeConstruct(insn, state);
1292 case spv::OpCompositeInsert:
1293 return EmitCompositeInsert(insn, state);
1295 case spv::OpCompositeExtract:
1296 return EmitCompositeExtract(insn, state);
1298 case spv::OpVectorShuffle:
1299 return EmitVectorShuffle(insn, state);
1301 case spv::OpVectorExtractDynamic:
1302 return EmitVectorExtractDynamic(insn, state);
1304 case spv::OpVectorInsertDynamic:
1305 return EmitVectorInsertDynamic(insn, state);
1307 case spv::OpVectorTimesScalar:
1308 return EmitVectorTimesScalar(insn, state);
// --- Componentwise unary operators (including derivative instructions).
1311 case spv::OpSNegate:
1312 case spv::OpFNegate:
1313 case spv::OpLogicalNot:
1314 case spv::OpConvertFToU:
1315 case spv::OpConvertFToS:
1316 case spv::OpConvertSToF:
1317 case spv::OpConvertUToF:
1318 case spv::OpBitcast:
1322 case spv::OpDPdxCoarse:
1324 case spv::OpDPdyCoarse:
1326 case spv::OpFwidthCoarse:
1327 case spv::OpDPdxFine:
1328 case spv::OpDPdyFine:
1329 case spv::OpFwidthFine:
1330 return EmitUnaryOp(insn, state);
// --- Componentwise binary operators (arithmetic, comparison, bitwise, logical).
1343 case spv::OpFOrdEqual:
1344 case spv::OpFUnordEqual:
1345 case spv::OpFOrdNotEqual:
1346 case spv::OpFUnordNotEqual:
1347 case spv::OpFOrdLessThan:
1348 case spv::OpFUnordLessThan:
1349 case spv::OpFOrdGreaterThan:
1350 case spv::OpFUnordGreaterThan:
1351 case spv::OpFOrdLessThanEqual:
1352 case spv::OpFUnordLessThanEqual:
1353 case spv::OpFOrdGreaterThanEqual:
1354 case spv::OpFUnordGreaterThanEqual:
1359 case spv::OpINotEqual:
1360 case spv::OpUGreaterThan:
1361 case spv::OpSGreaterThan:
1362 case spv::OpUGreaterThanEqual:
1363 case spv::OpSGreaterThanEqual:
1364 case spv::OpULessThan:
1365 case spv::OpSLessThan:
1366 case spv::OpULessThanEqual:
1367 case spv::OpSLessThanEqual:
1368 case spv::OpShiftRightLogical:
1369 case spv::OpShiftRightArithmetic:
1370 case spv::OpShiftLeftLogical:
1371 case spv::OpBitwiseOr:
1372 case spv::OpBitwiseXor:
1373 case spv::OpBitwiseAnd:
1374 case spv::OpLogicalOr:
1375 case spv::OpLogicalAnd:
1376 case spv::OpLogicalEqual:
1377 case spv::OpLogicalNotEqual:
1378 case spv::OpUMulExtended:
1379 case spv::OpSMulExtended:
1380 return EmitBinaryOp(insn, state);
1383 return EmitDot(insn, state);
1386 return EmitSelect(insn, state);
1388 case spv::OpExtInst:
1389 return EmitExtendedInstruction(insn, state);
1392 return EmitAny(insn, state);
1395 return EmitAll(insn, state);
// --- Control flow.
1398 return EmitBranch(insn, state);
1401 return EmitPhi(insn, state);
// Merge info was already captured during control-flow analysis.
1403 case spv::OpSelectionMerge:
1404 return EmitResult::Continue;
1406 case spv::OpBranchConditional:
1407 return EmitBranchConditional(insn, state);
1410 return EmitSwitch(insn, state);
1412 case spv::OpUnreachable:
1413 return EmitUnreachable(insn, state);
1416 return EmitReturn(insn, state);
// Unknown opcode: report it by name rather than silently skipping.
1419 UNIMPLEMENTED("opcode: %s", OpcodeName(insn.opcode()).c_str());
1423 return EmitResult::Continue;
// Materializes an OpVariable for the current invocation, dispatching on its
// storage class: Input variables are gathered from the routine's input
// registers; Uniform/StorageBuffer variables resolve their descriptor to a
// physical device pointer; PushConstant aliases the push-constant block.
1426 SpirvShader::EmitResult SpirvShader::EmitVariable(InsnIterator insn, EmitState *state) const
1428 auto routine = state->routine;
1429 Object::ID resultId = insn.word(2);
1430 auto &object = getObject(resultId);
1431 auto &objectTy = getType(object.type);
1432 switch (objectTy.storageClass)
1434 case spv::StorageClassInput:
1436 if (object.kind == Object::Kind::InterfaceVariable)
1438 auto &dst = routine->getValue(resultId);
// Copy each interface component from its packed input slot
// (location * 4 + component) into the variable's storage.
1440 VisitInterface(resultId,
1441 [&](Decorations const &d, AttribType type) {
1442 auto scalarSlot = d.Location << 2 | d.Component;
1443 dst[offset++] = routine->inputs[scalarSlot];
1448 case spv::StorageClassUniform:
1449 case spv::StorageClassStorageBuffer:
1452 ApplyDecorationsForId(&d, resultId);
// Buffer-backed variables must carry DescriptorSet and Binding decorations.
1453 ASSERT(d.DescriptorSet >= 0);
1454 ASSERT(d.Binding >= 0);
1456 size_t bindingOffset = routine->pipelineLayout->getBindingOffset(d.DescriptorSet, d.Binding);
// Chase the descriptor indirections down to the raw buffer memory,
// then apply the binding's byte offset.
1458 Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
1459 Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // VkDescriptorBufferInfo*
1460 Pointer<Byte> buffer = *Pointer<Pointer<Byte>>(binding + OFFSET(VkDescriptorBufferInfo, buffer)); // vk::Buffer*
1461 Pointer<Byte> data = *Pointer<Pointer<Byte>>(buffer + vk::Buffer::DataOffset); // void*
1462 Int offset = *Pointer<Int>(binding + OFFSET(VkDescriptorBufferInfo, offset));
1463 Pointer<Byte> address = data + offset;
1464 routine->physicalPointers[resultId] = address;
1467 case spv::StorageClassPushConstant:
1469 routine->physicalPointers[resultId] = routine->pushConstants;
1476 return EmitResult::Continue;
// Emits an OpLoad / OpAtomicLoad. Takes a slow scalarized path when the
// pointer carries per-lane (divergent) offsets or some lanes are inactive;
// otherwise performs whole-SIMD-register loads, interleaved or not depending
// on the pointer's storage class.
1479 SpirvShader::EmitResult SpirvShader::EmitLoad(InsnIterator insn, EmitState *state) const
1481 auto routine = state->routine;
1482 bool atomic = (insn.opcode() == spv::OpAtomicLoad);
1483 Object::ID resultId = insn.word(2);
1484 Object::ID pointerId = insn.word(3);
1485 auto &result = getObject(resultId);
1486 auto &resultTy = getType(result.type);
1487 auto &pointer = getObject(pointerId);
1488 auto &pointerBase = getObject(pointer.pointerBase);
1489 auto &pointerBaseTy = getType(pointerBase.type);
1490 std::memory_order memoryOrder = std::memory_order_relaxed;
// Atomic loads carry memory-semantics as a constant operand; translate it
// to a std::memory_order for the Reactor Load.
1494 Object::ID semanticsId = insn.word(5);
1495 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1496 memoryOrder = MemoryOrder(memorySemantics);
1499 ASSERT(getType(pointer.type).element == result.type);
1500 ASSERT(Type::ID(insn.word(1)) == result.type);
1501 ASSERT(!atomic || getType(getType(pointer.type).element).opcode() == spv::OpTypeInt); // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1503 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1505 UNIMPLEMENTED("StorageClassImage load not yet implemented");
// Physical pointers address device memory; other objects live in the
// routine's own register-file storage.
1508 Pointer<Float> ptrBase;
1509 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1511 ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1515 ptrBase = &routine->getValue(pointer.pointerBase)[0];
1518 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1519 auto anyInactiveLanes = SignMask(~state->activeLaneMask()) != 0;
1521 auto load = std::unique_ptr<SIMD::Float[]>(new SIMD::Float[resultTy.sizeInComponents]);
1523 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1525 // Divergent offsets or masked lanes.
1526 auto offsets = pointer.kind == Object::Kind::Value ?
1527 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1528 RValue<SIMD::Int>(SIMD::Int(0));
1529 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1531 // i wish i had a Float,Float,Float,Float constructor here..
// Scalarized per-lane load, skipping lanes that are inactive.
1532 for (int j = 0; j < SIMD::Width; j++)
1534 If(Extract(state->activeLaneMask(), j) != 0)
1536 Int offset = Int(i) + Extract(offsets, j);
1537 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1538 load[i] = Insert(load[i], Load(&ptrBase[offset], sizeof(float), atomic, memoryOrder), j);
1545 // No divergent offsets or masked lanes.
1546 if (interleavedByLane)
1548 // Lane-interleaved data.
1549 Pointer<SIMD::Float> src = ptrBase;
1550 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1552 load[i] = Load(&src[i], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
1557 // Non-interleaved data.
1558 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1560 load[i] = RValue<SIMD::Float>(Load(&ptrBase[i], sizeof(float), atomic, memoryOrder)); // TODO: optimize alignment
// Publish the loaded components as the instruction's intermediate result.
1565 auto &dst = routine->createIntermediate(resultId, resultTy.sizeInComponents);
1566 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1568 dst.move(i, load[i]);
1571 return EmitResult::Continue;
// Emits an OpStore / OpAtomicStore. Mirrors EmitLoad's structure: a
// scalarized masked path for divergent offsets / inactive lanes, and a fast
// whole-register path otherwise, with separate handling for constant vs.
// intermediate source data.
1574 SpirvShader::EmitResult SpirvShader::EmitStore(InsnIterator insn, EmitState *state) const
1576 auto routine = state->routine;
1577 bool atomic = (insn.opcode() == spv::OpAtomicStore);
// OpAtomicStore's value operand is word 4 (after pointer/scope/semantics);
// plain OpStore's is word 2.
1578 Object::ID pointerId = insn.word(1);
1579 Object::ID objectId = insn.word(atomic ? 4 : 2);
1580 auto &object = getObject(objectId);
1581 auto &pointer = getObject(pointerId);
1582 auto &pointerTy = getType(pointer.type);
1583 auto &elementTy = getType(pointerTy.element);
1584 auto &pointerBase = getObject(pointer.pointerBase);
1585 auto &pointerBaseTy = getType(pointerBase.type);
1586 std::memory_order memoryOrder = std::memory_order_relaxed;
// Translate atomic memory-semantics operand into a std::memory_order.
1590 Object::ID semanticsId = insn.word(3);
1591 auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1592 memoryOrder = MemoryOrder(memorySemantics);
1595 ASSERT(!atomic || elementTy.opcode() == spv::OpTypeInt); // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1597 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1599 UNIMPLEMENTED("StorageClassImage store not yet implemented");
1602 Pointer<Float> ptrBase;
1603 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1605 ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1609 ptrBase = &routine->getValue(pointer.pointerBase)[0];
1612 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1613 auto anyInactiveLanes = SignMask(~state->activeLaneMask()) != 0;
1615 if (object.kind == Object::Kind::Constant)
1617 // Constant source data.
1618 auto src = reinterpret_cast<float *>(object.constantValue.get());
1619 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1621 // Divergent offsets or masked lanes.
1622 auto offsets = pointer.kind == Object::Kind::Value ?
1623 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1624 RValue<SIMD::Int>(SIMD::Int(0));
1625 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
// Per-lane scalar stores, predicated on the active lane mask.
1627 for (int j = 0; j < SIMD::Width; j++)
1629 If(Extract(state->activeLaneMask(), j) != 0)
1631 Int offset = Int(i) + Extract(offsets, j);
1632 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1633 Store(RValue<Float>(src[i]), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1640 // Constant source data.
1641 // No divergent offsets or masked lanes.
1642 Pointer<SIMD::Float> dst = ptrBase;
1643 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1645 Store(RValue<SIMD::Float>(src[i]), &dst[i], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
1651 // Intermediate source data.
1652 auto &src = routine->getIntermediate(objectId);
1653 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1655 // Divergent offsets or masked lanes.
1656 auto offsets = pointer.kind == Object::Kind::Value ?
1657 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1658 RValue<SIMD::Int>(SIMD::Int(0));
1659 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1661 for (int j = 0; j < SIMD::Width; j++)
1663 If(Extract(state->activeLaneMask(), j) != 0)
1665 Int offset = Int(i) + Extract(offsets, j);
1666 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1667 Store(Extract(src.Float(i), j), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1674 // No divergent offsets or masked lanes.
1675 if (interleavedByLane)
1677 // Lane-interleaved data.
1678 Pointer<SIMD::Float> dst = ptrBase;
1679 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1681 Store(src.Float(i), &dst[i], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
1686 // Intermediate source data. Non-interleaved data.
1687 Pointer<SIMD::Float> dst = ptrBase;
1688 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1690 Store<SIMD::Float>(SIMD::Float(src.Float(i)), &dst[i], sizeof(float), atomic, memoryOrder); // TODO: optimize alignment
1696 return EmitResult::Continue;
// Emits OpAccessChain / OpInBoundsAccessChain: computes a per-lane element
// offset from the base pointer by walking the index list. Explicitly
// laid-out storage classes (push constants, uniform, storage buffers) use
// decorated offsets/strides; everything else uses the flat component layout.
1699 SpirvShader::EmitResult SpirvShader::EmitAccessChain(InsnIterator insn, EmitState *state) const
1701 auto routine = state->routine;
1702 Type::ID typeId = insn.word(1);
1703 Object::ID resultId = insn.word(2);
1704 Object::ID baseId = insn.word(3);
1705 uint32_t numIndexes = insn.wordCount() - 4;
1706 const uint32_t *indexes = insn.wordPointer(4);
1707 auto &type = getType(typeId);
// The result of an access chain is a single pointer (one offset component).
1708 ASSERT(type.sizeInComponents == 1);
// Access chains never change which object the pointer is based on.
1709 ASSERT(getObject(baseId).pointerBase == getObject(resultId).pointerBase);
1711 auto &dst = routine->createIntermediate(resultId, type.sizeInComponents);
1713 if(type.storageClass == spv::StorageClassPushConstant ||
1714 type.storageClass == spv::StorageClassUniform ||
1715 type.storageClass == spv::StorageClassStorageBuffer)
1717 dst.move(0, WalkExplicitLayoutAccessChain(baseId, numIndexes, indexes, routine));
1721 dst.move(0, WalkAccessChain(baseId, numIndexes, indexes, routine));
1724 return EmitResult::Continue;
// Emits OpCompositeConstruct: concatenates the components of all constituent
// operands, in order, into the flat component view of the result.
1727 SpirvShader::EmitResult SpirvShader::EmitCompositeConstruct(InsnIterator insn, EmitState *state) const
1729 auto routine = state->routine;
1730 auto &type = getType(insn.word(1));
1731 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
// Constituents start at word 3; each contributes sizeInComponents scalars.
1734 for (auto i = 0u; i < insn.wordCount() - 3; i++)
1736 Object::ID srcObjectId = insn.word(3u + i);
1737 auto & srcObject = getObject(srcObjectId);
1738 auto & srcObjectTy = getType(srcObject.type);
1739 GenericValue srcObjectAccess(this, routine, srcObjectId);
1741 for (auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
1743 dst.move(offset++, srcObjectAccess.Float(j));
1747 return EmitResult::Continue;
// Emits OpCompositeInsert: copies the source composite, replacing the
// components selected by the literal index chain with the new object's
// components. Operates on the flat component view.
1750 SpirvShader::EmitResult SpirvShader::EmitCompositeInsert(InsnIterator insn, EmitState *state) const
1752 auto routine = state->routine;
1753 Type::ID resultTypeId = insn.word(1);
1754 auto &type = getType(resultTypeId);
1755 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1756 auto &newPartObject = getObject(insn.word(3));
1757 auto &newPartObjectTy = getType(newPartObject.type);
// Resolve the literal indexes (from word 5 on) to a flat component offset.
1758 auto firstNewComponent = WalkLiteralAccessChain(resultTypeId, insn.wordCount() - 5, insn.wordPointer(5));
1760 GenericValue srcObjectAccess(this, routine, insn.word(4));
1761 GenericValue newPartObjectAccess(this, routine, insn.word(3));
1763 // old components before
1764 for (auto i = 0u; i < firstNewComponent; i++)
1766 dst.move(i, srcObjectAccess.Float(i));
// new components, spliced in at firstNewComponent
1769 for (auto i = 0u; i < newPartObjectTy.sizeInComponents; i++)
1771 dst.move(firstNewComponent + i, newPartObjectAccess.Float(i));
1773 // old components after
1774 for (auto i = firstNewComponent + newPartObjectTy.sizeInComponents; i < type.sizeInComponents; i++)
1776 dst.move(i, srcObjectAccess.Float(i));
1779 return EmitResult::Continue;
// Emits OpCompositeExtract: copies the components selected by the literal
// index chain out of the composite into the (smaller) result.
1782 SpirvShader::EmitResult SpirvShader::EmitCompositeExtract(InsnIterator insn, EmitState *state) const
1784 auto routine = state->routine;
1785 auto &type = getType(insn.word(1));
1786 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1787 auto &compositeObject = getObject(insn.word(3));
// The composite's type id comes from its defining instruction's word 1.
1788 Type::ID compositeTypeId = compositeObject.definition.word(1);
1789 auto firstComponent = WalkLiteralAccessChain(compositeTypeId, insn.wordCount() - 4, insn.wordPointer(4));
1791 GenericValue compositeObjectAccess(this, routine, insn.word(3));
1792 for (auto i = 0u; i < type.sizeInComponents; i++)
1794 dst.move(i, compositeObjectAccess.Float(firstComponent + i));
1797 return EmitResult::Continue;
// Emits OpVectorShuffle: each result component selects, by literal index,
// a component from the concatenation of the two source vectors. Index
// 0xFFFFFFFF means "undefined"; we emit zero for it.
1800 SpirvShader::EmitResult SpirvShader::EmitVectorShuffle(InsnIterator insn, EmitState *state) const
1802 auto routine = state->routine;
1803 auto &type = getType(insn.word(1));
1804 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1806 // Note: number of components in result type, first half type, and second
1807 // half type are all independent.
1808 auto &firstHalfType = getType(getObject(insn.word(3)).type);
1810 GenericValue firstHalfAccess(this, routine, insn.word(3));
1811 GenericValue secondHalfAccess(this, routine, insn.word(4));
1813 for (auto i = 0u; i < type.sizeInComponents; i++)
1815 auto selector = insn.word(5 + i);
1816 if (selector == static_cast<uint32_t>(-1))
1818 // Undefined value. Until we decide to do real undef values, zero is as good
1820 dst.move(i, RValue<SIMD::Float>(0.0f));
1822 else if (selector < firstHalfType.sizeInComponents)
1824 dst.move(i, firstHalfAccess.Float(selector));
// Selectors beyond the first vector index into the second vector.
1828 dst.move(i, secondHalfAccess.Float(selector - firstHalfType.sizeInComponents));
1832 return EmitResult::Continue;
// Emits OpVectorExtractDynamic: selects one component of a vector using a
// runtime (per-lane) index. Implemented branchlessly by masking each
// component with a compare against the index and OR-ing the results.
1835 SpirvShader::EmitResult SpirvShader::EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const
1837 auto routine = state->routine;
1838 auto &type = getType(insn.word(1));
1839 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1840 auto &srcType = getType(getObject(insn.word(3)).type);
1842 GenericValue src(this, routine, insn.word(3));
1843 GenericValue index(this, routine, insn.word(4));
1845 SIMD::UInt v = SIMD::UInt(0);
1847 for (auto i = 0u; i < srcType.sizeInComponents; i++)
// CmpEQ yields all-ones for matching lanes, so exactly one component
// survives the AND per lane; out-of-range indexes yield zero.
1849 v |= CmpEQ(index.UInt(0), SIMD::UInt(i)) & src.UInt(i);
1853 return EmitResult::Continue;
// Emits OpVectorInsertDynamic: copies the source vector, replacing the
// component addressed by a runtime (per-lane) index with the new value.
// Branchless: blend each component with a per-lane equality mask.
1856 SpirvShader::EmitResult SpirvShader::EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const
1858 auto routine = state->routine;
1859 auto &type = getType(insn.word(1));
1860 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1862 GenericValue src(this, routine, insn.word(3));
1863 GenericValue component(this, routine, insn.word(4));
1864 GenericValue index(this, routine, insn.word(5));
1866 for (auto i = 0u; i < type.sizeInComponents; i++)
1868 SIMD::UInt mask = CmpEQ(SIMD::UInt(i), index.UInt(0));
// Lanes where index == i take the new component; others keep src.
1869 dst.move(i, (src.UInt(i) & ~mask) | (component.UInt(0) & mask));
1871 return EmitResult::Continue;
// Emits OpVectorTimesScalar: multiplies every component of the vector
// operand (word 3) by the scalar operand (word 4).
1874 SpirvShader::EmitResult SpirvShader::EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const
1876 auto routine = state->routine;
1877 auto &type = getType(insn.word(1));
1878 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1879 auto lhs = GenericValue(this, routine, insn.word(3));
1880 auto rhs = GenericValue(this, routine, insn.word(4));
1882 for (auto i = 0u; i < type.sizeInComponents; i++)
// rhs is a scalar, so always read its component 0.
1884 dst.move(i, lhs.Float(i) * rhs.Float(0));
1887 return EmitResult::Continue;
// Emits all componentwise unary operators: negation, logical not,
// int<->float conversions, bitcast, and the screen-space derivative
// instructions (which operate across the four lanes of a 2x2 quad).
1890 SpirvShader::EmitResult SpirvShader::EmitUnaryOp(InsnIterator insn, EmitState *state) const
1892 auto routine = state->routine;
1893 auto &type = getType(insn.word(1));
1894 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1895 auto src = GenericValue(this, routine, insn.word(3));
1897 for (auto i = 0u; i < type.sizeInComponents; i++)
1899 switch (insn.opcode())
1902 case spv::OpLogicalNot: // logical not == bitwise not due to all-bits boolean representation
1903 dst.move(i, ~src.UInt(i));
1905 case spv::OpSNegate:
1906 dst.move(i, -src.Int(i));
1908 case spv::OpFNegate:
1909 dst.move(i, -src.Float(i));
1911 case spv::OpConvertFToU:
1912 dst.move(i, SIMD::UInt(src.Float(i)));
1914 case spv::OpConvertFToS:
1915 dst.move(i, SIMD::Int(src.Float(i)));
1917 case spv::OpConvertSToF:
1918 dst.move(i, SIMD::Float(src.Int(i)));
1920 case spv::OpConvertUToF:
1921 dst.move(i, SIMD::Float(src.UInt(i)));
// Bitcast is a pure reinterpretation; components are moved untouched.
1923 case spv::OpBitcast:
1924 dst.move(i, src.Float(i));
1927 dst.move(i, IsInf(src.Float(i)));
1930 dst.move(i, IsNan(src.Float(i)));
1933 case spv::OpDPdxCoarse:
1934 // Derivative instructions: FS invocations are laid out like so:
// Coarse derivatives use one difference for the whole quad:
// lanes 0/1 are horizontal neighbors, lanes 0/2 vertical neighbors.
1937 static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
1938 dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
1941 case spv::OpDPdyCoarse:
1942 dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
1945 case spv::OpFwidthCoarse:
1946 dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))
1947 + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
// Fine derivatives compute a separate difference per row (dPdx) or
// per column (dPdy) of the quad.
1949 case spv::OpDPdxFine:
1951 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
1952 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
1953 SIMD::Float v = SIMD::Float(firstRow);
1954 v = Insert(v, secondRow, 2);
1955 v = Insert(v, secondRow, 3);
1959 case spv::OpDPdyFine:
1961 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
1962 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
1963 SIMD::Float v = SIMD::Float(firstColumn);
1964 v = Insert(v, secondColumn, 1);
1965 v = Insert(v, secondColumn, 3);
1969 case spv::OpFwidthFine:
// fwidth = |dPdx| + |dPdy|, built from the fine derivatives above.
1971 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
1972 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
1973 SIMD::Float dpdx = SIMD::Float(firstRow);
1974 dpdx = Insert(dpdx, secondRow, 2);
1975 dpdx = Insert(dpdx, secondRow, 3);
1976 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
1977 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
1978 SIMD::Float dpdy = SIMD::Float(firstColumn);
1979 dpdy = Insert(dpdy, secondColumn, 1);
1980 dpdy = Insert(dpdy, secondColumn, 3);
1981 dst.move(i, Abs(dpdx) + Abs(dpdy));
1985 UNIMPLEMENTED("Unhandled unary operator %s", OpcodeName(insn.opcode()).c_str());
1989 return EmitResult::Continue;
// Emits all componentwise binary operators: integer/float arithmetic,
// divide/modulo with divide-by-zero and INT_MIN/-1 overflow guards (both
// undefined in SPIR-V, so any well-defined result is acceptable),
// comparisons, shifts, bitwise/logical ops, and the extended multiplies
// whose struct result occupies 2x the component count.
// Note: iterates over the LHS type's component count, since for the
// *Extended multiplies the result type is twice as wide as the operands.
1992 SpirvShader::EmitResult SpirvShader::EmitBinaryOp(InsnIterator insn, EmitState *state) const
1994 auto routine = state->routine;
1995 auto &type = getType(insn.word(1));
1996 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1997 auto &lhsType = getType(getObject(insn.word(3)).type);
1998 auto lhs = GenericValue(this, routine, insn.word(3));
1999 auto rhs = GenericValue(this, routine, insn.word(4));
2001 for (auto i = 0u; i < lhsType.sizeInComponents; i++)
2003 switch (insn.opcode())
2006 dst.move(i, lhs.Int(i) + rhs.Int(i));
2009 dst.move(i, lhs.Int(i) - rhs.Int(i));
2012 dst.move(i, lhs.Int(i) * rhs.Int(i));
// Signed division: sanitize divisor/dividend so the generated code
// cannot fault (division by zero / INT_MIN / -1 are UB in SPIR-V).
2016 SIMD::Int a = lhs.Int(i);
2017 SIMD::Int b = rhs.Int(i);
2018 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2019 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
// Unsigned division: OR the divisor with its ==0 mask so zero
// divisors become all-ones instead of faulting.
2025 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2026 dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
2031 SIMD::Int a = lhs.Int(i);
2032 SIMD::Int b = rhs.Int(i);
2033 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2034 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2040 SIMD::Int a = lhs.Int(i);
2041 SIMD::Int b = rhs.Int(i);
2042 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2043 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2045 // If a and b have opposite signs, the remainder operation takes
2046 // the sign from a but OpSMod is supposed to take the sign of b.
2047 // Adding b will ensure that the result has the correct sign and
2048 // that it is still congruent to a modulo b.
2050 // See also http://mathforum.org/library/drmath/view/52343.html
2051 auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
2052 auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
2053 dst.move(i, As<SIMD::Float>(fixedMod));
2058 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2059 dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
// Logical (bool) ops share integer compares because booleans are
// represented as all-zeros / all-ones lane masks.
2063 case spv::OpLogicalEqual:
2064 dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
2066 case spv::OpINotEqual:
2067 case spv::OpLogicalNotEqual:
2068 dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
2070 case spv::OpUGreaterThan:
2071 dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
2073 case spv::OpSGreaterThan:
2074 dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i)));
2076 case spv::OpUGreaterThanEqual:
2077 dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
2079 case spv::OpSGreaterThanEqual:
2080 dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i)));
2082 case spv::OpULessThan:
2083 dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
2085 case spv::OpSLessThan:
2086 dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i)));
2088 case spv::OpULessThanEqual:
2089 dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
2091 case spv::OpSLessThanEqual:
2092 dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i)));
2095 dst.move(i, lhs.Float(i) + rhs.Float(i));
2098 dst.move(i, lhs.Float(i) - rhs.Float(i));
2101 dst.move(i, lhs.Float(i) * rhs.Float(i));
2104 dst.move(i, lhs.Float(i) / rhs.Float(i));
2107 // TODO(b/126873455): inaccurate for values greater than 2^24
2108 dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
2111 dst.move(i, lhs.Float(i) % rhs.Float(i));
// Ordered compares are false if either operand is NaN; unordered
// (CmpU*) compares are true.
2113 case spv::OpFOrdEqual:
2114 dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
2116 case spv::OpFUnordEqual:
2117 dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
2119 case spv::OpFOrdNotEqual:
2120 dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
2122 case spv::OpFUnordNotEqual:
2123 dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
2125 case spv::OpFOrdLessThan:
2126 dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i)));
2128 case spv::OpFUnordLessThan:
2129 dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i)));
2131 case spv::OpFOrdGreaterThan:
2132 dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i)));
2134 case spv::OpFUnordGreaterThan:
2135 dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
2137 case spv::OpFOrdLessThanEqual:
2138 dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i)));
2140 case spv::OpFUnordLessThanEqual:
2141 dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i)));
2143 case spv::OpFOrdGreaterThanEqual:
2144 dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i)));
2146 case spv::OpFUnordGreaterThanEqual:
2147 dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
2149 case spv::OpShiftRightLogical:
2150 dst.move(i, lhs.UInt(i) >> rhs.UInt(i));
2152 case spv::OpShiftRightArithmetic:
2153 dst.move(i, lhs.Int(i) >> rhs.Int(i));
2155 case spv::OpShiftLeftLogical:
2156 dst.move(i, lhs.UInt(i) << rhs.UInt(i));
2158 case spv::OpBitwiseOr:
2159 case spv::OpLogicalOr:
2160 dst.move(i, lhs.UInt(i) | rhs.UInt(i));
2162 case spv::OpBitwiseXor:
2163 dst.move(i, lhs.UInt(i) ^ rhs.UInt(i));
2165 case spv::OpBitwiseAnd:
2166 case spv::OpLogicalAnd:
2167 dst.move(i, lhs.UInt(i) & rhs.UInt(i));
2169 case spv::OpSMulExtended:
2170 // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
2171 // In our flat view then, component i is the i'th component of the first member;
2172 // component i + N is the i'th component of the second member.
2173 dst.move(i, lhs.Int(i) * rhs.Int(i));
2174 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
2176 case spv::OpUMulExtended:
2177 dst.move(i, lhs.UInt(i) * rhs.UInt(i));
2178 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
2181 UNIMPLEMENTED("Unhandled binary operator %s", OpcodeName(insn.opcode()).c_str());
2185 return EmitResult::Continue;
// Emits OpDot: computes the dot product of two equally-sized float vectors,
// producing a single scalar component.
2188 SpirvShader::EmitResult SpirvShader::EmitDot(InsnIterator insn, EmitState *state) const
2190 auto routine = state->routine;
2191 auto &type = getType(insn.word(1));
// Dot products always yield a scalar result.
2192 ASSERT(type.sizeInComponents == 1);
2193 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
// The operand vector length drives the reduction, not the result type.
2194 auto &lhsType = getType(getObject(insn.word(3)).type);
2195 auto lhs = GenericValue(this, routine, insn.word(3));
2196 auto rhs = GenericValue(this, routine, insn.word(4));
2198 dst.move(0, Dot(lhsType.sizeInComponents, lhs, rhs));
2199 return EmitResult::Continue;
// Emits OpSelect: componentwise cond ? lhs : rhs, implemented as a bitwise
// blend since booleans are all-zeros / all-ones lane masks.
2202 SpirvShader::EmitResult SpirvShader::EmitSelect(InsnIterator insn, EmitState *state) const
2204 auto routine = state->routine;
2205 auto &type = getType(insn.word(1));
2206 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2207 auto cond = GenericValue(this, routine, insn.word(3));
2208 auto lhs = GenericValue(this, routine, insn.word(4));
2209 auto rhs = GenericValue(this, routine, insn.word(5));
2211 for (auto i = 0u; i < type.sizeInComponents; i++)
2213 dst.move(i, (cond.Int(i) & lhs.Int(i)) | (~cond.Int(i) & rhs.Int(i))); // FIXME: IfThenElse()
2216 return EmitResult::Continue;
2219 SpirvShader::EmitResult SpirvShader::EmitExtendedInstruction(InsnIterator insn, EmitState *state) const
2221 auto routine = state->routine;
2222 auto &type = getType(insn.word(1));
2223 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2224 auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
2226 switch (extInstIndex)
2228 case GLSLstd450FAbs:
2230 auto src = GenericValue(this, routine, insn.word(5));
2231 for (auto i = 0u; i < type.sizeInComponents; i++)
2233 dst.move(i, Abs(src.Float(i)));
2237 case GLSLstd450SAbs:
2239 auto src = GenericValue(this, routine, insn.word(5));
2240 for (auto i = 0u; i < type.sizeInComponents; i++)
2242 dst.move(i, Abs(src.Int(i)));
2246 case GLSLstd450Cross:
2248 auto lhs = GenericValue(this, routine, insn.word(5));
2249 auto rhs = GenericValue(this, routine, insn.word(6));
2250 dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
2251 dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
2252 dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
2255 case GLSLstd450Floor:
2257 auto src = GenericValue(this, routine, insn.word(5));
2258 for (auto i = 0u; i < type.sizeInComponents; i++)
2260 dst.move(i, Floor(src.Float(i)));
2264 case GLSLstd450Trunc:
2266 auto src = GenericValue(this, routine, insn.word(5));
2267 for (auto i = 0u; i < type.sizeInComponents; i++)
2269 dst.move(i, Trunc(src.Float(i)));
2273 case GLSLstd450Ceil:
2275 auto src = GenericValue(this, routine, insn.word(5));
2276 for (auto i = 0u; i < type.sizeInComponents; i++)
2278 dst.move(i, Ceil(src.Float(i)));
2282 case GLSLstd450Fract:
2284 auto src = GenericValue(this, routine, insn.word(5));
2285 for (auto i = 0u; i < type.sizeInComponents; i++)
2287 dst.move(i, Frac(src.Float(i)));
2291 case GLSLstd450Round:
2293 auto src = GenericValue(this, routine, insn.word(5));
2294 for (auto i = 0u; i < type.sizeInComponents; i++)
2296 dst.move(i, Round(src.Float(i)));
2300 case GLSLstd450RoundEven:
2302 auto src = GenericValue(this, routine, insn.word(5));
2303 for (auto i = 0u; i < type.sizeInComponents; i++)
2305 auto x = Round(src.Float(i));
2306 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
2307 dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
2308 SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
2312 case GLSLstd450FMin:
2314 auto lhs = GenericValue(this, routine, insn.word(5));
2315 auto rhs = GenericValue(this, routine, insn.word(6));
2316 for (auto i = 0u; i < type.sizeInComponents; i++)
2318 dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
2322 case GLSLstd450FMax:
2324 auto lhs = GenericValue(this, routine, insn.word(5));
2325 auto rhs = GenericValue(this, routine, insn.word(6));
2326 for (auto i = 0u; i < type.sizeInComponents; i++)
2328 dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
2332 case GLSLstd450SMin:
2334 auto lhs = GenericValue(this, routine, insn.word(5));
2335 auto rhs = GenericValue(this, routine, insn.word(6));
2336 for (auto i = 0u; i < type.sizeInComponents; i++)
2338 dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
2342 case GLSLstd450SMax:
2344 auto lhs = GenericValue(this, routine, insn.word(5));
2345 auto rhs = GenericValue(this, routine, insn.word(6));
2346 for (auto i = 0u; i < type.sizeInComponents; i++)
2348 dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
2352 case GLSLstd450UMin:
2354 auto lhs = GenericValue(this, routine, insn.word(5));
2355 auto rhs = GenericValue(this, routine, insn.word(6));
2356 for (auto i = 0u; i < type.sizeInComponents; i++)
2358 dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
2362 case GLSLstd450UMax:
2364 auto lhs = GenericValue(this, routine, insn.word(5));
2365 auto rhs = GenericValue(this, routine, insn.word(6));
2366 for (auto i = 0u; i < type.sizeInComponents; i++)
2368 dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
2372 case GLSLstd450Step:
2374 auto edge = GenericValue(this, routine, insn.word(5));
2375 auto x = GenericValue(this, routine, insn.word(6));
2376 for (auto i = 0u; i < type.sizeInComponents; i++)
2378 dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
2382 case GLSLstd450SmoothStep:
2384 auto edge0 = GenericValue(this, routine, insn.word(5));
2385 auto edge1 = GenericValue(this, routine, insn.word(6));
2386 auto x = GenericValue(this, routine, insn.word(7));
2387 for (auto i = 0u; i < type.sizeInComponents; i++)
2389 auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
2390 (edge1.Float(i) - edge0.Float(i)), SIMD::Float(0.0f)), SIMD::Float(1.0f));
2391 dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
2395 case GLSLstd450FMix:
2397 auto x = GenericValue(this, routine, insn.word(5));
2398 auto y = GenericValue(this, routine, insn.word(6));
2399 auto a = GenericValue(this, routine, insn.word(7));
2400 for (auto i = 0u; i < type.sizeInComponents; i++)
2402 dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
2406 case GLSLstd450FClamp:
2408 auto x = GenericValue(this, routine, insn.word(5));
2409 auto minVal = GenericValue(this, routine, insn.word(6));
2410 auto maxVal = GenericValue(this, routine, insn.word(7));
2411 for (auto i = 0u; i < type.sizeInComponents; i++)
2413 dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
2417 case GLSLstd450SClamp:
2419 auto x = GenericValue(this, routine, insn.word(5));
2420 auto minVal = GenericValue(this, routine, insn.word(6));
2421 auto maxVal = GenericValue(this, routine, insn.word(7));
2422 for (auto i = 0u; i < type.sizeInComponents; i++)
2424 dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
2428 case GLSLstd450UClamp:
2430 auto x = GenericValue(this, routine, insn.word(5));
2431 auto minVal = GenericValue(this, routine, insn.word(6));
2432 auto maxVal = GenericValue(this, routine, insn.word(7));
2433 for (auto i = 0u; i < type.sizeInComponents; i++)
2435 dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
2439 case GLSLstd450FSign:
2441 auto src = GenericValue(this, routine, insn.word(5));
2442 for (auto i = 0u; i < type.sizeInComponents; i++)
2444 auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
2445 auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
2446 dst.move(i, neg | pos);
2450 case GLSLstd450SSign:
2452 auto src = GenericValue(this, routine, insn.word(5));
2453 for (auto i = 0u; i < type.sizeInComponents; i++)
2455 auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
2456 auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
2457 dst.move(i, neg | pos);
2461 case GLSLstd450Reflect:
2463 auto I = GenericValue(this, routine, insn.word(5));
2464 auto N = GenericValue(this, routine, insn.word(6));
2466 SIMD::Float d = Dot(type.sizeInComponents, I, N);
2468 for (auto i = 0u; i < type.sizeInComponents; i++)
2470 dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
2474 case GLSLstd450Refract:
2476 auto I = GenericValue(this, routine, insn.word(5));
2477 auto N = GenericValue(this, routine, insn.word(6));
2478 auto eta = GenericValue(this, routine, insn.word(7));
2480 SIMD::Float d = Dot(type.sizeInComponents, I, N);
2481 SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
2482 SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
2483 SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
2485 for (auto i = 0u; i < type.sizeInComponents; i++)
2487 dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
2491 case GLSLstd450FaceForward:
2493 auto N = GenericValue(this, routine, insn.word(5));
2494 auto I = GenericValue(this, routine, insn.word(6));
2495 auto Nref = GenericValue(this, routine, insn.word(7));
2497 SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
2498 SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
2500 for (auto i = 0u; i < type.sizeInComponents; i++)
2502 auto n = N.Float(i);
2503 dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
2507 case GLSLstd450Length:
2509 auto x = GenericValue(this, routine, insn.word(5));
2510 SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2512 dst.move(0, Sqrt(d));
2515 case GLSLstd450Normalize:
2517 auto x = GenericValue(this, routine, insn.word(5));
2518 SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2519 SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
2521 for (auto i = 0u; i < type.sizeInComponents; i++)
2523 dst.move(i, invLength * x.Float(i));
2527 case GLSLstd450Distance:
2529 auto p0 = GenericValue(this, routine, insn.word(5));
2530 auto p1 = GenericValue(this, routine, insn.word(6));
2531 auto p0Type = getType(getObject(insn.word(5)).type);
2533 // sqrt(dot(p0-p1, p0-p1))
2534 SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
2536 for (auto i = 1u; i < p0Type.sizeInComponents; i++)
2538 d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
2541 dst.move(0, Sqrt(d));
2545 UNIMPLEMENTED("Unhandled ExtInst %d", extInstIndex);
2548 return EmitResult::Continue;
2551 std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
2553 switch(memorySemantics)
2555 case spv::MemorySemanticsMaskNone: return std::memory_order_relaxed;
2556 case spv::MemorySemanticsAcquireMask: return std::memory_order_acquire;
2557 case spv::MemorySemanticsReleaseMask: return std::memory_order_release;
2558 case spv::MemorySemanticsAcquireReleaseMask: return std::memory_order_acq_rel;
2559 case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel; // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
2561 UNREACHABLE("MemorySemanticsMask %x", memorySemantics);
2562 return std::memory_order_acq_rel;
2566 SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const
2568 SIMD::Float d = x.Float(0) * y.Float(0);
2570 for (auto i = 1u; i < numComponents; i++)
2572 d += x.Float(i) * y.Float(i);
2578 SpirvShader::EmitResult SpirvShader::EmitAny(InsnIterator insn, EmitState *state) const
2580 auto routine = state->routine;
2581 auto &type = getType(insn.word(1));
2582 ASSERT(type.sizeInComponents == 1);
2583 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2584 auto &srcType = getType(getObject(insn.word(3)).type);
2585 auto src = GenericValue(this, routine, insn.word(3));
2587 SIMD::UInt result = src.UInt(0);
2589 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2591 result |= src.UInt(i);
2594 dst.move(0, result);
2595 return EmitResult::Continue;
2598 SpirvShader::EmitResult SpirvShader::EmitAll(InsnIterator insn, EmitState *state) const
2600 auto routine = state->routine;
2601 auto &type = getType(insn.word(1));
2602 ASSERT(type.sizeInComponents == 1);
2603 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2604 auto &srcType = getType(getObject(insn.word(3)).type);
2605 auto src = GenericValue(this, routine, insn.word(3));
2607 SIMD::UInt result = src.UInt(0);
2609 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2611 result &= src.UInt(i);
2614 dst.move(0, result);
2615 return EmitResult::Continue;
2618 SpirvShader::EmitResult SpirvShader::EmitBranch(InsnIterator insn, EmitState *state) const
2620 auto target = Block::ID(insn.word(1));
2621 auto edge = Block::Edge{state->currentBlock, target};
2622 state->edgeActiveLaneMasks.emplace(edge, state->activeLaneMask());
2623 return EmitResult::Terminator;
2626 SpirvShader::EmitResult SpirvShader::EmitBranchConditional(InsnIterator insn, EmitState *state) const
2628 auto block = getBlock(state->currentBlock);
2629 ASSERT(block.branchInstruction == insn);
2631 auto condId = Object::ID(block.branchInstruction.word(1));
2632 auto trueBlockId = Block::ID(block.branchInstruction.word(2));
2633 auto falseBlockId = Block::ID(block.branchInstruction.word(3));
2635 auto cond = GenericValue(this, state->routine, condId);
2636 ASSERT_MSG(getType(getObject(condId).type).sizeInComponents == 1, "Condition must be a Boolean type scalar");
2638 // TODO: Optimize for case where all lanes take same path.
2640 state->addOutputActiveLaneMaskEdge(trueBlockId, cond.Int(0));
2641 state->addOutputActiveLaneMaskEdge(falseBlockId, ~cond.Int(0));
2643 return EmitResult::Terminator;
2646 SpirvShader::EmitResult SpirvShader::EmitSwitch(InsnIterator insn, EmitState *state) const
2648 auto block = getBlock(state->currentBlock);
2649 ASSERT(block.branchInstruction == insn);
2651 auto selId = Object::ID(block.branchInstruction.word(1));
2653 auto sel = GenericValue(this, state->routine, selId);
2654 ASSERT_MSG(getType(getObject(selId).type).sizeInComponents == 1, "Selector must be a scalar");
2656 auto numCases = (block.branchInstruction.wordCount() - 3) / 2;
2658 // TODO: Optimize for case where all lanes take same path.
2660 SIMD::Int defaultLaneMask = state->activeLaneMask();
2662 // Gather up the case label matches and calculate defaultLaneMask.
2663 std::vector<RValue<SIMD::Int>> caseLabelMatches;
2664 caseLabelMatches.reserve(numCases);
2665 for (uint32_t i = 0; i < numCases; i++)
2667 auto label = block.branchInstruction.word(i * 2 + 3);
2668 auto caseBlockId = Block::ID(block.branchInstruction.word(i * 2 + 4));
2669 auto caseLabelMatch = CmpEQ(sel.Int(0), SIMD::Int(label));
2670 state->addOutputActiveLaneMaskEdge(caseBlockId, caseLabelMatch);
2671 defaultLaneMask &= ~caseLabelMatch;
2674 auto defaultBlockId = Block::ID(block.branchInstruction.word(2));
2675 state->addOutputActiveLaneMaskEdge(defaultBlockId, defaultLaneMask);
2677 return EmitResult::Terminator;
2680 SpirvShader::EmitResult SpirvShader::EmitUnreachable(InsnIterator insn, EmitState *state) const
2682 // TODO: Log something in this case?
2683 state->setActiveLaneMask(SIMD::Int(0));
2684 return EmitResult::Terminator;
2687 SpirvShader::EmitResult SpirvShader::EmitReturn(InsnIterator insn, EmitState *state) const
2689 state->setActiveLaneMask(SIMD::Int(0));
2690 return EmitResult::Terminator;
2693 SpirvShader::EmitResult SpirvShader::EmitPhi(InsnIterator insn, EmitState *state) const
2695 auto routine = state->routine;
2696 auto typeId = Type::ID(insn.word(1));
2697 auto type = getType(typeId);
2698 auto objectId = Object::ID(insn.word(2));
2700 auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
2703 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
2705 auto varId = Object::ID(insn.word(w + 0));
2706 auto blockId = Block::ID(insn.word(w + 1));
2708 auto in = GenericValue(this, routine, varId);
2709 auto mask = state->getActiveLaneMaskEdge(blockId, state->currentBlock);
2711 for (uint32_t i = 0; i < type.sizeInComponents; i++)
2713 auto inMasked = in.Int(i) & mask;
2714 dst.replace(i, first ? inMasked : (dst.Int(i) | inMasked));
2719 return EmitResult::Continue;
2722 void SpirvShader::emitEpilog(SpirvRoutine *routine) const
2724 for (auto insn : *this)
2726 switch (insn.opcode())
2728 case spv::OpVariable:
2730 Object::ID resultId = insn.word(2);
2731 auto &object = getObject(resultId);
2732 auto &objectTy = getType(object.type);
2733 if (object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
2735 auto &dst = routine->getValue(resultId);
2737 VisitInterface(resultId,
2738 [&](Decorations const &d, AttribType type) {
2739 auto scalarSlot = d.Location << 2 | d.Component;
2740 routine->outputs[scalarSlot] = dst[offset++];
// Block constructor: walks the block's instructions to find its terminating
// branch instruction (and the instruction immediately before it, which may be
// a merge instruction), then classifies the block kind and records the set of
// successor block IDs in 'outs'.
// NOTE(review): this listing appears to be missing several lines (e.g. the
// 'insns[1] = insn;' assignment, the outer 'case spv::OpBranch:' /
// 'case spv::OpSwitch:' labels, breaks, and the 'kind' assignments for the
// OpLoopMerge cases) — confirm against the canonical source.
2751 SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end)
2753 // Default to a Simple, this may change later.
2754 kind = Block::Simple;
2756 // Walk the instructions to find the last two of the block.
2757 InsnIterator insns[2];
2758 for (auto insn : *this)
2760 insns[0] = insns[1];
// After the walk, insns[1] is the block terminator and insns[0] is the
// instruction immediately preceding it.
2764 switch (insns[1].opcode())
// Unconditional branch: single successor in word(1).
2767 branchInstruction = insns[1];
2768 outs.emplace(Block::ID(branchInstruction.word(1)));
2770 switch (insns[0].opcode())
// A preceding OpLoopMerge marks this block as a loop header.
2772 case spv::OpLoopMerge:
2774 mergeInstruction = insns[0];
2775 mergeBlock = Block::ID(mergeInstruction.word(1));
2776 continueTarget = Block::ID(mergeInstruction.word(2));
// No merge instruction: a plain unconditional branch stays Simple.
2780 kind = Block::Simple;
// Conditional branch: true target in word(2), false target in word(3).
2785 case spv::OpBranchConditional:
2786 branchInstruction = insns[1];
2787 outs.emplace(Block::ID(branchInstruction.word(2)));
2788 outs.emplace(Block::ID(branchInstruction.word(3)));
2790 switch (insns[0].opcode())
// OpSelectionMerge marks a structured if/else construct.
2792 case spv::OpSelectionMerge:
2793 kind = StructuredBranchConditional;
2794 mergeInstruction = insns[0];
2795 mergeBlock = Block::ID(mergeInstruction.word(1));
// OpLoopMerge before a conditional branch: loop header with a condition.
2798 case spv::OpLoopMerge:
2800 mergeInstruction = insns[0];
2801 mergeBlock = Block::ID(mergeInstruction.word(1));
2802 continueTarget = Block::ID(mergeInstruction.word(2));
// Conditional branch with no merge instruction preceding it.
2806 kind = UnstructuredBranchConditional;
// Switch: default target in word(2), then (literal, target) pairs; the
// targets sit at the even word offsets starting at 4.
2812 branchInstruction = insns[1];
2813 outs.emplace(Block::ID(branchInstruction.word(2)));
2814 for (uint32_t w = 4; w < branchInstruction.wordCount(); w += 2)
2816 outs.emplace(Block::ID(branchInstruction.word(w)));
2819 switch (insns[0].opcode())
// OpSelectionMerge marks a structured switch construct.
2821 case spv::OpSelectionMerge:
2822 kind = StructuredSwitch;
2823 mergeInstruction = insns[0];
2824 mergeBlock = Block::ID(mergeInstruction.word(1));
// Switch with no preceding merge instruction.
2828 kind = UnstructuredSwitch;
2838 void SpirvShader::EmitState::addOutputActiveLaneMaskEdge(Block::ID to, RValue<SIMD::Int> mask)
2840 addActiveLaneMaskEdge(currentBlock, to, mask & activeLaneMask());
2843 void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask)
2845 auto edge = Block::Edge{from, to};
2846 auto it = edgeActiveLaneMasks.find(edge);
2847 if (it == edgeActiveLaneMasks.end())
2849 edgeActiveLaneMasks.emplace(edge, mask);
2853 auto combined = it->second | mask;
2854 edgeActiveLaneMasks.erase(edge);
2855 edgeActiveLaneMasks.emplace(edge, combined);
2859 RValue<SIMD::Int> SpirvShader::EmitState::getActiveLaneMaskEdge(Block::ID from, Block::ID to)
2861 auto edge = Block::Edge{from, to};
2862 auto it = edgeActiveLaneMasks.find(edge);
2863 ASSERT_MSG(it != edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
2867 SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
2868 pipelineLayout(pipelineLayout)