1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include <spirv/unified1/spirv.hpp>
16 #include <spirv/unified1/GLSL.std.450.h>
17 #include "SpirvShader.hpp"
18 #include "System/Math.hpp"
19 #include "Vulkan/VkBuffer.hpp"
20 #include "Vulkan/VkDebug.hpp"
21 #include "Vulkan/VkPipelineLayout.hpp"
22 #include "Device/Config.hpp"
// Monotonically-increasing shader serial number generator.
// NOTE(review): 'volatile' does not make 'serialCounter++' atomic; if SpirvShader
// objects can be constructed concurrently this needs std::atomic<int> (declared
// in SpirvShader.hpp) or external locking — confirm the threading model.
volatile int SpirvShader::serialCounter = 1;    // Start at 1, 0 is invalid shader.
28 SpirvShader::SpirvShader(InsnStore const &insns)
29 : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
30 outputs{MAX_INTERFACE_COMPONENTS},
31 serialID{serialCounter++}, modes{}
33 ASSERT(insns.size() > 0);
35 // Simplifying assumptions (to be satisfied by earlier transformations)
36 // - There is exactly one entrypoint in the module, and it's the one we want
37 // - The only input/output OpVariables present are those used by the entrypoint
39 Block::ID currentBlock;
40 InsnIterator blockStart;
42 for (auto insn : *this)
44 switch (insn.opcode())
46 case spv::OpExecutionMode:
47 ProcessExecutionMode(insn);
52 TypeOrObjectID targetId = insn.word(1);
53 auto decoration = static_cast<spv::Decoration>(insn.word(2));
54 decorations[targetId].Apply(
56 insn.wordCount() > 3 ? insn.word(3) : 0);
58 if (decoration == spv::DecorationCentroid)
59 modes.NeedsCentroid = true;
63 case spv::OpMemberDecorate:
65 Type::ID targetId = insn.word(1);
66 auto memberIndex = insn.word(2);
67 auto &d = memberDecorations[targetId];
68 if (memberIndex >= d.size())
69 d.resize(memberIndex + 1); // on demand; exact size would require another pass...
70 auto decoration = static_cast<spv::Decoration>(insn.word(3));
73 insn.wordCount() > 4 ? insn.word(4) : 0);
75 if (decoration == spv::DecorationCentroid)
76 modes.NeedsCentroid = true;
80 case spv::OpDecorationGroup:
81 // Nothing to do here. We don't need to record the definition of the group; we'll just have
82 // the bundle of decorations float around. If we were to ever walk the decorations directly,
83 // we might think about introducing this as a real Object.
86 case spv::OpGroupDecorate:
88 auto const &srcDecorations = decorations[insn.word(1)];
89 for (auto i = 2u; i < insn.wordCount(); i++)
91 // remaining operands are targets to apply the group to.
92 decorations[insn.word(i)].Apply(srcDecorations);
97 case spv::OpGroupMemberDecorate:
99 auto const &srcDecorations = decorations[insn.word(1)];
100 for (auto i = 2u; i < insn.wordCount(); i += 2)
102 // remaining operands are pairs of <id>, literal for members to apply to.
103 auto &d = memberDecorations[insn.word(i)];
104 auto memberIndex = insn.word(i + 1);
105 if (memberIndex >= d.size())
106 d.resize(memberIndex + 1); // on demand resize, see above...
107 d[memberIndex].Apply(srcDecorations);
114 ASSERT(currentBlock.value() == 0);
115 currentBlock = Block::ID(insn.word(1));
120 // Branch Instructions (subset of Termination Instructions):
122 case spv::OpBranchConditional:
127 // Termination instruction:
129 case spv::OpUnreachable:
131 ASSERT(currentBlock.value() != 0);
132 auto blockEnd = insn; blockEnd++;
133 blocks[currentBlock] = Block(blockStart, blockEnd);
134 currentBlock = Block::ID(0);
136 if (insn.opcode() == spv::OpKill)
138 modes.ContainsKill = true;
143 case spv::OpTypeVoid:
144 case spv::OpTypeBool:
146 case spv::OpTypeFloat:
147 case spv::OpTypeVector:
148 case spv::OpTypeMatrix:
149 case spv::OpTypeImage:
150 case spv::OpTypeSampler:
151 case spv::OpTypeSampledImage:
152 case spv::OpTypeArray:
153 case spv::OpTypeRuntimeArray:
154 case spv::OpTypeStruct:
155 case spv::OpTypePointer:
156 case spv::OpTypeFunction:
160 case spv::OpVariable:
162 Type::ID typeId = insn.word(1);
163 Object::ID resultId = insn.word(2);
164 auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
165 if (insn.wordCount() > 4)
166 UNIMPLEMENTED("Variable initializers not yet supported");
168 auto &object = defs[resultId];
169 object.kind = Object::Kind::Variable;
170 object.definition = insn;
171 object.type = typeId;
172 object.pointerBase = insn.word(2); // base is itself
174 ASSERT(getType(typeId).storageClass == storageClass);
176 switch (storageClass)
178 case spv::StorageClassInput:
179 case spv::StorageClassOutput:
180 ProcessInterfaceVariable(object);
182 case spv::StorageClassUniform:
183 case spv::StorageClassStorageBuffer:
184 case spv::StorageClassPushConstant:
185 object.kind = Object::Kind::PhysicalPointer;
188 case spv::StorageClassPrivate:
189 case spv::StorageClassFunction:
190 break; // Correctly handled.
192 case spv::StorageClassUniformConstant:
193 case spv::StorageClassWorkgroup:
194 case spv::StorageClassCrossWorkgroup:
195 case spv::StorageClassGeneric:
196 case spv::StorageClassAtomicCounter:
197 case spv::StorageClassImage:
198 UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
202 UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
208 case spv::OpConstant:
209 CreateConstant(insn).constantValue[0] = insn.word(3);
211 case spv::OpConstantFalse:
212 CreateConstant(insn).constantValue[0] = 0; // represent boolean false as zero
214 case spv::OpConstantTrue:
215 CreateConstant(insn).constantValue[0] = ~0u; // represent boolean true as all bits set
217 case spv::OpConstantNull:
220 // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
221 // OpConstantNull forms a constant of arbitrary type, all zeros.
222 auto &object = CreateConstant(insn);
223 auto &objectTy = getType(object.type);
224 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
226 object.constantValue[i] = 0;
230 case spv::OpConstantComposite:
232 auto &object = CreateConstant(insn);
234 for (auto i = 0u; i < insn.wordCount() - 3; i++)
236 auto &constituent = getObject(insn.word(i + 3));
237 auto &constituentTy = getType(constituent.type);
238 for (auto j = 0u; j < constituentTy.sizeInComponents; j++)
239 object.constantValue[offset++] = constituent.constantValue[j];
242 auto objectId = Object::ID(insn.word(2));
243 auto decorationsIt = decorations.find(objectId);
244 if (decorationsIt != decorations.end() &&
245 decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
247 // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
248 // Decorating an object with the WorkgroupSize built-in
249 // decoration will make that object contain the dimensions
250 // of a local workgroup. If an object is decorated with the
251 // WorkgroupSize decoration, this must take precedence over
252 // any execution mode set for LocalSize.
253 // The object decorated with WorkgroupSize must be declared
254 // as a three-component vector of 32-bit integers.
255 ASSERT(getType(object.type).sizeInComponents == 3);
256 modes.WorkgroupSizeX = object.constantValue[0];
257 modes.WorkgroupSizeY = object.constantValue[1];
258 modes.WorkgroupSizeZ = object.constantValue[2];
263 case spv::OpCapability:
264 break; // Various capabilities will be declared, but none affect our code generation at this point.
265 case spv::OpMemoryModel:
266 break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
268 case spv::OpEntryPoint:
270 case spv::OpFunction:
271 ASSERT(mainBlockId.value() == 0); // Multiple functions found
272 // Scan forward to find the function's label.
273 for (auto it = insn; it != end() && mainBlockId.value() == 0; it++)
277 case spv::OpFunction:
278 case spv::OpFunctionParameter:
281 mainBlockId = Block::ID(it.word(1));
284 WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str());
287 ASSERT(mainBlockId.value() != 0); // Function's OpLabel not found
289 case spv::OpFunctionEnd:
290 // Due to preprocessing, the entrypoint and its function provide no value.
292 case spv::OpExtInstImport:
293 // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
294 // Valid shaders will not attempt to import any other instruction sets.
295 if (0 != strcmp("GLSL.std.450", reinterpret_cast<char const *>(insn.wordPointer(2))))
297 UNIMPLEMENTED("Only GLSL extended instruction set is supported");
301 case spv::OpMemberName:
303 case spv::OpSourceContinued:
304 case spv::OpSourceExtension:
307 case spv::OpModuleProcessed:
309 // No semantic impact
312 case spv::OpFunctionParameter:
313 case spv::OpFunctionCall:
314 case spv::OpSpecConstant:
315 case spv::OpSpecConstantComposite:
316 case spv::OpSpecConstantFalse:
317 case spv::OpSpecConstantOp:
318 case spv::OpSpecConstantTrue:
319 // These should have all been removed by preprocessing passes. If we see them here,
320 // our assumptions are wrong and we will probably generate wrong code.
321 UNIMPLEMENTED("These instructions should have already been lowered.");
324 case spv::OpFConvert:
325 case spv::OpSConvert:
326 case spv::OpUConvert:
327 UNIMPLEMENTED("No valid uses for Op*Convert until we support multiple bit widths");
331 case spv::OpAccessChain:
332 case spv::OpInBoundsAccessChain:
333 case spv::OpCompositeConstruct:
334 case spv::OpCompositeInsert:
335 case spv::OpCompositeExtract:
336 case spv::OpVectorShuffle:
337 case spv::OpVectorTimesScalar:
338 case spv::OpVectorExtractDynamic:
339 case spv::OpVectorInsertDynamic:
340 case spv::OpNot: // Unary ops
343 case spv::OpLogicalNot:
344 case spv::OpIAdd: // Binary ops
355 case spv::OpFOrdEqual:
356 case spv::OpFUnordEqual:
357 case spv::OpFOrdNotEqual:
358 case spv::OpFUnordNotEqual:
359 case spv::OpFOrdLessThan:
360 case spv::OpFUnordLessThan:
361 case spv::OpFOrdGreaterThan:
362 case spv::OpFUnordGreaterThan:
363 case spv::OpFOrdLessThanEqual:
364 case spv::OpFUnordLessThanEqual:
365 case spv::OpFOrdGreaterThanEqual:
366 case spv::OpFUnordGreaterThanEqual:
371 case spv::OpINotEqual:
372 case spv::OpUGreaterThan:
373 case spv::OpSGreaterThan:
374 case spv::OpUGreaterThanEqual:
375 case spv::OpSGreaterThanEqual:
376 case spv::OpULessThan:
377 case spv::OpSLessThan:
378 case spv::OpULessThanEqual:
379 case spv::OpSLessThanEqual:
380 case spv::OpShiftRightLogical:
381 case spv::OpShiftRightArithmetic:
382 case spv::OpShiftLeftLogical:
383 case spv::OpBitwiseOr:
384 case spv::OpBitwiseXor:
385 case spv::OpBitwiseAnd:
386 case spv::OpLogicalOr:
387 case spv::OpLogicalAnd:
388 case spv::OpLogicalEqual:
389 case spv::OpLogicalNotEqual:
390 case spv::OpUMulExtended:
391 case spv::OpSMulExtended:
393 case spv::OpConvertFToU:
394 case spv::OpConvertFToS:
395 case spv::OpConvertSToF:
396 case spv::OpConvertUToF:
405 case spv::OpDPdxCoarse:
407 case spv::OpDPdyCoarse:
409 case spv::OpFwidthCoarse:
410 case spv::OpDPdxFine:
411 case spv::OpDPdyFine:
412 case spv::OpFwidthFine:
413 // Instructions that yield an intermediate value
415 Type::ID typeId = insn.word(1);
416 Object::ID resultId = insn.word(2);
417 auto &object = defs[resultId];
418 object.type = typeId;
419 object.kind = Object::Kind::Value;
420 object.definition = insn;
422 if (insn.opcode() == spv::OpAccessChain || insn.opcode() == spv::OpInBoundsAccessChain)
424 // interior ptr has two parts:
425 // - logical base ptr, common across all lanes and known at compile time
427 Object::ID baseId = insn.word(3);
428 object.pointerBase = getObject(baseId).pointerBase;
434 // Don't need to do anything during analysis pass
438 UNIMPLEMENTED("%s", OpcodeName(insn.opcode()).c_str());
443 void SpirvShader::DeclareType(InsnIterator insn)
445 Type::ID resultId = insn.word(1);
447 auto &type = types[resultId];
448 type.definition = insn;
449 type.sizeInComponents = ComputeTypeSize(insn);
451 // A structure is a builtin block if it has a builtin
452 // member. All members of such a structure are builtins.
453 switch (insn.opcode())
455 case spv::OpTypeStruct:
457 auto d = memberDecorations.find(resultId);
458 if (d != memberDecorations.end())
460 for (auto &m : d->second)
464 type.isBuiltInBlock = true;
471 case spv::OpTypePointer:
473 Type::ID elementTypeId = insn.word(3);
474 type.element = elementTypeId;
475 type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
476 type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
479 case spv::OpTypeVector:
480 case spv::OpTypeMatrix:
481 case spv::OpTypeArray:
482 case spv::OpTypeRuntimeArray:
484 Type::ID elementTypeId = insn.word(2);
485 type.element = elementTypeId;
493 SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
495 Type::ID typeId = insn.word(1);
496 Object::ID resultId = insn.word(2);
497 auto &object = defs[resultId];
498 auto &objectTy = getType(typeId);
499 object.type = typeId;
500 object.kind = Object::Kind::Constant;
501 object.definition = insn;
502 object.constantValue = std::unique_ptr<uint32_t[]>(new uint32_t[objectTy.sizeInComponents]);
506 void SpirvShader::ProcessInterfaceVariable(Object &object)
508 auto &objectTy = getType(object.type);
509 ASSERT(objectTy.storageClass == spv::StorageClassInput || objectTy.storageClass == spv::StorageClassOutput);
511 ASSERT(objectTy.opcode() == spv::OpTypePointer);
512 auto pointeeTy = getType(objectTy.element);
514 auto &builtinInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputBuiltins : outputBuiltins;
515 auto &userDefinedInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputs : outputs;
517 ASSERT(object.opcode() == spv::OpVariable);
518 Object::ID resultId = object.definition.word(2);
520 if (objectTy.isBuiltInBlock)
522 // walk the builtin block, registering each of its members separately.
523 auto m = memberDecorations.find(objectTy.element);
524 ASSERT(m != memberDecorations.end()); // otherwise we wouldn't have marked the type chain
525 auto &structType = pointeeTy.definition;
528 for (auto &member : m->second)
530 auto &memberType = getType(structType.word(word));
532 if (member.HasBuiltIn)
534 builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
537 offset += memberType.sizeInComponents;
543 auto d = decorations.find(resultId);
544 if (d != decorations.end() && d->second.HasBuiltIn)
546 builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
550 object.kind = Object::Kind::InterfaceVariable;
551 VisitInterface(resultId,
552 [&userDefinedInterface](Decorations const &d, AttribType type) {
553 // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
554 auto scalarSlot = (d.Location << 2) | d.Component;
555 ASSERT(scalarSlot >= 0 &&
556 scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
558 auto &slot = userDefinedInterface[scalarSlot];
561 slot.NoPerspective = d.NoPerspective;
562 slot.Centroid = d.Centroid;
567 void SpirvShader::ProcessExecutionMode(InsnIterator insn)
569 auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
572 case spv::ExecutionModeEarlyFragmentTests:
573 modes.EarlyFragmentTests = true;
575 case spv::ExecutionModeDepthReplacing:
576 modes.DepthReplacing = true;
578 case spv::ExecutionModeDepthGreater:
579 modes.DepthGreater = true;
581 case spv::ExecutionModeDepthLess:
582 modes.DepthLess = true;
584 case spv::ExecutionModeDepthUnchanged:
585 modes.DepthUnchanged = true;
587 case spv::ExecutionModeLocalSize:
588 modes.WorkgroupSizeX = insn.word(3);
589 modes.WorkgroupSizeY = insn.word(4);
590 modes.WorkgroupSizeZ = insn.word(5);
592 case spv::ExecutionModeOriginUpperLeft:
593 // This is always the case for a Vulkan shader. Do nothing.
596 UNIMPLEMENTED("No other execution modes are permitted");
600 uint32_t SpirvShader::ComputeTypeSize(InsnIterator insn)
602 // Types are always built from the bottom up (with the exception of forward ptrs, which
603 // don't appear in Vulkan shaders. Therefore, we can always assume our component parts have
604 // already been described (and so their sizes determined)
605 switch (insn.opcode())
607 case spv::OpTypeVoid:
608 case spv::OpTypeSampler:
609 case spv::OpTypeImage:
610 case spv::OpTypeSampledImage:
611 case spv::OpTypeFunction:
612 case spv::OpTypeRuntimeArray:
613 // Objects that don't consume any space.
614 // Descriptor-backed objects currently only need exist at compile-time.
615 // Runtime arrays don't appear in places where their size would be interesting
618 case spv::OpTypeBool:
619 case spv::OpTypeFloat:
621 // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
622 // we might need to change this, but only 32 bit components are required for Vulkan 1.1.
625 case spv::OpTypeVector:
626 case spv::OpTypeMatrix:
627 // Vectors and matrices both consume element count * element size.
628 return getType(insn.word(2)).sizeInComponents * insn.word(3);
630 case spv::OpTypeArray:
632 // Element count * element size. Array sizes come from constant ids.
633 auto arraySize = GetConstantInt(insn.word(3));
634 return getType(insn.word(2)).sizeInComponents * arraySize;
637 case spv::OpTypeStruct:
640 for (uint32_t i = 2u; i < insn.wordCount(); i++)
642 size += getType(insn.word(i)).sizeInComponents;
647 case spv::OpTypePointer:
648 // Runtime representation of a pointer is a per-lane index.
649 // Note: clients are expected to look through the pointer if they want the pointee size instead.
653 // Some other random insn.
654 UNIMPLEMENTED("Only types are supported");
659 bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
661 switch (storageClass)
663 case spv::StorageClassUniform:
664 case spv::StorageClassStorageBuffer:
665 case spv::StorageClassPushConstant:
673 int SpirvShader::VisitInterfaceInner(Type::ID id, Decorations d, F f) const
675 // Recursively walks variable definition and its type tree, taking into account
676 // any explicit Location or Component decorations encountered; where explicit
677 // Locations or Components are not specified, assigns them sequentially.
678 // Collected decorations are carried down toward the leaves and across
679 // siblings; Effect of decorations intentionally does not flow back up the tree.
681 // F is a functor to be called with the effective decoration set for every component.
683 // Returns the next available location, and calls f().
685 // This covers the rules in Vulkan 1.1 spec, 14.1.4 Location Assignment.
687 ApplyDecorationsForId(&d, id);
689 auto const &obj = getType(id);
692 case spv::OpTypePointer:
693 return VisitInterfaceInner<F>(obj.definition.word(3), d, f);
694 case spv::OpTypeMatrix:
695 for (auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
697 // consumes same components of N consecutive locations
698 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
701 case spv::OpTypeVector:
702 for (auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
704 // consumes N consecutive components in the same location
705 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
707 return d.Location + 1;
708 case spv::OpTypeFloat:
709 f(d, ATTRIBTYPE_FLOAT);
710 return d.Location + 1;
712 f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
713 return d.Location + 1;
714 case spv::OpTypeBool:
715 f(d, ATTRIBTYPE_UINT);
716 return d.Location + 1;
717 case spv::OpTypeStruct:
719 // iterate over members, which may themselves have Location/Component decorations
720 for (auto i = 0u; i < obj.definition.wordCount() - 2; i++)
722 ApplyDecorationsForIdMember(&d, id, i);
723 d.Location = VisitInterfaceInner<F>(obj.definition.word(i + 2), d, f);
724 d.Component = 0; // Implicit locations always have component=0
728 case spv::OpTypeArray:
730 auto arraySize = GetConstantInt(obj.definition.word(3));
731 for (auto i = 0u; i < arraySize; i++)
733 d.Location = VisitInterfaceInner<F>(obj.definition.word(2), d, f);
738 // Intentionally partial; most opcodes do not participate in type hierarchies
744 void SpirvShader::VisitInterface(Object::ID id, F f) const
746 // Walk a variable definition and call f for each component in it.
748 ApplyDecorationsForId(&d, id);
750 auto def = getObject(id).definition;
751 ASSERT(def.opcode() == spv::OpVariable);
752 VisitInterfaceInner<F>(def.word(1), d, f);
755 SIMD::Int SpirvShader::WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
757 // Produce a offset into external memory in sizeof(float) units
759 int constantOffset = 0;
760 SIMD::Int dynamicOffset = SIMD::Int(0);
761 auto &baseObject = getObject(id);
762 Type::ID typeId = getType(baseObject.type).element;
764 ApplyDecorationsForId(&d, baseObject.type);
766 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
767 // Start with its offset and build from there.
768 if (baseObject.kind == Object::Kind::Value)
770 dynamicOffset += routine->getIntermediate(id).Int(0);
773 for (auto i = 0u; i < numIndexes; i++)
775 auto & type = getType(typeId);
776 switch (type.definition.opcode())
778 case spv::OpTypeStruct:
780 int memberIndex = GetConstantInt(indexIds[i]);
781 ApplyDecorationsForIdMember(&d, typeId, memberIndex);
783 constantOffset += d.Offset / sizeof(float);
784 typeId = type.definition.word(2u + memberIndex);
787 case spv::OpTypeArray:
788 case spv::OpTypeRuntimeArray:
790 // TODO: b/127950082: Check bounds.
791 ApplyDecorationsForId(&d, typeId);
792 ASSERT(d.HasArrayStride);
793 auto & obj = getObject(indexIds[i]);
794 if (obj.kind == Object::Kind::Constant)
795 constantOffset += d.ArrayStride/sizeof(float) * GetConstantInt(indexIds[i]);
797 dynamicOffset += SIMD::Int(d.ArrayStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
798 typeId = type.element;
801 case spv::OpTypeMatrix:
803 // TODO: b/127950082: Check bounds.
804 ApplyDecorationsForId(&d, typeId);
805 ASSERT(d.HasMatrixStride);
806 auto & obj = getObject(indexIds[i]);
807 if (obj.kind == Object::Kind::Constant)
808 constantOffset += d.MatrixStride/sizeof(float) * GetConstantInt(indexIds[i]);
810 dynamicOffset += SIMD::Int(d.MatrixStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
811 typeId = type.element;
814 case spv::OpTypeVector:
816 auto & obj = getObject(indexIds[i]);
817 if (obj.kind == Object::Kind::Constant)
818 constantOffset += GetConstantInt(indexIds[i]);
820 dynamicOffset += routine->getIntermediate(indexIds[i]).Int(0);
821 typeId = type.element;
825 UNIMPLEMENTED("Unexpected type '%s' in WalkExplicitLayoutAccessChain", OpcodeName(type.definition.opcode()).c_str());
829 return dynamicOffset + SIMD::Int(constantOffset);
832 SIMD::Int SpirvShader::WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
834 // TODO: avoid doing per-lane work in some cases if we can?
835 // Produce a *component* offset into location-oriented memory
837 int constantOffset = 0;
838 SIMD::Int dynamicOffset = SIMD::Int(0);
839 auto &baseObject = getObject(id);
840 Type::ID typeId = getType(baseObject.type).element;
842 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
843 // Start with its offset and build from there.
844 if (baseObject.kind == Object::Kind::Value)
846 dynamicOffset += routine->getIntermediate(id).Int(0);
849 for (auto i = 0u; i < numIndexes; i++)
851 auto & type = getType(typeId);
852 switch(type.opcode())
854 case spv::OpTypeStruct:
856 int memberIndex = GetConstantInt(indexIds[i]);
857 int offsetIntoStruct = 0;
858 for (auto j = 0; j < memberIndex; j++) {
859 auto memberType = type.definition.word(2u + j);
860 offsetIntoStruct += getType(memberType).sizeInComponents;
862 constantOffset += offsetIntoStruct;
863 typeId = type.definition.word(2u + memberIndex);
867 case spv::OpTypeVector:
868 case spv::OpTypeMatrix:
869 case spv::OpTypeArray:
870 case spv::OpTypeRuntimeArray:
872 // TODO: b/127950082: Check bounds.
873 auto stride = getType(type.element).sizeInComponents;
874 auto & obj = getObject(indexIds[i]);
875 if (obj.kind == Object::Kind::Constant)
876 constantOffset += stride * GetConstantInt(indexIds[i]);
878 dynamicOffset += SIMD::Int(stride) * routine->getIntermediate(indexIds[i]).Int(0);
879 typeId = type.element;
884 UNIMPLEMENTED("Unexpected type '%s' in WalkAccessChain", OpcodeName(type.opcode()).c_str());
888 return dynamicOffset + SIMD::Int(constantOffset);
891 uint32_t SpirvShader::WalkLiteralAccessChain(Type::ID typeId, uint32_t numIndexes, uint32_t const *indexes) const
893 uint32_t constantOffset = 0;
895 for (auto i = 0u; i < numIndexes; i++)
897 auto & type = getType(typeId);
898 switch(type.opcode())
900 case spv::OpTypeStruct:
902 int memberIndex = indexes[i];
903 int offsetIntoStruct = 0;
904 for (auto j = 0; j < memberIndex; j++) {
905 auto memberType = type.definition.word(2u + j);
906 offsetIntoStruct += getType(memberType).sizeInComponents;
908 constantOffset += offsetIntoStruct;
909 typeId = type.definition.word(2u + memberIndex);
913 case spv::OpTypeVector:
914 case spv::OpTypeMatrix:
915 case spv::OpTypeArray:
917 auto elementType = type.definition.word(2);
918 auto stride = getType(elementType).sizeInComponents;
919 constantOffset += stride * indexes[i];
920 typeId = elementType;
925 UNIMPLEMENTED("Unexpected type in WalkLiteralAccessChain");
929 return constantOffset;
932 void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
936 case spv::DecorationLocation:
938 Location = static_cast<int32_t>(arg);
940 case spv::DecorationComponent:
944 case spv::DecorationDescriptorSet:
945 HasDescriptorSet = true;
948 case spv::DecorationBinding:
952 case spv::DecorationBuiltIn:
954 BuiltIn = static_cast<spv::BuiltIn>(arg);
956 case spv::DecorationFlat:
959 case spv::DecorationNoPerspective:
960 NoPerspective = true;
962 case spv::DecorationCentroid:
965 case spv::DecorationBlock:
968 case spv::DecorationBufferBlock:
971 case spv::DecorationOffset:
973 Offset = static_cast<int32_t>(arg);
975 case spv::DecorationArrayStride:
976 HasArrayStride = true;
977 ArrayStride = static_cast<int32_t>(arg);
979 case spv::DecorationMatrixStride:
980 HasMatrixStride = true;
981 MatrixStride = static_cast<int32_t>(arg);
984 // Intentionally partial, there are many decorations we just don't care about.
989 void SpirvShader::Decorations::Apply(const sw::SpirvShader::Decorations &src)
991 // Apply a decoration group to this set of decorations
995 BuiltIn = src.BuiltIn;
1001 Location = src.Location;
1004 if (src.HasComponent)
1006 HasComponent = true;
1007 Component = src.Component;
1010 if (src.HasDescriptorSet)
1012 HasDescriptorSet = true;
1013 DescriptorSet = src.DescriptorSet;
1019 Binding = src.Binding;
1025 Offset = src.Offset;
1028 if (src.HasArrayStride)
1030 HasArrayStride = true;
1031 ArrayStride = src.ArrayStride;
1034 if (src.HasMatrixStride)
1036 HasMatrixStride = true;
1037 MatrixStride = src.MatrixStride;
1041 NoPerspective |= src.NoPerspective;
1042 Centroid |= src.Centroid;
1044 BufferBlock |= src.BufferBlock;
1047 void SpirvShader::ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const
1049 auto it = decorations.find(id);
1050 if (it != decorations.end())
1051 d->Apply(it->second);
1054 void SpirvShader::ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const
1056 auto it = memberDecorations.find(id);
1057 if (it != memberDecorations.end() && member < it->second.size())
1059 d->Apply(it->second[member]);
1063 uint32_t SpirvShader::GetConstantInt(Object::ID id) const
1065 // Slightly hackish access to constants very early in translation.
1066 // General consumption of constants by other instructions should
1067 // probably be just lowered to Reactor.
1069 // TODO: not encountered yet since we only use this for array sizes etc,
1070 // but is possible to construct integer constant 0 via OpConstantNull.
1071 auto insn = getObject(id).definition;
1072 ASSERT(insn.opcode() == spv::OpConstant);
1073 ASSERT(getType(insn.word(1)).opcode() == spv::OpTypeInt);
1074 return insn.word(3);
1079 void SpirvShader::emitProlog(SpirvRoutine *routine) const
1081 for (auto insn : *this)
1083 switch (insn.opcode())
1085 case spv::OpVariable:
1087 Type::ID resultPointerTypeId = insn.word(1);
1088 auto resultPointerType = getType(resultPointerTypeId);
1089 auto pointeeType = getType(resultPointerType.element);
1091 if(pointeeType.sizeInComponents > 0) // TODO: what to do about zero-slot objects?
1093 Object::ID resultId = insn.word(2);
1094 routine->createLvalue(resultId, pointeeType.sizeInComponents);
1099 // Nothing else produces interface variables, so can all be safely ignored.
1105 void SpirvShader::emit(SpirvRoutine *routine) const
1107 // Emit everything up to the first label
1108 // TODO: Separate out dispatch of block from non-block instructions?
1109 for (auto insn : *this)
1111 if (insn.opcode() == spv::OpLabel)
1115 EmitInstruction(routine, insn);
1118 // Emit the main function block
1119 EmitBlock(routine, getBlock(mainBlockId));
1122 void SpirvShader::EmitBlock(SpirvRoutine *routine, Block const &block) const
1124 for (auto insn : block)
1126 EmitInstruction(routine, insn);
1130 void SpirvShader::EmitInstruction(SpirvRoutine *routine, InsnIterator insn) const
1132 switch (insn.opcode())
1134 case spv::OpTypeVoid:
1135 case spv::OpTypeInt:
1136 case spv::OpTypeFloat:
1137 case spv::OpTypeBool:
1138 case spv::OpTypeVector:
1139 case spv::OpTypeArray:
1140 case spv::OpTypeRuntimeArray:
1141 case spv::OpTypeMatrix:
1142 case spv::OpTypeStruct:
1143 case spv::OpTypePointer:
1144 case spv::OpTypeFunction:
1145 case spv::OpExecutionMode:
// NOTE(review): fragment of the per-instruction emission dispatch; the enclosing
// function and its `switch (insn.opcode())` header sit above this chunk. This
// listing is an elided extraction — the leading numbers are original line
// numbers and some structural lines (braces, break;) are missing.
// Group 1: opcodes that need no code generation at emit time.
1146 case spv::OpMemoryModel:
1147 case spv::OpFunction:
1148 case spv::OpFunctionEnd:
1149 case spv::OpConstant:
1150 case spv::OpConstantNull:
1151 case spv::OpConstantTrue:
1152 case spv::OpConstantFalse:
1153 case spv::OpConstantComposite:
1155 case spv::OpExtension:
1156 case spv::OpCapability:
1157 case spv::OpEntryPoint:
1158 case spv::OpExtInstImport:
1159 case spv::OpDecorate:
1160 case spv::OpMemberDecorate:
1161 case spv::OpGroupDecorate:
1162 case spv::OpGroupMemberDecorate:
1163 case spv::OpDecorationGroup:
1165 case spv::OpMemberName:
1167 case spv::OpSourceContinued:
1168 case spv::OpSourceExtension:
1171 case spv::OpModuleProcessed:
1173 // Nothing to do at emit time. These are either fully handled at analysis time,
1174 // or don't require any work at all.
// Group 2 (case labels elided): OpLabel / OpReturn for the single-block entrypoint.
1179 // TODO: when we do control flow, will need to do some work here.
1180 // Until then, there is nothing to do -- we expect there to be an initial OpLabel
1181 // in the entrypoint function, for which we do nothing; and a final OpReturn at the
1182 // end of the entrypoint function, for which we do nothing.
// Group 3: each opcode family is forwarded to its dedicated Emit* helper below.
1185 case spv::OpVariable:
1186 EmitVariable(insn, routine);
1190 EmitLoad(insn, routine);
1194 EmitStore(insn, routine);
1197 case spv::OpAccessChain:
1198 case spv::OpInBoundsAccessChain:
1199 EmitAccessChain(insn, routine);
1202 case spv::OpCompositeConstruct:
1203 EmitCompositeConstruct(insn, routine);
1206 case spv::OpCompositeInsert:
1207 EmitCompositeInsert(insn, routine);
1210 case spv::OpCompositeExtract:
1211 EmitCompositeExtract(insn, routine);
1214 case spv::OpVectorShuffle:
1215 EmitVectorShuffle(insn, routine);
1218 case spv::OpVectorExtractDynamic:
1219 EmitVectorExtractDynamic(insn, routine);
1222 case spv::OpVectorInsertDynamic:
1223 EmitVectorInsertDynamic(insn, routine);
1226 case spv::OpVectorTimesScalar:
1227 EmitVectorTimesScalar(insn, routine);
// Componentwise unary operators, conversions, and derivative instructions.
1231 case spv::OpSNegate:
1232 case spv::OpFNegate:
1233 case spv::OpLogicalNot:
1234 case spv::OpConvertFToU:
1235 case spv::OpConvertFToS:
1236 case spv::OpConvertSToF:
1237 case spv::OpConvertUToF:
1238 case spv::OpBitcast:
1242 case spv::OpDPdxCoarse:
1244 case spv::OpDPdyCoarse:
1246 case spv::OpFwidthCoarse:
1247 case spv::OpDPdxFine:
1248 case spv::OpDPdyFine:
1249 case spv::OpFwidthFine:
1250 EmitUnaryOp(insn, routine);
// Componentwise binary arithmetic, comparisons, shifts and logical operators.
1264 case spv::OpFOrdEqual:
1265 case spv::OpFUnordEqual:
1266 case spv::OpFOrdNotEqual:
1267 case spv::OpFUnordNotEqual:
1268 case spv::OpFOrdLessThan:
1269 case spv::OpFUnordLessThan:
1270 case spv::OpFOrdGreaterThan:
1271 case spv::OpFUnordGreaterThan:
1272 case spv::OpFOrdLessThanEqual:
1273 case spv::OpFUnordLessThanEqual:
1274 case spv::OpFOrdGreaterThanEqual:
1275 case spv::OpFUnordGreaterThanEqual:
1280 case spv::OpINotEqual:
1281 case spv::OpUGreaterThan:
1282 case spv::OpSGreaterThan:
1283 case spv::OpUGreaterThanEqual:
1284 case spv::OpSGreaterThanEqual:
1285 case spv::OpULessThan:
1286 case spv::OpSLessThan:
1287 case spv::OpULessThanEqual:
1288 case spv::OpSLessThanEqual:
1289 case spv::OpShiftRightLogical:
1290 case spv::OpShiftRightArithmetic:
1291 case spv::OpShiftLeftLogical:
1292 case spv::OpBitwiseOr:
1293 case spv::OpBitwiseXor:
1294 case spv::OpBitwiseAnd:
1295 case spv::OpLogicalOr:
1296 case spv::OpLogicalAnd:
1297 case spv::OpLogicalEqual:
1298 case spv::OpLogicalNotEqual:
1299 case spv::OpUMulExtended:
1300 case spv::OpSMulExtended:
1301 EmitBinaryOp(insn, routine);
1305 EmitDot(insn, routine);
1309 EmitSelect(insn, routine);
1312 case spv::OpExtInst:
1313 EmitExtendedInstruction(insn, routine);
1317 EmitAny(insn, routine);
1321 EmitAll(insn, routine);
1325 EmitBranch(insn, routine);
// Unhandled opcodes fail loudly rather than silently miscompiling the shader.
1329 UNIMPLEMENTED("opcode: %s", OpcodeName(insn.opcode()).c_str());
// Emits code for OpVariable: binds the variable's result id to its backing
// storage, dispatching on the variable's storage class.
// NOTE(review): elided extraction — leading numbers are original line numbers;
// braces, break; statements and small declarations (e.g. the `offset` counter
// used at 1350 and the `Decorations d` filled at 1359) are missing lines.
1334 void SpirvShader::EmitVariable(InsnIterator insn, SpirvRoutine *routine) const
1336 Object::ID resultId = insn.word(2);
1337 auto &object = getObject(resultId);
1338 auto &objectTy = getType(object.type);
1339 switch (objectTy.storageClass)
// Inputs: copy the interleaved fragment inputs into the variable's lvalue,
// one scalar slot per (location, component) pair.
1341 case spv::StorageClassInput:
1343 if (object.kind == Object::Kind::InterfaceVariable)
1345 auto &dst = routine->getValue(resultId);
1347 VisitInterface(resultId,
1348 [&](Decorations const &d, AttribType type) {
// Four components per location; Component selects within the vec4 slot.
1349 auto scalarSlot = d.Location << 2 | d.Component;
1350 dst[offset++] = routine->inputs[scalarSlot];
// Buffers: resolve descriptor set + binding to the raw device memory address.
1355 case spv::StorageClassUniform:
1356 case spv::StorageClassStorageBuffer:
1359 ApplyDecorationsForId(&d, resultId);
1360 ASSERT(d.DescriptorSet >= 0);
1361 ASSERT(d.Binding >= 0);
1363 size_t bindingOffset = routine->pipelineLayout->getBindingOffset(d.DescriptorSet, d.Binding);
// Pointer chase: descriptor set -> binding entry -> vk::Buffer -> data + offset.
1365 Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
1366 Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // VkDescriptorBufferInfo*
1367 Pointer<Byte> buffer = *Pointer<Pointer<Byte>>(binding + OFFSET(VkDescriptorBufferInfo, buffer)); // vk::Buffer*
1368 Pointer<Byte> data = *Pointer<Pointer<Byte>>(buffer + vk::Buffer::DataOffset); // void*
1369 Int offset = *Pointer<Int>(binding + OFFSET(VkDescriptorBufferInfo, offset));
1370 Pointer<Byte> address = data + offset;
1371 routine->physicalPointers[resultId] = address;
// Push constants: all variables alias the routine's single push-constant block.
1374 case spv::StorageClassPushConstant:
1376 routine->physicalPointers[resultId] = routine->pushConstants;
// Emits code for OpLoad: reads sizeInComponents scalars through the pointer
// and publishes them as the result id's intermediate value.
// Two code paths: a slow per-lane path (divergent per-lane offsets, or some
// lanes inactive) and a fast whole-vector path. NOTE(review): elided
// extraction — braces and the Else introducing the fast path are missing lines.
1384 void SpirvShader::EmitLoad(InsnIterator insn, SpirvRoutine *routine) const
1386 Object::ID objectId = insn.word(2);
1387 Object::ID pointerId = insn.word(3);
1388 auto &object = getObject(objectId);
1389 auto &objectTy = getType(object.type);
1390 auto &pointer = getObject(pointerId);
1391 auto &pointerBase = getObject(pointer.pointerBase);
1392 auto &pointerBaseTy = getType(pointerBase.type);
// The pointee type must match the instruction's declared result type.
1394 ASSERT(getType(pointer.type).element == object.type);
1395 ASSERT(Type::ID(insn.word(1)) == object.type);
1397 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1399 UNIMPLEMENTED("StorageClassImage load not yet implemented");
// Physical pointers (buffers/push constants) come from the routine's pointer
// table; everything else loads from the variable's reified lvalue.
1402 Pointer<Float> ptrBase;
1403 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1405 ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1409 ptrBase = &routine->getValue(pointer.pointerBase)[0];
1412 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
// Static check: are any SIMD lanes masked off for this draw/dispatch?
1413 auto anyInactiveLanes = SignMask(~routine->activeLaneMask) != 0;
1415 auto load = SpirvRoutine::Value(objectTy.sizeInComponents);
// Slow path: gather component-by-component, lane-by-lane, honouring the mask.
1417 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1419 // Divergent offsets or masked lanes.
// Dynamic pointers carry a per-lane offset intermediate; uniform ones use 0.
1420 auto offsets = pointer.kind == Object::Kind::Value ?
1421 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1422 RValue<SIMD::Int>(SIMD::Int(0));
1423 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
1425 // i wish i had a Float,Float,Float,Float constructor here..
1426 for (int j = 0; j < SIMD::Width; j++)
1428 If(Extract(routine->activeLaneMask, j) != 0)
1430 Int offset = Int(i) + Extract(offsets, j);
// Interleaved storage keeps lane j's copy of component i at i*Width + j.
1431 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1432 load[i] = Insert(load[i], ptrBase[offset], j);
// Fast path (Else branch; its introduction is elided): whole-vector copies.
1439 // No divergent offsets or masked lanes.
1440 if (interleavedByLane)
1442 // Lane-interleaved data.
1443 Pointer<SIMD::Float> src = ptrBase;
1444 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
1451 // Non-interleaved data.
// Broadcast one scalar per component across all lanes.
1452 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
1454 load[i] = RValue<SIMD::Float>(ptrBase[i]);
// Publish the loaded components as the result intermediate.
1459 auto &dst = routine->createIntermediate(objectId, objectTy.sizeInComponents);
1460 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
1462 dst.emplace(i, load[i]);
1466 void SpirvShader::EmitAccessChain(InsnIterator insn, SpirvRoutine *routine) const
1468 Type::ID typeId = insn.word(1);
1469 Object::ID resultId = insn.word(2);
1470 Object::ID baseId = insn.word(3);
1471 uint32_t numIndexes = insn.wordCount() - 4;
1472 const uint32_t *indexes = insn.wordPointer(4);
1473 auto &type = getType(typeId);
1474 ASSERT(type.sizeInComponents == 1);
1475 ASSERT(getObject(baseId).pointerBase == getObject(resultId).pointerBase);
1477 auto &dst = routine->createIntermediate(resultId, type.sizeInComponents);
1479 if(type.storageClass == spv::StorageClassPushConstant ||
1480 type.storageClass == spv::StorageClassUniform ||
1481 type.storageClass == spv::StorageClassStorageBuffer)
1483 dst.emplace(0, WalkExplicitLayoutAccessChain(baseId, numIndexes, indexes, routine));
1487 dst.emplace(0, WalkAccessChain(baseId, numIndexes, indexes, routine));
// Emits code for OpStore: writes the source object's components through the
// pointer. Mirrors EmitLoad's structure, with an extra outer split on whether
// the source is a compile-time constant or a run-time intermediate.
// NOTE(review): elided extraction — braces, Else introductions and else
// keywords between the visible branches are missing lines.
1491 void SpirvShader::EmitStore(InsnIterator insn, SpirvRoutine *routine) const
1493 Object::ID pointerId = insn.word(1);
1494 Object::ID objectId = insn.word(2);
1495 auto &object = getObject(objectId);
1496 auto &pointer = getObject(pointerId);
1497 auto &pointerTy = getType(pointer.type);
1498 auto &elementTy = getType(pointerTy.element);
1499 auto &pointerBase = getObject(pointer.pointerBase);
1500 auto &pointerBaseTy = getType(pointerBase.type);
1502 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1504 UNIMPLEMENTED("StorageClassImage store not yet implemented");
// Resolve the destination base address, as in EmitLoad.
1507 Pointer<Float> ptrBase;
1508 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1510 ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1514 ptrBase = &routine->getValue(pointer.pointerBase)[0];
1517 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1518 auto anyInactiveLanes = SignMask(~routine->activeLaneMask) != 0;
1520 if (object.kind == Object::Kind::Constant)
1522 // Constant source data.
1523 auto src = reinterpret_cast<float *>(object.constantValue.get());
// Slow path: scatter lane-by-lane, honouring the active-lane mask.
1524 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1526 // Divergent offsets or masked lanes.
1527 auto offsets = pointer.kind == Object::Kind::Value ?
1528 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1529 RValue<SIMD::Int>(SIMD::Int(0));
1530 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1532 for (int j = 0; j < SIMD::Width; j++)
1534 If(Extract(routine->activeLaneMask, j) != 0)
1536 Int offset = Int(i) + Extract(offsets, j);
// Interleaved storage keeps lane j's copy of component i at i*Width + j.
1537 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1538 ptrBase[offset] = RValue<Float>(src[i]);
// Fast path (Else branch elided): broadcast each constant scalar to all lanes.
1545 // Constant source data.
1546 // No divergent offsets or masked lanes.
1547 Pointer<SIMD::Float> dst = ptrBase;
1548 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1550 dst[i] = RValue<SIMD::Float>(src[i]);
// Run-time source: same slow/fast structure, reading from the intermediate.
1556 // Intermediate source data.
1557 auto &src = routine->getIntermediate(objectId);
1558 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1560 // Divergent offsets or masked lanes.
1561 auto offsets = pointer.kind == Object::Kind::Value ?
1562 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1563 RValue<SIMD::Int>(SIMD::Int(0));
1564 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1566 for (int j = 0; j < SIMD::Width; j++)
1568 If(Extract(routine->activeLaneMask, j) != 0)
1570 Int offset = Int(i) + Extract(offsets, j);
1571 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1572 ptrBase[offset] = Extract(src.Float(i), j);
// Fast path (Else branch elided): whole-vector stores.
1579 // No divergent offsets or masked lanes.
1580 if (interleavedByLane)
1582 // Lane-interleaved data.
1583 Pointer<SIMD::Float> dst = ptrBase;
1584 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1586 dst[i] = src.Float(i);
1591 // Intermediate source data. Non-interleaved data.
1592 Pointer<SIMD::Float> dst = ptrBase;
1593 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1595 dst[i] = SIMD::Float(src.Float(i));
1602 void SpirvShader::EmitCompositeConstruct(InsnIterator insn, SpirvRoutine *routine) const
1604 auto &type = getType(insn.word(1));
1605 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1608 for (auto i = 0u; i < insn.wordCount() - 3; i++)
1610 Object::ID srcObjectId = insn.word(3u + i);
1611 auto & srcObject = getObject(srcObjectId);
1612 auto & srcObjectTy = getType(srcObject.type);
1613 GenericValue srcObjectAccess(this, routine, srcObjectId);
1615 for (auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
1617 dst.emplace(offset++, srcObjectAccess.Float(j));
1622 void SpirvShader::EmitCompositeInsert(InsnIterator insn, SpirvRoutine *routine) const
1624 Type::ID resultTypeId = insn.word(1);
1625 auto &type = getType(resultTypeId);
1626 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1627 auto &newPartObject = getObject(insn.word(3));
1628 auto &newPartObjectTy = getType(newPartObject.type);
1629 auto firstNewComponent = WalkLiteralAccessChain(resultTypeId, insn.wordCount() - 5, insn.wordPointer(5));
1631 GenericValue srcObjectAccess(this, routine, insn.word(4));
1632 GenericValue newPartObjectAccess(this, routine, insn.word(3));
1634 // old components before
1635 for (auto i = 0u; i < firstNewComponent; i++)
1637 dst.emplace(i, srcObjectAccess.Float(i));
1640 for (auto i = 0u; i < newPartObjectTy.sizeInComponents; i++)
1642 dst.emplace(firstNewComponent + i, newPartObjectAccess.Float(i));
1644 // old components after
1645 for (auto i = firstNewComponent + newPartObjectTy.sizeInComponents; i < type.sizeInComponents; i++)
1647 dst.emplace(i, srcObjectAccess.Float(i));
1651 void SpirvShader::EmitCompositeExtract(InsnIterator insn, SpirvRoutine *routine) const
1653 auto &type = getType(insn.word(1));
1654 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1655 auto &compositeObject = getObject(insn.word(3));
1656 Type::ID compositeTypeId = compositeObject.definition.word(1);
1657 auto firstComponent = WalkLiteralAccessChain(compositeTypeId, insn.wordCount() - 4, insn.wordPointer(4));
1659 GenericValue compositeObjectAccess(this, routine, insn.word(3));
1660 for (auto i = 0u; i < type.sizeInComponents; i++)
1662 dst.emplace(i, compositeObjectAccess.Float(firstComponent + i));
1666 void SpirvShader::EmitVectorShuffle(InsnIterator insn, SpirvRoutine *routine) const
1668 auto &type = getType(insn.word(1));
1669 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1671 // Note: number of components in result type, first half type, and second
1672 // half type are all independent.
1673 auto &firstHalfType = getType(getObject(insn.word(3)).type);
1675 GenericValue firstHalfAccess(this, routine, insn.word(3));
1676 GenericValue secondHalfAccess(this, routine, insn.word(4));
1678 for (auto i = 0u; i < type.sizeInComponents; i++)
1680 auto selector = insn.word(5 + i);
1681 if (selector == static_cast<uint32_t>(-1))
1683 // Undefined value. Until we decide to do real undef values, zero is as good
1685 dst.emplace(i, RValue<SIMD::Float>(0.0f));
1687 else if (selector < firstHalfType.sizeInComponents)
1689 dst.emplace(i, firstHalfAccess.Float(selector));
1693 dst.emplace(i, secondHalfAccess.Float(selector - firstHalfType.sizeInComponents));
1698 void SpirvShader::EmitVectorExtractDynamic(sw::SpirvShader::InsnIterator insn, sw::SpirvRoutine *routine) const
1700 auto &type = getType(insn.word(1));
1701 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1702 auto &srcType = getType(getObject(insn.word(3)).type);
1704 GenericValue src(this, routine, insn.word(3));
1705 GenericValue index(this, routine, insn.word(4));
1707 SIMD::UInt v = SIMD::UInt(0);
1709 for (auto i = 0u; i < srcType.sizeInComponents; i++)
1711 v |= CmpEQ(index.UInt(0), SIMD::UInt(i)) & src.UInt(i);
1717 void SpirvShader::EmitVectorInsertDynamic(sw::SpirvShader::InsnIterator insn, sw::SpirvRoutine *routine) const
1719 auto &type = getType(insn.word(1));
1720 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1722 GenericValue src(this, routine, insn.word(3));
1723 GenericValue component(this, routine, insn.word(4));
1724 GenericValue index(this, routine, insn.word(5));
1726 for (auto i = 0u; i < type.sizeInComponents; i++)
1728 SIMD::UInt mask = CmpEQ(SIMD::UInt(i), index.UInt(0));
1729 dst.emplace(i, (src.UInt(i) & ~mask) | (component.UInt(0) & mask));
1733 void SpirvShader::EmitVectorTimesScalar(InsnIterator insn, SpirvRoutine *routine) const
1735 auto &type = getType(insn.word(1));
1736 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1737 auto lhs = GenericValue(this, routine, insn.word(3));
1738 auto rhs = GenericValue(this, routine, insn.word(4));
1740 for (auto i = 0u; i < type.sizeInComponents; i++)
1742 dst.emplace(i, lhs.Float(i) * rhs.Float(0));
// Emits code for componentwise unary operators: negation, logical not,
// int<->float conversions, bitcast, and the fragment derivative instructions.
// NOTE(review): elided extraction — braces and the break; after each case are
// missing lines.
1746 void SpirvShader::EmitUnaryOp(InsnIterator insn, SpirvRoutine *routine) const
1748 auto &type = getType(insn.word(1));
1749 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1750 auto src = GenericValue(this, routine, insn.word(3));
1752 for (auto i = 0u; i < type.sizeInComponents; i++)
1754 switch (insn.opcode())
1757 case spv::OpLogicalNot: // logical not == bitwise not due to all-bits boolean representation
1758 dst.emplace(i, ~src.UInt(i));
1760 case spv::OpSNegate:
1761 dst.emplace(i, -src.Int(i));
1763 case spv::OpFNegate:
1764 dst.emplace(i, -src.Float(i));
// Conversions reinterpret via the typed accessors of GenericValue.
1766 case spv::OpConvertFToU:
1767 dst.emplace(i, SIMD::UInt(src.Float(i)));
1769 case spv::OpConvertFToS:
1770 dst.emplace(i, SIMD::Int(src.Float(i)));
1772 case spv::OpConvertSToF:
1773 dst.emplace(i, SIMD::Float(src.Int(i)));
1775 case spv::OpConvertUToF:
1776 dst.emplace(i, SIMD::Float(src.UInt(i)));
// Bitcast is a no-op here: intermediates are stored as raw lanes.
1778 case spv::OpBitcast:
1779 dst.emplace(i, src.Float(i));
// IsInf/IsNan cases (labels elided above each emplace).
1782 dst.emplace(i, IsInf(src.Float(i)));
1785 dst.emplace(i, IsNan(src.Float(i)));
1788 case spv::OpDPdxCoarse:
1789 // Derivative instructions: FS invocations are laid out like so:
// Lanes form a 2x2 quad: 0,1 on the top row, 2,3 on the bottom row.
1792 static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
// Coarse ddx: single difference across the top row, broadcast to all lanes.
1793 dst.emplace(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
1796 case spv::OpDPdyCoarse:
1797 dst.emplace(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
1800 case spv::OpFwidthCoarse:
1801 dst.emplace(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))
1802 + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
// Fine ddx: separate difference per quad row, inserted into the row's lanes.
1804 case spv::OpDPdxFine:
1806 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
1807 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
1808 SIMD::Float v = SIMD::Float(firstRow);
1809 v = Insert(v, secondRow, 2);
1810 v = Insert(v, secondRow, 3);
// Fine ddy: separate difference per quad column.
1814 case spv::OpDPdyFine:
1816 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
1817 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
1818 SIMD::Float v = SIMD::Float(firstColumn);
1819 v = Insert(v, secondColumn, 1);
1820 v = Insert(v, secondColumn, 3);
// Fine fwidth: |ddx_fine| + |ddy_fine|, computed inline.
1824 case spv::OpFwidthFine:
1826 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
1827 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
1828 SIMD::Float dpdx = SIMD::Float(firstRow);
1829 dpdx = Insert(dpdx, secondRow, 2);
1830 dpdx = Insert(dpdx, secondRow, 3);
1831 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
1832 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
1833 SIMD::Float dpdy = SIMD::Float(firstColumn);
1834 dpdy = Insert(dpdy, secondColumn, 1);
1835 dpdy = Insert(dpdy, secondColumn, 3);
1836 dst.emplace(i, Abs(dpdx) + Abs(dpdy));
1840 UNIMPLEMENTED("Unhandled unary operator %s", OpcodeName(insn.opcode()).c_str());
// Emits code for componentwise binary operators: integer/float arithmetic,
// comparisons, shifts, bitwise/logical ops and the extended multiplies.
// Iterates over the *operand* type's component count (for the *MulExtended
// ops the result struct is twice that size). NOTE(review): elided extraction —
// braces, break; statements and several case labels are missing lines.
1845 void SpirvShader::EmitBinaryOp(InsnIterator insn, SpirvRoutine *routine) const
1847 auto &type = getType(insn.word(1));
1848 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1849 auto &lhsType = getType(getObject(insn.word(3)).type);
1850 auto lhs = GenericValue(this, routine, insn.word(3));
1851 auto rhs = GenericValue(this, routine, insn.word(4));
1853 for (auto i = 0u; i < lhsType.sizeInComponents; i++)
1855 switch (insn.opcode())
// OpIAdd / OpISub / OpIMul (labels elided).
1858 dst.emplace(i, lhs.Int(i) + rhs.Int(i));
1861 dst.emplace(i, lhs.Int(i) - rhs.Int(i));
1864 dst.emplace(i, lhs.Int(i) * rhs.Int(i));
// OpSDiv: clamp the two trapping cases (x/0 and INT_MIN/-1) before dividing.
1868 SIMD::Int a = lhs.Int(i);
1869 SIMD::Int b = rhs.Int(i);
1870 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
1871 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
1872 dst.emplace(i, a / b);
// OpUDiv: force zero divisors to all-ones so the division cannot trap.
1877 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
1878 dst.emplace(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
// OpSRem: same trap avoidance as OpSDiv, remainder takes lhs's sign.
1883 SIMD::Int a = lhs.Int(i);
1884 SIMD::Int b = rhs.Int(i);
1885 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
1886 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
1887 dst.emplace(i, a % b);
// OpSMod: remainder corrected to take the divisor's sign.
1892 SIMD::Int a = lhs.Int(i);
1893 SIMD::Int b = rhs.Int(i);
1894 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
1895 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
1897 // If a and b have opposite signs, the remainder operation takes
1898 // the sign from a but OpSMod is supposed to take the sign of b.
1899 // Adding b will ensure that the result has the correct sign and
1900 // that it is still congruent to a modulo b.
1902 // See also http://mathforum.org/library/drmath/view/52343.html
// NOTE(review): `mod` (presumably a % b) is declared on an elided line.
1903 auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
1904 auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
1905 dst.emplace(i, As<SIMD::Float>(fixedMod));
// OpUMod: zero-divisor masking as in OpUDiv.
1910 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
1911 dst.emplace(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
// Integer and logical comparisons (booleans are all-bits lane masks).
1915 case spv::OpLogicalEqual:
1916 dst.emplace(i, CmpEQ(lhs.Int(i), rhs.Int(i)))
1918 case spv::OpINotEqual:
1919 case spv::OpLogicalNotEqual:
1920 dst.emplace(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
1922 case spv::OpUGreaterThan:
1923 dst.emplace(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
1925 case spv::OpSGreaterThan:
1926 dst.emplace(i, CmpGT(lhs.Int(i), rhs.Int(i)));
1928 case spv::OpUGreaterThanEqual:
1929 dst.emplace(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
1931 case spv::OpSGreaterThanEqual:
1932 dst.emplace(i, CmpGE(lhs.Int(i), rhs.Int(i)));
1934 case spv::OpULessThan:
1935 dst.emplace(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
1937 case spv::OpSLessThan:
1938 dst.emplace(i, CmpLT(lhs.Int(i), rhs.Int(i)));
1940 case spv::OpULessThanEqual:
1941 dst.emplace(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
1943 case spv::OpSLessThanEqual:
1944 dst.emplace(i, CmpLE(lhs.Int(i), rhs.Int(i)));
// Float arithmetic (OpFAdd/OpFSub/OpFMul/OpFDiv/OpFMod/OpFRem; labels elided).
1947 dst.emplace(i, lhs.Float(i) + rhs.Float(i));
1950 dst.emplace(i, lhs.Float(i) - rhs.Float(i));
1953 dst.emplace(i, lhs.Float(i) * rhs.Float(i));
1956 dst.emplace(i, lhs.Float(i) / rhs.Float(i));
1959 // TODO(b/126873455): inaccurate for values greater than 2^24
1960 dst.emplace(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
1963 dst.emplace(i, lhs.Float(i) % rhs.Float(i));
// Float comparisons: ordered (Cmp*) vs unordered (CmpU*) NaN handling.
1965 case spv::OpFOrdEqual:
1966 dst.emplace(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
1968 case spv::OpFUnordEqual:
1969 dst.emplace(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
1971 case spv::OpFOrdNotEqual:
1972 dst.emplace(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
1974 case spv::OpFUnordNotEqual:
1975 dst.emplace(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
1977 case spv::OpFOrdLessThan:
1978 dst.emplace(i, CmpLT(lhs.Float(i), rhs.Float(i)));
1980 case spv::OpFUnordLessThan:
1981 dst.emplace(i, CmpULT(lhs.Float(i), rhs.Float(i)));
1983 case spv::OpFOrdGreaterThan:
1984 dst.emplace(i, CmpGT(lhs.Float(i), rhs.Float(i)));
1986 case spv::OpFUnordGreaterThan:
1987 dst.emplace(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
1989 case spv::OpFOrdLessThanEqual:
1990 dst.emplace(i, CmpLE(lhs.Float(i), rhs.Float(i)));
1992 case spv::OpFUnordLessThanEqual:
1993 dst.emplace(i, CmpULE(lhs.Float(i), rhs.Float(i)));
1995 case spv::OpFOrdGreaterThanEqual:
1996 dst.emplace(i, CmpGE(lhs.Float(i), rhs.Float(i)));
1998 case spv::OpFUnordGreaterThanEqual:
1999 dst.emplace(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
// Shifts and bitwise ops (logical and/or share the bitwise implementation).
2001 case spv::OpShiftRightLogical:
2002 dst.emplace(i, lhs.UInt(i) >> rhs.UInt(i));
2004 case spv::OpShiftRightArithmetic:
2005 dst.emplace(i, lhs.Int(i) >> rhs.Int(i));
2007 case spv::OpShiftLeftLogical:
2008 dst.emplace(i, lhs.UInt(i) << rhs.UInt(i));
2010 case spv::OpBitwiseOr:
2011 case spv::OpLogicalOr:
2012 dst.emplace(i, lhs.UInt(i) | rhs.UInt(i));
2014 case spv::OpBitwiseXor:
2015 dst.emplace(i, lhs.UInt(i) ^ rhs.UInt(i));
2017 case spv::OpBitwiseAnd:
2018 case spv::OpLogicalAnd:
2019 dst.emplace(i, lhs.UInt(i) & rhs.UInt(i));
2021 case spv::OpSMulExtended:
2022 // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
2023 // In our flat view then, component i is the i'th component of the first member;
2024 // component i + N is the i'th component of the second member.
2025 dst.emplace(i, lhs.Int(i) * rhs.Int(i));
2026 dst.emplace(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
2028 case spv::OpUMulExtended:
2029 dst.emplace(i, lhs.UInt(i) * rhs.UInt(i));
2030 dst.emplace(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
2033 UNIMPLEMENTED("Unhandled binary operator %s", OpcodeName(insn.opcode()).c_str());
2038 void SpirvShader::EmitDot(InsnIterator insn, SpirvRoutine *routine) const
2040 auto &type = getType(insn.word(1));
2041 assert(type.sizeInComponents == 1);
2042 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2043 auto &lhsType = getType(getObject(insn.word(3)).type);
2044 auto lhs = GenericValue(this, routine, insn.word(3));
2045 auto rhs = GenericValue(this, routine, insn.word(4));
2047 dst.emplace(0, Dot(lhsType.sizeInComponents, lhs, rhs));
2050 void SpirvShader::EmitSelect(InsnIterator insn, SpirvRoutine *routine) const
2052 auto &type = getType(insn.word(1));
2053 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2054 auto cond = GenericValue(this, routine, insn.word(3));
2055 auto lhs = GenericValue(this, routine, insn.word(4));
2056 auto rhs = GenericValue(this, routine, insn.word(5));
2058 for (auto i = 0u; i < type.sizeInComponents; i++)
2060 dst.emplace(i, (cond.Int(i) & lhs.Int(i)) | (~cond.Int(i) & rhs.Int(i))); // FIXME: IfThenElse()
// Emits code for OpExtInst with the GLSL.std.450 extended instruction set:
// word 4 selects the extended opcode, operands start at word 5.
// NOTE(review): elided extraction — braces and break; statements between the
// visible cases are missing lines.
2064 void SpirvShader::EmitExtendedInstruction(InsnIterator insn, SpirvRoutine *routine) const
2066 auto &type = getType(insn.word(1));
2067 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2068 auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
2070 switch (extInstIndex)
// Componentwise single-operand math.
2072 case GLSLstd450FAbs:
2074 auto src = GenericValue(this, routine, insn.word(5));
2075 for (auto i = 0u; i < type.sizeInComponents; i++)
2077 dst.emplace(i, Abs(src.Float(i)));
2081 case GLSLstd450SAbs:
2083 auto src = GenericValue(this, routine, insn.word(5));
2084 for (auto i = 0u; i < type.sizeInComponents; i++)
2086 dst.emplace(i, Abs(src.Int(i)));
// Cross product, written out component by component for the vec3 result.
2090 case GLSLstd450Cross:
2092 auto lhs = GenericValue(this, routine, insn.word(5));
2093 auto rhs = GenericValue(this, routine, insn.word(6));
2094 dst.emplace(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
2095 dst.emplace(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
2096 dst.emplace(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
// Rounding family.
2099 case GLSLstd450Floor:
2101 auto src = GenericValue(this, routine, insn.word(5));
2102 for (auto i = 0u; i < type.sizeInComponents; i++)
2104 dst.emplace(i, Floor(src.Float(i)));
2108 case GLSLstd450Trunc:
2110 auto src = GenericValue(this, routine, insn.word(5));
2111 for (auto i = 0u; i < type.sizeInComponents; i++)
2113 dst.emplace(i, Trunc(src.Float(i)));
2117 case GLSLstd450Ceil:
2119 auto src = GenericValue(this, routine, insn.word(5));
2120 for (auto i = 0u; i < type.sizeInComponents; i++)
2122 dst.emplace(i, Ceil(src.Float(i)));
2126 case GLSLstd450Fract:
2128 auto src = GenericValue(this, routine, insn.word(5));
2129 for (auto i = 0u; i < type.sizeInComponents; i++)
2131 dst.emplace(i, Frac(src.Float(i)));
2135 case GLSLstd450Round:
2137 auto src = GenericValue(this, routine, insn.word(5));
2138 for (auto i = 0u; i < type.sizeInComponents; i++)
2140 dst.emplace(i, Round(src.Float(i)));
// RoundEven: adjust Round()'s result by +/-1 when exactly halfway and the
// rounded value is odd, yielding round-half-to-even.
2144 case GLSLstd450RoundEven:
2146 auto src = GenericValue(this, routine, insn.word(5));
2147 for (auto i = 0u; i < type.sizeInComponents; i++)
2149 auto x = Round(src.Float(i));
2150 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
2151 dst.emplace(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
2152 SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
// Min/max family, per component, for each numeric interpretation.
2156 case GLSLstd450FMin:
2158 auto lhs = GenericValue(this, routine, insn.word(5));
2159 auto rhs = GenericValue(this, routine, insn.word(6));
2160 for (auto i = 0u; i < type.sizeInComponents; i++)
2162 dst.emplace(i, Min(lhs.Float(i), rhs.Float(i)));
2166 case GLSLstd450FMax:
2168 auto lhs = GenericValue(this, routine, insn.word(5));
2169 auto rhs = GenericValue(this, routine, insn.word(6));
2170 for (auto i = 0u; i < type.sizeInComponents; i++)
2172 dst.emplace(i, Max(lhs.Float(i), rhs.Float(i)));
2176 case GLSLstd450SMin:
2178 auto lhs = GenericValue(this, routine, insn.word(5));
2179 auto rhs = GenericValue(this, routine, insn.word(6));
2180 for (auto i = 0u; i < type.sizeInComponents; i++)
2182 dst.emplace(i, Min(lhs.Int(i), rhs.Int(i)));
2186 case GLSLstd450SMax:
2188 auto lhs = GenericValue(this, routine, insn.word(5));
2189 auto rhs = GenericValue(this, routine, insn.word(6));
2190 for (auto i = 0u; i < type.sizeInComponents; i++)
2192 dst.emplace(i, Max(lhs.Int(i), rhs.Int(i)));
2196 case GLSLstd450UMin:
2198 auto lhs = GenericValue(this, routine, insn.word(5));
2199 auto rhs = GenericValue(this, routine, insn.word(6));
2200 for (auto i = 0u; i < type.sizeInComponents; i++)
2202 dst.emplace(i, Min(lhs.UInt(i), rhs.UInt(i)));
2206 case GLSLstd450UMax:
2208 auto lhs = GenericValue(this, routine, insn.word(5));
2209 auto rhs = GenericValue(this, routine, insn.word(6));
2210 for (auto i = 0u; i < type.sizeInComponents; i++)
2212 dst.emplace(i, Max(lhs.UInt(i), rhs.UInt(i)));
// step(edge, x): 1.0 where x >= edge, built from a compare mask.
2216 case GLSLstd450Step:
2218 auto edge = GenericValue(this, routine, insn.word(5));
2219 auto x = GenericValue(this, routine, insn.word(6));
2220 for (auto i = 0u; i < type.sizeInComponents; i++)
2222 dst.emplace(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
// smoothstep: clamp t to [0,1] then apply the 3t^2 - 2t^3 Hermite polynomial.
2226 case GLSLstd450SmoothStep:
2228 auto edge0 = GenericValue(this, routine, insn.word(5));
2229 auto edge1 = GenericValue(this, routine, insn.word(6));
2230 auto x = GenericValue(this, routine, insn.word(7));
2231 for (auto i = 0u; i < type.sizeInComponents; i++)
2233 auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
2234 (edge1.Float(i) - edge0.Float(i)), SIMD::Float(0.0f)), SIMD::Float(1.0f));
2235 dst.emplace(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
// mix(x, y, a) = x + a*(y - x).
2239 case GLSLstd450FMix:
2241 auto x = GenericValue(this, routine, insn.word(5));
2242 auto y = GenericValue(this, routine, insn.word(6));
2243 auto a = GenericValue(this, routine, insn.word(7));
2244 for (auto i = 0u; i < type.sizeInComponents; i++)
2246 dst.emplace(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
// Clamp family: min(max(x, lo), hi) per numeric interpretation.
2250 case GLSLstd450FClamp:
2252 auto x = GenericValue(this, routine, insn.word(5));
2253 auto minVal = GenericValue(this, routine, insn.word(6));
2254 auto maxVal = GenericValue(this, routine, insn.word(7));
2255 for (auto i = 0u; i < type.sizeInComponents; i++)
2257 dst.emplace(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
2261 case GLSLstd450SClamp:
2263 auto x = GenericValue(this, routine, insn.word(5));
2264 auto minVal = GenericValue(this, routine, insn.word(6));
2265 auto maxVal = GenericValue(this, routine, insn.word(7));
2266 for (auto i = 0u; i < type.sizeInComponents; i++)
2268 dst.emplace(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
2272 case GLSLstd450UClamp:
2274 auto x = GenericValue(this, routine, insn.word(5));
2275 auto minVal = GenericValue(this, routine, insn.word(6));
2276 auto maxVal = GenericValue(this, routine, insn.word(7));
2277 for (auto i = 0u; i < type.sizeInComponents; i++)
2279 dst.emplace(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
// sign(x): -1/0/+1 built from two compare masks ANDed with +/-1.0 bit patterns.
2283 case GLSLstd450FSign:
2285 auto src = GenericValue(this, routine, insn.word(5));
2286 for (auto i = 0u; i < type.sizeInComponents; i++)
2288 auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
2289 auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
2290 dst.emplace(i, neg | pos);
2294 case GLSLstd450SSign:
2296 auto src = GenericValue(this, routine, insn.word(5));
2297 for (auto i = 0u; i < type.sizeInComponents; i++)
2299 auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
2300 auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
2301 dst.emplace(i, neg | pos);
// reflect(I, N) = I - 2*dot(I,N)*N.
2305 case GLSLstd450Reflect:
2307 auto I = GenericValue(this, routine, insn.word(5));
2308 auto N = GenericValue(this, routine, insn.word(6));
2310 SIMD::Float d = Dot(type.sizeInComponents, I, N);
2312 for (auto i = 0u; i < type.sizeInComponents; i++)
2314 dst.emplace(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
// refract: zero on total internal reflection (k < 0), masked in via `pos`.
2318 case GLSLstd450Refract:
2320 auto I = GenericValue(this, routine, insn.word(5));
2321 auto N = GenericValue(this, routine, insn.word(6));
2322 auto eta = GenericValue(this, routine, insn.word(7));
2324 SIMD::Float d = Dot(type.sizeInComponents, I, N);
2325 SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
2326 SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
2327 SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
2329 for (auto i = 0u; i < type.sizeInComponents; i++)
2331 dst.emplace(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
// faceforward(N, I, Nref): N where dot(I, Nref) < 0, otherwise -N.
2335 case GLSLstd450FaceForward:
2337 auto N = GenericValue(this, routine, insn.word(5));
2338 auto I = GenericValue(this, routine, insn.word(6));
2339 auto Nref = GenericValue(this, routine, insn.word(7));
2341 SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
2342 SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
2344 for (auto i = 0u; i < type.sizeInComponents; i++)
2346 auto n = N.Float(i);
2347 dst.emplace(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
// length(x) = sqrt(dot(x, x)); operand size comes from the operand's type.
2351 case GLSLstd450Length:
2353 auto x = GenericValue(this, routine, insn.word(5));
2354 SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2356 dst.emplace(0, Sqrt(d));
2359 case GLSLstd450Normalize:
2361 auto x = GenericValue(this, routine, insn.word(5));
2362 SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2363 SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
2365 for (auto i = 0u; i < type.sizeInComponents; i++)
2367 dst.emplace(i, invLength * x.Float(i));
2371 case GLSLstd450Distance:
2373 auto p0 = GenericValue(this, routine, insn.word(5));
2374 auto p1 = GenericValue(this, routine, insn.word(6));
2375 auto p0Type = getType(getObject(insn.word(5)).type);
2377 // sqrt(dot(p0-p1, p0-p1))
2378 SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
2380 for (auto i = 1u; i < p0Type.sizeInComponents; i++)
2382 d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
2385 dst.emplace(0, Sqrt(d));
2389 UNIMPLEMENTED("Unhandled ExtInst %d", extInstIndex);
2393 SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const
2395 SIMD::Float d = x.Float(0) * y.Float(0);
2397 for (auto i = 1u; i < numComponents; i++)
2399 d += x.Float(i) * y.Float(i);
2405 void SpirvShader::EmitAny(InsnIterator insn, SpirvRoutine *routine) const
2407 auto &type = getType(insn.word(1));
2408 assert(type.sizeInComponents == 1);
2409 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2410 auto &srcType = getType(getObject(insn.word(3)).type);
2411 auto src = GenericValue(this, routine, insn.word(3));
2413 SIMD::UInt result = src.UInt(0);
2415 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2417 result |= src.UInt(i);
2420 dst.emplace(0, result);
2423 void SpirvShader::EmitAll(InsnIterator insn, SpirvRoutine *routine) const
2425 auto &type = getType(insn.word(1));
2426 assert(type.sizeInComponents == 1);
2427 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2428 auto &srcType = getType(getObject(insn.word(3)).type);
2429 auto src = GenericValue(this, routine, insn.word(3));
2431 SIMD::UInt result = src.UInt(0);
2433 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2435 result &= src.UInt(i);
2438 dst.emplace(0, result);
2441 void SpirvShader::EmitBranch(InsnIterator insn, SpirvRoutine *routine) const
2443 auto blockId = Block::ID(insn.word(1));
2444 EmitBlock(routine, getBlock(blockId));
2447 void SpirvShader::emitEpilog(SpirvRoutine *routine) const
2449 for (auto insn : *this)
2451 switch (insn.opcode())
2453 case spv::OpVariable:
2455 Object::ID resultId = insn.word(2);
2456 auto &object = getObject(resultId);
2457 auto &objectTy = getType(object.type);
2458 if (object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
2460 auto &dst = routine->getValue(resultId);
2462 VisitInterface(resultId,
2463 [&](Decorations const &d, AttribType type) {
2464 auto scalarSlot = d.Location << 2 | d.Component;
2465 routine->outputs[scalarSlot] = dst[offset++];
2476 SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
2477 pipelineLayout(pipelineLayout)