OSDN Git Service

Add support for more ExtInst instructions
[android-x86/external-swiftshader.git] / src / Pipeline / SpirvShader.cpp
1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <spirv/unified1/spirv.hpp>
16 #include <spirv/unified1/GLSL.std.450.h>
17 #include "SpirvShader.hpp"
18 #include "System/Math.hpp"
19 #include "Vulkan/VkBuffer.hpp"
20 #include "Vulkan/VkDebug.hpp"
21 #include "Vulkan/VkPipelineLayout.hpp"
22 #include "Device/Config.hpp"
23
24 namespace sw
25 {
26         volatile int SpirvShader::serialCounter = 1;    // Start at 1, 0 is invalid shader.
27
28         SpirvShader::SpirvShader(InsnStore const &insns)
29                         : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
30                           outputs{MAX_INTERFACE_COMPONENTS},
31                           serialID{serialCounter++}, modes{}
32         {
33                 ASSERT(insns.size() > 0);
34
35                 // Simplifying assumptions (to be satisfied by earlier transformations)
36                 // - There is exactly one entrypoint in the module, and it's the one we want
37                 // - The only input/output OpVariables present are those used by the entrypoint
38
39                 // TODO: Add real support for control flow. For now, track whether we've seen
40                 // a label or a return already (if so, the shader does things we will mishandle).
41                 // We expect there to be one of each in a simple shader -- the first and last instruction
42                 // of the entrypoint function.
43                 bool seenLabel = false;
44                 bool seenReturn = false;
45
46                 for (auto insn : *this)
47                 {
48                         switch (insn.opcode())
49                         {
50                         case spv::OpExecutionMode:
51                                 ProcessExecutionMode(insn);
52                                 break;
53
54                         case spv::OpDecorate:
55                         {
56                                 TypeOrObjectID targetId = insn.word(1);
57                                 auto decoration = static_cast<spv::Decoration>(insn.word(2));
58                                 decorations[targetId].Apply(
59                                                 decoration,
60                                                 insn.wordCount() > 3 ? insn.word(3) : 0);
61
62                                 if (decoration == spv::DecorationCentroid)
63                                         modes.NeedsCentroid = true;
64                                 break;
65                         }
66
67                         case spv::OpMemberDecorate:
68                         {
69                                 TypeID targetId = insn.word(1);
70                                 auto memberIndex = insn.word(2);
71                                 auto &d = memberDecorations[targetId];
72                                 if (memberIndex >= d.size())
73                                         d.resize(memberIndex + 1);    // on demand; exact size would require another pass...
74                                 auto decoration = static_cast<spv::Decoration>(insn.word(3));
75                                 d[memberIndex].Apply(
76                                                 decoration,
77                                                 insn.wordCount() > 4 ? insn.word(4) : 0);
78
79                                 if (decoration == spv::DecorationCentroid)
80                                         modes.NeedsCentroid = true;
81                                 break;
82                         }
83
84                         case spv::OpDecorationGroup:
85                                 // Nothing to do here. We don't need to record the definition of the group; we'll just have
86                                 // the bundle of decorations float around. If we were to ever walk the decorations directly,
87                                 // we might think about introducing this as a real Object.
88                                 break;
89
90                         case spv::OpGroupDecorate:
91                         {
92                                 auto const &srcDecorations = decorations[insn.word(1)];
93                                 for (auto i = 2u; i < insn.wordCount(); i++)
94                                 {
95                                         // remaining operands are targets to apply the group to.
96                                         decorations[insn.word(i)].Apply(srcDecorations);
97                                 }
98                                 break;
99                         }
100
101                         case spv::OpGroupMemberDecorate:
102                         {
103                                 auto const &srcDecorations = decorations[insn.word(1)];
104                                 for (auto i = 2u; i < insn.wordCount(); i += 2)
105                                 {
106                                         // remaining operands are pairs of <id>, literal for members to apply to.
107                                         auto &d = memberDecorations[insn.word(i)];
108                                         auto memberIndex = insn.word(i + 1);
109                                         if (memberIndex >= d.size())
110                                                 d.resize(memberIndex + 1);    // on demand resize, see above...
111                                         d[memberIndex].Apply(srcDecorations);
112                                 }
113                                 break;
114                         }
115
116                         case spv::OpLabel:
117                                 if (seenLabel)
118                                         UNIMPLEMENTED("Shader contains multiple labels, has control flow");
119                                 seenLabel = true;
120                                 break;
121
122                         case spv::OpReturn:
123                                 if (seenReturn)
124                                         UNIMPLEMENTED("Shader contains multiple returns, has control flow");
125                                 seenReturn = true;
126                                 break;
127
128                         case spv::OpTypeVoid:
129                         case spv::OpTypeBool:
130                         case spv::OpTypeInt:
131                         case spv::OpTypeFloat:
132                         case spv::OpTypeVector:
133                         case spv::OpTypeMatrix:
134                         case spv::OpTypeImage:
135                         case spv::OpTypeSampler:
136                         case spv::OpTypeSampledImage:
137                         case spv::OpTypeArray:
138                         case spv::OpTypeRuntimeArray:
139                         case spv::OpTypeStruct:
140                         case spv::OpTypePointer:
141                         case spv::OpTypeFunction:
142                                 DeclareType(insn);
143                                 break;
144
145                         case spv::OpVariable:
146                         {
147                                 TypeID typeId = insn.word(1);
148                                 ObjectID resultId = insn.word(2);
149                                 auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
150                                 if (insn.wordCount() > 4)
151                                         UNIMPLEMENTED("Variable initializers not yet supported");
152
153                                 auto &object = defs[resultId];
154                                 object.kind = Object::Kind::Variable;
155                                 object.definition = insn;
156                                 object.type = typeId;
157                                 object.pointerBase = insn.word(2);      // base is itself
158
159                                 ASSERT(getType(typeId).storageClass == storageClass);
160
161                                 switch (storageClass)
162                                 {
163                                 case spv::StorageClassInput:
164                                 case spv::StorageClassOutput:
165                                         ProcessInterfaceVariable(object);
166                                         break;
167                                 case spv::StorageClassUniform:
168                                 case spv::StorageClassStorageBuffer:
169                                         object.kind = Object::Kind::PhysicalPointer;
170                                         break;
171
172                                 case spv::StorageClassPrivate:
173                                 case spv::StorageClassFunction:
174                                         break; // Correctly handled.
175
176                                 case spv::StorageClassUniformConstant:
177                                 case spv::StorageClassWorkgroup:
178                                 case spv::StorageClassCrossWorkgroup:
179                                 case spv::StorageClassGeneric:
180                                 case spv::StorageClassPushConstant:
181                                 case spv::StorageClassAtomicCounter:
182                                 case spv::StorageClassImage:
183                                         UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
184                                         break;
185
186                                 default:
187                                         UNREACHABLE("Unexpected StorageClass"); // See Appendix A of the Vulkan spec.
188                                         break;
189                                 }
190                                 break;
191                         }
192
193                         case spv::OpConstant:
194                                 CreateConstant(insn).constantValue[0] = insn.word(3);
195                                 break;
196                         case spv::OpConstantFalse:
197                                 CreateConstant(insn).constantValue[0] = 0;              // represent boolean false as zero
198                                 break;
199                         case spv::OpConstantTrue:
200                                 CreateConstant(insn).constantValue[0] = ~0u;    // represent boolean true as all bits set
201                                 break;
202                         case spv::OpConstantNull:
203                         {
204                                 // OpConstantNull forms a constant of arbitrary type, all zeros.
205                                 auto &object = CreateConstant(insn);
206                                 auto &objectTy = getType(object.type);
207                                 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
208                                 {
209                                         object.constantValue[i] = 0;
210                                 }
211                                 break;
212                         }
213                         case spv::OpConstantComposite:
214                         {
215                                 auto &object = CreateConstant(insn);
216                                 auto offset = 0u;
217                                 for (auto i = 0u; i < insn.wordCount() - 3; i++)
218                                 {
219                                         auto &constituent = getObject(insn.word(i + 3));
220                                         auto &constituentTy = getType(constituent.type);
221                                         for (auto j = 0u; j < constituentTy.sizeInComponents; j++)
222                                                 object.constantValue[offset++] = constituent.constantValue[j];
223                                 }
224                                 break;
225                         }
226
227                         case spv::OpCapability:
228                                 // Various capabilities will be declared, but none affect our code generation at this point.
229                         case spv::OpMemoryModel:
230                                 // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
231                         case spv::OpEntryPoint:
232                         case spv::OpFunction:
233                         case spv::OpFunctionEnd:
234                                 // Due to preprocessing, the entrypoint and its function provide no value.
235                                 break;
236                         case spv::OpExtInstImport:
237                                 // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
238                                 // Valid shaders will not attempt to import any other instruction sets.
239                                 if (0 != strcmp("GLSL.std.450", reinterpret_cast<char const *>(insn.wordPointer(2))))
240                                 {
241                                         UNIMPLEMENTED("Only GLSL extended instruction set is supported");
242                                 }
243                                 break;
244                         case spv::OpName:
245                         case spv::OpMemberName:
246                         case spv::OpSource:
247                         case spv::OpSourceContinued:
248                         case spv::OpSourceExtension:
249                         case spv::OpLine:
250                         case spv::OpNoLine:
251                         case spv::OpModuleProcessed:
252                         case spv::OpString:
253                                 // No semantic impact
254                                 break;
255
256                         case spv::OpFunctionParameter:
257                         case spv::OpFunctionCall:
258                         case spv::OpSpecConstant:
259                         case spv::OpSpecConstantComposite:
260                         case spv::OpSpecConstantFalse:
261                         case spv::OpSpecConstantOp:
262                         case spv::OpSpecConstantTrue:
263                                 // These should have all been removed by preprocessing passes. If we see them here,
264                                 // our assumptions are wrong and we will probably generate wrong code.
265                                 UNIMPLEMENTED("These instructions should have already been lowered.");
266                                 break;
267
268                         case spv::OpFConvert:
269                         case spv::OpSConvert:
270                         case spv::OpUConvert:
271                                 UNIMPLEMENTED("No valid uses for Op*Convert until we support multiple bit widths");
272                                 break;
273
274                         case spv::OpLoad:
275                         case spv::OpAccessChain:
276                         case spv::OpCompositeConstruct:
277                         case spv::OpCompositeInsert:
278                         case spv::OpCompositeExtract:
279                         case spv::OpVectorShuffle:
280                         case spv::OpNot: // Unary ops
281                         case spv::OpSNegate:
282                         case spv::OpFNegate:
283                         case spv::OpLogicalNot:
284                         case spv::OpIAdd: // Binary ops
285                         case spv::OpISub:
286                         case spv::OpIMul:
287                         case spv::OpSDiv:
288                         case spv::OpUDiv:
289                         case spv::OpFAdd:
290                         case spv::OpFSub:
291                         case spv::OpFMul:
292                         case spv::OpFDiv:
293                         case spv::OpFOrdEqual:
294                         case spv::OpFUnordEqual:
295                         case spv::OpFOrdNotEqual:
296                         case spv::OpFUnordNotEqual:
297                         case spv::OpFOrdLessThan:
298                         case spv::OpFUnordLessThan:
299                         case spv::OpFOrdGreaterThan:
300                         case spv::OpFUnordGreaterThan:
301                         case spv::OpFOrdLessThanEqual:
302                         case spv::OpFUnordLessThanEqual:
303                         case spv::OpFOrdGreaterThanEqual:
304                         case spv::OpFUnordGreaterThanEqual:
305                         case spv::OpUMod:
306                         case spv::OpIEqual:
307                         case spv::OpINotEqual:
308                         case spv::OpUGreaterThan:
309                         case spv::OpSGreaterThan:
310                         case spv::OpUGreaterThanEqual:
311                         case spv::OpSGreaterThanEqual:
312                         case spv::OpULessThan:
313                         case spv::OpSLessThan:
314                         case spv::OpULessThanEqual:
315                         case spv::OpSLessThanEqual:
316                         case spv::OpShiftRightLogical:
317                         case spv::OpShiftRightArithmetic:
318                         case spv::OpShiftLeftLogical:
319                         case spv::OpBitwiseOr:
320                         case spv::OpBitwiseXor:
321                         case spv::OpBitwiseAnd:
322                         case spv::OpLogicalOr:
323                         case spv::OpLogicalAnd:
324                         case spv::OpLogicalEqual:
325                         case spv::OpLogicalNotEqual:
326                         case spv::OpUMulExtended:
327                         case spv::OpSMulExtended:
328                         case spv::OpDot:
329                         case spv::OpConvertFToU:
330                         case spv::OpConvertFToS:
331                         case spv::OpConvertSToF:
332                         case spv::OpConvertUToF:
333                         case spv::OpBitcast:
334                         case spv::OpSelect:
335                         case spv::OpExtInst:
336                         case spv::OpIsInf:
337                         case spv::OpIsNan:
338                         case spv::OpAny:
339                         case spv::OpAll:
340                                 // Instructions that yield an intermediate value
341                         {
342                                 TypeID typeId = insn.word(1);
343                                 ObjectID resultId = insn.word(2);
344                                 auto &object = defs[resultId];
345                                 object.type = typeId;
346                                 object.kind = Object::Kind::Value;
347                                 object.definition = insn;
348
349                                 if (insn.opcode() == spv::OpAccessChain)
350                                 {
351                                         // interior ptr has two parts:
352                                         // - logical base ptr, common across all lanes and known at compile time
353                                         // - per-lane offset
354                                         ObjectID baseId = insn.word(3);
355                                         object.pointerBase = getObject(baseId).pointerBase;
356                                 }
357                                 break;
358                         }
359
360                         case spv::OpStore:
361                                 // Don't need to do anything during analysis pass
362                                 break;
363
364                         case spv::OpKill:
365                                 modes.ContainsKill = true;
366                                 break;
367
368                         default:
369                                 UNIMPLEMENTED(OpcodeName(insn.opcode()).c_str());
370                         }
371                 }
372         }
373
374         void SpirvShader::DeclareType(InsnIterator insn)
375         {
376                 TypeID resultId = insn.word(1);
377
378                 auto &type = types[resultId];
379                 type.definition = insn;
380                 type.sizeInComponents = ComputeTypeSize(insn);
381
382                 // A structure is a builtin block if it has a builtin
383                 // member. All members of such a structure are builtins.
384                 switch (insn.opcode())
385                 {
386                 case spv::OpTypeStruct:
387                 {
388                         auto d = memberDecorations.find(resultId);
389                         if (d != memberDecorations.end())
390                         {
391                                 for (auto &m : d->second)
392                                 {
393                                         if (m.HasBuiltIn)
394                                         {
395                                                 type.isBuiltInBlock = true;
396                                                 break;
397                                         }
398                                 }
399                         }
400                         break;
401                 }
402                 case spv::OpTypePointer:
403                 {
404                         TypeID elementTypeId = insn.word(3);
405                         type.element = elementTypeId;
406                         type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
407                         type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
408                         break;
409                 }
410                 case spv::OpTypeVector:
411                 case spv::OpTypeMatrix:
412                 case spv::OpTypeArray:
413                 case spv::OpTypeRuntimeArray:
414                 {
415                         TypeID elementTypeId = insn.word(2);
416                         type.element = elementTypeId;
417                         break;
418                 }
419                 default:
420                         break;
421                 }
422         }
423
424         SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
425         {
426                 TypeID typeId = insn.word(1);
427                 ObjectID resultId = insn.word(2);
428                 auto &object = defs[resultId];
429                 auto &objectTy = getType(typeId);
430                 object.type = typeId;
431                 object.kind = Object::Kind::Constant;
432                 object.definition = insn;
433                 object.constantValue = std::unique_ptr<uint32_t[]>(new uint32_t[objectTy.sizeInComponents]);
434                 return object;
435         }
436
437         void SpirvShader::ProcessInterfaceVariable(Object &object)
438         {
439                 auto &objectTy = getType(object.type);
440                 ASSERT(objectTy.storageClass == spv::StorageClassInput || objectTy.storageClass == spv::StorageClassOutput);
441
442                 ASSERT(objectTy.definition.opcode() == spv::OpTypePointer);
443                 auto pointeeTy = getType(objectTy.element);
444
445                 auto &builtinInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputBuiltins : outputBuiltins;
446                 auto &userDefinedInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputs : outputs;
447
448                 ASSERT(object.definition.opcode() == spv::OpVariable);
449                 ObjectID resultId = object.definition.word(2);
450
451                 if (objectTy.isBuiltInBlock)
452                 {
453                         // walk the builtin block, registering each of its members separately.
454                         auto m = memberDecorations.find(objectTy.element);
455                         ASSERT(m != memberDecorations.end());        // otherwise we wouldn't have marked the type chain
456                         auto &structType = pointeeTy.definition;
457                         auto offset = 0u;
458                         auto word = 2u;
459                         for (auto &member : m->second)
460                         {
461                                 auto &memberType = getType(structType.word(word));
462
463                                 if (member.HasBuiltIn)
464                                 {
465                                         builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
466                                 }
467
468                                 offset += memberType.sizeInComponents;
469                                 ++word;
470                         }
471                         return;
472                 }
473
474                 auto d = decorations.find(resultId);
475                 if (d != decorations.end() && d->second.HasBuiltIn)
476                 {
477                         builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
478                 }
479                 else
480                 {
481                         object.kind = Object::Kind::InterfaceVariable;
482                         VisitInterface(resultId,
483                                                    [&userDefinedInterface](Decorations const &d, AttribType type) {
484                                                            // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
485                                                            auto scalarSlot = (d.Location << 2) | d.Component;
486                                                            ASSERT(scalarSlot >= 0 &&
487                                                                           scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
488
489                                                            auto &slot = userDefinedInterface[scalarSlot];
490                                                            slot.Type = type;
491                                                            slot.Flat = d.Flat;
492                                                            slot.NoPerspective = d.NoPerspective;
493                                                            slot.Centroid = d.Centroid;
494                                                    });
495                 }
496         }
497
498         void SpirvShader::ProcessExecutionMode(InsnIterator insn)
499         {
500                 auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
501                 switch (mode)
502                 {
503                 case spv::ExecutionModeEarlyFragmentTests:
504                         modes.EarlyFragmentTests = true;
505                         break;
506                 case spv::ExecutionModeDepthReplacing:
507                         modes.DepthReplacing = true;
508                         break;
509                 case spv::ExecutionModeDepthGreater:
510                         modes.DepthGreater = true;
511                         break;
512                 case spv::ExecutionModeDepthLess:
513                         modes.DepthLess = true;
514                         break;
515                 case spv::ExecutionModeDepthUnchanged:
516                         modes.DepthUnchanged = true;
517                         break;
518                 case spv::ExecutionModeLocalSize:
519                         modes.LocalSizeX = insn.word(3);
520                         modes.LocalSizeZ = insn.word(5);
521                         modes.LocalSizeY = insn.word(4);
522                         break;
523                 case spv::ExecutionModeOriginUpperLeft:
524                         // This is always the case for a Vulkan shader. Do nothing.
525                         break;
526                 default:
527                         UNIMPLEMENTED("No other execution modes are permitted");
528                 }
529         }
530
531         uint32_t SpirvShader::ComputeTypeSize(sw::SpirvShader::InsnIterator insn)
532         {
533                 // Types are always built from the bottom up (with the exception of forward ptrs, which
534                 // don't appear in Vulkan shaders. Therefore, we can always assume our component parts have
535                 // already been described (and so their sizes determined)
536                 switch (insn.opcode())
537                 {
538                 case spv::OpTypeVoid:
539                 case spv::OpTypeSampler:
540                 case spv::OpTypeImage:
541                 case spv::OpTypeSampledImage:
542                 case spv::OpTypeFunction:
543                 case spv::OpTypeRuntimeArray:
544                         // Objects that don't consume any space.
545                         // Descriptor-backed objects currently only need exist at compile-time.
546                         // Runtime arrays don't appear in places where their size would be interesting
547                         return 0;
548
549                 case spv::OpTypeBool:
550                 case spv::OpTypeFloat:
551                 case spv::OpTypeInt:
552                         // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
553                         // we might need to change this, but only 32 bit components are required for Vulkan 1.1.
554                         return 1;
555
556                 case spv::OpTypeVector:
557                 case spv::OpTypeMatrix:
558                         // Vectors and matrices both consume element count * element size.
559                         return getType(insn.word(2)).sizeInComponents * insn.word(3);
560
561                 case spv::OpTypeArray:
562                 {
563                         // Element count * element size. Array sizes come from constant ids.
564                         auto arraySize = GetConstantInt(insn.word(3));
565                         return getType(insn.word(2)).sizeInComponents * arraySize;
566                 }
567
568                 case spv::OpTypeStruct:
569                 {
570                         uint32_t size = 0;
571                         for (uint32_t i = 2u; i < insn.wordCount(); i++)
572                         {
573                                 size += getType(insn.word(i)).sizeInComponents;
574                         }
575                         return size;
576                 }
577
578                 case spv::OpTypePointer:
579                         // Runtime representation of a pointer is a per-lane index.
580                         // Note: clients are expected to look through the pointer if they want the pointee size instead.
581                         return 1;
582
583                 default:
584                         // Some other random insn.
585                         UNIMPLEMENTED("Only types are supported");
586                         return 0;
587                 }
588         }
589
590         bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
591         {
592                 switch (storageClass)
593                 {
594                 case spv::StorageClassUniform:
595                 case spv::StorageClassStorageBuffer:
596                         return false;
597                 default:
598                         return true;
599                 }
600         }
601
602         template<typename F>
603         int SpirvShader::VisitInterfaceInner(TypeID id, Decorations d, F f) const
604         {
605                 // Recursively walks variable definition and its type tree, taking into account
606                 // any explicit Location or Component decorations encountered; where explicit
607                 // Locations or Components are not specified, assigns them sequentially.
608                 // Collected decorations are carried down toward the leaves and across
609                 // siblings; Effect of decorations intentionally does not flow back up the tree.
610                 //
611                 // F is a functor to be called with the effective decoration set for every component.
612                 //
613                 // Returns the next available location, and calls f().
614
615                 // This covers the rules in Vulkan 1.1 spec, 14.1.4 Location Assignment.
616
617                 ApplyDecorationsForId(&d, id);
618
619                 auto const &obj = getType(id);
620                 switch (obj.definition.opcode())
621                 {
622                 case spv::OpTypePointer:
623                         return VisitInterfaceInner<F>(obj.definition.word(3), d, f);
624                 case spv::OpTypeMatrix:
625                         for (auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
626                         {
627                                 // consumes same components of N consecutive locations
628                                 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
629                         }
630                         return d.Location;
631                 case spv::OpTypeVector:
632                         for (auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
633                         {
634                                 // consumes N consecutive components in the same location
635                                 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
636                         }
637                         return d.Location + 1;
638                 case spv::OpTypeFloat:
639                         f(d, ATTRIBTYPE_FLOAT);
640                         return d.Location + 1;
641                 case spv::OpTypeInt:
642                         f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
643                         return d.Location + 1;
644                 case spv::OpTypeBool:
645                         f(d, ATTRIBTYPE_UINT);
646                         return d.Location + 1;
647                 case spv::OpTypeStruct:
648                 {
649                         // iterate over members, which may themselves have Location/Component decorations
650                         for (auto i = 0u; i < obj.definition.wordCount() - 2; i++)
651                         {
652                                 ApplyDecorationsForIdMember(&d, id, i);
653                                 d.Location = VisitInterfaceInner<F>(obj.definition.word(i + 2), d, f);
654                                 d.Component = 0;    // Implicit locations always have component=0
655                         }
656                         return d.Location;
657                 }
658                 case spv::OpTypeArray:
659                 {
660                         auto arraySize = GetConstantInt(obj.definition.word(3));
661                         for (auto i = 0u; i < arraySize; i++)
662                         {
663                                 d.Location = VisitInterfaceInner<F>(obj.definition.word(2), d, f);
664                         }
665                         return d.Location;
666                 }
667                 default:
668                         // Intentionally partial; most opcodes do not participate in type hierarchies
669                         return 0;
670                 }
671         }
672
673         template<typename F>
674         void SpirvShader::VisitInterface(ObjectID id, F f) const
675         {
676                 // Walk a variable definition and call f for each component in it.
677                 Decorations d{};
678                 ApplyDecorationsForId(&d, id);
679
680                 auto def = getObject(id).definition;
681                 ASSERT(def.opcode() == spv::OpVariable);
682                 VisitInterfaceInner<F>(def.word(1), d, f);
683         }
684
        // Computes the per-lane offset selected by an access chain.
        //
        // id         - the base object the chain starts from.
        // numIndexes - number of entries in indexIds.
        // indexIds   - object ids of the indexes, outermost first.
        // routine    - supplies the intermediate values for non-constant indexes.
        //
        // Offsets are accumulated in units of components (Type::sizeInComponents).
        // Contributions from constant indexes are summed on the host in
        // constantOffset; contributions from non-constant indexes are accumulated
        // per lane in dynamicOffset. The result is the sum of the two.
        SIMD::Int SpirvShader::WalkAccessChain(ObjectID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
        {
                // TODO: think about explicit layout (UBO/SSBO) storage classes
                // TODO: avoid doing per-lane work in some cases if we can?

                int constantOffset = 0;
                SIMD::Int dynamicOffset = SIMD::Int(0);
                auto &baseObject = getObject(id);
                // Start from the element type of the base object's type
                // (presumably the pointee of the base pointer -- confirm against Type::element).
                TypeID typeId = getType(baseObject.type).element;

                // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
                // Start with its offset and build from there.
                if (baseObject.kind == Object::Kind::Value)
                        dynamicOffset += As<SIMD::Int>(routine->getIntermediate(id)[0]);

                for (auto i = 0u; i < numIndexes; i++)
                {
                        auto & type = getType(typeId);
                        switch (type.definition.opcode())
                        {
                        case spv::OpTypeStruct:
                        {
                                // The member index is read through GetConstantInt, i.e. it must be a constant.
                                // The member's offset is the total size of all members that precede it.
                                int memberIndex = GetConstantInt(indexIds[i]);
                                int offsetIntoStruct = 0;
                                for (auto j = 0; j < memberIndex; j++) {
                                        auto memberType = type.definition.word(2u + j);
                                        offsetIntoStruct += getType(memberType).sizeInComponents;
                                }
                                constantOffset += offsetIntoStruct;
                                // Continue the walk inside the selected member (member type ids start at word 2).
                                typeId = type.definition.word(2u + memberIndex);
                                break;
                        }

                        case spv::OpTypeVector:
                        case spv::OpTypeMatrix:
                        case spv::OpTypeArray:
                        {
                                // Each step advances by the size of one element.
                                auto stride = getType(type.element).sizeInComponents;
                                auto & obj = getObject(indexIds[i]);
                                if (obj.kind == Object::Kind::Constant)
                                        // Constant index: fold into the host-side offset.
                                        constantOffset += stride * GetConstantInt(indexIds[i]);
                                else
                                        // Non-constant index: scale the per-lane index value by the stride.
                                        dynamicOffset += SIMD::Int(stride) * As<SIMD::Int>(routine->getIntermediate(indexIds[i])[0]);
                                typeId = type.element;
                                break;
                        }

                        default:
                                UNIMPLEMENTED("Unexpected type '%s' in WalkAccessChain", OpcodeName(type.definition.opcode()).c_str());
                        }
                }

                return dynamicOffset + SIMD::Int(constantOffset);
        }
739
740         uint32_t SpirvShader::WalkLiteralAccessChain(TypeID typeId, uint32_t numIndexes, uint32_t const *indexes) const
741         {
742                 uint32_t constantOffset = 0;
743
744                 for (auto i = 0u; i < numIndexes; i++)
745                 {
746                         auto & type = getType(typeId);
747                         switch (type.definition.opcode())
748                         {
749                         case spv::OpTypeStruct:
750                         {
751                                 int memberIndex = indexes[i];
752                                 int offsetIntoStruct = 0;
753                                 for (auto j = 0; j < memberIndex; j++) {
754                                         auto memberType = type.definition.word(2u + j);
755                                         offsetIntoStruct += getType(memberType).sizeInComponents;
756                                 }
757                                 constantOffset += offsetIntoStruct;
758                                 typeId = type.definition.word(2u + memberIndex);
759                                 break;
760                         }
761
762                         case spv::OpTypeVector:
763                         case spv::OpTypeMatrix:
764                         case spv::OpTypeArray:
765                         {
766                                 auto elementType = type.definition.word(2);
767                                 auto stride = getType(elementType).sizeInComponents;
768                                 constantOffset += stride * indexes[i];
769                                 typeId = elementType;
770                                 break;
771                         }
772
773                         default:
774                                 UNIMPLEMENTED("Unexpected type in WalkLiteralAccessChain");
775                         }
776                 }
777
778                 return constantOffset;
779         }
780
781         void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
782         {
783                 switch (decoration)
784                 {
785                 case spv::DecorationLocation:
786                         HasLocation = true;
787                         Location = static_cast<int32_t>(arg);
788                         break;
789                 case spv::DecorationComponent:
790                         HasComponent = true;
791                         Component = arg;
792                         break;
793                 case spv::DecorationDescriptorSet:
794                         HasDescriptorSet = true;
795                         DescriptorSet = arg;
796                         break;
797                 case spv::DecorationBinding:
798                         HasBinding = true;
799                         Binding = arg;
800                         break;
801                 case spv::DecorationBuiltIn:
802                         HasBuiltIn = true;
803                         BuiltIn = static_cast<spv::BuiltIn>(arg);
804                         break;
805                 case spv::DecorationFlat:
806                         Flat = true;
807                         break;
808                 case spv::DecorationNoPerspective:
809                         NoPerspective = true;
810                         break;
811                 case spv::DecorationCentroid:
812                         Centroid = true;
813                         break;
814                 case spv::DecorationBlock:
815                         Block = true;
816                         break;
817                 case spv::DecorationBufferBlock:
818                         BufferBlock = true;
819                         break;
820                 case spv::DecorationOffset:
821                         HasOffset = true;
822                         Offset = static_cast<int32_t>(arg);
823                         break;
824                 case spv::DecorationArrayStride:
825                         HasArrayStride = true;
826                         ArrayStride = static_cast<int32_t>(arg);
827                         break;
828                 case spv::DecorationMatrixStride:
829                         HasMatrixStride = true;
830                         MatrixStride = static_cast<int32_t>(arg);
831                         break;
832                 default:
833                         // Intentionally partial, there are many decorations we just don't care about.
834                         break;
835                 }
836         }
837
838         void SpirvShader::Decorations::Apply(const sw::SpirvShader::Decorations &src)
839         {
840                 // Apply a decoration group to this set of decorations
841                 if (src.HasBuiltIn)
842                 {
843                         HasBuiltIn = true;
844                         BuiltIn = src.BuiltIn;
845                 }
846
847                 if (src.HasLocation)
848                 {
849                         HasLocation = true;
850                         Location = src.Location;
851                 }
852
853                 if (src.HasComponent)
854                 {
855                         HasComponent = true;
856                         Component = src.Component;
857                 }
858
859                 if (src.HasDescriptorSet)
860                 {
861                         HasDescriptorSet = true;
862                         DescriptorSet = src.DescriptorSet;
863                 }
864
865                 if (src.HasBinding)
866                 {
867                         HasBinding = true;
868                         Binding = src.Binding;
869                 }
870
871                 if (src.HasOffset)
872                 {
873                         HasOffset = true;
874                         Offset = src.Offset;
875                 }
876
877                 if (src.HasArrayStride)
878                 {
879                         HasArrayStride = true;
880                         ArrayStride = src.ArrayStride;
881                 }
882
883                 if (src.HasMatrixStride)
884                 {
885                         HasMatrixStride = true;
886                         MatrixStride = src.MatrixStride;
887                 }
888
889                 Flat |= src.Flat;
890                 NoPerspective |= src.NoPerspective;
891                 Centroid |= src.Centroid;
892                 Block |= src.Block;
893                 BufferBlock |= src.BufferBlock;
894         }
895
896         void SpirvShader::ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const
897         {
898                 auto it = decorations.find(id);
899                 if (it != decorations.end())
900                         d->Apply(it->second);
901         }
902
903         void SpirvShader::ApplyDecorationsForIdMember(Decorations *d, TypeID id, uint32_t member) const
904         {
905                 auto it = memberDecorations.find(id);
906                 if (it != memberDecorations.end() && member < it->second.size())
907                 {
908                         d->Apply(it->second[member]);
909                 }
910         }
911
912         uint32_t SpirvShader::GetConstantInt(ObjectID id) const
913         {
914                 // Slightly hackish access to constants very early in translation.
915                 // General consumption of constants by other instructions should
916                 // probably be just lowered to Reactor.
917
918                 // TODO: not encountered yet since we only use this for array sizes etc,
919                 // but is possible to construct integer constant 0 via OpConstantNull.
920                 auto insn = getObject(id).definition;
921                 ASSERT(insn.opcode() == spv::OpConstant);
922                 ASSERT(getType(insn.word(1)).definition.opcode() == spv::OpTypeInt);
923                 return insn.word(3);
924         }
925
926         // emit-time
927
928         void SpirvShader::emitProlog(SpirvRoutine *routine) const
929         {
930                 for (auto insn : *this)
931                 {
932                         switch (insn.opcode())
933                         {
934                         case spv::OpVariable:
935                         {
936                                 ObjectID resultId = insn.word(2);
937                                 auto &object = getObject(resultId);
938                                 auto &objectTy = getType(object.type);
939                                 auto &pointeeTy = getType(objectTy.element);
940                                 // TODO: what to do about zero-slot objects?
941                                 if (pointeeTy.sizeInComponents > 0)
942                                 {
943                                         routine->createLvalue(insn.word(2), pointeeTy.sizeInComponents);
944                                 }
945                                 break;
946                         }
947                         default:
948                                 // Nothing else produces interface variables, so can all be safely ignored.
949                                 break;
950                         }
951                 }
952         }
953
        // Main emit pass: visits every instruction of the module in order and
        // dispatches on its opcode to the matching Emit* helper, which receives the
        // instruction and `routine`. Opcodes with no emit-time work are skipped;
        // any opcode not listed is reported through UNIMPLEMENTED.
        void SpirvShader::emit(SpirvRoutine *routine) const
        {
                for (auto insn : *this)
                {
                        switch (insn.opcode())
                        {
                        // Type declarations, module-level setup, constants, decorations
                        // and debug information.
                        case spv::OpTypeVoid:
                        case spv::OpTypeInt:
                        case spv::OpTypeFloat:
                        case spv::OpTypeBool:
                        case spv::OpTypeVector:
                        case spv::OpTypeArray:
                        case spv::OpTypeRuntimeArray:
                        case spv::OpTypeMatrix:
                        case spv::OpTypeStruct:
                        case spv::OpTypePointer:
                        case spv::OpTypeFunction:
                        case spv::OpExecutionMode:
                        case spv::OpMemoryModel:
                        case spv::OpFunction:
                        case spv::OpFunctionEnd:
                        case spv::OpConstant:
                        case spv::OpConstantNull:
                        case spv::OpConstantTrue:
                        case spv::OpConstantFalse:
                        case spv::OpConstantComposite:
                        case spv::OpExtension:
                        case spv::OpCapability:
                        case spv::OpEntryPoint:
                        case spv::OpExtInstImport:
                        case spv::OpDecorate:
                        case spv::OpMemberDecorate:
                        case spv::OpGroupDecorate:
                        case spv::OpGroupMemberDecorate:
                        case spv::OpDecorationGroup:
                        case spv::OpName:
                        case spv::OpMemberName:
                        case spv::OpSource:
                        case spv::OpSourceContinued:
                        case spv::OpSourceExtension:
                        case spv::OpLine:
                        case spv::OpNoLine:
                        case spv::OpModuleProcessed:
                        case spv::OpString:
                                // Nothing to do at emit time. These are either fully handled at analysis time,
                                // or don't require any work at all.
                                break;

                        case spv::OpLabel:
                        case spv::OpReturn:
                                // TODO: when we do control flow, will need to do some work here.
                                // Until then, there is nothing to do -- we expect there to be an initial OpLabel
                                // in the entrypoint function, for which we do nothing; and a final OpReturn at the
                                // end of the entrypoint function, for which we do nothing.
                                break;

                        // Memory: variables, loads, stores and pointer arithmetic.
                        case spv::OpVariable:
                                EmitVariable(insn, routine);
                                break;

                        case spv::OpLoad:
                                EmitLoad(insn, routine);
                                break;

                        case spv::OpStore:
                                EmitStore(insn, routine);
                                break;

                        case spv::OpAccessChain:
                                EmitAccessChain(insn, routine);
                                break;

                        // Composite construction and manipulation.
                        case spv::OpCompositeConstruct:
                                EmitCompositeConstruct(insn, routine);
                                break;

                        case spv::OpCompositeInsert:
                                EmitCompositeInsert(insn, routine);
                                break;

                        case spv::OpCompositeExtract:
                                EmitCompositeExtract(insn, routine);
                                break;

                        case spv::OpVectorShuffle:
                                EmitVectorShuffle(insn, routine);
                                break;

                        // Single-operand operations share one helper.
                        case spv::OpNot:
                        case spv::OpSNegate:
                        case spv::OpFNegate:
                        case spv::OpLogicalNot:
                        case spv::OpConvertFToU:
                        case spv::OpConvertFToS:
                        case spv::OpConvertSToF:
                        case spv::OpConvertUToF:
                        case spv::OpBitcast:
                        case spv::OpIsInf:
                        case spv::OpIsNan:
                                EmitUnaryOp(insn, routine);
                                break;

                        // Two-operand arithmetic, comparison, shift, bitwise and logical
                        // operations share one helper.
                        case spv::OpIAdd:
                        case spv::OpISub:
                        case spv::OpIMul:
                        case spv::OpSDiv:
                        case spv::OpUDiv:
                        case spv::OpFAdd:
                        case spv::OpFSub:
                        case spv::OpFMul:
                        case spv::OpFDiv:
                        case spv::OpFOrdEqual:
                        case spv::OpFUnordEqual:
                        case spv::OpFOrdNotEqual:
                        case spv::OpFUnordNotEqual:
                        case spv::OpFOrdLessThan:
                        case spv::OpFUnordLessThan:
                        case spv::OpFOrdGreaterThan:
                        case spv::OpFUnordGreaterThan:
                        case spv::OpFOrdLessThanEqual:
                        case spv::OpFUnordLessThanEqual:
                        case spv::OpFOrdGreaterThanEqual:
                        case spv::OpFUnordGreaterThanEqual:
                        case spv::OpUMod:
                        case spv::OpIEqual:
                        case spv::OpINotEqual:
                        case spv::OpUGreaterThan:
                        case spv::OpSGreaterThan:
                        case spv::OpUGreaterThanEqual:
                        case spv::OpSGreaterThanEqual:
                        case spv::OpULessThan:
                        case spv::OpSLessThan:
                        case spv::OpULessThanEqual:
                        case spv::OpSLessThanEqual:
                        case spv::OpShiftRightLogical:
                        case spv::OpShiftRightArithmetic:
                        case spv::OpShiftLeftLogical:
                        case spv::OpBitwiseOr:
                        case spv::OpBitwiseXor:
                        case spv::OpBitwiseAnd:
                        case spv::OpLogicalOr:
                        case spv::OpLogicalAnd:
                        case spv::OpLogicalEqual:
                        case spv::OpLogicalNotEqual:
                        case spv::OpUMulExtended:
                        case spv::OpSMulExtended:
                                EmitBinaryOp(insn, routine);
                                break;

                        // Operations with a dedicated helper each.
                        case spv::OpDot:
                                EmitDot(insn, routine);
                                break;

                        case spv::OpSelect:
                                EmitSelect(insn, routine);
                                break;

                        case spv::OpExtInst:
                                EmitExtendedInstruction(insn, routine);
                                break;

                        case spv::OpAny:
                                EmitAny(insn, routine);
                                break;

                        case spv::OpAll:
                                EmitAll(insn, routine);
                                break;

                        default:
                                // Report the unsupported opcode by name.
                                UNIMPLEMENTED(OpcodeName(insn.opcode()).c_str());
                                break;
                        }
                }
        }
1129
        // Emits the setup for an OpVariable, depending on its storage class:
        //  - Input: for interface variables, copies the routine's input slots into
        //    the variable's value, one slot per component.
        //  - Uniform / StorageBuffer: computes the address of the bound buffer's
        //    data and records it as the variable's physical pointer.
        //  - Any other storage class: nothing is emitted.
        void SpirvShader::EmitVariable(InsnIterator insn, SpirvRoutine *routine) const
        {
                ObjectID resultId = insn.word(2);
                auto &object = getObject(resultId);
                auto &objectTy = getType(object.type);
                switch (objectTy.storageClass)
                {
                case spv::StorageClassInput:
                {
                        if (object.kind == Object::Kind::InterfaceVariable)
                        {
                                auto &dst = routine->getValue(resultId);
                                // Components are written to dst consecutively, in the order
                                // VisitInterface reports them.
                                int offset = 0;
                                VisitInterface(resultId,
                                                                [&](Decorations const &d, AttribType type) {
                                                                        // Four scalar slots per location; Component selects the slot within it.
                                                                        auto scalarSlot = d.Location << 2 | d.Component;
                                                                        dst[offset++] = routine->inputs[scalarSlot];
                                                                });
                        }
                        break;
                }
                case spv::StorageClassUniform:
                case spv::StorageClassStorageBuffer:
                {
                        Decorations d{};
                        ApplyDecorationsForId(&d, resultId);
                        // NOTE(review): presumably an undecorated set/binding holds a negative
                        // default -- confirm against the Decorations constructor.
                        ASSERT(d.DescriptorSet >= 0);
                        ASSERT(d.Binding >= 0);

                        size_t bindingOffset = routine->pipelineLayout->getBindingOffset(d.DescriptorSet, d.Binding);

                        // Follow descriptor set -> binding -> buffer -> data, then add the
                        // offset stored in the descriptor.
                        Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
                        Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // VkDescriptorBufferInfo*
                        Pointer<Byte> buffer = *Pointer<Pointer<Byte>>(binding + OFFSET(VkDescriptorBufferInfo, buffer)); // vk::Buffer*
                        Pointer<Byte> data = *Pointer<Pointer<Byte>>(buffer + vk::Buffer::DataOffset); // void*
                        Int offset = *Pointer<Int>(binding + OFFSET(VkDescriptorBufferInfo, offset));
                        Pointer<Byte> address = data + offset;
                        routine->physicalPointers[resultId] = address;
                        break;
                }
                default:
                        break;
                }
        }
1174
        // Emits an OpLoad: reads the value addressed by the pointer operand (word 3)
        // into a new intermediate for the result id (word 2), one SIMD::Float per
        // component of the result type.
        //
        // Three cases are handled:
        //  - the pointer is itself an intermediate value holding per-lane offsets:
        //    each lane of each component is read individually;
        //  - lane-interleaved storage with a uniform offset: whole SIMD values are read;
        //  - non-interleaved storage with a uniform offset: each component is read
        //    through ptrBase[i] and converted to a SIMD::Float value.
        void SpirvShader::EmitLoad(InsnIterator insn, SpirvRoutine *routine) const
        {
                ObjectID objectId = insn.word(2);
                ObjectID pointerId = insn.word(3);
                auto &object = getObject(objectId);
                auto &objectTy = getType(object.type);
                auto &pointer = getObject(pointerId);
                // The storage class and base address are taken from the object recorded
                // as the pointer's base, not from the pointer itself.
                auto &pointerBase = getObject(pointer.pointerBase);
                auto &pointerBaseTy = getType(pointerBase.type);

                // The loaded type must match both the pointee type and the declared result type (word 1).
                ASSERT(getType(pointer.type).element == object.type);
                ASSERT(TypeID(insn.word(1)) == object.type);

                if (pointerBaseTy.storageClass == spv::StorageClassImage)
                {
                        UNIMPLEMENTED("StorageClassImage load not yet implemented");
                }

                // Base address: either the recorded physical pointer, or the start of
                // the routine-held value of the base object.
                Pointer<Float> ptrBase;
                if (pointerBase.kind == Object::Kind::PhysicalPointer)
                {
                        ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
                }
                else
                {
                        ptrBase = &routine->getValue(pointer.pointerBase)[0];
                }

                bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);

                auto &dst = routine->createIntermediate(objectId, objectTy.sizeInComponents);

                if (pointer.kind == Object::Kind::Value)
                {
                        // Divergent offsets.
                        auto offsets = As<SIMD::Int>(routine->getIntermediate(pointerId)[0]);
                        for (auto i = 0u; i < objectTy.sizeInComponents; i++)
                        {
                                // There is no Float,Float,Float,Float constructor, so the
                                // result is assembled one lane at a time.
                                SIMD::Float v;
                                for (int j = 0; j < SIMD::Width; j++)
                                {
                                        // Component i of lane j, relative to that lane's own offset.
                                        Int offset = Int(i) + Extract(offsets, j);
                                        // In interleaved storage each component spans SIMD::Width
                                        // consecutive slots, one per lane.
                                        if (interleavedByLane) { offset = offset * SIMD::Width + j; }
                                        v = Insert(v, ptrBase[offset], j);
                                }
                                dst.emplace(i, v);
                        }
                }
                else if (interleavedByLane)
                {
                        // Lane-interleaved data. No divergent offsets.
                        Pointer<SIMD::Float> src = ptrBase;
                        for (auto i = 0u; i < objectTy.sizeInComponents; i++)
                        {
                                dst.emplace(i, src[i]);
                        }
                }
                else
                {
                        // Non-interleaved data. No divergent offsets.
                        for (auto i = 0u; i < objectTy.sizeInComponents; i++)
                        {
                                dst.emplace(i, RValue<SIMD::Float>(ptrBase[i]));
                        }
                }
        }
1242
1243         void SpirvShader::EmitAccessChain(InsnIterator insn, SpirvRoutine *routine) const
1244         {
1245                 TypeID typeId = insn.word(1);
1246                 ObjectID objectId = insn.word(2);
1247                 ObjectID baseId = insn.word(3);
1248                 auto &object = getObject(objectId);
1249                 auto &type = getType(typeId);
1250                 ASSERT(type.sizeInComponents == 1);
1251                 ASSERT(getObject(baseId).pointerBase == object.pointerBase);
1252
1253                 auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
1254                 dst.emplace(0, As<SIMD::Float>(WalkAccessChain(baseId, insn.wordCount() - 4, insn.wordPointer(4), routine)));
1255         }
1256
1257         void SpirvShader::EmitStore(InsnIterator insn, SpirvRoutine *routine) const
1258         {
1259                 ObjectID pointerId = insn.word(1);
1260                 ObjectID objectId = insn.word(2);
1261                 auto &object = getObject(objectId);
1262                 auto &pointer = getObject(pointerId);
1263                 auto &pointerTy = getType(pointer.type);
1264                 auto &elementTy = getType(pointerTy.element);
1265                 auto &pointerBase = getObject(pointer.pointerBase);
1266                 auto &pointerBaseTy = getType(pointerBase.type);
1267
1268                 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1269                 {
1270                         UNIMPLEMENTED("StorageClassImage store not yet implemented");
1271                 }
1272
1273                 Pointer<Float> ptrBase;
1274                 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1275                 {
1276                         ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1277                 }
1278                 else
1279                 {
1280                         ptrBase = &routine->getValue(pointer.pointerBase)[0];
1281                 }
1282
1283                 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1284
1285                 if (object.kind == Object::Kind::Constant)
1286                 {
1287                         auto src = reinterpret_cast<float *>(object.constantValue.get());
1288
1289                         if (pointer.kind == Object::Kind::Value)
1290                         {
1291                                 // Constant source data. Divergent offsets.
1292                                 auto offsets = As<SIMD::Int>(routine->getIntermediate(pointerId)[0]);
1293                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1294                                 {
1295                                         for (int j = 0; j < SIMD::Width; j++)
1296                                         {
1297                                                 Int offset = Int(i) + Extract(offsets, j);
1298                                                 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1299                                                 ptrBase[offset] = RValue<Float>(src[i]);
1300                                         }
1301                                 }
1302                         }
1303                         else
1304                         {
1305                                 // Constant source data. No divergent offsets.
1306                                 Pointer<SIMD::Float> dst = ptrBase;
1307                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1308                                 {
1309                                         dst[i] = RValue<SIMD::Float>(src[i]);
1310                                 }
1311                         }
1312                 }
1313                 else
1314                 {
1315                         auto &src = routine->getIntermediate(objectId);
1316
1317                         if (pointer.kind == Object::Kind::Value)
1318                         {
1319                                 // Intermediate source data. Divergent offsets.
1320                                 auto offsets = As<SIMD::Int>(routine->getIntermediate(pointerId)[0]);
1321                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1322                                 {
1323                                         for (int j = 0; j < SIMD::Width; j++)
1324                                         {
1325                                                 Int offset = Int(i) + Extract(offsets, j);
1326                                                 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1327                                                 ptrBase[offset] = Extract(src[i], j);
1328                                         }
1329                                 }
1330                         }
1331                         else if (interleavedByLane)
1332                         {
1333                                 // Intermediate source data. Lane-interleaved data. No divergent offsets.
1334                                 Pointer<SIMD::Float> dst = ptrBase;
1335                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1336                                 {
1337                                         dst[i] = src[i];
1338                                 }
1339                         }
1340                         else
1341                         {
1342                                 // Intermediate source data. Non-interleaved data. No divergent offsets.
1343                                 Pointer<SIMD::Float> dst = ptrBase;
1344                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1345                                 {
1346                                         dst[i] = SIMD::Float(src[i]);
1347                                 }
1348                         }
1349                 }
1350         }
1351
1352         void SpirvShader::EmitCompositeConstruct(InsnIterator insn, SpirvRoutine *routine) const
1353         {
1354                 auto &type = getType(insn.word(1));
1355                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1356                 auto offset = 0u;
1357
1358                 for (auto i = 0u; i < insn.wordCount() - 3; i++)
1359                 {
1360                         ObjectID srcObjectId = insn.word(3u + i);
1361                         auto & srcObject = getObject(srcObjectId);
1362                         auto & srcObjectTy = getType(srcObject.type);
1363                         GenericValue srcObjectAccess(this, routine, srcObjectId);
1364
1365                         for (auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
1366                                 dst.emplace(offset++, srcObjectAccess[j]);
1367                 }
1368         }
1369
1370         void SpirvShader::EmitCompositeInsert(InsnIterator insn, SpirvRoutine *routine) const
1371         {
1372                 TypeID resultTypeId = insn.word(1);
1373                 auto &type = getType(resultTypeId);
1374                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1375                 auto &newPartObject = getObject(insn.word(3));
1376                 auto &newPartObjectTy = getType(newPartObject.type);
1377                 auto firstNewComponent = WalkLiteralAccessChain(resultTypeId, insn.wordCount() - 5, insn.wordPointer(5));
1378
1379                 GenericValue srcObjectAccess(this, routine, insn.word(4));
1380                 GenericValue newPartObjectAccess(this, routine, insn.word(3));
1381
1382                 // old components before
1383                 for (auto i = 0u; i < firstNewComponent; i++)
1384                 {
1385                         dst.emplace(i, srcObjectAccess[i]);
1386                 }
1387                 // new part
1388                 for (auto i = 0u; i < newPartObjectTy.sizeInComponents; i++)
1389                 {
1390                         dst.emplace(firstNewComponent + i, newPartObjectAccess[i]);
1391                 }
1392                 // old components after
1393                 for (auto i = firstNewComponent + newPartObjectTy.sizeInComponents; i < type.sizeInComponents; i++)
1394                 {
1395                         dst.emplace(i, srcObjectAccess[i]);
1396                 }
1397         }
1398
1399         void SpirvShader::EmitCompositeExtract(InsnIterator insn, SpirvRoutine *routine) const
1400         {
1401                 auto &type = getType(insn.word(1));
1402                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1403                 auto &compositeObject = getObject(insn.word(3));
1404                 TypeID compositeTypeId = compositeObject.definition.word(1);
1405                 auto firstComponent = WalkLiteralAccessChain(compositeTypeId, insn.wordCount() - 4, insn.wordPointer(4));
1406
1407                 GenericValue compositeObjectAccess(this, routine, insn.word(3));
1408                 for (auto i = 0u; i < type.sizeInComponents; i++)
1409                 {
1410                         dst.emplace(i, compositeObjectAccess[firstComponent + i]);
1411                 }
1412         }
1413
1414         void SpirvShader::EmitVectorShuffle(InsnIterator insn, SpirvRoutine *routine) const
1415         {
1416                 auto &type = getType(insn.word(1));
1417                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1418
1419                 // Note: number of components in result type, first half type, and second
1420                 // half type are all independent.
1421                 auto &firstHalfType = getType(getObject(insn.word(3)).type);
1422
1423                 GenericValue firstHalfAccess(this, routine, insn.word(3));
1424                 GenericValue secondHalfAccess(this, routine, insn.word(4));
1425
1426                 for (auto i = 0u; i < type.sizeInComponents; i++)
1427                 {
1428                         auto selector = insn.word(5 + i);
1429                         if (selector == static_cast<uint32_t>(-1))
1430                         {
1431                                 // Undefined value. Until we decide to do real undef values, zero is as good
1432                                 // a value as any
1433                                 dst.emplace(i, RValue<SIMD::Float>(0.0f));
1434                         }
1435                         else if (selector < firstHalfType.sizeInComponents)
1436                         {
1437                                 dst.emplace(i, firstHalfAccess[selector]);
1438                         }
1439                         else
1440                         {
1441                                 dst.emplace(i, secondHalfAccess[selector - firstHalfType.sizeInComponents]);
1442                         }
1443                 }
1444         }
1445
1446         void SpirvShader::EmitUnaryOp(InsnIterator insn, SpirvRoutine *routine) const
1447         {
1448                 auto &type = getType(insn.word(1));
1449                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1450                 auto src = GenericValue(this, routine, insn.word(3));
1451
1452                 for (auto i = 0u; i < type.sizeInComponents; i++)
1453                 {
1454                         auto val = src[i];
1455
1456                         switch (insn.opcode())
1457                         {
1458                         case spv::OpNot:
1459                         case spv::OpLogicalNot:         // logical not == bitwise not due to all-bits boolean representation
1460                                 dst.emplace(i, As<SIMD::Float>(~As<SIMD::UInt>(val)));
1461                                 break;
1462                         case spv::OpSNegate:
1463                                 dst.emplace(i, As<SIMD::Float>(-As<SIMD::Int>(val)));
1464                                 break;
1465                         case spv::OpFNegate:
1466                                 dst.emplace(i, -val);
1467                                 break;
1468                         case spv::OpConvertFToU:
1469                                 dst.emplace(i, As<SIMD::Float>(SIMD::UInt(val)));
1470                                 break;
1471                         case spv::OpConvertFToS:
1472                                 dst.emplace(i, As<SIMD::Float>(SIMD::Int(val)));
1473                                 break;
1474                         case spv::OpConvertSToF:
1475                                 dst.emplace(i, SIMD::Float(As<SIMD::Int>(val)));
1476                                 break;
1477                         case spv::OpConvertUToF:
1478                                 dst.emplace(i, SIMD::Float(As<SIMD::UInt>(val)));
1479                                 break;
1480                         case spv::OpBitcast:
1481                                 dst.emplace(i, val);
1482                                 break;
1483                         case spv::OpIsInf:
1484                                 dst.emplace(i, As<SIMD::Float>(IsInf(val)));
1485                                 break;
1486                         case spv::OpIsNan:
1487                                 dst.emplace(i, As<SIMD::Float>(IsNan(val)));
1488                                 break;
1489                         default:
1490                                 UNIMPLEMENTED("Unhandled unary operator %s", OpcodeName(insn.opcode()).c_str());
1491                         }
1492                 }
1493         }
1494
1495         void SpirvShader::EmitBinaryOp(InsnIterator insn, SpirvRoutine *routine) const
1496         {
1497                 auto &type = getType(insn.word(1));
1498                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1499                 auto &lhsType = getType(getObject(insn.word(3)).type);
1500                 auto srcLHS = GenericValue(this, routine, insn.word(3));
1501                 auto srcRHS = GenericValue(this, routine, insn.word(4));
1502
1503                 for (auto i = 0u; i < lhsType.sizeInComponents; i++)
1504                 {
1505                         auto lhs = srcLHS[i];
1506                         auto rhs = srcRHS[i];
1507
1508                         switch (insn.opcode())
1509                         {
1510                         case spv::OpIAdd:
1511                                 dst.emplace(i, As<SIMD::Float>(As<SIMD::Int>(lhs) + As<SIMD::Int>(rhs)));
1512                                 break;
1513                         case spv::OpISub:
1514                                 dst.emplace(i, As<SIMD::Float>(As<SIMD::Int>(lhs) - As<SIMD::Int>(rhs)));
1515                                 break;
1516                         case spv::OpIMul:
1517                                 dst.emplace(i, As<SIMD::Float>(As<SIMD::Int>(lhs) * As<SIMD::Int>(rhs)));
1518                                 break;
1519                         case spv::OpSDiv:
1520                                 dst.emplace(i, As<SIMD::Float>(As<SIMD::Int>(lhs) / As<SIMD::Int>(rhs)));
1521                                 break;
1522                         case spv::OpUDiv:
1523                                 dst.emplace(i, As<SIMD::Float>(As<SIMD::UInt>(lhs) / As<SIMD::UInt>(rhs)));
1524                                 break;
1525                         case spv::OpUMod:
1526                                 dst.emplace(i, As<SIMD::Float>(As<SIMD::UInt>(lhs) % As<SIMD::UInt>(rhs)));
1527                                 break;
1528                         case spv::OpIEqual:
1529                         case spv::OpLogicalEqual:
1530                                 dst.emplace(i, As<SIMD::Float>(CmpEQ(As<SIMD::Int>(lhs), As<SIMD::Int>(rhs))));
1531                                 break;
1532                         case spv::OpINotEqual:
1533                         case spv::OpLogicalNotEqual:
1534                                 dst.emplace(i, As<SIMD::Float>(CmpNEQ(As<SIMD::Int>(lhs), As<SIMD::Int>(rhs))));
1535                                 break;
1536                         case spv::OpUGreaterThan:
1537                                 dst.emplace(i, As<SIMD::Float>(CmpGT(As<SIMD::UInt>(lhs), As<SIMD::UInt>(rhs))));
1538                                 break;
1539                         case spv::OpSGreaterThan:
1540                                 dst.emplace(i, As<SIMD::Float>(CmpGT(As<SIMD::Int>(lhs), As<SIMD::Int>(rhs))));
1541                                 break;
1542                         case spv::OpUGreaterThanEqual:
1543                                 dst.emplace(i, As<SIMD::Float>(CmpGE(As<SIMD::UInt>(lhs), As<SIMD::UInt>(rhs))));
1544                                 break;
1545                         case spv::OpSGreaterThanEqual:
1546                                 dst.emplace(i, As<SIMD::Float>(CmpGE(As<SIMD::Int>(lhs), As<SIMD::Int>(rhs))));
1547                                 break;
1548                         case spv::OpULessThan:
1549                                 dst.emplace(i, As<SIMD::Float>(CmpLT(As<SIMD::UInt>(lhs), As<SIMD::UInt>(rhs))));
1550                                 break;
1551                         case spv::OpSLessThan:
1552                                 dst.emplace(i, As<SIMD::Float>(CmpLT(As<SIMD::Int>(lhs), As<SIMD::Int>(rhs))));
1553                                 break;
1554                         case spv::OpULessThanEqual:
1555                                 dst.emplace(i, As<SIMD::Float>(CmpLE(As<SIMD::UInt>(lhs), As<SIMD::UInt>(rhs))));
1556                                 break;
1557                         case spv::OpSLessThanEqual:
1558                                 dst.emplace(i, As<SIMD::Float>(CmpLE(As<SIMD::Int>(lhs), As<SIMD::Int>(rhs))));
1559                                 break;
1560                         case spv::OpFAdd:
1561                                 dst.emplace(i, lhs + rhs);
1562                                 break;
1563                         case spv::OpFSub:
1564                                 dst.emplace(i, lhs - rhs);
1565                                 break;
1566                         case spv::OpFMul:
1567                                 dst.emplace(i, lhs * rhs);
1568                                 break;
1569                         case spv::OpFDiv:
1570                                 dst.emplace(i, lhs / rhs);
1571                                 break;
1572                         case spv::OpFOrdEqual:
1573                                 dst.emplace(i, As<SIMD::Float>(CmpEQ(lhs, rhs)));
1574                                 break;
1575                         case spv::OpFUnordEqual:
1576                                 dst.emplace(i, As<SIMD::Float>(CmpUEQ(lhs, rhs)));
1577                                 break;
1578                         case spv::OpFOrdNotEqual:
1579                                 dst.emplace(i, As<SIMD::Float>(CmpNEQ(lhs, rhs)));
1580                                 break;
1581                         case spv::OpFUnordNotEqual:
1582                                 dst.emplace(i, As<SIMD::Float>(CmpUNEQ(lhs, rhs)));
1583                                 break;
1584                         case spv::OpFOrdLessThan:
1585                                 dst.emplace(i, As<SIMD::Float>(CmpLT(lhs, rhs)));
1586                                 break;
1587                         case spv::OpFUnordLessThan:
1588                                 dst.emplace(i, As<SIMD::Float>(CmpULT(lhs, rhs)));
1589                                 break;
1590                         case spv::OpFOrdGreaterThan:
1591                                 dst.emplace(i, As<SIMD::Float>(CmpGT(lhs, rhs)));
1592                                 break;
1593                         case spv::OpFUnordGreaterThan:
1594                                 dst.emplace(i, As<SIMD::Float>(CmpUGT(lhs, rhs)));
1595                                 break;
1596                         case spv::OpFOrdLessThanEqual:
1597                                 dst.emplace(i, As<SIMD::Float>(CmpLE(lhs, rhs)));
1598                                 break;
1599                         case spv::OpFUnordLessThanEqual:
1600                                 dst.emplace(i, As<SIMD::Float>(CmpULE(lhs, rhs)));
1601                                 break;
1602                         case spv::OpFOrdGreaterThanEqual:
1603                                 dst.emplace(i, As<SIMD::Float>(CmpGE(lhs, rhs)));
1604                                 break;
1605                         case spv::OpFUnordGreaterThanEqual:
1606                                 dst.emplace(i, As<SIMD::Float>(CmpUGE(lhs, rhs)));
1607                                 break;
1608                         case spv::OpShiftRightLogical:
1609                                 dst.emplace(i, As<SIMD::Float>(As<SIMD::UInt>(lhs) >> As<SIMD::UInt>(rhs)));
1610                                 break;
1611                         case spv::OpShiftRightArithmetic:
1612                                 dst.emplace(i, As<SIMD::Float>(As<SIMD::Int>(lhs) >> As<SIMD::Int>(rhs)));
1613                                 break;
1614                         case spv::OpShiftLeftLogical:
1615                                 dst.emplace(i, As<SIMD::Float>(As<SIMD::UInt>(lhs) << As<SIMD::UInt>(rhs)));
1616                                 break;
1617                         case spv::OpBitwiseOr:
1618                         case spv::OpLogicalOr:
1619                                 dst.emplace(i, As<SIMD::Float>(As<SIMD::UInt>(lhs) | As<SIMD::UInt>(rhs)));
1620                                 break;
1621                         case spv::OpBitwiseXor:
1622                                 dst.emplace(i, As<SIMD::Float>(As<SIMD::UInt>(lhs) ^ As<SIMD::UInt>(rhs)));
1623                                 break;
1624                         case spv::OpBitwiseAnd:
1625                         case spv::OpLogicalAnd:
1626                                 dst.emplace(i, As<SIMD::Float>(As<SIMD::UInt>(lhs) & As<SIMD::UInt>(rhs)));
1627                                 break;
1628                         case spv::OpSMulExtended:
1629                                 // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
1630                                 // In our flat view then, component i is the i'th component of the first member;
1631                                 // component i + N is the i'th component of the second member.
1632                                 dst.emplace(i, As<SIMD::Float>(As<SIMD::Int>(lhs) * As<SIMD::Int>(rhs)));
1633                                 dst.emplace(i + lhsType.sizeInComponents, As<SIMD::Float>(MulHigh(As<SIMD::Int>(lhs), As<SIMD::Int>(rhs))));
1634                                 break;
1635                         case spv::OpUMulExtended:
1636                                 dst.emplace(i, As<SIMD::Float>(As<SIMD::UInt>(lhs) * As<SIMD::UInt>(rhs)));
1637                                 dst.emplace(i + lhsType.sizeInComponents, As<SIMD::Float>(MulHigh(As<SIMD::UInt>(lhs), As<SIMD::UInt>(rhs))));
1638                                 break;
1639                         default:
1640                                 UNIMPLEMENTED("Unhandled binary operator %s", OpcodeName(insn.opcode()).c_str());
1641                         }
1642                 }
1643         }
1644
1645         void SpirvShader::EmitDot(InsnIterator insn, SpirvRoutine *routine) const
1646         {
1647                 auto &type = getType(insn.word(1));
1648                 assert(type.sizeInComponents == 1);
1649                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1650                 auto &lhsType = getType(getObject(insn.word(3)).type);
1651                 auto srcLHS = GenericValue(this, routine, insn.word(3));
1652                 auto srcRHS = GenericValue(this, routine, insn.word(4));
1653
1654                 SIMD::Float result = srcLHS[0] * srcRHS[0];
1655
1656                 for (auto i = 1u; i < lhsType.sizeInComponents; i++)
1657                 {
1658                         result += srcLHS[i] * srcRHS[i];
1659                 }
1660
1661                 dst.emplace(0, result);
1662         }
1663
1664         void SpirvShader::EmitSelect(InsnIterator insn, SpirvRoutine *routine) const
1665         {
1666                 auto &type = getType(insn.word(1));
1667                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1668                 auto srcCond = GenericValue(this, routine, insn.word(3));
1669                 auto srcLHS = GenericValue(this, routine, insn.word(4));
1670                 auto srcRHS = GenericValue(this, routine, insn.word(5));
1671
1672                 for (auto i = 0u; i < type.sizeInComponents; i++)
1673                 {
1674                         auto cond = As<SIMD::Int>(srcCond[i]);
1675                         auto lhs = srcLHS[i];
1676                         auto rhs = srcRHS[i];
1677                         auto out = (cond & As<Int4>(lhs)) | (~cond & As<Int4>(rhs));   // FIXME: IfThenElse()
1678                         dst.emplace(i, As<SIMD::Float>(out));
1679                 }
1680         }
1681
1682         void SpirvShader::EmitExtendedInstruction(InsnIterator insn, SpirvRoutine *routine) const
1683         {
1684                 auto &type = getType(insn.word(1));
1685                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1686                 auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
1687
1688                 switch (extInstIndex)
1689                 {
1690                 case GLSLstd450FAbs:
1691                 {
1692                         auto src = GenericValue(this, routine, insn.word(5));
1693                         for (auto i = 0u; i < type.sizeInComponents; i++)
1694                         {
1695                                 dst.emplace(i, Abs(src[i]));
1696                         }
1697                         break;
1698                 }
1699                 case GLSLstd450SAbs:
1700                 {
1701                         auto src = GenericValue(this, routine, insn.word(5));
1702                         for (auto i = 0u; i < type.sizeInComponents; i++)
1703                         {
1704                                 dst.emplace(i, As<SIMD::Float>(Abs(As<SIMD::Int>(src[i]))));
1705                         }
1706                         break;
1707                 }
1708                 case GLSLstd450Cross:
1709                 {
1710                         auto lhs = GenericValue(this, routine, insn.word(5));
1711                         auto rhs = GenericValue(this, routine, insn.word(6));
1712                         dst.emplace(0, lhs[1] * rhs[2] - rhs[1] * lhs[2]);
1713                         dst.emplace(1, lhs[2] * rhs[0] - rhs[2] * lhs[0]);
1714                         dst.emplace(2, lhs[0] * rhs[1] - rhs[0] * lhs[1]);
1715                         break;
1716                 }
1717                 case GLSLstd450Floor:
1718                 {
1719                         auto src = GenericValue(this, routine, insn.word(5));
1720                         for (auto i = 0u; i < type.sizeInComponents; i++)
1721                         {
1722                                 dst.emplace(i, Floor(src[i]));
1723                         }
1724                         break;
1725                 }
1726                 case GLSLstd450Trunc:
1727                 {
1728                         auto src = GenericValue(this, routine, insn.word(5));
1729                         for (auto i = 0u; i < type.sizeInComponents; i++)
1730                         {
1731                                 dst.emplace(i, Trunc(src[i]));
1732                         }
1733                         break;
1734                 }
1735                 case GLSLstd450Ceil:
1736                 {
1737                         auto src = GenericValue(this, routine, insn.word(5));
1738                         for (auto i = 0u; i < type.sizeInComponents; i++)
1739                         {
1740                                 dst.emplace(i, Ceil(src[i]));
1741                         }
1742                         break;
1743                 }
1744                 case GLSLstd450Fract:
1745                 {
1746                         auto src = GenericValue(this, routine, insn.word(5));
1747                         for (auto i = 0u; i < type.sizeInComponents; i++)
1748                         {
1749                                 dst.emplace(i, Frac(src[i]));
1750                         }
1751                         break;
1752                 }
1753                 case GLSLstd450Round:
1754                 {
1755                         auto src = GenericValue(this, routine, insn.word(5));
1756                         for (auto i = 0u; i < type.sizeInComponents; i++)
1757                         {
1758                                 dst.emplace(i, Round(src[i]));
1759                         }
1760                         break;
1761                 }
1762                 case GLSLstd450RoundEven:
1763                 {
1764                         auto src = GenericValue(this, routine, insn.word(5));
1765                         for (auto i = 0u; i < type.sizeInComponents; i++)
1766                         {
1767                                 auto x = Round(src[i]);
1768                                 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
1769                                 dst.emplace(i, x + ((SIMD::Float(CmpLT(x, src[i]) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
1770                                                 SIMD::Float(CmpEQ(Frac(src[i]), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
1771                         }
1772                         break;
1773                 }
1774                 case GLSLstd450FMin:
1775                 {
1776                         auto lhs = GenericValue(this, routine, insn.word(5));
1777                         auto rhs = GenericValue(this, routine, insn.word(6));
1778                         for (auto i = 0u; i < type.sizeInComponents; i++)
1779                         {
1780                                 dst.emplace(i, Min(lhs[i], rhs[i]));
1781                         }
1782                         break;
1783                 }
1784                 case GLSLstd450FMax:
1785                 {
1786                         auto lhs = GenericValue(this, routine, insn.word(5));
1787                         auto rhs = GenericValue(this, routine, insn.word(6));
1788                         for (auto i = 0u; i < type.sizeInComponents; i++)
1789                         {
1790                                 dst.emplace(i, Max(lhs[i], rhs[i]));
1791                         }
1792                         break;
1793                 }
1794                 case GLSLstd450SMin:
1795                 {
1796                         auto lhs = GenericValue(this, routine, insn.word(5));
1797                         auto rhs = GenericValue(this, routine, insn.word(6));
1798                         for (auto i = 0u; i < type.sizeInComponents; i++)
1799                         {
1800                                 dst.emplace(i, As<SIMD::Float>(Min(As<SIMD::Int>(lhs[i]), As<SIMD::Int>(rhs[i]))));
1801                         }
1802                         break;
1803                 }
1804                 case GLSLstd450SMax:
1805                 {
1806                         auto lhs = GenericValue(this, routine, insn.word(5));
1807                         auto rhs = GenericValue(this, routine, insn.word(6));
1808                         for (auto i = 0u; i < type.sizeInComponents; i++)
1809                         {
1810                                 dst.emplace(i, As<SIMD::Float>(Max(As<SIMD::Int>(lhs[i]), As<SIMD::Int>(rhs[i]))));
1811                         }
1812                         break;
1813                 }
1814                 case GLSLstd450UMin:
1815                 {
1816                         auto lhs = GenericValue(this, routine, insn.word(5));
1817                         auto rhs = GenericValue(this, routine, insn.word(6));
1818                         for (auto i = 0u; i < type.sizeInComponents; i++)
1819                         {
1820                                 dst.emplace(i, As<SIMD::Float>(Min(As<SIMD::UInt>(lhs[i]), As<SIMD::UInt>(rhs[i]))));
1821                         }
1822                         break;
1823                 }
1824                 case GLSLstd450UMax:
1825                 {
1826                         auto lhs = GenericValue(this, routine, insn.word(5));
1827                         auto rhs = GenericValue(this, routine, insn.word(6));
1828                         for (auto i = 0u; i < type.sizeInComponents; i++)
1829                         {
1830                                 dst.emplace(i, As<SIMD::Float>(Max(As<SIMD::UInt>(lhs[i]), As<SIMD::UInt>(rhs[i]))));
1831                         }
1832                         break;
1833                 }
1834                 case GLSLstd450Step:
1835                 {
1836                         auto edge = GenericValue(this, routine, insn.word(5));
1837                         auto x = GenericValue(this, routine, insn.word(6));
1838                         for (auto i = 0u; i < type.sizeInComponents; i++)
1839                         {
1840                                 dst.emplace(i, As<SIMD::Float>(CmpNLT(x[i], edge[i]) & As<SIMD::Int>(SIMD::Float(1.0f))));
1841                         }
1842                         break;
1843                 }
1844                 case GLSLstd450SmoothStep:
1845                 {
1846                         auto edge0 = GenericValue(this, routine, insn.word(5));
1847                         auto edge1 = GenericValue(this, routine, insn.word(6));
1848                         auto x = GenericValue(this, routine, insn.word(7));
1849                         for (auto i = 0u; i < type.sizeInComponents; i++)
1850                         {
1851                                 auto tx = Min(Max((x[i] - edge0[i]) / (edge1[i] - edge0[i]), SIMD::Float(0.0f)), SIMD::Float(1.0f));
1852                                 dst.emplace(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
1853                         }
1854                         break;
1855                 }
1856                 case GLSLstd450FMix:
1857                 {
1858                         auto x = GenericValue(this, routine, insn.word(5));
1859                         auto y = GenericValue(this, routine, insn.word(6));
1860                         auto a = GenericValue(this, routine, insn.word(7));
1861                         for (auto i = 0u; i < type.sizeInComponents; i++)
1862                         {
1863                                 dst.emplace(i, a[i] * (y[i] - x[i]) + x[i]);
1864                         }
1865                         break;
1866                 }
1867                 case GLSLstd450FClamp:
1868                 {
1869                         auto x = GenericValue(this, routine, insn.word(5));
1870                         auto minVal = GenericValue(this, routine, insn.word(6));
1871                         auto maxVal = GenericValue(this, routine, insn.word(7));
1872                         for (auto i = 0u; i < type.sizeInComponents; i++)
1873                         {
1874                                 dst.emplace(i, Min(Max(x[i], minVal[i]), maxVal[i]));
1875                         }
1876                         break;
1877                 }
1878                 case GLSLstd450SClamp:
1879                 {
1880                         auto x = GenericValue(this, routine, insn.word(5));
1881                         auto minVal = GenericValue(this, routine, insn.word(6));
1882                         auto maxVal = GenericValue(this, routine, insn.word(7));
1883                         for (auto i = 0u; i < type.sizeInComponents; i++)
1884                         {
1885                                 dst.emplace(i, As<SIMD::Float>(Min(Max(As<SIMD::Int>(x[i]), As<SIMD::Int>(minVal[i])), As<SIMD::Int>(maxVal[i]))));
1886                         }
1887                         break;
1888                 }
1889                 case GLSLstd450UClamp:
1890                 {
1891                         auto x = GenericValue(this, routine, insn.word(5));
1892                         auto minVal = GenericValue(this, routine, insn.word(6));
1893                         auto maxVal = GenericValue(this, routine, insn.word(7));
1894                         for (auto i = 0u; i < type.sizeInComponents; i++)
1895                         {
1896                                 dst.emplace(i, As<SIMD::Float>(Min(Max(As<SIMD::UInt>(x[i]), As<SIMD::UInt>(minVal[i])), As<SIMD::UInt>(maxVal[i]))));
1897                         }
1898                         break;
1899                 }
1900                 case GLSLstd450FSign:
1901                 {
1902                         auto src = GenericValue(this, routine, insn.word(5));
1903                         for (auto i = 0u; i < type.sizeInComponents; i++)
1904                         {
1905                                 auto neg = As<SIMD::Int>(CmpLT(src[i], SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
1906                                 auto pos = As<SIMD::Int>(CmpNLE(src[i], SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
1907                                 dst.emplace(i, As<SIMD::Float>(neg | pos));
1908                         }
1909                         break;
1910                 }
1911                 case GLSLstd450SSign:
1912                 {
1913                         auto src = GenericValue(this, routine, insn.word(5));
1914                         for (auto i = 0u; i < type.sizeInComponents; i++)
1915                         {
1916                                 auto neg = CmpLT(As<SIMD::Int>(src[i]), SIMD::Int(0)) & SIMD::Int(-1);
1917                                 auto pos = CmpNLE(As<SIMD::Int>(src[i]), SIMD::Int(0)) & SIMD::Int(1);
1918                                 dst.emplace(i, As<SIMD::Float>(neg | pos));
1919                         }
1920                         break;
1921                 }
1922                 default:
1923                         UNIMPLEMENTED("Unhandled ExtInst %d", extInstIndex);
1924                 }
1925         }
1926
1927         void SpirvShader::EmitAny(InsnIterator insn, SpirvRoutine *routine) const
1928         {
1929                 auto &type = getType(insn.word(1));
1930                 assert(type.sizeInComponents == 1);
1931                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1932                 auto &srcType = getType(getObject(insn.word(3)).type);
1933                 auto src = GenericValue(this, routine, insn.word(3));
1934
1935                 SIMD::UInt result = As<SIMD::UInt>(src[0]);
1936
1937                 for (auto i = 1u; i < srcType.sizeInComponents; i++)
1938                 {
1939                         result |= As<SIMD::UInt>(src[i]);
1940                 }
1941
1942                 dst.emplace(0, As<SIMD::Float>(result));
1943         }
1944
1945         void SpirvShader::EmitAll(InsnIterator insn, SpirvRoutine *routine) const
1946         {
1947                 auto &type = getType(insn.word(1));
1948                 assert(type.sizeInComponents == 1);
1949                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1950                 auto &srcType = getType(getObject(insn.word(3)).type);
1951                 auto src = GenericValue(this, routine, insn.word(3));
1952
1953                 SIMD::UInt result = As<SIMD::UInt>(src[0]);
1954
1955                 for (auto i = 1u; i < srcType.sizeInComponents; i++)
1956                 {
1957                         result &= As<SIMD::UInt>(src[i]);
1958                 }
1959
1960                 dst.emplace(0, As<SIMD::Float>(result));
1961         }
1962
1963         void SpirvShader::emitEpilog(SpirvRoutine *routine) const
1964         {
                     // Walks every instruction of the shader and, for each OpVariable that is
                     // an interface variable in the Output storage class, copies its scalar
                     // components from the routine's value storage into routine->outputs.
                     // All other opcodes are ignored.
1965                 for (auto insn : *this)
1966                 {
1967                         switch (insn.opcode())
1968                         {
1969                         case spv::OpVariable:
1970                         {
                                     // Word 2 of the instruction is used as the variable's result id.
1971                                 ObjectID resultId = insn.word(2);
1972                                 auto &object = getObject(resultId);
1973                                 auto &objectTy = getType(object.type);
1974                                 if (object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
1975                                 {
1976                                         auto &dst = routine->getValue(resultId);
                                             // Running index into dst; advanced once per callback invocation.
                                             // NOTE(review): assumes VisitInterface invokes the callback once per
                                             // scalar component, in the same order the components are stored in
                                             // dst -- confirm against VisitInterface.
1977                                         int offset = 0;
1978                                         VisitInterface(resultId,
1979                                                                    [&](Decorations const &d, AttribType type) {
                                                                                // '<<' binds tighter than '|': slot = (Location << 2) | Component,
                                                                                // i.e. four scalar slots per location (assumes Component is 0..3).
                                                                                // The 'type' argument is not used here.
1980                                                                            auto scalarSlot = d.Location << 2 | d.Component;
1981                                                                            routine->outputs[scalarSlot] = dst[offset++];
1982                                                                    });
1983                                 }
1984                                 break;
1985                         }
1986                         default:
1987                                 break;
1988                         }
1989                 }
1990         }
1991
1992         SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
1993                 pipelineLayout(pipelineLayout)
1994         {
                     // Nothing else to do: the initialiser above only stores the given
                     // pipeline layout pointer in the member of the same name.
1995         }
1996
1997 }