OSDN Git Service

Refactor variable type lookup
[android-x86/external-swiftshader.git] / src / Pipeline / SpirvShader.cpp
1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <spirv/unified1/spirv.hpp>
16 #include <spirv/unified1/GLSL.std.450.h>
17 #include "SpirvShader.hpp"
18 #include "System/Math.hpp"
19 #include "Vulkan/VkBuffer.hpp"
20 #include "Vulkan/VkDebug.hpp"
21 #include "Vulkan/VkPipelineLayout.hpp"
22 #include "Device/Config.hpp"
23
24 namespace sw
25 {
26         volatile int SpirvShader::serialCounter = 1;    // Source of per-shader serial IDs. Start at 1, 0 is invalid shader.
           // NOTE(review): the constructor post-increments this counter to obtain serialID.
           // `volatile` does not make that read-modify-write atomic, so if shaders can be
           // constructed from several threads at once, duplicate IDs are possible — confirm
           // whether construction is serialized by the caller.
27
28         SpirvShader::SpirvShader(InsnStore const &insns)
29                         : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
30                           outputs{MAX_INTERFACE_COMPONENTS},
31                           serialID{serialCounter++}, modes{}
32         {
                   // Analysis pass over the whole module. Walks every instruction once and:
                   //  - records decorations and member decorations,
                   //  - declares types and registers variables, constants and intermediate
                   //    results in `defs`,
                   //  - splits the instruction stream into blocks (OpLabel .. terminator),
                   //  - collects execution-mode information in `modes`,
                   //  - remembers the first block of the (single) function in `mainBlockId`.
                   // Any opcode not listed in the switch below ends up in UNIMPLEMENTED.
33                 ASSERT(insns.size() > 0);
34
35                 // Simplifying assumptions (to be satisfied by earlier transformations)
36                 // - There is exactly one entrypoint in the module, and it's the one we want
37                 // - The only input/output OpVariables present are those used by the entrypoint
38
                   // currentBlock is the ID of the block being scanned (value 0 while outside of
                   // any block); blockStart is the iterator positioned at that block's OpLabel.
39                 Block::ID currentBlock;
40                 InsnIterator blockStart;
41
42                 for (auto insn : *this)
43                 {
44                         switch (insn.opcode())
45                         {
46                         case spv::OpExecutionMode:
47                                 ProcessExecutionMode(insn);
48                                 break;
49
50                         case spv::OpDecorate:
51                         {
                                   // word(1) is the decorated ID, word(2) the decoration, and word(3),
                                   // when present, its literal argument (0 is passed when absent).
52                                 TypeOrObjectID targetId = insn.word(1);
53                                 auto decoration = static_cast<spv::Decoration>(insn.word(2));
54                                 decorations[targetId].Apply(
55                                                 decoration,
56                                                 insn.wordCount() > 3 ? insn.word(3) : 0);
57
                                   // Any Centroid decoration in the module sets the module-wide flag.
58                                 if (decoration == spv::DecorationCentroid)
59                                         modes.NeedsCentroid = true;
60                                 break;
61                         }
62
63                         case spv::OpMemberDecorate:
64                         {
                                   // word(1) is the decorated type, word(2) the member index, word(3) the
                                   // decoration, and word(4), when present, its literal argument.
65                                 Type::ID targetId = insn.word(1);
66                                 auto memberIndex = insn.word(2);
67                                 auto &d = memberDecorations[targetId];
68                                 if (memberIndex >= d.size())
69                                         d.resize(memberIndex + 1);    // on demand; exact size would require another pass...
70                                 auto decoration = static_cast<spv::Decoration>(insn.word(3));
71                                 d[memberIndex].Apply(
72                                                 decoration,
73                                                 insn.wordCount() > 4 ? insn.word(4) : 0);
74
75                                 if (decoration == spv::DecorationCentroid)
76                                         modes.NeedsCentroid = true;
77                                 break;
78                         }
79
80                         case spv::OpDecorationGroup:
81                                 // Nothing to do here. We don't need to record the definition of the group; we'll just have
82                                 // the bundle of decorations float around. If we were to ever walk the decorations directly,
83                                 // we might think about introducing this as a real Object.
84                                 break;
85
86                         case spv::OpGroupDecorate:
87                         {
                                   // word(1) names the group whose accumulated decorations are copied
                                   // onto every target listed from word(2) onwards.
88                                 auto const &srcDecorations = decorations[insn.word(1)];
89                                 for (auto i = 2u; i < insn.wordCount(); i++)
90                                 {
91                                         // remaining operands are targets to apply the group to.
92                                         decorations[insn.word(i)].Apply(srcDecorations);
93                                 }
94                                 break;
95                         }
96
97                         case spv::OpGroupMemberDecorate:
98                         {
                                   // Same as OpGroupDecorate, but each target is a (type, member index) pair,
                                   // hence the stride of two words.
99                                 auto const &srcDecorations = decorations[insn.word(1)];
100                                 for (auto i = 2u; i < insn.wordCount(); i += 2)
101                                 {
102                                         // remaining operands are pairs of <id>, literal for members to apply to.
103                                         auto &d = memberDecorations[insn.word(i)];
104                                         auto memberIndex = insn.word(i + 1);
105                                         if (memberIndex >= d.size())
106                                                 d.resize(memberIndex + 1);    // on demand resize, see above...
107                                         d[memberIndex].Apply(srcDecorations);
108                                 }
109                                 break;
110                         }
111
112                         case spv::OpLabel:
113                         {
                                    // A label opens a new block; the previous one must already have been
                                    // closed by a terminator (which resets currentBlock to 0).
114                                 ASSERT(currentBlock.value() == 0);
115                                 currentBlock = Block::ID(insn.word(1));
116                                 blockStart = insn;
117                                 break;
118                         }
119
120                         // Branch Instructions (subset of Termination Instructions):
121                         case spv::OpBranch:
122                         case spv::OpBranchConditional:
123                         case spv::OpSwitch:
124                         case spv::OpReturn:
125                         // fallthrough
126
127                         // Termination instruction:
128                         case spv::OpKill:
129                         case spv::OpUnreachable:
130                         {
                                    // Close the open block. The recorded range runs from the block's
                                    // OpLabel up to, but not including, the instruction after this terminator.
131                                 ASSERT(currentBlock.value() != 0);
132                                 auto blockEnd = insn; blockEnd++;
133                                 blocks[currentBlock] = Block(blockStart, blockEnd);
134                                 currentBlock = Block::ID(0);
135
136                                 if (insn.opcode() == spv::OpKill)
137                                 {
138                                         modes.ContainsKill = true;
139                                 }
140                                 break;
141                         }
142
143                         case spv::OpTypeVoid:
144                         case spv::OpTypeBool:
145                         case spv::OpTypeInt:
146                         case spv::OpTypeFloat:
147                         case spv::OpTypeVector:
148                         case spv::OpTypeMatrix:
149                         case spv::OpTypeImage:
150                         case spv::OpTypeSampler:
151                         case spv::OpTypeSampledImage:
152                         case spv::OpTypeArray:
153                         case spv::OpTypeRuntimeArray:
154                         case spv::OpTypeStruct:
155                         case spv::OpTypePointer:
156                         case spv::OpTypeFunction:
157                                 DeclareType(insn);
158                                 break;
159
160                         case spv::OpVariable:
161                         {
                                    // word(1) is the variable's (pointer) type, word(2) its result ID and
                                    // word(3) its storage class; any further word is an initializer.
162                                 Type::ID typeId = insn.word(1);
163                                 Object::ID resultId = insn.word(2);
164                                 auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
165                                 if (insn.wordCount() > 4)
166                                         UNIMPLEMENTED("Variable initializers not yet supported");
167
168                                 auto &object = defs[resultId];
169                                 object.kind = Object::Kind::Variable;
170                                 object.definition = insn;
171                                 object.type = typeId;
172                                 object.pointerBase = insn.word(2);      // base is itself
173
                                    // The storage class on the instruction must agree with the one recorded
                                    // on the variable's type by DeclareType.
174                                 ASSERT(getType(typeId).storageClass == storageClass);
175
176                                 switch (storageClass)
177                                 {
178                                 case spv::StorageClassInput:
179                                 case spv::StorageClassOutput:
                                            // Registers the variable as a builtin or a user-defined interface slot.
180                                         ProcessInterfaceVariable(object);
181                                         break;
182                                 case spv::StorageClassUniform:
183                                 case spv::StorageClassStorageBuffer:
184                                 case spv::StorageClassPushConstant:
                                            // Overrides the Variable kind assigned above.
185                                         object.kind = Object::Kind::PhysicalPointer;
186                                         break;
187
188                                 case spv::StorageClassPrivate:
189                                 case spv::StorageClassFunction:
190                                         break; // Correctly handled.
191
192                                 case spv::StorageClassUniformConstant:
193                                 case spv::StorageClassWorkgroup:
194                                 case spv::StorageClassCrossWorkgroup:
195                                 case spv::StorageClassGeneric:
196                                 case spv::StorageClassAtomicCounter:
197                                 case spv::StorageClassImage:
198                                         UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
199                                         break;
200
201                                 default:
202                                         UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
203                                         break;
204                                 }
205                                 break;
206                         }
207
208                         case spv::OpConstant:
                                    // Only the first literal word (word 3) is stored, as component 0.
209                                 CreateConstant(insn).constantValue[0] = insn.word(3);
210                                 break;
211                         case spv::OpConstantFalse:
212                                 CreateConstant(insn).constantValue[0] = 0;              // represent boolean false as zero
213                                 break;
214                         case spv::OpConstantTrue:
215                                 CreateConstant(insn).constantValue[0] = ~0u;    // represent boolean true as all bits set
216                                 break;
217                         case spv::OpConstantNull:
218                         case spv::OpUndef:
219                         {
220                                 // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
221                                 // OpConstantNull forms a constant of arbitrary type, all zeros.
222                                 auto &object = CreateConstant(insn);
223                                 auto &objectTy = getType(object.type);
224                                 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
225                                 {
226                                         object.constantValue[i] = 0;
227                                 }
228                                 break;
229                         }
230                         case spv::OpConstantComposite:
231                         {
                                    // Flatten the constituents (operands from word 3 onwards) into the new
                                    // constant: each constituent's components are appended in operand order.
232                                 auto &object = CreateConstant(insn);
233                                 auto offset = 0u;
234                                 for (auto i = 0u; i < insn.wordCount() - 3; i++)
235                                 {
236                                         auto &constituent = getObject(insn.word(i + 3));
237                                         auto &constituentTy = getType(constituent.type);
238                                         for (auto j = 0u; j < constituentTy.sizeInComponents; j++)
239                                                 object.constantValue[offset++] = constituent.constantValue[j];
240                                 }
241
242                                 auto objectId = Object::ID(insn.word(2));
243                                 auto decorationsIt = decorations.find(objectId);
244                                 if (decorationsIt != decorations.end() &&
245                                         decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
246                                 {
247                                         // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
248                                         // Decorating an object with the WorkgroupSize built-in
249                                         // decoration will make that object contain the dimensions
250                                         // of a local workgroup. If an object is decorated with the
251                                         // WorkgroupSize decoration, this must take precedence over
252                                         // any execution mode set for LocalSize.
253                                         // The object decorated with WorkgroupSize must be declared
254                                         // as a three-component vector of 32-bit integers.
255                                         ASSERT(getType(object.type).sizeInComponents == 3);
256                                         modes.WorkgroupSizeX = object.constantValue[0];
257                                         modes.WorkgroupSizeY = object.constantValue[1];
258                                         modes.WorkgroupSizeZ = object.constantValue[2];
259                                 }
260                                 break;
261                         }
262
263                         case spv::OpCapability:
264                                 break; // Various capabilities will be declared, but none affect our code generation at this point.
265                         case spv::OpMemoryModel:
266                                 break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
267
268                         case spv::OpEntryPoint:
269                                 break;
270                         case spv::OpFunction:
271                                 ASSERT(mainBlockId.value() == 0); // Multiple functions found
272                                 // Scan forward to find the function's label.
                                    // The scan starts on the OpFunction itself, which is why OpFunction is
                                    // accepted inside the loop; it stops as soon as mainBlockId is set.
273                                 for (auto it = insn; it != end() && mainBlockId.value() == 0; it++)
274                                 {
275                                         switch (it.opcode())
276                                         {
277                                         case spv::OpFunction:
278                                         case spv::OpFunctionParameter:
279                                                 break;
280                                         case spv::OpLabel:
281                                                 mainBlockId = Block::ID(it.word(1));
282                                                 break;
283                                         default:
284                                                 WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str());
285                                         }
286                                 }
287                                 ASSERT(mainBlockId.value() != 0); // Function's OpLabel not found
288                                 break;
289                         case spv::OpFunctionEnd:
290                                 // Due to preprocessing, the entrypoint and its function provide no value.
291                                 break;
292                         case spv::OpExtInstImport:
293                                 // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
294                                 // Valid shaders will not attempt to import any other instruction sets.
                                    // The import name is read as a C string starting at word 2.
295                                 if (0 != strcmp("GLSL.std.450", reinterpret_cast<char const *>(insn.wordPointer(2))))
296                                 {
297                                         UNIMPLEMENTED("Only GLSL extended instruction set is supported");
298                                 }
299                                 break;
300                         case spv::OpName:
301                         case spv::OpMemberName:
302                         case spv::OpSource:
303                         case spv::OpSourceContinued:
304                         case spv::OpSourceExtension:
305                         case spv::OpLine:
306                         case spv::OpNoLine:
307                         case spv::OpModuleProcessed:
308                         case spv::OpString:
309                                 // No semantic impact
310                                 break;
311
312                         case spv::OpFunctionParameter:
313                         case spv::OpFunctionCall:
314                         case spv::OpSpecConstant:
315                         case spv::OpSpecConstantComposite:
316                         case spv::OpSpecConstantFalse:
317                         case spv::OpSpecConstantOp:
318                         case spv::OpSpecConstantTrue:
319                                 // These should have all been removed by preprocessing passes. If we see them here,
320                                 // our assumptions are wrong and we will probably generate wrong code.
321                                 UNIMPLEMENTED("These instructions should have already been lowered.");
322                                 break;
323
324                         case spv::OpFConvert:
325                         case spv::OpSConvert:
326                         case spv::OpUConvert:
327                                 UNIMPLEMENTED("No valid uses for Op*Convert until we support multiple bit widths");
328                                 break;
329
330                         case spv::OpLoad:
331                         case spv::OpAccessChain:
332                         case spv::OpInBoundsAccessChain:
333                         case spv::OpCompositeConstruct:
334                         case spv::OpCompositeInsert:
335                         case spv::OpCompositeExtract:
336                         case spv::OpVectorShuffle:
337                         case spv::OpVectorTimesScalar:
338                         case spv::OpVectorExtractDynamic:
339                         case spv::OpVectorInsertDynamic:
340                         case spv::OpNot: // Unary ops
341                         case spv::OpSNegate:
342                         case spv::OpFNegate:
343                         case spv::OpLogicalNot:
344                         case spv::OpIAdd: // Binary ops
345                         case spv::OpISub:
346                         case spv::OpIMul:
347                         case spv::OpSDiv:
348                         case spv::OpUDiv:
349                         case spv::OpFAdd:
350                         case spv::OpFSub:
351                         case spv::OpFMul:
352                         case spv::OpFDiv:
353                         case spv::OpFMod:
354                         case spv::OpFRem:
355                         case spv::OpFOrdEqual:
356                         case spv::OpFUnordEqual:
357                         case spv::OpFOrdNotEqual:
358                         case spv::OpFUnordNotEqual:
359                         case spv::OpFOrdLessThan:
360                         case spv::OpFUnordLessThan:
361                         case spv::OpFOrdGreaterThan:
362                         case spv::OpFUnordGreaterThan:
363                         case spv::OpFOrdLessThanEqual:
364                         case spv::OpFUnordLessThanEqual:
365                         case spv::OpFOrdGreaterThanEqual:
366                         case spv::OpFUnordGreaterThanEqual:
367                         case spv::OpSMod:
368                         case spv::OpSRem:
369                         case spv::OpUMod:
370                         case spv::OpIEqual:
371                         case spv::OpINotEqual:
372                         case spv::OpUGreaterThan:
373                         case spv::OpSGreaterThan:
374                         case spv::OpUGreaterThanEqual:
375                         case spv::OpSGreaterThanEqual:
376                         case spv::OpULessThan:
377                         case spv::OpSLessThan:
378                         case spv::OpULessThanEqual:
379                         case spv::OpSLessThanEqual:
380                         case spv::OpShiftRightLogical:
381                         case spv::OpShiftRightArithmetic:
382                         case spv::OpShiftLeftLogical:
383                         case spv::OpBitwiseOr:
384                         case spv::OpBitwiseXor:
385                         case spv::OpBitwiseAnd:
386                         case spv::OpLogicalOr:
387                         case spv::OpLogicalAnd:
388                         case spv::OpLogicalEqual:
389                         case spv::OpLogicalNotEqual:
390                         case spv::OpUMulExtended:
391                         case spv::OpSMulExtended:
392                         case spv::OpDot:
393                         case spv::OpConvertFToU:
394                         case spv::OpConvertFToS:
395                         case spv::OpConvertSToF:
396                         case spv::OpConvertUToF:
397                         case spv::OpBitcast:
398                         case spv::OpSelect:
399                         case spv::OpExtInst:
400                         case spv::OpIsInf:
401                         case spv::OpIsNan:
402                         case spv::OpAny:
403                         case spv::OpAll:
404                         case spv::OpDPdx:
405                         case spv::OpDPdxCoarse:
406                         case spv::OpDPdy:
407                         case spv::OpDPdyCoarse:
408                         case spv::OpFwidth:
409                         case spv::OpFwidthCoarse:
410                         case spv::OpDPdxFine:
411                         case spv::OpDPdyFine:
412                         case spv::OpFwidthFine:
413                                 // Instructions that yield an intermediate value
414                         {
                                    // Record the result (word 2) with its type (word 1) and defining
                                    // instruction; no code is generated during this pass.
415                                 Type::ID typeId = insn.word(1);
416                                 Object::ID resultId = insn.word(2);
417                                 auto &object = defs[resultId];
418                                 object.type = typeId;
419                                 object.kind = Object::Kind::Value;
420                                 object.definition = insn;
421
422                                 if (insn.opcode() == spv::OpAccessChain || insn.opcode() == spv::OpInBoundsAccessChain)
423                                 {
424                                         // interior ptr has two parts:
425                                         // - logical base ptr, common across all lanes and known at compile time
426                                         // - per-lane offset
                                            // The chain inherits the pointerBase of the object it indexes into (word 3).
427                                         Object::ID baseId = insn.word(3);
428                                         object.pointerBase = getObject(baseId).pointerBase;
429                                 }
430                                 break;
431                         }
432
433                         case spv::OpStore:
434                                 // Don't need to do anything during analysis pass
435                                 break;
436
437                         default:
                                    // Any opcode without an explicit case above is reported by name.
438                                 UNIMPLEMENTED("%s", OpcodeName(insn.opcode()).c_str());
439                         }
440                 }
441         }
442
443         void SpirvShader::DeclareType(InsnIterator insn)
444         {
445                 Type::ID resultId = insn.word(1);
446
447                 auto &type = types[resultId];
448                 type.definition = insn;
449                 type.sizeInComponents = ComputeTypeSize(insn);
450
451                 // A structure is a builtin block if it has a builtin
452                 // member. All members of such a structure are builtins.
453                 switch (insn.opcode())
454                 {
455                 case spv::OpTypeStruct:
456                 {
457                         auto d = memberDecorations.find(resultId);
458                         if (d != memberDecorations.end())
459                         {
460                                 for (auto &m : d->second)
461                                 {
462                                         if (m.HasBuiltIn)
463                                         {
464                                                 type.isBuiltInBlock = true;
465                                                 break;
466                                         }
467                                 }
468                         }
469                         break;
470                 }
471                 case spv::OpTypePointer:
472                 {
473                         Type::ID elementTypeId = insn.word(3);
474                         type.element = elementTypeId;
475                         type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
476                         type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
477                         break;
478                 }
479                 case spv::OpTypeVector:
480                 case spv::OpTypeMatrix:
481                 case spv::OpTypeArray:
482                 case spv::OpTypeRuntimeArray:
483                 {
484                         Type::ID elementTypeId = insn.word(2);
485                         type.element = elementTypeId;
486                         break;
487                 }
488                 default:
489                         break;
490                 }
491         }
492
493         SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
494         {
495                 Type::ID typeId = insn.word(1);
496                 Object::ID resultId = insn.word(2);
497                 auto &object = defs[resultId];
498                 auto &objectTy = getType(typeId);
499                 object.type = typeId;
500                 object.kind = Object::Kind::Constant;
501                 object.definition = insn;
502                 object.constantValue = std::unique_ptr<uint32_t[]>(new uint32_t[objectTy.sizeInComponents]);
503                 return object;
504         }
505
506         void SpirvShader::ProcessInterfaceVariable(Object &object)
507         {
508                 auto &objectTy = getType(object.type);
509                 ASSERT(objectTy.storageClass == spv::StorageClassInput || objectTy.storageClass == spv::StorageClassOutput);
510
511                 ASSERT(objectTy.opcode() == spv::OpTypePointer);
512                 auto pointeeTy = getType(objectTy.element);
513
514                 auto &builtinInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputBuiltins : outputBuiltins;
515                 auto &userDefinedInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputs : outputs;
516
517                 ASSERT(object.opcode() == spv::OpVariable);
518                 Object::ID resultId = object.definition.word(2);
519
520                 if (objectTy.isBuiltInBlock)
521                 {
522                         // walk the builtin block, registering each of its members separately.
523                         auto m = memberDecorations.find(objectTy.element);
524                         ASSERT(m != memberDecorations.end());        // otherwise we wouldn't have marked the type chain
525                         auto &structType = pointeeTy.definition;
526                         auto offset = 0u;
527                         auto word = 2u;
528                         for (auto &member : m->second)
529                         {
530                                 auto &memberType = getType(structType.word(word));
531
532                                 if (member.HasBuiltIn)
533                                 {
534                                         builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
535                                 }
536
537                                 offset += memberType.sizeInComponents;
538                                 ++word;
539                         }
540                         return;
541                 }
542
543                 auto d = decorations.find(resultId);
544                 if (d != decorations.end() && d->second.HasBuiltIn)
545                 {
546                         builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
547                 }
548                 else
549                 {
550                         object.kind = Object::Kind::InterfaceVariable;
551                         VisitInterface(resultId,
552                                                    [&userDefinedInterface](Decorations const &d, AttribType type) {
553                                                            // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
554                                                            auto scalarSlot = (d.Location << 2) | d.Component;
555                                                            ASSERT(scalarSlot >= 0 &&
556                                                                           scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
557
558                                                            auto &slot = userDefinedInterface[scalarSlot];
559                                                            slot.Type = type;
560                                                            slot.Flat = d.Flat;
561                                                            slot.NoPerspective = d.NoPerspective;
562                                                            slot.Centroid = d.Centroid;
563                                                    });
564                 }
565         }
566
567         void SpirvShader::ProcessExecutionMode(InsnIterator insn)
568         {
569                 auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
570                 switch (mode)
571                 {
572                 case spv::ExecutionModeEarlyFragmentTests:
573                         modes.EarlyFragmentTests = true;
574                         break;
575                 case spv::ExecutionModeDepthReplacing:
576                         modes.DepthReplacing = true;
577                         break;
578                 case spv::ExecutionModeDepthGreater:
579                         modes.DepthGreater = true;
580                         break;
581                 case spv::ExecutionModeDepthLess:
582                         modes.DepthLess = true;
583                         break;
584                 case spv::ExecutionModeDepthUnchanged:
585                         modes.DepthUnchanged = true;
586                         break;
587                 case spv::ExecutionModeLocalSize:
588                         modes.WorkgroupSizeX = insn.word(3);
589                         modes.WorkgroupSizeY = insn.word(4);
590                         modes.WorkgroupSizeZ = insn.word(5);
591                         break;
592                 case spv::ExecutionModeOriginUpperLeft:
593                         // This is always the case for a Vulkan shader. Do nothing.
594                         break;
595                 default:
596                         UNIMPLEMENTED("No other execution modes are permitted");
597                 }
598         }
599
600         uint32_t SpirvShader::ComputeTypeSize(InsnIterator insn)
601         {
602                 // Types are always built from the bottom up (with the exception of forward ptrs, which
603                 // don't appear in Vulkan shaders. Therefore, we can always assume our component parts have
604                 // already been described (and so their sizes determined)
605                 switch (insn.opcode())
606                 {
607                 case spv::OpTypeVoid:
608                 case spv::OpTypeSampler:
609                 case spv::OpTypeImage:
610                 case spv::OpTypeSampledImage:
611                 case spv::OpTypeFunction:
612                 case spv::OpTypeRuntimeArray:
613                         // Objects that don't consume any space.
614                         // Descriptor-backed objects currently only need exist at compile-time.
615                         // Runtime arrays don't appear in places where their size would be interesting
616                         return 0;
617
618                 case spv::OpTypeBool:
619                 case spv::OpTypeFloat:
620                 case spv::OpTypeInt:
621                         // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
622                         // we might need to change this, but only 32 bit components are required for Vulkan 1.1.
623                         return 1;
624
625                 case spv::OpTypeVector:
626                 case spv::OpTypeMatrix:
627                         // Vectors and matrices both consume element count * element size.
628                         return getType(insn.word(2)).sizeInComponents * insn.word(3);
629
630                 case spv::OpTypeArray:
631                 {
632                         // Element count * element size. Array sizes come from constant ids.
633                         auto arraySize = GetConstantInt(insn.word(3));
634                         return getType(insn.word(2)).sizeInComponents * arraySize;
635                 }
636
637                 case spv::OpTypeStruct:
638                 {
639                         uint32_t size = 0;
640                         for (uint32_t i = 2u; i < insn.wordCount(); i++)
641                         {
642                                 size += getType(insn.word(i)).sizeInComponents;
643                         }
644                         return size;
645                 }
646
647                 case spv::OpTypePointer:
648                         // Runtime representation of a pointer is a per-lane index.
649                         // Note: clients are expected to look through the pointer if they want the pointee size instead.
650                         return 1;
651
652                 default:
653                         // Some other random insn.
654                         UNIMPLEMENTED("Only types are supported");
655                         return 0;
656                 }
657         }
658
659         bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
660         {
661                 switch (storageClass)
662                 {
663                 case spv::StorageClassUniform:
664                 case spv::StorageClassStorageBuffer:
665                 case spv::StorageClassPushConstant:
666                         return false;
667                 default:
668                         return true;
669                 }
670         }
671
672         template<typename F>
673         int SpirvShader::VisitInterfaceInner(Type::ID id, Decorations d, F f) const
674         {
675                 // Recursively walks variable definition and its type tree, taking into account
676                 // any explicit Location or Component decorations encountered; where explicit
677                 // Locations or Components are not specified, assigns them sequentially.
678                 // Collected decorations are carried down toward the leaves and across
679                 // siblings; Effect of decorations intentionally does not flow back up the tree.
680                 //
681                 // F is a functor to be called with the effective decoration set for every component.
682                 //
683                 // Returns the next available location, and calls f().
684
685                 // This covers the rules in Vulkan 1.1 spec, 14.1.4 Location Assignment.
686
687                 ApplyDecorationsForId(&d, id);
688
689                 auto const &obj = getType(id);
690                 switch(obj.opcode())
691                 {
692                 case spv::OpTypePointer:
693                         return VisitInterfaceInner<F>(obj.definition.word(3), d, f);
694                 case spv::OpTypeMatrix:
695                         for (auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
696                         {
697                                 // consumes same components of N consecutive locations
698                                 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
699                         }
700                         return d.Location;
701                 case spv::OpTypeVector:
702                         for (auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
703                         {
704                                 // consumes N consecutive components in the same location
705                                 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
706                         }
707                         return d.Location + 1;
708                 case spv::OpTypeFloat:
709                         f(d, ATTRIBTYPE_FLOAT);
710                         return d.Location + 1;
711                 case spv::OpTypeInt:
712                         f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
713                         return d.Location + 1;
714                 case spv::OpTypeBool:
715                         f(d, ATTRIBTYPE_UINT);
716                         return d.Location + 1;
717                 case spv::OpTypeStruct:
718                 {
719                         // iterate over members, which may themselves have Location/Component decorations
720                         for (auto i = 0u; i < obj.definition.wordCount() - 2; i++)
721                         {
722                                 ApplyDecorationsForIdMember(&d, id, i);
723                                 d.Location = VisitInterfaceInner<F>(obj.definition.word(i + 2), d, f);
724                                 d.Component = 0;    // Implicit locations always have component=0
725                         }
726                         return d.Location;
727                 }
728                 case spv::OpTypeArray:
729                 {
730                         auto arraySize = GetConstantInt(obj.definition.word(3));
731                         for (auto i = 0u; i < arraySize; i++)
732                         {
733                                 d.Location = VisitInterfaceInner<F>(obj.definition.word(2), d, f);
734                         }
735                         return d.Location;
736                 }
737                 default:
738                         // Intentionally partial; most opcodes do not participate in type hierarchies
739                         return 0;
740                 }
741         }
742
743         template<typename F>
744         void SpirvShader::VisitInterface(Object::ID id, F f) const
745         {
746                 // Walk a variable definition and call f for each component in it.
747                 Decorations d{};
748                 ApplyDecorationsForId(&d, id);
749
750                 auto def = getObject(id).definition;
751                 ASSERT(def.opcode() == spv::OpVariable);
752                 VisitInterfaceInner<F>(def.word(1), d, f);
753         }
754
755         SIMD::Int SpirvShader::WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
756         {
757                 // Produce a offset into external memory in sizeof(float) units
758
759                 int constantOffset = 0;
760                 SIMD::Int dynamicOffset = SIMD::Int(0);
761                 auto &baseObject = getObject(id);
762                 Type::ID typeId = getType(baseObject.type).element;
763                 Decorations d{};
764                 ApplyDecorationsForId(&d, baseObject.type);
765
766                 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
767                 // Start with its offset and build from there.
768                 if (baseObject.kind == Object::Kind::Value)
769                 {
770                         dynamicOffset += routine->getIntermediate(id).Int(0);
771                 }
772
773                 for (auto i = 0u; i < numIndexes; i++)
774                 {
775                         auto & type = getType(typeId);
776                         switch (type.definition.opcode())
777                         {
778                         case spv::OpTypeStruct:
779                         {
780                                 int memberIndex = GetConstantInt(indexIds[i]);
781                                 ApplyDecorationsForIdMember(&d, typeId, memberIndex);
782                                 ASSERT(d.HasOffset);
783                                 constantOffset += d.Offset / sizeof(float);
784                                 typeId = type.definition.word(2u + memberIndex);
785                                 break;
786                         }
787                         case spv::OpTypeArray:
788                         case spv::OpTypeRuntimeArray:
789                         {
790                                 // TODO: b/127950082: Check bounds.
791                                 ApplyDecorationsForId(&d, typeId);
792                                 ASSERT(d.HasArrayStride);
793                                 auto & obj = getObject(indexIds[i]);
794                                 if (obj.kind == Object::Kind::Constant)
795                                         constantOffset += d.ArrayStride/sizeof(float) * GetConstantInt(indexIds[i]);
796                                 else
797                                         dynamicOffset += SIMD::Int(d.ArrayStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
798                                 typeId = type.element;
799                                 break;
800                         }
801                         case spv::OpTypeMatrix:
802                         {
803                                 // TODO: b/127950082: Check bounds.
804                                 ApplyDecorationsForId(&d, typeId);
805                                 ASSERT(d.HasMatrixStride);
806                                 auto & obj = getObject(indexIds[i]);
807                                 if (obj.kind == Object::Kind::Constant)
808                                         constantOffset += d.MatrixStride/sizeof(float) * GetConstantInt(indexIds[i]);
809                                 else
810                                         dynamicOffset += SIMD::Int(d.MatrixStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
811                                 typeId = type.element;
812                                 break;
813                         }
814                         case spv::OpTypeVector:
815                         {
816                                 auto & obj = getObject(indexIds[i]);
817                                 if (obj.kind == Object::Kind::Constant)
818                                         constantOffset += GetConstantInt(indexIds[i]);
819                                 else
820                                         dynamicOffset += routine->getIntermediate(indexIds[i]).Int(0);
821                                 typeId = type.element;
822                                 break;
823                         }
824                         default:
825                                 UNIMPLEMENTED("Unexpected type '%s' in WalkExplicitLayoutAccessChain", OpcodeName(type.definition.opcode()).c_str());
826                         }
827                 }
828
829                 return dynamicOffset + SIMD::Int(constantOffset);
830         }
831
832         SIMD::Int SpirvShader::WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
833         {
834                 // TODO: avoid doing per-lane work in some cases if we can?
835                 // Produce a *component* offset into location-oriented memory
836
837                 int constantOffset = 0;
838                 SIMD::Int dynamicOffset = SIMD::Int(0);
839                 auto &baseObject = getObject(id);
840                 Type::ID typeId = getType(baseObject.type).element;
841
842                 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
843                 // Start with its offset and build from there.
844                 if (baseObject.kind == Object::Kind::Value)
845                 {
846                         dynamicOffset += routine->getIntermediate(id).Int(0);
847                 }
848
849                 for (auto i = 0u; i < numIndexes; i++)
850                 {
851                         auto & type = getType(typeId);
852                         switch(type.opcode())
853                         {
854                         case spv::OpTypeStruct:
855                         {
856                                 int memberIndex = GetConstantInt(indexIds[i]);
857                                 int offsetIntoStruct = 0;
858                                 for (auto j = 0; j < memberIndex; j++) {
859                                         auto memberType = type.definition.word(2u + j);
860                                         offsetIntoStruct += getType(memberType).sizeInComponents;
861                                 }
862                                 constantOffset += offsetIntoStruct;
863                                 typeId = type.definition.word(2u + memberIndex);
864                                 break;
865                         }
866
867                         case spv::OpTypeVector:
868                         case spv::OpTypeMatrix:
869                         case spv::OpTypeArray:
870                         case spv::OpTypeRuntimeArray:
871                         {
872                                 // TODO: b/127950082: Check bounds.
873                                 auto stride = getType(type.element).sizeInComponents;
874                                 auto & obj = getObject(indexIds[i]);
875                                 if (obj.kind == Object::Kind::Constant)
876                                         constantOffset += stride * GetConstantInt(indexIds[i]);
877                                 else
878                                         dynamicOffset += SIMD::Int(stride) * routine->getIntermediate(indexIds[i]).Int(0);
879                                 typeId = type.element;
880                                 break;
881                         }
882
883                         default:
884                                 UNIMPLEMENTED("Unexpected type '%s' in WalkAccessChain", OpcodeName(type.opcode()).c_str());
885                         }
886                 }
887
888                 return dynamicOffset + SIMD::Int(constantOffset);
889         }
890
891         uint32_t SpirvShader::WalkLiteralAccessChain(Type::ID typeId, uint32_t numIndexes, uint32_t const *indexes) const
892         {
893                 uint32_t constantOffset = 0;
894
895                 for (auto i = 0u; i < numIndexes; i++)
896                 {
897                         auto & type = getType(typeId);
898                         switch(type.opcode())
899                         {
900                         case spv::OpTypeStruct:
901                         {
902                                 int memberIndex = indexes[i];
903                                 int offsetIntoStruct = 0;
904                                 for (auto j = 0; j < memberIndex; j++) {
905                                         auto memberType = type.definition.word(2u + j);
906                                         offsetIntoStruct += getType(memberType).sizeInComponents;
907                                 }
908                                 constantOffset += offsetIntoStruct;
909                                 typeId = type.definition.word(2u + memberIndex);
910                                 break;
911                         }
912
913                         case spv::OpTypeVector:
914                         case spv::OpTypeMatrix:
915                         case spv::OpTypeArray:
916                         {
917                                 auto elementType = type.definition.word(2);
918                                 auto stride = getType(elementType).sizeInComponents;
919                                 constantOffset += stride * indexes[i];
920                                 typeId = elementType;
921                                 break;
922                         }
923
924                         default:
925                                 UNIMPLEMENTED("Unexpected type in WalkLiteralAccessChain");
926                         }
927                 }
928
929                 return constantOffset;
930         }
931
932         void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
933         {
934                 switch (decoration)
935                 {
936                 case spv::DecorationLocation:
937                         HasLocation = true;
938                         Location = static_cast<int32_t>(arg);
939                         break;
940                 case spv::DecorationComponent:
941                         HasComponent = true;
942                         Component = arg;
943                         break;
944                 case spv::DecorationDescriptorSet:
945                         HasDescriptorSet = true;
946                         DescriptorSet = arg;
947                         break;
948                 case spv::DecorationBinding:
949                         HasBinding = true;
950                         Binding = arg;
951                         break;
952                 case spv::DecorationBuiltIn:
953                         HasBuiltIn = true;
954                         BuiltIn = static_cast<spv::BuiltIn>(arg);
955                         break;
956                 case spv::DecorationFlat:
957                         Flat = true;
958                         break;
959                 case spv::DecorationNoPerspective:
960                         NoPerspective = true;
961                         break;
962                 case spv::DecorationCentroid:
963                         Centroid = true;
964                         break;
965                 case spv::DecorationBlock:
966                         Block = true;
967                         break;
968                 case spv::DecorationBufferBlock:
969                         BufferBlock = true;
970                         break;
971                 case spv::DecorationOffset:
972                         HasOffset = true;
973                         Offset = static_cast<int32_t>(arg);
974                         break;
975                 case spv::DecorationArrayStride:
976                         HasArrayStride = true;
977                         ArrayStride = static_cast<int32_t>(arg);
978                         break;
979                 case spv::DecorationMatrixStride:
980                         HasMatrixStride = true;
981                         MatrixStride = static_cast<int32_t>(arg);
982                         break;
983                 default:
984                         // Intentionally partial, there are many decorations we just don't care about.
985                         break;
986                 }
987         }
988
989         void SpirvShader::Decorations::Apply(const sw::SpirvShader::Decorations &src)
990         {
991                 // Apply a decoration group to this set of decorations
992                 if (src.HasBuiltIn)
993                 {
994                         HasBuiltIn = true;
995                         BuiltIn = src.BuiltIn;
996                 }
997
998                 if (src.HasLocation)
999                 {
1000                         HasLocation = true;
1001                         Location = src.Location;
1002                 }
1003
1004                 if (src.HasComponent)
1005                 {
1006                         HasComponent = true;
1007                         Component = src.Component;
1008                 }
1009
1010                 if (src.HasDescriptorSet)
1011                 {
1012                         HasDescriptorSet = true;
1013                         DescriptorSet = src.DescriptorSet;
1014                 }
1015
1016                 if (src.HasBinding)
1017                 {
1018                         HasBinding = true;
1019                         Binding = src.Binding;
1020                 }
1021
1022                 if (src.HasOffset)
1023                 {
1024                         HasOffset = true;
1025                         Offset = src.Offset;
1026                 }
1027
1028                 if (src.HasArrayStride)
1029                 {
1030                         HasArrayStride = true;
1031                         ArrayStride = src.ArrayStride;
1032                 }
1033
1034                 if (src.HasMatrixStride)
1035                 {
1036                         HasMatrixStride = true;
1037                         MatrixStride = src.MatrixStride;
1038                 }
1039
1040                 Flat |= src.Flat;
1041                 NoPerspective |= src.NoPerspective;
1042                 Centroid |= src.Centroid;
1043                 Block |= src.Block;
1044                 BufferBlock |= src.BufferBlock;
1045         }
1046
1047         void SpirvShader::ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const
1048         {
1049                 auto it = decorations.find(id);
1050                 if (it != decorations.end())
1051                         d->Apply(it->second);
1052         }
1053
1054         void SpirvShader::ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const
1055         {
1056                 auto it = memberDecorations.find(id);
1057                 if (it != memberDecorations.end() && member < it->second.size())
1058                 {
1059                         d->Apply(it->second[member]);
1060                 }
1061         }
1062
1063         uint32_t SpirvShader::GetConstantInt(Object::ID id) const
1064         {
1065                 // Slightly hackish access to constants very early in translation.
1066                 // General consumption of constants by other instructions should
1067                 // probably be just lowered to Reactor.
1068
1069                 // TODO: not encountered yet since we only use this for array sizes etc,
1070                 // but is possible to construct integer constant 0 via OpConstantNull.
1071                 auto insn = getObject(id).definition;
1072                 ASSERT(insn.opcode() == spv::OpConstant);
1073                 ASSERT(getType(insn.word(1)).opcode() == spv::OpTypeInt);
1074                 return insn.word(3);
1075         }
1076
1077         // emit-time
1078
1079         void SpirvShader::emitProlog(SpirvRoutine *routine) const
1080         {
1081                 for (auto insn : *this)
1082                 {
1083                         switch (insn.opcode())
1084                         {
1085                         case spv::OpVariable:
1086                         {
1087                                 Type::ID resultPointerTypeId = insn.word(1);
1088                                 auto resultPointerType = getType(resultPointerTypeId);
1089                                 auto pointeeType = getType(resultPointerType.element);
1090
1091                                 if(pointeeType.sizeInComponents > 0)  // TODO: what to do about zero-slot objects?
1092                                 {
1093                                         Object::ID resultId = insn.word(2);
1094                                         routine->createLvalue(resultId, pointeeType.sizeInComponents);
1095                                 }
1096                                 break;
1097                         }
1098                         default:
1099                                 // Nothing else produces interface variables, so can all be safely ignored.
1100                                 break;
1101                         }
1102                 }
1103         }
1104
1105         void SpirvShader::emit(SpirvRoutine *routine) const
1106         {
1107                 // Emit everything up to the first label
1108                 // TODO: Separate out dispatch of block from non-block instructions?
1109                 for (auto insn : *this)
1110                 {
1111                         if (insn.opcode() == spv::OpLabel)
1112                         {
1113                                 break;
1114                         }
1115                         EmitInstruction(routine, insn);
1116                 }
1117
1118                 // Emit the main function block
1119                 EmitBlock(routine, getBlock(mainBlockId));
1120         }
1121
1122         void SpirvShader::EmitBlock(SpirvRoutine *routine, Block const &block) const
1123         {
1124                 for (auto insn : block)
1125                 {
1126                         EmitInstruction(routine, insn);
1127                 }
1128         }
1129
1130         void SpirvShader::EmitInstruction(SpirvRoutine *routine, InsnIterator insn) const
1131         {
1132                 switch (insn.opcode())
1133                 {
1134                 case spv::OpTypeVoid:
1135                 case spv::OpTypeInt:
1136                 case spv::OpTypeFloat:
1137                 case spv::OpTypeBool:
1138                 case spv::OpTypeVector:
1139                 case spv::OpTypeArray:
1140                 case spv::OpTypeRuntimeArray:
1141                 case spv::OpTypeMatrix:
1142                 case spv::OpTypeStruct:
1143                 case spv::OpTypePointer:
1144                 case spv::OpTypeFunction:
1145                 case spv::OpExecutionMode:
1146                 case spv::OpMemoryModel:
1147                 case spv::OpFunction:
1148                 case spv::OpFunctionEnd:
1149                 case spv::OpConstant:
1150                 case spv::OpConstantNull:
1151                 case spv::OpConstantTrue:
1152                 case spv::OpConstantFalse:
1153                 case spv::OpConstantComposite:
1154                 case spv::OpUndef:
1155                 case spv::OpExtension:
1156                 case spv::OpCapability:
1157                 case spv::OpEntryPoint:
1158                 case spv::OpExtInstImport:
1159                 case spv::OpDecorate:
1160                 case spv::OpMemberDecorate:
1161                 case spv::OpGroupDecorate:
1162                 case spv::OpGroupMemberDecorate:
1163                 case spv::OpDecorationGroup:
1164                 case spv::OpName:
1165                 case spv::OpMemberName:
1166                 case spv::OpSource:
1167                 case spv::OpSourceContinued:
1168                 case spv::OpSourceExtension:
1169                 case spv::OpLine:
1170                 case spv::OpNoLine:
1171                 case spv::OpModuleProcessed:
1172                 case spv::OpString:
1173                         // Nothing to do at emit time. These are either fully handled at analysis time,
1174                         // or don't require any work at all.
1175                         break;
1176
1177                 case spv::OpLabel:
1178                 case spv::OpReturn:
1179                         // TODO: when we do control flow, will need to do some work here.
1180                         // Until then, there is nothing to do -- we expect there to be an initial OpLabel
1181                         // in the entrypoint function, for which we do nothing; and a final OpReturn at the
1182                         // end of the entrypoint function, for which we do nothing.
1183                         break;
1184
1185                 case spv::OpVariable:
1186                         EmitVariable(insn, routine);
1187                         break;
1188
1189                 case spv::OpLoad:
1190                         EmitLoad(insn, routine);
1191                         break;
1192
1193                 case spv::OpStore:
1194                         EmitStore(insn, routine);
1195                         break;
1196
1197                 case spv::OpAccessChain:
1198                 case spv::OpInBoundsAccessChain:
1199                         EmitAccessChain(insn, routine);
1200                         break;
1201
1202                 case spv::OpCompositeConstruct:
1203                         EmitCompositeConstruct(insn, routine);
1204                         break;
1205
1206                 case spv::OpCompositeInsert:
1207                         EmitCompositeInsert(insn, routine);
1208                         break;
1209
1210                 case spv::OpCompositeExtract:
1211                         EmitCompositeExtract(insn, routine);
1212                         break;
1213
1214                 case spv::OpVectorShuffle:
1215                         EmitVectorShuffle(insn, routine);
1216                         break;
1217
1218                 case spv::OpVectorExtractDynamic:
1219                         EmitVectorExtractDynamic(insn, routine);
1220                         break;
1221
1222                 case spv::OpVectorInsertDynamic:
1223                         EmitVectorInsertDynamic(insn, routine);
1224                         break;
1225
1226                 case spv::OpVectorTimesScalar:
1227                         EmitVectorTimesScalar(insn, routine);
1228                         break;
1229
1230                 case spv::OpNot:
1231                 case spv::OpSNegate:
1232                 case spv::OpFNegate:
1233                 case spv::OpLogicalNot:
1234                 case spv::OpConvertFToU:
1235                 case spv::OpConvertFToS:
1236                 case spv::OpConvertSToF:
1237                 case spv::OpConvertUToF:
1238                 case spv::OpBitcast:
1239                 case spv::OpIsInf:
1240                 case spv::OpIsNan:
1241                 case spv::OpDPdx:
1242                 case spv::OpDPdxCoarse:
1243                 case spv::OpDPdy:
1244                 case spv::OpDPdyCoarse:
1245                 case spv::OpFwidth:
1246                 case spv::OpFwidthCoarse:
1247                 case spv::OpDPdxFine:
1248                 case spv::OpDPdyFine:
1249                 case spv::OpFwidthFine:
1250                         EmitUnaryOp(insn, routine);
1251                         break;
1252
1253                 case spv::OpIAdd:
1254                 case spv::OpISub:
1255                 case spv::OpIMul:
1256                 case spv::OpSDiv:
1257                 case spv::OpUDiv:
1258                 case spv::OpFAdd:
1259                 case spv::OpFSub:
1260                 case spv::OpFMul:
1261                 case spv::OpFDiv:
1262                 case spv::OpFMod:
1263                 case spv::OpFRem:
1264                 case spv::OpFOrdEqual:
1265                 case spv::OpFUnordEqual:
1266                 case spv::OpFOrdNotEqual:
1267                 case spv::OpFUnordNotEqual:
1268                 case spv::OpFOrdLessThan:
1269                 case spv::OpFUnordLessThan:
1270                 case spv::OpFOrdGreaterThan:
1271                 case spv::OpFUnordGreaterThan:
1272                 case spv::OpFOrdLessThanEqual:
1273                 case spv::OpFUnordLessThanEqual:
1274                 case spv::OpFOrdGreaterThanEqual:
1275                 case spv::OpFUnordGreaterThanEqual:
1276                 case spv::OpSMod:
1277                 case spv::OpSRem:
1278                 case spv::OpUMod:
1279                 case spv::OpIEqual:
1280                 case spv::OpINotEqual:
1281                 case spv::OpUGreaterThan:
1282                 case spv::OpSGreaterThan:
1283                 case spv::OpUGreaterThanEqual:
1284                 case spv::OpSGreaterThanEqual:
1285                 case spv::OpULessThan:
1286                 case spv::OpSLessThan:
1287                 case spv::OpULessThanEqual:
1288                 case spv::OpSLessThanEqual:
1289                 case spv::OpShiftRightLogical:
1290                 case spv::OpShiftRightArithmetic:
1291                 case spv::OpShiftLeftLogical:
1292                 case spv::OpBitwiseOr:
1293                 case spv::OpBitwiseXor:
1294                 case spv::OpBitwiseAnd:
1295                 case spv::OpLogicalOr:
1296                 case spv::OpLogicalAnd:
1297                 case spv::OpLogicalEqual:
1298                 case spv::OpLogicalNotEqual:
1299                 case spv::OpUMulExtended:
1300                 case spv::OpSMulExtended:
1301                         EmitBinaryOp(insn, routine);
1302                         break;
1303
1304                 case spv::OpDot:
1305                         EmitDot(insn, routine);
1306                         break;
1307
1308                 case spv::OpSelect:
1309                         EmitSelect(insn, routine);
1310                         break;
1311
1312                 case spv::OpExtInst:
1313                         EmitExtendedInstruction(insn, routine);
1314                         break;
1315
1316                 case spv::OpAny:
1317                         EmitAny(insn, routine);
1318                         break;
1319
1320                 case spv::OpAll:
1321                         EmitAll(insn, routine);
1322                         break;
1323
1324                 case spv::OpBranch:
1325                         EmitBranch(insn, routine);
1326                         break;
1327
1328                 default:
1329                         UNIMPLEMENTED("opcode: %s", OpcodeName(insn.opcode()).c_str());
1330                         break;
1331                 }
1332         }
1333
1334         void SpirvShader::EmitVariable(InsnIterator insn, SpirvRoutine *routine) const
1335         {
1336                 Object::ID resultId = insn.word(2);
1337                 auto &object = getObject(resultId);
1338                 auto &objectTy = getType(object.type);
1339                 switch (objectTy.storageClass)
1340                 {
1341                 case spv::StorageClassInput:
1342                 {
1343                         if (object.kind == Object::Kind::InterfaceVariable)
1344                         {
1345                                 auto &dst = routine->getValue(resultId);
1346                                 int offset = 0;
1347                                 VisitInterface(resultId,
1348                                                                 [&](Decorations const &d, AttribType type) {
1349                                                                         auto scalarSlot = d.Location << 2 | d.Component;
1350                                                                         dst[offset++] = routine->inputs[scalarSlot];
1351                                                                 });
1352                         }
1353                         break;
1354                 }
1355                 case spv::StorageClassUniform:
1356                 case spv::StorageClassStorageBuffer:
1357                 {
1358                         Decorations d{};
1359                         ApplyDecorationsForId(&d, resultId);
1360                         ASSERT(d.DescriptorSet >= 0);
1361                         ASSERT(d.Binding >= 0);
1362
1363                         size_t bindingOffset = routine->pipelineLayout->getBindingOffset(d.DescriptorSet, d.Binding);
1364
1365                         Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
1366                         Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // VkDescriptorBufferInfo*
1367                         Pointer<Byte> buffer = *Pointer<Pointer<Byte>>(binding + OFFSET(VkDescriptorBufferInfo, buffer)); // vk::Buffer*
1368                         Pointer<Byte> data = *Pointer<Pointer<Byte>>(buffer + vk::Buffer::DataOffset); // void*
1369                         Int offset = *Pointer<Int>(binding + OFFSET(VkDescriptorBufferInfo, offset));
1370                         Pointer<Byte> address = data + offset;
1371                         routine->physicalPointers[resultId] = address;
1372                         break;
1373                 }
1374                 case spv::StorageClassPushConstant:
1375                 {
1376                         routine->physicalPointers[resultId] = routine->pushConstants;
1377                         break;
1378                 }
1379                 default:
1380                         break;
1381                 }
1382         }
1383
1384         void SpirvShader::EmitLoad(InsnIterator insn, SpirvRoutine *routine) const
1385         {
1386                 Object::ID objectId = insn.word(2);
1387                 Object::ID pointerId = insn.word(3);
1388                 auto &object = getObject(objectId);
1389                 auto &objectTy = getType(object.type);
1390                 auto &pointer = getObject(pointerId);
1391                 auto &pointerBase = getObject(pointer.pointerBase);
1392                 auto &pointerBaseTy = getType(pointerBase.type);
1393
1394                 ASSERT(getType(pointer.type).element == object.type);
1395                 ASSERT(Type::ID(insn.word(1)) == object.type);
1396
1397                 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1398                 {
1399                         UNIMPLEMENTED("StorageClassImage load not yet implemented");
1400                 }
1401
1402                 Pointer<Float> ptrBase;
1403                 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1404                 {
1405                         ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1406                 }
1407                 else
1408                 {
1409                         ptrBase = &routine->getValue(pointer.pointerBase)[0];
1410                 }
1411
1412                 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1413                 auto anyInactiveLanes = SignMask(~routine->activeLaneMask) != 0;
1414
1415                 auto load = SpirvRoutine::Value(objectTy.sizeInComponents);
1416
1417                 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1418                 {
1419                         // Divergent offsets or masked lanes.
1420                         auto offsets = pointer.kind == Object::Kind::Value ?
1421                                         As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1422                                         RValue<SIMD::Int>(SIMD::Int(0));
1423                         for (auto i = 0u; i < objectTy.sizeInComponents; i++)
1424                         {
1425                                 // i wish i had a Float,Float,Float,Float constructor here..
1426                                 for (int j = 0; j < SIMD::Width; j++)
1427                                 {
1428                                         If(Extract(routine->activeLaneMask, j) != 0)
1429                                         {
1430                                                 Int offset = Int(i) + Extract(offsets, j);
1431                                                 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1432                                                 load[i] = Insert(load[i], ptrBase[offset], j);
1433                                         }
1434                                 }
1435                         }
1436                 }
1437                 Else
1438                 {
1439                         // No divergent offsets or masked lanes.
1440                         if (interleavedByLane)
1441                         {
1442                                 // Lane-interleaved data.
1443                                 Pointer<SIMD::Float> src = ptrBase;
1444                                 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
1445                                 {
1446                                         load[i] = src[i];
1447                                 }
1448                         }
1449                         else
1450                         {
1451                                 // Non-interleaved data.
1452                                 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
1453                                 {
1454                                         load[i] = RValue<SIMD::Float>(ptrBase[i]);
1455                                 }
1456                         }
1457                 }
1458
1459                 auto &dst = routine->createIntermediate(objectId, objectTy.sizeInComponents);
1460                 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
1461                 {
1462                         dst.emplace(i, load[i]);
1463                 }
1464         }
1465
1466         void SpirvShader::EmitAccessChain(InsnIterator insn, SpirvRoutine *routine) const
1467         {
1468                 Type::ID typeId = insn.word(1);
1469                 Object::ID resultId = insn.word(2);
1470                 Object::ID baseId = insn.word(3);
1471                 uint32_t numIndexes = insn.wordCount() - 4;
1472                 const uint32_t *indexes = insn.wordPointer(4);
1473                 auto &type = getType(typeId);
1474                 ASSERT(type.sizeInComponents == 1);
1475                 ASSERT(getObject(baseId).pointerBase == getObject(resultId).pointerBase);
1476
1477                 auto &dst = routine->createIntermediate(resultId, type.sizeInComponents);
1478
1479                 if(type.storageClass == spv::StorageClassPushConstant ||
1480                    type.storageClass == spv::StorageClassUniform ||
1481                    type.storageClass == spv::StorageClassStorageBuffer)
1482                 {
1483                         dst.emplace(0, WalkExplicitLayoutAccessChain(baseId, numIndexes, indexes, routine));
1484                 }
1485                 else
1486                 {
1487                         dst.emplace(0, WalkAccessChain(baseId, numIndexes, indexes, routine));
1488                 }
1489         }
1490
1491         void SpirvShader::EmitStore(InsnIterator insn, SpirvRoutine *routine) const
1492         {
1493                 Object::ID pointerId = insn.word(1);
1494                 Object::ID objectId = insn.word(2);
1495                 auto &object = getObject(objectId);
1496                 auto &pointer = getObject(pointerId);
1497                 auto &pointerTy = getType(pointer.type);
1498                 auto &elementTy = getType(pointerTy.element);
1499                 auto &pointerBase = getObject(pointer.pointerBase);
1500                 auto &pointerBaseTy = getType(pointerBase.type);
1501
1502                 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1503                 {
1504                         UNIMPLEMENTED("StorageClassImage store not yet implemented");
1505                 }
1506
1507                 Pointer<Float> ptrBase;
1508                 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1509                 {
1510                         ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1511                 }
1512                 else
1513                 {
1514                         ptrBase = &routine->getValue(pointer.pointerBase)[0];
1515                 }
1516
1517                 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1518                 auto anyInactiveLanes = SignMask(~routine->activeLaneMask) != 0;
1519
1520                 if (object.kind == Object::Kind::Constant)
1521                 {
1522                         // Constant source data.
1523                         auto src = reinterpret_cast<float *>(object.constantValue.get());
1524                         If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1525                         {
1526                                 // Divergent offsets or masked lanes.
1527                                 auto offsets = pointer.kind == Object::Kind::Value ?
1528                                                 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1529                                                 RValue<SIMD::Int>(SIMD::Int(0));
1530                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1531                                 {
1532                                         for (int j = 0; j < SIMD::Width; j++)
1533                                         {
1534                                                 If(Extract(routine->activeLaneMask, j) != 0)
1535                                                 {
1536                                                         Int offset = Int(i) + Extract(offsets, j);
1537                                                         if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1538                                                         ptrBase[offset] = RValue<Float>(src[i]);
1539                                                 }
1540                                         }
1541                                 }
1542                         }
1543                         Else
1544                         {
1545                                 // Constant source data.
1546                                 // No divergent offsets or masked lanes.
1547                                 Pointer<SIMD::Float> dst = ptrBase;
1548                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1549                                 {
1550                                         dst[i] = RValue<SIMD::Float>(src[i]);
1551                                 }
1552                         }
1553                 }
1554                 else
1555                 {
1556                         // Intermediate source data.
1557                         auto &src = routine->getIntermediate(objectId);
1558                         If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1559                         {
1560                                 // Divergent offsets or masked lanes.
1561                                 auto offsets = pointer.kind == Object::Kind::Value ?
1562                                                 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1563                                                 RValue<SIMD::Int>(SIMD::Int(0));
1564                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1565                                 {
1566                                         for (int j = 0; j < SIMD::Width; j++)
1567                                         {
1568                                                 If(Extract(routine->activeLaneMask, j) != 0)
1569                                                 {
1570                                                         Int offset = Int(i) + Extract(offsets, j);
1571                                                         if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1572                                                         ptrBase[offset] = Extract(src.Float(i), j);
1573                                                 }
1574                                         }
1575                                 }
1576                         }
1577                         Else
1578                         {
1579                                 // No divergent offsets or masked lanes.
1580                                 if (interleavedByLane)
1581                                 {
1582                                         // Lane-interleaved data.
1583                                         Pointer<SIMD::Float> dst = ptrBase;
1584                                         for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1585                                         {
1586                                                 dst[i] = src.Float(i);
1587                                         }
1588                                 }
1589                                 else
1590                                 {
1591                                         // Intermediate source data. Non-interleaved data.
1592                                         Pointer<SIMD::Float> dst = ptrBase;
1593                                         for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1594                                         {
1595                                                 dst[i] = SIMD::Float(src.Float(i));
1596                                         }
1597                                 }
1598                         }
1599                 }
1600         }
1601
1602         void SpirvShader::EmitCompositeConstruct(InsnIterator insn, SpirvRoutine *routine) const
1603         {
1604                 auto &type = getType(insn.word(1));
1605                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1606                 auto offset = 0u;
1607
1608                 for (auto i = 0u; i < insn.wordCount() - 3; i++)
1609                 {
1610                         Object::ID srcObjectId = insn.word(3u + i);
1611                         auto & srcObject = getObject(srcObjectId);
1612                         auto & srcObjectTy = getType(srcObject.type);
1613                         GenericValue srcObjectAccess(this, routine, srcObjectId);
1614
1615                         for (auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
1616                         {
1617                                 dst.emplace(offset++, srcObjectAccess.Float(j));
1618                         }
1619                 }
1620         }
1621
1622         void SpirvShader::EmitCompositeInsert(InsnIterator insn, SpirvRoutine *routine) const
1623         {
1624                 Type::ID resultTypeId = insn.word(1);
1625                 auto &type = getType(resultTypeId);
1626                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1627                 auto &newPartObject = getObject(insn.word(3));
1628                 auto &newPartObjectTy = getType(newPartObject.type);
1629                 auto firstNewComponent = WalkLiteralAccessChain(resultTypeId, insn.wordCount() - 5, insn.wordPointer(5));
1630
1631                 GenericValue srcObjectAccess(this, routine, insn.word(4));
1632                 GenericValue newPartObjectAccess(this, routine, insn.word(3));
1633
1634                 // old components before
1635                 for (auto i = 0u; i < firstNewComponent; i++)
1636                 {
1637                         dst.emplace(i, srcObjectAccess.Float(i));
1638                 }
1639                 // new part
1640                 for (auto i = 0u; i < newPartObjectTy.sizeInComponents; i++)
1641                 {
1642                         dst.emplace(firstNewComponent + i, newPartObjectAccess.Float(i));
1643                 }
1644                 // old components after
1645                 for (auto i = firstNewComponent + newPartObjectTy.sizeInComponents; i < type.sizeInComponents; i++)
1646                 {
1647                         dst.emplace(i, srcObjectAccess.Float(i));
1648                 }
1649         }
1650
1651         void SpirvShader::EmitCompositeExtract(InsnIterator insn, SpirvRoutine *routine) const
1652         {
1653                 auto &type = getType(insn.word(1));
1654                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1655                 auto &compositeObject = getObject(insn.word(3));
1656                 Type::ID compositeTypeId = compositeObject.definition.word(1);
1657                 auto firstComponent = WalkLiteralAccessChain(compositeTypeId, insn.wordCount() - 4, insn.wordPointer(4));
1658
1659                 GenericValue compositeObjectAccess(this, routine, insn.word(3));
1660                 for (auto i = 0u; i < type.sizeInComponents; i++)
1661                 {
1662                         dst.emplace(i, compositeObjectAccess.Float(firstComponent + i));
1663                 }
1664         }
1665
1666         void SpirvShader::EmitVectorShuffle(InsnIterator insn, SpirvRoutine *routine) const
1667         {
1668                 auto &type = getType(insn.word(1));
1669                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1670
1671                 // Note: number of components in result type, first half type, and second
1672                 // half type are all independent.
1673                 auto &firstHalfType = getType(getObject(insn.word(3)).type);
1674
1675                 GenericValue firstHalfAccess(this, routine, insn.word(3));
1676                 GenericValue secondHalfAccess(this, routine, insn.word(4));
1677
1678                 for (auto i = 0u; i < type.sizeInComponents; i++)
1679                 {
1680                         auto selector = insn.word(5 + i);
1681                         if (selector == static_cast<uint32_t>(-1))
1682                         {
1683                                 // Undefined value. Until we decide to do real undef values, zero is as good
1684                                 // a value as any
1685                                 dst.emplace(i, RValue<SIMD::Float>(0.0f));
1686                         }
1687                         else if (selector < firstHalfType.sizeInComponents)
1688                         {
1689                                 dst.emplace(i, firstHalfAccess.Float(selector));
1690                         }
1691                         else
1692                         {
1693                                 dst.emplace(i, secondHalfAccess.Float(selector - firstHalfType.sizeInComponents));
1694                         }
1695                 }
1696         }
1697
1698         void SpirvShader::EmitVectorExtractDynamic(sw::SpirvShader::InsnIterator insn, sw::SpirvRoutine *routine) const
1699         {
1700                 auto &type = getType(insn.word(1));
1701                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1702                 auto &srcType = getType(getObject(insn.word(3)).type);
1703
1704                 GenericValue src(this, routine, insn.word(3));
1705                 GenericValue index(this, routine, insn.word(4));
1706
1707                 SIMD::UInt v = SIMD::UInt(0);
1708
1709                 for (auto i = 0u; i < srcType.sizeInComponents; i++)
1710                 {
1711                         v |= CmpEQ(index.UInt(0), SIMD::UInt(i)) & src.UInt(i);
1712                 }
1713
1714                 dst.emplace(0, v);
1715         }
1716
1717         void SpirvShader::EmitVectorInsertDynamic(sw::SpirvShader::InsnIterator insn, sw::SpirvRoutine *routine) const
1718         {
1719                 auto &type = getType(insn.word(1));
1720                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1721
1722                 GenericValue src(this, routine, insn.word(3));
1723                 GenericValue component(this, routine, insn.word(4));
1724                 GenericValue index(this, routine, insn.word(5));
1725
1726                 for (auto i = 0u; i < type.sizeInComponents; i++)
1727                 {
1728                         SIMD::UInt mask = CmpEQ(SIMD::UInt(i), index.UInt(0));
1729                         dst.emplace(i, (src.UInt(i) & ~mask) | (component.UInt(0) & mask));
1730                 }
1731         }
1732
1733         void SpirvShader::EmitVectorTimesScalar(InsnIterator insn, SpirvRoutine *routine) const
1734         {
1735                 auto &type = getType(insn.word(1));
1736                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1737                 auto lhs = GenericValue(this, routine, insn.word(3));
1738                 auto rhs = GenericValue(this, routine, insn.word(4));
1739
1740                 for (auto i = 0u; i < type.sizeInComponents; i++)
1741                 {
1742                         dst.emplace(i, lhs.Float(i) * rhs.Float(0));
1743                 }
1744         }
1745
1746         void SpirvShader::EmitUnaryOp(InsnIterator insn, SpirvRoutine *routine) const
1747         {
1748                 auto &type = getType(insn.word(1));
1749                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1750                 auto src = GenericValue(this, routine, insn.word(3));
1751
1752                 for (auto i = 0u; i < type.sizeInComponents; i++)
1753                 {
1754                         switch (insn.opcode())
1755                         {
1756                         case spv::OpNot:
1757                         case spv::OpLogicalNot:         // logical not == bitwise not due to all-bits boolean representation
1758                                 dst.emplace(i, ~src.UInt(i));
1759                                 break;
1760                         case spv::OpSNegate:
1761                                 dst.emplace(i, -src.Int(i));
1762                                 break;
1763                         case spv::OpFNegate:
1764                                 dst.emplace(i, -src.Float(i));
1765                                 break;
1766                         case spv::OpConvertFToU:
1767                                 dst.emplace(i, SIMD::UInt(src.Float(i)));
1768                                 break;
1769                         case spv::OpConvertFToS:
1770                                 dst.emplace(i, SIMD::Int(src.Float(i)));
1771                                 break;
1772                         case spv::OpConvertSToF:
1773                                 dst.emplace(i, SIMD::Float(src.Int(i)));
1774                                 break;
1775                         case spv::OpConvertUToF:
1776                                 dst.emplace(i, SIMD::Float(src.UInt(i)));
1777                                 break;
1778                         case spv::OpBitcast:
1779                                 dst.emplace(i, src.Float(i));
1780                                 break;
1781                         case spv::OpIsInf:
1782                                 dst.emplace(i, IsInf(src.Float(i)));
1783                                 break;
1784                         case spv::OpIsNan:
1785                                 dst.emplace(i, IsNan(src.Float(i)));
1786                                 break;
1787                         case spv::OpDPdx:
1788                         case spv::OpDPdxCoarse:
1789                                 // Derivative instructions: FS invocations are laid out like so:
1790                                 //    0 1
1791                                 //    2 3
1792                                 static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
1793                                 dst.emplace(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
1794                                 break;
1795                         case spv::OpDPdy:
1796                         case spv::OpDPdyCoarse:
1797                                 dst.emplace(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
1798                                 break;
1799                         case spv::OpFwidth:
1800                         case spv::OpFwidthCoarse:
1801                                 dst.emplace(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))
1802                                                         + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
1803                                 break;
1804                         case spv::OpDPdxFine:
1805                         {
1806                                 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
1807                                 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
1808                                 SIMD::Float v = SIMD::Float(firstRow);
1809                                 v = Insert(v, secondRow, 2);
1810                                 v = Insert(v, secondRow, 3);
1811                                 dst.emplace(i, v);
1812                                 break;
1813                         }
1814                         case spv::OpDPdyFine:
1815                         {
1816                                 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
1817                                 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
1818                                 SIMD::Float v = SIMD::Float(firstColumn);
1819                                 v = Insert(v, secondColumn, 1);
1820                                 v = Insert(v, secondColumn, 3);
1821                                 dst.emplace(i, v);
1822                                 break;
1823                         }
1824                         case spv::OpFwidthFine:
1825                         {
1826                                 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
1827                                 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
1828                                 SIMD::Float dpdx = SIMD::Float(firstRow);
1829                                 dpdx = Insert(dpdx, secondRow, 2);
1830                                 dpdx = Insert(dpdx, secondRow, 3);
1831                                 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
1832                                 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
1833                                 SIMD::Float dpdy = SIMD::Float(firstColumn);
1834                                 dpdy = Insert(dpdy, secondColumn, 1);
1835                                 dpdy = Insert(dpdy, secondColumn, 3);
1836                                 dst.emplace(i, Abs(dpdx) + Abs(dpdy));
1837                                 break;
1838                         }
1839                         default:
1840                                 UNIMPLEMENTED("Unhandled unary operator %s", OpcodeName(insn.opcode()).c_str());
1841                         }
1842                 }
1843         }
1844
1845         void SpirvShader::EmitBinaryOp(InsnIterator insn, SpirvRoutine *routine) const
1846         {
1847                 auto &type = getType(insn.word(1));
1848                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1849                 auto &lhsType = getType(getObject(insn.word(3)).type);
1850                 auto lhs = GenericValue(this, routine, insn.word(3));
1851                 auto rhs = GenericValue(this, routine, insn.word(4));
1852
1853                 for (auto i = 0u; i < lhsType.sizeInComponents; i++)
1854                 {
1855                         switch (insn.opcode())
1856                         {
1857                         case spv::OpIAdd:
1858                                 dst.emplace(i, lhs.Int(i) + rhs.Int(i));
1859                                 break;
1860                         case spv::OpISub:
1861                                 dst.emplace(i, lhs.Int(i) - rhs.Int(i));
1862                                 break;
1863                         case spv::OpIMul:
1864                                 dst.emplace(i, lhs.Int(i) * rhs.Int(i));
1865                                 break;
1866                         case spv::OpSDiv:
1867                         {
1868                                 SIMD::Int a = lhs.Int(i);
1869                                 SIMD::Int b = rhs.Int(i);
1870                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
1871                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
1872                                 dst.emplace(i, a / b);
1873                                 break;
1874                         }
1875                         case spv::OpUDiv:
1876                         {
1877                                 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
1878                                 dst.emplace(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
1879                                 break;
1880                         }
1881                         case spv::OpSRem:
1882                         {
1883                                 SIMD::Int a = lhs.Int(i);
1884                                 SIMD::Int b = rhs.Int(i);
1885                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
1886                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
1887                                 dst.emplace(i, a % b);
1888                                 break;
1889                         }
1890                         case spv::OpSMod:
1891                         {
1892                                 SIMD::Int a = lhs.Int(i);
1893                                 SIMD::Int b = rhs.Int(i);
1894                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
1895                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
1896                                 auto mod = a % b;
1897                                 // If a and b have opposite signs, the remainder operation takes
1898                                 // the sign from a but OpSMod is supposed to take the sign of b.
1899                                 // Adding b will ensure that the result has the correct sign and
1900                                 // that it is still congruent to a modulo b.
1901                                 //
1902                                 // See also http://mathforum.org/library/drmath/view/52343.html
1903                                 auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
1904                                 auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
1905                                 dst.emplace(i, As<SIMD::Float>(fixedMod));
1906                                 break;
1907                         }
1908                         case spv::OpUMod:
1909                         {
1910                                 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
1911                                 dst.emplace(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
1912                                 break;
1913                         }
1914                         case spv::OpIEqual:
1915                         case spv::OpLogicalEqual:
1916                                 dst.emplace(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
1917                                 break;
1918                         case spv::OpINotEqual:
1919                         case spv::OpLogicalNotEqual:
1920                                 dst.emplace(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
1921                                 break;
1922                         case spv::OpUGreaterThan:
1923                                 dst.emplace(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
1924                                 break;
1925                         case spv::OpSGreaterThan:
1926                                 dst.emplace(i, CmpGT(lhs.Int(i), rhs.Int(i)));
1927                                 break;
1928                         case spv::OpUGreaterThanEqual:
1929                                 dst.emplace(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
1930                                 break;
1931                         case spv::OpSGreaterThanEqual:
1932                                 dst.emplace(i, CmpGE(lhs.Int(i), rhs.Int(i)));
1933                                 break;
1934                         case spv::OpULessThan:
1935                                 dst.emplace(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
1936                                 break;
1937                         case spv::OpSLessThan:
1938                                 dst.emplace(i, CmpLT(lhs.Int(i), rhs.Int(i)));
1939                                 break;
1940                         case spv::OpULessThanEqual:
1941                                 dst.emplace(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
1942                                 break;
1943                         case spv::OpSLessThanEqual:
1944                                 dst.emplace(i, CmpLE(lhs.Int(i), rhs.Int(i)));
1945                                 break;
1946                         case spv::OpFAdd:
1947                                 dst.emplace(i, lhs.Float(i) + rhs.Float(i));
1948                                 break;
1949                         case spv::OpFSub:
1950                                 dst.emplace(i, lhs.Float(i) - rhs.Float(i));
1951                                 break;
1952                         case spv::OpFMul:
1953                                 dst.emplace(i, lhs.Float(i) * rhs.Float(i));
1954                                 break;
1955                         case spv::OpFDiv:
1956                                 dst.emplace(i, lhs.Float(i) / rhs.Float(i));
1957                                 break;
1958                         case spv::OpFMod:
1959                                 // TODO(b/126873455): inaccurate for values greater than 2^24
1960                                 dst.emplace(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
1961                                 break;
1962                         case spv::OpFRem:
1963                                 dst.emplace(i, lhs.Float(i) % rhs.Float(i));
1964                                 break;
1965                         case spv::OpFOrdEqual:
1966                                 dst.emplace(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
1967                                 break;
1968                         case spv::OpFUnordEqual:
1969                                 dst.emplace(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
1970                                 break;
1971                         case spv::OpFOrdNotEqual:
1972                                 dst.emplace(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
1973                                 break;
1974                         case spv::OpFUnordNotEqual:
1975                                 dst.emplace(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
1976                                 break;
1977                         case spv::OpFOrdLessThan:
1978                                 dst.emplace(i, CmpLT(lhs.Float(i), rhs.Float(i)));
1979                                 break;
1980                         case spv::OpFUnordLessThan:
1981                                 dst.emplace(i, CmpULT(lhs.Float(i), rhs.Float(i)));
1982                                 break;
1983                         case spv::OpFOrdGreaterThan:
1984                                 dst.emplace(i, CmpGT(lhs.Float(i), rhs.Float(i)));
1985                                 break;
1986                         case spv::OpFUnordGreaterThan:
1987                                 dst.emplace(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
1988                                 break;
1989                         case spv::OpFOrdLessThanEqual:
1990                                 dst.emplace(i, CmpLE(lhs.Float(i), rhs.Float(i)));
1991                                 break;
1992                         case spv::OpFUnordLessThanEqual:
1993                                 dst.emplace(i, CmpULE(lhs.Float(i), rhs.Float(i)));
1994                                 break;
1995                         case spv::OpFOrdGreaterThanEqual:
1996                                 dst.emplace(i, CmpGE(lhs.Float(i), rhs.Float(i)));
1997                                 break;
1998                         case spv::OpFUnordGreaterThanEqual:
1999                                 dst.emplace(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
2000                                 break;
2001                         case spv::OpShiftRightLogical:
2002                                 dst.emplace(i, lhs.UInt(i) >> rhs.UInt(i));
2003                                 break;
2004                         case spv::OpShiftRightArithmetic:
2005                                 dst.emplace(i, lhs.Int(i) >> rhs.Int(i));
2006                                 break;
2007                         case spv::OpShiftLeftLogical:
2008                                 dst.emplace(i, lhs.UInt(i) << rhs.UInt(i));
2009                                 break;
2010                         case spv::OpBitwiseOr:
2011                         case spv::OpLogicalOr:
2012                                 dst.emplace(i, lhs.UInt(i) | rhs.UInt(i));
2013                                 break;
2014                         case spv::OpBitwiseXor:
2015                                 dst.emplace(i, lhs.UInt(i) ^ rhs.UInt(i));
2016                                 break;
2017                         case spv::OpBitwiseAnd:
2018                         case spv::OpLogicalAnd:
2019                                 dst.emplace(i, lhs.UInt(i) & rhs.UInt(i));
2020                                 break;
2021                         case spv::OpSMulExtended:
2022                                 // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
2023                                 // In our flat view then, component i is the i'th component of the first member;
2024                                 // component i + N is the i'th component of the second member.
2025                                 dst.emplace(i, lhs.Int(i) * rhs.Int(i));
2026                                 dst.emplace(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
2027                                 break;
2028                         case spv::OpUMulExtended:
2029                                 dst.emplace(i, lhs.UInt(i) * rhs.UInt(i));
2030                                 dst.emplace(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
2031                                 break;
2032                         default:
2033                                 UNIMPLEMENTED("Unhandled binary operator %s", OpcodeName(insn.opcode()).c_str());
2034                         }
2035                 }
2036         }
2037
2038         void SpirvShader::EmitDot(InsnIterator insn, SpirvRoutine *routine) const
2039         {
2040                 auto &type = getType(insn.word(1));
2041                 assert(type.sizeInComponents == 1);
2042                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2043                 auto &lhsType = getType(getObject(insn.word(3)).type);
2044                 auto lhs = GenericValue(this, routine, insn.word(3));
2045                 auto rhs = GenericValue(this, routine, insn.word(4));
2046
2047                 dst.emplace(0, Dot(lhsType.sizeInComponents, lhs, rhs));
2048         }
2049
2050         void SpirvShader::EmitSelect(InsnIterator insn, SpirvRoutine *routine) const
2051         {
2052                 auto &type = getType(insn.word(1));
2053                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2054                 auto cond = GenericValue(this, routine, insn.word(3));
2055                 auto lhs = GenericValue(this, routine, insn.word(4));
2056                 auto rhs = GenericValue(this, routine, insn.word(5));
2057
2058                 for (auto i = 0u; i < type.sizeInComponents; i++)
2059                 {
2060                         dst.emplace(i, (cond.Int(i) & lhs.Int(i)) | (~cond.Int(i) & rhs.Int(i)));   // FIXME: IfThenElse()
2061                 }
2062         }
2063
2064         void SpirvShader::EmitExtendedInstruction(InsnIterator insn, SpirvRoutine *routine) const
2065         {
2066                 auto &type = getType(insn.word(1));
2067                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2068                 auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
2069
2070                 switch (extInstIndex)
2071                 {
2072                 case GLSLstd450FAbs:
2073                 {
2074                         auto src = GenericValue(this, routine, insn.word(5));
2075                         for (auto i = 0u; i < type.sizeInComponents; i++)
2076                         {
2077                                 dst.emplace(i, Abs(src.Float(i)));
2078                         }
2079                         break;
2080                 }
2081                 case GLSLstd450SAbs:
2082                 {
2083                         auto src = GenericValue(this, routine, insn.word(5));
2084                         for (auto i = 0u; i < type.sizeInComponents; i++)
2085                         {
2086                                 dst.emplace(i, Abs(src.Int(i)));
2087                         }
2088                         break;
2089                 }
2090                 case GLSLstd450Cross:
2091                 {
2092                         auto lhs = GenericValue(this, routine, insn.word(5));
2093                         auto rhs = GenericValue(this, routine, insn.word(6));
2094                         dst.emplace(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
2095                         dst.emplace(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
2096                         dst.emplace(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
2097                         break;
2098                 }
2099                 case GLSLstd450Floor:
2100                 {
2101                         auto src = GenericValue(this, routine, insn.word(5));
2102                         for (auto i = 0u; i < type.sizeInComponents; i++)
2103                         {
2104                                 dst.emplace(i, Floor(src.Float(i)));
2105                         }
2106                         break;
2107                 }
2108                 case GLSLstd450Trunc:
2109                 {
2110                         auto src = GenericValue(this, routine, insn.word(5));
2111                         for (auto i = 0u; i < type.sizeInComponents; i++)
2112                         {
2113                                 dst.emplace(i, Trunc(src.Float(i)));
2114                         }
2115                         break;
2116                 }
2117                 case GLSLstd450Ceil:
2118                 {
2119                         auto src = GenericValue(this, routine, insn.word(5));
2120                         for (auto i = 0u; i < type.sizeInComponents; i++)
2121                         {
2122                                 dst.emplace(i, Ceil(src.Float(i)));
2123                         }
2124                         break;
2125                 }
2126                 case GLSLstd450Fract:
2127                 {
2128                         auto src = GenericValue(this, routine, insn.word(5));
2129                         for (auto i = 0u; i < type.sizeInComponents; i++)
2130                         {
2131                                 dst.emplace(i, Frac(src.Float(i)));
2132                         }
2133                         break;
2134                 }
2135                 case GLSLstd450Round:
2136                 {
2137                         auto src = GenericValue(this, routine, insn.word(5));
2138                         for (auto i = 0u; i < type.sizeInComponents; i++)
2139                         {
2140                                 dst.emplace(i, Round(src.Float(i)));
2141                         }
2142                         break;
2143                 }
2144                 case GLSLstd450RoundEven:
2145                 {
2146                         auto src = GenericValue(this, routine, insn.word(5));
2147                         for (auto i = 0u; i < type.sizeInComponents; i++)
2148                         {
2149                                 auto x = Round(src.Float(i));
2150                                 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
2151                                 dst.emplace(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
2152                                                 SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
2153                         }
2154                         break;
2155                 }
2156                 case GLSLstd450FMin:
2157                 {
2158                         auto lhs = GenericValue(this, routine, insn.word(5));
2159                         auto rhs = GenericValue(this, routine, insn.word(6));
2160                         for (auto i = 0u; i < type.sizeInComponents; i++)
2161                         {
2162                                 dst.emplace(i, Min(lhs.Float(i), rhs.Float(i)));
2163                         }
2164                         break;
2165                 }
2166                 case GLSLstd450FMax:
2167                 {
2168                         auto lhs = GenericValue(this, routine, insn.word(5));
2169                         auto rhs = GenericValue(this, routine, insn.word(6));
2170                         for (auto i = 0u; i < type.sizeInComponents; i++)
2171                         {
2172                                 dst.emplace(i, Max(lhs.Float(i), rhs.Float(i)));
2173                         }
2174                         break;
2175                 }
2176                 case GLSLstd450SMin:
2177                 {
2178                         auto lhs = GenericValue(this, routine, insn.word(5));
2179                         auto rhs = GenericValue(this, routine, insn.word(6));
2180                         for (auto i = 0u; i < type.sizeInComponents; i++)
2181                         {
2182                                 dst.emplace(i, Min(lhs.Int(i), rhs.Int(i)));
2183                         }
2184                         break;
2185                 }
2186                 case GLSLstd450SMax:
2187                 {
2188                         auto lhs = GenericValue(this, routine, insn.word(5));
2189                         auto rhs = GenericValue(this, routine, insn.word(6));
2190                         for (auto i = 0u; i < type.sizeInComponents; i++)
2191                         {
2192                                 dst.emplace(i, Max(lhs.Int(i), rhs.Int(i)));
2193                         }
2194                         break;
2195                 }
2196                 case GLSLstd450UMin:
2197                 {
2198                         auto lhs = GenericValue(this, routine, insn.word(5));
2199                         auto rhs = GenericValue(this, routine, insn.word(6));
2200                         for (auto i = 0u; i < type.sizeInComponents; i++)
2201                         {
2202                                 dst.emplace(i, Min(lhs.UInt(i), rhs.UInt(i)));
2203                         }
2204                         break;
2205                 }
2206                 case GLSLstd450UMax:
2207                 {
2208                         auto lhs = GenericValue(this, routine, insn.word(5));
2209                         auto rhs = GenericValue(this, routine, insn.word(6));
2210                         for (auto i = 0u; i < type.sizeInComponents; i++)
2211                         {
2212                                 dst.emplace(i, Max(lhs.UInt(i), rhs.UInt(i)));
2213                         }
2214                         break;
2215                 }
2216                 case GLSLstd450Step:
2217                 {
2218                         auto edge = GenericValue(this, routine, insn.word(5));
2219                         auto x = GenericValue(this, routine, insn.word(6));
2220                         for (auto i = 0u; i < type.sizeInComponents; i++)
2221                         {
2222                                 dst.emplace(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
2223                         }
2224                         break;
2225                 }
2226                 case GLSLstd450SmoothStep:
2227                 {
2228                         auto edge0 = GenericValue(this, routine, insn.word(5));
2229                         auto edge1 = GenericValue(this, routine, insn.word(6));
2230                         auto x = GenericValue(this, routine, insn.word(7));
2231                         for (auto i = 0u; i < type.sizeInComponents; i++)
2232                         {
2233                                 auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
2234                                                 (edge1.Float(i) - edge0.Float(i)), SIMD::Float(0.0f)), SIMD::Float(1.0f));
2235                                 dst.emplace(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
2236                         }
2237                         break;
2238                 }
2239                 case GLSLstd450FMix:
2240                 {
2241                         auto x = GenericValue(this, routine, insn.word(5));
2242                         auto y = GenericValue(this, routine, insn.word(6));
2243                         auto a = GenericValue(this, routine, insn.word(7));
2244                         for (auto i = 0u; i < type.sizeInComponents; i++)
2245                         {
2246                                 dst.emplace(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
2247                         }
2248                         break;
2249                 }
2250                 case GLSLstd450FClamp:
2251                 {
2252                         auto x = GenericValue(this, routine, insn.word(5));
2253                         auto minVal = GenericValue(this, routine, insn.word(6));
2254                         auto maxVal = GenericValue(this, routine, insn.word(7));
2255                         for (auto i = 0u; i < type.sizeInComponents; i++)
2256                         {
2257                                 dst.emplace(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
2258                         }
2259                         break;
2260                 }
2261                 case GLSLstd450SClamp:
2262                 {
2263                         auto x = GenericValue(this, routine, insn.word(5));
2264                         auto minVal = GenericValue(this, routine, insn.word(6));
2265                         auto maxVal = GenericValue(this, routine, insn.word(7));
2266                         for (auto i = 0u; i < type.sizeInComponents; i++)
2267                         {
2268                                 dst.emplace(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
2269                         }
2270                         break;
2271                 }
2272                 case GLSLstd450UClamp:
2273                 {
2274                         auto x = GenericValue(this, routine, insn.word(5));
2275                         auto minVal = GenericValue(this, routine, insn.word(6));
2276                         auto maxVal = GenericValue(this, routine, insn.word(7));
2277                         for (auto i = 0u; i < type.sizeInComponents; i++)
2278                         {
2279                                 dst.emplace(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
2280                         }
2281                         break;
2282                 }
2283                 case GLSLstd450FSign:
2284                 {
2285                         auto src = GenericValue(this, routine, insn.word(5));
2286                         for (auto i = 0u; i < type.sizeInComponents; i++)
2287                         {
2288                                 auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
2289                                 auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
2290                                 dst.emplace(i, neg | pos);
2291                         }
2292                         break;
2293                 }
2294                 case GLSLstd450SSign:
2295                 {
2296                         auto src = GenericValue(this, routine, insn.word(5));
2297                         for (auto i = 0u; i < type.sizeInComponents; i++)
2298                         {
2299                                 auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
2300                                 auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
2301                                 dst.emplace(i, neg | pos);
2302                         }
2303                         break;
2304                 }
2305                 case GLSLstd450Reflect:
2306                 {
2307                         auto I = GenericValue(this, routine, insn.word(5));
2308                         auto N = GenericValue(this, routine, insn.word(6));
2309
2310                         SIMD::Float d = Dot(type.sizeInComponents, I, N);
2311
2312                         for (auto i = 0u; i < type.sizeInComponents; i++)
2313                         {
2314                                 dst.emplace(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
2315                         }
2316                         break;
2317                 }
2318                 case GLSLstd450Refract:
2319                 {
2320                         auto I = GenericValue(this, routine, insn.word(5));
2321                         auto N = GenericValue(this, routine, insn.word(6));
2322                         auto eta = GenericValue(this, routine, insn.word(7));
2323
2324                         SIMD::Float d = Dot(type.sizeInComponents, I, N);
2325                         SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
2326                         SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
2327                         SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
2328
2329                         for (auto i = 0u; i < type.sizeInComponents; i++)
2330                         {
2331                                 dst.emplace(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
2332                         }
2333                         break;
2334                 }
2335                 case GLSLstd450FaceForward:
2336                 {
2337                         auto N = GenericValue(this, routine, insn.word(5));
2338                         auto I = GenericValue(this, routine, insn.word(6));
2339                         auto Nref = GenericValue(this, routine, insn.word(7));
2340
2341                         SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
2342                         SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
2343
2344                         for (auto i = 0u; i < type.sizeInComponents; i++)
2345                         {
2346                                 auto n = N.Float(i);
2347                                 dst.emplace(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
2348                         }
2349                         break;
2350                 }
2351                 case GLSLstd450Length:
2352                 {
2353                         auto x = GenericValue(this, routine, insn.word(5));
2354                         SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2355
2356                         dst.emplace(0, Sqrt(d));
2357                         break;
2358                 }
2359                 case GLSLstd450Normalize:
2360                 {
2361                         auto x = GenericValue(this, routine, insn.word(5));
2362                         SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2363                         SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
2364
2365                         for (auto i = 0u; i < type.sizeInComponents; i++)
2366                         {
2367                                 dst.emplace(i, invLength * x.Float(i));
2368                         }
2369                         break;
2370                 }
2371                 case GLSLstd450Distance:
2372                 {
2373                         auto p0 = GenericValue(this, routine, insn.word(5));
2374                         auto p1 = GenericValue(this, routine, insn.word(6));
2375                         auto p0Type = getType(getObject(insn.word(5)).type);
2376
2377                         // sqrt(dot(p0-p1, p0-p1))
2378                         SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
2379
2380                         for (auto i = 1u; i < p0Type.sizeInComponents; i++)
2381                         {
2382                                 d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
2383                         }
2384
2385                         dst.emplace(0, Sqrt(d));
2386                         break;
2387                 }
2388                 default:
2389                         UNIMPLEMENTED("Unhandled ExtInst %d", extInstIndex);
2390                 }
2391         }
2392
2393         SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const
2394         {
2395                 SIMD::Float d = x.Float(0) * y.Float(0);
2396
2397                 for (auto i = 1u; i < numComponents; i++)
2398                 {
2399                         d += x.Float(i) * y.Float(i);
2400                 }
2401
2402                 return d;
2403         }
2404
2405         void SpirvShader::EmitAny(InsnIterator insn, SpirvRoutine *routine) const
2406         {
2407                 auto &type = getType(insn.word(1));
2408                 assert(type.sizeInComponents == 1);
2409                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2410                 auto &srcType = getType(getObject(insn.word(3)).type);
2411                 auto src = GenericValue(this, routine, insn.word(3));
2412
2413                 SIMD::UInt result = src.UInt(0);
2414
2415                 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2416                 {
2417                         result |= src.UInt(i);
2418                 }
2419
2420                 dst.emplace(0, result);
2421         }
2422
2423         void SpirvShader::EmitAll(InsnIterator insn, SpirvRoutine *routine) const
2424         {
2425                 auto &type = getType(insn.word(1));
2426                 assert(type.sizeInComponents == 1);
2427                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2428                 auto &srcType = getType(getObject(insn.word(3)).type);
2429                 auto src = GenericValue(this, routine, insn.word(3));
2430
2431                 SIMD::UInt result = src.UInt(0);
2432
2433                 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2434                 {
2435                         result &= src.UInt(i);
2436                 }
2437
2438                 dst.emplace(0, result);
2439         }
2440
2441         void SpirvShader::EmitBranch(InsnIterator insn, SpirvRoutine *routine) const
2442         {
2443                 auto blockId = Block::ID(insn.word(1));
2444                 EmitBlock(routine, getBlock(blockId));
2445         }
2446
2447         void SpirvShader::emitEpilog(SpirvRoutine *routine) const
2448         {
2449                 for (auto insn : *this)
2450                 {
2451                         switch (insn.opcode())
2452                         {
2453                         case spv::OpVariable:
2454                         {
2455                                 Object::ID resultId = insn.word(2);
2456                                 auto &object = getObject(resultId);
2457                                 auto &objectTy = getType(object.type);
2458                                 if (object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
2459                                 {
2460                                         auto &dst = routine->getValue(resultId);
2461                                         int offset = 0;
2462                                         VisitInterface(resultId,
2463                                                                    [&](Decorations const &d, AttribType type) {
2464                                                                            auto scalarSlot = d.Location << 2 | d.Component;
2465                                                                            routine->outputs[scalarSlot] = dst[offset++];
2466                                                                    });
2467                                 }
2468                                 break;
2469                         }
2470                         default:
2471                                 break;
2472                         }
2473                 }
2474         }
2475
2476         SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
2477                 pipelineLayout(pipelineLayout)
2478         {
2479         }
2480
2481 }