src/Pipeline/SpirvShader.cpp

   1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //    http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15 #include <spirv/unified1/spirv.hpp>
  16 #include <spirv/unified1/GLSL.std.450.h>
  17 #include "SpirvShader.hpp"
  18 #include "System/Math.hpp"
  19 #include "Vulkan/VkBuffer.hpp"
  20 #include "Vulkan/VkDebug.hpp"
  21 #include "Vulkan/VkPipelineLayout.hpp"
  22 #include "Device/Config.hpp"
  23
  24 #include <queue>
  25
  26 #ifdef Bool
  27 #undef Bool // b/127920555
  28 #endif
  29
  30 namespace sw
  31 {
  32         volatile int SpirvShader::serialCounter = 1;    // Start at 1, 0 is invalid shader.
  33
  34         SpirvShader::SpirvShader(InsnStore const &insns)
  35                         : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
  36                           outputs{MAX_INTERFACE_COMPONENTS},
  37                           serialID{serialCounter++}, modes{}
  38         {
  39                 ASSERT(insns.size() > 0);
  40
  41                 // Simplifying assumptions (to be satisfied by earlier transformations)
  42                 // - There is exactly one entrypoint in the module, and it's the one we want
  43                 // - The only input/output OpVariables present are those used by the entrypoint
  44
  45                 Block::ID currentBlock;
  46                 InsnIterator blockStart;
  47
  48                 for (auto insn : *this)
  49                 {
  50                         switch (insn.opcode())
  51                         {
  52                         case spv::OpExecutionMode:
  53                                 ProcessExecutionMode(insn);
  54                                 break;
  55
  56                         case spv::OpDecorate:
  57                         {
  58                                 TypeOrObjectID targetId = insn.word(1);
  59                                 auto decoration = static_cast<spv::Decoration>(insn.word(2));
  60                                 decorations[targetId].Apply(
  61                                                 decoration,
  62                                                 insn.wordCount() > 3 ? insn.word(3) : 0);
  63
  64                                 if (decoration == spv::DecorationCentroid)
  65                                         modes.NeedsCentroid = true;
  66                                 break;
  67                         }
  68
  69                         case spv::OpMemberDecorate:
  70                         {
  71                                 Type::ID targetId = insn.word(1);
  72                                 auto memberIndex = insn.word(2);
  73                                 auto &d = memberDecorations[targetId];
  74                                 if (memberIndex >= d.size())
  75                                         d.resize(memberIndex + 1);    // on demand; exact size would require another pass...
  76                                 auto decoration = static_cast<spv::Decoration>(insn.word(3));
  77                                 d[memberIndex].Apply(
  78                                                 decoration,
  79                                                 insn.wordCount() > 4 ? insn.word(4) : 0);
  80
  81                                 if (decoration == spv::DecorationCentroid)
  82                                         modes.NeedsCentroid = true;
  83                                 break;
  84                         }
  85
  86                         case spv::OpDecorationGroup:
  87                                 // Nothing to do here. We don't need to record the definition of the group; we'll just have
  88                                 // the bundle of decorations float around. If we were to ever walk the decorations directly,
  89                                 // we might think about introducing this as a real Object.
  90                                 break;
  91
  92                         case spv::OpGroupDecorate:
  93                         {
  94                                 auto const &srcDecorations = decorations[insn.word(1)];
  95                                 for (auto i = 2u; i < insn.wordCount(); i++)
  96                                 {
  97                                         // remaining operands are targets to apply the group to.
  98                                         decorations[insn.word(i)].Apply(srcDecorations);
  99                                 }
 100                                 break;
 101                         }
 102
 103                         case spv::OpGroupMemberDecorate:
 104                         {
 105                                 auto const &srcDecorations = decorations[insn.word(1)];
 106                                 for (auto i = 2u; i < insn.wordCount(); i += 2)
 107                                 {
 108                                         // remaining operands are pairs of <id>, literal for members to apply to.
 109                                         auto &d = memberDecorations[insn.word(i)];
 110                                         auto memberIndex = insn.word(i + 1);
 111                                         if (memberIndex >= d.size())
 112                                                 d.resize(memberIndex + 1);    // on demand resize, see above...
 113                                         d[memberIndex].Apply(srcDecorations);
 114                                 }
 115                                 break;
 116                         }
 117
 118                         case spv::OpLabel:
 119                         {
 120                                 ASSERT(currentBlock.value() == 0);
 121                                 currentBlock = Block::ID(insn.word(1));
 122                                 blockStart = insn;
 123                                 break;
 124                         }
 125
 126                         // Branch Instructions (subset of Termination Instructions):
 127                         case spv::OpBranch:
 128                         case spv::OpBranchConditional:
 129                         case spv::OpSwitch:
 130                         case spv::OpReturn:
 131                         // fallthrough
 132
 133                         // Termination instruction:
 134                         case spv::OpKill:
 135                         case spv::OpUnreachable:
 136                         {
 137                                 ASSERT(currentBlock.value() != 0);
 138                                 auto blockEnd = insn; blockEnd++;
 139                                 blocks[currentBlock] = Block(blockStart, blockEnd);
 140                                 currentBlock = Block::ID(0);
 141
 142                                 if (insn.opcode() == spv::OpKill)
 143                                 {
 144                                         modes.ContainsKill = true;
 145                                 }
 146                                 break;
 147                         }
 148
 149                         case spv::OpSelectionMerge:
 150                                 break; // Nothing to do in analysis pass.
 151
 152                         case spv::OpTypeVoid:
 153                         case spv::OpTypeBool:
 154                         case spv::OpTypeInt:
 155                         case spv::OpTypeFloat:
 156                         case spv::OpTypeVector:
 157                         case spv::OpTypeMatrix:
 158                         case spv::OpTypeImage:
 159                         case spv::OpTypeSampler:
 160                         case spv::OpTypeSampledImage:
 161                         case spv::OpTypeArray:
 162                         case spv::OpTypeRuntimeArray:
 163                         case spv::OpTypeStruct:
 164                         case spv::OpTypePointer:
 165                         case spv::OpTypeFunction:
 166                                 DeclareType(insn);
 167                                 break;
 168
 169                         case spv::OpVariable:
 170                         {
 171                                 Type::ID typeId = insn.word(1);
 172                                 Object::ID resultId = insn.word(2);
 173                                 auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
 174                                 if (insn.wordCount() > 4)
 175                                         UNIMPLEMENTED("Variable initializers not yet supported");
 176
 177                                 auto &object = defs[resultId];
 178                                 object.kind = Object::Kind::Variable;
 179                                 object.definition = insn;
 180                                 object.type = typeId;
 181                                 object.pointerBase = insn.word(2);      // base is itself
 182
 183                                 ASSERT(getType(typeId).storageClass == storageClass);
 184
 185                                 switch (storageClass)
 186                                 {
 187                                 case spv::StorageClassInput:
 188                                 case spv::StorageClassOutput:
 189                                         ProcessInterfaceVariable(object);
 190                                         break;
 191                                 case spv::StorageClassUniform:
 192                                 case spv::StorageClassStorageBuffer:
 193                                 case spv::StorageClassPushConstant:
 194                                         object.kind = Object::Kind::PhysicalPointer;
 195                                         break;
 196
 197                                 case spv::StorageClassPrivate:
 198                                 case spv::StorageClassFunction:
 199                                         break; // Correctly handled.
 200
 201                                 case spv::StorageClassUniformConstant:
 202                                 case spv::StorageClassWorkgroup:
 203                                 case spv::StorageClassCrossWorkgroup:
 204                                 case spv::StorageClassGeneric:
 205                                 case spv::StorageClassAtomicCounter:
 206                                 case spv::StorageClassImage:
 207                                         UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
 208                                         break;
 209
 210                                 default:
 211                                         UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
 212                                         break;
 213                                 }
 214                                 break;
 215                         }
 216
 217                         case spv::OpConstant:
 218                                 CreateConstant(insn).constantValue[0] = insn.word(3);
 219                                 break;
 220                         case spv::OpConstantFalse:
 221                                 CreateConstant(insn).constantValue[0] = 0;              // represent boolean false as zero
 222                                 break;
 223                         case spv::OpConstantTrue:
 224                                 CreateConstant(insn).constantValue[0] = ~0u;    // represent boolean true as all bits set
 225                                 break;
 226                         case spv::OpConstantNull:
 227                         case spv::OpUndef:
 228                         {
 229                                 // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
 230                                 // OpConstantNull forms a constant of arbitrary type, all zeros.
 231                                 auto &object = CreateConstant(insn);
 232                                 auto &objectTy = getType(object.type);
 233                                 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
 234                                 {
 235                                         object.constantValue[i] = 0;
 236                                 }
 237                                 break;
 238                         }
 239                         case spv::OpConstantComposite:
 240                         {
 241                                 auto &object = CreateConstant(insn);
 242                                 auto offset = 0u;
 243                                 for (auto i = 0u; i < insn.wordCount() - 3; i++)
 244                                 {
 245                                         auto &constituent = getObject(insn.word(i + 3));
 246                                         auto &constituentTy = getType(constituent.type);
 247                                         for (auto j = 0u; j < constituentTy.sizeInComponents; j++)
 248                                                 object.constantValue[offset++] = constituent.constantValue[j];
 249                                 }
 250
 251                                 auto objectId = Object::ID(insn.word(2));
 252                                 auto decorationsIt = decorations.find(objectId);
 253                                 if (decorationsIt != decorations.end() &&
 254                                         decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
 255                                 {
 256                                         // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
 257                                         // Decorating an object with the WorkgroupSize built-in
 258                                         // decoration will make that object contain the dimensions
 259                                         // of a local workgroup. If an object is decorated with the
 260                                         // WorkgroupSize decoration, this must take precedence over
 261                                         // any execution mode set for LocalSize.
 262                                         // The object decorated with WorkgroupSize must be declared
 263                                         // as a three-component vector of 32-bit integers.
 264                                         ASSERT(getType(object.type).sizeInComponents == 3);
 265                                         modes.WorkgroupSizeX = object.constantValue[0];
 266                                         modes.WorkgroupSizeY = object.constantValue[1];
 267                                         modes.WorkgroupSizeZ = object.constantValue[2];
 268                                 }
 269                                 break;
 270                         }
 271
 272                         case spv::OpCapability:
 273                                 break; // Various capabilities will be declared, but none affect our code generation at this point.
 274                         case spv::OpMemoryModel:
 275                                 break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
 276
 277                         case spv::OpEntryPoint:
 278                                 break;
 279                         case spv::OpFunction:
 280                                 ASSERT(mainBlockId.value() == 0); // Multiple functions found
 281                                 // Scan forward to find the function's label.
 282                                 for (auto it = insn; it != end() && mainBlockId.value() == 0; it++)
 283                                 {
 284                                         switch (it.opcode())
 285                                         {
 286                                         case spv::OpFunction:
 287                                         case spv::OpFunctionParameter:
 288                                                 break;
 289                                         case spv::OpLabel:
 290                                                 mainBlockId = Block::ID(it.word(1));
 291                                                 break;
 292                                         default:
 293                                                 WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str());
 294                                         }
 295                                 }
 296                                 ASSERT(mainBlockId.value() != 0); // Function's OpLabel not found
 297                                 break;
 298                         case spv::OpFunctionEnd:
 299                                 // Due to preprocessing, the entrypoint and its function provide no value.
 300                                 break;
 301                         case spv::OpExtInstImport:
 302                                 // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
 303                                 // Valid shaders will not attempt to import any other instruction sets.
 304                                 if (0 != strcmp("GLSL.std.450", reinterpret_cast<char const *>(insn.wordPointer(2))))
 305                                 {
 306                                         UNIMPLEMENTED("Only GLSL extended instruction set is supported");
 307                                 }
 308                                 break;
 309                         case spv::OpName:
 310                         case spv::OpMemberName:
 311                         case spv::OpSource:
 312                         case spv::OpSourceContinued:
 313                         case spv::OpSourceExtension:
 314                         case spv::OpLine:
 315                         case spv::OpNoLine:
 316                         case spv::OpModuleProcessed:
 317                         case spv::OpString:
 318                                 // No semantic impact
 319                                 break;
 320
 321                         case spv::OpFunctionParameter:
 322                         case spv::OpFunctionCall:
 323                         case spv::OpSpecConstant:
 324                         case spv::OpSpecConstantComposite:
 325                         case spv::OpSpecConstantFalse:
 326                         case spv::OpSpecConstantOp:
 327                         case spv::OpSpecConstantTrue:
 328                                 // These should have all been removed by preprocessing passes. If we see them here,
 329                                 // our assumptions are wrong and we will probably generate wrong code.
 330                                 UNIMPLEMENTED("%s should have already been lowered.", OpcodeName(insn.opcode()).c_str());
 331                                 break;
 332
 333                         case spv::OpFConvert:
 334                         case spv::OpSConvert:
 335                         case spv::OpUConvert:
 336                                 UNIMPLEMENTED("No valid uses for Op*Convert until we support multiple bit widths");
 337                                 break;
 338
 339                         case spv::OpLoad:
 340                         case spv::OpAccessChain:
 341                         case spv::OpInBoundsAccessChain:
 342                         case spv::OpCompositeConstruct:
 343                         case spv::OpCompositeInsert:
 344                         case spv::OpCompositeExtract:
 345                         case spv::OpVectorShuffle:
 346                         case spv::OpVectorTimesScalar:
 347                         case spv::OpVectorExtractDynamic:
 348                         case spv::OpVectorInsertDynamic:
 349                         case spv::OpNot: // Unary ops
 350                         case spv::OpSNegate:
 351                         case spv::OpFNegate:
 352                         case spv::OpLogicalNot:
 353                         case spv::OpIAdd: // Binary ops
 354                         case spv::OpISub:
 355                         case spv::OpIMul:
 356                         case spv::OpSDiv:
 357                         case spv::OpUDiv:
 358                         case spv::OpFAdd:
 359                         case spv::OpFSub:
 360                         case spv::OpFMul:
 361                         case spv::OpFDiv:
 362                         case spv::OpFMod:
 363                         case spv::OpFRem:
 364                         case spv::OpFOrdEqual:
 365                         case spv::OpFUnordEqual:
 366                         case spv::OpFOrdNotEqual:
 367                         case spv::OpFUnordNotEqual:
 368                         case spv::OpFOrdLessThan:
 369                         case spv::OpFUnordLessThan:
 370                         case spv::OpFOrdGreaterThan:
 371                         case spv::OpFUnordGreaterThan:
 372                         case spv::OpFOrdLessThanEqual:
 373                         case spv::OpFUnordLessThanEqual:
 374                         case spv::OpFOrdGreaterThanEqual:
 375                         case spv::OpFUnordGreaterThanEqual:
 376                         case spv::OpSMod:
 377                         case spv::OpSRem:
 378                         case spv::OpUMod:
 379                         case spv::OpIEqual:
 380                         case spv::OpINotEqual:
 381                         case spv::OpUGreaterThan:
 382                         case spv::OpSGreaterThan:
 383                         case spv::OpUGreaterThanEqual:
 384                         case spv::OpSGreaterThanEqual:
 385                         case spv::OpULessThan:
 386                         case spv::OpSLessThan:
 387                         case spv::OpULessThanEqual:
 388                         case spv::OpSLessThanEqual:
 389                         case spv::OpShiftRightLogical:
 390                         case spv::OpShiftRightArithmetic:
 391                         case spv::OpShiftLeftLogical:
 392                         case spv::OpBitwiseOr:
 393                         case spv::OpBitwiseXor:
 394                         case spv::OpBitwiseAnd:
 395                         case spv::OpLogicalOr:
 396                         case spv::OpLogicalAnd:
 397                         case spv::OpLogicalEqual:
 398                         case spv::OpLogicalNotEqual:
 399                         case spv::OpUMulExtended:
 400                         case spv::OpSMulExtended:
 401                         case spv::OpDot:
 402                         case spv::OpConvertFToU:
 403                         case spv::OpConvertFToS:
 404                         case spv::OpConvertSToF:
 405                         case spv::OpConvertUToF:
 406                         case spv::OpBitcast:
 407                         case spv::OpSelect:
 408                         case spv::OpExtInst:
 409                         case spv::OpIsInf:
 410                         case spv::OpIsNan:
 411                         case spv::OpAny:
 412                         case spv::OpAll:
 413                         case spv::OpDPdx:
 414                         case spv::OpDPdxCoarse:
 415                         case spv::OpDPdy:
 416                         case spv::OpDPdyCoarse:
 417                         case spv::OpFwidth:
 418                         case spv::OpFwidthCoarse:
 419                         case spv::OpDPdxFine:
 420                         case spv::OpDPdyFine:
 421                         case spv::OpFwidthFine:
 422                         case spv::OpAtomicLoad:
 423                         case spv::OpPhi:
 424                                 // Instructions that yield an intermediate value
 425                         {
 426                                 Type::ID typeId = insn.word(1);
 427                                 Object::ID resultId = insn.word(2);
 428                                 auto &object = defs[resultId];
 429                                 object.type = typeId;
 430                                 object.kind = Object::Kind::Value;
 431                                 object.definition = insn;
 432
 433                                 if (insn.opcode() == spv::OpAccessChain || insn.opcode() == spv::OpInBoundsAccessChain)
 434                                 {
 435                                         // interior ptr has two parts:
 436                                         // - logical base ptr, common across all lanes and known at compile time
 437                                         // - per-lane offset
 438                                         Object::ID baseId = insn.word(3);
 439                                         object.pointerBase = getObject(baseId).pointerBase;
 440                                 }
 441                                 break;
 442                         }
 443
 444                         case spv::OpStore:
 445                         case spv::OpAtomicStore:
 446                                 // Don't need to do anything during analysis pass
 447                                 break;
 448
 449                         default:
 450                                 UNIMPLEMENTED("%s", OpcodeName(insn.opcode()).c_str());
 451                         }
 452                 }
 453
 454                 // Assign all Block::ins
 455                 for (auto &it : blocks)
 456                 {
 457                         auto &blockId = it.first;
 458                         auto &block = it.second;
 459                         for (auto &outId : block.outs)
 460                         {
 461                                 auto outIt = blocks.find(outId);
 462                                 ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
 463                                 auto &out = outIt->second;
 464                                 out.ins.emplace(blockId);
 465                         }
 466                 }
 467         }
 468
 469         void SpirvShader::DeclareType(InsnIterator insn)
 470         {
 471                 Type::ID resultId = insn.word(1);
 472
 473                 auto &type = types[resultId];
 474                 type.definition = insn;
 475                 type.sizeInComponents = ComputeTypeSize(insn);
 476
 477                 // A structure is a builtin block if it has a builtin
 478                 // member. All members of such a structure are builtins.
 479                 switch (insn.opcode())
 480                 {
 481                 case spv::OpTypeStruct:
 482                 {
 483                         auto d = memberDecorations.find(resultId);
 484                         if (d != memberDecorations.end())
 485                         {
 486                                 for (auto &m : d->second)
 487                                 {
 488                                         if (m.HasBuiltIn)
 489                                         {
 490                                                 type.isBuiltInBlock = true;
 491                                                 break;
 492                                         }
 493                                 }
 494                         }
 495                         break;
 496                 }
 497                 case spv::OpTypePointer:
 498                 {
 499                         Type::ID elementTypeId = insn.word(3);
 500                         type.element = elementTypeId;
 501                         type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
 502                         type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
 503                         break;
 504                 }
 505                 case spv::OpTypeVector:
 506                 case spv::OpTypeMatrix:
 507                 case spv::OpTypeArray:
 508                 case spv::OpTypeRuntimeArray:
 509                 {
 510                         Type::ID elementTypeId = insn.word(2);
 511                         type.element = elementTypeId;
 512                         break;
 513                 }
 514                 default:
 515                         break;
 516                 }
 517         }
 518
 519         SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
 520         {
 521                 Type::ID typeId = insn.word(1);
 522                 Object::ID resultId = insn.word(2);
 523                 auto &object = defs[resultId];
 524                 auto &objectTy = getType(typeId);
 525                 object.type = typeId;
 526                 object.kind = Object::Kind::Constant;
 527                 object.definition = insn;
 528                 object.constantValue = std::unique_ptr<uint32_t[]>(new uint32_t[objectTy.sizeInComponents]);
 529                 return object;
 530         }
 531
 532         void SpirvShader::ProcessInterfaceVariable(Object &object)
 533         {
 534                 auto &objectTy = getType(object.type);
 535                 ASSERT(objectTy.storageClass == spv::StorageClassInput || objectTy.storageClass == spv::StorageClassOutput);
 536
 537                 ASSERT(objectTy.opcode() == spv::OpTypePointer);
 538                 auto pointeeTy = getType(objectTy.element);
 539
 540                 auto &builtinInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputBuiltins : outputBuiltins;
 541                 auto &userDefinedInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputs : outputs;
 542
 543                 ASSERT(object.opcode() == spv::OpVariable);
 544                 Object::ID resultId = object.definition.word(2);
 545
 546                 if (objectTy.isBuiltInBlock)
 547                 {
 548                         // walk the builtin block, registering each of its members separately.
 549                         auto m = memberDecorations.find(objectTy.element);
 550                         ASSERT(m != memberDecorations.end());        // otherwise we wouldn't have marked the type chain
 551                         auto &structType = pointeeTy.definition;
 552                         auto offset = 0u;
 553                         auto word = 2u;
 554                         for (auto &member : m->second)
 555                         {
 556                                 auto &memberType = getType(structType.word(word));
 557
 558                                 if (member.HasBuiltIn)
 559                                 {
 560                                         builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
 561                                 }
 562
 563                                 offset += memberType.sizeInComponents;
 564                                 ++word;
 565                         }
 566                         return;
 567                 }
 568
 569                 auto d = decorations.find(resultId);
 570                 if (d != decorations.end() && d->second.HasBuiltIn)
 571                 {
 572                         builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
 573                 }
 574                 else
 575                 {
 576                         object.kind = Object::Kind::InterfaceVariable;
 577                         VisitInterface(resultId,
 578                                                    [&userDefinedInterface](Decorations const &d, AttribType type) {
 579                                                            // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
 580                                                            auto scalarSlot = (d.Location << 2) | d.Component;
 581                                                            ASSERT(scalarSlot >= 0 &&
 582                                                                           scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
 583
 584                                                            auto &slot = userDefinedInterface[scalarSlot];
 585                                                            slot.Type = type;
 586                                                            slot.Flat = d.Flat;
 587                                                            slot.NoPerspective = d.NoPerspective;
 588                                                            slot.Centroid = d.Centroid;
 589                                                    });
 590                 }
 591         }
 592
 593         void SpirvShader::ProcessExecutionMode(InsnIterator insn)
 594         {
 595                 auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
 596                 switch (mode)
 597                 {
 598                 case spv::ExecutionModeEarlyFragmentTests:
 599                         modes.EarlyFragmentTests = true;
 600                         break;
 601                 case spv::ExecutionModeDepthReplacing:
 602                         modes.DepthReplacing = true;
 603                         break;
 604                 case spv::ExecutionModeDepthGreater:
 605                         modes.DepthGreater = true;
 606                         break;
 607                 case spv::ExecutionModeDepthLess:
 608                         modes.DepthLess = true;
 609                         break;
 610                 case spv::ExecutionModeDepthUnchanged:
 611                         modes.DepthUnchanged = true;
 612                         break;
 613                 case spv::ExecutionModeLocalSize:
 614                         modes.WorkgroupSizeX = insn.word(3);
 615                         modes.WorkgroupSizeY = insn.word(4);
 616                         modes.WorkgroupSizeZ = insn.word(5);
 617                         break;
 618                 case spv::ExecutionModeOriginUpperLeft:
 619                         // This is always the case for a Vulkan shader. Do nothing.
 620                         break;
 621                 default:
 622                         UNIMPLEMENTED("No other execution modes are permitted");
 623                 }
 624         }
 625
 626         uint32_t SpirvShader::ComputeTypeSize(InsnIterator insn)
 627         {
 628                 // Types are always built from the bottom up (with the exception of forward ptrs, which
 629                 // don't appear in Vulkan shaders. Therefore, we can always assume our component parts have
 630                 // already been described (and so their sizes determined)
 631                 switch (insn.opcode())
 632                 {
 633                 case spv::OpTypeVoid:
 634                 case spv::OpTypeSampler:
 635                 case spv::OpTypeImage:
 636                 case spv::OpTypeSampledImage:
 637                 case spv::OpTypeFunction:
 638                 case spv::OpTypeRuntimeArray:
 639                         // Objects that don't consume any space.
 640                         // Descriptor-backed objects currently only need exist at compile-time.
 641                         // Runtime arrays don't appear in places where their size would be interesting
 642                         return 0;
 643
 644                 case spv::OpTypeBool:
 645                 case spv::OpTypeFloat:
 646                 case spv::OpTypeInt:
 647                         // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
 648                         // we might need to change this, but only 32 bit components are required for Vulkan 1.1.
 649                         return 1;
 650
 651                 case spv::OpTypeVector:
 652                 case spv::OpTypeMatrix:
 653                         // Vectors and matrices both consume element count * element size.
 654                         return getType(insn.word(2)).sizeInComponents * insn.word(3);
 655
 656                 case spv::OpTypeArray:
 657                 {
 658                         // Element count * element size. Array sizes come from constant ids.
 659                         auto arraySize = GetConstantInt(insn.word(3));
 660                         return getType(insn.word(2)).sizeInComponents * arraySize;
 661                 }
 662
 663                 case spv::OpTypeStruct:
 664                 {
 665                         uint32_t size = 0;
 666                         for (uint32_t i = 2u; i < insn.wordCount(); i++)
 667                         {
 668                                 size += getType(insn.word(i)).sizeInComponents;
 669                         }
 670                         return size;
 671                 }
 672
 673                 case spv::OpTypePointer:
 674                         // Runtime representation of a pointer is a per-lane index.
 675                         // Note: clients are expected to look through the pointer if they want the pointee size instead.
 676                         return 1;
 677
 678                 default:
 679                         // Some other random insn.
 680                         UNIMPLEMENTED("Only types are supported");
 681                         return 0;
 682                 }
 683         }
 684
 685         bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
 686         {
 687                 switch (storageClass)
 688                 {
 689                 case spv::StorageClassUniform:
 690                 case spv::StorageClassStorageBuffer:
 691                 case spv::StorageClassPushConstant:
 692                         return false;
 693                 default:
 694                         return true;
 695                 }
 696         }
 697
 698         template<typename F>
 699         int SpirvShader::VisitInterfaceInner(Type::ID id, Decorations d, F f) const
 700         {
 701                 // Recursively walks variable definition and its type tree, taking into account
 702                 // any explicit Location or Component decorations encountered; where explicit
 703                 // Locations or Components are not specified, assigns them sequentially.
 704                 // Collected decorations are carried down toward the leaves and across
 705                 // siblings; Effect of decorations intentionally does not flow back up the tree.
 706                 //
 707                 // F is a functor to be called with the effective decoration set for every component.
 708                 //
 709                 // Returns the next available location, and calls f().
 710
 711                 // This covers the rules in Vulkan 1.1 spec, 14.1.4 Location Assignment.
 712
 713                 ApplyDecorationsForId(&d, id);
 714
 715                 auto const &obj = getType(id);
 716                 switch(obj.opcode())
 717                 {
 718                 case spv::OpTypePointer:
 719                         return VisitInterfaceInner<F>(obj.definition.word(3), d, f);
 720                 case spv::OpTypeMatrix:
 721                         for (auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
 722                         {
 723                                 // consumes same components of N consecutive locations
 724                                 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
 725                         }
 726                         return d.Location;
 727                 case spv::OpTypeVector:
 728                         for (auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
 729                         {
 730                                 // consumes N consecutive components in the same location
 731                                 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
 732                         }
 733                         return d.Location + 1;
 734                 case spv::OpTypeFloat:
 735                         f(d, ATTRIBTYPE_FLOAT);
 736                         return d.Location + 1;
 737                 case spv::OpTypeInt:
 738                         f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
 739                         return d.Location + 1;
 740                 case spv::OpTypeBool:
 741                         f(d, ATTRIBTYPE_UINT);
 742                         return d.Location + 1;
 743                 case spv::OpTypeStruct:
 744                 {
 745                         // iterate over members, which may themselves have Location/Component decorations
 746                         for (auto i = 0u; i < obj.definition.wordCount() - 2; i++)
 747                         {
 748                                 ApplyDecorationsForIdMember(&d, id, i);
 749                                 d.Location = VisitInterfaceInner<F>(obj.definition.word(i + 2), d, f);
 750                                 d.Component = 0;    // Implicit locations always have component=0
 751                         }
 752                         return d.Location;
 753                 }
 754                 case spv::OpTypeArray:
 755                 {
 756                         auto arraySize = GetConstantInt(obj.definition.word(3));
 757                         for (auto i = 0u; i < arraySize; i++)
 758                         {
 759                                 d.Location = VisitInterfaceInner<F>(obj.definition.word(2), d, f);
 760                         }
 761                         return d.Location;
 762                 }
 763                 default:
 764                         // Intentionally partial; most opcodes do not participate in type hierarchies
 765                         return 0;
 766                 }
 767         }
 768
 769         template<typename F>
 770         void SpirvShader::VisitInterface(Object::ID id, F f) const
 771         {
 772                 // Walk a variable definition and call f for each component in it.
 773                 Decorations d{};
 774                 ApplyDecorationsForId(&d, id);
 775
 776                 auto def = getObject(id).definition;
 777                 ASSERT(def.opcode() == spv::OpVariable);
 778                 VisitInterfaceInner<F>(def.word(1), d, f);
 779         }
 780
 781         SIMD::Int SpirvShader::WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
 782         {
 783                 // Produce a offset into external memory in sizeof(float) units
 784
 785                 int constantOffset = 0;
 786                 SIMD::Int dynamicOffset = SIMD::Int(0);
 787                 auto &baseObject = getObject(id);
 788                 Type::ID typeId = getType(baseObject.type).element;
 789                 Decorations d{};
 790                 ApplyDecorationsForId(&d, baseObject.type);
 791
 792                 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
 793                 // Start with its offset and build from there.
 794                 if (baseObject.kind == Object::Kind::Value)
 795                 {
 796                         dynamicOffset += routine->getIntermediate(id).Int(0);
 797                 }
 798
 799                 for (auto i = 0u; i < numIndexes; i++)
 800                 {
 801                         auto & type = getType(typeId);
 802                         switch (type.definition.opcode())
 803                         {
 804                         case spv::OpTypeStruct:
 805                         {
 806                                 int memberIndex = GetConstantInt(indexIds[i]);
 807                                 ApplyDecorationsForIdMember(&d, typeId, memberIndex);
 808                                 ASSERT(d.HasOffset);
 809                                 constantOffset += d.Offset / sizeof(float);
 810                                 typeId = type.definition.word(2u + memberIndex);
 811                                 break;
 812                         }
 813                         case spv::OpTypeArray:
 814                         case spv::OpTypeRuntimeArray:
 815                         {
 816                                 // TODO: b/127950082: Check bounds.
 817                                 ApplyDecorationsForId(&d, typeId);
 818                                 ASSERT(d.HasArrayStride);
 819                                 auto & obj = getObject(indexIds[i]);
 820                                 if (obj.kind == Object::Kind::Constant)
 821                                         constantOffset += d.ArrayStride/sizeof(float) * GetConstantInt(indexIds[i]);
 822                                 else
 823                                         dynamicOffset += SIMD::Int(d.ArrayStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
 824                                 typeId = type.element;
 825                                 break;
 826                         }
 827                         case spv::OpTypeMatrix:
 828                         {
 829                                 // TODO: b/127950082: Check bounds.
 830                                 ApplyDecorationsForId(&d, typeId);
 831                                 ASSERT(d.HasMatrixStride);
 832                                 auto & obj = getObject(indexIds[i]);
 833                                 if (obj.kind == Object::Kind::Constant)
 834                                         constantOffset += d.MatrixStride/sizeof(float) * GetConstantInt(indexIds[i]);
 835                                 else
 836                                         dynamicOffset += SIMD::Int(d.MatrixStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
 837                                 typeId = type.element;
 838                                 break;
 839                         }
 840                         case spv::OpTypeVector:
 841                         {
 842                                 auto & obj = getObject(indexIds[i]);
 843                                 if (obj.kind == Object::Kind::Constant)
 844                                         constantOffset += GetConstantInt(indexIds[i]);
 845                                 else
 846                                         dynamicOffset += routine->getIntermediate(indexIds[i]).Int(0);
 847                                 typeId = type.element;
 848                                 break;
 849                         }
 850                         default:
 851                                 UNIMPLEMENTED("Unexpected type '%s' in WalkExplicitLayoutAccessChain", OpcodeName(type.definition.opcode()).c_str());
 852                         }
 853                 }
 854
 855                 return dynamicOffset + SIMD::Int(constantOffset);
 856         }
 857
 858         SIMD::Int SpirvShader::WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
 859         {
 860                 // TODO: avoid doing per-lane work in some cases if we can?
 861                 // Produce a *component* offset into location-oriented memory
 862
 863                 int constantOffset = 0;
 864                 SIMD::Int dynamicOffset = SIMD::Int(0);
 865                 auto &baseObject = getObject(id);
 866                 Type::ID typeId = getType(baseObject.type).element;
 867
 868                 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
 869                 // Start with its offset and build from there.
 870                 if (baseObject.kind == Object::Kind::Value)
 871                 {
 872                         dynamicOffset += routine->getIntermediate(id).Int(0);
 873                 }
 874
 875                 for (auto i = 0u; i < numIndexes; i++)
 876                 {
 877                         auto & type = getType(typeId);
 878                         switch(type.opcode())
 879                         {
 880                         case spv::OpTypeStruct:
 881                         {
 882                                 int memberIndex = GetConstantInt(indexIds[i]);
 883                                 int offsetIntoStruct = 0;
 884                                 for (auto j = 0; j < memberIndex; j++) {
 885                                         auto memberType = type.definition.word(2u + j);
 886                                         offsetIntoStruct += getType(memberType).sizeInComponents;
 887                                 }
 888                                 constantOffset += offsetIntoStruct;
 889                                 typeId = type.definition.word(2u + memberIndex);
 890                                 break;
 891                         }
 892
 893                         case spv::OpTypeVector:
 894                         case spv::OpTypeMatrix:
 895                         case spv::OpTypeArray:
 896                         case spv::OpTypeRuntimeArray:
 897                         {
 898                                 // TODO: b/127950082: Check bounds.
 899                                 auto stride = getType(type.element).sizeInComponents;
 900                                 auto & obj = getObject(indexIds[i]);
 901                                 if (obj.kind == Object::Kind::Constant)
 902                                         constantOffset += stride * GetConstantInt(indexIds[i]);
 903                                 else
 904                                         dynamicOffset += SIMD::Int(stride) * routine->getIntermediate(indexIds[i]).Int(0);
 905                                 typeId = type.element;
 906                                 break;
 907                         }
 908
 909                         default:
 910                                 UNIMPLEMENTED("Unexpected type '%s' in WalkAccessChain", OpcodeName(type.opcode()).c_str());
 911                         }
 912                 }
 913
 914                 return dynamicOffset + SIMD::Int(constantOffset);
 915         }
 916
 917         uint32_t SpirvShader::WalkLiteralAccessChain(Type::ID typeId, uint32_t numIndexes, uint32_t const *indexes) const
 918         {
 919                 uint32_t constantOffset = 0;
 920
 921                 for (auto i = 0u; i < numIndexes; i++)
 922                 {
 923                         auto & type = getType(typeId);
 924                         switch(type.opcode())
 925                         {
 926                         case spv::OpTypeStruct:
 927                         {
 928                                 int memberIndex = indexes[i];
 929                                 int offsetIntoStruct = 0;
 930                                 for (auto j = 0; j < memberIndex; j++) {
 931                                         auto memberType = type.definition.word(2u + j);
 932                                         offsetIntoStruct += getType(memberType).sizeInComponents;
 933                                 }
 934                                 constantOffset += offsetIntoStruct;
 935                                 typeId = type.definition.word(2u + memberIndex);
 936                                 break;
 937                         }
 938
 939                         case spv::OpTypeVector:
 940                         case spv::OpTypeMatrix:
 941                         case spv::OpTypeArray:
 942                         {
 943                                 auto elementType = type.definition.word(2);
 944                                 auto stride = getType(elementType).sizeInComponents;
 945                                 constantOffset += stride * indexes[i];
 946                                 typeId = elementType;
 947                                 break;
 948                         }
 949
 950                         default:
 951                                 UNIMPLEMENTED("Unexpected type in WalkLiteralAccessChain");
 952                         }
 953                 }
 954
 955                 return constantOffset;
 956         }
 957
 958         void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
 959         {
 960                 switch (decoration)
 961                 {
 962                 case spv::DecorationLocation:
 963                         HasLocation = true;
 964                         Location = static_cast<int32_t>(arg);
 965                         break;
 966                 case spv::DecorationComponent:
 967                         HasComponent = true;
 968                         Component = arg;
 969                         break;
 970                 case spv::DecorationDescriptorSet:
 971                         HasDescriptorSet = true;
 972                         DescriptorSet = arg;
 973                         break;
 974                 case spv::DecorationBinding:
 975                         HasBinding = true;
 976                         Binding = arg;
 977                         break;
 978                 case spv::DecorationBuiltIn:
 979                         HasBuiltIn = true;
 980                         BuiltIn = static_cast<spv::BuiltIn>(arg);
 981                         break;
 982                 case spv::DecorationFlat:
 983                         Flat = true;
 984                         break;
 985                 case spv::DecorationNoPerspective:
 986                         NoPerspective = true;
 987                         break;
 988                 case spv::DecorationCentroid:
 989                         Centroid = true;
 990                         break;
 991                 case spv::DecorationBlock:
 992                         Block = true;
 993                         break;
 994                 case spv::DecorationBufferBlock:
 995                         BufferBlock = true;
 996                         break;
 997                 case spv::DecorationOffset:
 998                         HasOffset = true;
 999                         Offset = static_cast<int32_t>(arg);
1000                         break;
1001                 case spv::DecorationArrayStride:
1002                         HasArrayStride = true;
1003                         ArrayStride = static_cast<int32_t>(arg);
1004                         break;
1005                 case spv::DecorationMatrixStride:
1006                         HasMatrixStride = true;
1007                         MatrixStride = static_cast<int32_t>(arg);
1008                         break;
1009                 default:
1010                         // Intentionally partial, there are many decorations we just don't care about.
1011                         break;
1012                 }
1013         }
1014
1015         void SpirvShader::Decorations::Apply(const sw::SpirvShader::Decorations &src)
1016         {
1017                 // Apply a decoration group to this set of decorations
1018                 if (src.HasBuiltIn)
1019                 {
1020                         HasBuiltIn = true;
1021                         BuiltIn = src.BuiltIn;
1022                 }
1023
1024                 if (src.HasLocation)
1025                 {
1026                         HasLocation = true;
1027                         Location = src.Location;
1028                 }
1029
1030                 if (src.HasComponent)
1031                 {
1032                         HasComponent = true;
1033                         Component = src.Component;
1034                 }
1035
1036                 if (src.HasDescriptorSet)
1037                 {
1038                         HasDescriptorSet = true;
1039                         DescriptorSet = src.DescriptorSet;
1040                 }
1041
1042                 if (src.HasBinding)
1043                 {
1044                         HasBinding = true;
1045                         Binding = src.Binding;
1046                 }
1047
1048                 if (src.HasOffset)
1049                 {
1050                         HasOffset = true;
1051                         Offset = src.Offset;
1052                 }
1053
1054                 if (src.HasArrayStride)
1055                 {
1056                         HasArrayStride = true;
1057                         ArrayStride = src.ArrayStride;
1058                 }
1059
1060                 if (src.HasMatrixStride)
1061                 {
1062                         HasMatrixStride = true;
1063                         MatrixStride = src.MatrixStride;
1064                 }
1065
1066                 Flat |= src.Flat;
1067                 NoPerspective |= src.NoPerspective;
1068                 Centroid |= src.Centroid;
1069                 Block |= src.Block;
1070                 BufferBlock |= src.BufferBlock;
1071         }
1072
1073         void SpirvShader::ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const
1074         {
1075                 auto it = decorations.find(id);
1076                 if (it != decorations.end())
1077                         d->Apply(it->second);
1078         }
1079
1080         void SpirvShader::ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const
1081         {
1082                 auto it = memberDecorations.find(id);
1083                 if (it != memberDecorations.end() && member < it->second.size())
1084                 {
1085                         d->Apply(it->second[member]);
1086                 }
1087         }
1088
1089         uint32_t SpirvShader::GetConstantInt(Object::ID id) const
1090         {
1091                 // Slightly hackish access to constants very early in translation.
1092                 // General consumption of constants by other instructions should
1093                 // probably be just lowered to Reactor.
1094
1095                 // TODO: not encountered yet since we only use this for array sizes etc,
1096                 // but is possible to construct integer constant 0 via OpConstantNull.
1097                 auto insn = getObject(id).definition;
1098                 ASSERT(insn.opcode() == spv::OpConstant);
1099                 ASSERT(getType(insn.word(1)).opcode() == spv::OpTypeInt);
1100                 return insn.word(3);
1101         }
1102
1103         // emit-time
1104
1105         void SpirvShader::emitProlog(SpirvRoutine *routine) const
1106         {
1107                 for (auto insn : *this)
1108                 {
1109                         switch (insn.opcode())
1110                         {
1111                         case spv::OpVariable:
1112                         {
1113                                 Type::ID resultPointerTypeId = insn.word(1);
1114                                 auto resultPointerType = getType(resultPointerTypeId);
1115                                 auto pointeeType = getType(resultPointerType.element);
1116
1117                                 if(pointeeType.sizeInComponents > 0)  // TODO: what to do about zero-slot objects?
1118                                 {
1119                                         Object::ID resultId = insn.word(2);
1120                                         routine->createLvalue(resultId, pointeeType.sizeInComponents);
1121                                 }
1122                                 break;
1123                         }
1124                         default:
1125                                 // Nothing else produces interface variables, so can all be safely ignored.
1126                                 break;
1127                         }
1128                 }
1129         }
1130
1131         void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask) const
1132         {
1133                 EmitState state;
1134                 state.setActiveLaneMask(activeLaneMask);
1135                 state.routine = routine;
1136
1137                 // Emit everything up to the first label
1138                 // TODO: Separate out dispatch of block from non-block instructions?
1139                 for (auto insn : *this)
1140                 {
1141                         if (insn.opcode() == spv::OpLabel)
1142                         {
1143                                 break;
1144                         }
1145                         EmitInstruction(insn, &state);
1146                 }
1147
1148                 // Emit all the blocks in BFS order, starting with the main block.
1149                 std::queue<Block::ID> pending;
1150                 pending.push(mainBlockId);
1151                 while (pending.size() > 0)
1152                 {
1153                         auto id = pending.front();
1154                         pending.pop();
1155                         if (state.visited.count(id) == 0)
1156                         {
1157                                 EmitBlock(id, &state);
1158                                 for (auto it : getBlock(id).outs)
1159                                 {
1160                                         pending.push(it);
1161                                 }
1162                         }
1163                 }
1164         }
1165
1166         void SpirvShader::EmitBlock(Block::ID id, EmitState *state) const
1167         {
1168                 if (state->visited.count(id) > 0)
1169                 {
1170                         return; // Already processed this block.
1171                 }
1172
1173                 state->visited.emplace(id);
1174
1175                 auto &block = getBlock(id);
1176
1177                 switch (block.kind)
1178                 {
1179                         case Block::Simple:
1180                         case Block::StructuredBranchConditional:
1181                         case Block::UnstructuredBranchConditional:
1182                         case Block::StructuredSwitch:
1183                         case Block::UnstructuredSwitch:
1184                                 if (id != mainBlockId)
1185                                 {
1186                                         // Emit all preceeding blocks and set the activeLaneMask.
1187                                         Intermediate activeLaneMask(1);
1188                                         activeLaneMask.move(0, SIMD::Int(0));
1189                                         for (auto in : block.ins)
1190                                         {
1191                                                 EmitBlock(in, state);
1192                                                 auto inMask = state->getActiveLaneMaskEdge(in, id);
1193                                                 activeLaneMask.replace(0, activeLaneMask.Int(0) | inMask);
1194                                         }
1195                                         state->setActiveLaneMask(activeLaneMask.Int(0));
1196                                 }
1197                                 state->currentBlock = id;
1198                                 EmitInstructions(block.begin(), block.end(), state);
1199                                 break;
1200
1201                         default:
1202                                 UNIMPLEMENTED("Unhandled Block Kind: %d", int(block.kind));
1203                 }
1204         }
1205
1206         void SpirvShader::EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const
1207         {
1208                 for (auto insn = begin; insn != end; insn++)
1209                 {
1210                         auto res = EmitInstruction(insn, state);
1211                         switch (res)
1212                         {
1213                         case EmitResult::Continue:
1214                                 continue;
1215                         case EmitResult::Terminator:
1216                                 break;
1217                         default:
1218                                 UNREACHABLE("Unexpected EmitResult %d", int(res));
1219                                 break;
1220                         }
1221                 }
1222         }
1223
1224         SpirvShader::EmitResult SpirvShader::EmitInstruction(InsnIterator insn, EmitState *state) const
1225         {
1226                 switch (insn.opcode())
1227                 {
1228                 case spv::OpTypeVoid:
1229                 case spv::OpTypeInt:
1230                 case spv::OpTypeFloat:
1231                 case spv::OpTypeBool:
1232                 case spv::OpTypeVector:
1233                 case spv::OpTypeArray:
1234                 case spv::OpTypeRuntimeArray:
1235                 case spv::OpTypeMatrix:
1236                 case spv::OpTypeStruct:
1237                 case spv::OpTypePointer:
1238                 case spv::OpTypeFunction:
1239                 case spv::OpExecutionMode:
1240                 case spv::OpMemoryModel:
1241                 case spv::OpFunction:
1242                 case spv::OpFunctionEnd:
1243                 case spv::OpConstant:
1244                 case spv::OpConstantNull:
1245                 case spv::OpConstantTrue:
1246                 case spv::OpConstantFalse:
1247                 case spv::OpConstantComposite:
1248                 case spv::OpUndef:
1249                 case spv::OpExtension:
1250                 case spv::OpCapability:
1251                 case spv::OpEntryPoint:
1252                 case spv::OpExtInstImport:
1253                 case spv::OpDecorate:
1254                 case spv::OpMemberDecorate:
1255                 case spv::OpGroupDecorate:
1256                 case spv::OpGroupMemberDecorate:
1257                 case spv::OpDecorationGroup:
1258                 case spv::OpName:
1259                 case spv::OpMemberName:
1260                 case spv::OpSource:
1261                 case spv::OpSourceContinued:
1262                 case spv::OpSourceExtension:
1263                 case spv::OpLine:
1264                 case spv::OpNoLine:
1265                 case spv::OpModuleProcessed:
1266                 case spv::OpString:
1267                         // Nothing to do at emit time. These are either fully handled at analysis time,
1268                         // or don't require any work at all.
1269                         return EmitResult::Continue;
1270
1271                 case spv::OpLabel:
1272                         return EmitResult::Continue;
1273
1274                 case spv::OpVariable:
1275                         return EmitVariable(insn, state);
1276
1277                 case spv::OpLoad:
1278                 case spv::OpAtomicLoad:
1279                         return EmitLoad(insn, state);
1280
1281                 case spv::OpStore:
1282                 case spv::OpAtomicStore:
1283                         return EmitStore(insn, state);
1284
1285                 case spv::OpAccessChain:
1286                 case spv::OpInBoundsAccessChain:
1287                         return EmitAccessChain(insn, state);
1288
1289                 case spv::OpCompositeConstruct:
1290                         return EmitCompositeConstruct(insn, state);
1291
1292                 case spv::OpCompositeInsert:
1293                         return EmitCompositeInsert(insn, state);
1294
1295                 case spv::OpCompositeExtract:
1296                         return EmitCompositeExtract(insn, state);
1297
1298                 case spv::OpVectorShuffle:
1299                         return EmitVectorShuffle(insn, state);
1300
1301                 case spv::OpVectorExtractDynamic:
1302                         return EmitVectorExtractDynamic(insn, state);
1303
1304                 case spv::OpVectorInsertDynamic:
1305                         return EmitVectorInsertDynamic(insn, state);
1306
1307                 case spv::OpVectorTimesScalar:
1308                         return EmitVectorTimesScalar(insn, state);
1309
1310                 case spv::OpNot:
1311                 case spv::OpSNegate:
1312                 case spv::OpFNegate:
1313                 case spv::OpLogicalNot:
1314                 case spv::OpConvertFToU:
1315                 case spv::OpConvertFToS:
1316                 case spv::OpConvertSToF:
1317                 case spv::OpConvertUToF:
1318                 case spv::OpBitcast:
1319                 case spv::OpIsInf:
1320                 case spv::OpIsNan:
1321                 case spv::OpDPdx:
1322                 case spv::OpDPdxCoarse:
1323                 case spv::OpDPdy:
1324                 case spv::OpDPdyCoarse:
1325                 case spv::OpFwidth:
1326                 case spv::OpFwidthCoarse:
1327                 case spv::OpDPdxFine:
1328                 case spv::OpDPdyFine:
1329                 case spv::OpFwidthFine:
1330                         return EmitUnaryOp(insn, state);
1331
1332                 case spv::OpIAdd:
1333                 case spv::OpISub:
1334                 case spv::OpIMul:
1335                 case spv::OpSDiv:
1336                 case spv::OpUDiv:
1337                 case spv::OpFAdd:
1338                 case spv::OpFSub:
1339                 case spv::OpFMul:
1340                 case spv::OpFDiv:
1341                 case spv::OpFMod:
1342                 case spv::OpFRem:
1343                 case spv::OpFOrdEqual:
1344                 case spv::OpFUnordEqual:
1345                 case spv::OpFOrdNotEqual:
1346                 case spv::OpFUnordNotEqual:
1347                 case spv::OpFOrdLessThan:
1348                 case spv::OpFUnordLessThan:
1349                 case spv::OpFOrdGreaterThan:
1350                 case spv::OpFUnordGreaterThan:
1351                 case spv::OpFOrdLessThanEqual:
1352                 case spv::OpFUnordLessThanEqual:
1353                 case spv::OpFOrdGreaterThanEqual:
1354                 case spv::OpFUnordGreaterThanEqual:
1355                 case spv::OpSMod:
1356                 case spv::OpSRem:
1357                 case spv::OpUMod:
1358                 case spv::OpIEqual:
1359                 case spv::OpINotEqual:
1360                 case spv::OpUGreaterThan:
1361                 case spv::OpSGreaterThan:
1362                 case spv::OpUGreaterThanEqual:
1363                 case spv::OpSGreaterThanEqual:
1364                 case spv::OpULessThan:
1365                 case spv::OpSLessThan:
1366                 case spv::OpULessThanEqual:
1367                 case spv::OpSLessThanEqual:
1368                 case spv::OpShiftRightLogical:
1369                 case spv::OpShiftRightArithmetic:
1370                 case spv::OpShiftLeftLogical:
1371                 case spv::OpBitwiseOr:
1372                 case spv::OpBitwiseXor:
1373                 case spv::OpBitwiseAnd:
1374                 case spv::OpLogicalOr:
1375                 case spv::OpLogicalAnd:
1376                 case spv::OpLogicalEqual:
1377                 case spv::OpLogicalNotEqual:
1378                 case spv::OpUMulExtended:
1379                 case spv::OpSMulExtended:
1380                         return EmitBinaryOp(insn, state);
1381
1382                 case spv::OpDot:
1383                         return EmitDot(insn, state);
1384
1385                 case spv::OpSelect:
1386                         return EmitSelect(insn, state);
1387
1388                 case spv::OpExtInst:
1389                         return EmitExtendedInstruction(insn, state);
1390
1391                 case spv::OpAny:
1392                         return EmitAny(insn, state);
1393
1394                 case spv::OpAll:
1395                         return EmitAll(insn, state);
1396
1397                 case spv::OpBranch:
1398                         return EmitBranch(insn, state);
1399
1400                 case spv::OpPhi:
1401                         return EmitPhi(insn, state);
1402
1403                 case spv::OpSelectionMerge:
1404                         return EmitResult::Continue;
1405
1406                 case spv::OpBranchConditional:
1407                         return EmitBranchConditional(insn, state);
1408
1409                 case spv::OpSwitch:
1410                         return EmitSwitch(insn, state);
1411
1412                 case spv::OpUnreachable:
1413                         return EmitUnreachable(insn, state);
1414
1415                 case spv::OpReturn:
1416                         return EmitReturn(insn, state);
1417
1418                 default:
1419                         UNIMPLEMENTED("opcode: %s", OpcodeName(insn.opcode()).c_str());
1420                         break;
1421                 }
1422
1423                 return EmitResult::Continue;
1424         }
1425
1426         SpirvShader::EmitResult SpirvShader::EmitVariable(InsnIterator insn, EmitState *state) const
1427         {
1428                 auto routine = state->routine;
1429                 Object::ID resultId = insn.word(2);
1430                 auto &object = getObject(resultId);
1431                 auto &objectTy = getType(object.type);
1432                 switch (objectTy.storageClass)
1433                 {
1434                 case spv::StorageClassInput:
1435                 {
1436                         if (object.kind == Object::Kind::InterfaceVariable)
1437                         {
1438                                 auto &dst = routine->getValue(resultId);
1439                                 int offset = 0;
1440                                 VisitInterface(resultId,
1441                                                                 [&](Decorations const &d, AttribType type) {
1442                                                                         auto scalarSlot = d.Location << 2 | d.Component;
1443                                                                         dst[offset++] = routine->inputs[scalarSlot];
1444                                                                 });
1445                         }
1446                         break;
1447                 }
1448                 case spv::StorageClassUniform:
1449                 case spv::StorageClassStorageBuffer:
1450                 {
1451                         Decorations d{};
1452                         ApplyDecorationsForId(&d, resultId);
1453                         ASSERT(d.DescriptorSet >= 0);
1454                         ASSERT(d.Binding >= 0);
1455
1456                         size_t bindingOffset = routine->pipelineLayout->getBindingOffset(d.DescriptorSet, d.Binding);
1457
1458                         Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
1459                         Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // VkDescriptorBufferInfo*
1460                         Pointer<Byte> buffer = *Pointer<Pointer<Byte>>(binding + OFFSET(VkDescriptorBufferInfo, buffer)); // vk::Buffer*
1461                         Pointer<Byte> data = *Pointer<Pointer<Byte>>(buffer + vk::Buffer::DataOffset); // void*
1462                         Int offset = *Pointer<Int>(binding + OFFSET(VkDescriptorBufferInfo, offset));
1463                         Pointer<Byte> address = data + offset;
1464                         routine->physicalPointers[resultId] = address;
1465                         break;
1466                 }
1467                 case spv::StorageClassPushConstant:
1468                 {
1469                         routine->physicalPointers[resultId] = routine->pushConstants;
1470                         break;
1471                 }
1472                 default:
1473                         break;
1474                 }
1475
1476                 return EmitResult::Continue;
1477         }
1478
1479         SpirvShader::EmitResult SpirvShader::EmitLoad(InsnIterator insn, EmitState *state) const
1480         {
1481                 auto routine = state->routine;
1482                 bool atomic = (insn.opcode() == spv::OpAtomicLoad);
1483                 Object::ID resultId = insn.word(2);
1484                 Object::ID pointerId = insn.word(3);
1485                 auto &result = getObject(resultId);
1486                 auto &resultTy = getType(result.type);
1487                 auto &pointer = getObject(pointerId);
1488                 auto &pointerBase = getObject(pointer.pointerBase);
1489                 auto &pointerBaseTy = getType(pointerBase.type);
1490                 std::memory_order memoryOrder = std::memory_order_relaxed;
1491
1492                 if(atomic)
1493                 {
1494                         Object::ID semanticsId = insn.word(5);
1495                         auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1496                         memoryOrder = MemoryOrder(memorySemantics);
1497                 }
1498
1499                 ASSERT(getType(pointer.type).element == result.type);
1500                 ASSERT(Type::ID(insn.word(1)) == result.type);
1501                 ASSERT(!atomic || getType(getType(pointer.type).element).opcode() == spv::OpTypeInt);  // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1502
1503                 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1504                 {
1505                         UNIMPLEMENTED("StorageClassImage load not yet implemented");
1506                 }
1507
1508                 Pointer<Float> ptrBase;
1509                 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1510                 {
1511                         ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1512                 }
1513                 else
1514                 {
1515                         ptrBase = &routine->getValue(pointer.pointerBase)[0];
1516                 }
1517
1518                 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1519                 auto anyInactiveLanes = SignMask(~state->activeLaneMask()) != 0;
1520
1521                 auto load = std::unique_ptr<SIMD::Float[]>(new SIMD::Float[resultTy.sizeInComponents]);
1522
1523                 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1524                 {
1525                         // Divergent offsets or masked lanes.
1526                         auto offsets = pointer.kind == Object::Kind::Value ?
1527                                         As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1528                                         RValue<SIMD::Int>(SIMD::Int(0));
1529                         for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1530                         {
1531                                 // i wish i had a Float,Float,Float,Float constructor here..
1532                                 for (int j = 0; j < SIMD::Width; j++)
1533                                 {
1534                                         If(Extract(state->activeLaneMask(), j) != 0)
1535                                         {
1536                                                 Int offset = Int(i) + Extract(offsets, j);
1537                                                 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1538                                                 load[i] = Insert(load[i], Load(&ptrBase[offset], sizeof(float), atomic, memoryOrder), j);
1539                                         }
1540                                 }
1541                         }
1542                 }
1543                 Else
1544                 {
1545                         // No divergent offsets or masked lanes.
1546                         if (interleavedByLane)
1547                         {
1548                                 // Lane-interleaved data.
1549                                 Pointer<SIMD::Float> src = ptrBase;
1550                                 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1551                                 {
1552                                         load[i] = Load(&src[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1553                                 }
1554                         }
1555                         else
1556                         {
1557                                 // Non-interleaved data.
1558                                 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1559                                 {
1560                                         load[i] = RValue<SIMD::Float>(Load(&ptrBase[i], sizeof(float), atomic, memoryOrder));  // TODO: optimize alignment
1561                                 }
1562                         }
1563                 }
1564
1565                 auto &dst = routine->createIntermediate(resultId, resultTy.sizeInComponents);
1566                 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1567                 {
1568                         dst.move(i, load[i]);
1569                 }
1570
1571                 return EmitResult::Continue;
1572         }
1573
1574         SpirvShader::EmitResult SpirvShader::EmitStore(InsnIterator insn, EmitState *state) const
1575         {
1576                 auto routine = state->routine;
1577                 bool atomic = (insn.opcode() == spv::OpAtomicStore);
1578                 Object::ID pointerId = insn.word(1);
1579                 Object::ID objectId = insn.word(atomic ? 4 : 2);
1580                 auto &object = getObject(objectId);
1581                 auto &pointer = getObject(pointerId);
1582                 auto &pointerTy = getType(pointer.type);
1583                 auto &elementTy = getType(pointerTy.element);
1584                 auto &pointerBase = getObject(pointer.pointerBase);
1585                 auto &pointerBaseTy = getType(pointerBase.type);
1586                 std::memory_order memoryOrder = std::memory_order_relaxed;
1587
1588                 if(atomic)
1589                 {
1590                         Object::ID semanticsId = insn.word(3);
1591                         auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1592                         memoryOrder = MemoryOrder(memorySemantics);
1593                 }
1594
1595                 ASSERT(!atomic || elementTy.opcode() == spv::OpTypeInt);  // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1596
1597                 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1598                 {
1599                         UNIMPLEMENTED("StorageClassImage store not yet implemented");
1600                 }
1601
1602                 Pointer<Float> ptrBase;
1603                 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1604                 {
1605                         ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1606                 }
1607                 else
1608                 {
1609                         ptrBase = &routine->getValue(pointer.pointerBase)[0];
1610                 }
1611
1612                 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1613                 auto anyInactiveLanes = SignMask(~state->activeLaneMask()) != 0;
1614
1615                 if (object.kind == Object::Kind::Constant)
1616                 {
1617                         // Constant source data.
1618                         auto src = reinterpret_cast<float *>(object.constantValue.get());
1619                         If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1620                         {
1621                                 // Divergent offsets or masked lanes.
1622                                 auto offsets = pointer.kind == Object::Kind::Value ?
1623                                                 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1624                                                 RValue<SIMD::Int>(SIMD::Int(0));
1625                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1626                                 {
1627                                         for (int j = 0; j < SIMD::Width; j++)
1628                                         {
1629                                                 If(Extract(state->activeLaneMask(), j) != 0)
1630                                                 {
1631                                                         Int offset = Int(i) + Extract(offsets, j);
1632                                                         if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1633                                                         Store(RValue<Float>(src[i]), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1634                                                 }
1635                                         }
1636                                 }
1637                         }
1638                         Else
1639                         {
1640                                 // Constant source data.
1641                                 // No divergent offsets or masked lanes.
1642                                 Pointer<SIMD::Float> dst = ptrBase;
1643                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1644                                 {
1645                                         Store(RValue<SIMD::Float>(src[i]), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1646                                 }
1647                         }
1648                 }
1649                 else
1650                 {
1651                         // Intermediate source data.
1652                         auto &src = routine->getIntermediate(objectId);
1653                         If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1654                         {
1655                                 // Divergent offsets or masked lanes.
1656                                 auto offsets = pointer.kind == Object::Kind::Value ?
1657                                                 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1658                                                 RValue<SIMD::Int>(SIMD::Int(0));
1659                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1660                                 {
1661                                         for (int j = 0; j < SIMD::Width; j++)
1662                                         {
1663                                                 If(Extract(state->activeLaneMask(), j) != 0)
1664                                                 {
1665                                                         Int offset = Int(i) + Extract(offsets, j);
1666                                                         if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1667                                                         Store(Extract(src.Float(i), j), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1668                                                 }
1669                                         }
1670                                 }
1671                         }
1672                         Else
1673                         {
1674                                 // No divergent offsets or masked lanes.
1675                                 if (interleavedByLane)
1676                                 {
1677                                         // Lane-interleaved data.
1678                                         Pointer<SIMD::Float> dst = ptrBase;
1679                                         for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1680                                         {
1681                                                 Store(src.Float(i), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1682                                         }
1683                                 }
1684                                 else
1685                                 {
1686                                         // Intermediate source data. Non-interleaved data.
1687                                         Pointer<SIMD::Float> dst = ptrBase;
1688                                         for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1689                                         {
1690                                                 Store<SIMD::Float>(SIMD::Float(src.Float(i)), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1691                                         }
1692                                 }
1693                         }
1694                 }
1695
1696                 return EmitResult::Continue;
1697         }
1698
1699         SpirvShader::EmitResult SpirvShader::EmitAccessChain(InsnIterator insn, EmitState *state) const
1700         {
1701                 auto routine = state->routine;
1702                 Type::ID typeId = insn.word(1);
1703                 Object::ID resultId = insn.word(2);
1704                 Object::ID baseId = insn.word(3);
1705                 uint32_t numIndexes = insn.wordCount() - 4;
1706                 const uint32_t *indexes = insn.wordPointer(4);
1707                 auto &type = getType(typeId);
1708                 ASSERT(type.sizeInComponents == 1);
1709                 ASSERT(getObject(baseId).pointerBase == getObject(resultId).pointerBase);
1710
1711                 auto &dst = routine->createIntermediate(resultId, type.sizeInComponents);
1712
1713                 if(type.storageClass == spv::StorageClassPushConstant ||
1714                    type.storageClass == spv::StorageClassUniform ||
1715                    type.storageClass == spv::StorageClassStorageBuffer)
1716                 {
1717                         dst.move(0, WalkExplicitLayoutAccessChain(baseId, numIndexes, indexes, routine));
1718                 }
1719                 else
1720                 {
1721                         dst.move(0, WalkAccessChain(baseId, numIndexes, indexes, routine));
1722                 }
1723
1724                 return EmitResult::Continue;
1725         }
1726
1727         SpirvShader::EmitResult SpirvShader::EmitCompositeConstruct(InsnIterator insn, EmitState *state) const
1728         {
1729                 auto routine = state->routine;
1730                 auto &type = getType(insn.word(1));
1731                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1732                 auto offset = 0u;
1733
1734                 for (auto i = 0u; i < insn.wordCount() - 3; i++)
1735                 {
1736                         Object::ID srcObjectId = insn.word(3u + i);
1737                         auto & srcObject = getObject(srcObjectId);
1738                         auto & srcObjectTy = getType(srcObject.type);
1739                         GenericValue srcObjectAccess(this, routine, srcObjectId);
1740
1741                         for (auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
1742                         {
1743                                 dst.move(offset++, srcObjectAccess.Float(j));
1744                         }
1745                 }
1746
1747                 return EmitResult::Continue;
1748         }
1749
1750         SpirvShader::EmitResult SpirvShader::EmitCompositeInsert(InsnIterator insn, EmitState *state) const
1751         {
1752                 auto routine = state->routine;
1753                 Type::ID resultTypeId = insn.word(1);
1754                 auto &type = getType(resultTypeId);
1755                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1756                 auto &newPartObject = getObject(insn.word(3));
1757                 auto &newPartObjectTy = getType(newPartObject.type);
1758                 auto firstNewComponent = WalkLiteralAccessChain(resultTypeId, insn.wordCount() - 5, insn.wordPointer(5));
1759
1760                 GenericValue srcObjectAccess(this, routine, insn.word(4));
1761                 GenericValue newPartObjectAccess(this, routine, insn.word(3));
1762
1763                 // old components before
1764                 for (auto i = 0u; i < firstNewComponent; i++)
1765                 {
1766                         dst.move(i, srcObjectAccess.Float(i));
1767                 }
1768                 // new part
1769                 for (auto i = 0u; i < newPartObjectTy.sizeInComponents; i++)
1770                 {
1771                         dst.move(firstNewComponent + i, newPartObjectAccess.Float(i));
1772                 }
1773                 // old components after
1774                 for (auto i = firstNewComponent + newPartObjectTy.sizeInComponents; i < type.sizeInComponents; i++)
1775                 {
1776                         dst.move(i, srcObjectAccess.Float(i));
1777                 }
1778
1779                 return EmitResult::Continue;
1780         }
1781
1782         SpirvShader::EmitResult SpirvShader::EmitCompositeExtract(InsnIterator insn, EmitState *state) const
1783         {
1784                 auto routine = state->routine;
1785                 auto &type = getType(insn.word(1));
1786                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1787                 auto &compositeObject = getObject(insn.word(3));
1788                 Type::ID compositeTypeId = compositeObject.definition.word(1);
1789                 auto firstComponent = WalkLiteralAccessChain(compositeTypeId, insn.wordCount() - 4, insn.wordPointer(4));
1790
1791                 GenericValue compositeObjectAccess(this, routine, insn.word(3));
1792                 for (auto i = 0u; i < type.sizeInComponents; i++)
1793                 {
1794                         dst.move(i, compositeObjectAccess.Float(firstComponent + i));
1795                 }
1796
1797                 return EmitResult::Continue;
1798         }
1799
1800         SpirvShader::EmitResult SpirvShader::EmitVectorShuffle(InsnIterator insn, EmitState *state) const
1801         {
1802                 auto routine = state->routine;
1803                 auto &type = getType(insn.word(1));
1804                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1805
1806                 // Note: number of components in result type, first half type, and second
1807                 // half type are all independent.
1808                 auto &firstHalfType = getType(getObject(insn.word(3)).type);
1809
1810                 GenericValue firstHalfAccess(this, routine, insn.word(3));
1811                 GenericValue secondHalfAccess(this, routine, insn.word(4));
1812
1813                 for (auto i = 0u; i < type.sizeInComponents; i++)
1814                 {
1815                         auto selector = insn.word(5 + i);
1816                         if (selector == static_cast<uint32_t>(-1))
1817                         {
1818                                 // Undefined value. Until we decide to do real undef values, zero is as good
1819                                 // a value as any
1820                                 dst.move(i, RValue<SIMD::Float>(0.0f));
1821                         }
1822                         else if (selector < firstHalfType.sizeInComponents)
1823                         {
1824                                 dst.move(i, firstHalfAccess.Float(selector));
1825                         }
1826                         else
1827                         {
1828                                 dst.move(i, secondHalfAccess.Float(selector - firstHalfType.sizeInComponents));
1829                         }
1830                 }
1831
1832                 return EmitResult::Continue;
1833         }
1834
1835         SpirvShader::EmitResult SpirvShader::EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const
1836         {
1837                 auto routine = state->routine;
1838                 auto &type = getType(insn.word(1));
1839                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1840                 auto &srcType = getType(getObject(insn.word(3)).type);
1841
1842                 GenericValue src(this, routine, insn.word(3));
1843                 GenericValue index(this, routine, insn.word(4));
1844
1845                 SIMD::UInt v = SIMD::UInt(0);
1846
1847                 for (auto i = 0u; i < srcType.sizeInComponents; i++)
1848                 {
1849                         v |= CmpEQ(index.UInt(0), SIMD::UInt(i)) & src.UInt(i);
1850                 }
1851
1852                 dst.move(0, v);
1853                 return EmitResult::Continue;
1854         }
1855
1856         SpirvShader::EmitResult SpirvShader::EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const
1857         {
1858                 auto routine = state->routine;
1859                 auto &type = getType(insn.word(1));
1860                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1861
1862                 GenericValue src(this, routine, insn.word(3));
1863                 GenericValue component(this, routine, insn.word(4));
1864                 GenericValue index(this, routine, insn.word(5));
1865
1866                 for (auto i = 0u; i < type.sizeInComponents; i++)
1867                 {
1868                         SIMD::UInt mask = CmpEQ(SIMD::UInt(i), index.UInt(0));
1869                         dst.move(i, (src.UInt(i) & ~mask) | (component.UInt(0) & mask));
1870                 }
1871                 return EmitResult::Continue;
1872         }
1873
1874         SpirvShader::EmitResult SpirvShader::EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const
1875         {
1876                 auto routine = state->routine;
1877                 auto &type = getType(insn.word(1));
1878                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1879                 auto lhs = GenericValue(this, routine, insn.word(3));
1880                 auto rhs = GenericValue(this, routine, insn.word(4));
1881
1882                 for (auto i = 0u; i < type.sizeInComponents; i++)
1883                 {
1884                         dst.move(i, lhs.Float(i) * rhs.Float(0));
1885                 }
1886
1887                 return EmitResult::Continue;
1888         }
1889
1890         SpirvShader::EmitResult SpirvShader::EmitUnaryOp(InsnIterator insn, EmitState *state) const
1891         {
1892                 auto routine = state->routine;
1893                 auto &type = getType(insn.word(1));
1894                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1895                 auto src = GenericValue(this, routine, insn.word(3));
1896
1897                 for (auto i = 0u; i < type.sizeInComponents; i++)
1898                 {
1899                         switch (insn.opcode())
1900                         {
1901                         case spv::OpNot:
1902                         case spv::OpLogicalNot:         // logical not == bitwise not due to all-bits boolean representation
1903                                 dst.move(i, ~src.UInt(i));
1904                                 break;
1905                         case spv::OpSNegate:
1906                                 dst.move(i, -src.Int(i));
1907                                 break;
1908                         case spv::OpFNegate:
1909                                 dst.move(i, -src.Float(i));
1910                                 break;
1911                         case spv::OpConvertFToU:
1912                                 dst.move(i, SIMD::UInt(src.Float(i)));
1913                                 break;
1914                         case spv::OpConvertFToS:
1915                                 dst.move(i, SIMD::Int(src.Float(i)));
1916                                 break;
1917                         case spv::OpConvertSToF:
1918                                 dst.move(i, SIMD::Float(src.Int(i)));
1919                                 break;
1920                         case spv::OpConvertUToF:
1921                                 dst.move(i, SIMD::Float(src.UInt(i)));
1922                                 break;
1923                         case spv::OpBitcast:
1924                                 dst.move(i, src.Float(i));
1925                                 break;
1926                         case spv::OpIsInf:
1927                                 dst.move(i, IsInf(src.Float(i)));
1928                                 break;
1929                         case spv::OpIsNan:
1930                                 dst.move(i, IsNan(src.Float(i)));
1931                                 break;
1932                         case spv::OpDPdx:
1933                         case spv::OpDPdxCoarse:
1934                                 // Derivative instructions: FS invocations are laid out like so:
1935                                 //    0 1
1936                                 //    2 3
1937                                 static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
1938                                 dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
1939                                 break;
1940                         case spv::OpDPdy:
1941                         case spv::OpDPdyCoarse:
1942                                 dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
1943                                 break;
1944                         case spv::OpFwidth:
1945                         case spv::OpFwidthCoarse:
1946                                 dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))
1947                                                         + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
1948                                 break;
1949                         case spv::OpDPdxFine:
1950                         {
1951                                 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
1952                                 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
1953                                 SIMD::Float v = SIMD::Float(firstRow);
1954                                 v = Insert(v, secondRow, 2);
1955                                 v = Insert(v, secondRow, 3);
1956                                 dst.move(i, v);
1957                                 break;
1958                         }
1959                         case spv::OpDPdyFine:
1960                         {
1961                                 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
1962                                 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
1963                                 SIMD::Float v = SIMD::Float(firstColumn);
1964                                 v = Insert(v, secondColumn, 1);
1965                                 v = Insert(v, secondColumn, 3);
1966                                 dst.move(i, v);
1967                                 break;
1968                         }
1969                         case spv::OpFwidthFine:
1970                         {
1971                                 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
1972                                 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
1973                                 SIMD::Float dpdx = SIMD::Float(firstRow);
1974                                 dpdx = Insert(dpdx, secondRow, 2);
1975                                 dpdx = Insert(dpdx, secondRow, 3);
1976                                 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
1977                                 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
1978                                 SIMD::Float dpdy = SIMD::Float(firstColumn);
1979                                 dpdy = Insert(dpdy, secondColumn, 1);
1980                                 dpdy = Insert(dpdy, secondColumn, 3);
1981                                 dst.move(i, Abs(dpdx) + Abs(dpdy));
1982                                 break;
1983                         }
1984                         default:
1985                                 UNIMPLEMENTED("Unhandled unary operator %s", OpcodeName(insn.opcode()).c_str());
1986                         }
1987                 }
1988
1989                 return EmitResult::Continue;
1990         }
1991
1992         SpirvShader::EmitResult SpirvShader::EmitBinaryOp(InsnIterator insn, EmitState *state) const
1993         {
1994                 auto routine = state->routine;
1995                 auto &type = getType(insn.word(1));
1996                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1997                 auto &lhsType = getType(getObject(insn.word(3)).type);
1998                 auto lhs = GenericValue(this, routine, insn.word(3));
1999                 auto rhs = GenericValue(this, routine, insn.word(4));
2000
2001                 for (auto i = 0u; i < lhsType.sizeInComponents; i++)
2002                 {
2003                         switch (insn.opcode())
2004                         {
2005                         case spv::OpIAdd:
2006                                 dst.move(i, lhs.Int(i) + rhs.Int(i));
2007                                 break;
2008                         case spv::OpISub:
2009                                 dst.move(i, lhs.Int(i) - rhs.Int(i));
2010                                 break;
2011                         case spv::OpIMul:
2012                                 dst.move(i, lhs.Int(i) * rhs.Int(i));
2013                                 break;
2014                         case spv::OpSDiv:
2015                         {
2016                                 SIMD::Int a = lhs.Int(i);
2017                                 SIMD::Int b = rhs.Int(i);
2018                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2019                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2020                                 dst.move(i, a / b);
2021                                 break;
2022                         }
2023                         case spv::OpUDiv:
2024                         {
2025                                 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2026                                 dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
2027                                 break;
2028                         }
2029                         case spv::OpSRem:
2030                         {
2031                                 SIMD::Int a = lhs.Int(i);
2032                                 SIMD::Int b = rhs.Int(i);
2033                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2034                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2035                                 dst.move(i, a % b);
2036                                 break;
2037                         }
2038                         case spv::OpSMod:
2039                         {
2040                                 SIMD::Int a = lhs.Int(i);
2041                                 SIMD::Int b = rhs.Int(i);
2042                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2043                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2044                                 auto mod = a % b;
2045                                 // If a and b have opposite signs, the remainder operation takes
2046                                 // the sign from a but OpSMod is supposed to take the sign of b.
2047                                 // Adding b will ensure that the result has the correct sign and
2048                                 // that it is still congruent to a modulo b.
2049                                 //
2050                                 // See also http://mathforum.org/library/drmath/view/52343.html
2051                                 auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
2052                                 auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
2053                                 dst.move(i, As<SIMD::Float>(fixedMod));
2054                                 break;
2055                         }
2056                         case spv::OpUMod:
2057                         {
2058                                 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2059                                 dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
2060                                 break;
2061                         }
2062                         case spv::OpIEqual:
2063                         case spv::OpLogicalEqual:
2064                                 dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
2065                                 break;
2066                         case spv::OpINotEqual:
2067                         case spv::OpLogicalNotEqual:
2068                                 dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
2069                                 break;
2070                         case spv::OpUGreaterThan:
2071                                 dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
2072                                 break;
2073                         case spv::OpSGreaterThan:
2074                                 dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i)));
2075                                 break;
2076                         case spv::OpUGreaterThanEqual:
2077                                 dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
2078                                 break;
2079                         case spv::OpSGreaterThanEqual:
2080                                 dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i)));
2081                                 break;
2082                         case spv::OpULessThan:
2083                                 dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
2084                                 break;
2085                         case spv::OpSLessThan:
2086                                 dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i)));
2087                                 break;
2088                         case spv::OpULessThanEqual:
2089                                 dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
2090                                 break;
2091                         case spv::OpSLessThanEqual:
2092                                 dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i)));
2093                                 break;
2094                         case spv::OpFAdd:
2095                                 dst.move(i, lhs.Float(i) + rhs.Float(i));
2096                                 break;
2097                         case spv::OpFSub:
2098                                 dst.move(i, lhs.Float(i) - rhs.Float(i));
2099                                 break;
2100                         case spv::OpFMul:
2101                                 dst.move(i, lhs.Float(i) * rhs.Float(i));
2102                                 break;
2103                         case spv::OpFDiv:
2104                                 dst.move(i, lhs.Float(i) / rhs.Float(i));
2105                                 break;
2106                         case spv::OpFMod:
2107                                 // TODO(b/126873455): inaccurate for values greater than 2^24
2108                                 dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
2109                                 break;
2110                         case spv::OpFRem:
2111                                 dst.move(i, lhs.Float(i) % rhs.Float(i));
2112                                 break;
2113                         case spv::OpFOrdEqual:
2114                                 dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
2115                                 break;
2116                         case spv::OpFUnordEqual:
2117                                 dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
2118                                 break;
2119                         case spv::OpFOrdNotEqual:
2120                                 dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
2121                                 break;
2122                         case spv::OpFUnordNotEqual:
2123                                 dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
2124                                 break;
2125                         case spv::OpFOrdLessThan:
2126                                 dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i)));
2127                                 break;
2128                         case spv::OpFUnordLessThan:
2129                                 dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i)));
2130                                 break;
2131                         case spv::OpFOrdGreaterThan:
2132                                 dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i)));
2133                                 break;
2134                         case spv::OpFUnordGreaterThan:
2135                                 dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
2136                                 break;
2137                         case spv::OpFOrdLessThanEqual:
2138                                 dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i)));
2139                                 break;
2140                         case spv::OpFUnordLessThanEqual:
2141                                 dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i)));
2142                                 break;
2143                         case spv::OpFOrdGreaterThanEqual:
2144                                 dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i)));
2145                                 break;
2146                         case spv::OpFUnordGreaterThanEqual:
2147                                 dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
2148                                 break;
2149                         case spv::OpShiftRightLogical:
2150                                 dst.move(i, lhs.UInt(i) >> rhs.UInt(i));
2151                                 break;
2152                         case spv::OpShiftRightArithmetic:
2153                                 dst.move(i, lhs.Int(i) >> rhs.Int(i));
2154                                 break;
2155                         case spv::OpShiftLeftLogical:
2156                                 dst.move(i, lhs.UInt(i) << rhs.UInt(i));
2157                                 break;
2158                         case spv::OpBitwiseOr:
2159                         case spv::OpLogicalOr:
2160                                 dst.move(i, lhs.UInt(i) | rhs.UInt(i));
2161                                 break;
2162                         case spv::OpBitwiseXor:
2163                                 dst.move(i, lhs.UInt(i) ^ rhs.UInt(i));
2164                                 break;
2165                         case spv::OpBitwiseAnd:
2166                         case spv::OpLogicalAnd:
2167                                 dst.move(i, lhs.UInt(i) & rhs.UInt(i));
2168                                 break;
2169                         case spv::OpSMulExtended:
2170                                 // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
2171                                 // In our flat view then, component i is the i'th component of the first member;
2172                                 // component i + N is the i'th component of the second member.
2173                                 dst.move(i, lhs.Int(i) * rhs.Int(i));
2174                                 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
2175                                 break;
2176                         case spv::OpUMulExtended:
2177                                 dst.move(i, lhs.UInt(i) * rhs.UInt(i));
2178                                 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
2179                                 break;
2180                         default:
2181                                 UNIMPLEMENTED("Unhandled binary operator %s", OpcodeName(insn.opcode()).c_str());
2182                         }
2183                 }
2184
2185                 return EmitResult::Continue;
2186         }
2187
2188         SpirvShader::EmitResult SpirvShader::EmitDot(InsnIterator insn, EmitState *state) const
2189         {
2190                 auto routine = state->routine;
2191                 auto &type = getType(insn.word(1));
2192                 ASSERT(type.sizeInComponents == 1);
2193                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2194                 auto &lhsType = getType(getObject(insn.word(3)).type);
2195                 auto lhs = GenericValue(this, routine, insn.word(3));
2196                 auto rhs = GenericValue(this, routine, insn.word(4));
2197
2198                 dst.move(0, Dot(lhsType.sizeInComponents, lhs, rhs));
2199                 return EmitResult::Continue;
2200         }
2201
2202         SpirvShader::EmitResult SpirvShader::EmitSelect(InsnIterator insn, EmitState *state) const
2203         {
2204                 auto routine = state->routine;
2205                 auto &type = getType(insn.word(1));
2206                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2207                 auto cond = GenericValue(this, routine, insn.word(3));
2208                 auto lhs = GenericValue(this, routine, insn.word(4));
2209                 auto rhs = GenericValue(this, routine, insn.word(5));
2210
2211                 for (auto i = 0u; i < type.sizeInComponents; i++)
2212                 {
2213                         dst.move(i, (cond.Int(i) & lhs.Int(i)) | (~cond.Int(i) & rhs.Int(i)));   // FIXME: IfThenElse()
2214                 }
2215
2216                 return EmitResult::Continue;
2217         }
2218
2219         SpirvShader::EmitResult SpirvShader::EmitExtendedInstruction(InsnIterator insn, EmitState *state) const
2220         {
2221                 auto routine = state->routine;
2222                 auto &type = getType(insn.word(1));
2223                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2224                 auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
2225
2226                 switch (extInstIndex)
2227                 {
2228                 case GLSLstd450FAbs:
2229                 {
2230                         auto src = GenericValue(this, routine, insn.word(5));
2231                         for (auto i = 0u; i < type.sizeInComponents; i++)
2232                         {
2233                                 dst.move(i, Abs(src.Float(i)));
2234                         }
2235                         break;
2236                 }
2237                 case GLSLstd450SAbs:
2238                 {
2239                         auto src = GenericValue(this, routine, insn.word(5));
2240                         for (auto i = 0u; i < type.sizeInComponents; i++)
2241                         {
2242                                 dst.move(i, Abs(src.Int(i)));
2243                         }
2244                         break;
2245                 }
2246                 case GLSLstd450Cross:
2247                 {
2248                         auto lhs = GenericValue(this, routine, insn.word(5));
2249                         auto rhs = GenericValue(this, routine, insn.word(6));
2250                         dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
2251                         dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
2252                         dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
2253                         break;
2254                 }
2255                 case GLSLstd450Floor:
2256                 {
2257                         auto src = GenericValue(this, routine, insn.word(5));
2258                         for (auto i = 0u; i < type.sizeInComponents; i++)
2259                         {
2260                                 dst.move(i, Floor(src.Float(i)));
2261                         }
2262                         break;
2263                 }
2264                 case GLSLstd450Trunc:
2265                 {
2266                         auto src = GenericValue(this, routine, insn.word(5));
2267                         for (auto i = 0u; i < type.sizeInComponents; i++)
2268                         {
2269                                 dst.move(i, Trunc(src.Float(i)));
2270                         }
2271                         break;
2272                 }
2273                 case GLSLstd450Ceil:
2274                 {
2275                         auto src = GenericValue(this, routine, insn.word(5));
2276                         for (auto i = 0u; i < type.sizeInComponents; i++)
2277                         {
2278                                 dst.move(i, Ceil(src.Float(i)));
2279                         }
2280                         break;
2281                 }
2282                 case GLSLstd450Fract:
2283                 {
2284                         auto src = GenericValue(this, routine, insn.word(5));
2285                         for (auto i = 0u; i < type.sizeInComponents; i++)
2286                         {
2287                                 dst.move(i, Frac(src.Float(i)));
2288                         }
2289                         break;
2290                 }
2291                 case GLSLstd450Round:
2292                 {
2293                         auto src = GenericValue(this, routine, insn.word(5));
2294                         for (auto i = 0u; i < type.sizeInComponents; i++)
2295                         {
2296                                 dst.move(i, Round(src.Float(i)));
2297                         }
2298                         break;
2299                 }
2300                 case GLSLstd450RoundEven:
2301                 {
2302                         auto src = GenericValue(this, routine, insn.word(5));
2303                         for (auto i = 0u; i < type.sizeInComponents; i++)
2304                         {
2305                                 auto x = Round(src.Float(i));
2306                                 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
2307                                 dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
2308                                                 SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
2309                         }
2310                         break;
2311                 }
2312                 case GLSLstd450FMin:
2313                 {
2314                         auto lhs = GenericValue(this, routine, insn.word(5));
2315                         auto rhs = GenericValue(this, routine, insn.word(6));
2316                         for (auto i = 0u; i < type.sizeInComponents; i++)
2317                         {
2318                                 dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
2319                         }
2320                         break;
2321                 }
2322                 case GLSLstd450FMax:
2323                 {
2324                         auto lhs = GenericValue(this, routine, insn.word(5));
2325                         auto rhs = GenericValue(this, routine, insn.word(6));
2326                         for (auto i = 0u; i < type.sizeInComponents; i++)
2327                         {
2328                                 dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
2329                         }
2330                         break;
2331                 }
2332                 case GLSLstd450SMin:
2333                 {
2334                         auto lhs = GenericValue(this, routine, insn.word(5));
2335                         auto rhs = GenericValue(this, routine, insn.word(6));
2336                         for (auto i = 0u; i < type.sizeInComponents; i++)
2337                         {
2338                                 dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
2339                         }
2340                         break;
2341                 }
2342                 case GLSLstd450SMax:
2343                 {
2344                         auto lhs = GenericValue(this, routine, insn.word(5));
2345                         auto rhs = GenericValue(this, routine, insn.word(6));
2346                         for (auto i = 0u; i < type.sizeInComponents; i++)
2347                         {
2348                                 dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
2349                         }
2350                         break;
2351                 }
2352                 case GLSLstd450UMin:
2353                 {
2354                         auto lhs = GenericValue(this, routine, insn.word(5));
2355                         auto rhs = GenericValue(this, routine, insn.word(6));
2356                         for (auto i = 0u; i < type.sizeInComponents; i++)
2357                         {
2358                                 dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
2359                         }
2360                         break;
2361                 }
2362                 case GLSLstd450UMax:
2363                 {
2364                         auto lhs = GenericValue(this, routine, insn.word(5));
2365                         auto rhs = GenericValue(this, routine, insn.word(6));
2366                         for (auto i = 0u; i < type.sizeInComponents; i++)
2367                         {
2368                                 dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
2369                         }
2370                         break;
2371                 }
2372                 case GLSLstd450Step:
2373                 {
2374                         auto edge = GenericValue(this, routine, insn.word(5));
2375                         auto x = GenericValue(this, routine, insn.word(6));
2376                         for (auto i = 0u; i < type.sizeInComponents; i++)
2377                         {
2378                                 dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
2379                         }
2380                         break;
2381                 }
2382                 case GLSLstd450SmoothStep:
2383                 {
2384                         auto edge0 = GenericValue(this, routine, insn.word(5));
2385                         auto edge1 = GenericValue(this, routine, insn.word(6));
2386                         auto x = GenericValue(this, routine, insn.word(7));
2387                         for (auto i = 0u; i < type.sizeInComponents; i++)
2388                         {
2389                                 auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
2390                                                 (edge1.Float(i) - edge0.Float(i)), SIMD::Float(0.0f)), SIMD::Float(1.0f));
2391                                 dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
2392                         }
2393                         break;
2394                 }
2395                 case GLSLstd450FMix:
2396                 {
2397                         auto x = GenericValue(this, routine, insn.word(5));
2398                         auto y = GenericValue(this, routine, insn.word(6));
2399                         auto a = GenericValue(this, routine, insn.word(7));
2400                         for (auto i = 0u; i < type.sizeInComponents; i++)
2401                         {
2402                                 dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
2403                         }
2404                         break;
2405                 }
2406                 case GLSLstd450FClamp:
2407                 {
2408                         auto x = GenericValue(this, routine, insn.word(5));
2409                         auto minVal = GenericValue(this, routine, insn.word(6));
2410                         auto maxVal = GenericValue(this, routine, insn.word(7));
2411                         for (auto i = 0u; i < type.sizeInComponents; i++)
2412                         {
2413                                 dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
2414                         }
2415                         break;
2416                 }
2417                 case GLSLstd450SClamp:
2418                 {
2419                         auto x = GenericValue(this, routine, insn.word(5));
2420                         auto minVal = GenericValue(this, routine, insn.word(6));
2421                         auto maxVal = GenericValue(this, routine, insn.word(7));
2422                         for (auto i = 0u; i < type.sizeInComponents; i++)
2423                         {
2424                                 dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
2425                         }
2426                         break;
2427                 }
2428                 case GLSLstd450UClamp:
2429                 {
2430                         auto x = GenericValue(this, routine, insn.word(5));
2431                         auto minVal = GenericValue(this, routine, insn.word(6));
2432                         auto maxVal = GenericValue(this, routine, insn.word(7));
2433                         for (auto i = 0u; i < type.sizeInComponents; i++)
2434                         {
2435                                 dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
2436                         }
2437                         break;
2438                 }
2439                 case GLSLstd450FSign:
2440                 {
2441                         auto src = GenericValue(this, routine, insn.word(5));
2442                         for (auto i = 0u; i < type.sizeInComponents; i++)
2443                         {
2444                                 auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
2445                                 auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
2446                                 dst.move(i, neg | pos);
2447                         }
2448                         break;
2449                 }
2450                 case GLSLstd450SSign:
2451                 {
2452                         auto src = GenericValue(this, routine, insn.word(5));
2453                         for (auto i = 0u; i < type.sizeInComponents; i++)
2454                         {
2455                                 auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
2456                                 auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
2457                                 dst.move(i, neg | pos);
2458                         }
2459                         break;
2460                 }
2461                 case GLSLstd450Reflect:
2462                 {
2463                         auto I = GenericValue(this, routine, insn.word(5));
2464                         auto N = GenericValue(this, routine, insn.word(6));
2465
2466                         SIMD::Float d = Dot(type.sizeInComponents, I, N);
2467
2468                         for (auto i = 0u; i < type.sizeInComponents; i++)
2469                         {
2470                                 dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
2471                         }
2472                         break;
2473                 }
2474                 case GLSLstd450Refract:
2475                 {
2476                         auto I = GenericValue(this, routine, insn.word(5));
2477                         auto N = GenericValue(this, routine, insn.word(6));
2478                         auto eta = GenericValue(this, routine, insn.word(7));
2479
2480                         SIMD::Float d = Dot(type.sizeInComponents, I, N);
2481                         SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
2482                         SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
2483                         SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
2484
2485                         for (auto i = 0u; i < type.sizeInComponents; i++)
2486                         {
2487                                 dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
2488                         }
2489                         break;
2490                 }
2491                 case GLSLstd450FaceForward:
2492                 {
2493                         auto N = GenericValue(this, routine, insn.word(5));
2494                         auto I = GenericValue(this, routine, insn.word(6));
2495                         auto Nref = GenericValue(this, routine, insn.word(7));
2496
2497                         SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
2498                         SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
2499
2500                         for (auto i = 0u; i < type.sizeInComponents; i++)
2501                         {
2502                                 auto n = N.Float(i);
2503                                 dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
2504                         }
2505                         break;
2506                 }
2507                 case GLSLstd450Length:
2508                 {
2509                         auto x = GenericValue(this, routine, insn.word(5));
2510                         SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2511
2512                         dst.move(0, Sqrt(d));
2513                         break;
2514                 }
2515                 case GLSLstd450Normalize:
2516                 {
2517                         auto x = GenericValue(this, routine, insn.word(5));
2518                         SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2519                         SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
2520
2521                         for (auto i = 0u; i < type.sizeInComponents; i++)
2522                         {
2523                                 dst.move(i, invLength * x.Float(i));
2524                         }
2525                         break;
2526                 }
2527                 case GLSLstd450Distance:
2528                 {
2529                         auto p0 = GenericValue(this, routine, insn.word(5));
2530                         auto p1 = GenericValue(this, routine, insn.word(6));
2531                         auto p0Type = getType(getObject(insn.word(5)).type);
2532
2533                         // sqrt(dot(p0-p1, p0-p1))
2534                         SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
2535
2536                         for (auto i = 1u; i < p0Type.sizeInComponents; i++)
2537                         {
2538                                 d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
2539                         }
2540
2541                         dst.move(0, Sqrt(d));
2542                         break;
2543                 }
2544                 default:
2545                         UNIMPLEMENTED("Unhandled ExtInst %d", extInstIndex);
2546                 }
2547
2548                 return EmitResult::Continue;
2549         }
2550
2551         std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
2552         {
2553                 switch(memorySemantics)
2554                 {
2555                 case spv::MemorySemanticsMaskNone:                   return std::memory_order_relaxed;
2556                 case spv::MemorySemanticsAcquireMask:                return std::memory_order_acquire;
2557                 case spv::MemorySemanticsReleaseMask:                return std::memory_order_release;
2558                 case spv::MemorySemanticsAcquireReleaseMask:         return std::memory_order_acq_rel;
2559                 case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel;  // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
2560                 default:
2561                         UNREACHABLE("MemorySemanticsMask %x", memorySemantics);
2562                         return std::memory_order_acq_rel;
2563                 }
2564         }
2565
2566         SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const
2567         {
2568                 SIMD::Float d = x.Float(0) * y.Float(0);
2569
2570                 for (auto i = 1u; i < numComponents; i++)
2571                 {
2572                         d += x.Float(i) * y.Float(i);
2573                 }
2574
2575                 return d;
2576         }
2577
2578         SpirvShader::EmitResult SpirvShader::EmitAny(InsnIterator insn, EmitState *state) const
2579         {
2580                 auto routine = state->routine;
2581                 auto &type = getType(insn.word(1));
2582                 ASSERT(type.sizeInComponents == 1);
2583                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2584                 auto &srcType = getType(getObject(insn.word(3)).type);
2585                 auto src = GenericValue(this, routine, insn.word(3));
2586
2587                 SIMD::UInt result = src.UInt(0);
2588
2589                 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2590                 {
2591                         result |= src.UInt(i);
2592                 }
2593
2594                 dst.move(0, result);
2595                 return EmitResult::Continue;
2596         }
2597
2598         SpirvShader::EmitResult SpirvShader::EmitAll(InsnIterator insn, EmitState *state) const
2599         {
2600                 auto routine = state->routine;
2601                 auto &type = getType(insn.word(1));
2602                 ASSERT(type.sizeInComponents == 1);
2603                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2604                 auto &srcType = getType(getObject(insn.word(3)).type);
2605                 auto src = GenericValue(this, routine, insn.word(3));
2606
2607                 SIMD::UInt result = src.UInt(0);
2608
2609                 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2610                 {
2611                         result &= src.UInt(i);
2612                 }
2613
2614                 dst.move(0, result);
2615                 return EmitResult::Continue;
2616         }
2617
2618         SpirvShader::EmitResult SpirvShader::EmitBranch(InsnIterator insn, EmitState *state) const
2619         {
2620                 auto target = Block::ID(insn.word(1));
2621                 auto edge = Block::Edge{state->currentBlock, target};
2622                 state->edgeActiveLaneMasks.emplace(edge, state->activeLaneMask());
2623                 return EmitResult::Terminator;
2624         }
2625
2626         SpirvShader::EmitResult SpirvShader::EmitBranchConditional(InsnIterator insn, EmitState *state) const
2627         {
2628                 auto block = getBlock(state->currentBlock);
2629                 ASSERT(block.branchInstruction == insn);
2630
2631                 auto condId = Object::ID(block.branchInstruction.word(1));
2632                 auto trueBlockId = Block::ID(block.branchInstruction.word(2));
2633                 auto falseBlockId = Block::ID(block.branchInstruction.word(3));
2634
2635                 auto cond = GenericValue(this, state->routine, condId);
2636                 ASSERT_MSG(getType(getObject(condId).type).sizeInComponents == 1, "Condition must be a Boolean type scalar");
2637
2638                 // TODO: Optimize for case where all lanes take same path.
2639
2640                 state->addOutputActiveLaneMaskEdge(trueBlockId, cond.Int(0));
2641                 state->addOutputActiveLaneMaskEdge(falseBlockId, ~cond.Int(0));
2642
2643                 return EmitResult::Terminator;
2644         }
2645
2646         SpirvShader::EmitResult SpirvShader::EmitSwitch(InsnIterator insn, EmitState *state) const
2647         {
2648                 auto block = getBlock(state->currentBlock);
2649                 ASSERT(block.branchInstruction == insn);
2650
2651                 auto selId = Object::ID(block.branchInstruction.word(1));
2652
2653                 auto sel = GenericValue(this, state->routine, selId);
2654                 ASSERT_MSG(getType(getObject(selId).type).sizeInComponents == 1, "Selector must be a scalar");
2655
2656                 auto numCases = (block.branchInstruction.wordCount() - 3) / 2;
2657
2658                 // TODO: Optimize for case where all lanes take same path.
2659
2660                 SIMD::Int defaultLaneMask = state->activeLaneMask();
2661
2662                 // Gather up the case label matches and calculate defaultLaneMask.
2663                 std::vector<RValue<SIMD::Int>> caseLabelMatches;
2664                 caseLabelMatches.reserve(numCases);
2665                 for (uint32_t i = 0; i < numCases; i++)
2666                 {
2667                         auto label = block.branchInstruction.word(i * 2 + 3);
2668                         auto caseBlockId = Block::ID(block.branchInstruction.word(i * 2 + 4));
2669                         auto caseLabelMatch = CmpEQ(sel.Int(0), SIMD::Int(label));
2670                         state->addOutputActiveLaneMaskEdge(caseBlockId, caseLabelMatch);
2671                         defaultLaneMask &= ~caseLabelMatch;
2672                 }
2673
2674                 auto defaultBlockId = Block::ID(block.branchInstruction.word(2));
2675                 state->addOutputActiveLaneMaskEdge(defaultBlockId, defaultLaneMask);
2676
2677                 return EmitResult::Terminator;
2678         }
2679
2680         SpirvShader::EmitResult SpirvShader::EmitUnreachable(InsnIterator insn, EmitState *state) const
2681         {
2682                 // TODO: Log something in this case?
2683                 state->setActiveLaneMask(SIMD::Int(0));
2684                 return EmitResult::Terminator;
2685         }
2686
2687         SpirvShader::EmitResult SpirvShader::EmitReturn(InsnIterator insn, EmitState *state) const
2688         {
2689                 state->setActiveLaneMask(SIMD::Int(0));
2690                 return EmitResult::Terminator;
2691         }
2692
2693         SpirvShader::EmitResult SpirvShader::EmitPhi(InsnIterator insn, EmitState *state) const
2694         {
2695                 auto routine = state->routine;
2696                 auto typeId = Type::ID(insn.word(1));
2697                 auto type = getType(typeId);
2698                 auto objectId = Object::ID(insn.word(2));
2699
2700                 auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
2701
2702                 bool first = true;
2703                 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
2704                 {
2705                         auto varId = Object::ID(insn.word(w + 0));
2706                         auto blockId = Block::ID(insn.word(w + 1));
2707
2708                         auto in = GenericValue(this, routine, varId);
2709                         auto mask = state->getActiveLaneMaskEdge(blockId, state->currentBlock);
2710
2711                         for (uint32_t i = 0; i < type.sizeInComponents; i++)
2712                         {
2713                                 auto inMasked = in.Int(i) & mask;
2714                                 dst.replace(i, first ? inMasked : (dst.Int(i) | inMasked));
2715                         }
2716                         first = false;
2717                 }
2718
2719                 return EmitResult::Continue;
2720         }
2721
2722         void SpirvShader::emitEpilog(SpirvRoutine *routine) const
2723         {
2724                 for (auto insn : *this)
2725                 {
2726                         switch (insn.opcode())
2727                         {
2728                         case spv::OpVariable:
2729                         {
2730                                 Object::ID resultId = insn.word(2);
2731                                 auto &object = getObject(resultId);
2732                                 auto &objectTy = getType(object.type);
2733                                 if (object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
2734                                 {
2735                                         auto &dst = routine->getValue(resultId);
2736                                         int offset = 0;
2737                                         VisitInterface(resultId,
2738                                                                    [&](Decorations const &d, AttribType type) {
2739                                                                            auto scalarSlot = d.Location << 2 | d.Component;
2740                                                                            routine->outputs[scalarSlot] = dst[offset++];
2741                                                                    });
2742                                 }
2743                                 break;
2744                         }
2745                         default:
2746                                 break;
2747                         }
2748                 }
2749         }
2750
2751         SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end)
2752         {
2753                 // Default to a Simple, this may change later.
2754                 kind = Block::Simple;
2755
2756                 // Walk the instructions to find the last two of the block.
2757                 InsnIterator insns[2];
2758                 for (auto insn : *this)
2759                 {
2760                         insns[0] = insns[1];
2761                         insns[1] = insn;
2762                 }
2763
2764                 switch (insns[1].opcode())
2765                 {
2766                         case spv::OpBranch:
2767                                 branchInstruction = insns[1];
2768                                 outs.emplace(Block::ID(branchInstruction.word(1)));
2769
2770                                 switch (insns[0].opcode())
2771                                 {
2772                                         case spv::OpLoopMerge:
2773                                                 kind = Loop;
2774                                                 mergeInstruction = insns[0];
2775                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
2776                                                 continueTarget = Block::ID(mergeInstruction.word(2));
2777                                                 break;
2778
2779                                         default:
2780                                                 kind = Block::Simple;
2781                                                 break;
2782                                 }
2783                                 break;
2784
2785                         case spv::OpBranchConditional:
2786                                 branchInstruction = insns[1];
2787                                 outs.emplace(Block::ID(branchInstruction.word(2)));
2788                                 outs.emplace(Block::ID(branchInstruction.word(3)));
2789
2790                                 switch (insns[0].opcode())
2791                                 {
2792                                         case spv::OpSelectionMerge:
2793                                                 kind = StructuredBranchConditional;
2794                                                 mergeInstruction = insns[0];
2795                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
2796                                                 break;
2797
2798                                         case spv::OpLoopMerge:
2799                                                 kind = Loop;
2800                                                 mergeInstruction = insns[0];
2801                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
2802                                                 continueTarget = Block::ID(mergeInstruction.word(2));
2803                                                 break;
2804
2805                                         default:
2806                                                 kind = UnstructuredBranchConditional;
2807                                                 break;
2808                                 }
2809                                 break;
2810
2811                         case spv::OpSwitch:
2812                                 branchInstruction = insns[1];
2813                                 outs.emplace(Block::ID(branchInstruction.word(2)));
2814                                 for (uint32_t w = 4; w < branchInstruction.wordCount(); w += 2)
2815                                 {
2816                                         outs.emplace(Block::ID(branchInstruction.word(w)));
2817                                 }
2818
2819                                 switch (insns[0].opcode())
2820                                 {
2821                                         case spv::OpSelectionMerge:
2822                                                 kind = StructuredSwitch;
2823                                                 mergeInstruction = insns[0];
2824                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
2825                                                 break;
2826
2827                                         default:
2828                                                 kind = UnstructuredSwitch;
2829                                                 break;
2830                                 }
2831                                 break;
2832
2833                         default:
2834                                 break;
2835                 }
2836         }
2837
2838         void SpirvShader::EmitState::addOutputActiveLaneMaskEdge(Block::ID to, RValue<SIMD::Int> mask)
2839         {
2840                 addActiveLaneMaskEdge(currentBlock, to, mask & activeLaneMask());
2841         }
2842
2843         void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask)
2844         {
2845                 auto edge = Block::Edge{from, to};
2846                 auto it = edgeActiveLaneMasks.find(edge);
2847                 if (it == edgeActiveLaneMasks.end())
2848                 {
2849                         edgeActiveLaneMasks.emplace(edge, mask);
2850                 }
2851                 else
2852                 {
2853                         auto combined = it->second | mask;
2854                         edgeActiveLaneMasks.erase(edge);
2855                         edgeActiveLaneMasks.emplace(edge, combined);
2856                 }
2857         }
2858
2859         RValue<SIMD::Int> SpirvShader::EmitState::getActiveLaneMaskEdge(Block::ID from, Block::ID to)
2860         {
2861                 auto edge = Block::Edge{from, to};
2862                 auto it = edgeActiveLaneMasks.find(edge);
2863                 ASSERT_MSG(it != edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
2864                 return it->second;
2865         }
2866
2867         SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
2868                 pipelineLayout(pipelineLayout)
2869         {
2870         }
2871
2872 }