src/Pipeline/SpirvShader.cpp

   1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //    http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15 #include <spirv/unified1/spirv.hpp>
  16 #include <spirv/unified1/GLSL.std.450.h>
  17 #include "SpirvShader.hpp"
  18 #include "System/Math.hpp"
  19 #include "Vulkan/VkBuffer.hpp"
  20 #include "Vulkan/VkDebug.hpp"
  21 #include "Vulkan/VkPipelineLayout.hpp"
  22 #include "Device/Config.hpp"
  23
  24 #include <queue>
  25
  26 #ifdef Bool
  27 #undef Bool // b/127920555
  28 #endif
  29
  30 namespace
  31 {
  32         rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints)
  33         {
  34                 return rr::SignMask(ints) != 0;
  35         }
  36
  37         rr::RValue<rr::Bool> AnyFalse(rr::RValue<sw::SIMD::Int> const &ints)
  38         {
  39                 return rr::SignMask(~ints) != 0;
  40         }
  41 }
  42
  43 namespace sw
  44 {
  45         volatile int SpirvShader::serialCounter = 1;    // Start at 1, 0 is invalid shader.
  46
  47         SpirvShader::SpirvShader(InsnStore const &insns)
  48                         : insns{insns}, inputs{MAX_INTERFACE_COMPONENTS},
  49                           outputs{MAX_INTERFACE_COMPONENTS},
  50                           serialID{serialCounter++}, modes{}
  51         {
  52                 ASSERT(insns.size() > 0);
  53
  54                 // Simplifying assumptions (to be satisfied by earlier transformations)
  55                 // - There is exactly one entrypoint in the module, and it's the one we want
  56                 // - The only input/output OpVariables present are those used by the entrypoint
  57
  58                 Block::ID currentBlock;
  59                 InsnIterator blockStart;
  60
  61                 for (auto insn : *this)
  62                 {
  63                         switch (insn.opcode())
  64                         {
  65                         case spv::OpExecutionMode:
  66                                 ProcessExecutionMode(insn);
  67                                 break;
  68
  69                         case spv::OpDecorate:
  70                         {
  71                                 TypeOrObjectID targetId = insn.word(1);
  72                                 auto decoration = static_cast<spv::Decoration>(insn.word(2));
  73                                 decorations[targetId].Apply(
  74                                                 decoration,
  75                                                 insn.wordCount() > 3 ? insn.word(3) : 0);
  76
  77                                 if (decoration == spv::DecorationCentroid)
  78                                         modes.NeedsCentroid = true;
  79                                 break;
  80                         }
  81
  82                         case spv::OpMemberDecorate:
  83                         {
  84                                 Type::ID targetId = insn.word(1);
  85                                 auto memberIndex = insn.word(2);
  86                                 auto &d = memberDecorations[targetId];
  87                                 if (memberIndex >= d.size())
  88                                         d.resize(memberIndex + 1);    // on demand; exact size would require another pass...
  89                                 auto decoration = static_cast<spv::Decoration>(insn.word(3));
  90                                 d[memberIndex].Apply(
  91                                                 decoration,
  92                                                 insn.wordCount() > 4 ? insn.word(4) : 0);
  93
  94                                 if (decoration == spv::DecorationCentroid)
  95                                         modes.NeedsCentroid = true;
  96                                 break;
  97                         }
  98
  99                         case spv::OpDecorationGroup:
 100                                 // Nothing to do here. We don't need to record the definition of the group; we'll just have
 101                                 // the bundle of decorations float around. If we were to ever walk the decorations directly,
 102                                 // we might think about introducing this as a real Object.
 103                                 break;
 104
 105                         case spv::OpGroupDecorate:
 106                         {
 107                                 auto const &srcDecorations = decorations[insn.word(1)];
 108                                 for (auto i = 2u; i < insn.wordCount(); i++)
 109                                 {
 110                                         // remaining operands are targets to apply the group to.
 111                                         decorations[insn.word(i)].Apply(srcDecorations);
 112                                 }
 113                                 break;
 114                         }
 115
 116                         case spv::OpGroupMemberDecorate:
 117                         {
 118                                 auto const &srcDecorations = decorations[insn.word(1)];
 119                                 for (auto i = 2u; i < insn.wordCount(); i += 2)
 120                                 {
 121                                         // remaining operands are pairs of <id>, literal for members to apply to.
 122                                         auto &d = memberDecorations[insn.word(i)];
 123                                         auto memberIndex = insn.word(i + 1);
 124                                         if (memberIndex >= d.size())
 125                                                 d.resize(memberIndex + 1);    // on demand resize, see above...
 126                                         d[memberIndex].Apply(srcDecorations);
 127                                 }
 128                                 break;
 129                         }
 130
 131                         case spv::OpLabel:
 132                         {
 133                                 ASSERT(currentBlock.value() == 0);
 134                                 currentBlock = Block::ID(insn.word(1));
 135                                 blockStart = insn;
 136                                 break;
 137                         }
 138
 139                         // Branch Instructions (subset of Termination Instructions):
 140                         case spv::OpBranch:
 141                         case spv::OpBranchConditional:
 142                         case spv::OpSwitch:
 143                         case spv::OpReturn:
 144                         // fallthrough
 145
 146                         // Termination instruction:
 147                         case spv::OpKill:
 148                         case spv::OpUnreachable:
 149                         {
 150                                 ASSERT(currentBlock.value() != 0);
 151                                 auto blockEnd = insn; blockEnd++;
 152                                 blocks[currentBlock] = Block(blockStart, blockEnd);
 153                                 currentBlock = Block::ID(0);
 154
 155                                 if (insn.opcode() == spv::OpKill)
 156                                 {
 157                                         modes.ContainsKill = true;
 158                                 }
 159                                 break;
 160                         }
 161
 162                         case spv::OpLoopMerge:
 163                         case spv::OpSelectionMerge:
 164                                 break; // Nothing to do in analysis pass.
 165
 166                         case spv::OpTypeVoid:
 167                         case spv::OpTypeBool:
 168                         case spv::OpTypeInt:
 169                         case spv::OpTypeFloat:
 170                         case spv::OpTypeVector:
 171                         case spv::OpTypeMatrix:
 172                         case spv::OpTypeImage:
 173                         case spv::OpTypeSampler:
 174                         case spv::OpTypeSampledImage:
 175                         case spv::OpTypeArray:
 176                         case spv::OpTypeRuntimeArray:
 177                         case spv::OpTypeStruct:
 178                         case spv::OpTypePointer:
 179                         case spv::OpTypeFunction:
 180                                 DeclareType(insn);
 181                                 break;
 182
 183                         case spv::OpVariable:
 184                         {
 185                                 Type::ID typeId = insn.word(1);
 186                                 Object::ID resultId = insn.word(2);
 187                                 auto storageClass = static_cast<spv::StorageClass>(insn.word(3));
 188                                 if (insn.wordCount() > 4)
 189                                         UNIMPLEMENTED("Variable initializers not yet supported");
 190
 191                                 auto &object = defs[resultId];
 192                                 object.kind = Object::Kind::Variable;
 193                                 object.definition = insn;
 194                                 object.type = typeId;
 195                                 object.pointerBase = insn.word(2);      // base is itself
 196
 197                                 ASSERT(getType(typeId).storageClass == storageClass);
 198
 199                                 switch (storageClass)
 200                                 {
 201                                 case spv::StorageClassInput:
 202                                 case spv::StorageClassOutput:
 203                                         ProcessInterfaceVariable(object);
 204                                         break;
 205                                 case spv::StorageClassUniform:
 206                                 case spv::StorageClassStorageBuffer:
 207                                 case spv::StorageClassPushConstant:
 208                                         object.kind = Object::Kind::PhysicalPointer;
 209                                         break;
 210
 211                                 case spv::StorageClassPrivate:
 212                                 case spv::StorageClassFunction:
 213                                         break; // Correctly handled.
 214
 215                                 case spv::StorageClassUniformConstant:
 216                                 case spv::StorageClassWorkgroup:
 217                                 case spv::StorageClassCrossWorkgroup:
 218                                 case spv::StorageClassGeneric:
 219                                 case spv::StorageClassAtomicCounter:
 220                                 case spv::StorageClassImage:
 221                                         UNIMPLEMENTED("StorageClass %d not yet implemented", (int)storageClass);
 222                                         break;
 223
 224                                 default:
 225                                         UNREACHABLE("Unexpected StorageClass %d", storageClass); // See Appendix A of the Vulkan spec.
 226                                         break;
 227                                 }
 228                                 break;
 229                         }
 230
 231                         case spv::OpConstant:
 232                                 CreateConstant(insn).constantValue[0] = insn.word(3);
 233                                 break;
 234                         case spv::OpConstantFalse:
 235                                 CreateConstant(insn).constantValue[0] = 0;              // represent boolean false as zero
 236                                 break;
 237                         case spv::OpConstantTrue:
 238                                 CreateConstant(insn).constantValue[0] = ~0u;    // represent boolean true as all bits set
 239                                 break;
 240                         case spv::OpConstantNull:
 241                         case spv::OpUndef:
 242                         {
 243                                 // TODO: consider a real LLVM-level undef. For now, zero is a perfectly good value.
 244                                 // OpConstantNull forms a constant of arbitrary type, all zeros.
 245                                 auto &object = CreateConstant(insn);
 246                                 auto &objectTy = getType(object.type);
 247                                 for (auto i = 0u; i < objectTy.sizeInComponents; i++)
 248                                 {
 249                                         object.constantValue[i] = 0;
 250                                 }
 251                                 break;
 252                         }
 253                         case spv::OpConstantComposite:
 254                         {
 255                                 auto &object = CreateConstant(insn);
 256                                 auto offset = 0u;
 257                                 for (auto i = 0u; i < insn.wordCount() - 3; i++)
 258                                 {
 259                                         auto &constituent = getObject(insn.word(i + 3));
 260                                         auto &constituentTy = getType(constituent.type);
 261                                         for (auto j = 0u; j < constituentTy.sizeInComponents; j++)
 262                                                 object.constantValue[offset++] = constituent.constantValue[j];
 263                                 }
 264
 265                                 auto objectId = Object::ID(insn.word(2));
 266                                 auto decorationsIt = decorations.find(objectId);
 267                                 if (decorationsIt != decorations.end() &&
 268                                         decorationsIt->second.BuiltIn == spv::BuiltInWorkgroupSize)
 269                                 {
 270                                         // https://www.khronos.org/registry/vulkan/specs/1.1/html/vkspec.html#interfaces-builtin-variables :
 271                                         // Decorating an object with the WorkgroupSize built-in
 272                                         // decoration will make that object contain the dimensions
 273                                         // of a local workgroup. If an object is decorated with the
 274                                         // WorkgroupSize decoration, this must take precedence over
 275                                         // any execution mode set for LocalSize.
 276                                         // The object decorated with WorkgroupSize must be declared
 277                                         // as a three-component vector of 32-bit integers.
 278                                         ASSERT(getType(object.type).sizeInComponents == 3);
 279                                         modes.WorkgroupSizeX = object.constantValue[0];
 280                                         modes.WorkgroupSizeY = object.constantValue[1];
 281                                         modes.WorkgroupSizeZ = object.constantValue[2];
 282                                 }
 283                                 break;
 284                         }
 285
 286                         case spv::OpCapability:
 287                                 break; // Various capabilities will be declared, but none affect our code generation at this point.
 288                         case spv::OpMemoryModel:
 289                                 break; // Memory model does not affect our code generation until we decide to do Vulkan Memory Model support.
 290
 291                         case spv::OpEntryPoint:
 292                                 break;
 293                         case spv::OpFunction:
 294                                 ASSERT(mainBlockId.value() == 0); // Multiple functions found
 295                                 // Scan forward to find the function's label.
 296                                 for (auto it = insn; it != end() && mainBlockId.value() == 0; it++)
 297                                 {
 298                                         switch (it.opcode())
 299                                         {
 300                                         case spv::OpFunction:
 301                                         case spv::OpFunctionParameter:
 302                                                 break;
 303                                         case spv::OpLabel:
 304                                                 mainBlockId = Block::ID(it.word(1));
 305                                                 break;
 306                                         default:
 307                                                 WARN("Unexpected opcode '%s' following OpFunction", OpcodeName(it.opcode()).c_str());
 308                                         }
 309                                 }
 310                                 ASSERT(mainBlockId.value() != 0); // Function's OpLabel not found
 311                                 break;
 312                         case spv::OpFunctionEnd:
 313                                 // Due to preprocessing, the entrypoint and its function provide no value.
 314                                 break;
 315                         case spv::OpExtInstImport:
 316                                 // We will only support the GLSL 450 extended instruction set, so no point in tracking the ID we assign it.
 317                                 // Valid shaders will not attempt to import any other instruction sets.
 318                                 if (0 != strcmp("GLSL.std.450", reinterpret_cast<char const *>(insn.wordPointer(2))))
 319                                 {
 320                                         UNIMPLEMENTED("Only GLSL extended instruction set is supported");
 321                                 }
 322                                 break;
 323                         case spv::OpName:
 324                         case spv::OpMemberName:
 325                         case spv::OpSource:
 326                         case spv::OpSourceContinued:
 327                         case spv::OpSourceExtension:
 328                         case spv::OpLine:
 329                         case spv::OpNoLine:
 330                         case spv::OpModuleProcessed:
 331                         case spv::OpString:
 332                                 // No semantic impact
 333                                 break;
 334
 335                         case spv::OpFunctionParameter:
 336                         case spv::OpFunctionCall:
 337                         case spv::OpSpecConstant:
 338                         case spv::OpSpecConstantComposite:
 339                         case spv::OpSpecConstantFalse:
 340                         case spv::OpSpecConstantOp:
 341                         case spv::OpSpecConstantTrue:
 342                                 // These should have all been removed by preprocessing passes. If we see them here,
 343                                 // our assumptions are wrong and we will probably generate wrong code.
 344                                 UNIMPLEMENTED("%s should have already been lowered.", OpcodeName(insn.opcode()).c_str());
 345                                 break;
 346
 347                         case spv::OpFConvert:
 348                         case spv::OpSConvert:
 349                         case spv::OpUConvert:
 350                                 UNIMPLEMENTED("No valid uses for Op*Convert until we support multiple bit widths");
 351                                 break;
 352
 353                         case spv::OpLoad:
 354                         case spv::OpAccessChain:
 355                         case spv::OpInBoundsAccessChain:
 356                         case spv::OpCompositeConstruct:
 357                         case spv::OpCompositeInsert:
 358                         case spv::OpCompositeExtract:
 359                         case spv::OpVectorShuffle:
 360                         case spv::OpVectorTimesScalar:
 361                         case spv::OpMatrixTimesScalar:
 362                         case spv::OpVectorExtractDynamic:
 363                         case spv::OpVectorInsertDynamic:
 364                         case spv::OpNot: // Unary ops
 365                         case spv::OpSNegate:
 366                         case spv::OpFNegate:
 367                         case spv::OpLogicalNot:
 368                         case spv::OpIAdd: // Binary ops
 369                         case spv::OpISub:
 370                         case spv::OpIMul:
 371                         case spv::OpSDiv:
 372                         case spv::OpUDiv:
 373                         case spv::OpFAdd:
 374                         case spv::OpFSub:
 375                         case spv::OpFMul:
 376                         case spv::OpFDiv:
 377                         case spv::OpFMod:
 378                         case spv::OpFRem:
 379                         case spv::OpFOrdEqual:
 380                         case spv::OpFUnordEqual:
 381                         case spv::OpFOrdNotEqual:
 382                         case spv::OpFUnordNotEqual:
 383                         case spv::OpFOrdLessThan:
 384                         case spv::OpFUnordLessThan:
 385                         case spv::OpFOrdGreaterThan:
 386                         case spv::OpFUnordGreaterThan:
 387                         case spv::OpFOrdLessThanEqual:
 388                         case spv::OpFUnordLessThanEqual:
 389                         case spv::OpFOrdGreaterThanEqual:
 390                         case spv::OpFUnordGreaterThanEqual:
 391                         case spv::OpSMod:
 392                         case spv::OpSRem:
 393                         case spv::OpUMod:
 394                         case spv::OpIEqual:
 395                         case spv::OpINotEqual:
 396                         case spv::OpUGreaterThan:
 397                         case spv::OpSGreaterThan:
 398                         case spv::OpUGreaterThanEqual:
 399                         case spv::OpSGreaterThanEqual:
 400                         case spv::OpULessThan:
 401                         case spv::OpSLessThan:
 402                         case spv::OpULessThanEqual:
 403                         case spv::OpSLessThanEqual:
 404                         case spv::OpShiftRightLogical:
 405                         case spv::OpShiftRightArithmetic:
 406                         case spv::OpShiftLeftLogical:
 407                         case spv::OpBitwiseOr:
 408                         case spv::OpBitwiseXor:
 409                         case spv::OpBitwiseAnd:
 410                         case spv::OpLogicalOr:
 411                         case spv::OpLogicalAnd:
 412                         case spv::OpLogicalEqual:
 413                         case spv::OpLogicalNotEqual:
 414                         case spv::OpUMulExtended:
 415                         case spv::OpSMulExtended:
 416                         case spv::OpDot:
 417                         case spv::OpConvertFToU:
 418                         case spv::OpConvertFToS:
 419                         case spv::OpConvertSToF:
 420                         case spv::OpConvertUToF:
 421                         case spv::OpBitcast:
 422                         case spv::OpSelect:
 423                         case spv::OpExtInst:
 424                         case spv::OpIsInf:
 425                         case spv::OpIsNan:
 426                         case spv::OpAny:
 427                         case spv::OpAll:
 428                         case spv::OpDPdx:
 429                         case spv::OpDPdxCoarse:
 430                         case spv::OpDPdy:
 431                         case spv::OpDPdyCoarse:
 432                         case spv::OpFwidth:
 433                         case spv::OpFwidthCoarse:
 434                         case spv::OpDPdxFine:
 435                         case spv::OpDPdyFine:
 436                         case spv::OpFwidthFine:
 437                         case spv::OpAtomicLoad:
 438                         case spv::OpPhi:
 439                                 // Instructions that yield an intermediate value
 440                         {
 441                                 Type::ID typeId = insn.word(1);
 442                                 Object::ID resultId = insn.word(2);
 443                                 auto &object = defs[resultId];
 444                                 object.type = typeId;
 445                                 object.kind = Object::Kind::Value;
 446                                 object.definition = insn;
 447
 448                                 if (insn.opcode() == spv::OpAccessChain || insn.opcode() == spv::OpInBoundsAccessChain)
 449                                 {
 450                                         // interior ptr has two parts:
 451                                         // - logical base ptr, common across all lanes and known at compile time
 452                                         // - per-lane offset
 453                                         Object::ID baseId = insn.word(3);
 454                                         object.pointerBase = getObject(baseId).pointerBase;
 455                                 }
 456                                 break;
 457                         }
 458
 459                         case spv::OpStore:
 460                         case spv::OpAtomicStore:
 461                                 // Don't need to do anything during analysis pass
 462                                 break;
 463
 464                         default:
 465                                 UNIMPLEMENTED("%s", OpcodeName(insn.opcode()).c_str());
 466                         }
 467                 }
 468
 469                 // Assign all Block::ins
 470                 for (auto &it : blocks)
 471                 {
 472                         auto &blockId = it.first;
 473                         auto &block = it.second;
 474                         for (auto &outId : block.outs)
 475                         {
 476                                 auto outIt = blocks.find(outId);
 477                                 ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
 478                                 auto &out = outIt->second;
 479                                 out.ins.emplace(blockId);
 480                         }
 481                 }
 482         }
 483
 484         void SpirvShader::DeclareType(InsnIterator insn)
 485         {
 486                 Type::ID resultId = insn.word(1);
 487
 488                 auto &type = types[resultId];
 489                 type.definition = insn;
 490                 type.sizeInComponents = ComputeTypeSize(insn);
 491
 492                 // A structure is a builtin block if it has a builtin
 493                 // member. All members of such a structure are builtins.
 494                 switch (insn.opcode())
 495                 {
 496                 case spv::OpTypeStruct:
 497                 {
 498                         auto d = memberDecorations.find(resultId);
 499                         if (d != memberDecorations.end())
 500                         {
 501                                 for (auto &m : d->second)
 502                                 {
 503                                         if (m.HasBuiltIn)
 504                                         {
 505                                                 type.isBuiltInBlock = true;
 506                                                 break;
 507                                         }
 508                                 }
 509                         }
 510                         break;
 511                 }
 512                 case spv::OpTypePointer:
 513                 {
 514                         Type::ID elementTypeId = insn.word(3);
 515                         type.element = elementTypeId;
 516                         type.isBuiltInBlock = getType(elementTypeId).isBuiltInBlock;
 517                         type.storageClass = static_cast<spv::StorageClass>(insn.word(2));
 518                         break;
 519                 }
 520                 case spv::OpTypeVector:
 521                 case spv::OpTypeMatrix:
 522                 case spv::OpTypeArray:
 523                 case spv::OpTypeRuntimeArray:
 524                 {
 525                         Type::ID elementTypeId = insn.word(2);
 526                         type.element = elementTypeId;
 527                         break;
 528                 }
 529                 default:
 530                         break;
 531                 }
 532         }
 533
 534         SpirvShader::Object& SpirvShader::CreateConstant(InsnIterator insn)
 535         {
 536                 Type::ID typeId = insn.word(1);
 537                 Object::ID resultId = insn.word(2);
 538                 auto &object = defs[resultId];
 539                 auto &objectTy = getType(typeId);
 540                 object.type = typeId;
 541                 object.kind = Object::Kind::Constant;
 542                 object.definition = insn;
 543                 object.constantValue = std::unique_ptr<uint32_t[]>(new uint32_t[objectTy.sizeInComponents]);
 544                 return object;
 545         }
 546
 547         void SpirvShader::ProcessInterfaceVariable(Object &object)
 548         {
 549                 auto &objectTy = getType(object.type);
 550                 ASSERT(objectTy.storageClass == spv::StorageClassInput || objectTy.storageClass == spv::StorageClassOutput);
 551
 552                 ASSERT(objectTy.opcode() == spv::OpTypePointer);
 553                 auto pointeeTy = getType(objectTy.element);
 554
 555                 auto &builtinInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputBuiltins : outputBuiltins;
 556                 auto &userDefinedInterface = (objectTy.storageClass == spv::StorageClassInput) ? inputs : outputs;
 557
 558                 ASSERT(object.opcode() == spv::OpVariable);
 559                 Object::ID resultId = object.definition.word(2);
 560
 561                 if (objectTy.isBuiltInBlock)
 562                 {
 563                         // walk the builtin block, registering each of its members separately.
 564                         auto m = memberDecorations.find(objectTy.element);
 565                         ASSERT(m != memberDecorations.end());        // otherwise we wouldn't have marked the type chain
 566                         auto &structType = pointeeTy.definition;
 567                         auto offset = 0u;
 568                         auto word = 2u;
 569                         for (auto &member : m->second)
 570                         {
 571                                 auto &memberType = getType(structType.word(word));
 572
 573                                 if (member.HasBuiltIn)
 574                                 {
 575                                         builtinInterface[member.BuiltIn] = {resultId, offset, memberType.sizeInComponents};
 576                                 }
 577
 578                                 offset += memberType.sizeInComponents;
 579                                 ++word;
 580                         }
 581                         return;
 582                 }
 583
 584                 auto d = decorations.find(resultId);
 585                 if (d != decorations.end() && d->second.HasBuiltIn)
 586                 {
 587                         builtinInterface[d->second.BuiltIn] = {resultId, 0, pointeeTy.sizeInComponents};
 588                 }
 589                 else
 590                 {
 591                         object.kind = Object::Kind::InterfaceVariable;
 592                         VisitInterface(resultId,
 593                                                    [&userDefinedInterface](Decorations const &d, AttribType type) {
 594                                                            // Populate a single scalar slot in the interface from a collection of decorations and the intended component type.
 595                                                            auto scalarSlot = (d.Location << 2) | d.Component;
 596                                                            ASSERT(scalarSlot >= 0 &&
 597                                                                           scalarSlot < static_cast<int32_t>(userDefinedInterface.size()));
 598
 599                                                            auto &slot = userDefinedInterface[scalarSlot];
 600                                                            slot.Type = type;
 601                                                            slot.Flat = d.Flat;
 602                                                            slot.NoPerspective = d.NoPerspective;
 603                                                            slot.Centroid = d.Centroid;
 604                                                    });
 605                 }
 606         }
 607
 608         void SpirvShader::ProcessExecutionMode(InsnIterator insn)
 609         {
 610                 auto mode = static_cast<spv::ExecutionMode>(insn.word(2));
 611                 switch (mode)
 612                 {
 613                 case spv::ExecutionModeEarlyFragmentTests:
 614                         modes.EarlyFragmentTests = true;
 615                         break;
 616                 case spv::ExecutionModeDepthReplacing:
 617                         modes.DepthReplacing = true;
 618                         break;
 619                 case spv::ExecutionModeDepthGreater:
 620                         modes.DepthGreater = true;
 621                         break;
 622                 case spv::ExecutionModeDepthLess:
 623                         modes.DepthLess = true;
 624                         break;
 625                 case spv::ExecutionModeDepthUnchanged:
 626                         modes.DepthUnchanged = true;
 627                         break;
 628                 case spv::ExecutionModeLocalSize:
 629                         modes.WorkgroupSizeX = insn.word(3);
 630                         modes.WorkgroupSizeY = insn.word(4);
 631                         modes.WorkgroupSizeZ = insn.word(5);
 632                         break;
 633                 case spv::ExecutionModeOriginUpperLeft:
 634                         // This is always the case for a Vulkan shader. Do nothing.
 635                         break;
 636                 default:
 637                         UNIMPLEMENTED("No other execution modes are permitted");
 638                 }
 639         }
 640
 641         uint32_t SpirvShader::ComputeTypeSize(InsnIterator insn)
 642         {
 643                 // Types are always built from the bottom up (with the exception of forward ptrs, which
 644                 // don't appear in Vulkan shaders. Therefore, we can always assume our component parts have
 645                 // already been described (and so their sizes determined)
 646                 switch (insn.opcode())
 647                 {
 648                 case spv::OpTypeVoid:
 649                 case spv::OpTypeSampler:
 650                 case spv::OpTypeImage:
 651                 case spv::OpTypeSampledImage:
 652                 case spv::OpTypeFunction:
 653                 case spv::OpTypeRuntimeArray:
 654                         // Objects that don't consume any space.
 655                         // Descriptor-backed objects currently only need exist at compile-time.
 656                         // Runtime arrays don't appear in places where their size would be interesting
 657                         return 0;
 658
 659                 case spv::OpTypeBool:
 660                 case spv::OpTypeFloat:
 661                 case spv::OpTypeInt:
 662                         // All the fundamental types are 1 component. If we ever add support for 8/16/64-bit components,
 663                         // we might need to change this, but only 32 bit components are required for Vulkan 1.1.
 664                         return 1;
 665
 666                 case spv::OpTypeVector:
 667                 case spv::OpTypeMatrix:
 668                         // Vectors and matrices both consume element count * element size.
 669                         return getType(insn.word(2)).sizeInComponents * insn.word(3);
 670
 671                 case spv::OpTypeArray:
 672                 {
 673                         // Element count * element size. Array sizes come from constant ids.
 674                         auto arraySize = GetConstantInt(insn.word(3));
 675                         return getType(insn.word(2)).sizeInComponents * arraySize;
 676                 }
 677
 678                 case spv::OpTypeStruct:
 679                 {
 680                         uint32_t size = 0;
 681                         for (uint32_t i = 2u; i < insn.wordCount(); i++)
 682                         {
 683                                 size += getType(insn.word(i)).sizeInComponents;
 684                         }
 685                         return size;
 686                 }
 687
 688                 case spv::OpTypePointer:
 689                         // Runtime representation of a pointer is a per-lane index.
 690                         // Note: clients are expected to look through the pointer if they want the pointee size instead.
 691                         return 1;
 692
 693                 default:
 694                         // Some other random insn.
 695                         UNIMPLEMENTED("Only types are supported");
 696                         return 0;
 697                 }
 698         }
 699
 700         bool SpirvShader::IsStorageInterleavedByLane(spv::StorageClass storageClass)
 701         {
 702                 switch (storageClass)
 703                 {
 704                 case spv::StorageClassUniform:
 705                 case spv::StorageClassStorageBuffer:
 706                 case spv::StorageClassPushConstant:
 707                         return false;
 708                 default:
 709                         return true;
 710                 }
 711         }
 712
 713         template<typename F>
 714         int SpirvShader::VisitInterfaceInner(Type::ID id, Decorations d, F f) const
 715         {
 716                 // Recursively walks variable definition and its type tree, taking into account
 717                 // any explicit Location or Component decorations encountered; where explicit
 718                 // Locations or Components are not specified, assigns them sequentially.
 719                 // Collected decorations are carried down toward the leaves and across
 720                 // siblings; Effect of decorations intentionally does not flow back up the tree.
 721                 //
 722                 // F is a functor to be called with the effective decoration set for every component.
 723                 //
 724                 // Returns the next available location, and calls f().
 725
 726                 // This covers the rules in Vulkan 1.1 spec, 14.1.4 Location Assignment.
 727
 728                 ApplyDecorationsForId(&d, id);
 729
 730                 auto const &obj = getType(id);
 731                 switch(obj.opcode())
 732                 {
 733                 case spv::OpTypePointer:
 734                         return VisitInterfaceInner<F>(obj.definition.word(3), d, f);
 735                 case spv::OpTypeMatrix:
 736                         for (auto i = 0u; i < obj.definition.word(3); i++, d.Location++)
 737                         {
 738                                 // consumes same components of N consecutive locations
 739                                 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
 740                         }
 741                         return d.Location;
 742                 case spv::OpTypeVector:
 743                         for (auto i = 0u; i < obj.definition.word(3); i++, d.Component++)
 744                         {
 745                                 // consumes N consecutive components in the same location
 746                                 VisitInterfaceInner<F>(obj.definition.word(2), d, f);
 747                         }
 748                         return d.Location + 1;
 749                 case spv::OpTypeFloat:
 750                         f(d, ATTRIBTYPE_FLOAT);
 751                         return d.Location + 1;
 752                 case spv::OpTypeInt:
 753                         f(d, obj.definition.word(3) ? ATTRIBTYPE_INT : ATTRIBTYPE_UINT);
 754                         return d.Location + 1;
 755                 case spv::OpTypeBool:
 756                         f(d, ATTRIBTYPE_UINT);
 757                         return d.Location + 1;
 758                 case spv::OpTypeStruct:
 759                 {
 760                         // iterate over members, which may themselves have Location/Component decorations
 761                         for (auto i = 0u; i < obj.definition.wordCount() - 2; i++)
 762                         {
 763                                 ApplyDecorationsForIdMember(&d, id, i);
 764                                 d.Location = VisitInterfaceInner<F>(obj.definition.word(i + 2), d, f);
 765                                 d.Component = 0;    // Implicit locations always have component=0
 766                         }
 767                         return d.Location;
 768                 }
 769                 case spv::OpTypeArray:
 770                 {
 771                         auto arraySize = GetConstantInt(obj.definition.word(3));
 772                         for (auto i = 0u; i < arraySize; i++)
 773                         {
 774                                 d.Location = VisitInterfaceInner<F>(obj.definition.word(2), d, f);
 775                         }
 776                         return d.Location;
 777                 }
 778                 default:
 779                         // Intentionally partial; most opcodes do not participate in type hierarchies
 780                         return 0;
 781                 }
 782         }
 783
 784         template<typename F>
 785         void SpirvShader::VisitInterface(Object::ID id, F f) const
 786         {
 787                 // Walk a variable definition and call f for each component in it.
 788                 Decorations d{};
 789                 ApplyDecorationsForId(&d, id);
 790
 791                 auto def = getObject(id).definition;
 792                 ASSERT(def.opcode() == spv::OpVariable);
 793                 VisitInterfaceInner<F>(def.word(1), d, f);
 794         }
 795
 796         SIMD::Int SpirvShader::WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
 797         {
 798                 // Produce a offset into external memory in sizeof(float) units
 799
 800                 int constantOffset = 0;
 801                 SIMD::Int dynamicOffset = SIMD::Int(0);
 802                 auto &baseObject = getObject(id);
 803                 Type::ID typeId = getType(baseObject.type).element;
 804                 Decorations d{};
 805                 ApplyDecorationsForId(&d, baseObject.type);
 806
 807                 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
 808                 // Start with its offset and build from there.
 809                 if (baseObject.kind == Object::Kind::Value)
 810                 {
 811                         dynamicOffset += routine->getIntermediate(id).Int(0);
 812                 }
 813
 814                 for (auto i = 0u; i < numIndexes; i++)
 815                 {
 816                         auto & type = getType(typeId);
 817                         switch (type.definition.opcode())
 818                         {
 819                         case spv::OpTypeStruct:
 820                         {
 821                                 int memberIndex = GetConstantInt(indexIds[i]);
 822                                 ApplyDecorationsForIdMember(&d, typeId, memberIndex);
 823                                 ASSERT(d.HasOffset);
 824                                 constantOffset += d.Offset / sizeof(float);
 825                                 typeId = type.definition.word(2u + memberIndex);
 826                                 break;
 827                         }
 828                         case spv::OpTypeArray:
 829                         case spv::OpTypeRuntimeArray:
 830                         {
 831                                 // TODO: b/127950082: Check bounds.
 832                                 ApplyDecorationsForId(&d, typeId);
 833                                 ASSERT(d.HasArrayStride);
 834                                 auto & obj = getObject(indexIds[i]);
 835                                 if (obj.kind == Object::Kind::Constant)
 836                                         constantOffset += d.ArrayStride/sizeof(float) * GetConstantInt(indexIds[i]);
 837                                 else
 838                                         dynamicOffset += SIMD::Int(d.ArrayStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
 839                                 typeId = type.element;
 840                                 break;
 841                         }
 842                         case spv::OpTypeMatrix:
 843                         {
 844                                 // TODO: b/127950082: Check bounds.
 845                                 ApplyDecorationsForId(&d, typeId);
 846                                 ASSERT(d.HasMatrixStride);
 847                                 auto & obj = getObject(indexIds[i]);
 848                                 if (obj.kind == Object::Kind::Constant)
 849                                         constantOffset += d.MatrixStride/sizeof(float) * GetConstantInt(indexIds[i]);
 850                                 else
 851                                         dynamicOffset += SIMD::Int(d.MatrixStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0);
 852                                 typeId = type.element;
 853                                 break;
 854                         }
 855                         case spv::OpTypeVector:
 856                         {
 857                                 auto & obj = getObject(indexIds[i]);
 858                                 if (obj.kind == Object::Kind::Constant)
 859                                         constantOffset += GetConstantInt(indexIds[i]);
 860                                 else
 861                                         dynamicOffset += routine->getIntermediate(indexIds[i]).Int(0);
 862                                 typeId = type.element;
 863                                 break;
 864                         }
 865                         default:
 866                                 UNIMPLEMENTED("Unexpected type '%s' in WalkExplicitLayoutAccessChain", OpcodeName(type.definition.opcode()).c_str());
 867                         }
 868                 }
 869
 870                 return dynamicOffset + SIMD::Int(constantOffset);
 871         }
 872
 873         SIMD::Int SpirvShader::WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
 874         {
 875                 // TODO: avoid doing per-lane work in some cases if we can?
 876                 // Produce a *component* offset into location-oriented memory
 877
 878                 int constantOffset = 0;
 879                 SIMD::Int dynamicOffset = SIMD::Int(0);
 880                 auto &baseObject = getObject(id);
 881                 Type::ID typeId = getType(baseObject.type).element;
 882
 883                 // The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
 884                 // Start with its offset and build from there.
 885                 if (baseObject.kind == Object::Kind::Value)
 886                 {
 887                         dynamicOffset += routine->getIntermediate(id).Int(0);
 888                 }
 889
 890                 for (auto i = 0u; i < numIndexes; i++)
 891                 {
 892                         auto & type = getType(typeId);
 893                         switch(type.opcode())
 894                         {
 895                         case spv::OpTypeStruct:
 896                         {
 897                                 int memberIndex = GetConstantInt(indexIds[i]);
 898                                 int offsetIntoStruct = 0;
 899                                 for (auto j = 0; j < memberIndex; j++) {
 900                                         auto memberType = type.definition.word(2u + j);
 901                                         offsetIntoStruct += getType(memberType).sizeInComponents;
 902                                 }
 903                                 constantOffset += offsetIntoStruct;
 904                                 typeId = type.definition.word(2u + memberIndex);
 905                                 break;
 906                         }
 907
 908                         case spv::OpTypeVector:
 909                         case spv::OpTypeMatrix:
 910                         case spv::OpTypeArray:
 911                         case spv::OpTypeRuntimeArray:
 912                         {
 913                                 // TODO: b/127950082: Check bounds.
 914                                 auto stride = getType(type.element).sizeInComponents;
 915                                 auto & obj = getObject(indexIds[i]);
 916                                 if (obj.kind == Object::Kind::Constant)
 917                                         constantOffset += stride * GetConstantInt(indexIds[i]);
 918                                 else
 919                                         dynamicOffset += SIMD::Int(stride) * routine->getIntermediate(indexIds[i]).Int(0);
 920                                 typeId = type.element;
 921                                 break;
 922                         }
 923
 924                         default:
 925                                 UNIMPLEMENTED("Unexpected type '%s' in WalkAccessChain", OpcodeName(type.opcode()).c_str());
 926                         }
 927                 }
 928
 929                 return dynamicOffset + SIMD::Int(constantOffset);
 930         }
 931
 932         uint32_t SpirvShader::WalkLiteralAccessChain(Type::ID typeId, uint32_t numIndexes, uint32_t const *indexes) const
 933         {
 934                 uint32_t constantOffset = 0;
 935
 936                 for (auto i = 0u; i < numIndexes; i++)
 937                 {
 938                         auto & type = getType(typeId);
 939                         switch(type.opcode())
 940                         {
 941                         case spv::OpTypeStruct:
 942                         {
 943                                 int memberIndex = indexes[i];
 944                                 int offsetIntoStruct = 0;
 945                                 for (auto j = 0; j < memberIndex; j++) {
 946                                         auto memberType = type.definition.word(2u + j);
 947                                         offsetIntoStruct += getType(memberType).sizeInComponents;
 948                                 }
 949                                 constantOffset += offsetIntoStruct;
 950                                 typeId = type.definition.word(2u + memberIndex);
 951                                 break;
 952                         }
 953
 954                         case spv::OpTypeVector:
 955                         case spv::OpTypeMatrix:
 956                         case spv::OpTypeArray:
 957                         {
 958                                 auto elementType = type.definition.word(2);
 959                                 auto stride = getType(elementType).sizeInComponents;
 960                                 constantOffset += stride * indexes[i];
 961                                 typeId = elementType;
 962                                 break;
 963                         }
 964
 965                         default:
 966                                 UNIMPLEMENTED("Unexpected type in WalkLiteralAccessChain");
 967                         }
 968                 }
 969
 970                 return constantOffset;
 971         }
 972
 973         void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
 974         {
 975                 switch (decoration)
 976                 {
 977                 case spv::DecorationLocation:
 978                         HasLocation = true;
 979                         Location = static_cast<int32_t>(arg);
 980                         break;
 981                 case spv::DecorationComponent:
 982                         HasComponent = true;
 983                         Component = arg;
 984                         break;
 985                 case spv::DecorationDescriptorSet:
 986                         HasDescriptorSet = true;
 987                         DescriptorSet = arg;
 988                         break;
 989                 case spv::DecorationBinding:
 990                         HasBinding = true;
 991                         Binding = arg;
 992                         break;
 993                 case spv::DecorationBuiltIn:
 994                         HasBuiltIn = true;
 995                         BuiltIn = static_cast<spv::BuiltIn>(arg);
 996                         break;
 997                 case spv::DecorationFlat:
 998                         Flat = true;
 999                         break;
1000                 case spv::DecorationNoPerspective:
1001                         NoPerspective = true;
1002                         break;
1003                 case spv::DecorationCentroid:
1004                         Centroid = true;
1005                         break;
1006                 case spv::DecorationBlock:
1007                         Block = true;
1008                         break;
1009                 case spv::DecorationBufferBlock:
1010                         BufferBlock = true;
1011                         break;
1012                 case spv::DecorationOffset:
1013                         HasOffset = true;
1014                         Offset = static_cast<int32_t>(arg);
1015                         break;
1016                 case spv::DecorationArrayStride:
1017                         HasArrayStride = true;
1018                         ArrayStride = static_cast<int32_t>(arg);
1019                         break;
1020                 case spv::DecorationMatrixStride:
1021                         HasMatrixStride = true;
1022                         MatrixStride = static_cast<int32_t>(arg);
1023                         break;
1024                 default:
1025                         // Intentionally partial, there are many decorations we just don't care about.
1026                         break;
1027                 }
1028         }
1029
1030         void SpirvShader::Decorations::Apply(const sw::SpirvShader::Decorations &src)
1031         {
1032                 // Apply a decoration group to this set of decorations
1033                 if (src.HasBuiltIn)
1034                 {
1035                         HasBuiltIn = true;
1036                         BuiltIn = src.BuiltIn;
1037                 }
1038
1039                 if (src.HasLocation)
1040                 {
1041                         HasLocation = true;
1042                         Location = src.Location;
1043                 }
1044
1045                 if (src.HasComponent)
1046                 {
1047                         HasComponent = true;
1048                         Component = src.Component;
1049                 }
1050
1051                 if (src.HasDescriptorSet)
1052                 {
1053                         HasDescriptorSet = true;
1054                         DescriptorSet = src.DescriptorSet;
1055                 }
1056
1057                 if (src.HasBinding)
1058                 {
1059                         HasBinding = true;
1060                         Binding = src.Binding;
1061                 }
1062
1063                 if (src.HasOffset)
1064                 {
1065                         HasOffset = true;
1066                         Offset = src.Offset;
1067                 }
1068
1069                 if (src.HasArrayStride)
1070                 {
1071                         HasArrayStride = true;
1072                         ArrayStride = src.ArrayStride;
1073                 }
1074
1075                 if (src.HasMatrixStride)
1076                 {
1077                         HasMatrixStride = true;
1078                         MatrixStride = src.MatrixStride;
1079                 }
1080
1081                 Flat |= src.Flat;
1082                 NoPerspective |= src.NoPerspective;
1083                 Centroid |= src.Centroid;
1084                 Block |= src.Block;
1085                 BufferBlock |= src.BufferBlock;
1086         }
1087
1088         void SpirvShader::ApplyDecorationsForId(Decorations *d, TypeOrObjectID id) const
1089         {
1090                 auto it = decorations.find(id);
1091                 if (it != decorations.end())
1092                         d->Apply(it->second);
1093         }
1094
1095         void SpirvShader::ApplyDecorationsForIdMember(Decorations *d, Type::ID id, uint32_t member) const
1096         {
1097                 auto it = memberDecorations.find(id);
1098                 if (it != memberDecorations.end() && member < it->second.size())
1099                 {
1100                         d->Apply(it->second[member]);
1101                 }
1102         }
1103
1104         uint32_t SpirvShader::GetConstantInt(Object::ID id) const
1105         {
1106                 // Slightly hackish access to constants very early in translation.
1107                 // General consumption of constants by other instructions should
1108                 // probably be just lowered to Reactor.
1109
1110                 // TODO: not encountered yet since we only use this for array sizes etc,
1111                 // but is possible to construct integer constant 0 via OpConstantNull.
1112                 auto insn = getObject(id).definition;
1113                 ASSERT(insn.opcode() == spv::OpConstant);
1114                 ASSERT(getType(insn.word(1)).opcode() == spv::OpTypeInt);
1115                 return insn.word(3);
1116         }
1117
1118         // emit-time
1119
1120         void SpirvShader::emitProlog(SpirvRoutine *routine) const
1121         {
1122                 for (auto insn : *this)
1123                 {
1124                         switch (insn.opcode())
1125                         {
1126                         case spv::OpVariable:
1127                         {
1128                                 Type::ID resultPointerTypeId = insn.word(1);
1129                                 auto resultPointerType = getType(resultPointerTypeId);
1130                                 auto pointeeType = getType(resultPointerType.element);
1131
1132                                 if(pointeeType.sizeInComponents > 0)  // TODO: what to do about zero-slot objects?
1133                                 {
1134                                         Object::ID resultId = insn.word(2);
1135                                         routine->createLvalue(resultId, pointeeType.sizeInComponents);
1136                                 }
1137                                 break;
1138                         }
1139                         default:
1140                                 // Nothing else produces interface variables, so can all be safely ignored.
1141                                 break;
1142                         }
1143                 }
1144         }
1145
1146         void SpirvShader::emit(SpirvRoutine *routine, RValue<SIMD::Int> const &activeLaneMask) const
1147         {
1148                 EmitState state;
1149                 state.setActiveLaneMask(activeLaneMask);
1150                 state.routine = routine;
1151
1152                 // Emit everything up to the first label
1153                 // TODO: Separate out dispatch of block from non-block instructions?
1154                 for (auto insn : *this)
1155                 {
1156                         if (insn.opcode() == spv::OpLabel)
1157                         {
1158                                 break;
1159                         }
1160                         EmitInstruction(insn, &state);
1161                 }
1162
1163                 // Emit all the blocks in BFS order, starting with the main block.
1164                 std::queue<Block::ID> pending;
1165                 pending.push(mainBlockId);
1166                 while (pending.size() > 0)
1167                 {
1168                         auto id = pending.front();
1169                         pending.pop();
1170                         if (state.visited.count(id) == 0)
1171                         {
1172                                 EmitBlock(id, &state);
1173                                 for (auto it : getBlock(id).outs)
1174                                 {
1175                                         pending.push(it);
1176                                 }
1177                         }
1178                 }
1179         }
1180
1181         void SpirvShader::EmitBlock(Block::ID id, EmitState *state) const
1182         {
1183                 if (state->visited.count(id) > 0)
1184                 {
1185                         return; // Already processed this block.
1186                 }
1187
1188                 state->visited.emplace(id);
1189
1190                 auto &block = getBlock(id);
1191
1192                 switch (block.kind)
1193                 {
1194                         case Block::Simple:
1195                         case Block::StructuredBranchConditional:
1196                         case Block::UnstructuredBranchConditional:
1197                         case Block::StructuredSwitch:
1198                         case Block::UnstructuredSwitch:
1199                                 if (id != mainBlockId)
1200                                 {
1201                                         // Emit all preceding blocks and set the activeLaneMask.
1202                                         Intermediate activeLaneMask(1);
1203                                         activeLaneMask.move(0, SIMD::Int(0));
1204                                         for (auto in : block.ins)
1205                                         {
1206                                                 EmitBlock(in, state);
1207                                                 auto inMask = state->getActiveLaneMaskEdge(in, id);
1208                                                 activeLaneMask.replace(0, activeLaneMask.Int(0) | inMask);
1209                                         }
1210                                         state->setActiveLaneMask(activeLaneMask.Int(0));
1211                                 }
1212                                 state->currentBlock = id;
1213                                 EmitInstructions(block.begin(), block.end(), state);
1214                                 break;
1215
1216                         case Block::Loop:
1217                                 state->currentBlock = id;
1218                                 EmitLoop(state);
1219                                 break;
1220
1221                         default:
1222                                 UNREACHABLE("Unexpected Block Kind: %d", int(block.kind));
1223                 }
1224         }
1225
1226         void SpirvShader::EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const
1227         {
1228                 for (auto insn = begin; insn != end; insn++)
1229                 {
1230                         auto res = EmitInstruction(insn, state);
1231                         switch (res)
1232                         {
1233                         case EmitResult::Continue:
1234                                 continue;
1235                         case EmitResult::Terminator:
1236                                 break;
1237                         default:
1238                                 UNREACHABLE("Unexpected EmitResult %d", int(res));
1239                                 break;
1240                         }
1241                 }
1242         }
1243
1244         void SpirvShader::EmitLoop(EmitState *state) const
1245         {
1246                 auto blockId = state->currentBlock;
1247                 auto block = getBlock(blockId);
1248
1249                 // loopActiveLaneMask is the mask of lanes that are continuing to loop.
1250                 // This is initialized with the incoming active lane masks.
1251                 SIMD::Int loopActiveLaneMask = SIMD::Int(0);
1252                 for (auto in : block.ins)
1253                 {
1254                         if (!existsPath(blockId, in)) // if not a loop back edge
1255                         {
1256                                 EmitBlock(in, state);
1257                                 loopActiveLaneMask |= state->getActiveLaneMaskEdge(in, blockId);
1258                         }
1259                 }
1260
1261                 // Generate an alloca for each of the loop's phis.
1262                 // These will be primed with the incoming, non back edge Phi values
1263                 // before the loop, and then updated just before the loop jumps back to
1264                 // the block.
1265                 struct LoopPhi
1266                 {
1267                         Object::ID phiId; // The Phi identifier.
1268                         Object::ID continueValue; // The source merge value from the loop.
1269                         Array<SIMD::Int> storage; // The alloca.
1270                 };
1271
1272                 std::vector<LoopPhi> phis;
1273
1274                 // For each OpPhi between the block start and the merge instruction:
1275                 for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
1276                 {
1277                         if (insn.opcode() == spv::OpPhi)
1278                         {
1279                                 auto objectId = Object::ID(insn.word(2));
1280                                 auto &object = getObject(objectId);
1281                                 auto &type = getType(object.type);
1282
1283                                 LoopPhi phi;
1284                                 phi.phiId = Object::ID(insn.word(2));
1285                                 phi.storage = Array<SIMD::Int>(type.sizeInComponents);
1286
1287                                 // Start with the Phi set to 0.
1288                                 for (uint32_t i = 0; i < type.sizeInComponents; i++)
1289                                 {
1290                                         phi.storage[i] = SIMD::Int(0);
1291                                 }
1292
1293                                 // For each Phi source:
1294                                 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
1295                                 {
1296                                         auto varId = Object::ID(insn.word(w + 0));
1297                                         auto blockId = Block::ID(insn.word(w + 1));
1298                                         if (existsPath(state->currentBlock, blockId))
1299                                         {
1300                                                 // This source is from a loop back-edge.
1301                                                 ASSERT(phi.continueValue == 0 || phi.continueValue == varId);
1302                                                 phi.continueValue = varId;
1303                                         }
1304                                         else
1305                                         {
1306                                                 // This source is from a preceding block.
1307                                                 for (uint32_t i = 0; i < type.sizeInComponents; i++)
1308                                                 {
1309                                                         auto in = GenericValue(this, state->routine, varId);
1310                                                         auto mask = state->getActiveLaneMaskEdge(blockId, state->currentBlock);
1311                                                         phi.storage[i] = phi.storage[i] | (in.Int(i) & mask);
1312                                                 }
1313                                         }
1314                                 }
1315
1316                                 phis.push_back(phi);
1317                         }
1318                 }
1319
1320                 // Create the loop basic blocks
1321                 auto headerBasicBlock = Nucleus::createBasicBlock();
1322                 auto mergeBasicBlock = Nucleus::createBasicBlock();
1323
1324                 // Start emitting code inside the loop.
1325                 Nucleus::createBr(headerBasicBlock);
1326                 Nucleus::setInsertBlock(headerBasicBlock);
1327
1328                 // Load the Phi values from storage.
1329                 // This will load at the start of each loop.
1330                 for (auto &phi : phis)
1331                 {
1332                         auto &type = getType(getObject(phi.phiId).type);
1333                         auto &dst = state->routine->createIntermediate(phi.phiId, type.sizeInComponents);
1334                         for (unsigned int i = 0u; i < type.sizeInComponents; i++)
1335                         {
1336                                 dst.move(i, phi.storage[i]);
1337                         }
1338                 }
1339
1340                 // Load the active lane mask.
1341                 state->setActiveLaneMask(loopActiveLaneMask);
1342
1343                 // Emit all the non-phi instructions in this loop header block.
1344                 for (auto insn = block.begin(); insn != block.end(); insn++)
1345                 {
1346                         if (insn.opcode() != spv::OpPhi)
1347                         {
1348                                 EmitInstruction(insn, state);
1349                         }
1350                 }
1351
1352                 // Emit all the back-edge blocks and use their active lane masks to
1353                 // rebuild the loopActiveLaneMask.
1354                 loopActiveLaneMask = SIMD::Int(0);
1355                 for (auto in : block.ins)
1356                 {
1357                         if (existsPath(blockId, in))
1358                         {
1359                                 EmitBlock(in, state);
1360                                 loopActiveLaneMask |= state->getActiveLaneMaskEdge(in, blockId);
1361                         }
1362                 }
1363
1364                 // Update loop phi values
1365                 for (auto &phi : phis)
1366                 {
1367                         if (phi.continueValue != 0)
1368                         {
1369                                 auto val = GenericValue(this, state->routine, phi.continueValue);
1370                                 auto &type = getType(getObject(phi.phiId).type);
1371                                 for (unsigned int i = 0u; i < type.sizeInComponents; i++)
1372                                 {
1373                                         phi.storage[i] = val.Int(i);
1374                                 }
1375                         }
1376                 }
1377
1378                 // Loop body now done.
1379                 // If any lanes are still active, jump back to the loop header,
1380                 // otherwise jump to the merge block.
1381                 Nucleus::createCondBr(AnyTrue(loopActiveLaneMask).value, headerBasicBlock, mergeBasicBlock);
1382
1383                 // Emit the merge block, and we're done.
1384                 Nucleus::setInsertBlock(mergeBasicBlock);
1385                 EmitBlock(block.mergeBlock, state);
1386         }
1387
1388         SpirvShader::EmitResult SpirvShader::EmitInstruction(InsnIterator insn, EmitState *state) const
1389         {
1390                 switch (insn.opcode())
1391                 {
1392                 case spv::OpTypeVoid:
1393                 case spv::OpTypeInt:
1394                 case spv::OpTypeFloat:
1395                 case spv::OpTypeBool:
1396                 case spv::OpTypeVector:
1397                 case spv::OpTypeArray:
1398                 case spv::OpTypeRuntimeArray:
1399                 case spv::OpTypeMatrix:
1400                 case spv::OpTypeStruct:
1401                 case spv::OpTypePointer:
1402                 case spv::OpTypeFunction:
1403                 case spv::OpExecutionMode:
1404                 case spv::OpMemoryModel:
1405                 case spv::OpFunction:
1406                 case spv::OpFunctionEnd:
1407                 case spv::OpConstant:
1408                 case spv::OpConstantNull:
1409                 case spv::OpConstantTrue:
1410                 case spv::OpConstantFalse:
1411                 case spv::OpConstantComposite:
1412                 case spv::OpUndef:
1413                 case spv::OpExtension:
1414                 case spv::OpCapability:
1415                 case spv::OpEntryPoint:
1416                 case spv::OpExtInstImport:
1417                 case spv::OpDecorate:
1418                 case spv::OpMemberDecorate:
1419                 case spv::OpGroupDecorate:
1420                 case spv::OpGroupMemberDecorate:
1421                 case spv::OpDecorationGroup:
1422                 case spv::OpName:
1423                 case spv::OpMemberName:
1424                 case spv::OpSource:
1425                 case spv::OpSourceContinued:
1426                 case spv::OpSourceExtension:
1427                 case spv::OpLine:
1428                 case spv::OpNoLine:
1429                 case spv::OpModuleProcessed:
1430                 case spv::OpString:
1431                         // Nothing to do at emit time. These are either fully handled at analysis time,
1432                         // or don't require any work at all.
1433                         return EmitResult::Continue;
1434
1435                 case spv::OpLabel:
1436                         return EmitResult::Continue;
1437
1438                 case spv::OpVariable:
1439                         return EmitVariable(insn, state);
1440
1441                 case spv::OpLoad:
1442                 case spv::OpAtomicLoad:
1443                         return EmitLoad(insn, state);
1444
1445                 case spv::OpStore:
1446                 case spv::OpAtomicStore:
1447                         return EmitStore(insn, state);
1448
1449                 case spv::OpAccessChain:
1450                 case spv::OpInBoundsAccessChain:
1451                         return EmitAccessChain(insn, state);
1452
1453                 case spv::OpCompositeConstruct:
1454                         return EmitCompositeConstruct(insn, state);
1455
1456                 case spv::OpCompositeInsert:
1457                         return EmitCompositeInsert(insn, state);
1458
1459                 case spv::OpCompositeExtract:
1460                         return EmitCompositeExtract(insn, state);
1461
1462                 case spv::OpVectorShuffle:
1463                         return EmitVectorShuffle(insn, state);
1464
1465                 case spv::OpVectorExtractDynamic:
1466                         return EmitVectorExtractDynamic(insn, state);
1467
1468                 case spv::OpVectorInsertDynamic:
1469                         return EmitVectorInsertDynamic(insn, state);
1470
1471                 case spv::OpVectorTimesScalar:
1472                 case spv::OpMatrixTimesScalar:
1473                         return EmitVectorTimesScalar(insn, state);
1474
1475                 case spv::OpNot:
1476                 case spv::OpSNegate:
1477                 case spv::OpFNegate:
1478                 case spv::OpLogicalNot:
1479                 case spv::OpConvertFToU:
1480                 case spv::OpConvertFToS:
1481                 case spv::OpConvertSToF:
1482                 case spv::OpConvertUToF:
1483                 case spv::OpBitcast:
1484                 case spv::OpIsInf:
1485                 case spv::OpIsNan:
1486                 case spv::OpDPdx:
1487                 case spv::OpDPdxCoarse:
1488                 case spv::OpDPdy:
1489                 case spv::OpDPdyCoarse:
1490                 case spv::OpFwidth:
1491                 case spv::OpFwidthCoarse:
1492                 case spv::OpDPdxFine:
1493                 case spv::OpDPdyFine:
1494                 case spv::OpFwidthFine:
1495                         return EmitUnaryOp(insn, state);
1496
1497                 case spv::OpIAdd:
1498                 case spv::OpISub:
1499                 case spv::OpIMul:
1500                 case spv::OpSDiv:
1501                 case spv::OpUDiv:
1502                 case spv::OpFAdd:
1503                 case spv::OpFSub:
1504                 case spv::OpFMul:
1505                 case spv::OpFDiv:
1506                 case spv::OpFMod:
1507                 case spv::OpFRem:
1508                 case spv::OpFOrdEqual:
1509                 case spv::OpFUnordEqual:
1510                 case spv::OpFOrdNotEqual:
1511                 case spv::OpFUnordNotEqual:
1512                 case spv::OpFOrdLessThan:
1513                 case spv::OpFUnordLessThan:
1514                 case spv::OpFOrdGreaterThan:
1515                 case spv::OpFUnordGreaterThan:
1516                 case spv::OpFOrdLessThanEqual:
1517                 case spv::OpFUnordLessThanEqual:
1518                 case spv::OpFOrdGreaterThanEqual:
1519                 case spv::OpFUnordGreaterThanEqual:
1520                 case spv::OpSMod:
1521                 case spv::OpSRem:
1522                 case spv::OpUMod:
1523                 case spv::OpIEqual:
1524                 case spv::OpINotEqual:
1525                 case spv::OpUGreaterThan:
1526                 case spv::OpSGreaterThan:
1527                 case spv::OpUGreaterThanEqual:
1528                 case spv::OpSGreaterThanEqual:
1529                 case spv::OpULessThan:
1530                 case spv::OpSLessThan:
1531                 case spv::OpULessThanEqual:
1532                 case spv::OpSLessThanEqual:
1533                 case spv::OpShiftRightLogical:
1534                 case spv::OpShiftRightArithmetic:
1535                 case spv::OpShiftLeftLogical:
1536                 case spv::OpBitwiseOr:
1537                 case spv::OpBitwiseXor:
1538                 case spv::OpBitwiseAnd:
1539                 case spv::OpLogicalOr:
1540                 case spv::OpLogicalAnd:
1541                 case spv::OpLogicalEqual:
1542                 case spv::OpLogicalNotEqual:
1543                 case spv::OpUMulExtended:
1544                 case spv::OpSMulExtended:
1545                         return EmitBinaryOp(insn, state);
1546
1547                 case spv::OpDot:
1548                         return EmitDot(insn, state);
1549
1550                 case spv::OpSelect:
1551                         return EmitSelect(insn, state);
1552
1553                 case spv::OpExtInst:
1554                         return EmitExtendedInstruction(insn, state);
1555
1556                 case spv::OpAny:
1557                         return EmitAny(insn, state);
1558
1559                 case spv::OpAll:
1560                         return EmitAll(insn, state);
1561
1562                 case spv::OpBranch:
1563                         return EmitBranch(insn, state);
1564
1565                 case spv::OpPhi:
1566                         return EmitPhi(insn, state);
1567
1568                 case spv::OpSelectionMerge:
1569                 case spv::OpLoopMerge:
1570                         return EmitResult::Continue;
1571
1572                 case spv::OpBranchConditional:
1573                         return EmitBranchConditional(insn, state);
1574
1575                 case spv::OpSwitch:
1576                         return EmitSwitch(insn, state);
1577
1578                 case spv::OpUnreachable:
1579                         return EmitUnreachable(insn, state);
1580
1581                 case spv::OpReturn:
1582                         return EmitReturn(insn, state);
1583
1584                 default:
1585                         UNIMPLEMENTED("opcode: %s", OpcodeName(insn.opcode()).c_str());
1586                         break;
1587                 }
1588
1589                 return EmitResult::Continue;
1590         }
1591
1592         SpirvShader::EmitResult SpirvShader::EmitVariable(InsnIterator insn, EmitState *state) const
1593         {
1594                 auto routine = state->routine;
1595                 Object::ID resultId = insn.word(2);
1596                 auto &object = getObject(resultId);
1597                 auto &objectTy = getType(object.type);
1598                 switch (objectTy.storageClass)
1599                 {
1600                 case spv::StorageClassInput:
1601                 {
1602                         if (object.kind == Object::Kind::InterfaceVariable)
1603                         {
1604                                 auto &dst = routine->getValue(resultId);
1605                                 int offset = 0;
1606                                 VisitInterface(resultId,
1607                                                                 [&](Decorations const &d, AttribType type) {
1608                                                                         auto scalarSlot = d.Location << 2 | d.Component;
1609                                                                         dst[offset++] = routine->inputs[scalarSlot];
1610                                                                 });
1611                         }
1612                         break;
1613                 }
1614                 case spv::StorageClassUniform:
1615                 case spv::StorageClassStorageBuffer:
1616                 {
1617                         Decorations d{};
1618                         ApplyDecorationsForId(&d, resultId);
1619                         ASSERT(d.DescriptorSet >= 0);
1620                         ASSERT(d.Binding >= 0);
1621
1622                         size_t bindingOffset = routine->pipelineLayout->getBindingOffset(d.DescriptorSet, d.Binding);
1623
1624                         Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet]; // DescriptorSet*
1625                         Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset); // VkDescriptorBufferInfo*
1626                         Pointer<Byte> buffer = *Pointer<Pointer<Byte>>(binding + OFFSET(VkDescriptorBufferInfo, buffer)); // vk::Buffer*
1627                         Pointer<Byte> data = *Pointer<Pointer<Byte>>(buffer + vk::Buffer::DataOffset); // void*
1628                         Int offset = *Pointer<Int>(binding + OFFSET(VkDescriptorBufferInfo, offset));
1629                         Pointer<Byte> address = data + offset;
1630                         routine->physicalPointers[resultId] = address;
1631                         break;
1632                 }
1633                 case spv::StorageClassPushConstant:
1634                 {
1635                         routine->physicalPointers[resultId] = routine->pushConstants;
1636                         break;
1637                 }
1638                 default:
1639                         break;
1640                 }
1641
1642                 return EmitResult::Continue;
1643         }
1644
1645         SpirvShader::EmitResult SpirvShader::EmitLoad(InsnIterator insn, EmitState *state) const
1646         {
1647                 auto routine = state->routine;
1648                 bool atomic = (insn.opcode() == spv::OpAtomicLoad);
1649                 Object::ID resultId = insn.word(2);
1650                 Object::ID pointerId = insn.word(3);
1651                 auto &result = getObject(resultId);
1652                 auto &resultTy = getType(result.type);
1653                 auto &pointer = getObject(pointerId);
1654                 auto &pointerBase = getObject(pointer.pointerBase);
1655                 auto &pointerBaseTy = getType(pointerBase.type);
1656                 std::memory_order memoryOrder = std::memory_order_relaxed;
1657
1658                 if(atomic)
1659                 {
1660                         Object::ID semanticsId = insn.word(5);
1661                         auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1662                         memoryOrder = MemoryOrder(memorySemantics);
1663                 }
1664
1665                 ASSERT(getType(pointer.type).element == result.type);
1666                 ASSERT(Type::ID(insn.word(1)) == result.type);
1667                 ASSERT(!atomic || getType(getType(pointer.type).element).opcode() == spv::OpTypeInt);  // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1668
1669                 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1670                 {
1671                         UNIMPLEMENTED("StorageClassImage load not yet implemented");
1672                 }
1673
1674                 Pointer<Float> ptrBase;
1675                 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1676                 {
1677                         ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1678                 }
1679                 else
1680                 {
1681                         ptrBase = &routine->getValue(pointer.pointerBase)[0];
1682                 }
1683
1684                 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1685                 auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
1686
1687                 auto load = std::unique_ptr<SIMD::Float[]>(new SIMD::Float[resultTy.sizeInComponents]);
1688
1689                 If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1690                 {
1691                         // Divergent offsets or masked lanes.
1692                         auto offsets = pointer.kind == Object::Kind::Value ?
1693                                         As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1694                                         RValue<SIMD::Int>(SIMD::Int(0));
1695                         for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1696                         {
1697                                 // i wish i had a Float,Float,Float,Float constructor here..
1698                                 for (int j = 0; j < SIMD::Width; j++)
1699                                 {
1700                                         If(Extract(state->activeLaneMask(), j) != 0)
1701                                         {
1702                                                 Int offset = Int(i) + Extract(offsets, j);
1703                                                 if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1704                                                 load[i] = Insert(load[i], Load(&ptrBase[offset], sizeof(float), atomic, memoryOrder), j);
1705                                         }
1706                                 }
1707                         }
1708                 }
1709                 Else
1710                 {
1711                         // No divergent offsets or masked lanes.
1712                         if (interleavedByLane)
1713                         {
1714                                 // Lane-interleaved data.
1715                                 Pointer<SIMD::Float> src = ptrBase;
1716                                 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1717                                 {
1718                                         load[i] = Load(&src[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1719                                 }
1720                         }
1721                         else
1722                         {
1723                                 // Non-interleaved data.
1724                                 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1725                                 {
1726                                         load[i] = RValue<SIMD::Float>(Load(&ptrBase[i], sizeof(float), atomic, memoryOrder));  // TODO: optimize alignment
1727                                 }
1728                         }
1729                 }
1730
1731                 auto &dst = routine->createIntermediate(resultId, resultTy.sizeInComponents);
1732                 for (auto i = 0u; i < resultTy.sizeInComponents; i++)
1733                 {
1734                         dst.move(i, load[i]);
1735                 }
1736
1737                 return EmitResult::Continue;
1738         }
1739
1740         SpirvShader::EmitResult SpirvShader::EmitStore(InsnIterator insn, EmitState *state) const
1741         {
1742                 auto routine = state->routine;
1743                 bool atomic = (insn.opcode() == spv::OpAtomicStore);
1744                 Object::ID pointerId = insn.word(1);
1745                 Object::ID objectId = insn.word(atomic ? 4 : 2);
1746                 auto &object = getObject(objectId);
1747                 auto &pointer = getObject(pointerId);
1748                 auto &pointerTy = getType(pointer.type);
1749                 auto &elementTy = getType(pointerTy.element);
1750                 auto &pointerBase = getObject(pointer.pointerBase);
1751                 auto &pointerBaseTy = getType(pointerBase.type);
1752                 std::memory_order memoryOrder = std::memory_order_relaxed;
1753
1754                 if(atomic)
1755                 {
1756                         Object::ID semanticsId = insn.word(3);
1757                         auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
1758                         memoryOrder = MemoryOrder(memorySemantics);
1759                 }
1760
1761                 ASSERT(!atomic || elementTy.opcode() == spv::OpTypeInt);  // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
1762
1763                 if (pointerBaseTy.storageClass == spv::StorageClassImage)
1764                 {
1765                         UNIMPLEMENTED("StorageClassImage store not yet implemented");
1766                 }
1767
1768                 Pointer<Float> ptrBase;
1769                 if (pointerBase.kind == Object::Kind::PhysicalPointer)
1770                 {
1771                         ptrBase = routine->getPhysicalPointer(pointer.pointerBase);
1772                 }
1773                 else
1774                 {
1775                         ptrBase = &routine->getValue(pointer.pointerBase)[0];
1776                 }
1777
1778                 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
1779                 auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
1780
1781                 if (object.kind == Object::Kind::Constant)
1782                 {
1783                         // Constant source data.
1784                         auto src = reinterpret_cast<float *>(object.constantValue.get());
1785                         If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1786                         {
1787                                 // Divergent offsets or masked lanes.
1788                                 auto offsets = pointer.kind == Object::Kind::Value ?
1789                                                 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1790                                                 RValue<SIMD::Int>(SIMD::Int(0));
1791                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1792                                 {
1793                                         for (int j = 0; j < SIMD::Width; j++)
1794                                         {
1795                                                 If(Extract(state->activeLaneMask(), j) != 0)
1796                                                 {
1797                                                         Int offset = Int(i) + Extract(offsets, j);
1798                                                         if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1799                                                         Store(RValue<Float>(src[i]), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1800                                                 }
1801                                         }
1802                                 }
1803                         }
1804                         Else
1805                         {
1806                                 // Constant source data.
1807                                 // No divergent offsets or masked lanes.
1808                                 Pointer<SIMD::Float> dst = ptrBase;
1809                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1810                                 {
1811                                         Store(RValue<SIMD::Float>(src[i]), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1812                                 }
1813                         }
1814                 }
1815                 else
1816                 {
1817                         // Intermediate source data.
1818                         auto &src = routine->getIntermediate(objectId);
1819                         If(pointer.kind == Object::Kind::Value || anyInactiveLanes)
1820                         {
1821                                 // Divergent offsets or masked lanes.
1822                                 auto offsets = pointer.kind == Object::Kind::Value ?
1823                                                 As<SIMD::Int>(routine->getIntermediate(pointerId).Int(0)) :
1824                                                 RValue<SIMD::Int>(SIMD::Int(0));
1825                                 for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1826                                 {
1827                                         for (int j = 0; j < SIMD::Width; j++)
1828                                         {
1829                                                 If(Extract(state->activeLaneMask(), j) != 0)
1830                                                 {
1831                                                         Int offset = Int(i) + Extract(offsets, j);
1832                                                         if (interleavedByLane) { offset = offset * SIMD::Width + j; }
1833                                                         Store(Extract(src.Float(i), j), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
1834                                                 }
1835                                         }
1836                                 }
1837                         }
1838                         Else
1839                         {
1840                                 // No divergent offsets or masked lanes.
1841                                 if (interleavedByLane)
1842                                 {
1843                                         // Lane-interleaved data.
1844                                         Pointer<SIMD::Float> dst = ptrBase;
1845                                         for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1846                                         {
1847                                                 Store(src.Float(i), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1848                                         }
1849                                 }
1850                                 else
1851                                 {
1852                                         // Intermediate source data. Non-interleaved data.
1853                                         Pointer<SIMD::Float> dst = ptrBase;
1854                                         for (auto i = 0u; i < elementTy.sizeInComponents; i++)
1855                                         {
1856                                                 Store<SIMD::Float>(SIMD::Float(src.Float(i)), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
1857                                         }
1858                                 }
1859                         }
1860                 }
1861
1862                 return EmitResult::Continue;
1863         }
1864
1865         SpirvShader::EmitResult SpirvShader::EmitAccessChain(InsnIterator insn, EmitState *state) const
1866         {
1867                 auto routine = state->routine;
1868                 Type::ID typeId = insn.word(1);
1869                 Object::ID resultId = insn.word(2);
1870                 Object::ID baseId = insn.word(3);
1871                 uint32_t numIndexes = insn.wordCount() - 4;
1872                 const uint32_t *indexes = insn.wordPointer(4);
1873                 auto &type = getType(typeId);
1874                 ASSERT(type.sizeInComponents == 1);
1875                 ASSERT(getObject(baseId).pointerBase == getObject(resultId).pointerBase);
1876
1877                 auto &dst = routine->createIntermediate(resultId, type.sizeInComponents);
1878
1879                 if(type.storageClass == spv::StorageClassPushConstant ||
1880                    type.storageClass == spv::StorageClassUniform ||
1881                    type.storageClass == spv::StorageClassStorageBuffer)
1882                 {
1883                         dst.move(0, WalkExplicitLayoutAccessChain(baseId, numIndexes, indexes, routine));
1884                 }
1885                 else
1886                 {
1887                         dst.move(0, WalkAccessChain(baseId, numIndexes, indexes, routine));
1888                 }
1889
1890                 return EmitResult::Continue;
1891         }
1892
1893         SpirvShader::EmitResult SpirvShader::EmitCompositeConstruct(InsnIterator insn, EmitState *state) const
1894         {
1895                 auto routine = state->routine;
1896                 auto &type = getType(insn.word(1));
1897                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1898                 auto offset = 0u;
1899
1900                 for (auto i = 0u; i < insn.wordCount() - 3; i++)
1901                 {
1902                         Object::ID srcObjectId = insn.word(3u + i);
1903                         auto & srcObject = getObject(srcObjectId);
1904                         auto & srcObjectTy = getType(srcObject.type);
1905                         GenericValue srcObjectAccess(this, routine, srcObjectId);
1906
1907                         for (auto j = 0u; j < srcObjectTy.sizeInComponents; j++)
1908                         {
1909                                 dst.move(offset++, srcObjectAccess.Float(j));
1910                         }
1911                 }
1912
1913                 return EmitResult::Continue;
1914         }
1915
1916         SpirvShader::EmitResult SpirvShader::EmitCompositeInsert(InsnIterator insn, EmitState *state) const
1917         {
1918                 auto routine = state->routine;
1919                 Type::ID resultTypeId = insn.word(1);
1920                 auto &type = getType(resultTypeId);
1921                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1922                 auto &newPartObject = getObject(insn.word(3));
1923                 auto &newPartObjectTy = getType(newPartObject.type);
1924                 auto firstNewComponent = WalkLiteralAccessChain(resultTypeId, insn.wordCount() - 5, insn.wordPointer(5));
1925
1926                 GenericValue srcObjectAccess(this, routine, insn.word(4));
1927                 GenericValue newPartObjectAccess(this, routine, insn.word(3));
1928
1929                 // old components before
1930                 for (auto i = 0u; i < firstNewComponent; i++)
1931                 {
1932                         dst.move(i, srcObjectAccess.Float(i));
1933                 }
1934                 // new part
1935                 for (auto i = 0u; i < newPartObjectTy.sizeInComponents; i++)
1936                 {
1937                         dst.move(firstNewComponent + i, newPartObjectAccess.Float(i));
1938                 }
1939                 // old components after
1940                 for (auto i = firstNewComponent + newPartObjectTy.sizeInComponents; i < type.sizeInComponents; i++)
1941                 {
1942                         dst.move(i, srcObjectAccess.Float(i));
1943                 }
1944
1945                 return EmitResult::Continue;
1946         }
1947
1948         SpirvShader::EmitResult SpirvShader::EmitCompositeExtract(InsnIterator insn, EmitState *state) const
1949         {
1950                 auto routine = state->routine;
1951                 auto &type = getType(insn.word(1));
1952                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1953                 auto &compositeObject = getObject(insn.word(3));
1954                 Type::ID compositeTypeId = compositeObject.definition.word(1);
1955                 auto firstComponent = WalkLiteralAccessChain(compositeTypeId, insn.wordCount() - 4, insn.wordPointer(4));
1956
1957                 GenericValue compositeObjectAccess(this, routine, insn.word(3));
1958                 for (auto i = 0u; i < type.sizeInComponents; i++)
1959                 {
1960                         dst.move(i, compositeObjectAccess.Float(firstComponent + i));
1961                 }
1962
1963                 return EmitResult::Continue;
1964         }
1965
1966         SpirvShader::EmitResult SpirvShader::EmitVectorShuffle(InsnIterator insn, EmitState *state) const
1967         {
1968                 auto routine = state->routine;
1969                 auto &type = getType(insn.word(1));
1970                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
1971
1972                 // Note: number of components in result type, first half type, and second
1973                 // half type are all independent.
1974                 auto &firstHalfType = getType(getObject(insn.word(3)).type);
1975
1976                 GenericValue firstHalfAccess(this, routine, insn.word(3));
1977                 GenericValue secondHalfAccess(this, routine, insn.word(4));
1978
1979                 for (auto i = 0u; i < type.sizeInComponents; i++)
1980                 {
1981                         auto selector = insn.word(5 + i);
1982                         if (selector == static_cast<uint32_t>(-1))
1983                         {
1984                                 // Undefined value. Until we decide to do real undef values, zero is as good
1985                                 // a value as any
1986                                 dst.move(i, RValue<SIMD::Float>(0.0f));
1987                         }
1988                         else if (selector < firstHalfType.sizeInComponents)
1989                         {
1990                                 dst.move(i, firstHalfAccess.Float(selector));
1991                         }
1992                         else
1993                         {
1994                                 dst.move(i, secondHalfAccess.Float(selector - firstHalfType.sizeInComponents));
1995                         }
1996                 }
1997
1998                 return EmitResult::Continue;
1999         }
2000
2001         SpirvShader::EmitResult SpirvShader::EmitVectorExtractDynamic(InsnIterator insn, EmitState *state) const
2002         {
2003                 auto routine = state->routine;
2004                 auto &type = getType(insn.word(1));
2005                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2006                 auto &srcType = getType(getObject(insn.word(3)).type);
2007
2008                 GenericValue src(this, routine, insn.word(3));
2009                 GenericValue index(this, routine, insn.word(4));
2010
2011                 SIMD::UInt v = SIMD::UInt(0);
2012
2013                 for (auto i = 0u; i < srcType.sizeInComponents; i++)
2014                 {
2015                         v |= CmpEQ(index.UInt(0), SIMD::UInt(i)) & src.UInt(i);
2016                 }
2017
2018                 dst.move(0, v);
2019                 return EmitResult::Continue;
2020         }
2021
2022         SpirvShader::EmitResult SpirvShader::EmitVectorInsertDynamic(InsnIterator insn, EmitState *state) const
2023         {
2024                 auto routine = state->routine;
2025                 auto &type = getType(insn.word(1));
2026                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2027
2028                 GenericValue src(this, routine, insn.word(3));
2029                 GenericValue component(this, routine, insn.word(4));
2030                 GenericValue index(this, routine, insn.word(5));
2031
2032                 for (auto i = 0u; i < type.sizeInComponents; i++)
2033                 {
2034                         SIMD::UInt mask = CmpEQ(SIMD::UInt(i), index.UInt(0));
2035                         dst.move(i, (src.UInt(i) & ~mask) | (component.UInt(0) & mask));
2036                 }
2037                 return EmitResult::Continue;
2038         }
2039
2040         SpirvShader::EmitResult SpirvShader::EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const
2041         {
2042                 auto routine = state->routine;
2043                 auto &type = getType(insn.word(1));
2044                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2045                 auto lhs = GenericValue(this, routine, insn.word(3));
2046                 auto rhs = GenericValue(this, routine, insn.word(4));
2047
2048                 for (auto i = 0u; i < type.sizeInComponents; i++)
2049                 {
2050                         dst.move(i, lhs.Float(i) * rhs.Float(0));
2051                 }
2052
2053                 return EmitResult::Continue;
2054         }
2055
2056         SpirvShader::EmitResult SpirvShader::EmitUnaryOp(InsnIterator insn, EmitState *state) const
2057         {
2058                 auto routine = state->routine;
2059                 auto &type = getType(insn.word(1));
2060                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2061                 auto src = GenericValue(this, routine, insn.word(3));
2062
2063                 for (auto i = 0u; i < type.sizeInComponents; i++)
2064                 {
2065                         switch (insn.opcode())
2066                         {
2067                         case spv::OpNot:
2068                         case spv::OpLogicalNot:         // logical not == bitwise not due to all-bits boolean representation
2069                                 dst.move(i, ~src.UInt(i));
2070                                 break;
2071                         case spv::OpSNegate:
2072                                 dst.move(i, -src.Int(i));
2073                                 break;
2074                         case spv::OpFNegate:
2075                                 dst.move(i, -src.Float(i));
2076                                 break;
2077                         case spv::OpConvertFToU:
2078                                 dst.move(i, SIMD::UInt(src.Float(i)));
2079                                 break;
2080                         case spv::OpConvertFToS:
2081                                 dst.move(i, SIMD::Int(src.Float(i)));
2082                                 break;
2083                         case spv::OpConvertSToF:
2084                                 dst.move(i, SIMD::Float(src.Int(i)));
2085                                 break;
2086                         case spv::OpConvertUToF:
2087                                 dst.move(i, SIMD::Float(src.UInt(i)));
2088                                 break;
2089                         case spv::OpBitcast:
2090                                 dst.move(i, src.Float(i));
2091                                 break;
2092                         case spv::OpIsInf:
2093                                 dst.move(i, IsInf(src.Float(i)));
2094                                 break;
2095                         case spv::OpIsNan:
2096                                 dst.move(i, IsNan(src.Float(i)));
2097                                 break;
2098                         case spv::OpDPdx:
2099                         case spv::OpDPdxCoarse:
2100                                 // Derivative instructions: FS invocations are laid out like so:
2101                                 //    0 1
2102                                 //    2 3
2103                                 static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
2104                                 dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
2105                                 break;
2106                         case spv::OpDPdy:
2107                         case spv::OpDPdyCoarse:
2108                                 dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
2109                                 break;
2110                         case spv::OpFwidth:
2111                         case spv::OpFwidthCoarse:
2112                                 dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0))
2113                                                         + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
2114                                 break;
2115                         case spv::OpDPdxFine:
2116                         {
2117                                 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
2118                                 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
2119                                 SIMD::Float v = SIMD::Float(firstRow);
2120                                 v = Insert(v, secondRow, 2);
2121                                 v = Insert(v, secondRow, 3);
2122                                 dst.move(i, v);
2123                                 break;
2124                         }
2125                         case spv::OpDPdyFine:
2126                         {
2127                                 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
2128                                 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
2129                                 SIMD::Float v = SIMD::Float(firstColumn);
2130                                 v = Insert(v, secondColumn, 1);
2131                                 v = Insert(v, secondColumn, 3);
2132                                 dst.move(i, v);
2133                                 break;
2134                         }
2135                         case spv::OpFwidthFine:
2136                         {
2137                                 auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
2138                                 auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
2139                                 SIMD::Float dpdx = SIMD::Float(firstRow);
2140                                 dpdx = Insert(dpdx, secondRow, 2);
2141                                 dpdx = Insert(dpdx, secondRow, 3);
2142                                 auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
2143                                 auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
2144                                 SIMD::Float dpdy = SIMD::Float(firstColumn);
2145                                 dpdy = Insert(dpdy, secondColumn, 1);
2146                                 dpdy = Insert(dpdy, secondColumn, 3);
2147                                 dst.move(i, Abs(dpdx) + Abs(dpdy));
2148                                 break;
2149                         }
2150                         default:
2151                                 UNIMPLEMENTED("Unhandled unary operator %s", OpcodeName(insn.opcode()).c_str());
2152                         }
2153                 }
2154
2155                 return EmitResult::Continue;
2156         }
2157
2158         SpirvShader::EmitResult SpirvShader::EmitBinaryOp(InsnIterator insn, EmitState *state) const
2159         {
2160                 auto routine = state->routine;
2161                 auto &type = getType(insn.word(1));
2162                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2163                 auto &lhsType = getType(getObject(insn.word(3)).type);
2164                 auto lhs = GenericValue(this, routine, insn.word(3));
2165                 auto rhs = GenericValue(this, routine, insn.word(4));
2166
2167                 for (auto i = 0u; i < lhsType.sizeInComponents; i++)
2168                 {
2169                         switch (insn.opcode())
2170                         {
2171                         case spv::OpIAdd:
2172                                 dst.move(i, lhs.Int(i) + rhs.Int(i));
2173                                 break;
2174                         case spv::OpISub:
2175                                 dst.move(i, lhs.Int(i) - rhs.Int(i));
2176                                 break;
2177                         case spv::OpIMul:
2178                                 dst.move(i, lhs.Int(i) * rhs.Int(i));
2179                                 break;
2180                         case spv::OpSDiv:
2181                         {
2182                                 SIMD::Int a = lhs.Int(i);
2183                                 SIMD::Int b = rhs.Int(i);
2184                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2185                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2186                                 dst.move(i, a / b);
2187                                 break;
2188                         }
2189                         case spv::OpUDiv:
2190                         {
2191                                 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2192                                 dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
2193                                 break;
2194                         }
2195                         case spv::OpSRem:
2196                         {
2197                                 SIMD::Int a = lhs.Int(i);
2198                                 SIMD::Int b = rhs.Int(i);
2199                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2200                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2201                                 dst.move(i, a % b);
2202                                 break;
2203                         }
2204                         case spv::OpSMod:
2205                         {
2206                                 SIMD::Int a = lhs.Int(i);
2207                                 SIMD::Int b = rhs.Int(i);
2208                                 b = b | CmpEQ(b, SIMD::Int(0)); // prevent divide-by-zero
2209                                 a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1))); // prevent integer overflow
2210                                 auto mod = a % b;
2211                                 // If a and b have opposite signs, the remainder operation takes
2212                                 // the sign from a but OpSMod is supposed to take the sign of b.
2213                                 // Adding b will ensure that the result has the correct sign and
2214                                 // that it is still congruent to a modulo b.
2215                                 //
2216                                 // See also http://mathforum.org/library/drmath/view/52343.html
2217                                 auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
2218                                 auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
2219                                 dst.move(i, As<SIMD::Float>(fixedMod));
2220                                 break;
2221                         }
2222                         case spv::OpUMod:
2223                         {
2224                                 auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
2225                                 dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
2226                                 break;
2227                         }
2228                         case spv::OpIEqual:
2229                         case spv::OpLogicalEqual:
2230                                 dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
2231                                 break;
2232                         case spv::OpINotEqual:
2233                         case spv::OpLogicalNotEqual:
2234                                 dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
2235                                 break;
2236                         case spv::OpUGreaterThan:
2237                                 dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
2238                                 break;
2239                         case spv::OpSGreaterThan:
2240                                 dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i)));
2241                                 break;
2242                         case spv::OpUGreaterThanEqual:
2243                                 dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
2244                                 break;
2245                         case spv::OpSGreaterThanEqual:
2246                                 dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i)));
2247                                 break;
2248                         case spv::OpULessThan:
2249                                 dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
2250                                 break;
2251                         case spv::OpSLessThan:
2252                                 dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i)));
2253                                 break;
2254                         case spv::OpULessThanEqual:
2255                                 dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
2256                                 break;
2257                         case spv::OpSLessThanEqual:
2258                                 dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i)));
2259                                 break;
2260                         case spv::OpFAdd:
2261                                 dst.move(i, lhs.Float(i) + rhs.Float(i));
2262                                 break;
2263                         case spv::OpFSub:
2264                                 dst.move(i, lhs.Float(i) - rhs.Float(i));
2265                                 break;
2266                         case spv::OpFMul:
2267                                 dst.move(i, lhs.Float(i) * rhs.Float(i));
2268                                 break;
2269                         case spv::OpFDiv:
2270                                 dst.move(i, lhs.Float(i) / rhs.Float(i));
2271                                 break;
2272                         case spv::OpFMod:
2273                                 // TODO(b/126873455): inaccurate for values greater than 2^24
2274                                 dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
2275                                 break;
2276                         case spv::OpFRem:
2277                                 dst.move(i, lhs.Float(i) % rhs.Float(i));
2278                                 break;
2279                         case spv::OpFOrdEqual:
2280                                 dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
2281                                 break;
2282                         case spv::OpFUnordEqual:
2283                                 dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
2284                                 break;
2285                         case spv::OpFOrdNotEqual:
2286                                 dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
2287                                 break;
2288                         case spv::OpFUnordNotEqual:
2289                                 dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
2290                                 break;
2291                         case spv::OpFOrdLessThan:
2292                                 dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i)));
2293                                 break;
2294                         case spv::OpFUnordLessThan:
2295                                 dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i)));
2296                                 break;
2297                         case spv::OpFOrdGreaterThan:
2298                                 dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i)));
2299                                 break;
2300                         case spv::OpFUnordGreaterThan:
2301                                 dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
2302                                 break;
2303                         case spv::OpFOrdLessThanEqual:
2304                                 dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i)));
2305                                 break;
2306                         case spv::OpFUnordLessThanEqual:
2307                                 dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i)));
2308                                 break;
2309                         case spv::OpFOrdGreaterThanEqual:
2310                                 dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i)));
2311                                 break;
2312                         case spv::OpFUnordGreaterThanEqual:
2313                                 dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
2314                                 break;
2315                         case spv::OpShiftRightLogical:
2316                                 dst.move(i, lhs.UInt(i) >> rhs.UInt(i));
2317                                 break;
2318                         case spv::OpShiftRightArithmetic:
2319                                 dst.move(i, lhs.Int(i) >> rhs.Int(i));
2320                                 break;
2321                         case spv::OpShiftLeftLogical:
2322                                 dst.move(i, lhs.UInt(i) << rhs.UInt(i));
2323                                 break;
2324                         case spv::OpBitwiseOr:
2325                         case spv::OpLogicalOr:
2326                                 dst.move(i, lhs.UInt(i) | rhs.UInt(i));
2327                                 break;
2328                         case spv::OpBitwiseXor:
2329                                 dst.move(i, lhs.UInt(i) ^ rhs.UInt(i));
2330                                 break;
2331                         case spv::OpBitwiseAnd:
2332                         case spv::OpLogicalAnd:
2333                                 dst.move(i, lhs.UInt(i) & rhs.UInt(i));
2334                                 break;
2335                         case spv::OpSMulExtended:
2336                                 // Extended ops: result is a structure containing two members of the same type as lhs & rhs.
2337                                 // In our flat view then, component i is the i'th component of the first member;
2338                                 // component i + N is the i'th component of the second member.
2339                                 dst.move(i, lhs.Int(i) * rhs.Int(i));
2340                                 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.Int(i), rhs.Int(i)));
2341                                 break;
2342                         case spv::OpUMulExtended:
2343                                 dst.move(i, lhs.UInt(i) * rhs.UInt(i));
2344                                 dst.move(i + lhsType.sizeInComponents, MulHigh(lhs.UInt(i), rhs.UInt(i)));
2345                                 break;
2346                         default:
2347                                 UNIMPLEMENTED("Unhandled binary operator %s", OpcodeName(insn.opcode()).c_str());
2348                         }
2349                 }
2350
2351                 return EmitResult::Continue;
2352         }
2353
2354         SpirvShader::EmitResult SpirvShader::EmitDot(InsnIterator insn, EmitState *state) const
2355         {
2356                 auto routine = state->routine;
2357                 auto &type = getType(insn.word(1));
2358                 ASSERT(type.sizeInComponents == 1);
2359                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2360                 auto &lhsType = getType(getObject(insn.word(3)).type);
2361                 auto lhs = GenericValue(this, routine, insn.word(3));
2362                 auto rhs = GenericValue(this, routine, insn.word(4));
2363
2364                 dst.move(0, Dot(lhsType.sizeInComponents, lhs, rhs));
2365                 return EmitResult::Continue;
2366         }
2367
2368         SpirvShader::EmitResult SpirvShader::EmitSelect(InsnIterator insn, EmitState *state) const
2369         {
2370                 auto routine = state->routine;
2371                 auto &type = getType(insn.word(1));
2372                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2373                 auto cond = GenericValue(this, routine, insn.word(3));
2374                 auto lhs = GenericValue(this, routine, insn.word(4));
2375                 auto rhs = GenericValue(this, routine, insn.word(5));
2376
2377                 for (auto i = 0u; i < type.sizeInComponents; i++)
2378                 {
2379                         dst.move(i, (cond.Int(i) & lhs.Int(i)) | (~cond.Int(i) & rhs.Int(i)));   // FIXME: IfThenElse()
2380                 }
2381
2382                 return EmitResult::Continue;
2383         }
2384
2385         SpirvShader::EmitResult SpirvShader::EmitExtendedInstruction(InsnIterator insn, EmitState *state) const
2386         {
2387                 auto routine = state->routine;
2388                 auto &type = getType(insn.word(1));
2389                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2390                 auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
2391
2392                 switch (extInstIndex)
2393                 {
2394                 case GLSLstd450FAbs:
2395                 {
2396                         auto src = GenericValue(this, routine, insn.word(5));
2397                         for (auto i = 0u; i < type.sizeInComponents; i++)
2398                         {
2399                                 dst.move(i, Abs(src.Float(i)));
2400                         }
2401                         break;
2402                 }
2403                 case GLSLstd450SAbs:
2404                 {
2405                         auto src = GenericValue(this, routine, insn.word(5));
2406                         for (auto i = 0u; i < type.sizeInComponents; i++)
2407                         {
2408                                 dst.move(i, Abs(src.Int(i)));
2409                         }
2410                         break;
2411                 }
2412                 case GLSLstd450Cross:
2413                 {
2414                         auto lhs = GenericValue(this, routine, insn.word(5));
2415                         auto rhs = GenericValue(this, routine, insn.word(6));
2416                         dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
2417                         dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
2418                         dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
2419                         break;
2420                 }
2421                 case GLSLstd450Floor:
2422                 {
2423                         auto src = GenericValue(this, routine, insn.word(5));
2424                         for (auto i = 0u; i < type.sizeInComponents; i++)
2425                         {
2426                                 dst.move(i, Floor(src.Float(i)));
2427                         }
2428                         break;
2429                 }
2430                 case GLSLstd450Trunc:
2431                 {
2432                         auto src = GenericValue(this, routine, insn.word(5));
2433                         for (auto i = 0u; i < type.sizeInComponents; i++)
2434                         {
2435                                 dst.move(i, Trunc(src.Float(i)));
2436                         }
2437                         break;
2438                 }
2439                 case GLSLstd450Ceil:
2440                 {
2441                         auto src = GenericValue(this, routine, insn.word(5));
2442                         for (auto i = 0u; i < type.sizeInComponents; i++)
2443                         {
2444                                 dst.move(i, Ceil(src.Float(i)));
2445                         }
2446                         break;
2447                 }
2448                 case GLSLstd450Fract:
2449                 {
2450                         auto src = GenericValue(this, routine, insn.word(5));
2451                         for (auto i = 0u; i < type.sizeInComponents; i++)
2452                         {
2453                                 dst.move(i, Frac(src.Float(i)));
2454                         }
2455                         break;
2456                 }
2457                 case GLSLstd450Round:
2458                 {
2459                         auto src = GenericValue(this, routine, insn.word(5));
2460                         for (auto i = 0u; i < type.sizeInComponents; i++)
2461                         {
2462                                 dst.move(i, Round(src.Float(i)));
2463                         }
2464                         break;
2465                 }
2466                 case GLSLstd450RoundEven:
2467                 {
2468                         auto src = GenericValue(this, routine, insn.word(5));
2469                         for (auto i = 0u; i < type.sizeInComponents; i++)
2470                         {
2471                                 auto x = Round(src.Float(i));
2472                                 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
2473                                 dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
2474                                                 SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
2475                         }
2476                         break;
2477                 }
2478                 case GLSLstd450FMin:
2479                 {
2480                         auto lhs = GenericValue(this, routine, insn.word(5));
2481                         auto rhs = GenericValue(this, routine, insn.word(6));
2482                         for (auto i = 0u; i < type.sizeInComponents; i++)
2483                         {
2484                                 dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
2485                         }
2486                         break;
2487                 }
2488                 case GLSLstd450FMax:
2489                 {
2490                         auto lhs = GenericValue(this, routine, insn.word(5));
2491                         auto rhs = GenericValue(this, routine, insn.word(6));
2492                         for (auto i = 0u; i < type.sizeInComponents; i++)
2493                         {
2494                                 dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
2495                         }
2496                         break;
2497                 }
2498                 case GLSLstd450SMin:
2499                 {
2500                         auto lhs = GenericValue(this, routine, insn.word(5));
2501                         auto rhs = GenericValue(this, routine, insn.word(6));
2502                         for (auto i = 0u; i < type.sizeInComponents; i++)
2503                         {
2504                                 dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
2505                         }
2506                         break;
2507                 }
2508                 case GLSLstd450SMax:
2509                 {
2510                         auto lhs = GenericValue(this, routine, insn.word(5));
2511                         auto rhs = GenericValue(this, routine, insn.word(6));
2512                         for (auto i = 0u; i < type.sizeInComponents; i++)
2513                         {
2514                                 dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
2515                         }
2516                         break;
2517                 }
2518                 case GLSLstd450UMin:
2519                 {
2520                         auto lhs = GenericValue(this, routine, insn.word(5));
2521                         auto rhs = GenericValue(this, routine, insn.word(6));
2522                         for (auto i = 0u; i < type.sizeInComponents; i++)
2523                         {
2524                                 dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
2525                         }
2526                         break;
2527                 }
2528                 case GLSLstd450UMax:
2529                 {
2530                         auto lhs = GenericValue(this, routine, insn.word(5));
2531                         auto rhs = GenericValue(this, routine, insn.word(6));
2532                         for (auto i = 0u; i < type.sizeInComponents; i++)
2533                         {
2534                                 dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
2535                         }
2536                         break;
2537                 }
2538                 case GLSLstd450Step:
2539                 {
2540                         auto edge = GenericValue(this, routine, insn.word(5));
2541                         auto x = GenericValue(this, routine, insn.word(6));
2542                         for (auto i = 0u; i < type.sizeInComponents; i++)
2543                         {
2544                                 dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
2545                         }
2546                         break;
2547                 }
2548                 case GLSLstd450SmoothStep:
2549                 {
2550                         auto edge0 = GenericValue(this, routine, insn.word(5));
2551                         auto edge1 = GenericValue(this, routine, insn.word(6));
2552                         auto x = GenericValue(this, routine, insn.word(7));
2553                         for (auto i = 0u; i < type.sizeInComponents; i++)
2554                         {
2555                                 auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
2556                                                 (edge1.Float(i) - edge0.Float(i)), SIMD::Float(0.0f)), SIMD::Float(1.0f));
2557                                 dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
2558                         }
2559                         break;
2560                 }
2561                 case GLSLstd450FMix:
2562                 {
2563                         auto x = GenericValue(this, routine, insn.word(5));
2564                         auto y = GenericValue(this, routine, insn.word(6));
2565                         auto a = GenericValue(this, routine, insn.word(7));
2566                         for (auto i = 0u; i < type.sizeInComponents; i++)
2567                         {
2568                                 dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
2569                         }
2570                         break;
2571                 }
2572                 case GLSLstd450FClamp:
2573                 {
2574                         auto x = GenericValue(this, routine, insn.word(5));
2575                         auto minVal = GenericValue(this, routine, insn.word(6));
2576                         auto maxVal = GenericValue(this, routine, insn.word(7));
2577                         for (auto i = 0u; i < type.sizeInComponents; i++)
2578                         {
2579                                 dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
2580                         }
2581                         break;
2582                 }
2583                 case GLSLstd450SClamp:
2584                 {
2585                         auto x = GenericValue(this, routine, insn.word(5));
2586                         auto minVal = GenericValue(this, routine, insn.word(6));
2587                         auto maxVal = GenericValue(this, routine, insn.word(7));
2588                         for (auto i = 0u; i < type.sizeInComponents; i++)
2589                         {
2590                                 dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
2591                         }
2592                         break;
2593                 }
2594                 case GLSLstd450UClamp:
2595                 {
2596                         auto x = GenericValue(this, routine, insn.word(5));
2597                         auto minVal = GenericValue(this, routine, insn.word(6));
2598                         auto maxVal = GenericValue(this, routine, insn.word(7));
2599                         for (auto i = 0u; i < type.sizeInComponents; i++)
2600                         {
2601                                 dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
2602                         }
2603                         break;
2604                 }
2605                 case GLSLstd450FSign:
2606                 {
2607                         auto src = GenericValue(this, routine, insn.word(5));
2608                         for (auto i = 0u; i < type.sizeInComponents; i++)
2609                         {
2610                                 auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
2611                                 auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
2612                                 dst.move(i, neg | pos);
2613                         }
2614                         break;
2615                 }
2616                 case GLSLstd450SSign:
2617                 {
2618                         auto src = GenericValue(this, routine, insn.word(5));
2619                         for (auto i = 0u; i < type.sizeInComponents; i++)
2620                         {
2621                                 auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
2622                                 auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
2623                                 dst.move(i, neg | pos);
2624                         }
2625                         break;
2626                 }
2627                 case GLSLstd450Reflect:
2628                 {
2629                         auto I = GenericValue(this, routine, insn.word(5));
2630                         auto N = GenericValue(this, routine, insn.word(6));
2631
2632                         SIMD::Float d = Dot(type.sizeInComponents, I, N);
2633
2634                         for (auto i = 0u; i < type.sizeInComponents; i++)
2635                         {
2636                                 dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
2637                         }
2638                         break;
2639                 }
2640                 case GLSLstd450Refract:
2641                 {
2642                         auto I = GenericValue(this, routine, insn.word(5));
2643                         auto N = GenericValue(this, routine, insn.word(6));
2644                         auto eta = GenericValue(this, routine, insn.word(7));
2645
2646                         SIMD::Float d = Dot(type.sizeInComponents, I, N);
2647                         SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
2648                         SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
2649                         SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
2650
2651                         for (auto i = 0u; i < type.sizeInComponents; i++)
2652                         {
2653                                 dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
2654                         }
2655                         break;
2656                 }
2657                 case GLSLstd450FaceForward:
2658                 {
2659                         auto N = GenericValue(this, routine, insn.word(5));
2660                         auto I = GenericValue(this, routine, insn.word(6));
2661                         auto Nref = GenericValue(this, routine, insn.word(7));
2662
2663                         SIMD::Float d = Dot(type.sizeInComponents, I, Nref);
2664                         SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
2665
2666                         for (auto i = 0u; i < type.sizeInComponents; i++)
2667                         {
2668                                 auto n = N.Float(i);
2669                                 dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
2670                         }
2671                         break;
2672                 }
2673                 case GLSLstd450Length:
2674                 {
2675                         auto x = GenericValue(this, routine, insn.word(5));
2676                         SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2677
2678                         dst.move(0, Sqrt(d));
2679                         break;
2680                 }
2681                 case GLSLstd450Normalize:
2682                 {
2683                         auto x = GenericValue(this, routine, insn.word(5));
2684                         SIMD::Float d = Dot(getType(getObject(insn.word(5)).type).sizeInComponents, x, x);
2685                         SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
2686
2687                         for (auto i = 0u; i < type.sizeInComponents; i++)
2688                         {
2689                                 dst.move(i, invLength * x.Float(i));
2690                         }
2691                         break;
2692                 }
2693                 case GLSLstd450Distance:
2694                 {
2695                         auto p0 = GenericValue(this, routine, insn.word(5));
2696                         auto p1 = GenericValue(this, routine, insn.word(6));
2697                         auto p0Type = getType(getObject(insn.word(5)).type);
2698
2699                         // sqrt(dot(p0-p1, p0-p1))
2700                         SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
2701
2702                         for (auto i = 1u; i < p0Type.sizeInComponents; i++)
2703                         {
2704                                 d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
2705                         }
2706
2707                         dst.move(0, Sqrt(d));
2708                         break;
2709                 }
2710                 default:
2711                         UNIMPLEMENTED("Unhandled ExtInst %d", extInstIndex);
2712                 }
2713
2714                 return EmitResult::Continue;
2715         }
2716
2717         std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
2718         {
2719                 switch(memorySemantics)
2720                 {
2721                 case spv::MemorySemanticsMaskNone:                   return std::memory_order_relaxed;
2722                 case spv::MemorySemanticsAcquireMask:                return std::memory_order_acquire;
2723                 case spv::MemorySemanticsReleaseMask:                return std::memory_order_release;
2724                 case spv::MemorySemanticsAcquireReleaseMask:         return std::memory_order_acq_rel;
2725                 case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel;  // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
2726                 default:
2727                         UNREACHABLE("MemorySemanticsMask %x", memorySemantics);
2728                         return std::memory_order_acq_rel;
2729                 }
2730         }
2731
2732         SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const
2733         {
2734                 SIMD::Float d = x.Float(0) * y.Float(0);
2735
2736                 for (auto i = 1u; i < numComponents; i++)
2737                 {
2738                         d += x.Float(i) * y.Float(i);
2739                 }
2740
2741                 return d;
2742         }
2743
2744         SpirvShader::EmitResult SpirvShader::EmitAny(InsnIterator insn, EmitState *state) const
2745         {
2746                 auto routine = state->routine;
2747                 auto &type = getType(insn.word(1));
2748                 ASSERT(type.sizeInComponents == 1);
2749                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2750                 auto &srcType = getType(getObject(insn.word(3)).type);
2751                 auto src = GenericValue(this, routine, insn.word(3));
2752
2753                 SIMD::UInt result = src.UInt(0);
2754
2755                 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2756                 {
2757                         result |= src.UInt(i);
2758                 }
2759
2760                 dst.move(0, result);
2761                 return EmitResult::Continue;
2762         }
2763
2764         SpirvShader::EmitResult SpirvShader::EmitAll(InsnIterator insn, EmitState *state) const
2765         {
2766                 auto routine = state->routine;
2767                 auto &type = getType(insn.word(1));
2768                 ASSERT(type.sizeInComponents == 1);
2769                 auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
2770                 auto &srcType = getType(getObject(insn.word(3)).type);
2771                 auto src = GenericValue(this, routine, insn.word(3));
2772
2773                 SIMD::UInt result = src.UInt(0);
2774
2775                 for (auto i = 1u; i < srcType.sizeInComponents; i++)
2776                 {
2777                         result &= src.UInt(i);
2778                 }
2779
2780                 dst.move(0, result);
2781                 return EmitResult::Continue;
2782         }
2783
2784         SpirvShader::EmitResult SpirvShader::EmitBranch(InsnIterator insn, EmitState *state) const
2785         {
2786                 auto target = Block::ID(insn.word(1));
2787                 auto edge = Block::Edge{state->currentBlock, target};
2788                 state->edgeActiveLaneMasks.emplace(edge, state->activeLaneMask());
2789                 return EmitResult::Terminator;
2790         }
2791
2792         SpirvShader::EmitResult SpirvShader::EmitBranchConditional(InsnIterator insn, EmitState *state) const
2793         {
2794                 auto block = getBlock(state->currentBlock);
2795                 ASSERT(block.branchInstruction == insn);
2796
2797                 auto condId = Object::ID(block.branchInstruction.word(1));
2798                 auto trueBlockId = Block::ID(block.branchInstruction.word(2));
2799                 auto falseBlockId = Block::ID(block.branchInstruction.word(3));
2800
2801                 auto cond = GenericValue(this, state->routine, condId);
2802                 ASSERT_MSG(getType(getObject(condId).type).sizeInComponents == 1, "Condition must be a Boolean type scalar");
2803
2804                 // TODO: Optimize for case where all lanes take same path.
2805
2806                 state->addOutputActiveLaneMaskEdge(trueBlockId, cond.Int(0));
2807                 state->addOutputActiveLaneMaskEdge(falseBlockId, ~cond.Int(0));
2808
2809                 return EmitResult::Terminator;
2810         }
2811
2812         SpirvShader::EmitResult SpirvShader::EmitSwitch(InsnIterator insn, EmitState *state) const
2813         {
2814                 auto block = getBlock(state->currentBlock);
2815                 ASSERT(block.branchInstruction == insn);
2816
2817                 auto selId = Object::ID(block.branchInstruction.word(1));
2818
2819                 auto sel = GenericValue(this, state->routine, selId);
2820                 ASSERT_MSG(getType(getObject(selId).type).sizeInComponents == 1, "Selector must be a scalar");
2821
2822                 auto numCases = (block.branchInstruction.wordCount() - 3) / 2;
2823
2824                 // TODO: Optimize for case where all lanes take same path.
2825
2826                 SIMD::Int defaultLaneMask = state->activeLaneMask();
2827
2828                 // Gather up the case label matches and calculate defaultLaneMask.
2829                 std::vector<RValue<SIMD::Int>> caseLabelMatches;
2830                 caseLabelMatches.reserve(numCases);
2831                 for (uint32_t i = 0; i < numCases; i++)
2832                 {
2833                         auto label = block.branchInstruction.word(i * 2 + 3);
2834                         auto caseBlockId = Block::ID(block.branchInstruction.word(i * 2 + 4));
2835                         auto caseLabelMatch = CmpEQ(sel.Int(0), SIMD::Int(label));
2836                         state->addOutputActiveLaneMaskEdge(caseBlockId, caseLabelMatch);
2837                         defaultLaneMask &= ~caseLabelMatch;
2838                 }
2839
2840                 auto defaultBlockId = Block::ID(block.branchInstruction.word(2));
2841                 state->addOutputActiveLaneMaskEdge(defaultBlockId, defaultLaneMask);
2842
2843                 return EmitResult::Terminator;
2844         }
2845
2846         SpirvShader::EmitResult SpirvShader::EmitUnreachable(InsnIterator insn, EmitState *state) const
2847         {
2848                 // TODO: Log something in this case?
2849                 state->setActiveLaneMask(SIMD::Int(0));
2850                 return EmitResult::Terminator;
2851         }
2852
2853         SpirvShader::EmitResult SpirvShader::EmitReturn(InsnIterator insn, EmitState *state) const
2854         {
2855                 state->setActiveLaneMask(SIMD::Int(0));
2856                 return EmitResult::Terminator;
2857         }
2858
2859         SpirvShader::EmitResult SpirvShader::EmitPhi(InsnIterator insn, EmitState *state) const
2860         {
2861                 auto routine = state->routine;
2862                 auto typeId = Type::ID(insn.word(1));
2863                 auto type = getType(typeId);
2864                 auto objectId = Object::ID(insn.word(2));
2865
2866                 auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
2867
2868                 bool first = true;
2869                 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
2870                 {
2871                         auto varId = Object::ID(insn.word(w + 0));
2872                         auto blockId = Block::ID(insn.word(w + 1));
2873
2874                         auto in = GenericValue(this, routine, varId);
2875                         auto mask = state->getActiveLaneMaskEdge(blockId, state->currentBlock);
2876
2877                         for (uint32_t i = 0; i < type.sizeInComponents; i++)
2878                         {
2879                                 auto inMasked = in.Int(i) & mask;
2880                                 dst.replace(i, first ? inMasked : (dst.Int(i) | inMasked));
2881                         }
2882                         first = false;
2883                 }
2884
2885                 return EmitResult::Continue;
2886         }
2887
2888         void SpirvShader::emitEpilog(SpirvRoutine *routine) const
2889         {
2890                 for (auto insn : *this)
2891                 {
2892                         switch (insn.opcode())
2893                         {
2894                         case spv::OpVariable:
2895                         {
2896                                 Object::ID resultId = insn.word(2);
2897                                 auto &object = getObject(resultId);
2898                                 auto &objectTy = getType(object.type);
2899                                 if (object.kind == Object::Kind::InterfaceVariable && objectTy.storageClass == spv::StorageClassOutput)
2900                                 {
2901                                         auto &dst = routine->getValue(resultId);
2902                                         int offset = 0;
2903                                         VisitInterface(resultId,
2904                                                                    [&](Decorations const &d, AttribType type) {
2905                                                                            auto scalarSlot = d.Location << 2 | d.Component;
2906                                                                            routine->outputs[scalarSlot] = dst[offset++];
2907                                                                    });
2908                                 }
2909                                 break;
2910                         }
2911                         default:
2912                                 break;
2913                         }
2914                 }
2915         }
2916
2917         SpirvShader::Block::Block(InsnIterator begin, InsnIterator end) : begin_(begin), end_(end)
2918         {
2919                 // Default to a Simple, this may change later.
2920                 kind = Block::Simple;
2921
2922                 // Walk the instructions to find the last two of the block.
2923                 InsnIterator insns[2];
2924                 for (auto insn : *this)
2925                 {
2926                         insns[0] = insns[1];
2927                         insns[1] = insn;
2928                 }
2929
2930                 switch (insns[1].opcode())
2931                 {
2932                         case spv::OpBranch:
2933                                 branchInstruction = insns[1];
2934                                 outs.emplace(Block::ID(branchInstruction.word(1)));
2935
2936                                 switch (insns[0].opcode())
2937                                 {
2938                                         case spv::OpLoopMerge:
2939                                                 kind = Loop;
2940                                                 mergeInstruction = insns[0];
2941                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
2942                                                 continueTarget = Block::ID(mergeInstruction.word(2));
2943                                                 break;
2944
2945                                         default:
2946                                                 kind = Block::Simple;
2947                                                 break;
2948                                 }
2949                                 break;
2950
2951                         case spv::OpBranchConditional:
2952                                 branchInstruction = insns[1];
2953                                 outs.emplace(Block::ID(branchInstruction.word(2)));
2954                                 outs.emplace(Block::ID(branchInstruction.word(3)));
2955
2956                                 switch (insns[0].opcode())
2957                                 {
2958                                         case spv::OpSelectionMerge:
2959                                                 kind = StructuredBranchConditional;
2960                                                 mergeInstruction = insns[0];
2961                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
2962                                                 break;
2963
2964                                         case spv::OpLoopMerge:
2965                                                 kind = Loop;
2966                                                 mergeInstruction = insns[0];
2967                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
2968                                                 continueTarget = Block::ID(mergeInstruction.word(2));
2969                                                 break;
2970
2971                                         default:
2972                                                 kind = UnstructuredBranchConditional;
2973                                                 break;
2974                                 }
2975                                 break;
2976
2977                         case spv::OpSwitch:
2978                                 branchInstruction = insns[1];
2979                                 outs.emplace(Block::ID(branchInstruction.word(2)));
2980                                 for (uint32_t w = 4; w < branchInstruction.wordCount(); w += 2)
2981                                 {
2982                                         outs.emplace(Block::ID(branchInstruction.word(w)));
2983                                 }
2984
2985                                 switch (insns[0].opcode())
2986                                 {
2987                                         case spv::OpSelectionMerge:
2988                                                 kind = StructuredSwitch;
2989                                                 mergeInstruction = insns[0];
2990                                                 mergeBlock = Block::ID(mergeInstruction.word(1));
2991                                                 break;
2992
2993                                         default:
2994                                                 kind = UnstructuredSwitch;
2995                                                 break;
2996                                 }
2997                                 break;
2998
2999                         default:
3000                                 break;
3001                 }
3002         }
3003
3004         bool SpirvShader::existsPath(Block::ID from, Block::ID to) const
3005         {
3006                 // TODO: Optimize: This can be cached on the block.
3007                 Block::Set seen;
3008
3009                 std::queue<Block::ID> pending;
3010                 pending.emplace(from);
3011
3012                 while (pending.size() > 0)
3013                 {
3014                         auto id = pending.front();
3015                         pending.pop();
3016                         for (auto out : getBlock(id).outs)
3017                         {
3018                                 if (seen.count(out) != 0) { continue; }
3019                                 if (out == to) { return true; }
3020                                 pending.emplace(out);
3021                         }
3022                         seen.emplace(id);
3023                 }
3024
3025                 return false;
3026         }
3027
3028         void SpirvShader::EmitState::addOutputActiveLaneMaskEdge(Block::ID to, RValue<SIMD::Int> mask)
3029         {
3030                 addActiveLaneMaskEdge(currentBlock, to, mask & activeLaneMask());
3031         }
3032
3033         void SpirvShader::EmitState::addActiveLaneMaskEdge(Block::ID from, Block::ID to, RValue<SIMD::Int> mask)
3034         {
3035                 auto edge = Block::Edge{from, to};
3036                 auto it = edgeActiveLaneMasks.find(edge);
3037                 if (it == edgeActiveLaneMasks.end())
3038                 {
3039                         edgeActiveLaneMasks.emplace(edge, mask);
3040                 }
3041                 else
3042                 {
3043                         auto combined = it->second | mask;
3044                         edgeActiveLaneMasks.erase(edge);
3045                         edgeActiveLaneMasks.emplace(edge, combined);
3046                 }
3047         }
3048
3049         RValue<SIMD::Int> SpirvShader::EmitState::getActiveLaneMaskEdge(Block::ID from, Block::ID to)
3050         {
3051                 auto edge = Block::Edge{from, to};
3052                 auto it = edgeActiveLaneMasks.find(edge);
3053                 ASSERT_MSG(it != edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
3054                 return it->second;
3055         }
3056
3057         SpirvRoutine::SpirvRoutine(vk::PipelineLayout const *pipelineLayout) :
3058                 pipelineLayout(pipelineLayout)
3059         {
3060         }
3061
3062 }