Eliminate Intermediate::replace()

[android-x86/external-swiftshader.git] / src / Pipeline / SpirvShader.cpp
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp

index cc81c9f..2106bdc 100644 (file)
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -21,12 +21,23 @@
  #include "Vulkan/VkPipelineLayout.hpp"
  #include "Device/Config.hpp"
  
-#include <queue>
-
  #ifdef Bool
  #undef Bool // b/127920555
  #endif
  
+namespace // Unnamed namespace: helpers local to this file for testing SIMD lane masks.
+{
+       rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints) // True when rr::SignMask(ints) is non-zero.
+       {
+               return rr::SignMask(ints) != 0; // presumably SignMask packs one sign bit per lane -- TODO confirm
+       }
+
+       rr::RValue<rr::Bool> AnyFalse(rr::RValue<sw::SIMD::Int> const &ints) // True when rr::SignMask of the bitwise complement of ints is non-zero.
+       {
+               return rr::SignMask(~ints) != 0; // NOTE(review): treats a clear sign bit as "false"; assumes lanes hold all-ones / all-zeros masks -- confirm
+       }
+}
+
  namespace sw
  {
         volatile int SpirvShader::serialCounter = 1;    // Start at 1, 0 is invalid shader.
@@ -146,6 +157,7 @@ namespace sw
                                 break;
                         }
  
+                       case spv::OpLoopMerge:
                         case spv::OpSelectionMerge:
                                 break; // Nothing to do in analysis pass.
  
@@ -344,6 +356,10 @@ namespace sw
                         case spv::OpCompositeExtract:
                         case spv::OpVectorShuffle:
                         case spv::OpVectorTimesScalar:
+                       case spv::OpMatrixTimesScalar:
+                       case spv::OpMatrixTimesVector:
+                       case spv::OpVectorTimesMatrix:
+                       case spv::OpMatrixTimesMatrix:
                         case spv::OpVectorExtractDynamic:
                         case spv::OpVectorInsertDynamic:
                         case spv::OpNot: // Unary ops
@@ -451,17 +467,38 @@ namespace sw
                         }
                 }
  
-               // Assign all Block::ins
+               AssignBlockIns();
+       }
+
+       void SpirvShader::TraverseReachableBlocks(Block::ID id, SpirvShader::Block::Set& reachable) // Adds id, and every block reachable from it through `outs` edges, to `reachable`.
+       {
+               if (reachable.count(id) == 0) // Blocks already recorded are skipped, which also ends the recursion on cycles.
+               {
+                       reachable.emplace(id);
+                       for (auto out : getBlock(id).outs)
+                       {
+                               TraverseReachableBlocks(out, reachable); // Depth-first: recurse into each successor.
+                       }
+               }
+       }
+
+       void SpirvShader::AssignBlockIns()
+       {
+               Block::Set reachable;
+               TraverseReachableBlocks(mainBlockId, reachable);
+
                 for (auto &it : blocks)
                 {
                         auto &blockId = it.first;
-                       auto &block = it.second;
-                       for (auto &outId : block.outs)
+                       if (reachable.count(blockId) > 0)
                         {
-                               auto outIt = blocks.find(outId);
-                               ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
-                               auto &out = outIt->second;
-                               out.ins.emplace(blockId);
+                               for (auto &outId : it.second.outs)
+                               {
+                                       auto outIt = blocks.find(outId);
+                                       ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
+                                       auto &out = outIt->second;
+                                       out.ins.emplace(blockId);
+                               }
                         }
                 }
         }
@@ -1145,62 +1182,50 @@ namespace sw
                         EmitInstruction(insn, &state);
                 }
  
-               // Emit all the blocks in BFS order, starting with the main block.
+               // Emit all the blocks starting from mainBlockId.
+               EmitBlocks(mainBlockId, &state);
+       }
+
+       void SpirvShader::EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore /* = 0 */) const
+       {
+               auto oldPending = state->pending;
+
                 std::queue<Block::ID> pending;
-               pending.push(mainBlockId);
+               state->pending = &pending;
+               pending.push(id);
                 while (pending.size() > 0)
                 {
                         auto id = pending.front();
                         pending.pop();
-                       if (state.visited.count(id) == 0)
+
+                       auto const &block = getBlock(id);
+                       if (id == ignore)
                         {
-                               EmitBlock(id, &state);
-                               for (auto it : getBlock(id).outs)
-                               {
-                                       pending.push(it);
-                               }
+                               continue;
                         }
-               }
-       }
-
-       void SpirvShader::EmitBlock(Block::ID id, EmitState *state) const
-       {
-               if (state->visited.count(id) > 0)
-               {
-                       return; // Already processed this block.
-               }
  
-               state->visited.emplace(id);
+                       state->currentBlock = id;
  
-               auto &block = getBlock(id);
+                       switch (block.kind)
+                       {
+                               case Block::Simple:
+                               case Block::StructuredBranchConditional:
+                               case Block::UnstructuredBranchConditional:
+                               case Block::StructuredSwitch:
+                               case Block::UnstructuredSwitch:
+                                       EmitNonLoop(state);
+                                       break;
  
-               switch (block.kind)
-               {
-                       case Block::Simple:
-                       case Block::StructuredBranchConditional:
-                       case Block::UnstructuredBranchConditional:
-                       case Block::StructuredSwitch:
-                       case Block::UnstructuredSwitch:
-                               if (id != mainBlockId)
-                               {
-                                       // Emit all preceeding blocks and set the activeLaneMask.
-                                       Intermediate activeLaneMask(1);
-                                       activeLaneMask.move(0, SIMD::Int(0));
-                                       for (auto in : block.ins)
-                                       {
-                                               EmitBlock(in, state);
-                                               auto inMask = state->getActiveLaneMaskEdge(in, id);
-                                               activeLaneMask.replace(0, activeLaneMask.Int(0) | inMask);
-                                       }
-                                       state->setActiveLaneMask(activeLaneMask.Int(0));
-                               }
-                               state->currentBlock = id;
-                               EmitInstructions(block.begin(), block.end(), state);
-                               break;
+                               case Block::Loop:
+                                       EmitLoop(state);
+                                       break;
  
-                       default:
-                               UNIMPLEMENTED("Unhandled Block Kind: %d", int(block.kind));
+                               default:
+                                       UNREACHABLE("Unexpected Block Kind: %d", int(block.kind));
+                       }
                 }
+
+               state->pending = oldPending;
         }
  
         void SpirvShader::EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const
@@ -1221,6 +1246,230 @@ namespace sw
                 }
         }
  
+       void SpirvShader::EmitNonLoop(EmitState *state) const // Emits state->currentBlock (a non-loop block) once all of its incoming blocks have been emitted.
+       {
+               auto blockId = state->currentBlock;
+               auto const &block = getBlock(blockId); // Bound by reference: the block is only read here, so its ins/outs sets need not be copied.
+
+               // Ensure all incoming blocks have been generated.
+               auto depsDone = true;
+               for (auto in : block.ins)
+               {
+                       if (state->visited.count(in) == 0)
+                       {
+                               state->pending->emplace(in); // Queue the missing predecessor so it gets emitted first.
+                               depsDone = false;
+                       }
+               }
+
+               if (!depsDone)
+               {
+                       // Come back to this block once its dependencies have been generated.
+                       state->pending->emplace(blockId);
+                       return;
+               }
+
+               if (!state->visited.emplace(blockId).second) // emplace() reports whether the id was newly inserted.
+               {
+                       return; // Already generated this block.
+               }
+
+               if (blockId != mainBlockId)
+               {
+                       // Set the activeLaneMask to the OR of the masks on every incoming edge.
+                       SIMD::Int activeLaneMask(0);
+                       for (auto in : block.ins)
+                       {
+                               auto inMask = GetActiveLaneMaskEdge(state, in, blockId);
+                               activeLaneMask |= inMask;
+                       }
+                       state->setActiveLaneMask(activeLaneMask);
+               }
+
+               EmitInstructions(block.begin(), block.end(), state);
+
+               for (auto out : block.outs)
+               {
+                       state->pending->emplace(out); // Schedule every successor for emission.
+               }
+       }
+
+       void SpirvShader::EmitLoop(EmitState *state) const // Emits the loop whose header is state->currentBlock, then queues the loop's merge block.
+       {
+               auto blockId = state->currentBlock;
+               auto const &block = getBlock(blockId); // Bound by reference: the block is only read here, so its ins/outs sets need not be copied.
+
+               // Ensure all incoming non-back edge blocks have been generated.
+               auto depsDone = true;
+               for (auto in : block.ins)
+               {
+                       if (state->visited.count(in) == 0)
+                       {
+                               if (!existsPath(blockId, in, block.mergeBlock)) // if not a loop back edge
+                               {
+                                       state->pending->emplace(in);
+                                       depsDone = false;
+                               }
+                       }
+               }
+
+               if (!depsDone)
+               {
+                       // Come back to this block once its dependencies have been generated.
+                       state->pending->emplace(blockId);
+                       return;
+               }
+
+               if (!state->visited.emplace(blockId).second) // emplace() reports whether the id was newly inserted.
+               {
+                       return; // Already emitted this loop.
+               }
+
+               // loopActiveLaneMask is the mask of lanes that are continuing to loop.
+               // This is initialized with the incoming active lane masks.
+               SIMD::Int loopActiveLaneMask = SIMD::Int(0);
+               for (auto in : block.ins)
+               {
+                       if (!existsPath(blockId, in, block.mergeBlock)) // if not a loop back edge
+                       {
+                               loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
+                       }
+               }
+
+               // Generate an alloca for each of the loop's phis.
+               // These will be primed with the incoming, non back edge Phi values
+               // before the loop, and then updated just before the loop jumps back to
+               // the block.
+               struct LoopPhi
+               {
+                       LoopPhi(Object::ID id, uint32_t size) : phiId(id), storage(size) {}
+
+                       Object::ID phiId; // The Phi identifier.
+                       Object::ID continueValue; // The source merge value from the loop.
+                       Array<SIMD::Int> storage; // The alloca.
+               };
+
+               std::vector<LoopPhi> phis;
+
+               // For each OpPhi between the block start and the merge instruction:
+               for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
+               {
+                       if (insn.opcode() == spv::OpPhi)
+                       {
+                               auto objectId = Object::ID(insn.word(2));
+                               auto &object = getObject(objectId);
+                               auto &type = getType(object.type);
+
+                               LoopPhi phi(insn.word(2), type.sizeInComponents);
+
+                               // Start with the Phi set to 0.
+                               for (uint32_t i = 0; i < type.sizeInComponents; i++)
+                               {
+                                       phi.storage[i] = SIMD::Int(0);
+                               }
+
+                               // For each Phi source (operands come in <value id, block id> pairs):
+                               for (uint32_t w = 3; w < insn.wordCount(); w += 2)
+                               {
+                                       auto varId = Object::ID(insn.word(w + 0));
+                                       auto blockId = Block::ID(insn.word(w + 1)); // Source block; shadows the outer blockId, hence state->currentBlock below.
+                                       if (existsPath(state->currentBlock, blockId, block.mergeBlock))
+                                       {
+                                               // This source is from a loop back-edge.
+                                               ASSERT(phi.continueValue == 0 || phi.continueValue == varId);
+                                               phi.continueValue = varId;
+                                       }
+                                       else
+                                       {
+                                               // This source is from a preceding block.
+                                               auto in = GenericValue(this, state->routine, varId); // Same for every component, so looked up once per source.
+                                               auto mask = GetActiveLaneMaskEdge(state, blockId, state->currentBlock);
+                                               for (uint32_t i = 0; i < type.sizeInComponents; i++)
+                                               {
+                                                       phi.storage[i] = phi.storage[i] | (in.Int(i) & mask);
+                                               }
+                                       }
+                               }
+
+                               phis.push_back(phi);
+                       }
+               }
+
+               // Create the loop basic blocks
+               auto headerBasicBlock = Nucleus::createBasicBlock();
+               auto mergeBasicBlock = Nucleus::createBasicBlock();
+
+               // Start emitting code inside the loop.
+               Nucleus::createBr(headerBasicBlock);
+               Nucleus::setInsertBlock(headerBasicBlock);
+
+               // Load the Phi values from storage.
+               // This will load at the start of each loop.
+               for (auto &phi : phis)
+               {
+                       auto &type = getType(getObject(phi.phiId).type);
+                       auto &dst = state->routine->createIntermediate(phi.phiId, type.sizeInComponents);
+                       for (unsigned int i = 0u; i < type.sizeInComponents; i++)
+                       {
+                               dst.move(i, phi.storage[i]);
+                       }
+               }
+
+               // Load the active lane mask.
+               state->setActiveLaneMask(loopActiveLaneMask);
+
+               // Emit all the non-phi instructions in this loop header block.
+               for (auto insn = block.begin(); insn != block.end(); insn++)
+               {
+                       if (insn.opcode() != spv::OpPhi)
+                       {
+                               EmitInstruction(insn, state);
+                       }
+               }
+
+               // Emit all loop blocks, but don't emit the merge block yet.
+               for (auto out : block.outs)
+               {
+                       if (existsPath(out, blockId, block.mergeBlock)) // Only successors that can get back to the header belong to the loop.
+                       {
+                               EmitBlocks(out, state, block.mergeBlock);
+                       }
+               }
+
+               // Rebuild the loopActiveLaneMask from the loop back edges.
+               loopActiveLaneMask = SIMD::Int(0);
+               for (auto in : block.ins)
+               {
+                       if (existsPath(blockId, in, block.mergeBlock))
+                       {
+                               loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
+                       }
+               }
+
+               // Update loop phi values
+               for (auto &phi : phis)
+               {
+                       if (phi.continueValue != 0)
+                       {
+                               auto val = GenericValue(this, state->routine, phi.continueValue);
+                               auto &type = getType(getObject(phi.phiId).type);
+                               for (unsigned int i = 0u; i < type.sizeInComponents; i++)
+                               {
+                                       phi.storage[i] = val.Int(i);
+                               }
+                       }
+               }
+
+               // Loop body now done.
+               // If any lanes are still active, jump back to the loop header,
+               // otherwise jump to the merge block.
+               Nucleus::createCondBr(AnyTrue(loopActiveLaneMask).value, headerBasicBlock, mergeBasicBlock);
+
+               // Continue emitting from the merge block.
+               Nucleus::setInsertBlock(mergeBasicBlock);
+               state->pending->emplace(block.mergeBlock);
+       }
+
         SpirvShader::EmitResult SpirvShader::EmitInstruction(InsnIterator insn, EmitState *state) const
         {
                 switch (insn.opcode())
@@ -1305,8 +1554,18 @@ namespace sw
                         return EmitVectorInsertDynamic(insn, state);
  
                 case spv::OpVectorTimesScalar:
+               case spv::OpMatrixTimesScalar:
                         return EmitVectorTimesScalar(insn, state);
  
+               case spv::OpMatrixTimesVector:
+                       return EmitMatrixTimesVector(insn, state);
+
+               case spv::OpVectorTimesMatrix:
+                       return EmitVectorTimesMatrix(insn, state);
+
+               case spv::OpMatrixTimesMatrix:
+                       return EmitMatrixTimesMatrix(insn, state);
+
                 case spv::OpNot:
                 case spv::OpSNegate:
                 case spv::OpFNegate:
@@ -1401,6 +1660,7 @@ namespace sw
                         return EmitPhi(insn, state);
  
                 case spv::OpSelectionMerge:
+               case spv::OpLoopMerge:
                         return EmitResult::Continue;
  
                 case spv::OpBranchConditional:
@@ -1516,7 +1776,7 @@ namespace sw
                 }
  
                 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
-               auto anyInactiveLanes = SignMask(~state->activeLaneMask()) != 0;
+               auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
  
                 auto load = std::unique_ptr<SIMD::Float[]>(new SIMD::Float[resultTy.sizeInComponents]);
  
@@ -1610,7 +1870,7 @@ namespace sw
                 }
  
                 bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
-               auto anyInactiveLanes = SignMask(~state->activeLaneMask()) != 0;
+               auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
  
                 if (object.kind == Object::Kind::Constant)
                 {
@@ -1887,6 +2147,78 @@ namespace sw
                 return EmitResult::Continue;
         }
  
+       SpirvShader::EmitResult SpirvShader::EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const // Emits OpMatrixTimesVector: operand word 3 is the matrix, word 4 the vector.
+       {
+               auto routine = state->routine;
+               auto &type = getType(insn.word(1)); // Result type; its component count is used as the row stride below.
+               auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
+               auto lhs = GenericValue(this, routine, insn.word(3));
+               auto rhs = GenericValue(this, routine, insn.word(4));
+               auto const &rhsType = getType(getObject(insn.word(4)).type); // Bound by reference, like `type` above, instead of copying the Type.
+
+               for (auto i = 0u; i < type.sizeInComponents; i++)
+               {
+                       SIMD::Float v = lhs.Float(i) * rhs.Float(0); // Column 0 contribution to result component i.
+                       for (auto j = 1u; j < rhsType.sizeInComponents; j++)
+                       {
+                               v += lhs.Float(i + type.sizeInComponents * j) * rhs.Float(j); // Matrix element (row i, column j); components are indexed column by column.
+                       }
+                       dst.move(i, v);
+               }
+
+               return EmitResult::Continue;
+       }
+
+       SpirvShader::EmitResult SpirvShader::EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const // Emits OpVectorTimesMatrix: operand word 3 is the vector, word 4 the matrix.
+       {
+               auto routine = state->routine;
+               auto &type = getType(insn.word(1)); // Result type; one result component is produced per matrix column.
+               auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
+               auto lhs = GenericValue(this, routine, insn.word(3));
+               auto rhs = GenericValue(this, routine, insn.word(4));
+               auto const &lhsType = getType(getObject(insn.word(3)).type); // Bound by reference, like `type` above, instead of copying the Type.
+
+               for (auto i = 0u; i < type.sizeInComponents; i++)
+               {
+                       SIMD::Float v = lhs.Float(0) * rhs.Float(i * lhsType.sizeInComponents); // First element of matrix column i.
+                       for (auto j = 1u; j < lhsType.sizeInComponents; j++)
+                       {
+                               v += lhs.Float(j) * rhs.Float(i * lhsType.sizeInComponents + j); // Dot product of the vector with column i (column stride = vector length).
+                       }
+                       dst.move(i, v);
+               }
+
+               return EmitResult::Continue;
+       }
+
+       SpirvShader::EmitResult SpirvShader::EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const // Emits OpMatrixTimesMatrix: operand words 3 and 4 are the left and right matrices.
+       {
+               auto routine = state->routine;
+               auto &type = getType(insn.word(1));
+               auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
+               auto lhs = GenericValue(this, routine, insn.word(3));
+               auto rhs = GenericValue(this, routine, insn.word(4));
+
+               auto numColumns = type.definition.word(3); // presumably the column count operand of the result matrix type -- TODO confirm against the SPIR-V spec
+               auto numRows = getType(type.definition.word(2)).definition.word(3); // presumably the component count of the result's column vector type -- TODO confirm
+               auto numAdds = getType(getObject(insn.word(3)).type).definition.word(3); // presumably the left matrix's column count, i.e. terms per dot product -- TODO confirm
+
+               for (auto row = 0u; row < numRows; row++)
+               {
+                       for (auto col = 0u; col < numColumns; col++)
+                       {
+                               SIMD::Float v = SIMD::Float(0);
+                               for (auto i = 0u; i < numAdds; i++)
+                               {
+                                       v += lhs.Float(i * numRows + row) * rhs.Float(col * numAdds + i); // lhs(row, i) * rhs(i, col), both indexed column by column.
+                               }
+                               dst.move(numRows * col + row, v); // Result is written with the same column-by-column indexing.
+                       }
+               }
+
+               return EmitResult::Continue;
+       }
+
         SpirvShader::EmitResult SpirvShader::EmitUnaryOp(InsnIterator insn, EmitState *state) const
         {
                 auto routine = state->routine;
@@ -2697,7 +3029,7 @@ namespace sw
                 auto type = getType(typeId);
                 auto objectId = Object::ID(insn.word(2));
  
-               auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
+               auto tmp = std::unique_ptr<SIMD::Int[]>(new SIMD::Int[type.sizeInComponents]);
  
                 bool first = true;
                 for (uint32_t w = 3; w < insn.wordCount(); w += 2)
@@ -2706,16 +3038,22 @@ namespace sw
                         auto blockId = Block::ID(insn.word(w + 1));
  
                         auto in = GenericValue(this, routine, varId);
-                       auto mask = state->getActiveLaneMaskEdge(blockId, state->currentBlock);
+                       auto mask = GetActiveLaneMaskEdge(state, blockId, state->currentBlock);
  
                         for (uint32_t i = 0; i < type.sizeInComponents; i++)
                         {
                                 auto inMasked = in.Int(i) & mask;
-                               dst.replace(i, first ? inMasked : (dst.Int(i) | inMasked));
+                               tmp[i] = first ? inMasked : (tmp[i] | inMasked);
                         }
                         first = false;
                 }
  
+               auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
+               for(uint32_t i = 0; i < type.sizeInComponents; i++)
+               {
+                       dst.move(i, tmp[i]);
+               }
+
                 return EmitResult::Continue;
         }
  
@@ -2835,6 +3173,31 @@ namespace sw
                 }
         }
  
+       bool SpirvShader::existsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const // True if `to` is reachable from `from` by following `outs` edges without going through notPassingThrough.
+       {
+               // TODO: Optimize: This can be cached on the block.
+               Block::Set seen; // Blocks already queued or expanded, plus the block that must not be passed through.
+               seen.emplace(notPassingThrough);
+
+               std::queue<Block::ID> pending; // Breadth-first work list.
+               pending.emplace(from);
+
+               while (pending.size() > 0)
+               {
+                       auto id = pending.front();
+                       pending.pop();
+                       for (auto out : getBlock(id).outs)
+                       {
+                               if (out == to && seen.count(out) == 0) { return true; }
+                               if (!seen.emplace(out).second) { continue; } // Mark when queued, so a block is not queued again by each of its predecessors.
+                               pending.emplace(out);
+                       }
+                       seen.emplace(id); // Only matters for `from`: it is queued unmarked so that a direct self edge with from == to is still reported.
+               }
+
+               return false;
+       }
+
         void SpirvShader::EmitState::addOutputActiveLaneMaskEdge(Block::ID to, RValue<SIMD::Int> mask)
         {
                 addActiveLaneMaskEdge(currentBlock, to, mask & activeLaneMask());
@@ -2856,11 +3219,11 @@ namespace sw
                 }
         }
  
-       RValue<SIMD::Int> SpirvShader::EmitState::getActiveLaneMaskEdge(Block::ID from, Block::ID to)
+       RValue<SIMD::Int> SpirvShader::GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const
         {
                 auto edge = Block::Edge{from, to};
-               auto it = edgeActiveLaneMasks.find(edge);
-               ASSERT_MSG(it != edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
+               auto it = state->edgeActiveLaneMasks.find(edge);
+               ASSERT_MSG(it != state->edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
                 return it->second;
         }