#include "Vulkan/VkPipelineLayout.hpp"
#include "Device/Config.hpp"
-#include <queue>
-
#ifdef Bool
#undef Bool // b/127920555
#endif
+namespace
+{
+ // Reports whether rr::SignMask(ints) is non-zero.
+ // NOTE(review): presumably this means at least one lane of ints has its
+ // sign bit set - confirm against Reactor's SignMask.
+ rr::RValue<rr::Bool> AnyTrue(rr::RValue<sw::SIMD::Int> const &ints)
+ {
+     auto signBits = rr::SignMask(ints);
+     return signBits != 0;
+ }
+
+ // Reports whether rr::SignMask of the bitwise complement of ints is
+ // non-zero.
+ // NOTE(review): presumably this means at least one lane of ints has its
+ // sign bit clear - confirm against Reactor's SignMask.
+ rr::RValue<rr::Bool> AnyFalse(rr::RValue<sw::SIMD::Int> const &ints)
+ {
+     auto invertedSignBits = rr::SignMask(~ints);
+     return invertedSignBits != 0;
+ }
+}
+
namespace sw
{
volatile int SpirvShader::serialCounter = 1; // Start at 1, 0 is invalid shader.
break;
}
+ case spv::OpLoopMerge:
case spv::OpSelectionMerge:
break; // Nothing to do in analysis pass.
case spv::OpCompositeExtract:
case spv::OpVectorShuffle:
case spv::OpVectorTimesScalar:
+ case spv::OpMatrixTimesScalar:
+ case spv::OpMatrixTimesVector:
+ case spv::OpVectorTimesMatrix:
+ case spv::OpMatrixTimesMatrix:
case spv::OpVectorExtractDynamic:
case spv::OpVectorInsertDynamic:
case spv::OpNot: // Unary ops
}
}
- // Assign all Block::ins
+ AssignBlockIns();
+ }
+
+ // Adds id, and every block that can be reached from it by following
+ // Block::outs edges, to the reachable set. A block that is already in
+ // reachable is not expanded again, which also stops the recursion on
+ // cycles.
+ void SpirvShader::TraverseReachableBlocks(Block::ID id, SpirvShader::Block::Set& reachable)
+ {
+     if (!reachable.emplace(id).second)
+     {
+         return; // Already recorded by an earlier visit.
+     }
+
+     for (auto successor : getBlock(id).outs)
+     {
+         TraverseReachableBlocks(successor, reachable);
+     }
+ }
+
+ void SpirvShader::AssignBlockIns() // For each block reachable from mainBlockId, adds that block's ID to the ins set of every block listed in its outs.
+ {
+ Block::Set reachable; // Filled by TraverseReachableBlocks, starting at mainBlockId.
+ TraverseReachableBlocks(mainBlockId, reachable);
+
for (auto &it : blocks)
{
auto &blockId = it.first;
- auto &block = it.second;
- for (auto &outId : block.outs)
+ if (reachable.count(blockId) > 0) // Blocks that are not reachable from mainBlockId contribute no ins entries.
{
- auto outIt = blocks.find(outId);
- ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
- auto &out = outIt->second;
- out.ins.emplace(blockId);
+ for (auto &outId : it.second.outs)
+ {
+ auto outIt = blocks.find(outId);
+ ASSERT_MSG(outIt != blocks.end(), "Block %d has a non-existent out %d", blockId.value(), outId.value());
+ auto &out = outIt->second;
+ out.ins.emplace(blockId); // blockId branches to out, so it is one of out's predecessors.
+ }
}
}
}
EmitInstruction(insn, &state);
}
- // Emit all the blocks in BFS order, starting with the main block.
+ // Emit all the blocks starting from mainBlockId.
+ EmitBlocks(mainBlockId, &state);
+ }
+
+ void SpirvShader::EmitBlocks(Block::ID id, EmitState *state, Block::ID ignore /* = 0 */) const // Emits block id and every block subsequently queued on state->pending, dispatching on block kind; a queued block equal to ignore is dropped without being emitted.
+ {
+ auto oldPending = state->pending; // Saved because EmitLoop calls EmitBlocks again while an outer call is still running.
+
std::queue<Block::ID> pending;
- pending.push(mainBlockId);
+ state->pending = &pending; // EmitNonLoop and EmitLoop enqueue further work through state->pending.
+ pending.push(id);
while (pending.size() > 0)
{
auto id = pending.front();
pending.pop();
- if (state.visited.count(id) == 0)
+
+ auto const &block = getBlock(id);
+ if (id == ignore)
{
- EmitBlock(id, &state);
- for (auto it : getBlock(id).outs)
- {
- pending.push(it);
- }
+ continue; // Leave the ignored block for the caller to schedule.
}
- }
- }
-
- void SpirvShader::EmitBlock(Block::ID id, EmitState *state) const
- {
- if (state->visited.count(id) > 0)
- {
- return; // Already processed this block.
- }
- state->visited.emplace(id);
+ state->currentBlock = id; // EmitNonLoop and EmitLoop read the block to emit from state->currentBlock.
- auto &block = getBlock(id);
+ switch (block.kind)
+ {
+ case Block::Simple:
+ case Block::StructuredBranchConditional:
+ case Block::UnstructuredBranchConditional:
+ case Block::StructuredSwitch:
+ case Block::UnstructuredSwitch:
+ EmitNonLoop(state);
+ break;
- switch (block.kind)
- {
- case Block::Simple:
- case Block::StructuredBranchConditional:
- case Block::UnstructuredBranchConditional:
- case Block::StructuredSwitch:
- case Block::UnstructuredSwitch:
- if (id != mainBlockId)
- {
- // Emit all preceeding blocks and set the activeLaneMask.
- Intermediate activeLaneMask(1);
- activeLaneMask.move(0, SIMD::Int(0));
- for (auto in : block.ins)
- {
- EmitBlock(in, state);
- auto inMask = state->getActiveLaneMaskEdge(in, id);
- activeLaneMask.replace(0, activeLaneMask.Int(0) | inMask);
- }
- state->setActiveLaneMask(activeLaneMask.Int(0));
- }
- state->currentBlock = id;
- EmitInstructions(block.begin(), block.end(), state);
- break;
+ case Block::Loop:
+ EmitLoop(state);
+ break;
- default:
- UNIMPLEMENTED("Unhandled Block Kind: %d", int(block.kind));
+ default:
+ UNREACHABLE("Unexpected Block Kind: %d", int(block.kind));
+ }
}
+
+ state->pending = oldPending; // Restore the queue that was active on entry.
}
void SpirvShader::EmitInstructions(InsnIterator begin, InsnIterator end, EmitState *state) const
}
}
+ // Emits the non-loop block identified by state->currentBlock.
+ //
+ // If any block in this block's ins is not yet in state->visited, those
+ // blocks are queued on state->pending, followed by this block, and
+ // nothing is emitted on this call. Otherwise the block is marked visited,
+ // the active lane mask is set to the OR of the incoming edge masks (this
+ // step is skipped for mainBlockId), the block's instructions are emitted
+ // and its outs are queued on state->pending.
+ void SpirvShader::EmitNonLoop(EmitState *state) const
+ {
+     auto blockId = state->currentBlock;
+     // Bind by reference: taking the Block by value would copy it (including
+     // its ins and outs sets) every time a block is emitted or re-queued.
+     auto const &block = getBlock(blockId);
+
+     // Ensure all incoming blocks have been generated.
+     auto depsDone = true;
+     for (auto in : block.ins)
+     {
+         if (state->visited.count(in) == 0)
+         {
+             state->pending->emplace(in);
+             depsDone = false;
+         }
+     }
+
+     if (!depsDone)
+     {
+         // come back to this once the dependencies have been generated
+         state->pending->emplace(blockId);
+         return;
+     }
+
+     if (!state->visited.emplace(blockId).second)
+     {
+         return; // Already generated this block.
+     }
+
+     if (blockId != mainBlockId)
+     {
+         // Set the activeLaneMask.
+         SIMD::Int activeLaneMask(0);
+         for (auto in : block.ins)
+         {
+             auto inMask = GetActiveLaneMaskEdge(state, in, blockId);
+             activeLaneMask |= inMask;
+         }
+         state->setActiveLaneMask(activeLaneMask);
+     }
+
+     EmitInstructions(block.begin(), block.end(), state);
+
+     for (auto out : block.outs)
+     {
+         state->pending->emplace(out);
+     }
+ }
+
+ // Emits the loop whose header block is state->currentBlock.
+ //
+ // An incoming edge is treated as a loop back edge when existsPath() finds
+ // a path from the header to the edge's source block that avoids the
+ // loop's merge block. If a non-back-edge predecessor has not been emitted
+ // yet, it is queued on state->pending followed by this block, and nothing
+ // is emitted on this call.
+ //
+ // Otherwise the function:
+ //  - builds the initial lane mask and the initial OpPhi values from the
+ //    non-back-edge predecessors,
+ //  - opens a header basic block, loads the phi values and emits the
+ //    non-phi instructions of the header,
+ //  - emits the loop's blocks through a nested EmitBlocks() call that
+ //    ignores the merge block,
+ //  - rebuilds the lane mask from the back edges, stores the phi continue
+ //    values, and branches back to the header while AnyTrue() of that mask
+ //    holds,
+ //  - queues the merge block on state->pending.
+ void SpirvShader::EmitLoop(EmitState *state) const
+ {
+     auto blockId = state->currentBlock;
+     // Bind by reference: taking the Block by value would copy it (including
+     // its ins and outs sets) on every call.
+     auto const &block = getBlock(blockId);
+
+     // Ensure all incoming non-back edge blocks have been generated.
+     auto depsDone = true;
+     for (auto in : block.ins)
+     {
+         if (state->visited.count(in) == 0)
+         {
+             if (!existsPath(blockId, in, block.mergeBlock)) // if not a loop back edge
+             {
+                 state->pending->emplace(in);
+                 depsDone = false;
+             }
+         }
+     }
+
+     if (!depsDone)
+     {
+         // come back to this once the dependencies have been generated
+         state->pending->emplace(blockId);
+         return;
+     }
+
+     if (!state->visited.emplace(blockId).second)
+     {
+         return; // Already emitted this loop.
+     }
+
+     // loopActiveLaneMask is the mask of lanes that are continuing to loop.
+     // This is initialized with the incoming active lane masks.
+     SIMD::Int loopActiveLaneMask = SIMD::Int(0);
+     for (auto in : block.ins)
+     {
+         if (!existsPath(blockId, in, block.mergeBlock)) // if not a loop back edge
+         {
+             loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
+         }
+     }
+
+     // Generate an alloca for each of the loop's phis.
+     // These will be primed with the incoming, non back edge Phi values
+     // before the loop, and then updated just before the loop jumps back to
+     // the block.
+     struct LoopPhi
+     {
+         LoopPhi(Object::ID id, uint32_t size) : phiId(id), storage(size) {}
+
+         Object::ID phiId; // The Phi identifier.
+         Object::ID continueValue; // The source merge value from the loop.
+         Array<SIMD::Int> storage; // The alloca.
+     };
+
+     std::vector<LoopPhi> phis;
+
+     // For each OpPhi between the block start and the merge instruction:
+     for (auto insn = block.begin(); insn != block.mergeInstruction; insn++)
+     {
+         if (insn.opcode() == spv::OpPhi)
+         {
+             auto objectId = Object::ID(insn.word(2));
+             auto &object = getObject(objectId);
+             auto &type = getType(object.type);
+
+             LoopPhi phi(objectId, type.sizeInComponents);
+
+             // Start with the Phi set to 0.
+             for (uint32_t i = 0; i < type.sizeInComponents; i++)
+             {
+                 phi.storage[i] = SIMD::Int(0);
+             }
+
+             // For each Phi source:
+             for (uint32_t w = 3; w < insn.wordCount(); w += 2)
+             {
+                 auto varId = Object::ID(insn.word(w + 0));
+                 // Named distinctly from the header's blockId, which it
+                 // previously shadowed.
+                 auto sourceBlockId = Block::ID(insn.word(w + 1));
+                 if (existsPath(blockId, sourceBlockId, block.mergeBlock))
+                 {
+                     // This source is from a loop back-edge.
+                     ASSERT(phi.continueValue == 0 || phi.continueValue == varId);
+                     phi.continueValue = varId;
+                 }
+                 else
+                 {
+                     // This source is from a preceding block.
+                     // The source value and the edge mask do not depend on
+                     // the component index, so look them up once per source.
+                     auto in = GenericValue(this, state->routine, varId);
+                     auto mask = GetActiveLaneMaskEdge(state, sourceBlockId, blockId);
+                     for (uint32_t i = 0; i < type.sizeInComponents; i++)
+                     {
+                         phi.storage[i] = phi.storage[i] | (in.Int(i) & mask);
+                     }
+                 }
+             }
+
+             phis.push_back(phi);
+         }
+     }
+
+     // Create the loop basic blocks
+     auto headerBasicBlock = Nucleus::createBasicBlock();
+     auto mergeBasicBlock = Nucleus::createBasicBlock();
+
+     // Start emitting code inside the loop.
+     Nucleus::createBr(headerBasicBlock);
+     Nucleus::setInsertBlock(headerBasicBlock);
+
+     // Load the Phi values from storage.
+     // This will load at the start of each loop.
+     for (auto &phi : phis)
+     {
+         auto &type = getType(getObject(phi.phiId).type);
+         auto &dst = state->routine->createIntermediate(phi.phiId, type.sizeInComponents);
+         for (unsigned int i = 0u; i < type.sizeInComponents; i++)
+         {
+             dst.move(i, phi.storage[i]);
+         }
+     }
+
+     // Load the active lane mask.
+     state->setActiveLaneMask(loopActiveLaneMask);
+
+     // Emit all the non-phi instructions in this loop header block.
+     for (auto insn = block.begin(); insn != block.end(); insn++)
+     {
+         if (insn.opcode() != spv::OpPhi)
+         {
+             EmitInstruction(insn, state);
+         }
+     }
+
+     // Emit all loop blocks, but don't emit the merge block yet.
+     for (auto out : block.outs)
+     {
+         if (existsPath(out, blockId, block.mergeBlock))
+         {
+             EmitBlocks(out, state, block.mergeBlock);
+         }
+     }
+
+     // Rebuild the loopActiveLaneMask from the loop back edges.
+     loopActiveLaneMask = SIMD::Int(0);
+     for (auto in : block.ins)
+     {
+         if (existsPath(blockId, in, block.mergeBlock))
+         {
+             loopActiveLaneMask |= GetActiveLaneMaskEdge(state, in, blockId);
+         }
+     }
+
+     // Update loop phi values
+     for (auto &phi : phis)
+     {
+         if (phi.continueValue != 0)
+         {
+             auto val = GenericValue(this, state->routine, phi.continueValue);
+             auto &type = getType(getObject(phi.phiId).type);
+             for (unsigned int i = 0u; i < type.sizeInComponents; i++)
+             {
+                 phi.storage[i] = val.Int(i);
+             }
+         }
+     }
+
+     // Loop body now done.
+     // If any lanes are still active, jump back to the loop header,
+     // otherwise jump to the merge block.
+     Nucleus::createCondBr(AnyTrue(loopActiveLaneMask).value, headerBasicBlock, mergeBasicBlock);
+
+     // Continue emitting from the merge block.
+     Nucleus::setInsertBlock(mergeBasicBlock);
+     state->pending->emplace(block.mergeBlock);
+ }
+
SpirvShader::EmitResult SpirvShader::EmitInstruction(InsnIterator insn, EmitState *state) const
{
switch (insn.opcode())
return EmitVectorInsertDynamic(insn, state);
case spv::OpVectorTimesScalar:
+ case spv::OpMatrixTimesScalar:
return EmitVectorTimesScalar(insn, state);
+ case spv::OpMatrixTimesVector:
+ return EmitMatrixTimesVector(insn, state);
+
+ case spv::OpVectorTimesMatrix:
+ return EmitVectorTimesMatrix(insn, state);
+
+ case spv::OpMatrixTimesMatrix:
+ return EmitMatrixTimesMatrix(insn, state);
+
case spv::OpNot:
case spv::OpSNegate:
case spv::OpFNegate:
return EmitPhi(insn, state);
case spv::OpSelectionMerge:
+ case spv::OpLoopMerge:
return EmitResult::Continue;
case spv::OpBranchConditional:
}
bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
- auto anyInactiveLanes = SignMask(~state->activeLaneMask()) != 0;
+ auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
auto load = std::unique_ptr<SIMD::Float[]>(new SIMD::Float[resultTy.sizeInComponents]);
}
bool interleavedByLane = IsStorageInterleavedByLane(pointerBaseTy.storageClass);
- auto anyInactiveLanes = SignMask(~state->activeLaneMask()) != 0;
+ auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
if (object.kind == Object::Kind::Constant)
{
return EmitResult::Continue;
}
+ // Emits OpMatrixTimesVector: result = matrix (operand word 3) * vector
+ // (operand word 4).
+ //
+ // Result component i is the sum over j of
+ //   lhs[i + type.sizeInComponents * j] * rhs[j]
+ // for j in [0, number of components of the vector operand), i.e. the
+ // matrix components are read with a stride equal to the result size.
+ // NOTE(review): this matches column-major matrix storage - confirm against
+ // how matrices are laid out in intermediates.
+ //
+ // Always returns EmitResult::Continue.
+ SpirvShader::EmitResult SpirvShader::EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const
+ {
+     auto routine = state->routine;
+     auto &type = getType(insn.word(1));
+     auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
+     auto lhs = GenericValue(this, routine, insn.word(3));
+     auto rhs = GenericValue(this, routine, insn.word(4));
+     // Bind by reference, as for `type` above; plain `auto` would copy the Type.
+     auto &rhsType = getType(getObject(insn.word(4)).type);
+
+     for (auto i = 0u; i < type.sizeInComponents; i++)
+     {
+         SIMD::Float v = lhs.Float(i) * rhs.Float(0);
+         for (auto j = 1u; j < rhsType.sizeInComponents; j++)
+         {
+             v += lhs.Float(i + type.sizeInComponents * j) * rhs.Float(j);
+         }
+         dst.move(i, v);
+     }
+
+     return EmitResult::Continue;
+ }
+
+ // Emits OpVectorTimesMatrix: result = vector (operand word 3) * matrix
+ // (operand word 4).
+ //
+ // Result component i is the sum over j of
+ //   lhs[j] * rhs[i * lhsType.sizeInComponents + j]
+ // for j in [0, number of components of the vector operand), i.e. each
+ // result component consumes one contiguous run of matrix components.
+ // NOTE(review): this matches column-major matrix storage - confirm against
+ // how matrices are laid out in intermediates.
+ //
+ // Always returns EmitResult::Continue.
+ SpirvShader::EmitResult SpirvShader::EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const
+ {
+     auto routine = state->routine;
+     auto &type = getType(insn.word(1));
+     auto &dst = routine->createIntermediate(insn.word(2), type.sizeInComponents);
+     auto lhs = GenericValue(this, routine, insn.word(3));
+     auto rhs = GenericValue(this, routine, insn.word(4));
+     // Bind by reference, as for `type` above; plain `auto` would copy the Type.
+     auto &lhsType = getType(getObject(insn.word(3)).type);
+
+     for (auto i = 0u; i < type.sizeInComponents; i++)
+     {
+         SIMD::Float v = lhs.Float(0) * rhs.Float(i * lhsType.sizeInComponents);
+         for (auto j = 1u; j < lhsType.sizeInComponents; j++)
+         {
+             v += lhs.Float(j) * rhs.Float(i * lhsType.sizeInComponents + j);
+         }
+         dst.move(i, v);
+     }
+
+     return EmitResult::Continue;
+ }
+
+ // Emits OpMatrixTimesMatrix: result = matrix (operand word 3) * matrix
+ // (operand word 4).
+ //
+ // The counts are read from the type definitions: word 3 of the result
+ // type, word 3 of the type named by the result type's word 2, and word 3
+ // of the left operand's type. Result component (rowCount * c + r) is the
+ // sum over k of left[k * rowCount + r] * right[c * innerCount + k].
+ //
+ // Always returns EmitResult::Continue.
+ SpirvShader::EmitResult SpirvShader::EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const
+ {
+     auto routine = state->routine;
+     auto &resultType = getType(insn.word(1));
+     auto &result = routine->createIntermediate(insn.word(2), resultType.sizeInComponents);
+     auto left = GenericValue(this, routine, insn.word(3));
+     auto right = GenericValue(this, routine, insn.word(4));
+
+     auto columnCount = resultType.definition.word(3);
+     auto rowCount = getType(resultType.definition.word(2)).definition.word(3);
+     auto innerCount = getType(getObject(insn.word(3)).type).definition.word(3);
+
+     for (auto r = 0u; r < rowCount; r++)
+     {
+         for (auto c = 0u; c < columnCount; c++)
+         {
+             SIMD::Float sum = SIMD::Float(0);
+             for (auto k = 0u; k < innerCount; k++)
+             {
+                 auto leftIndex = k * rowCount + r;
+                 auto rightIndex = c * innerCount + k;
+                 sum += left.Float(leftIndex) * right.Float(rightIndex);
+             }
+             result.move(rowCount * c + r, sum);
+         }
+     }
+
+     return EmitResult::Continue;
+ }
+
SpirvShader::EmitResult SpirvShader::EmitUnaryOp(InsnIterator insn, EmitState *state) const
{
auto routine = state->routine;
auto type = getType(typeId);
auto objectId = Object::ID(insn.word(2));
- auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
+ auto tmp = std::unique_ptr<SIMD::Int[]>(new SIMD::Int[type.sizeInComponents]);
bool first = true;
for (uint32_t w = 3; w < insn.wordCount(); w += 2)
auto blockId = Block::ID(insn.word(w + 1));
auto in = GenericValue(this, routine, varId);
- auto mask = state->getActiveLaneMaskEdge(blockId, state->currentBlock);
+ auto mask = GetActiveLaneMaskEdge(state, blockId, state->currentBlock);
for (uint32_t i = 0; i < type.sizeInComponents; i++)
{
auto inMasked = in.Int(i) & mask;
- dst.replace(i, first ? inMasked : (dst.Int(i) | inMasked));
+ tmp[i] = first ? inMasked : (tmp[i] | inMasked);
}
first = false;
}
+ auto &dst = routine->createIntermediate(objectId, type.sizeInComponents);
+ for(uint32_t i = 0; i < type.sizeInComponents; i++)
+ {
+ dst.move(i, tmp[i]);
+ }
+
return EmitResult::Continue;
}
}
}
+ // Returns true if a breadth-first walk over Block::outs edges, starting
+ // at `from`, reaches an edge whose target is `to`. The block
+ // `notPassingThrough` is treated as already seen, so it is never entered
+ // (and is never reported as reached, even when it equals `to`).
+ //
+ // Blocks are marked as seen when they are queued, so each block is queued
+ // at most once from a predecessor. Marking them only after they had been
+ // dequeued let a block with several predecessors be queued and expanded
+ // repeatedly, which compounds along chains of diverging/merging branches.
+ //
+ // NOTE(review): when to == from, only a direct self-edge of `from` is
+ // reported, because `from` is marked seen once its own edges are scanned.
+ bool SpirvShader::existsPath(Block::ID from, Block::ID to, Block::ID notPassingThrough) const
+ {
+     // TODO: Optimize: This can be cached on the block.
+     Block::Set seen;
+     seen.emplace(notPassingThrough);
+
+     std::queue<Block::ID> pending;
+     pending.emplace(from);
+
+     while (pending.size() > 0)
+     {
+         auto id = pending.front();
+         pending.pop();
+         for (auto out : getBlock(id).outs)
+         {
+             if (seen.count(out) != 0) { continue; }
+             if (out == to) { return true; }
+             seen.emplace(out); // Mark on enqueue to avoid queueing duplicates.
+             pending.emplace(out);
+         }
+         // Still needed for `from`, which is queued without being marked.
+         seen.emplace(id);
+     }
+
+     return false;
+ }
+
// Records a lane mask for the edge currentBlock -> to. The stored mask is
// the given mask ANDed with the current active lane mask.
void SpirvShader::EmitState::addOutputActiveLaneMaskEdge(Block::ID to, RValue<SIMD::Int> mask)
{
    auto edgeMask = mask & activeLaneMask();
    addActiveLaneMaskEdge(currentBlock, to, edgeMask);
}
}
- RValue<SIMD::Int> SpirvShader::EmitState::getActiveLaneMaskEdge(Block::ID from, Block::ID to)
+ RValue<SIMD::Int> SpirvShader::GetActiveLaneMaskEdge(EmitState *state, Block::ID from, Block::ID to) const // Returns the lane mask stored in state->edgeActiveLaneMasks for the edge from -> to.
{
auto edge = Block::Edge{from, to};
- auto it = edgeActiveLaneMasks.find(edge);
- ASSERT_MSG(it != edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value());
+ auto it = state->edgeActiveLaneMasks.find(edge);
+ ASSERT_MSG(it != state->edgeActiveLaneMasks.end(), "Could not find edge %d -> %d", from.value(), to.value()); // A mask for this edge must already have been recorded in state.
return it->second;
}