From 26f1a86bd903ca959bdcb0f0f5bdb04984cc3567 Mon Sep 17 00:00:00 2001 From: Chris Forbes Date: Sat, 2 Feb 2019 15:23:01 -0800 Subject: [PATCH] Rework setup, VS->FS structures, etc for Vulkan - Remnants of old fixed function attributes all gone - Initial support for some builtins to prove the model - Setup now driven by correct shader state - VS->FS intermediate structure matches SPIRV model -- builtins are not in location space; location space itself is flat scalars rather than vec4-oriented. There are still some vertex pipe features which are not supported, as ES3 didn't have them -- proper handling of noperspective, etc. Change-Id: Ia8e3c72af54c4d1cbcc18482a741daa5e8e7c053 Bug: b/120799499 Reviewed-on: https://swiftshader-review.googlesource.com/c/24376 Kokoro-Presubmit: kokoro Reviewed-by: Ben Clayton Reviewed-by: Nicolas Capens Tested-by: Chris Forbes --- src/Device/Renderer.cpp | 37 +++++----------- src/Device/SetupProcessor.cpp | 48 +-------------------- src/Device/SetupProcessor.hpp | 12 +----- src/Device/Vertex.hpp | 76 ++++++--------------------------- src/Pipeline/SetupRoutine.cpp | 97 +++++++++++++++++------------------------- src/Pipeline/SetupRoutine.hpp | 2 +- src/Pipeline/VertexProgram.cpp | 13 +++--- src/Pipeline/VertexRoutine.cpp | 77 ++++++++++++++++++++++----------- 8 files changed, 128 insertions(+), 234 deletions(-) diff --git a/src/Device/Renderer.cpp b/src/Device/Renderer.cpp index aa62ceedf..6cf20f05d 100644 --- a/src/Device/Renderer.cpp +++ b/src/Device/Renderer.cpp @@ -31,6 +31,7 @@ #include "System/Timer.hpp" #include "Vulkan/VkDebug.hpp" #include "Pipeline/SpirvShader.hpp" +#include "Vertex.hpp" #undef max @@ -1236,7 +1237,6 @@ namespace sw const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; int ms = state.multiSample; - int pos = state.positionRegister; const DrawData *data = draw.data; int visible = 0; @@ -1248,7 +1248,7 @@ namespace sw if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE) { - Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]); + Polygon polygon(&v0.builtins.position, &v1.builtins.position, &v2.builtins.position); int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags; @@ -1332,10 +1332,8 @@ namespace sw Vertex &v0 = triangle.v0; Vertex &v1 = triangle.v1; - int pos = state.positionRegister; - - const float4 &P0 = v0.v[pos]; - const float4 &P1 = v1.v[pos]; + const float4 &P0 = v0.builtins.position; + const float4 &P1 = v1.builtins.position; if(P0.w <= 0 && P1.w <= 0) { @@ -1525,30 +1523,17 @@ namespace sw Vertex &v = triangle.v0; - float pSize; - - int pts = state.pointSizeRegister; - - if(state.pointSizeRegister != Unused) - { - pSize = v.v[pts].y; - } - else - { - pSize = 1.0f; - } + float pSize = v.builtins.pointSize; pSize = clamp(pSize, data.pointSizeMin, data.pointSizeMax); float4 P[4]; int C[4]; - int pos = state.positionRegister; - - P[0] = v.v[pos]; - P[1] = v.v[pos]; - P[2] = v.v[pos]; - P[3] = v.v[pos]; + P[0] = v.builtins.position; + P[1] = v.builtins.position; + P[2] = v.builtins.position; + P[3] = v.builtins.position; const float X = pSize * P[0].w * data.halfPixelX[0]; const float Y = pSize * P[0].w * data.halfPixelY[0]; @@ -1572,8 +1557,8 @@ namespace sw triangle.v1 = triangle.v0; triangle.v2 = triangle.v0; - triangle.v1.X += iround(16 * 0.5f * pSize); - triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner + triangle.v1.projected.x += iround(16 * 0.5f * pSize); + triangle.v2.projected.y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner Polygon polygon(P, 4); diff --git a/src/Device/SetupProcessor.cpp b/src/Device/SetupProcessor.cpp index 800d320dd..4e0823bfd 100644 --- a/src/Device/SetupProcessor.cpp +++ b/src/Device/SetupProcessor.cpp @@ -87,60 +87,16 @@ namespace sw state.slopeDepthBias = context->slopeDepthBias != 0.0f; state.vFace = context->pixelShader && context->pixelShader->hasBuiltinInput(spv::BuiltInFrontFacing); - state.positionRegister = Pos; - state.pointSizeRegister = Unused; - state.multiSample = context->getMultiSampleCount(); state.rasterizerDiscard = context->rasterizerDiscard; - //TODO: route properly - state.positionRegister = 0;//context->vertexShader->getPositionRegister(); - state.pointSizeRegister = 1;//context->vertexShader->getPointSizeRegister(); - - for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++) + for (int interpolant = 0; interpolant < MAX_INTERFACE_COMPONENTS; interpolant++) { - for(int component = 0; component < 4; component++) - { - state.gradient[interpolant][component].attribute = Unused; - state.gradient[interpolant][component].flat = false; - state.gradient[interpolant][component].wrap = false; - } + state.gradient[interpolant] = context->pixelShader->inputs[interpolant]; } const bool point = context->isDrawPoint(); -// for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++) -// { -// for(int component = 0; component < 4; component++) -// { -// const Shader::Semantic& semantic = context->pixelShader->getInput(interpolant, component); -// -// if(semantic.active()) -// { -// int input = interpolant; -// for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++) -// { -// if(semantic == context->vertexShader->getOutput(i, component)) -// { -// input = i; -// break; -// } -// } -// -// bool flat = point; -// -// switch(semantic.usage) -// { -// case Shader::USAGE_TEXCOORD: flat = false; break; -// case Shader::USAGE_COLOR: flat = semantic.flat || point; break; -// } -// -// state.gradient[interpolant][component].attribute = input; -// state.gradient[interpolant][component].flat = flat; -// } -// } -// } - state.hash = state.computeHash(); return state; diff --git a/src/Device/SetupProcessor.hpp b/src/Device/SetupProcessor.hpp index 797c362d3..7001bce80 100644 --- a/src/Device/SetupProcessor.hpp +++ b/src/Device/SetupProcessor.hpp @@ -15,6 +15,7 @@ #ifndef sw_SetupProcessor_hpp #define sw_SetupProcessor_hpp +#include #include "Context.hpp" #include "RoutineCache.hpp" #include "System/Types.hpp" @@ -41,8 +42,6 @@ namespace sw bool interpolateZ : 1; bool interpolateW : 1; bool perspective : 1; - unsigned int positionRegister : BITS(VERTEX_OUTPUT_LAST); - unsigned int pointSizeRegister : BITS(VERTEX_OUTPUT_LAST); CullMode cullMode : BITS(CULL_LAST); bool twoSidedStencil : 1; bool slopeDepthBias : 1; @@ -50,14 +49,7 @@ namespace sw unsigned int multiSample : 3; // 1, 2 or 4 bool rasterizerDiscard : 1; - struct Gradient - { - unsigned char attribute : BITS(VERTEX_OUTPUT_LAST); - bool flat : 1; - bool wrap : 1; - }; - - Gradient gradient[MAX_FRAGMENT_INPUTS][4]; + SpirvShader::InterfaceComponent gradient[MAX_INTERFACE_COMPONENTS]; }; struct State : States diff --git a/src/Device/Vertex.hpp b/src/Device/Vertex.hpp index 972bbf366..27b8b1816 100644 --- a/src/Device/Vertex.hpp +++ b/src/Device/Vertex.hpp @@ -21,75 +21,25 @@ namespace sw { - enum Out - { - // Default vertex output semantics - Pos = 0, - C0 = 1, // Diffuse - C1 = 2, // Specular - T0 = 3, - T1 = 4, - T2 = 5, - T3 = 6, - T4 = 7, - T5 = 8, - T6 = 9, - T7 = 10, - Fog = 11, // x component - Pts = Fog, // y component - - // Variable semantics - V0 = 0, - Vn_1 = MAX_VERTEX_OUTPUTS - 1, - - Unused, - VERTEX_OUTPUT_LAST = Unused, - }; - - struct UVWQ + ALIGN(16, struct Vertex { - float u; - float v; - float w; - float q; + float v[MAX_INTERFACE_COMPONENTS]; - float &operator[](int i) + struct { - return (&u)[i]; - } - }; - - ALIGN(16, struct Vertex - { - union + float4 position; + float pointSize; + } builtins; + struct { - struct // Fixed semantics - { - // Position - float x; - float y; - float z; - float w; - - float4 C[2]; // Diffuse and specular color - - UVWQ T[8]; // Texture coordinates - - float f; // Fog - float pSize; // Point size - }; - - float4 v[MAX_VERTEX_OUTPUTS]; // Generic components using semantic declaration - }; - - // Projected coordinates - int X; - int Y; - float Z; - float W; + int x; + int y; + float z; + float w; + } projected; int clipFlags; - int padding[3]; + int padding[2]; }); static_assert((sizeof(Vertex) & 0x0000000F) == 0, "Vertex size not a multiple of 16 bytes (alignment requirement)"); diff --git a/src/Pipeline/SetupRoutine.cpp b/src/Pipeline/SetupRoutine.cpp index d3fd5c316..80a45575b 100644 --- a/src/Pipeline/SetupRoutine.cpp +++ b/src/Pipeline/SetupRoutine.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include "SetupRoutine.hpp" #include "Constants.hpp" @@ -54,8 +55,6 @@ namespace sw const int V1 = (triangle || line) ? OFFSET(Triangle,v1) : OFFSET(Triangle,v0); const int V2 = triangle ? OFFSET(Triangle,v2) : (line ? OFFSET(Triangle,v1) : OFFSET(Triangle,v0)); - int pos = state.positionRegister; - Pointer v0 = tri + V0; Pointer v1 = tri + V1; Pointer v2 = tri + V2; @@ -63,13 +62,13 @@ namespace sw Array X(16); Array Y(16); - X[0] = *Pointer(v0 + OFFSET(Vertex,X)); - X[1] = *Pointer(v1 + OFFSET(Vertex,X)); - X[2] = *Pointer(v2 + OFFSET(Vertex,X)); + X[0] = *Pointer(v0 + OFFSET(Vertex,projected.x)); + X[1] = *Pointer(v1 + OFFSET(Vertex,projected.x)); + X[2] = *Pointer(v2 + OFFSET(Vertex,projected.x)); - Y[0] = *Pointer(v0 + OFFSET(Vertex,Y)); - Y[1] = *Pointer(v1 + OFFSET(Vertex,Y)); - Y[2] = *Pointer(v2 + OFFSET(Vertex,Y)); + Y[0] = *Pointer(v0 + OFFSET(Vertex,projected.y)); + Y[1] = *Pointer(v1 + OFFSET(Vertex,projected.y)); + Y[2] = *Pointer(v2 + OFFSET(Vertex,projected.y)); Int d = 1; // Winding direction @@ -91,9 +90,9 @@ namespace sw Return(false); } - Int w0w1w2 = *Pointer(v0 + pos * 16 + 12) ^ - *Pointer(v1 + pos * 16 + 12) ^ - *Pointer(v2 + pos * 16 + 12); + Int w0w1w2 = *Pointer(v0 + OFFSET(Vertex, builtins.position.w)) ^ + *Pointer(v1 + OFFSET(Vertex, builtins.position.w)) ^ + *Pointer(v2 + OFFSET(Vertex, builtins.position.w)); A = IfThenElse(w0w1w2 < 0, -A, A); @@ -279,9 +278,9 @@ namespace sw // Sort by minimum y if(triangle) { - Float y0 = *Pointer(v0 + pos * 16 + 4); - Float y1 = *Pointer(v1 + pos * 16 + 4); - Float y2 = *Pointer(v2 + pos * 16 + 4); + Float y0 = *Pointer(v0 + OFFSET(Vertex, builtins.position.y)); + Float y1 = *Pointer(v1 + OFFSET(Vertex, builtins.position.y)); + Float y2 = *Pointer(v2 + OFFSET(Vertex, builtins.position.y)); Float yMin = Min(Min(y0, y1), y2); @@ -292,9 +291,9 @@ namespace sw // Sort by maximum w if(triangle) { - Float w0 = *Pointer(v0 + pos * 16 + 12); - Float w1 = *Pointer(v1 + pos * 16 + 12); - Float w2 = *Pointer(v2 + pos * 16 + 12); + Float w0 = *Pointer(v0 + OFFSET(Vertex, builtins.position.w)); + Float w1 = *Pointer(v1 + OFFSET(Vertex, builtins.position.w)); + Float w2 = *Pointer(v2 + OFFSET(Vertex, builtins.position.w)); Float wMax = Max(Max(w0, w1), w2); @@ -302,9 +301,9 @@ namespace sw conditionalRotate2(wMax == w2, v0, v1, v2); } - Float w0 = *Pointer(v0 + pos * 16 + 12); - Float w1 = *Pointer(v1 + pos * 16 + 12); - Float w2 = *Pointer(v2 + pos * 16 + 12); + Float w0 = *Pointer(v0 + OFFSET(Vertex, builtins.position.w)); + Float w1 = *Pointer(v1 + OFFSET(Vertex, builtins.position.w)); + Float w2 = *Pointer(v2 + OFFSET(Vertex, builtins.position.w)); Float4 w012; @@ -313,15 +312,15 @@ namespace sw w012.z = w2; w012.w = 1; - Float rhw0 = *Pointer(v0 + OFFSET(Vertex,W)); + Float rhw0 = *Pointer(v0 + OFFSET(Vertex,projected.w)); - Int X0 = *Pointer(v0 + OFFSET(Vertex,X)); - Int X1 = *Pointer(v1 + OFFSET(Vertex,X)); - Int X2 = *Pointer(v2 + OFFSET(Vertex,X)); + Int X0 = *Pointer(v0 + OFFSET(Vertex,projected.x)); + Int X1 = *Pointer(v1 + OFFSET(Vertex,projected.x)); + Int X2 = *Pointer(v2 + OFFSET(Vertex,projected.x)); - Int Y0 = *Pointer(v0 + OFFSET(Vertex,Y)); - Int Y1 = *Pointer(v1 + OFFSET(Vertex,Y)); - Int Y2 = *Pointer(v2 + OFFSET(Vertex,Y)); + Int Y0 = *Pointer(v0 + OFFSET(Vertex,projected.y)); + Int Y1 = *Pointer(v1 + OFFSET(Vertex,projected.y)); + Int Y2 = *Pointer(v2 + OFFSET(Vertex,projected.y)); if(line) { @@ -396,9 +395,9 @@ namespace sw if(state.interpolateZ) { - Float z0 = *Pointer(v0 + OFFSET(Vertex,Z)); - Float z1 = *Pointer(v1 + OFFSET(Vertex,Z)); - Float z2 = *Pointer(v2 + OFFSET(Vertex,Z)); + Float z0 = *Pointer(v0 + OFFSET(Vertex,projected.z)); + Float z1 = *Pointer(v1 + OFFSET(Vertex,projected.z)); + Float z2 = *Pointer(v2 + OFFSET(Vertex,projected.z)); z1 -= z0; z2 -= z0; @@ -451,19 +450,16 @@ namespace sw *Pointer(primitive + OFFSET(Primitive,z.C), 16) = C; } - for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++) + for (int interpolant = 0; interpolant < MAX_INTERFACE_COMPONENTS; interpolant++) { - for(int component = 0; component < 4; component++) - { - int attribute = state.gradient[interpolant][component].attribute; - bool flat = state.gradient[interpolant][component].flat; - bool wrap = state.gradient[interpolant][component].wrap; - - if(attribute != Unused) - { - setupGradient(primitive, tri, w012, M, v0, v1, v2, OFFSET(Vertex,v[attribute][component]), OFFSET(Primitive,V[interpolant][component]), flat, point, state.perspective, wrap, component); - } - } + // TODO: fix point, perspective, etc. Not convinced various edge cases are really correct here for either VK or GL. + if (state.gradient[interpolant].Type != SpirvShader::ATTRIBTYPE_UNUSED) + setupGradient(primitive, tri, w012, M, v0, v1, v2, + OFFSET(Vertex, v[interpolant]), + OFFSET(Primitive, V[interpolant]), + state.gradient[interpolant].Flat, + point, + state.perspective, 0); } Return(true); @@ -472,7 +468,7 @@ namespace sw routine = function("SetupRoutine"); } - void SetupRoutine::setupGradient(Pointer &primitive, Pointer &triangle, Float4 &w012, Float4 (&m)[3], Pointer &v0, Pointer &v1, Pointer &v2, int attribute, int planeEquation, bool flat, bool sprite, bool perspective, bool wrap, int component) + void SetupRoutine::setupGradient(Pointer &primitive, Pointer &triangle, Float4 &w012, Float4 (&m)[3], Pointer &v0, Pointer &v1, Pointer &v2, int attribute, int planeEquation, bool flat, bool sprite, bool perspective, int component) { Float4 i; @@ -505,21 +501,6 @@ namespace sw i.w = 0; } - if(wrap) - { - Float m; - - m = *Pointer(v0 + attribute); - m = Max(m, *Pointer(v1 + attribute)); - m = Max(m, *Pointer(v2 + attribute)); - m -= 0.5f; - - // TODO: Vectorize - If(Float(i.x) < m) i.x = i.x + 1.0f; - If(Float(i.y) < m) i.y = i.y + 1.0f; - If(Float(i.z) < m) i.z = i.z + 1.0f; - } - if(!perspective) { i *= w012; diff --git a/src/Pipeline/SetupRoutine.hpp b/src/Pipeline/SetupRoutine.hpp index 977eb8f2c..b43dd7a99 100644 --- a/src/Pipeline/SetupRoutine.hpp +++ b/src/Pipeline/SetupRoutine.hpp @@ -33,7 +33,7 @@ namespace sw Routine *getRoutine(); private: - void setupGradient(Pointer &primitive, Pointer &triangle, Float4 &w012, Float4 (&m)[3], Pointer &v0, Pointer &v1, Pointer &v2, int attribute, int planeEquation, bool flatShading, bool sprite, bool perspective, bool wrap, int component); + void setupGradient(Pointer &primitive, Pointer &triangle, Float4 &w012, Float4 (&m)[3], Pointer &v0, Pointer &v1, Pointer &v2, int attribute, int planeEquation, bool flatShading, bool sprite, bool perspective, int component); void edge(Pointer &primitive, Pointer &data, const Int &Xa, const Int &Ya, const Int &Xb, const Int &Yb, Int &q); void conditionalRotate1(Bool condition, Pointer &v0, Pointer &v1, Pointer &v2); void conditionalRotate2(Bool condition, Pointer &v0, Pointer &v1, Pointer &v2); diff --git a/src/Pipeline/VertexProgram.cpp b/src/Pipeline/VertexProgram.cpp index 1e0e1629b..7bb498fd3 100644 --- a/src/Pipeline/VertexProgram.cpp +++ b/src/Pipeline/VertexProgram.cpp @@ -32,11 +32,14 @@ namespace sw enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); - // TODO: wire up builtins - //if(shader->isInstanceIdDeclared()) - //{ - // instanceID = *Pointer(data + OFFSET(DrawData,instanceID)); - //} + auto it = spirvShader->inputBuiltins.find(spv::BuiltInInstanceIndex); + if (it != spirvShader->inputBuiltins.end()) + { + // TODO: we could do better here; we know InstanceIndex is uniform across all lanes + assert(it->second.SizeInComponents == 1); + (*routine.lvalues[it->second.Id])[it->second.FirstComponent] = + As(Int4((*Pointer(data + OFFSET(DrawData, instanceID))))); + } } VertexProgram::~VertexProgram() diff --git a/src/Pipeline/VertexRoutine.cpp b/src/Pipeline/VertexRoutine.cpp index 636351fee..e1f0b3d24 100644 --- a/src/Pipeline/VertexRoutine.cpp +++ b/src/Pipeline/VertexRoutine.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include "VertexRoutine.hpp" #include "Constants.hpp" @@ -94,14 +95,21 @@ namespace sw void VertexRoutine::computeClipFlags() { - int pos = state.positionRegister; - - Int4 maxX = CmpLT(o[pos].w, o[pos].x); - Int4 maxY = CmpLT(o[pos].w, o[pos].y); - Int4 maxZ = CmpLT(o[pos].w, o[pos].z); - Int4 minX = CmpNLE(-o[pos].w, o[pos].x); - Int4 minY = CmpNLE(-o[pos].w, o[pos].y); - Int4 minZ = CmpNLE(Float4(0.0f), o[pos].z); + auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition); + assert(it != spirvShader->outputBuiltins.end()); + assert(it->second.SizeInComponents == 4); + auto &pos = (*routine.lvalues[it->second.Id]); + auto posX = pos[it->second.FirstComponent]; + auto posY = pos[it->second.FirstComponent + 1]; + auto posZ = pos[it->second.FirstComponent + 2]; + auto posW = pos[it->second.FirstComponent + 3]; + + Int4 maxX = CmpLT(posW, posX); + Int4 maxY = CmpLT(posW, posY); + Int4 maxZ = CmpLT(posW, posZ); + Int4 minX = CmpNLE(-posW, posX); + Int4 minY = CmpNLE(-posW, posY); + Int4 minZ = CmpNLE(Float4(0.0f), posZ); clipFlags = *Pointer(constants + OFFSET(Constants,maxX) + SignMask(maxX) * 4); // FIXME: Array indexing clipFlags |= *Pointer(constants + OFFSET(Constants,maxY) + SignMask(maxY) * 4); @@ -110,9 +118,9 @@ namespace sw clipFlags |= *Pointer(constants + OFFSET(Constants,minY) + SignMask(minY) * 4); clipFlags |= *Pointer(constants + OFFSET(Constants,minZ) + SignMask(minZ) * 4); - Int4 finiteX = CmpLE(Abs(o[pos].x), *Pointer(constants + OFFSET(Constants,maxPos))); - Int4 finiteY = CmpLE(Abs(o[pos].y), *Pointer(constants + OFFSET(Constants,maxPos))); - Int4 finiteZ = CmpLE(Abs(o[pos].z), *Pointer(constants + OFFSET(Constants,maxPos))); + Int4 finiteX = CmpLE(Abs(posX), *Pointer(constants + OFFSET(Constants,maxPos))); + Int4 finiteY = CmpLE(Abs(posY), *Pointer(constants + OFFSET(Constants,maxPos))); + Int4 finiteZ = CmpLE(Abs(posZ), *Pointer(constants + OFFSET(Constants,maxPos))); Int4 finiteXYZ = finiteX & finiteY & finiteZ; clipFlags |= *Pointer(constants + OFFSET(Constants,fini) + SignMask(finiteXYZ) * 4); @@ -658,12 +666,28 @@ namespace sw *Pointer(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 3) = (clipFlags >> 24) & 0x0000000FF; // Viewport transform - int pos = state.positionRegister; - - v.x = o[pos].x; - v.y = o[pos].y; - v.z = o[pos].z; - v.w = o[pos].w; + auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition); + assert(it != spirvShader->outputBuiltins.end()); + assert(it->second.SizeInComponents == 4); + auto &pos = (*routine.lvalues[it->second.Id]); + auto posX = pos[it->second.FirstComponent]; + auto posY = pos[it->second.FirstComponent + 1]; + auto posZ = pos[it->second.FirstComponent + 2]; + auto posW = pos[it->second.FirstComponent + 3]; + + v.x = posX; + v.y = posY; + v.z = posZ; + v.w = posW; + + // Write the builtin pos into the vertex; it's not going to be consumed by the FS, but may need to reproject if we have to clip. + Vector4f v2 = v; + transpose4x4(v2.x, v2.y, v2.z, v2.w); + + *Pointer(cacheLine + OFFSET(Vertex,builtins.position) + sizeof(Vertex) * 0, 16) = v2.x; + *Pointer(cacheLine + OFFSET(Vertex,builtins.position) + sizeof(Vertex) * 1, 16) = v2.y; + *Pointer(cacheLine + OFFSET(Vertex,builtins.position) + sizeof(Vertex) * 2, 16) = v2.z; + *Pointer(cacheLine + OFFSET(Vertex,builtins.position) + sizeof(Vertex) * 3, 16) = v2.w; Float4 w = As(As(v.w) | (As(CmpEQ(v.w, Float4(0.0f))) & As(Float4(1.0f)))); Float4 rhw = Float4(1.0f) / w; @@ -675,23 +699,26 @@ namespace sw transpose4x4(v.x, v.y, v.z, v.w); - *Pointer(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 0, 16) = v.x; - *Pointer(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 1, 16) = v.y; - *Pointer(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 2, 16) = v.z; - *Pointer(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 3, 16) = v.w; + *Pointer(cacheLine + OFFSET(Vertex,projected) + sizeof(Vertex) * 0, 16) = v.x; + *Pointer(cacheLine + OFFSET(Vertex,projected) + sizeof(Vertex) * 1, 16) = v.y; + *Pointer(cacheLine + OFFSET(Vertex,projected) + sizeof(Vertex) * 2, 16) = v.z; + *Pointer(cacheLine + OFFSET(Vertex,projected) + sizeof(Vertex) * 3, 16) = v.w; } void VertexRoutine::writeVertex(const Pointer &vertex, Pointer &cache) { - for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++) + for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i++) { - if(state.output[i].write) + if(spirvShader->outputs[i].Type != SpirvShader::ATTRIBTYPE_UNUSED) { - *Pointer(vertex + OFFSET(Vertex,v[i]), 16) = *Pointer(cache + OFFSET(Vertex,v[i]), 16); + *Pointer(vertex + OFFSET(Vertex, v[i]), 4) = *Pointer(cache + OFFSET(Vertex, v[i]), 4); } } - *Pointer(vertex + OFFSET(Vertex,X)) = *Pointer(cache + OFFSET(Vertex,X)); + *Pointer(vertex + OFFSET(Vertex,projected)) = *Pointer(cache + OFFSET(Vertex,projected)); *Pointer(vertex + OFFSET(Vertex,clipFlags)) = *Pointer(cache + OFFSET(Vertex,clipFlags)); + *Pointer(vertex + OFFSET(Vertex,builtins.position)) = *Pointer(cache + OFFSET(Vertex,builtins.position)); + *Pointer(vertex + OFFSET(Vertex,builtins.pointSize)) = *Pointer(cache + OFFSET(Vertex,builtins.pointSize)); + } } -- 2.11.0