From 7551ac6868ec1dc3965a00ff0a2003adbb2018d5 Mon Sep 17 00:00:00 2001 From: Nicolas Capens Date: Wed, 20 Jan 2016 17:11:53 -0500 Subject: [PATCH] Make vertex registers members of routine classes. Bug 22652760 Change-Id: I698ce910ee4302178d7235fa316aaa2b268e71a8 Reviewed-on: https://swiftshader-review.googlesource.com/4560 Tested-by: Nicolas Capens Reviewed-by: Nicolas Capens --- src/Shader/VertexPipeline.cpp | 298 ++++++++++++++-------------- src/Shader/VertexProgram.cpp | 443 ++++++++++++++++++++++-------------------- src/Shader/VertexProgram.hpp | 22 +++ src/Shader/VertexRoutine.cpp | 188 +++++++++--------- src/Shader/VertexRoutine.hpp | 53 +---- 5 files changed, 499 insertions(+), 505 deletions(-) diff --git a/src/Shader/VertexPipeline.cpp b/src/Shader/VertexPipeline.cpp index f212b1822..c2fa31f2f 100644 --- a/src/Shader/VertexPipeline.cpp +++ b/src/Shader/VertexPipeline.cpp @@ -53,7 +53,7 @@ namespace sw { for(int i = 0; i < 4; i++) { - Float4 B = r.v[BlendIndices].x; + Float4 B = v[BlendIndices].x; UInt indices; switch(i) @@ -88,9 +88,9 @@ namespace sw switch(state.vertexBlendMatrixCount) { - case 4: weight2 = r.v[BlendWeight].z; - case 3: weight1 = r.v[BlendWeight].y; - case 2: weight0 = r.v[BlendWeight].x; + case 4: weight2 = v[BlendWeight].z; + case 3: weight1 = v[BlendWeight].y; + case 2: weight0 = v[BlendWeight].x; case 1: break; } @@ -162,23 +162,23 @@ namespace sw if(!state.preTransformed) { - position = transformBlend(r.v[Position], Pointer(r.data + OFFSET(DrawData,ff.transformT)), true); + position = transformBlend(v[Position], Pointer(data + OFFSET(DrawData,ff.transformT)), true); } else { - position = r.v[PositionT]; + position = v[PositionT]; } - r.o[Pos].x = position.x; - r.o[Pos].y = position.y; - r.o[Pos].z = position.z; - r.o[Pos].w = position.w; + o[Pos].x = position.x; + o[Pos].y = position.y; + o[Pos].z = position.z; + o[Pos].w = position.w; - Vector4f vertexPosition = transformBlend(r.v[Position], Pointer(r.data + OFFSET(DrawData,ff.cameraTransformT)), true); + Vector4f vertexPosition = transformBlend(v[Position], Pointer(data + OFFSET(DrawData,ff.cameraTransformT)), true); if(state.vertexNormalActive) { - normal = transformBlend(r.v[Normal], Pointer(r.data + OFFSET(DrawData,ff.normalTransformT)), false); + normal = transformBlend(v[Normal], Pointer(data + OFFSET(DrawData,ff.normalTransformT)), false); if(state.normalizeNormals) { @@ -191,53 +191,53 @@ namespace sw // FIXME: Don't process if not used at all if(state.diffuseActive && state.input[Color0]) { - Vector4f diffuse = r.v[Color0]; + Vector4f diffuse = v[Color0]; - r.o[D0].x = diffuse.x; - r.o[D0].y = diffuse.y; - r.o[D0].z = diffuse.z; - r.o[D0].w = diffuse.w; + o[D0].x = diffuse.x; + o[D0].y = diffuse.y; + o[D0].z = diffuse.z; + o[D0].w = diffuse.w; } else { - r.o[D0].x = Float4(1.0f); - r.o[D0].y = Float4(1.0f); - r.o[D0].z = Float4(1.0f); - r.o[D0].w = Float4(1.0f); + o[D0].x = Float4(1.0f); + o[D0].y = Float4(1.0f); + o[D0].z = Float4(1.0f); + o[D0].w = Float4(1.0f); } // FIXME: Don't process if not used at all if(state.specularActive && state.input[Color1]) { - Vector4f specular = r.v[Color1]; + Vector4f specular = v[Color1]; - r.o[D1].x = specular.x; - r.o[D1].y = specular.y; - r.o[D1].z = specular.z; - r.o[D1].w = specular.w; + o[D1].x = specular.x; + o[D1].y = specular.y; + o[D1].z = specular.z; + o[D1].w = specular.w; } else { - r.o[D1].x = Float4(0.0f); - r.o[D1].y = Float4(0.0f); - r.o[D1].z = Float4(0.0f); - r.o[D1].w = Float4(1.0f); + o[D1].x = Float4(0.0f); + o[D1].y = Float4(0.0f); + o[D1].z = Float4(0.0f); + o[D1].w = Float4(1.0f); } } else { - r.o[D0].x = Float4(0.0f); - r.o[D0].y = Float4(0.0f); - r.o[D0].z = Float4(0.0f); - r.o[D0].w = Float4(0.0f); + o[D0].x = Float4(0.0f); + o[D0].y = Float4(0.0f); + o[D0].z = Float4(0.0f); + o[D0].w = Float4(0.0f); - r.o[D1].x = Float4(0.0f); - r.o[D1].y = Float4(0.0f); - r.o[D1].z = Float4(0.0f); - r.o[D1].w = Float4(0.0f); + o[D1].x = Float4(0.0f); + o[D1].y = Float4(0.0f); + o[D1].z = Float4(0.0f); + o[D1].w = Float4(0.0f); Vector4f ambient; - Float4 globalAmbient = *Pointer(r.data + OFFSET(DrawData,ff.globalAmbient)); // FIXME: Unpack + Float4 globalAmbient = *Pointer(data + OFFSET(DrawData,ff.globalAmbient)); // FIXME: Unpack ambient.x = globalAmbient.x; ambient.y = globalAmbient.y; @@ -257,7 +257,7 @@ namespace sw { Float4 d; // Distance - L.x = L.y = L.z = *Pointer(r.data + OFFSET(DrawData,ff.lightPosition[i])); // FIXME: Unpack + L.x = L.y = L.z = *Pointer(data + OFFSET(DrawData,ff.lightPosition[i])); // FIXME: Unpack L.x = L.x.xxxx; L.y = L.y.yyyy; L.z = L.z.zzzz; @@ -272,16 +272,16 @@ namespace sw L.z *= d; d = Rcp_pp(d); // FIXME: Sufficient precision? - Float4 q = *Pointer(r.data + OFFSET(DrawData,ff.attenuationQuadratic[i])); - Float4 l = *Pointer(r.data + OFFSET(DrawData,ff.attenuationLinear[i])); - Float4 c = *Pointer(r.data + OFFSET(DrawData,ff.attenuationConstant[i])); + Float4 q = *Pointer(data + OFFSET(DrawData,ff.attenuationQuadratic[i])); + Float4 l = *Pointer(data + OFFSET(DrawData,ff.attenuationLinear[i])); + Float4 c = *Pointer(data + OFFSET(DrawData,ff.attenuationConstant[i])); att = Rcp_pp((q * d + l) * d + c); } // Ambient per light { - Float4 lightAmbient = *Pointer(r.data + OFFSET(DrawData,ff.lightAmbient[i])); // FIXME: Unpack + Float4 lightAmbient = *Pointer(data + OFFSET(DrawData,ff.lightAmbient[i])); // FIXME: Unpack ambient.x = ambient.x + lightAmbient.x * att; ambient.y = ambient.y + lightAmbient.y * att; @@ -301,26 +301,26 @@ namespace sw if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL) { - diff.x = diff.y = diff.z = *Pointer(r.data + OFFSET(DrawData,ff.materialDiffuse)); // FIXME: Unpack + diff.x = diff.y = diff.z = *Pointer(data + OFFSET(DrawData,ff.materialDiffuse)); // FIXME: Unpack diff.x = diff.x.xxxx; diff.y = diff.y.yyyy; diff.z = diff.z.zzzz; } else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1) { - diff = r.v[Color0]; + diff = v[Color0]; } else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2) { - diff = r.v[Color1]; + diff = v[Color1]; } else ASSERT(false); - Float4 lightDiffuse = *Pointer(r.data + OFFSET(DrawData,ff.lightDiffuse[i])); + Float4 lightDiffuse = *Pointer(data + OFFSET(DrawData,ff.lightDiffuse[i])); - r.o[D0].x = r.o[D0].x + diff.x * dot * lightDiffuse.x; // FIXME: Clamp first? - r.o[D0].y = r.o[D0].y + diff.y * dot * lightDiffuse.y; // FIXME: Clamp first? - r.o[D0].z = r.o[D0].z + diff.z * dot * lightDiffuse.z; // FIXME: Clamp first? + o[D0].x = o[D0].x + diff.x * dot * lightDiffuse.x; // FIXME: Clamp first? + o[D0].y = o[D0].y + diff.y * dot * lightDiffuse.y; // FIXME: Clamp first? + o[D0].z = o[D0].z + diff.z * dot * lightDiffuse.z; // FIXME: Clamp first? } // Specular @@ -330,7 +330,7 @@ namespace sw Vector4f C; // Camera vector Float4 pow; - pow = *Pointer(r.data + OFFSET(DrawData,ff.materialShininess)); + pow = *Pointer(data + OFFSET(DrawData,ff.materialShininess)); S.x = Float4(0.0f) - vertexPosition.x; S.y = Float4(0.0f) - vertexPosition.y; @@ -351,7 +351,7 @@ namespace sw if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL) { - Float4 materialSpecular = *Pointer(r.data + OFFSET(DrawData,ff.materialSpecular)); // FIXME: Unpack + Float4 materialSpecular = *Pointer(data + OFFSET(DrawData,ff.materialSpecular)); // FIXME: Unpack spec.x = materialSpecular.x; spec.y = materialSpecular.y; @@ -359,15 +359,15 @@ namespace sw } else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1) { - spec = r.v[Color0]; + spec = v[Color0]; } else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2) { - spec = r.v[Color1]; + spec = v[Color1]; } else ASSERT(false); - Float4 lightSpecular = *Pointer(r.data + OFFSET(DrawData,ff.lightSpecular[i])); + Float4 lightSpecular = *Pointer(data + OFFSET(DrawData,ff.lightSpecular[i])); spec.x *= lightSpecular.x; spec.y *= lightSpecular.y; @@ -383,22 +383,22 @@ namespace sw if(secondaryColor) { - r.o[D1].x = r.o[D1].x + spec.x; - r.o[D1].y = r.o[D1].y + spec.y; - r.o[D1].z = r.o[D1].z + spec.z; + o[D1].x = o[D1].x + spec.x; + o[D1].y = o[D1].y + spec.y; + o[D1].z = o[D1].z + spec.z; } else { - r.o[D0].x = r.o[D0].x + spec.x; - r.o[D0].y = r.o[D0].y + spec.y; - r.o[D0].z = r.o[D0].z + spec.z; + o[D0].x = o[D0].x + spec.x; + o[D0].y = o[D0].y + spec.y; + o[D0].z = o[D0].z + spec.z; } } } if(state.vertexAmbientMaterialSourceActive == MATERIAL_MATERIAL) { - Float4 materialAmbient = *Pointer(r.data + OFFSET(DrawData,ff.materialAmbient)); // FIXME: Unpack + Float4 materialAmbient = *Pointer(data + OFFSET(DrawData,ff.materialAmbient)); // FIXME: Unpack ambient.x = ambient.x * materialAmbient.x; ambient.y = ambient.y * materialAmbient.y; @@ -406,7 +406,7 @@ namespace sw } else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR1) { - Vector4f materialDiffuse = r.v[Color0]; + Vector4f materialDiffuse = v[Color0]; ambient.x = ambient.x * materialDiffuse.x; ambient.y = ambient.y * materialDiffuse.y; @@ -414,7 +414,7 @@ namespace sw } else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR2) { - Vector4f materialSpecular = r.v[Color1]; + Vector4f materialSpecular = v[Color1]; ambient.x = ambient.x * materialSpecular.x; ambient.y = ambient.y * materialSpecular.y; @@ -422,51 +422,51 @@ namespace sw } else ASSERT(false); - r.o[D0].x = r.o[D0].x + ambient.x; - r.o[D0].y = r.o[D0].y + ambient.y; - r.o[D0].z = r.o[D0].z + ambient.z; + o[D0].x = o[D0].x + ambient.x; + o[D0].y = o[D0].y + ambient.y; + o[D0].z = o[D0].z + ambient.z; // Emissive if(state.vertexEmissiveMaterialSourceActive == MATERIAL_MATERIAL) { - Float4 materialEmission = *Pointer(r.data + OFFSET(DrawData,ff.materialEmission)); // FIXME: Unpack + Float4 materialEmission = *Pointer(data + OFFSET(DrawData,ff.materialEmission)); // FIXME: Unpack - r.o[D0].x = r.o[D0].x + materialEmission.x; - r.o[D0].y = r.o[D0].y + materialEmission.y; - r.o[D0].z = r.o[D0].z + materialEmission.z; + o[D0].x = o[D0].x + materialEmission.x; + o[D0].y = o[D0].y + materialEmission.y; + o[D0].z = o[D0].z + materialEmission.z; } else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR1) { - Vector4f materialSpecular = r.v[Color0]; + Vector4f materialSpecular = v[Color0]; - r.o[D0].x = r.o[D0].x + materialSpecular.x; - r.o[D0].y = r.o[D0].y + materialSpecular.y; - r.o[D0].z = r.o[D0].z + materialSpecular.z; + o[D0].x = o[D0].x + materialSpecular.x; + o[D0].y = o[D0].y + materialSpecular.y; + o[D0].z = o[D0].z + materialSpecular.z; } else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR2) { - Vector4f materialSpecular = r.v[Color1]; + Vector4f materialSpecular = v[Color1]; - r.o[D0].x = r.o[D0].x + materialSpecular.x; - r.o[D0].y = r.o[D0].y + materialSpecular.y; - r.o[D0].z = r.o[D0].z + materialSpecular.z; + o[D0].x = o[D0].x + materialSpecular.x; + o[D0].y = o[D0].y + materialSpecular.y; + o[D0].z = o[D0].z + materialSpecular.z; } else ASSERT(false); // Diffuse alpha component if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL) { - r.o[D0].w = Float4(*Pointer(r.data + OFFSET(DrawData,ff.materialDiffuse[0]))).wwww; // FIXME: Unpack + o[D0].w = Float4(*Pointer(data + OFFSET(DrawData,ff.materialDiffuse[0]))).wwww; // FIXME: Unpack } else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1) { - Vector4f alpha = r.v[Color0]; - r.o[D0].w = alpha.w; + Vector4f alpha = v[Color0]; + o[D0].w = alpha.w; } else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2) { - Vector4f alpha = r.v[Color1]; - r.o[D0].w = alpha.w; + Vector4f alpha = v[Color1]; + o[D0].w = alpha.w; } else ASSERT(false); @@ -475,17 +475,17 @@ namespace sw // Specular alpha component if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL) { - r.o[D1].w = Float4(*Pointer(r.data + OFFSET(DrawData,ff.materialSpecular[3]))).wwww; // FIXME: Unpack + o[D1].w = Float4(*Pointer(data + OFFSET(DrawData,ff.materialSpecular[3]))).wwww; // FIXME: Unpack } else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1) { - Vector4f alpha = r.v[Color0]; - r.o[D1].w = alpha.w; + Vector4f alpha = v[Color0]; + o[D1].w = alpha.w; } else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2) { - Vector4f alpha = r.v[Color1]; - r.o[D1].w = alpha.w; + Vector4f alpha = v[Color1]; + o[D1].w = alpha.w; } else ASSERT(false); } @@ -509,21 +509,21 @@ namespace sw case FOG_NONE: if(state.specularActive) { - r.o[Fog].x = r.o[D1].w; + o[Fog].x = o[D1].w; } else { - r.o[Fog].x = Float4(0.0f); + o[Fog].x = Float4(0.0f); } break; case FOG_LINEAR: - r.o[Fog].x = f * *Pointer(r.data + OFFSET(DrawData,fog.scale)) + *Pointer(r.data + OFFSET(DrawData,fog.offset)); + o[Fog].x = f * *Pointer(data + OFFSET(DrawData,fog.scale)) + *Pointer(data + OFFSET(DrawData,fog.offset)); break; case FOG_EXP: - r.o[Fog].x = exponential2(f * *Pointer(r.data + OFFSET(DrawData,fog.densityE)), true); + o[Fog].x = exponential2(f * *Pointer(data + OFFSET(DrawData,fog.densityE)), true); break; case FOG_EXP2: - r.o[Fog].x = exponential2((f * f) * *Pointer(r.data + OFFSET(DrawData,fog.density2E)), true); + o[Fog].x = exponential2((f * f) * *Pointer(data + OFFSET(DrawData,fog.density2E)), true); break; default: ASSERT(false); @@ -548,38 +548,38 @@ namespace sw { case TEXGEN_NONE: { - Vector4f v = r.v[TexCoord0 + i]; + Vector4f &&varying = v[TexCoord0 + i]; - r.o[T0 + stage].x = v.x; - r.o[T0 + stage].y = v.y; - r.o[T0 + stage].z = v.z; - r.o[T0 + stage].w = v.w; + o[T0 + stage].x = varying.x; + o[T0 + stage].y = varying.y; + o[T0 + stage].z = varying.z; + o[T0 + stage].w = varying.w; } break; case TEXGEN_PASSTHRU: { - Vector4f v = r.v[TexCoord0 + i]; + Vector4f &&varying = v[TexCoord0 + i]; - r.o[T0 + stage].x = v.x; - r.o[T0 + stage].y = v.y; - r.o[T0 + stage].z = v.z; - r.o[T0 + stage].w = v.w; + o[T0 + stage].x = varying.x; + o[T0 + stage].y = varying.y; + o[T0 + stage].z = varying.z; + o[T0 + stage].w = varying.w; if(state.input[TexCoord0 + i]) { switch(state.input[TexCoord0 + i].count) { case 1: - r.o[T0 + stage].y = Float4(1.0f); - r.o[T0 + stage].z = Float4(0.0f); - r.o[T0 + stage].w = Float4(0.0f); + o[T0 + stage].y = Float4(1.0f); + o[T0 + stage].z = Float4(0.0f); + o[T0 + stage].w = Float4(0.0f); break; case 2: - r.o[T0 + stage].z = Float4(1.0f); - r.o[T0 + stage].w = Float4(0.0f); + o[T0 + stage].z = Float4(1.0f); + o[T0 + stage].w = Float4(0.0f); break; case 3: - r.o[T0 + stage].w = Float4(1.0f); + o[T0 + stage].w = Float4(1.0f); break; case 4: break; @@ -606,22 +606,22 @@ namespace sw Nc.w = Float4(1.0f); - r.o[T0 + stage].x = Nc.x; - r.o[T0 + stage].y = Nc.y; - r.o[T0 + stage].z = Nc.z; - r.o[T0 + stage].w = Nc.w; + o[T0 + stage].x = Nc.x; + o[T0 + stage].y = Nc.y; + o[T0 + stage].z = Nc.z; + o[T0 + stage].w = Nc.w; } break; case TEXGEN_POSITION: { - Vector4f Pn = transformBlend(r.v[Position], Pointer(r.data + OFFSET(DrawData,ff.cameraTransformT)), true); // Position in camera space + Vector4f Pn = transformBlend(v[Position], Pointer(data + OFFSET(DrawData,ff.cameraTransformT)), true); // Position in camera space Pn.w = Float4(1.0f); - r.o[T0 + stage].x = Pn.x; - r.o[T0 + stage].y = Pn.y; - r.o[T0 + stage].z = Pn.z; - r.o[T0 + stage].w = Pn.w; + o[T0 + stage].x = Pn.x; + o[T0 + stage].y = Pn.y; + o[T0 + stage].z = Pn.z; + o[T0 + stage].w = Pn.w; } break; case TEXGEN_REFLECTION: @@ -639,7 +639,7 @@ namespace sw Vector4f Ec; // Eye vector in camera space Vector4f N2; - Ec = transformBlend(r.v[Position], Pointer(r.data + OFFSET(DrawData,ff.cameraTransformT)), true); + Ec = transformBlend(v[Position], Pointer(data + OFFSET(DrawData,ff.cameraTransformT)), true); Ec = normalize(Ec); // R = E - 2 * N * (E . N) @@ -669,10 +669,10 @@ namespace sw R.w = Float4(1.0f); - r.o[T0 + stage].x = R.x; - r.o[T0 + stage].y = R.y; - r.o[T0 + stage].z = R.z; - r.o[T0 + stage].w = R.w; + o[T0 + stage].x = R.x; + o[T0 + stage].y = R.y; + o[T0 + stage].z = R.z; + o[T0 + stage].w = R.w; } break; case TEXGEN_SPHEREMAP: @@ -690,7 +690,7 @@ namespace sw Vector4f Ec; // Eye vector in camera space Vector4f N2; - Ec = transformBlend(r.v[Position], Pointer(r.data + OFFSET(DrawData,ff.cameraTransformT)), true); + Ec = transformBlend(v[Position], Pointer(data + OFFSET(DrawData,ff.cameraTransformT)), true); Ec = normalize(Ec); // R = E - 2 * N * (E . N) @@ -726,10 +726,10 @@ namespace sw R.z = Float4(1.0f); R.w = Float4(0.0f); - r.o[T0 + stage].x = R.x; - r.o[T0 + stage].y = R.y; - r.o[T0 + stage].z = R.z; - r.o[T0 + stage].w = R.w; + o[T0 + stage].x = R.x; + o[T0 + stage].y = R.y; + o[T0 + stage].z = R.z; + o[T0 + stage].w = R.w; } break; default: @@ -744,46 +744,46 @@ namespace sw Vector4f T; Vector4f t; - T.x = r.o[T0 + stage].x; - T.y = r.o[T0 + stage].y; - T.z = r.o[T0 + stage].z; - T.w = r.o[T0 + stage].w; + T.x = o[T0 + stage].x; + T.y = o[T0 + stage].y; + T.z = o[T0 + stage].z; + T.w = o[T0 + stage].w; switch(state.textureState[stage].textureTransformCountActive) { case 4: - texTrans3.x = texTrans3.y = texTrans3.z = texTrans3.w = *Pointer(r.data + OFFSET(DrawData,ff.textureTransform[stage][3])); // FIXME: Unpack + texTrans3.x = texTrans3.y = texTrans3.z = texTrans3.w = *Pointer(data + OFFSET(DrawData,ff.textureTransform[stage][3])); // FIXME: Unpack texTrans3.x = texTrans3.x.xxxx; texTrans3.y = texTrans3.y.yyyy; texTrans3.z = texTrans3.z.zzzz; texTrans3.w = texTrans3.w.wwww; t.w = dot4(T, texTrans3); case 3: - texTrans2.x = texTrans2.y = texTrans2.z = texTrans2.w = *Pointer(r.data + OFFSET(DrawData,ff.textureTransform[stage][2])); // FIXME: Unpack + texTrans2.x = texTrans2.y = texTrans2.z = texTrans2.w = *Pointer(data + OFFSET(DrawData,ff.textureTransform[stage][2])); // FIXME: Unpack texTrans2.x = texTrans2.x.xxxx; texTrans2.y = texTrans2.y.yyyy; texTrans2.z = texTrans2.z.zzzz; texTrans2.w = texTrans2.w.wwww; t.z = dot4(T, texTrans2); case 2: - texTrans1.x = texTrans1.y = texTrans1.z = texTrans1.w = *Pointer(r.data + OFFSET(DrawData,ff.textureTransform[stage][1])); // FIXME: Unpack + texTrans1.x = texTrans1.y = texTrans1.z = texTrans1.w = *Pointer(data + OFFSET(DrawData,ff.textureTransform[stage][1])); // FIXME: Unpack texTrans1.x = texTrans1.x.xxxx; texTrans1.y = texTrans1.y.yyyy; texTrans1.z = texTrans1.z.zzzz; texTrans1.w = texTrans1.w.wwww; t.y = dot4(T, texTrans1); case 1: - texTrans0.x = texTrans0.y = texTrans0.z = texTrans0.w = *Pointer(r.data + OFFSET(DrawData,ff.textureTransform[stage][0])); // FIXME: Unpack + texTrans0.x = texTrans0.y = texTrans0.z = texTrans0.w = *Pointer(data + OFFSET(DrawData,ff.textureTransform[stage][0])); // FIXME: Unpack texTrans0.x = texTrans0.x.xxxx; texTrans0.y = texTrans0.y.yyyy; texTrans0.z = texTrans0.z.zzzz; texTrans0.w = texTrans0.w.wwww; t.x = dot4(T, texTrans0); - r.o[T0 + stage].x = t.x; - r.o[T0 + stage].y = t.y; - r.o[T0 + stage].z = t.z; - r.o[T0 + stage].w = t.w; + o[T0 + stage].x = t.x; + o[T0 + stage].y = t.y; + o[T0 + stage].z = t.z; + o[T0 + stage].w = t.w; case 0: break; default: @@ -801,26 +801,26 @@ namespace sw if(state.input[PointSize]) { - r.o[Pts].y = r.v[PointSize].x; + o[Pts].y = v[PointSize].x; } else { - r.o[Pts].y = *Pointer(r.data + OFFSET(DrawData,point.pointSize)); + o[Pts].y = *Pointer(data + OFFSET(DrawData,point.pointSize)); } if(state.pointScaleActive && !state.preTransformed) { - Vector4f p = transformBlend(r.v[Position], Pointer(r.data + OFFSET(DrawData,ff.cameraTransformT)), true); + Vector4f p = transformBlend(v[Position], Pointer(data + OFFSET(DrawData,ff.cameraTransformT)), true); Float4 d = Sqrt(dot3(p, p)); // FIXME: length(p); - Float4 A = *Pointer(r.data + OFFSET(DrawData,point.pointScaleA)); // FIXME: Unpack - Float4 B = *Pointer(r.data + OFFSET(DrawData,point.pointScaleB)); // FIXME: Unpack - Float4 C = *Pointer(r.data + OFFSET(DrawData,point.pointScaleC)); // FIXME: Unpack + Float4 A = *Pointer(data + OFFSET(DrawData,point.pointScaleA)); // FIXME: Unpack + Float4 B = *Pointer(data + OFFSET(DrawData,point.pointScaleB)); // FIXME: Unpack + Float4 C = *Pointer(data + OFFSET(DrawData,point.pointScaleC)); // FIXME: Unpack A = RcpSqrt_pp(A + d * (B + d * C)); - r.o[Pts].y = r.o[Pts].y * Float4(*Pointer(r.data + OFFSET(DrawData,viewportHeight))) * A; // FIXME: Unpack + o[Pts].y = o[Pts].y * Float4(*Pointer(data + OFFSET(DrawData,viewportHeight))) * A; // FIXME: Unpack } } diff --git a/src/Shader/VertexProgram.cpp b/src/Shader/VertexProgram.cpp index a6126dbd9..1999efe0f 100644 --- a/src/Shader/VertexProgram.cpp +++ b/src/Shader/VertexProgram.cpp @@ -20,7 +20,8 @@ namespace sw { - VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader) : VertexRoutine(state, shader) + VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader) + : VertexRoutine(state, shader), shader(shader), r(shader->dynamicallyIndexedTemporaries) { ifDepth = 0; loopRepDepth = 0; @@ -32,6 +33,24 @@ namespace sw { labelBlock[i] = 0; } + + loopDepth = -1; + enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + + if(shader && shader->containsBreakInstruction()) + { + enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + } + + if(shader && shader->containsContinueInstruction()) + { + enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + } + + if(shader->instanceIdDeclared) + { + instanceID = *Pointer(data + OFFSET(DrawData,instanceID)); + } } VertexProgram::~VertexProgram() @@ -46,7 +65,7 @@ namespace sw { for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++) { - sampler[i] = new SamplerCore(r.constants, state.samplerState[i]); + sampler[i] = new SamplerCore(constants, state.samplerState[i]); } if(!state.preTransformed) @@ -65,12 +84,12 @@ namespace sw unsigned short version = shader->getVersion(); - r.enableIndex = 0; - r.stackIndex = 0; + enableIndex = 0; + stackIndex = 0; if(shader->containsLeaveInstruction()) { - r.enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); } // Create all call site return blocks up front @@ -342,79 +361,79 @@ namespace sw case Shader::PARAMETER_TEMP: if(dst.rel.type == Shader::PARAMETER_VOID) { - if(dst.x) pDst.x = r.r[dst.index].x; - if(dst.y) pDst.y = r.r[dst.index].y; - if(dst.z) pDst.z = r.r[dst.index].z; - if(dst.w) pDst.w = r.r[dst.index].w; + if(dst.x) pDst.x = r[dst.index].x; + if(dst.y) pDst.y = r[dst.index].y; + if(dst.z) pDst.z = r[dst.index].z; + if(dst.w) pDst.w = r[dst.index].w; } else { Int a = relativeAddress(dst); - if(dst.x) pDst.x = r.r[dst.index + a].x; - if(dst.y) pDst.y = r.r[dst.index + a].y; - if(dst.z) pDst.z = r.r[dst.index + a].z; - if(dst.w) pDst.w = r.r[dst.index + a].w; + if(dst.x) pDst.x = r[dst.index + a].x; + if(dst.y) pDst.y = r[dst.index + a].y; + if(dst.z) pDst.z = r[dst.index + a].z; + if(dst.w) pDst.w = r[dst.index + a].w; } break; - case Shader::PARAMETER_ADDR: pDst = r.a0; break; + case Shader::PARAMETER_ADDR: pDst = a0; break; case Shader::PARAMETER_RASTOUT: switch(dst.index) { case 0: - if(dst.x) pDst.x = r.o[Pos].x; - if(dst.y) pDst.y = r.o[Pos].y; - if(dst.z) pDst.z = r.o[Pos].z; - if(dst.w) pDst.w = r.o[Pos].w; + if(dst.x) pDst.x = o[Pos].x; + if(dst.y) pDst.y = o[Pos].y; + if(dst.z) pDst.z = o[Pos].z; + if(dst.w) pDst.w = o[Pos].w; break; case 1: - pDst.x = r.o[Fog].x; + pDst.x = o[Fog].x; break; case 2: - pDst.x = r.o[Pts].y; + pDst.x = o[Pts].y; break; default: ASSERT(false); } break; case Shader::PARAMETER_ATTROUT: - if(dst.x) pDst.x = r.o[D0 + dst.index].x; - if(dst.y) pDst.y = r.o[D0 + dst.index].y; - if(dst.z) pDst.z = r.o[D0 + dst.index].z; - if(dst.w) pDst.w = r.o[D0 + dst.index].w; + if(dst.x) pDst.x = o[D0 + dst.index].x; + if(dst.y) pDst.y = o[D0 + dst.index].y; + if(dst.z) pDst.z = o[D0 + dst.index].z; + if(dst.w) pDst.w = o[D0 + dst.index].w; break; case Shader::PARAMETER_TEXCRDOUT: // case Shader::PARAMETER_OUTPUT: if(version < 0x0300) { - if(dst.x) pDst.x = r.o[T0 + dst.index].x; - if(dst.y) pDst.y = r.o[T0 + dst.index].y; - if(dst.z) pDst.z = r.o[T0 + dst.index].z; - if(dst.w) pDst.w = r.o[T0 + dst.index].w; + if(dst.x) pDst.x = o[T0 + dst.index].x; + if(dst.y) pDst.y = o[T0 + dst.index].y; + if(dst.z) pDst.z = o[T0 + dst.index].z; + if(dst.w) pDst.w = o[T0 + dst.index].w; } else { if(dst.rel.type == Shader::PARAMETER_VOID) // Not relative { - if(dst.x) pDst.x = r.o[dst.index].x; - if(dst.y) pDst.y = r.o[dst.index].y; - if(dst.z) pDst.z = r.o[dst.index].z; - if(dst.w) pDst.w = r.o[dst.index].w; + if(dst.x) pDst.x = o[dst.index].x; + if(dst.y) pDst.y = o[dst.index].y; + if(dst.z) pDst.z = o[dst.index].z; + if(dst.w) pDst.w = o[dst.index].w; } else { Int a = relativeAddress(dst); - if(dst.x) pDst.x = r.o[dst.index + a].x; - if(dst.y) pDst.y = r.o[dst.index + a].y; - if(dst.z) pDst.z = r.o[dst.index + a].z; - if(dst.w) pDst.w = r.o[dst.index + a].w; + if(dst.x) pDst.x = o[dst.index + a].x; + if(dst.y) pDst.y = o[dst.index + a].y; + if(dst.z) pDst.z = o[dst.index + a].z; + if(dst.w) pDst.w = o[dst.index + a].w; } } break; - case Shader::PARAMETER_LABEL: break; - case Shader::PARAMETER_PREDICATE: pDst = r.p0; break; - case Shader::PARAMETER_INPUT: break; + case Shader::PARAMETER_LABEL: break; + case Shader::PARAMETER_PREDICATE: pDst = p0; break; + case Shader::PARAMETER_INPUT: break; default: ASSERT(false); } @@ -430,10 +449,10 @@ namespace sw { unsigned char pSwizzle = instruction->predicateSwizzle; - Float4 xPredicate = r.p0[(pSwizzle >> 0) & 0x03]; - Float4 yPredicate = r.p0[(pSwizzle >> 2) & 0x03]; - Float4 zPredicate = r.p0[(pSwizzle >> 4) & 0x03]; - Float4 wPredicate = r.p0[(pSwizzle >> 6) & 0x03]; + Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03]; + Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03]; + Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03]; + Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03]; if(!instruction->predicateNot) { @@ -469,83 +488,83 @@ namespace sw case Shader::PARAMETER_TEMP: if(dst.rel.type == Shader::PARAMETER_VOID) { - if(dst.x) r.r[dst.index].x = d.x; - if(dst.y) r.r[dst.index].y = d.y; - if(dst.z) r.r[dst.index].z = d.z; - if(dst.w) r.r[dst.index].w = d.w; + if(dst.x) r[dst.index].x = d.x; + if(dst.y) r[dst.index].y = d.y; + if(dst.z) r[dst.index].z = d.z; + if(dst.w) r[dst.index].w = d.w; } else { Int a = relativeAddress(dst); - if(dst.x) r.r[dst.index + a].x = d.x; - if(dst.y) r.r[dst.index + a].y = d.y; - if(dst.z) r.r[dst.index + a].z = d.z; - if(dst.w) r.r[dst.index + a].w = d.w; + if(dst.x) r[dst.index + a].x = d.x; + if(dst.y) r[dst.index + a].y = d.y; + if(dst.z) r[dst.index + a].z = d.z; + if(dst.w) r[dst.index + a].w = d.w; } break; case Shader::PARAMETER_ADDR: - if(dst.x) r.a0.x = d.x; - if(dst.y) r.a0.y = d.y; - if(dst.z) r.a0.z = d.z; - if(dst.w) r.a0.w = d.w; + if(dst.x) a0.x = d.x; + if(dst.y) a0.y = d.y; + if(dst.z) a0.z = d.z; + if(dst.w) a0.w = d.w; break; case Shader::PARAMETER_RASTOUT: switch(dst.index) { case 0: - if(dst.x) r.o[Pos].x = d.x; - if(dst.y) r.o[Pos].y = d.y; - if(dst.z) r.o[Pos].z = d.z; - if(dst.w) r.o[Pos].w = d.w; + if(dst.x) o[Pos].x = d.x; + if(dst.y) o[Pos].y = d.y; + if(dst.z) o[Pos].z = d.z; + if(dst.w) o[Pos].w = d.w; break; case 1: - r.o[Fog].x = d.x; + o[Fog].x = d.x; break; case 2: - r.o[Pts].y = d.x; + o[Pts].y = d.x; break; default: ASSERT(false); } break; case Shader::PARAMETER_ATTROUT: - if(dst.x) r.o[D0 + dst.index].x = d.x; - if(dst.y) r.o[D0 + dst.index].y = d.y; - if(dst.z) r.o[D0 + dst.index].z = d.z; - if(dst.w) r.o[D0 + dst.index].w = d.w; + if(dst.x) o[D0 + dst.index].x = d.x; + if(dst.y) o[D0 + dst.index].y = d.y; + if(dst.z) o[D0 + dst.index].z = d.z; + if(dst.w) o[D0 + dst.index].w = d.w; break; case Shader::PARAMETER_TEXCRDOUT: // case Shader::PARAMETER_OUTPUT: if(version < 0x0300) { - if(dst.x) r.o[T0 + dst.index].x = d.x; - if(dst.y) r.o[T0 + dst.index].y = d.y; - if(dst.z) r.o[T0 + dst.index].z = d.z; - if(dst.w) r.o[T0 + dst.index].w = d.w; + if(dst.x) o[T0 + dst.index].x = d.x; + if(dst.y) o[T0 + dst.index].y = d.y; + if(dst.z) o[T0 + dst.index].z = d.z; + if(dst.w) o[T0 + dst.index].w = d.w; } else { if(dst.rel.type == Shader::PARAMETER_VOID) // Not relative { - if(dst.x) r.o[dst.index].x = d.x; - if(dst.y) r.o[dst.index].y = d.y; - if(dst.z) r.o[dst.index].z = d.z; - if(dst.w) r.o[dst.index].w = d.w; + if(dst.x) o[dst.index].x = d.x; + if(dst.y) o[dst.index].y = d.y; + if(dst.z) o[dst.index].z = d.z; + if(dst.w) o[dst.index].w = d.w; } else { Int a = relativeAddress(dst); - if(dst.x) r.o[dst.index + a].x = d.x; - if(dst.y) r.o[dst.index + a].y = d.y; - if(dst.z) r.o[dst.index + a].z = d.z; - if(dst.w) r.o[dst.index + a].w = d.w; + if(dst.x) o[dst.index + a].x = d.x; + if(dst.y) o[dst.index + a].y = d.y; + if(dst.z) o[dst.index + a].z = d.z; + if(dst.w) o[dst.index + a].w = d.w; } } break; - case Shader::PARAMETER_LABEL: break; - case Shader::PARAMETER_PREDICATE: r.p0 = d; break; - case Shader::PARAMETER_INPUT: break; + case Shader::PARAMETER_LABEL: break; + case Shader::PARAMETER_PREDICATE: p0 = d; break; + case Shader::PARAMETER_INPUT: break; default: ASSERT(false); } @@ -571,28 +590,28 @@ namespace sw case 0xFF: continue; case Shader::USAGE_PSIZE: - r.o[i].y = r.v[i].x; + o[i].y = v[i].x; break; case Shader::USAGE_TEXCOORD: - r.o[i].x = r.v[i].x; - r.o[i].y = r.v[i].y; - r.o[i].z = r.v[i].z; - r.o[i].w = r.v[i].w; + o[i].x = v[i].x; + o[i].y = v[i].y; + o[i].z = v[i].z; + o[i].w = v[i].w; break; case Shader::USAGE_POSITION: - r.o[i].x = r.v[i].x; - r.o[i].y = r.v[i].y; - r.o[i].z = r.v[i].z; - r.o[i].w = r.v[i].w; + o[i].x = v[i].x; + o[i].y = v[i].y; + o[i].z = v[i].z; + o[i].w = v[i].w; break; case Shader::USAGE_COLOR: - r.o[i].x = r.v[i].x; - r.o[i].y = r.v[i].y; - r.o[i].z = r.v[i].z; - r.o[i].w = r.v[i].w; + o[i].x = v[i].x; + o[i].y = v[i].y; + o[i].z = v[i].z; + o[i].w = v[i].w; break; case Shader::USAGE_FOG: - r.o[i].x = r.v[i].x; + o[i].x = v[i].x; break; default: ASSERT(false); @@ -601,28 +620,28 @@ namespace sw } else { - r.o[Pos].x = r.v[PositionT].x; - r.o[Pos].y = r.v[PositionT].y; - r.o[Pos].z = r.v[PositionT].z; - r.o[Pos].w = r.v[PositionT].w; + o[Pos].x = v[PositionT].x; + o[Pos].y = v[PositionT].y; + o[Pos].z = v[PositionT].z; + o[Pos].w = v[PositionT].w; for(int i = 0; i < 2; i++) { - r.o[D0 + i].x = r.v[Color0 + i].x; - r.o[D0 + i].y = r.v[Color0 + i].y; - r.o[D0 + i].z = r.v[Color0 + i].z; - r.o[D0 + i].w = r.v[Color0 + i].w; + o[D0 + i].x = v[Color0 + i].x; + o[D0 + i].y = v[Color0 + i].y; + o[D0 + i].z = v[Color0 + i].z; + o[D0 + i].w = v[Color0 + i].w; } for(int i = 0; i < 8; i++) { - r.o[T0 + i].x = r.v[TexCoord0 + i].x; - r.o[T0 + i].y = r.v[TexCoord0 + i].y; - r.o[T0 + i].z = r.v[TexCoord0 + i].z; - r.o[T0 + i].w = r.v[TexCoord0 + i].w; + o[T0 + i].x = v[TexCoord0 + i].x; + o[T0 + i].y = v[TexCoord0 + i].y; + o[T0 + i].z = v[TexCoord0 + i].z; + o[T0 + i].w = v[TexCoord0 + i].w; } - r.o[Pts].y = r.v[PointSize].x; + o[Pts].y = v[PointSize].x; } } @@ -636,11 +655,11 @@ namespace sw case Shader::PARAMETER_TEMP: if(src.rel.type == Shader::PARAMETER_VOID) { - reg = r.r[i]; + reg = r[i]; } else { - reg = r.r[i + relativeAddress(src)]; + reg = r[i + relativeAddress(src)]; } break; case Shader::PARAMETER_CONST: @@ -649,25 +668,25 @@ namespace sw case Shader::PARAMETER_INPUT: if(src.rel.type == Shader::PARAMETER_VOID) { - reg = r.v[i]; + reg = v[i]; } else { - reg = r.v[i + relativeAddress(src)]; + reg = v[i + relativeAddress(src)]; } break; - case Shader::PARAMETER_VOID: return r.r[0]; // Dummy + case Shader::PARAMETER_VOID: return r[0]; // Dummy case Shader::PARAMETER_FLOAT4LITERAL: reg.x = Float4(src.value[0]); reg.y = Float4(src.value[1]); reg.z = Float4(src.value[2]); reg.w = Float4(src.value[3]); break; - case Shader::PARAMETER_ADDR: reg = r.a0; break; - case Shader::PARAMETER_CONSTBOOL: return r.r[0]; // Dummy - case Shader::PARAMETER_CONSTINT: return r.r[0]; // Dummy - case Shader::PARAMETER_LOOP: return r.r[0]; // Dummy - case Shader::PARAMETER_PREDICATE: return r.r[0]; // Dummy + case Shader::PARAMETER_ADDR: reg = a0; break; + case Shader::PARAMETER_CONSTBOOL: return r[0]; // Dummy + case Shader::PARAMETER_CONSTINT: return r[0]; // Dummy + case Shader::PARAMETER_LOOP: return r[0]; // Dummy + case Shader::PARAMETER_PREDICATE: return r[0]; // Dummy case Shader::PARAMETER_SAMPLER: if(src.rel.type == Shader::PARAMETER_VOID) { @@ -675,21 +694,21 @@ namespace sw } else if(src.rel.type == Shader::PARAMETER_TEMP) { - reg.x = As(Int4(i) + As(r.r[src.rel.index].x)); + reg.x = As(Int4(i) + As(r[src.rel.index].x)); } return reg; case Shader::PARAMETER_OUTPUT: if(src.rel.type == Shader::PARAMETER_VOID) { - reg = r.o[i]; + reg = o[i]; } else { - reg = r.o[i + relativeAddress(src)]; + reg = o[i + relativeAddress(src)]; } break; case Shader::PARAMETER_MISCTYPE: - reg.x = As(Int(r.instanceID)); + reg.x = As(Int(instanceID)); return reg; default: ASSERT(false); @@ -748,7 +767,7 @@ namespace sw if(src.rel.type == Shader::PARAMETER_VOID) // Not relative { - c.x = c.y = c.z = c.w = *Pointer(r.data + OFFSET(DrawData,vs.c[i])); + c.x = c.y = c.z = c.w = *Pointer(data + OFFSET(DrawData,vs.c[i])); c.x = c.x.xxxx; c.y = c.y.yyyy; @@ -778,9 +797,9 @@ namespace sw } else if(src.rel.type == Shader::PARAMETER_LOOP) { - Int loopCounter = r.aL[r.loopDepth]; + Int loopCounter = aL[loopDepth]; - c.x = c.y = c.z = c.w = *Pointer(r.data + OFFSET(DrawData,vs.c[i]) + loopCounter * 16); + c.x = c.y = c.z = c.w = *Pointer(data + OFFSET(DrawData,vs.c[i]) + loopCounter * 16); c.x = c.x.xxxx; c.y = c.y.yyyy; @@ -793,7 +812,7 @@ namespace sw { Int a = relativeAddress(src); - c.x = c.y = c.z = c.w = *Pointer(r.data + OFFSET(DrawData,vs.c[i]) + a * 16); + c.x = c.y = c.z = c.w = *Pointer(data + OFFSET(DrawData,vs.c[i]) + a * 16); c.x = c.x.xxxx; c.y = c.y.yyyy; @@ -807,11 +826,11 @@ namespace sw switch(src.rel.type) { - case Shader::PARAMETER_ADDR: a = r.a0[component]; break; - case Shader::PARAMETER_TEMP: a = r.r[src.rel.index][component]; break; - case Shader::PARAMETER_INPUT: a = r.v[src.rel.index][component]; break; - case Shader::PARAMETER_OUTPUT: a = r.o[src.rel.index][component]; break; - case Shader::PARAMETER_CONST: a = *Pointer(r.data + OFFSET(DrawData,vs.c[src.rel.index][component])); break; + case Shader::PARAMETER_ADDR: a = a0[component]; break; + case Shader::PARAMETER_TEMP: a = r[src.rel.index][component]; break; + case Shader::PARAMETER_INPUT: a = v[src.rel.index][component]; break; + case Shader::PARAMETER_OUTPUT: a = o[src.rel.index][component]; break; + case Shader::PARAMETER_CONST: a = *Pointer(data + OFFSET(DrawData,vs.c[src.rel.index][component])); break; default: ASSERT(false); } @@ -824,10 +843,10 @@ namespace sw Int index2 = Extract(index, 2); Int index3 = Extract(index, 3); - c.x = *Pointer(r.data + OFFSET(DrawData,vs.c) + index0 * 16, 16); - c.y = *Pointer(r.data + OFFSET(DrawData,vs.c) + index1 * 16, 16); - c.z = *Pointer(r.data + OFFSET(DrawData,vs.c) + index2 * 16, 16); - c.w = *Pointer(r.data + OFFSET(DrawData,vs.c) + index3 * 16, 16); + c.x = *Pointer(data + OFFSET(DrawData,vs.c) + index0 * 16, 16); + c.y = *Pointer(data + OFFSET(DrawData,vs.c) + index1 * 16, 16); + c.z = *Pointer(data + OFFSET(DrawData,vs.c) + index2 * 16, 16); + c.w = *Pointer(data + OFFSET(DrawData,vs.c) + index3 * 16, 16); transpose4x4(c.x, c.y, c.z, c.w); } @@ -842,25 +861,25 @@ namespace sw if(var.rel.type == Shader::PARAMETER_TEMP) { - return As(Extract(r.r[var.rel.index].x, 0)) * var.rel.scale; + return As(Extract(r[var.rel.index].x, 0)) * var.rel.scale; } else if(var.rel.type == Shader::PARAMETER_INPUT) { - return As(Extract(r.v[var.rel.index].x, 0)) * var.rel.scale; + return As(Extract(v[var.rel.index].x, 0)) * var.rel.scale; } else if(var.rel.type == Shader::PARAMETER_OUTPUT) { - return As(Extract(r.o[var.rel.index].x, 0)) * var.rel.scale; + return As(Extract(o[var.rel.index].x, 0)) * var.rel.scale; } else if(var.rel.type == Shader::PARAMETER_CONST) { - RValue c = *Pointer(r.data + OFFSET(DrawData, vs.c[var.rel.index])); + RValue c = *Pointer(data + OFFSET(DrawData, vs.c[var.rel.index])); return Extract(c, 0) * var.rel.scale; } else if(var.rel.type == Shader::PARAMETER_LOOP) { - return r.aL[r.loopDepth]; + return aL[loopDepth]; } else ASSERT(false); @@ -869,23 +888,23 @@ namespace sw Int4 VertexProgram::enableMask(const Shader::Instruction *instruction) { - Int4 enable = instruction->analysisBranch ? Int4(r.enableStack[r.enableIndex]) : Int4(0xFFFFFFFF); + Int4 enable = instruction->analysisBranch ? Int4(enableStack[enableIndex]) : Int4(0xFFFFFFFF); if(!whileTest) { if(shader->containsBreakInstruction() && instruction->analysisBreak) { - enable &= r.enableBreak; + enable &= enableBreak; } if(shader->containsContinueInstruction() && instruction->analysisContinue) { - enable &= r.enableContinue; + enable &= enableContinue; } if(shader->containsLeaveInstruction() && instruction->analysisLeave) { - enable &= r.enableLeave; + enable &= enableLeave; } } @@ -956,20 +975,20 @@ namespace sw if(breakDepth == 0) { - r.enableIndex = r.enableIndex - breakDepth; + enableIndex = enableIndex - breakDepth; Nucleus::createBr(endBlock); } else { - r.enableBreak = r.enableBreak & ~r.enableStack[r.enableIndex]; - Bool allBreak = SignMask(r.enableBreak) == 0x0; + enableBreak = enableBreak & ~enableStack[enableIndex]; + Bool allBreak = SignMask(enableBreak) == 0x0; - r.enableIndex = r.enableIndex - breakDepth; + enableIndex = enableIndex - breakDepth; branch(allBreak, endBlock, deadBlock); } Nucleus::setInsertBlock(deadBlock); - r.enableIndex = r.enableIndex + breakDepth; + enableIndex = enableIndex + breakDepth; } void VertexProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control) @@ -993,7 +1012,7 @@ namespace sw void VertexProgram::BREAKP(const Src &predicateRegister) // FIXME: Factor out parts common with BREAKC { - Int4 condition = As(r.p0[predicateRegister.swizzle & 0x3]); + Int4 condition = As(p0[predicateRegister.swizzle & 0x3]); if(predicateRegister.modifier == Shader::MODIFIER_NOT) { @@ -1005,24 +1024,24 @@ namespace sw void VertexProgram::BREAK(Int4 &condition) { - condition &= r.enableStack[r.enableIndex]; + condition &= enableStack[enableIndex]; llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock(); llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1]; - r.enableBreak = r.enableBreak & ~condition; - Bool allBreak = SignMask(r.enableBreak) == 0x0; + enableBreak = enableBreak & ~condition; + Bool allBreak = SignMask(enableBreak) == 0x0; - r.enableIndex = r.enableIndex - breakDepth; + enableIndex = enableIndex - breakDepth; branch(allBreak, endBlock, continueBlock); Nucleus::setInsertBlock(continueBlock); - r.enableIndex = r.enableIndex + breakDepth; + enableIndex = enableIndex + breakDepth; } void VertexProgram::CONTINUE() { - r.enableContinue = r.enableContinue & ~r.enableStack[r.enableIndex]; + enableContinue = enableContinue & ~enableStack[enableIndex]; } void VertexProgram::TEST() @@ -1039,15 +1058,15 @@ namespace sw if(callRetBlock[labelIndex].size() > 1) { - r.callStack[r.stackIndex++] = UInt(callSiteIndex); + callStack[stackIndex++] = UInt(callSiteIndex); } - Int4 restoreLeave = r.enableLeave; + Int4 restoreLeave = enableLeave; Nucleus::createBr(labelBlock[labelIndex]); Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); - r.enableLeave = restoreLeave; + enableLeave = restoreLeave; } void VertexProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src) @@ -1065,7 +1084,7 @@ namespace sw void VertexProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister) { - Bool condition = (*Pointer(r.data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0)); // FIXME + Bool condition = (*Pointer(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0)); // FIXME if(boolRegister.modifier == Shader::MODIFIER_NOT) { @@ -1079,27 +1098,27 @@ namespace sw if(callRetBlock[labelIndex].size() > 1) { - r.callStack[r.stackIndex++] = UInt(callSiteIndex); + callStack[stackIndex++] = UInt(callSiteIndex); } - Int4 restoreLeave = r.enableLeave; + Int4 restoreLeave = enableLeave; branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]); Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); - r.enableLeave = restoreLeave; + enableLeave = restoreLeave; } void VertexProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister) { - Int4 condition = As(r.p0[predicateRegister.swizzle & 0x3]); + Int4 condition = As(p0[predicateRegister.swizzle & 0x3]); if(predicateRegister.modifier == Shader::MODIFIER_NOT) { condition = ~condition; } - condition &= r.enableStack[r.enableIndex]; + condition &= enableStack[enableIndex]; if(!labelBlock[labelIndex]) { @@ -1108,19 +1127,19 @@ namespace sw if(callRetBlock[labelIndex].size() > 1) { - r.callStack[r.stackIndex++] = UInt(callSiteIndex); + callStack[stackIndex++] = UInt(callSiteIndex); } - r.enableIndex++; - r.enableStack[r.enableIndex] = condition; - Int4 restoreLeave = r.enableLeave; + enableIndex++; + enableStack[enableIndex] = condition; + Int4 restoreLeave = enableLeave; Bool notAllFalse = SignMask(condition) != 0; branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]); Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); - r.enableIndex--; - r.enableLeave = restoreLeave; + enableIndex--; + enableLeave = restoreLeave; } void VertexProgram::ELSE() @@ -1132,12 +1151,12 @@ namespace sw if(isConditionalIf[ifDepth]) { - Int4 condition = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1]; + Int4 condition = ~enableStack[enableIndex] & enableStack[enableIndex - 1]; Bool notAllFalse = SignMask(condition) != 0; branch(notAllFalse, falseBlock, endBlock); - r.enableStack[r.enableIndex] = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1]; + enableStack[enableIndex] = ~enableStack[enableIndex] & enableStack[enableIndex - 1]; } else { @@ -1162,7 +1181,7 @@ namespace sw if(isConditionalIf[ifDepth]) { breakDepth--; - r.enableIndex--; + enableIndex--; } } @@ -1170,7 +1189,7 @@ namespace sw { loopRepDepth--; - r.aL[r.loopDepth] = r.aL[r.loopDepth] + r.increment[r.loopDepth]; // FIXME: += + aL[loopDepth] = aL[loopDepth] + increment[loopDepth]; // FIXME: += llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; @@ -1178,8 +1197,8 @@ namespace sw Nucleus::createBr(testBlock); Nucleus::setInsertBlock(endBlock); - r.loopDepth--; - r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + loopDepth--; + enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); } void VertexProgram::ENDREP() @@ -1192,8 +1211,8 @@ namespace sw Nucleus::createBr(testBlock); Nucleus::setInsertBlock(endBlock); - r.loopDepth--; - r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + loopDepth--; + enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); } void VertexProgram::ENDWHILE() @@ -1206,8 +1225,8 @@ namespace sw Nucleus::createBr(testBlock); Nucleus::setInsertBlock(endBlock); - r.enableIndex--; - r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + enableIndex--; + enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); whileTest = false; } @@ -1232,7 +1251,7 @@ namespace sw { ASSERT(ifDepth < 24 + 4); - Bool condition = (*Pointer(r.data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0)); // FIXME + Bool condition = (*Pointer(data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0)); // FIXME if(boolRegister.modifier == Shader::MODIFIER_NOT) { @@ -1252,7 +1271,7 @@ namespace sw void VertexProgram::IFp(const Src &predicateRegister) { - Int4 condition = As(r.p0[predicateRegister.swizzle & 0x3]); + Int4 condition = As(p0[predicateRegister.swizzle & 0x3]); if(predicateRegister.modifier == Shader::MODIFIER_NOT) { @@ -1283,10 +1302,10 @@ namespace sw void VertexProgram::IF(Int4 &condition) { - condition &= r.enableStack[r.enableIndex]; + condition &= enableStack[enableIndex]; - r.enableIndex++; - r.enableStack[r.enableIndex] = condition; + enableIndex++; + enableStack[enableIndex] = condition; llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock(); llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock(); @@ -1315,16 +1334,16 @@ namespace sw void VertexProgram::LOOP(const Src &integerRegister) { - r.loopDepth++; + loopDepth++; - r.iteration[r.loopDepth] = *Pointer(r.data + OFFSET(DrawData,vs.i[integerRegister.index][0])); - r.aL[r.loopDepth] = *Pointer(r.data + OFFSET(DrawData,vs.i[integerRegister.index][1])); - r.increment[r.loopDepth] = *Pointer(r.data + OFFSET(DrawData,vs.i[integerRegister.index][2])); + iteration[loopDepth] = *Pointer(data + OFFSET(DrawData,vs.i[integerRegister.index][0])); + aL[loopDepth] = *Pointer(data + OFFSET(DrawData,vs.i[integerRegister.index][1])); + increment[loopDepth] = *Pointer(data + OFFSET(DrawData,vs.i[integerRegister.index][2])); // FIXME: Compiles to two instructions? - If(r.increment[r.loopDepth] == 0) + If(increment[loopDepth] == 0) { - r.increment[r.loopDepth] = 1; + increment[loopDepth] = 1; } llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock(); @@ -1338,10 +1357,10 @@ namespace sw Nucleus::createBr(testBlock); Nucleus::setInsertBlock(testBlock); - branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock); + branch(iteration[loopDepth] > 0, loopBlock, endBlock); Nucleus::setInsertBlock(loopBlock); - r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1; // FIXME: -- + iteration[loopDepth] = iteration[loopDepth] - 1; // FIXME: -- loopRepDepth++; breakDepth = 0; @@ -1349,10 +1368,10 @@ namespace sw void VertexProgram::REP(const Src &integerRegister) { - r.loopDepth++; + loopDepth++; - r.iteration[r.loopDepth] = *Pointer(r.data + OFFSET(DrawData,vs.i[integerRegister.index][0])); - r.aL[r.loopDepth] = r.aL[r.loopDepth - 1]; + iteration[loopDepth] = *Pointer(data + OFFSET(DrawData,vs.i[integerRegister.index][0])); + aL[loopDepth] = aL[loopDepth - 1]; llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock(); llvm::BasicBlock *testBlock = Nucleus::createBasicBlock(); @@ -1365,10 +1384,10 @@ namespace sw Nucleus::createBr(testBlock); Nucleus::setInsertBlock(testBlock); - branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock); + branch(iteration[loopDepth] > 0, loopBlock, endBlock); Nucleus::setInsertBlock(loopBlock); - r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1; // FIXME: -- + iteration[loopDepth] = iteration[loopDepth] - 1; // FIXME: -- loopRepDepth++; breakDepth = 0; @@ -1376,7 +1395,7 @@ namespace sw void VertexProgram::WHILE(const Src &temporaryRegister) { - r.enableIndex++; + enableIndex++; llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock(); llvm::BasicBlock *testBlock = Nucleus::createBasicBlock(); @@ -1385,24 +1404,24 @@ namespace sw loopRepTestBlock[loopRepDepth] = testBlock; loopRepEndBlock[loopRepDepth] = endBlock; - Int4 restoreBreak = r.enableBreak; - Int4 restoreContinue = r.enableContinue; + Int4 restoreBreak = enableBreak; + Int4 restoreContinue = enableContinue; // FIXME: jump(testBlock) Nucleus::createBr(testBlock); Nucleus::setInsertBlock(testBlock); - r.enableContinue = restoreContinue; + enableContinue = restoreContinue; const Vector4f &src = fetchRegisterF(temporaryRegister); Int4 condition = As(src.x); - condition &= r.enableStack[r.enableIndex - 1]; - r.enableStack[r.enableIndex] = condition; + condition &= enableStack[enableIndex - 1]; + enableStack[enableIndex] = condition; Bool notAllFalse = SignMask(condition) != 0; branch(notAllFalse, loopBlock, endBlock); Nucleus::setInsertBlock(endBlock); - r.enableBreak = restoreBreak; + enableBreak = restoreBreak; Nucleus::setInsertBlock(loopBlock); @@ -1424,7 +1443,7 @@ namespace sw if(callRetBlock[currentLabel].size() > 1) // Pop the return destination from the call stack { // FIXME: Encapsulate - UInt index = r.callStack[--r.stackIndex]; + UInt index = callStack[--stackIndex]; llvm::Value *value = index.loadValue(); llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size()); @@ -1450,7 +1469,7 @@ namespace sw void VertexProgram::LEAVE() { - r.enableLeave = r.enableLeave & ~r.enableStack[r.enableIndex]; + enableLeave = enableLeave & ~enableStack[enableIndex]; // FIXME: Return from function if all instances left // FIXME: Use enableLeave in other control-flow constructs @@ -1511,7 +1530,7 @@ namespace sw void VertexProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1) { - Pointer textureMipmap = r.data + OFFSET(DrawData, mipmap[16]) + src1.index * sizeof(Texture) + OFFSET(Texture, mipmap); + Pointer textureMipmap = data + OFFSET(DrawData, mipmap[16]) + src1.index * sizeof(Texture) + OFFSET(Texture, mipmap); for(int i = 0; i < 4; ++i) { Pointer mipmap = textureMipmap + (As(Extract(lod, i)) + Int(1)) * sizeof(Mipmap); @@ -1525,8 +1544,8 @@ namespace sw { if(s.type == Shader::PARAMETER_SAMPLER && s.rel.type == Shader::PARAMETER_VOID) { - Pointer texture = r.data + OFFSET(DrawData,mipmap[16]) + s.index * sizeof(Texture); - sampler[s.index]->sampleTexture(texture, c, u, v, w, q, r.a0, r.a0, false, false, true); + Pointer texture = data + OFFSET(DrawData,mipmap[16]) + s.index * sizeof(Texture); + sampler[s.index]->sampleTexture(texture, c, u, v, w, q, a0, a0, false, false, true); } else { @@ -1538,8 +1557,8 @@ namespace sw { If(index == i) { - Pointer texture = r.data + OFFSET(DrawData,mipmap[16]) + i * sizeof(Texture); - sampler[i]->sampleTexture(texture, c, u, v, w, q, r.a0, r.a0, false, false, true); + Pointer texture = data + OFFSET(DrawData,mipmap[16]) + i * sizeof(Texture); + sampler[i]->sampleTexture(texture, c, u, v, w, q, a0, a0, false, false, true); // FIXME: When the sampler states are the same, we could use one sampler and just index the texture } } diff --git a/src/Shader/VertexProgram.hpp b/src/Shader/VertexProgram.hpp index 445801ac7..8cacf31b1 100644 --- a/src/Shader/VertexProgram.hpp +++ b/src/Shader/VertexProgram.hpp @@ -32,6 +32,28 @@ namespace sw virtual ~VertexProgram(); private: + const VertexShader *const shader; + + RegisterArray<4096> r; // Temporary registers + Vector4f a0; + Array aL; + Vector4f p0; + + Array increment; + Array iteration; + + Int loopDepth; + Int stackIndex; // FIXME: Inc/decrement callStack + Array callStack; + + Int enableIndex; + Array enableStack; + Int4 enableBreak; + Int4 enableContinue; + Int4 enableLeave; + + Int instanceID; + typedef Shader::DestinationParameter Dst; typedef Shader::SourceParameter Src; typedef Shader::Control Control; diff --git a/src/Shader/VertexRoutine.cpp b/src/Shader/VertexRoutine.cpp index 87d1fa00f..96f48e609 100644 --- a/src/Shader/VertexRoutine.cpp +++ b/src/Shader/VertexRoutine.cpp @@ -23,7 +23,10 @@ namespace sw extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1] - VertexRoutine::VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader) : r(shader), state(state), shader(shader) + VertexRoutine::VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader) + : v(shader && shader->dynamicallyIndexedInput), + o(shader && shader->dynamicallyIndexedOutput), + state(state) { } @@ -41,12 +44,7 @@ namespace sw UInt vertexCount = *Pointer(task + OFFSET(VertexTask,vertexCount)); - r.data = data; - r.constants = *Pointer>(data + OFFSET(DrawData,constants)); - if(shader && shader->instanceIdDeclared) - { - r.instanceID = *Pointer(data + OFFSET(DrawData, instanceID)); - } + constants = *Pointer>(data + OFFSET(DrawData,constants)); Do { @@ -84,10 +82,10 @@ namespace sw { for(int i = 0; i < VERTEX_ATTRIBUTES; i++) { - Pointer input = *Pointer>(r.data + OFFSET(DrawData,input) + sizeof(void*) * i); - UInt stride = *Pointer(r.data + OFFSET(DrawData,stride) + sizeof(unsigned int) * i); + Pointer input = *Pointer>(data + OFFSET(DrawData,input) + sizeof(void*) * i); + UInt stride = *Pointer(data + OFFSET(DrawData,stride) + sizeof(unsigned int) * i); - r.v[i] = readStream(input, stride, state.input[i], index); + v[i] = readStream(input, stride, state.input[i], index); } } @@ -95,39 +93,39 @@ namespace sw { int pos = state.positionRegister; - Int4 maxX = CmpLT(r.o[pos].w, r.o[pos].x); - Int4 maxY = CmpLT(r.o[pos].w, r.o[pos].y); - Int4 maxZ = CmpLT(r.o[pos].w, r.o[pos].z); + Int4 maxX = CmpLT(o[pos].w, o[pos].x); + Int4 maxY = CmpLT(o[pos].w, o[pos].y); + Int4 maxZ = CmpLT(o[pos].w, o[pos].z); - Int4 minX = CmpNLE(-r.o[pos].w, r.o[pos].x); - Int4 minY = CmpNLE(-r.o[pos].w, r.o[pos].y); - Int4 minZ = CmpNLE(Float4(0.0f), r.o[pos].z); + Int4 minX = CmpNLE(-o[pos].w, o[pos].x); + Int4 minY = CmpNLE(-o[pos].w, o[pos].y); + Int4 minZ = CmpNLE(Float4(0.0f), o[pos].z); Int flags; flags = SignMask(maxX); - r.clipFlags = *Pointer(r.constants + OFFSET(Constants,maxX) + flags * 4); // FIXME: Array indexing + clipFlags = *Pointer(constants + OFFSET(Constants,maxX) + flags * 4); // FIXME: Array indexing flags = SignMask(maxY); - r.clipFlags |= *Pointer(r.constants + OFFSET(Constants,maxY) + flags * 4); + clipFlags |= *Pointer(constants + OFFSET(Constants,maxY) + flags * 4); flags = SignMask(maxZ); - r.clipFlags |= *Pointer(r.constants + OFFSET(Constants,maxZ) + flags * 4); + clipFlags |= *Pointer(constants + OFFSET(Constants,maxZ) + flags * 4); flags = SignMask(minX); - r.clipFlags |= *Pointer(r.constants + OFFSET(Constants,minX) + flags * 4); + clipFlags |= *Pointer(constants + OFFSET(Constants,minX) + flags * 4); flags = SignMask(minY); - r.clipFlags |= *Pointer(r.constants + OFFSET(Constants,minY) + flags * 4); + clipFlags |= *Pointer(constants + OFFSET(Constants,minY) + flags * 4); flags = SignMask(minZ); - r.clipFlags |= *Pointer(r.constants + OFFSET(Constants,minZ) + flags * 4); + clipFlags |= *Pointer(constants + OFFSET(Constants,minZ) + flags * 4); - Int4 finiteX = CmpLE(Abs(r.o[pos].x), *Pointer(r.constants + OFFSET(Constants,maxPos))); - Int4 finiteY = CmpLE(Abs(r.o[pos].y), *Pointer(r.constants + OFFSET(Constants,maxPos))); - Int4 finiteZ = CmpLE(Abs(r.o[pos].z), *Pointer(r.constants + OFFSET(Constants,maxPos))); + Int4 finiteX = CmpLE(Abs(o[pos].x), *Pointer(constants + OFFSET(Constants,maxPos))); + Int4 finiteY = CmpLE(Abs(o[pos].y), *Pointer(constants + OFFSET(Constants,maxPos))); + Int4 finiteZ = CmpLE(Abs(o[pos].z), *Pointer(constants + OFFSET(Constants,maxPos))); flags = SignMask(finiteX & finiteY & finiteZ); - r.clipFlags |= *Pointer(r.constants + OFFSET(Constants,fini) + flags * 4); + clipFlags |= *Pointer(constants + OFFSET(Constants,fini) + flags * 4); if(state.preTransformed) { - r.clipFlags &= 0xFBFBFBFB; // Don't clip against far clip plane + clipFlags &= 0xFBFBFBFB; // Don't clip against far clip plane } } @@ -179,10 +177,10 @@ namespace sw if(stream.normalized) { - if(stream.count >= 1) v.x *= *Pointer(r.constants + OFFSET(Constants,unscaleByte)); - if(stream.count >= 2) v.y *= *Pointer(r.constants + OFFSET(Constants,unscaleByte)); - if(stream.count >= 3) v.z *= *Pointer(r.constants + OFFSET(Constants,unscaleByte)); - if(stream.count >= 4) v.w *= *Pointer(r.constants + OFFSET(Constants,unscaleByte)); + if(stream.count >= 1) v.x *= *Pointer(constants + OFFSET(Constants,unscaleByte)); + if(stream.count >= 2) v.y *= *Pointer(constants + OFFSET(Constants,unscaleByte)); + if(stream.count >= 3) v.z *= *Pointer(constants + OFFSET(Constants,unscaleByte)); + if(stream.count >= 4) v.w *= *Pointer(constants + OFFSET(Constants,unscaleByte)); } } break; @@ -197,19 +195,19 @@ namespace sw if(stream.normalized) { - if(stream.count >= 1) v.x *= *Pointer(r.constants + OFFSET(Constants,unscaleSByte)); - if(stream.count >= 2) v.y *= *Pointer(r.constants + OFFSET(Constants,unscaleSByte)); - if(stream.count >= 3) v.z *= *Pointer(r.constants + OFFSET(Constants,unscaleSByte)); - if(stream.count >= 4) v.w *= *Pointer(r.constants + OFFSET(Constants,unscaleSByte)); + if(stream.count >= 1) v.x *= *Pointer(constants + OFFSET(Constants,unscaleSByte)); + if(stream.count >= 2) v.y *= *Pointer(constants + OFFSET(Constants,unscaleSByte)); + if(stream.count >= 3) v.z *= *Pointer(constants + OFFSET(Constants,unscaleSByte)); + if(stream.count >= 4) v.w *= *Pointer(constants + OFFSET(Constants,unscaleSByte)); } } break; case STREAMTYPE_COLOR: { - v.x = Float4(*Pointer(source0)) * *Pointer(r.constants + OFFSET(Constants,unscaleByte)); - v.y = Float4(*Pointer(source1)) * *Pointer(r.constants + OFFSET(Constants,unscaleByte)); - v.z = Float4(*Pointer(source2)) * *Pointer(r.constants + OFFSET(Constants,unscaleByte)); - v.w = Float4(*Pointer(source3)) * *Pointer(r.constants + OFFSET(Constants,unscaleByte)); + v.x = Float4(*Pointer(source0)) * *Pointer(constants + OFFSET(Constants,unscaleByte)); + v.y = Float4(*Pointer(source1)) * *Pointer(constants + OFFSET(Constants,unscaleByte)); + v.z = Float4(*Pointer(source2)) * *Pointer(constants + OFFSET(Constants,unscaleByte)); + v.w = Float4(*Pointer(source3)) * *Pointer(constants + OFFSET(Constants,unscaleByte)); transpose4x4(v.x, v.y, v.z, v.w); @@ -230,10 +228,10 @@ namespace sw if(stream.normalized) { - if(stream.count >= 1) v.x *= *Pointer(r.constants + OFFSET(Constants,unscaleShort)); - if(stream.count >= 2) v.y *= *Pointer(r.constants + OFFSET(Constants,unscaleShort)); - if(stream.count >= 3) v.z *= *Pointer(r.constants + OFFSET(Constants,unscaleShort)); - if(stream.count >= 4) v.w *= *Pointer(r.constants + OFFSET(Constants,unscaleShort)); + if(stream.count >= 1) v.x *= *Pointer(constants + OFFSET(Constants,unscaleShort)); + if(stream.count >= 2) v.y *= *Pointer(constants + OFFSET(Constants,unscaleShort)); + if(stream.count >= 3) v.z *= *Pointer(constants + OFFSET(Constants,unscaleShort)); + if(stream.count >= 4) v.w *= *Pointer(constants + OFFSET(Constants,unscaleShort)); } } break; @@ -248,10 +246,10 @@ namespace sw if(stream.normalized) { - if(stream.count >= 1) v.x *= *Pointer(r.constants + OFFSET(Constants,unscaleUShort)); - if(stream.count >= 2) v.y *= *Pointer(r.constants + OFFSET(Constants,unscaleUShort)); - if(stream.count >= 3) v.z *= *Pointer(r.constants + OFFSET(Constants,unscaleUShort)); - if(stream.count >= 4) v.w *= *Pointer(r.constants + OFFSET(Constants,unscaleUShort)); + if(stream.count >= 1) v.x *= *Pointer(constants + OFFSET(Constants,unscaleUShort)); + if(stream.count >= 2) v.y *= *Pointer(constants + OFFSET(Constants,unscaleUShort)); + if(stream.count >= 3) v.z *= *Pointer(constants + OFFSET(Constants,unscaleUShort)); + if(stream.count >= 4) v.w *= *Pointer(constants + OFFSET(Constants,unscaleUShort)); } } break; @@ -356,10 +354,10 @@ namespace sw break; case STREAMTYPE_FIXED: { - v.x = Float4(*Pointer(source0)) * *Pointer(r.constants + OFFSET(Constants,unscaleFixed)); - v.y = Float4(*Pointer(source1)) * *Pointer(r.constants + OFFSET(Constants,unscaleFixed)); - v.z = Float4(*Pointer(source2)) * *Pointer(r.constants + OFFSET(Constants,unscaleFixed)); - v.w = Float4(*Pointer(source3)) * *Pointer(r.constants + OFFSET(Constants,unscaleFixed)); + v.x = Float4(*Pointer(source0)) * *Pointer(constants + OFFSET(Constants,unscaleFixed)); + v.y = Float4(*Pointer(source1)) * *Pointer(constants + OFFSET(Constants,unscaleFixed)); + v.z = Float4(*Pointer(source2)) * *Pointer(constants + OFFSET(Constants,unscaleFixed)); + v.w = Float4(*Pointer(source3)) * *Pointer(constants + OFFSET(Constants,unscaleFixed)); transpose4xN(v.x, v.y, v.z, v.w, stream.count); } @@ -373,10 +371,10 @@ namespace sw UShort x2 = *Pointer(source2 + 0); UShort x3 = *Pointer(source3 + 0); - v.x.x = *Pointer(r.constants + OFFSET(Constants,half2float) + Int(x0) * 4); - v.x.y = *Pointer(r.constants + OFFSET(Constants,half2float) + Int(x1) * 4); - v.x.z = *Pointer(r.constants + OFFSET(Constants,half2float) + Int(x2) * 4); - v.x.w = *Pointer(r.constants + OFFSET(Constants,half2float) + Int(x3) * 4); + v.x.x = *Pointer(constants + OFFSET(Constants,half2float) + Int(x0) * 4); + v.x.y = *Pointer(constants + OFFSET(Constants,half2float) + Int(x1) * 4); + v.x.z = *Pointer(constants + OFFSET(Constants,half2float) + Int(x2) * 4); + v.x.w = *Pointer(constants + OFFSET(Constants,half2float) + Int(x3) * 4); } if(stream.count >= 2) @@ -386,10 +384,10 @@ namespace sw UShort y2 = *Pointer(source2 + 2); UShort y3 = *Pointer(source3 + 2); - v.y.x = *Pointer(r.constants + OFFSET(Constants,half2float) + Int(y0) * 4); - v.y.y = *Pointer(r.constants + OFFSET(Constants,half2float) + Int(y1) * 4); - v.y.z = *Pointer(r.constants + OFFSET(Constants,half2float) + Int(y2) * 4); - v.y.w = *Pointer(r.constants + OFFSET(Constants,half2float) + Int(y3) * 4); + v.y.x = *Pointer(constants + OFFSET(Constants,half2float) + Int(y0) * 4); + v.y.y = *Pointer(constants + OFFSET(Constants,half2float) + Int(y1) * 4); + v.y.z = *Pointer(constants + OFFSET(Constants,half2float) + Int(y2) * 4); + v.y.w = *Pointer(constants + OFFSET(Constants,half2float) + Int(y3) * 4); } if(stream.count >= 3) @@ -399,10 +397,10 @@ namespace sw UShort z2 = *Pointer(source2 + 4); UShort z3 = *Pointer(source3 + 4); - v.z.x = *Pointer(r.constants + OFFSET(Constants,half2float) + Int(z0) * 4); - v.z.y = *Pointer(r.constants + OFFSET(Constants,half2float) + Int(z1) * 4); - v.z.z = *Pointer(r.constants + OFFSET(Constants,half2float) + Int(z2) * 4); - v.z.w = *Pointer(r.constants + OFFSET(Constants,half2float) + Int(z3) * 4); + v.z.x = *Pointer(constants + OFFSET(Constants,half2float) + Int(z0) * 4); + v.z.y = *Pointer(constants + OFFSET(Constants,half2float) + Int(z1) * 4); + v.z.z = *Pointer(constants + OFFSET(Constants,half2float) + Int(z2) * 4); + v.z.w = *Pointer(constants + OFFSET(Constants,half2float) + Int(z3) * 4); } if(stream.count >= 4) @@ -412,10 +410,10 @@ namespace sw UShort w2 = *Pointer(source2 + 6); UShort w3 = *Pointer(source3 + 6); - v.w.x = *Pointer(r.constants + OFFSET(Constants,half2float) + Int(w0) * 4); - v.w.y = *Pointer(r.constants + OFFSET(Constants,half2float) + Int(w1) * 4); - v.w.z = *Pointer(r.constants + OFFSET(Constants,half2float) + Int(w2) * 4); - v.w.w = *Pointer(r.constants + OFFSET(Constants,half2float) + Int(w3) * 4); + v.w.x = *Pointer(constants + OFFSET(Constants,half2float) + Int(w0) * 4); + v.w.y = *Pointer(constants + OFFSET(Constants,half2float) + Int(w1) * 4); + v.w.z = *Pointer(constants + OFFSET(Constants,half2float) + Int(w2) * 4); + v.w.w = *Pointer(constants + OFFSET(Constants,half2float) + Int(w3) * 4); } } break; @@ -446,34 +444,34 @@ namespace sw // Backtransform if(state.preTransformed) { - Float4 rhw = Float4(1.0f) / r.o[pos].w; + Float4 rhw = Float4(1.0f) / o[pos].w; - Float4 W = *Pointer(r.data + OFFSET(DrawData,Wx16)) * Float4(1.0f / 16.0f); - Float4 H = *Pointer(r.data + OFFSET(DrawData,Hx16)) * Float4(1.0f / 16.0f); - Float4 L = *Pointer(r.data + OFFSET(DrawData,X0x16)) * Float4(1.0f / 16.0f); - Float4 T = *Pointer(r.data + OFFSET(DrawData,Y0x16)) * Float4(1.0f / 16.0f); + Float4 W = *Pointer(data + OFFSET(DrawData,Wx16)) * Float4(1.0f / 16.0f); + Float4 H = *Pointer(data + OFFSET(DrawData,Hx16)) * Float4(1.0f / 16.0f); + Float4 L = *Pointer(data + OFFSET(DrawData,X0x16)) * Float4(1.0f / 16.0f); + Float4 T = *Pointer(data + OFFSET(DrawData,Y0x16)) * Float4(1.0f / 16.0f); - r.o[pos].x = (r.o[pos].x - L) / W * rhw; - r.o[pos].y = (r.o[pos].y - T) / H * rhw; - r.o[pos].z = r.o[pos].z * rhw; - r.o[pos].w = rhw; + o[pos].x = (o[pos].x - L) / W * rhw; + o[pos].y = (o[pos].y - T) / H * rhw; + o[pos].z = o[pos].z * rhw; + o[pos].w = rhw; } if(!halfIntegerCoordinates && !state.preTransformed) { - r.o[pos].x = r.o[pos].x + *Pointer(r.data + OFFSET(DrawData,halfPixelX)) * r.o[pos].w; - r.o[pos].y = r.o[pos].y + *Pointer(r.data + OFFSET(DrawData,halfPixelY)) * r.o[pos].w; + o[pos].x = o[pos].x + *Pointer(data + OFFSET(DrawData,halfPixelX)) * o[pos].w; + o[pos].y = o[pos].y + *Pointer(data + OFFSET(DrawData,halfPixelY)) * o[pos].w; } if(state.superSampling) { - r.o[pos].x = r.o[pos].x + *Pointer(r.data + OFFSET(DrawData,XXXX)) * r.o[pos].w; - r.o[pos].y = r.o[pos].y + *Pointer(r.data + OFFSET(DrawData,YYYY)) * r.o[pos].w; + o[pos].x = o[pos].x + *Pointer(data + OFFSET(DrawData,XXXX)) * o[pos].w; + o[pos].y = o[pos].y + *Pointer(data + OFFSET(DrawData,YYYY)) * o[pos].w; } if(symmetricNormalizedDepth && !state.fixedFunction) { - r.o[pos].z = (r.o[pos].z + r.o[pos].w) * Float4(0.5f); + o[pos].z = (o[pos].z + o[pos].w) * Float4(0.5f); } } @@ -485,10 +483,10 @@ namespace sw { if(state.output[i].write) { - v.x = r.o[i].x; - v.y = r.o[i].y; - v.z = r.o[i].z; - v.w = r.o[i].w; + v.x = o[i].x; + v.y = o[i].y; + v.z = o[i].z; + v.w = o[i].w; if(state.output[i].xClamp) { @@ -540,23 +538,23 @@ namespace sw } } - *Pointer(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 0) = (r.clipFlags >> 0) & 0x0000000FF; // FIXME: unsigned char Vertex::clipFlags - *Pointer(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 1) = (r.clipFlags >> 8) & 0x0000000FF; - *Pointer(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 2) = (r.clipFlags >> 16) & 0x0000000FF; - *Pointer(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 3) = (r.clipFlags >> 24) & 0x0000000FF; + *Pointer(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 0) = (clipFlags >> 0) & 0x0000000FF; // FIXME: unsigned char Vertex::clipFlags + *Pointer(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 1) = (clipFlags >> 8) & 0x0000000FF; + *Pointer(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 2) = (clipFlags >> 16) & 0x0000000FF; + *Pointer(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 3) = (clipFlags >> 24) & 0x0000000FF; int pos = state.positionRegister; - v.x = r.o[pos].x; - v.y = r.o[pos].y; - v.z = r.o[pos].z; - v.w = r.o[pos].w; + v.x = o[pos].x; + v.y = o[pos].y; + v.z = o[pos].z; + v.w = o[pos].w; Float4 w = As(As(v.w) | (As(CmpEQ(v.w, Float4(0.0f))) & As(Float4(1.0f)))); Float4 rhw = Float4(1.0f) / w; - v.x = As(RoundInt(*Pointer(r.data + OFFSET(DrawData,X0x16)) + v.x * rhw * *Pointer(r.data + OFFSET(DrawData,Wx16)))); - v.y = As(RoundInt(*Pointer(r.data + OFFSET(DrawData,Y0x16)) + v.y * rhw * *Pointer(r.data + OFFSET(DrawData,Hx16)))); + v.x = As(RoundInt(*Pointer(data + OFFSET(DrawData,X0x16)) + v.x * rhw * *Pointer(data + OFFSET(DrawData,Wx16)))); + v.y = As(RoundInt(*Pointer(data + OFFSET(DrawData,Y0x16)) + v.y * rhw * *Pointer(data + OFFSET(DrawData,Hx16)))); v.z = v.z * rhw; v.w = rhw; diff --git a/src/Shader/VertexRoutine.hpp b/src/Shader/VertexRoutine.hpp index 3eec92490..1861d10af 100644 --- a/src/Shader/VertexRoutine.hpp +++ b/src/Shader/VertexRoutine.hpp @@ -41,59 +41,14 @@ namespace sw void generate(); protected: - struct Registers - { - Registers(const VertexShader *shader) : - v(shader && shader->dynamicallyIndexedInput), - r(shader && shader->dynamicallyIndexedTemporaries), - o(shader && shader->dynamicallyIndexedOutput) - { - loopDepth = -1; - enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + Pointer constants; - if(shader && shader->containsBreakInstruction()) - { - enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); - } + Int clipFlags; - if(shader && shader->containsContinueInstruction()) - { - enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); - } - } - - Pointer data; - Pointer constants; - - Int clipFlags; - - RegisterArray<16> v; - RegisterArray<4096> r; - RegisterArray<12> o; - Vector4f a0; - Array aL; - Vector4f p0; - - Array increment; - Array iteration; - - Int loopDepth; - Int stackIndex; // FIXME: Inc/decrement callStack - Array callStack; - - Int enableIndex; - Array enableStack; - Int4 enableBreak; - Int4 enableContinue; - Int4 enableLeave; - - Int instanceID; - }; - - Registers r; + RegisterArray<16> v; // Varying registers + RegisterArray<12> o; // Output registers const VertexProcessor::State &state; - const VertexShader *const shader; private: virtual void pipeline() = 0; -- 2.11.0