X-Git-Url: http://git.osdn.net/view?a=blobdiff_plain;f=src%2FShader%2FPixelRoutine.cpp;h=44fafd3a4ee6cd111ec7194646c5c29ead48d913;hb=7d9bdcb9e149c6b23c19db549904f0f67874378c;hp=4c2ce1bfcc0eb9ec2f298b9e14ee5e4319ccfb14;hpb=a36f3f9a6e33f451f3e8a9a61363d57ca91c46e5;p=android-x86%2Fexternal-swiftshader.git diff --git a/src/Shader/PixelRoutine.cpp b/src/Shader/PixelRoutine.cpp index 4c2ce1bfc..44fafd3a4 100644 --- a/src/Shader/PixelRoutine.cpp +++ b/src/Shader/PixelRoutine.cpp @@ -1,13 +1,16 @@ -// SwiftShader Software Renderer +// Copyright 2016 The SwiftShader Authors. All Rights Reserved. // -// Copyright(c) 2005-2013 TransGaming Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at // -// All rights reserved. No part of this software may be copied, distributed, transmitted, -// transcribed, stored in a retrieval system, translated into any human or computer -// language by any means, or disclosed to third parties without the explicit written -// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express -// or implied, including but not limited to any patent rights, are granted to you. +// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
#include "PixelRoutine.hpp" @@ -15,7 +18,6 @@ #include "QuadRasterizer.hpp" #include "Surface.hpp" #include "Primitive.hpp" -#include "CPUID.hpp" #include "SamplerCore.hpp" #include "Constants.hpp" #include "Debug.hpp" @@ -27,27 +29,20 @@ namespace sw extern bool exactColorRounding; extern bool forceClearRegisters; - PixelRoutine::Registers::Registers(const PixelShader *shader) : - QuadRasterizer::Registers(), - rf(shader && shader->dynamicallyIndexedTemporaries), - vf(shader && shader->dynamicallyIndexedInput) + PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) : QuadRasterizer(state, shader), v(shader && shader->dynamicallyIndexedInput) { if(!shader || shader->getVersion() < 0x0200 || forceClearRegisters) { - for(int i = 0; i < 10; i++) + for(int i = 0; i < MAX_FRAGMENT_INPUTS; i++) { - vf[i].x = Float4(0.0f); - vf[i].y = Float4(0.0f); - vf[i].z = Float4(0.0f); - vf[i].w = Float4(0.0f); + v[i].x = Float4(0.0f); + v[i].y = Float4(0.0f); + v[i].z = Float4(0.0f); + v[i].w = Float4(0.0f); } } } - PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) : QuadRasterizer(state, shader) - { - } - PixelRoutine::~PixelRoutine() { for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++) @@ -56,17 +51,15 @@ namespace sw } } - void PixelRoutine::quad(QuadRasterizer::Registers &rBase, Pointer cBuffer[4], Pointer &zBuffer, Pointer &sBuffer, Int cMask[4], Int &x, Int &y) + void PixelRoutine::quad(Pointer cBuffer[RENDERTARGETS], Pointer &zBuffer, Pointer &sBuffer, Int cMask[4], Int &x, Int &y) { - Registers& r = *static_cast(&rBase); - #if PERF_PROFILE Long pipeTime = Ticks(); #endif for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++) { - sampler[i] = new SamplerCore(r.constants, state.sampler[i]); + sampler[i] = new SamplerCore(constants, state.sampler[i]); } const bool earlyDepthTest = !state.depthOverride && !state.alphaTestActive(); @@ -82,30 +75,26 @@ namespace sw for(unsigned int q = 0; q < state.multiSample; q++) { - 
stencilTest(r, sBuffer, q, x, sMask[q], cMask[q]); + stencilTest(sBuffer, q, x, sMask[q], cMask[q]); } Float4 f; - - Float4 (&z)[4] = r.z; - Float4 &w = r.w; - Float4 &rhw = r.rhw; Float4 rhwCentroid; - Float4 xxxx = Float4(Float(x)) + *Pointer(r.primitive + OFFSET(Primitive,xQuad), 16); + Float4 xxxx = Float4(Float(x)) + *Pointer(primitive + OFFSET(Primitive,xQuad), 16); if(interpolateZ()) { for(unsigned int q = 0; q < state.multiSample; q++) { Float4 x = xxxx; - + if(state.multiSample > 1) { - x -= *Pointer(r.constants + OFFSET(Constants,X) + q * sizeof(float4)); + x -= *Pointer(constants + OFFSET(Constants,X) + q * sizeof(float4)); } - z[q] = interpolate(x, r.Dz[q], z[q], r.primitive + OFFSET(Primitive,z), false, false); + z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive,z), false, false); } } @@ -115,7 +104,7 @@ namespace sw { for(unsigned int q = 0; q < state.multiSample; q++) { - depthPass = depthPass || depthTest(r, zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]); + depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]); } } @@ -125,7 +114,7 @@ namespace sw Long interpTime = Ticks(); #endif - Float4 yyyy = Float4(Float(y)) + *Pointer(r.primitive + OFFSET(Primitive,yQuad), 16); + Float4 yyyy = Float4(Float(y)) + *Pointer(primitive + OFFSET(Primitive,yQuad), 16); // Centroid locations Float4 XXXX = Float4(0.0f); @@ -137,9 +126,9 @@ namespace sw for(unsigned int q = 0; q < state.multiSample; q++) { - XXXX += *Pointer(r.constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]); - YYYY += *Pointer(r.constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]); - WWWW += *Pointer(r.constants + OFFSET(Constants,weight) + 16 * cMask[q]); + XXXX += *Pointer(constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]); + YYYY += *Pointer(constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]); + WWWW += *Pointer(constants + OFFSET(Constants,weight) + 16 * cMask[q]); } WWWW = Rcp_pp(WWWW); @@ -152,16 +141,16 @@ namespace sw 
if(interpolateW()) { - w = interpolate(xxxx, r.Dw, rhw, r.primitive + OFFSET(Primitive,w), false, false); - rhw = reciprocal(w); + w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive,w), false, false); + rhw = reciprocal(w, false, false, true); if(state.centroid) { - rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, r.primitive + OFFSET(Primitive,w), false, false)); + rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive,w), false, false)); } } - for(int interpolant = 0; interpolant < 10; interpolant++) + for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++) { for(int component = 0; component < 4; component++) { @@ -169,11 +158,11 @@ namespace sw { if(!state.interpolant[interpolant].centroid) { - r.vf[interpolant][component] = interpolate(xxxx, r.Dv[interpolant][component], rhw, r.primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective); + v[interpolant][component] = interpolate(xxxx, Dv[interpolant][component], rhw, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective); } else { - r.vf[interpolant][component] = interpolateCentroid(XXXX, YYYY, rhwCentroid, r.primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective); + v[interpolant][component] = interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective); } } } @@ -185,32 +174,32 @@ namespace sw case 0: break; case 1: - rcp = reciprocal(r.vf[interpolant].y); - r.vf[interpolant].x = r.vf[interpolant].x * rcp; + rcp = reciprocal(v[interpolant].y); + v[interpolant].x = v[interpolant].x * rcp; break; case 2: - rcp = reciprocal(r.vf[interpolant].z); - 
r.vf[interpolant].x = r.vf[interpolant].x * rcp; - r.vf[interpolant].y = r.vf[interpolant].y * rcp; + rcp = reciprocal(v[interpolant].z); + v[interpolant].x = v[interpolant].x * rcp; + v[interpolant].y = v[interpolant].y * rcp; break; case 3: - rcp = reciprocal(r.vf[interpolant].w); - r.vf[interpolant].x = r.vf[interpolant].x * rcp; - r.vf[interpolant].y = r.vf[interpolant].y * rcp; - r.vf[interpolant].z = r.vf[interpolant].z * rcp; + rcp = reciprocal(v[interpolant].w); + v[interpolant].x = v[interpolant].x * rcp; + v[interpolant].y = v[interpolant].y * rcp; + v[interpolant].z = v[interpolant].z * rcp; break; } } if(state.fog.component) { - f = interpolate(xxxx, r.Df, rhw, r.primitive + OFFSET(Primitive,f), state.fog.flat & 0x01, state.perspective); + f = interpolate(xxxx, Df, rhw, primitive + OFFSET(Primitive,f), state.fog.flat & 0x01, state.perspective); } - setBuiltins(r, x, y, z, w); + setBuiltins(x, y, z, w); #if PERF_PROFILE - r.cycles[PERF_INTERP] += Ticks() - interpTime; + cycles[PERF_INTERP] += Ticks() - interpTime; #endif Bool alphaPass = true; @@ -221,13 +210,13 @@ namespace sw Long shaderTime = Ticks(); #endif - applyShader(r, cMask); + applyShader(cMask); #if PERF_PROFILE - r.cycles[PERF_SHADER] += Ticks() - shaderTime; + cycles[PERF_SHADER] += Ticks() - shaderTime; #endif - alphaPass = alphaTest(r, cMask); + alphaPass = alphaTest(cMask); if((shader && shader->containsKill()) || state.alphaTestActive()) { @@ -245,7 +234,7 @@ namespace sw { for(unsigned int q = 0; q < state.multiSample; q++) { - depthPass = depthPass || depthTest(r, zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]); + depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]); } } @@ -259,11 +248,11 @@ namespace sw { if(state.multiSampleMask & (1 << q)) { - writeDepth(r, zBuffer, q, x, z[q], zMask[q]); + writeDepth(zBuffer, q, x, z[q], zMask[q]); if(state.occlusionEnabled) { - r.occlusion += *Pointer(r.constants + OFFSET(Constants,occlusionCount) + 4 * 
(zMask[q] & sMask[q])); + occlusion += *Pointer(constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q])); } } } @@ -274,12 +263,12 @@ namespace sw AddAtomic(Pointer(&profiler.ropOperations), 4); #endif - rasterOperation(r, f, cBuffer, x, sMask, zMask, cMask); + rasterOperation(f, cBuffer, x, sMask, zMask, cMask); } } #if PERF_PROFILE - r.cycles[PERF_ROP] += Ticks() - ropTime; + cycles[PERF_ROP] += Ticks() - ropTime; #endif } } @@ -288,12 +277,12 @@ namespace sw { if(state.multiSampleMask & (1 << q)) { - writeStencil(r, sBuffer, q, x, sMask[q], zMask[q], cMask[q]); + writeStencil(sBuffer, q, x, sMask[q], zMask[q], cMask[q]); } } #if PERF_PROFILE - r.cycles[PERF_PIPE] += Ticks() - pipeTime; + cycles[PERF_PIPE] += Ticks() - pipeTime; #endif } @@ -315,7 +304,7 @@ namespace sw return interpolant; } - void PixelRoutine::stencilTest(Registers &r, Pointer &sBuffer, int q, Int &x, Int &sMask, Int &cMask) + void PixelRoutine::stencilTest(Pointer &sBuffer, int q, Int &x, Int &sMask, Int &cMask) { if(!state.stencilActive) { @@ -328,83 +317,83 @@ namespace sw if(q > 0) { - buffer += q * *Pointer(r.data + OFFSET(DrawData,stencilSliceB)); + buffer += q * *Pointer(data + OFFSET(DrawData,stencilSliceB)); } - Byte8 value = As(Long1(*Pointer(buffer))); + Byte8 value = *Pointer(buffer); Byte8 valueCCW = value; if(!state.noStencilMask) { - value &= *Pointer(r.data + OFFSET(DrawData,stencil[0].testMaskQ)); + value &= *Pointer(data + OFFSET(DrawData,stencil[0].testMaskQ)); } - stencilTest(r, value, state.stencilCompareMode, false); + stencilTest(value, state.stencilCompareMode, false); if(state.twoSidedStencil) { if(!state.noStencilMaskCCW) { - valueCCW &= *Pointer(r.data + OFFSET(DrawData,stencil[1].testMaskQ)); + valueCCW &= *Pointer(data + OFFSET(DrawData,stencil[1].testMaskQ)); } - stencilTest(r, valueCCW, state.stencilCompareModeCCW, true); + stencilTest(valueCCW, state.stencilCompareModeCCW, true); - value &= *Pointer(r.primitive + OFFSET(Primitive,clockwiseMask)); 
- valueCCW &= *Pointer(r.primitive + OFFSET(Primitive,invClockwiseMask)); + value &= *Pointer(primitive + OFFSET(Primitive,clockwiseMask)); + valueCCW &= *Pointer(primitive + OFFSET(Primitive,invClockwiseMask)); value |= valueCCW; } sMask = SignMask(value) & cMask; } - void PixelRoutine::stencilTest(Registers &r, Byte8 &value, StencilCompareMode stencilCompareMode, bool CCW) + void PixelRoutine::stencilTest(Byte8 &value, StencilCompareMode stencilCompareMode, bool CCW) { Byte8 equal; switch(stencilCompareMode) { case STENCIL_ALWAYS: - value = Byte8(0xFFFFFFFFFFFFFFFF); + value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); break; case STENCIL_NEVER: - value = Byte8(0x0000000000000000); + value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); break; case STENCIL_LESS: // a < b ~ b > a value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); - value = CmpGT(As(value), *Pointer(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); + value = CmpGT(As(value), *Pointer(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); break; case STENCIL_EQUAL: - value = CmpEQ(value, *Pointer(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); + value = CmpEQ(value, *Pointer(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); break; case STENCIL_NOTEQUAL: // a != b ~ !(a == b) - value = CmpEQ(value, *Pointer(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); - value ^= Byte8(0xFFFFFFFFFFFFFFFF); + value = CmpEQ(value, *Pointer(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); + value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); break; case STENCIL_LESSEQUAL: // a <= b ~ (b > a) || (a == b) equal = value; - equal = CmpEQ(equal, *Pointer(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); + equal = CmpEQ(equal, *Pointer(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); - value = CmpGT(As(value), *Pointer(r.data + 
OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); + value = CmpGT(As(value), *Pointer(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); value |= equal; break; case STENCIL_GREATER: // a > b - equal = *Pointer(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)); + equal = *Pointer(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)); value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); equal = CmpGT(As(equal), As(value)); value = equal; break; case STENCIL_GREATEREQUAL: // a >= b ~ !(a < b) ~ !(b > a) value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); - value = CmpGT(As(value), *Pointer(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); - value ^= Byte8(0xFFFFFFFFFFFFFFFF); + value = CmpGT(As(value), *Pointer(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); + value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); break; default: ASSERT(false); } } - Bool PixelRoutine::depthTest(Registers &r, Pointer &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask) + Bool PixelRoutine::depthTest(Pointer &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask) { if(!state.depthTestActive) { @@ -417,11 +406,11 @@ namespace sw { if(complementaryDepthBuffer) { - Z = Float4(1.0f) - r.oDepth; + Z = Float4(1.0f) - oDepth; } else { - Z = r.oDepth; + Z = oDepth; } } @@ -431,7 +420,7 @@ namespace sw if(!state.quadLayoutDepthBuffer) { buffer = zBuffer + 4 * x; - pitch = *Pointer(r.data + OFFSET(DrawData,depthPitchB)); + pitch = *Pointer(data + OFFSET(DrawData,depthPitchB)); } else { @@ -440,7 +429,7 @@ namespace sw if(q > 0) { - buffer += q * *Pointer(r.data + OFFSET(DrawData,depthSliceB)); + buffer += q * *Pointer(data + OFFSET(DrawData,depthSliceB)); } Float4 zValue; @@ -531,7 +520,7 @@ namespace sw zMask = SignMask(zTest) & cMask; break; } - + if(state.stencilActive) { zMask &= sMask; @@ -540,7 +529,7 @@ namespace sw return zMask != 0; } - void 
PixelRoutine::alphaTest(Registers &r, Int &aMask, Short4 &alpha) + void PixelRoutine::alphaTest(Int &aMask, Short4 &alpha) { Short4 cmp; Short4 equal; @@ -554,42 +543,42 @@ namespace sw aMask = 0x0; break; case ALPHA_EQUAL: - cmp = CmpEQ(alpha, *Pointer(r.data + OFFSET(DrawData,factor.alphaReference4))); - aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000))); + cmp = CmpEQ(alpha, *Pointer(data + OFFSET(DrawData,factor.alphaReference4))); + aMask = SignMask(Pack(cmp, Short4(0x0000))); break; - case ALPHA_NOTEQUAL: // a != b ~ !(a == b) - cmp = CmpEQ(alpha, *Pointer(r.data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4((short)0xFFFF, (short)0xFFFF, (short)0xFFFF, (short)0xFFFF); // FIXME - aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000))); + case ALPHA_NOTEQUAL: // a != b ~ !(a == b) + cmp = CmpEQ(alpha, *Pointer(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME + aMask = SignMask(Pack(cmp, Short4(0x0000))); break; - case ALPHA_LESS: // a < b ~ b > a - cmp = CmpGT(*Pointer(r.data + OFFSET(DrawData,factor.alphaReference4)), alpha); - aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000))); + case ALPHA_LESS: // a < b ~ b > a + cmp = CmpGT(*Pointer(data + OFFSET(DrawData,factor.alphaReference4)), alpha); + aMask = SignMask(Pack(cmp, Short4(0x0000))); break; - case ALPHA_GREATEREQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate - equal = CmpEQ(alpha, *Pointer(r.data + OFFSET(DrawData,factor.alphaReference4))); - cmp = CmpGT(alpha, *Pointer(r.data + OFFSET(DrawData,factor.alphaReference4))); + case ALPHA_GREATEREQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate + equal = CmpEQ(alpha, *Pointer(data + OFFSET(DrawData,factor.alphaReference4))); + cmp = CmpGT(alpha, *Pointer(data + OFFSET(DrawData,factor.alphaReference4))); cmp |= equal; - aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000))); + aMask = SignMask(Pack(cmp, Short4(0x0000))); 
break; - case ALPHA_LESSEQUAL: // a <= b ~ !(a > b) - cmp = CmpGT(alpha, *Pointer(r.data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4((short)0xFFFF, (short)0xFFFF, (short)0xFFFF, (short)0xFFFF); // FIXME - aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000))); + case ALPHA_LESSEQUAL: // a <= b ~ !(a > b) + cmp = CmpGT(alpha, *Pointer(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME + aMask = SignMask(Pack(cmp, Short4(0x0000))); break; - case ALPHA_GREATER: // a > b - cmp = CmpGT(alpha, *Pointer(r.data + OFFSET(DrawData,factor.alphaReference4))); - aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000))); + case ALPHA_GREATER: // a > b + cmp = CmpGT(alpha, *Pointer(data + OFFSET(DrawData,factor.alphaReference4))); + aMask = SignMask(Pack(cmp, Short4(0x0000))); break; default: ASSERT(false); } } - void PixelRoutine::alphaToCoverage(Registers &r, Int cMask[4], Float4 &alpha) + void PixelRoutine::alphaToCoverage(Int cMask[4], Float4 &alpha) { - Int4 coverage0 = CmpNLT(alpha, *Pointer(r.data + OFFSET(DrawData,a2c0))); - Int4 coverage1 = CmpNLT(alpha, *Pointer(r.data + OFFSET(DrawData,a2c1))); - Int4 coverage2 = CmpNLT(alpha, *Pointer(r.data + OFFSET(DrawData,a2c2))); - Int4 coverage3 = CmpNLT(alpha, *Pointer(r.data + OFFSET(DrawData,a2c3))); + Int4 coverage0 = CmpNLT(alpha, *Pointer(data + OFFSET(DrawData,a2c0))); + Int4 coverage1 = CmpNLT(alpha, *Pointer(data + OFFSET(DrawData,a2c1))); + Int4 coverage2 = CmpNLT(alpha, *Pointer(data + OFFSET(DrawData,a2c2))); + Int4 coverage3 = CmpNLT(alpha, *Pointer(data + OFFSET(DrawData,a2c3))); Int aMask0 = SignMask(coverage0); Int aMask1 = SignMask(coverage1); @@ -602,7 +591,7 @@ namespace sw cMask[3] &= aMask3; } - void PixelRoutine::fogBlend(Registers &r, Vector4f &c0, Float4 &fog, Float4 &z, Float4 &rhw) + void PixelRoutine::fogBlend(Vector4f &c0, Float4 &fog) { if(!state.fogActive) { @@ -611,26 +600,26 @@ namespace sw if(state.pixelFogMode != FOG_NONE) { - pixelFog(r, 
fog, z, rhw); + pixelFog(fog); fog = Min(fog, Float4(1.0f)); fog = Max(fog, Float4(0.0f)); } - c0.x -= *Pointer(r.data + OFFSET(DrawData,fog.colorF[0])); - c0.y -= *Pointer(r.data + OFFSET(DrawData,fog.colorF[1])); - c0.z -= *Pointer(r.data + OFFSET(DrawData,fog.colorF[2])); + c0.x -= *Pointer(data + OFFSET(DrawData,fog.colorF[0])); + c0.y -= *Pointer(data + OFFSET(DrawData,fog.colorF[1])); + c0.z -= *Pointer(data + OFFSET(DrawData,fog.colorF[2])); c0.x *= fog; c0.y *= fog; c0.z *= fog; - c0.x += *Pointer(r.data + OFFSET(DrawData,fog.colorF[0])); - c0.y += *Pointer(r.data + OFFSET(DrawData,fog.colorF[1])); - c0.z += *Pointer(r.data + OFFSET(DrawData,fog.colorF[2])); + c0.x += *Pointer(data + OFFSET(DrawData,fog.colorF[0])); + c0.y += *Pointer(data + OFFSET(DrawData,fog.colorF[1])); + c0.z += *Pointer(data + OFFSET(DrawData,fog.colorF[2])); } - void PixelRoutine::pixelFog(Registers &r, Float4 &visibility, Float4 &z, Float4 &rhw) + void PixelRoutine::pixelFog(Float4 &visibility) { Float4 &zw = visibility; @@ -644,11 +633,11 @@ namespace sw { if(complementaryDepthBuffer) { - zw = Float4(1.0f) - z; + zw = Float4(1.0f) - z[0]; } else { - zw = z; + zw = z[0]; } } } @@ -658,16 +647,16 @@ namespace sw case FOG_NONE: break; case FOG_LINEAR: - zw *= *Pointer(r.data + OFFSET(DrawData,fog.scale)); - zw += *Pointer(r.data + OFFSET(DrawData,fog.offset)); + zw *= *Pointer(data + OFFSET(DrawData,fog.scale)); + zw += *Pointer(data + OFFSET(DrawData,fog.offset)); break; case FOG_EXP: - zw *= *Pointer(r.data + OFFSET(DrawData,fog.densityE)); + zw *= *Pointer(data + OFFSET(DrawData,fog.densityE)); zw = exponential2(zw, true); break; case FOG_EXP2: zw *= zw; - zw *= *Pointer(r.data + OFFSET(DrawData,fog.density2E)); + zw *= *Pointer(data + OFFSET(DrawData,fog.density2E)); zw = exponential2(zw, true); break; default: @@ -675,7 +664,7 @@ namespace sw } } - void PixelRoutine::writeDepth(Registers &r, Pointer &zBuffer, int q, Int &x, Float4 &z, Int &zMask) + void 
PixelRoutine::writeDepth(Pointer &zBuffer, int q, Int &x, Float4 &z, Int &zMask) { if(!state.depthWriteEnable) { @@ -688,11 +677,11 @@ namespace sw { if(complementaryDepthBuffer) { - Z = Float4(1.0f) - r.oDepth; + Z = Float4(1.0f) - oDepth; } else { - Z = r.oDepth; + Z = oDepth; } } @@ -700,18 +689,18 @@ namespace sw Int pitch; if(!state.quadLayoutDepthBuffer) - { + { buffer = zBuffer + 4 * x; - pitch = *Pointer(r.data + OFFSET(DrawData,depthPitchB)); + pitch = *Pointer(data + OFFSET(DrawData,depthPitchB)); } else - { + { buffer = zBuffer + 8 * x; } if(q > 0) { - buffer += q * *Pointer(r.data + OFFSET(DrawData,depthSliceB)); + buffer += q * *Pointer(data + OFFSET(DrawData,depthSliceB)); } Float4 zValue; @@ -730,8 +719,8 @@ namespace sw } } - Z = As(As(Z) & *Pointer(r.constants + OFFSET(Constants,maskD4X) + zMask * 16, 16)); - zValue = As(As(zValue) & *Pointer(r.constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16)); + Z = As(As(Z) & *Pointer(constants + OFFSET(Constants,maskD4X) + zMask * 16, 16)); + zValue = As(As(zValue) & *Pointer(constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16)); Z = As(As(Z) | As(zValue)); if(!state.quadLayoutDepthBuffer) @@ -746,7 +735,7 @@ namespace sw } } - void PixelRoutine::writeStencil(Registers &r, Pointer &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask) + void PixelRoutine::writeStencil(Pointer &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask) { if(!state.stencilActive) { @@ -770,19 +759,19 @@ namespace sw if(q > 0) { - buffer += q * *Pointer(r.data + OFFSET(DrawData,stencilSliceB)); + buffer += q * *Pointer(data + OFFSET(DrawData,stencilSliceB)); } - Byte8 bufferValue = As(Long1(*Pointer(buffer))); - + Byte8 bufferValue = *Pointer(buffer); + Byte8 newValue; - stencilOperation(r, newValue, bufferValue, state.stencilPassOperation, state.stencilZFailOperation, state.stencilFailOperation, false, zMask, sMask); + stencilOperation(newValue, bufferValue, state.stencilPassOperation, 
state.stencilZFailOperation, state.stencilFailOperation, false, zMask, sMask); if(!state.noStencilWriteMask) { Byte8 maskedValue = bufferValue; - newValue &= *Pointer(r.data + OFFSET(DrawData,stencil[0].writeMaskQ)); - maskedValue &= *Pointer(r.data + OFFSET(DrawData,stencil[0].invWriteMaskQ)); + newValue &= *Pointer(data + OFFSET(DrawData,stencil[0].writeMaskQ)); + maskedValue &= *Pointer(data + OFFSET(DrawData,stencil[0].invWriteMaskQ)); newValue |= maskedValue; } @@ -790,62 +779,62 @@ namespace sw { Byte8 newValueCCW; - stencilOperation(r, newValueCCW, bufferValue, state.stencilPassOperationCCW, state.stencilZFailOperationCCW, state.stencilFailOperationCCW, true, zMask, sMask); + stencilOperation(newValueCCW, bufferValue, state.stencilPassOperationCCW, state.stencilZFailOperationCCW, state.stencilFailOperationCCW, true, zMask, sMask); if(!state.noStencilWriteMaskCCW) { Byte8 maskedValue = bufferValue; - newValueCCW &= *Pointer(r.data + OFFSET(DrawData,stencil[1].writeMaskQ)); - maskedValue &= *Pointer(r.data + OFFSET(DrawData,stencil[1].invWriteMaskQ)); + newValueCCW &= *Pointer(data + OFFSET(DrawData,stencil[1].writeMaskQ)); + maskedValue &= *Pointer(data + OFFSET(DrawData,stencil[1].invWriteMaskQ)); newValueCCW |= maskedValue; } - newValue &= *Pointer(r.primitive + OFFSET(Primitive,clockwiseMask)); - newValueCCW &= *Pointer(r.primitive + OFFSET(Primitive,invClockwiseMask)); + newValue &= *Pointer(primitive + OFFSET(Primitive,clockwiseMask)); + newValueCCW &= *Pointer(primitive + OFFSET(Primitive,invClockwiseMask)); newValue |= newValueCCW; } - newValue &= *Pointer(r.constants + OFFSET(Constants,maskB4Q) + 8 * cMask); - bufferValue &= *Pointer(r.constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask); + newValue &= *Pointer(constants + OFFSET(Constants,maskB4Q) + 8 * cMask); + bufferValue &= *Pointer(constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask); newValue |= bufferValue; - *Pointer(buffer) = UInt(As(newValue)); + *Pointer(buffer) = Byte4(newValue); } - 
void PixelRoutine::stencilOperation(Registers &r, Byte8 &newValue, Byte8 &bufferValue, StencilOperation stencilPassOperation, StencilOperation stencilZFailOperation, StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask) + void PixelRoutine::stencilOperation(Byte8 &newValue, Byte8 &bufferValue, StencilOperation stencilPassOperation, StencilOperation stencilZFailOperation, StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask) { Byte8 &pass = newValue; Byte8 fail; Byte8 zFail; - stencilOperation(r, pass, bufferValue, stencilPassOperation, CCW); + stencilOperation(pass, bufferValue, stencilPassOperation, CCW); if(stencilZFailOperation != stencilPassOperation) { - stencilOperation(r, zFail, bufferValue, stencilZFailOperation, CCW); + stencilOperation(zFail, bufferValue, stencilZFailOperation, CCW); } if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation) { - stencilOperation(r, fail, bufferValue, stencilFailOperation, CCW); + stencilOperation(fail, bufferValue, stencilFailOperation, CCW); } if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation) { if(state.depthTestActive && stencilZFailOperation != stencilPassOperation) // zMask valid and values not the same { - pass &= *Pointer(r.constants + OFFSET(Constants,maskB4Q) + 8 * zMask); - zFail &= *Pointer(r.constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask); + pass &= *Pointer(constants + OFFSET(Constants,maskB4Q) + 8 * zMask); + zFail &= *Pointer(constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask); pass |= zFail; } - pass &= *Pointer(r.constants + OFFSET(Constants,maskB4Q) + 8 * sMask); - fail &= *Pointer(r.constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask); + pass &= *Pointer(constants + OFFSET(Constants,maskB4Q) + 8 * sMask); + fail &= *Pointer(constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask); pass |= fail; } } - void PixelRoutine::stencilOperation(Registers &r, Byte8 &output, Byte8 
&bufferValue, StencilOperation operation, bool CCW) + void PixelRoutine::stencilOperation(Byte8 &output, Byte8 &bufferValue, StencilOperation operation, bool CCW) { switch(operation) { @@ -853,10 +842,10 @@ namespace sw output = bufferValue; break; case OPERATION_ZERO: - output = Byte8(0x0000000000000000); + output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); break; case OPERATION_REPLACE: - output = *Pointer(r.data + OFFSET(DrawData,stencil[CCW].referenceQ)); + output = *Pointer(data + OFFSET(DrawData,stencil[CCW].referenceQ)); break; case OPERATION_INCRSAT: output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1)); @@ -865,7 +854,7 @@ namespace sw output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1)); break; case OPERATION_INVERT: - output = bufferValue ^ Byte8(0xFFFFFFFFFFFFFFFF); + output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); break; case OPERATION_INCR: output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1); @@ -878,7 +867,7 @@ namespace sw } } - void PixelRoutine::blendFactor(Registers &r, const Vector4s &blendFactor, const Vector4s ¤t, const Vector4s &pixel, BlendFactor blendFactorActive) + void PixelRoutine::blendFactor(Vector4s &blendFactor, const Vector4s ¤t, const Vector4s &pixel, BlendFactor blendFactorActive) { switch(blendFactorActive) { @@ -935,31 +924,31 @@ namespace sw blendFactor.z = blendFactor.x; break; case BLEND_CONSTANT: - blendFactor.x = *Pointer(r.data + OFFSET(DrawData,factor.blendConstant4W[0])); - blendFactor.y = *Pointer(r.data + OFFSET(DrawData,factor.blendConstant4W[1])); - blendFactor.z = *Pointer(r.data + OFFSET(DrawData,factor.blendConstant4W[2])); + blendFactor.x = *Pointer(data + OFFSET(DrawData,factor.blendConstant4W[0])); + blendFactor.y = *Pointer(data + OFFSET(DrawData,factor.blendConstant4W[1])); + blendFactor.z = *Pointer(data + OFFSET(DrawData,factor.blendConstant4W[2])); break; case BLEND_INVCONSTANT: - blendFactor.x = *Pointer(r.data + 
OFFSET(DrawData,factor.invBlendConstant4W[0])); - blendFactor.y = *Pointer(r.data + OFFSET(DrawData,factor.invBlendConstant4W[1])); - blendFactor.z = *Pointer(r.data + OFFSET(DrawData,factor.invBlendConstant4W[2])); + blendFactor.x = *Pointer(data + OFFSET(DrawData,factor.invBlendConstant4W[0])); + blendFactor.y = *Pointer(data + OFFSET(DrawData,factor.invBlendConstant4W[1])); + blendFactor.z = *Pointer(data + OFFSET(DrawData,factor.invBlendConstant4W[2])); break; case BLEND_CONSTANTALPHA: - blendFactor.x = *Pointer(r.data + OFFSET(DrawData,factor.blendConstant4W[3])); - blendFactor.y = *Pointer(r.data + OFFSET(DrawData,factor.blendConstant4W[3])); - blendFactor.z = *Pointer(r.data + OFFSET(DrawData,factor.blendConstant4W[3])); + blendFactor.x = *Pointer(data + OFFSET(DrawData,factor.blendConstant4W[3])); + blendFactor.y = *Pointer(data + OFFSET(DrawData,factor.blendConstant4W[3])); + blendFactor.z = *Pointer(data + OFFSET(DrawData,factor.blendConstant4W[3])); break; case BLEND_INVCONSTANTALPHA: - blendFactor.x = *Pointer(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3])); - blendFactor.y = *Pointer(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3])); - blendFactor.z = *Pointer(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3])); + blendFactor.x = *Pointer(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); + blendFactor.y = *Pointer(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); + blendFactor.z = *Pointer(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); break; default: ASSERT(false); } } - - void PixelRoutine::blendFactorAlpha(Registers &r, const Vector4s &blendFactor, const Vector4s ¤t, const Vector4s &pixel, BlendFactor blendFactorAlphaActive) + + void PixelRoutine::blendFactorAlpha(Vector4s &blendFactor, const Vector4s ¤t, const Vector4s &pixel, BlendFactor blendFactorAlphaActive) { switch(blendFactorAlphaActive) { @@ -998,30 +987,35 @@ namespace sw break; case BLEND_CONSTANT: case BLEND_CONSTANTALPHA: - blendFactor.w = 
*Pointer(r.data + OFFSET(DrawData,factor.blendConstant4W[3])); + blendFactor.w = *Pointer(data + OFFSET(DrawData,factor.blendConstant4W[3])); break; case BLEND_INVCONSTANT: case BLEND_INVCONSTANTALPHA: - blendFactor.w = *Pointer(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3])); + blendFactor.w = *Pointer(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); break; default: ASSERT(false); } } - void PixelRoutine::readPixel(Registers &r, int index, Pointer &cBuffer, Vector4s ¤t, Int &x, Vector4s &pixel) + bool PixelRoutine::isSRGB(int index) const + { + return state.targetFormat[index] == FORMAT_SRGB8_A8 || state.targetFormat[index] == FORMAT_SRGB8_X8; + } + + void PixelRoutine::readPixel(int index, Pointer &cBuffer, Int &x, Vector4s &pixel) { Short4 c01; Short4 c23; Pointer buffer; + Pointer buffer2; switch(state.targetFormat[index]) { case FORMAT_R5G6B5: buffer = cBuffer + 2 * x; - c01 = As(Insert(As(c01), *Pointer(buffer), 0)); - buffer += *Pointer(r.data + OFFSET(DrawData, colorPitchB[index])); - c01 = As(Insert(As(c01), *Pointer(buffer), 1)); + buffer2 = buffer + *Pointer(data + OFFSET(DrawData, colorPitchB[index])); + c01 = As(Int2(*Pointer(buffer), *Pointer(buffer2))); pixel.x = c01 & Short4(0xF800u); pixel.y = (c01 & Short4(0x07E0u)) << 5; @@ -1031,7 +1025,7 @@ namespace sw case FORMAT_A8R8G8B8: buffer = cBuffer + 4 * x; c01 = *Pointer(buffer); - buffer += *Pointer(r.data + OFFSET(DrawData, colorPitchB[index])); + buffer += *Pointer(data + OFFSET(DrawData, colorPitchB[index])); c23 = *Pointer(buffer); pixel.z = c01; pixel.y = c01; @@ -1048,9 +1042,10 @@ namespace sw pixel.w = UnpackHigh(As(pixel.w), As(pixel.w)); break; case FORMAT_A8B8G8R8: + case FORMAT_SRGB8_A8: buffer = cBuffer + 4 * x; c01 = *Pointer(buffer); - buffer += *Pointer(r.data + OFFSET(DrawData, colorPitchB[index])); + buffer += *Pointer(data + OFFSET(DrawData, colorPitchB[index])); c23 = *Pointer(buffer); pixel.z = c01; pixel.y = c01; @@ -1069,7 +1064,7 @@ namespace sw case FORMAT_A8: 
buffer = cBuffer + 1 * x; pixel.w = Insert(pixel.w, *Pointer(buffer), 0); - buffer += *Pointer(r.data + OFFSET(DrawData, colorPitchB[index])); + buffer += *Pointer(data + OFFSET(DrawData, colorPitchB[index])); pixel.w = Insert(pixel.w, *Pointer(buffer), 1); pixel.w = UnpackLow(As(pixel.w), As(pixel.w)); pixel.x = Short4(0x0000); @@ -1079,7 +1074,7 @@ namespace sw case FORMAT_X8R8G8B8: buffer = cBuffer + 4 * x; c01 = *Pointer(buffer); - buffer += *Pointer(r.data + OFFSET(DrawData, colorPitchB[index])); + buffer += *Pointer(data + OFFSET(DrawData, colorPitchB[index])); c23 = *Pointer(buffer); pixel.z = c01; pixel.y = c01; @@ -1095,9 +1090,10 @@ namespace sw pixel.w = Short4(0xFFFFu); break; case FORMAT_X8B8G8R8: + case FORMAT_SRGB8_X8: buffer = cBuffer + 4 * x; c01 = *Pointer(buffer); - buffer += *Pointer(r.data + OFFSET(DrawData, colorPitchB[index])); + buffer += *Pointer(data + OFFSET(DrawData, colorPitchB[index])); c23 = *Pointer(buffer); pixel.z = c01; pixel.y = c01; @@ -1115,23 +1111,23 @@ namespace sw break; case FORMAT_A8G8R8B8Q: UNIMPLEMENTED(); - // pixel.z = UnpackLow(As(pixel.z), *Pointer(cBuffer + 8 * x + 0)); - // pixel.x = UnpackHigh(As(pixel.x), *Pointer(cBuffer + 8 * x + 0)); - // pixel.y = UnpackLow(As(pixel.y), *Pointer(cBuffer + 8 * x + 8)); - // pixel.w = UnpackHigh(As(pixel.w), *Pointer(cBuffer + 8 * x + 8)); + // pixel.z = UnpackLow(As(pixel.z), *Pointer(cBuffer + 8 * x + 0)); + // pixel.x = UnpackHigh(As(pixel.x), *Pointer(cBuffer + 8 * x + 0)); + // pixel.y = UnpackLow(As(pixel.y), *Pointer(cBuffer + 8 * x + 8)); + // pixel.w = UnpackHigh(As(pixel.w), *Pointer(cBuffer + 8 * x + 8)); break; case FORMAT_X8G8R8B8Q: UNIMPLEMENTED(); - // pixel.z = UnpackLow(As(pixel.z), *Pointer(cBuffer + 8 * x + 0)); - // pixel.x = UnpackHigh(As(pixel.x), *Pointer(cBuffer + 8 * x + 0)); - // pixel.y = UnpackLow(As(pixel.y), *Pointer(cBuffer + 8 * x + 8)); - // pixel.w = Short4(0xFFFFu); + // pixel.z = UnpackLow(As(pixel.z), *Pointer(cBuffer + 8 * x + 0)); + // 
pixel.x = UnpackHigh(As(pixel.x), *Pointer(cBuffer + 8 * x + 0)); + // pixel.y = UnpackLow(As(pixel.y), *Pointer(cBuffer + 8 * x + 8)); + // pixel.w = Short4(0xFFFFu); break; case FORMAT_A16B16G16R16: buffer = cBuffer; pixel.x = *Pointer(buffer + 8 * x); pixel.y = *Pointer(buffer + 8 * x + 8); - buffer += *Pointer(r.data + OFFSET(DrawData, colorPitchB[index])); + buffer += *Pointer(data + OFFSET(DrawData, colorPitchB[index])); pixel.z = *Pointer(buffer + 8 * x); pixel.w = *Pointer(buffer + 8 * x + 8); transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); @@ -1139,7 +1135,7 @@ namespace sw case FORMAT_G16R16: buffer = cBuffer; pixel.x = *Pointer(buffer + 4 * x); - buffer += *Pointer(r.data + OFFSET(DrawData, colorPitchB[index])); + buffer += *Pointer(data + OFFSET(DrawData, colorPitchB[index])); pixel.y = *Pointer(buffer + 4 * x); pixel.z = pixel.x; pixel.x = As(UnpackLow(pixel.x, pixel.y)); @@ -1154,13 +1150,13 @@ namespace sw ASSERT(false); } - if(postBlendSRGB && state.writeSRGB) + if((postBlendSRGB && state.writeSRGB) || isSRGB(index)) { - sRGBtoLinear16_12_16(r, pixel); + sRGBtoLinear16_12_16(pixel); } } - void PixelRoutine::alphaBlend(Registers &r, int index, Pointer &cBuffer, Vector4s &current, Int &x) + void PixelRoutine::alphaBlend(int index, Pointer &cBuffer, Vector4s &current, Int &x) { if(!state.alphaBlendActive) { @@ -1168,17 +1164,14 @@ namespace sw } Vector4s pixel; - Short4 c01; - Short4 c23; - - readPixel(r, index, cBuffer, current, x, pixel); + readPixel(index, cBuffer, x, pixel); // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor Vector4s sourceFactor; Vector4s destFactor; - blendFactor(r, sourceFactor, current, pixel, state.sourceBlendFactor); - blendFactor(r, destFactor, current, pixel, state.destBlendFactor); + blendFactor(sourceFactor, current, pixel, state.sourceBlendFactor); + blendFactor(destFactor, current, pixel, state.destBlendFactor); if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO) {
@@ -1186,7 +1179,7 @@ namespace sw current.y = MulHigh(As(current.y), As(sourceFactor.y)); current.z = MulHigh(As(current.z), As(sourceFactor.z)); } - + if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO) { pixel.x = MulHigh(As(pixel.x), As(destFactor.x)); @@ -1230,22 +1223,22 @@ namespace sw current.z = pixel.z; break; case BLENDOP_NULL: - current.x = Short4(0x0000, 0x0000, 0x0000, 0x0000); - current.y = Short4(0x0000, 0x0000, 0x0000, 0x0000); - current.z = Short4(0x0000, 0x0000, 0x0000, 0x0000); + current.x = Short4(0x0000); + current.y = Short4(0x0000); + current.z = Short4(0x0000); break; default: ASSERT(false); } - blendFactorAlpha(r, sourceFactor, current, pixel, state.sourceBlendFactorAlpha); - blendFactorAlpha(r, destFactor, current, pixel, state.destBlendFactorAlpha); + blendFactorAlpha(sourceFactor, current, pixel, state.sourceBlendFactorAlpha); + blendFactorAlpha(destFactor, current, pixel, state.destBlendFactorAlpha); if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO) { current.w = MulHigh(As(current.w), As(sourceFactor.w)); } - + if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO) { pixel.w = MulHigh(As(pixel.w), As(destFactor.w)); @@ -1275,14 +1268,14 @@ namespace sw current.w = pixel.w; break; case BLENDOP_NULL: - current.w = Short4(0x0000, 0x0000, 0x0000, 0x0000); + current.w = Short4(0x0000); break; default: ASSERT(false); } } - void PixelRoutine::logicOperation(Registers &r, int index, Pointer &cBuffer, Vector4s &current, Int &x) + void PixelRoutine::logicOperation(int index, Pointer &cBuffer, Vector4s &current, Int &x) { if(state.logicalOperation == LOGICALOP_COPY) { @@ -1290,21 +1283,19 @@ namespace sw } Vector4s pixel; - - // Read pixel - readPixel(r, index, cBuffer, current, x, pixel); + readPixel(index, cBuffer, x, pixel); switch(state.logicalOperation) { case LOGICALOP_CLEAR: - current.x = 0; - current.y = 0; - current.z = 0; + current.x = UShort4(0);
+ current.y = UShort4(0); + current.z = UShort4(0); break; case LOGICALOP_SET: - current.x = 0xFFFFu; - current.y = 0xFFFFu; - current.z = 0xFFFFu; + current.x = UShort4(0xFFFFu); + current.y = UShort4(0xFFFFu); + current.z = UShort4(0xFFFFu); break; case LOGICALOP_COPY: ASSERT(false); // Optimized out @@ -1379,11 +1370,11 @@ namespace sw } } - void PixelRoutine::writeColor(Registers &r, int index, Pointer &cBuffer, Int &x, Vector4s &current, Int &sMask, Int &zMask, Int &cMask) + void PixelRoutine::writeColor(int index, Pointer &cBuffer, Int &x, Vector4s &current, Int &sMask, Int &zMask, Int &cMask) { - if(postBlendSRGB && state.writeSRGB) + if((postBlendSRGB && state.writeSRGB) || isSRGB(index)) { - linearToSRGB16_12_16(r, current); + linearToSRGB16_12_16(current); } if(exactColorRounding) @@ -1391,7 +1382,9 @@ namespace sw switch(state.targetFormat[index]) { case FORMAT_R5G6B5: - // UNIMPLEMENTED(); // FIXME + current.x = AddSat(As(current.x), UShort4(0x0400)); + current.y = AddSat(As(current.y), UShort4(0x0200)); + current.z = AddSat(As(current.z), UShort4(0x0400)); break; case FORMAT_X8G8R8B8Q: case FORMAT_A8G8R8B8Q: @@ -1399,19 +1392,22 @@ namespace sw case FORMAT_X8B8G8R8: case FORMAT_A8R8G8B8: case FORMAT_A8B8G8R8: - { - current.x = current.x - As(As(current.x) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080); - current.y = current.y - As(As(current.y) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080); - current.z = current.z - As(As(current.z) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080); - current.w = current.w - As(As(current.w) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080); - } + case FORMAT_SRGB8_X8: + case FORMAT_SRGB8_A8: + case FORMAT_G8R8: + case FORMAT_R8: + current.x = current.x - As(As(current.x) >> 8) + Short4(0x0080); + current.y = current.y - As(As(current.y) >> 8) + Short4(0x0080); + current.z = current.z - As(As(current.z) >> 8) + Short4(0x0080); + current.w = current.w - As(As(current.w) >> 8) + Short4(0x0080); + break; + default: break; } } int
rgbaWriteMask = state.colorWriteActive(index); - int bgraWriteMask = rgbaWriteMask & 0x0000000A | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2; - int brgaWriteMask = rgbaWriteMask & 0x00000008 | (rgbaWriteMask & 0x00000001) << 1 | (rgbaWriteMask & 0x00000002) << 1 | (rgbaWriteMask & 0x00000004) >> 2; + int bgraWriteMask = (rgbaWriteMask & 0x0000000A) | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2; switch(state.targetFormat[index]) { @@ -1481,7 +1477,9 @@ namespace sw break; case FORMAT_X8B8G8R8: case FORMAT_A8B8G8R8: - if(state.targetFormat[index] == FORMAT_X8B8G8R8 || rgbaWriteMask == 0x7) + case FORMAT_SRGB8_X8: + case FORMAT_SRGB8_A8: + if(state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8 || rgbaWriteMask == 0x7) { current.x = As(As(current.x) >> 8); current.y = As(As(current.y) >> 8); @@ -1515,6 +1513,17 @@ namespace sw current.y = As(UnpackHigh(current.y, current.x)); } break; + case FORMAT_G8R8: + current.x = As(As(current.x) >> 8); + current.y = As(As(current.y) >> 8); + current.x = As(Pack(As(current.x), As(current.x))); + current.y = As(Pack(As(current.y), As(current.y))); + current.x = UnpackLow(As(current.x), As(current.y)); + break; + case FORMAT_R8: + current.x = As(As(current.x) >> 8); + current.x = As(Pack(As(current.x), As(current.x))); + break; case FORMAT_A8: current.w = As(As(current.w) >> 8); current.w = As(Pack(As(current.w), As(current.w))); @@ -1563,17 +1572,17 @@ namespace sw if((bgraWriteMask & 0x00000007) != 0x00000007) { Int masked = value; - c01 &= *Pointer(r.constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0])); - masked &= *Pointer(r.constants + OFFSET(Constants,invMask565Q[bgraWriteMask & 0x7][0])); + c01 &= *Pointer(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0])); + masked &= *Pointer(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0])); c01 |= masked; } - c01 &= *Pointer(r.constants + 
OFFSET(Constants,maskW4Q[0][0]) + xMask * 8); - value &= *Pointer(r.constants + OFFSET(Constants,invMaskW4Q[0][0]) + xMask * 8); + c01 &= *Pointer(constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8); + value &= *Pointer(constants + OFFSET(Constants,invMaskW4Q[0][0]) + xMask * 8); c01 |= value; *Pointer(buffer) = c01; - buffer += *Pointer(r.data + OFFSET(DrawData,colorPitchB[index])); + buffer += *Pointer(data + OFFSET(DrawData,colorPitchB[index])); value = *Pointer(buffer); Int c23 = Extract(As(current.x), 1); @@ -1581,13 +1590,13 @@ namespace sw if((bgraWriteMask & 0x00000007) != 0x00000007) { Int masked = value; - c23 &= *Pointer(r.constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0])); - masked &= *Pointer(r.constants + OFFSET(Constants,invMask565Q[bgraWriteMask & 0x7][0])); + c23 &= *Pointer(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0])); + masked &= *Pointer(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0])); c23 |= masked; } - c23 &= *Pointer(r.constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8); - value &= *Pointer(r.constants + OFFSET(Constants,invMaskW4Q[0][2]) + xMask * 8); + c23 &= *Pointer(constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8); + value &= *Pointer(constants + OFFSET(Constants,invMaskW4Q[0][2]) + xMask * 8); c23 |= value; *Pointer(buffer) = c23; } @@ -1602,13 +1611,13 @@ namespace sw // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? 
// { // Short4 masked = value; - // c01 &= *Pointer(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); - // masked &= *Pointer(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); + // c01 &= *Pointer(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); + // masked &= *Pointer(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); // c01 |= masked; // } - // c01 &= *Pointer(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8); - // value &= *Pointer(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); + // c01 &= *Pointer(constants + OFFSET(Constants,maskD01Q) + xMask * 8); + // value &= *Pointer(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); // c01 |= value; // *Pointer(cBuffer + 8 * x + 0) = c01; @@ -1619,13 +1628,13 @@ namespace sw // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? // { // Short4 masked = value; - // c23 &= *Pointer(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); - // masked &= *Pointer(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); + // c23 &= *Pointer(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); + // masked &= *Pointer(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); // c23 |= masked; // } - // c23 &= *Pointer(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8); - // value &= *Pointer(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); + // c23 &= *Pointer(constants + OFFSET(Constants,maskD23Q) + xMask * 8); + // value &= *Pointer(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); // c23 |= value; // *Pointer(cBuffer + 8 * x + 8) = c23; break; @@ -1640,17 +1649,17 @@ namespace sw (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? 
{ Short4 masked = value; - c01 &= *Pointer(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); - masked &= *Pointer(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); + c01 &= *Pointer(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); + masked &= *Pointer(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); c01 |= masked; } - c01 &= *Pointer(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8); - value &= *Pointer(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); + c01 &= *Pointer(constants + OFFSET(Constants,maskD01Q) + xMask * 8); + value &= *Pointer(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); c01 |= value; *Pointer(buffer) = c01; - buffer += *Pointer(r.data + OFFSET(DrawData,colorPitchB[index])); + buffer += *Pointer(data + OFFSET(DrawData,colorPitchB[index])); value = *Pointer(buffer); if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) || @@ -1658,69 +1667,114 @@ namespace sw (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh? { Short4 masked = value; - c23 &= *Pointer(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); - masked &= *Pointer(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); + c23 &= *Pointer(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0])); + masked &= *Pointer(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0])); c23 |= masked; } - c23 &= *Pointer(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8); - value &= *Pointer(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); + c23 &= *Pointer(constants + OFFSET(Constants,maskD23Q) + xMask * 8); + value &= *Pointer(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); c23 |= value; *Pointer(buffer) = c23; } break; case FORMAT_A8B8G8R8: case FORMAT_X8B8G8R8: // FIXME: Don't touch alpha? 
+ case FORMAT_SRGB8_X8: + case FORMAT_SRGB8_A8: { Pointer buffer = cBuffer + x * 4; Short4 value = *Pointer(buffer); - if((state.targetFormat[index] == FORMAT_A8B8G8R8 && rgbaWriteMask != 0x0000000F) || - ((state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x00000007) && - (state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x0000000F))) // FIXME: Need for masking when XBGR && Fh? + bool masked = (((state.targetFormat[index] == FORMAT_A8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_A8) && rgbaWriteMask != 0x0000000F) || + (((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x00000007) && + ((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x0000000F))); // FIXME: Need for masking when XBGR && Fh? + + if(masked) { Short4 masked = value; - c01 &= *Pointer(r.constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0])); - masked &= *Pointer(r.constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0])); + c01 &= *Pointer(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0])); + masked &= *Pointer(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0])); c01 |= masked; } - c01 &= *Pointer(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8); - value &= *Pointer(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); + c01 &= *Pointer(constants + OFFSET(Constants,maskD01Q) + xMask * 8); + value &= *Pointer(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); c01 |= value; *Pointer(buffer) = c01; - buffer += *Pointer(r.data + OFFSET(DrawData,colorPitchB[index])); + buffer += *Pointer(data + OFFSET(DrawData,colorPitchB[index])); value = *Pointer(buffer); - if((state.targetFormat[index] == FORMAT_A8B8G8R8 && rgbaWriteMask != 0x0000000F) || - ((state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x00000007) && - (state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x0000000F))) // 
FIXME: Need for masking when XBGR && Fh? + if(masked) { Short4 masked = value; - c23 &= *Pointer(r.constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0])); - masked &= *Pointer(r.constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0])); + c23 &= *Pointer(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0])); + masked &= *Pointer(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0])); c23 |= masked; } - c23 &= *Pointer(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8); - value &= *Pointer(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); + c23 &= *Pointer(constants + OFFSET(Constants,maskD23Q) + xMask * 8); + value &= *Pointer(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); c23 |= value; *Pointer(buffer) = c23; } break; + case FORMAT_G8R8: + if((rgbaWriteMask & 0x00000003) != 0x0) + { + Pointer buffer = cBuffer + 2 * x; + Int2 value; + value = Insert(value, *Pointer(buffer), 0); + Int pitch = *Pointer(data + OFFSET(DrawData, colorPitchB[index])); + value = Insert(value, *Pointer(buffer + pitch), 1); + + Int2 packedCol = As(current.x); + + UInt2 mergedMask = *Pointer(constants + OFFSET(Constants, maskW4Q) + xMask * 8); + if((rgbaWriteMask & 0x3) != 0x3) + { + Int tmpMask = *Pointer(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0])); + UInt2 rgbaMask = As(Int2(tmpMask, tmpMask)); + mergedMask &= rgbaMask; + } + + packedCol = As((As(packedCol) & mergedMask) | (As(value) & ~mergedMask)); + + *Pointer(buffer) = As(Extract(packedCol, 0)); + *Pointer(buffer + pitch) = As(Extract(packedCol, 1)); + } + break; + case FORMAT_R8: + if(rgbaWriteMask & 0x00000001) + { + Pointer buffer = cBuffer + 1 * x; + Short4 value; + value = Insert(value, *Pointer(buffer), 0); + Int pitch = *Pointer(data + OFFSET(DrawData, colorPitchB[index])); + value = Insert(value, *Pointer(buffer + pitch), 1); + value = UnpackLow(As(value), As(value)); + + current.x &= *Pointer(constants + OFFSET(Constants, maskB4Q) + 8 * xMask); + value &= 
*Pointer(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask); + current.x |= value; + + *Pointer(buffer) = Extract(current.x, 0); + *Pointer(buffer + pitch) = Extract(current.x, 1); + } + break; case FORMAT_A8: if(rgbaWriteMask & 0x00000008) { Pointer buffer = cBuffer + 1 * x; Short4 value; - Insert(value, *Pointer(buffer), 0); - Int pitch = *Pointer(r.data + OFFSET(DrawData,colorPitchB[index])); - Insert(value, *Pointer(buffer + pitch), 1); + value = Insert(value, *Pointer(buffer), 0); + Int pitch = *Pointer(data + OFFSET(DrawData,colorPitchB[index])); + value = Insert(value, *Pointer(buffer + pitch), 1); value = UnpackLow(As(value), As(value)); - current.w &= *Pointer(r.constants + OFFSET(Constants,maskB4Q) + 8 * xMask); - value &= *Pointer(r.constants + OFFSET(Constants,invMaskB4Q) + 8 * xMask); + current.w &= *Pointer(constants + OFFSET(Constants,maskB4Q) + 8 * xMask); + value &= *Pointer(constants + OFFSET(Constants,invMaskB4Q) + 8 * xMask); current.w |= value; *Pointer(buffer) = Extract(current.w, 0); @@ -1736,30 +1790,30 @@ namespace sw if((rgbaWriteMask & 0x00000003) != 0x00000003) { Short4 masked = value; - current.x &= *Pointer(r.constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0])); - masked &= *Pointer(r.constants + OFFSET(Constants,invMaskW01Q[rgbaWriteMask & 0x3][0])); + current.x &= *Pointer(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0])); + masked &= *Pointer(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0])); current.x |= masked; } - current.x &= *Pointer(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8); - value &= *Pointer(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); + current.x &= *Pointer(constants + OFFSET(Constants,maskD01Q) + xMask * 8); + value &= *Pointer(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8); current.x |= value; *Pointer(buffer) = current.x; - buffer += *Pointer(r.data + OFFSET(DrawData,colorPitchB[index])); + buffer += *Pointer(data + 
OFFSET(DrawData,colorPitchB[index])); value = *Pointer(buffer); if((rgbaWriteMask & 0x00000003) != 0x00000003) { Short4 masked = value; - current.y &= *Pointer(r.constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0])); - masked &= *Pointer(r.constants + OFFSET(Constants,invMaskW01Q[rgbaWriteMask & 0x3][0])); + current.y &= *Pointer(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0])); + masked &= *Pointer(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0])); current.y |= masked; } - current.y &= *Pointer(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8); - value &= *Pointer(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); + current.y &= *Pointer(constants + OFFSET(Constants,maskD23Q) + xMask * 8); + value &= *Pointer(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8); current.y |= value; *Pointer(buffer) = current.y; } @@ -1774,13 +1828,13 @@ namespace sw if(rgbaWriteMask != 0x0000000F) { Short4 masked = value; - current.x &= *Pointer(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); - masked &= *Pointer(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); + current.x &= *Pointer(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); + masked &= *Pointer(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); current.x |= masked; } - current.x &= *Pointer(r.constants + OFFSET(Constants,maskQ0Q) + xMask * 8); - value &= *Pointer(r.constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8); + current.x &= *Pointer(constants + OFFSET(Constants,maskQ0Q) + xMask * 8); + value &= *Pointer(constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8); current.x |= value; *Pointer(buffer) = current.x; } @@ -1791,18 +1845,18 @@ namespace sw if(rgbaWriteMask != 0x0000000F) { Short4 masked = value; - current.y &= *Pointer(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); - masked &= *Pointer(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); + current.y &= *Pointer(constants + 
OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); + masked &= *Pointer(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); current.y |= masked; } - current.y &= *Pointer(r.constants + OFFSET(Constants,maskQ1Q) + xMask * 8); - value &= *Pointer(r.constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8); + current.y &= *Pointer(constants + OFFSET(Constants,maskQ1Q) + xMask * 8); + value &= *Pointer(constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8); current.y |= value; *Pointer(buffer + 8) = current.y; } - buffer += *Pointer(r.data + OFFSET(DrawData,colorPitchB[index])); + buffer += *Pointer(data + OFFSET(DrawData,colorPitchB[index])); { Short4 value = *Pointer(buffer); @@ -1810,13 +1864,13 @@ namespace sw if(rgbaWriteMask != 0x0000000F) { Short4 masked = value; - current.z &= *Pointer(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); - masked &= *Pointer(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); + current.z &= *Pointer(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); + masked &= *Pointer(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); current.z |= masked; } - current.z &= *Pointer(r.constants + OFFSET(Constants,maskQ2Q) + xMask * 8); - value &= *Pointer(r.constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8); + current.z &= *Pointer(constants + OFFSET(Constants,maskQ2Q) + xMask * 8); + value &= *Pointer(constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8); current.z |= value; *Pointer(buffer) = current.z; } @@ -1827,13 +1881,13 @@ namespace sw if(rgbaWriteMask != 0x0000000F) { Short4 masked = value; - current.w &= *Pointer(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); - masked &= *Pointer(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); + current.w &= *Pointer(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0])); + masked &= *Pointer(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0])); current.w |= masked; } - current.w &= *Pointer(r.constants + 
OFFSET(Constants,maskQ3Q) + xMask * 8); - value &= *Pointer(r.constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8); + current.w &= *Pointer(constants + OFFSET(Constants,maskQ3Q) + xMask * 8); + value &= *Pointer(constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8); current.w |= value; *Pointer(buffer + 8) = current.w; } @@ -1844,7 +1898,7 @@ namespace sw } } - void PixelRoutine::blendFactor(Registers &r, const Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorActive) + void PixelRoutine::blendFactor(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorActive) { switch(blendFactorActive) { @@ -1901,21 +1955,21 @@ namespace sw blendFactor.z = blendFactor.x; break; case BLEND_CONSTANT: - blendFactor.x = *Pointer(r.data + OFFSET(DrawData,factor.blendConstant4F[0])); - blendFactor.y = *Pointer(r.data + OFFSET(DrawData,factor.blendConstant4F[1])); - blendFactor.z = *Pointer(r.data + OFFSET(DrawData,factor.blendConstant4F[2])); + blendFactor.x = *Pointer(data + OFFSET(DrawData,factor.blendConstant4F[0])); + blendFactor.y = *Pointer(data + OFFSET(DrawData,factor.blendConstant4F[1])); + blendFactor.z = *Pointer(data + OFFSET(DrawData,factor.blendConstant4F[2])); break; case BLEND_INVCONSTANT: - blendFactor.x = *Pointer(r.data + OFFSET(DrawData,factor.invBlendConstant4F[0])); - blendFactor.y = *Pointer(r.data + OFFSET(DrawData,factor.invBlendConstant4F[1])); - blendFactor.z = *Pointer(r.data + OFFSET(DrawData,factor.invBlendConstant4F[2])); + blendFactor.x = *Pointer(data + OFFSET(DrawData,factor.invBlendConstant4F[0])); + blendFactor.y = *Pointer(data + OFFSET(DrawData,factor.invBlendConstant4F[1])); + blendFactor.z = *Pointer(data + OFFSET(DrawData,factor.invBlendConstant4F[2])); break; default: ASSERT(false); } } - void PixelRoutine::blendFactorAlpha(Registers &r, const Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorAlphaActive) + void 
PixelRoutine::blendFactorAlpha(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorAlphaActive) { switch(blendFactorAlphaActive) { @@ -1953,17 +2007,17 @@ namespace sw blendFactor.w = Float4(1.0f); break; case BLEND_CONSTANT: - blendFactor.w = *Pointer(r.data + OFFSET(DrawData,factor.blendConstant4F[3])); + blendFactor.w = *Pointer(data + OFFSET(DrawData,factor.blendConstant4F[3])); break; case BLEND_INVCONSTANT: - blendFactor.w = *Pointer(r.data + OFFSET(DrawData,factor.invBlendConstant4F[3])); + blendFactor.w = *Pointer(data + OFFSET(DrawData,factor.invBlendConstant4F[3])); break; default: ASSERT(false); } } - void PixelRoutine::alphaBlend(Registers &r, int index, Pointer &cBuffer, Vector4f &oC, Int &x) + void PixelRoutine::alphaBlend(int index, Pointer &cBuffer, Vector4f &oC, Int &x) { if(!state.alphaBlendActive) { @@ -1977,47 +2031,65 @@ namespace sw Short4 c01; Short4 c23; + Float4 one; + if(Surface::isFloatFormat(state.targetFormat[index])) + { + one = Float4(1.0f); + } + else if(Surface::isNonNormalizedInteger(state.targetFormat[index])) + { + one = As(Surface::isUnsignedComponent(state.targetFormat[index], 0) ? 
Int4(0xFFFFFFFF) : Int4(0x7FFFFFFF)); + } + switch(state.targetFormat[index]) { + case FORMAT_R32I: + case FORMAT_R32UI: case FORMAT_R32F: buffer = cBuffer; // FIXME: movlps pixel.x.x = *Pointer(buffer + 4 * x + 0); pixel.x.y = *Pointer(buffer + 4 * x + 4); - buffer += *Pointer(r.data + OFFSET(DrawData,colorPitchB[index])); + buffer += *Pointer(data + OFFSET(DrawData,colorPitchB[index])); // FIXME: movhps pixel.x.z = *Pointer(buffer + 4 * x + 0); pixel.x.w = *Pointer(buffer + 4 * x + 4); - pixel.y = Float4(1.0f); - pixel.z = Float4(1.0f); - pixel.w = Float4(1.0f); + pixel.y = pixel.z = pixel.w = one; break; + case FORMAT_G32R32I: + case FORMAT_G32R32UI: case FORMAT_G32R32F: buffer = cBuffer; pixel.x = *Pointer(buffer + 8 * x, 16); - buffer += *Pointer(r.data + OFFSET(DrawData,colorPitchB[index])); + buffer += *Pointer(data + OFFSET(DrawData,colorPitchB[index])); pixel.y = *Pointer(buffer + 8 * x, 16); pixel.z = pixel.x; pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x88); pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0xDD); pixel.y = pixel.z; - pixel.z = Float4(1.0f); - pixel.w = Float4(1.0f); + pixel.z = pixel.w = one; break; + case FORMAT_X32B32G32R32F: case FORMAT_A32B32G32R32F: + case FORMAT_A32B32G32R32I: + case FORMAT_A32B32G32R32UI: buffer = cBuffer; pixel.x = *Pointer(buffer + 16 * x, 16); pixel.y = *Pointer(buffer + 16 * x + 16, 16); - buffer += *Pointer(r.data + OFFSET(DrawData,colorPitchB[index])); + buffer += *Pointer(data + OFFSET(DrawData,colorPitchB[index])); pixel.z = *Pointer(buffer + 16 * x, 16); pixel.w = *Pointer(buffer + 16 * x + 16, 16); transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); + if(state.targetFormat[index] == FORMAT_X32B32G32R32F) + { + pixel.w = Float4(1.0f); + } break; default: ASSERT(false); } - if(postBlendSRGB && state.writeSRGB) + if((postBlendSRGB && state.writeSRGB) || isSRGB(index)) { sRGBtoLinear(pixel.x); sRGBtoLinear(pixel.y); @@ -2028,8 +2100,8 @@ namespace sw Vector4f sourceFactor; Vector4f destFactor; - blendFactor(r, 
sourceFactor, oC, pixel, state.sourceBlendFactor); - blendFactor(r, destFactor, oC, pixel, state.destBlendFactor); + blendFactor(sourceFactor, oC, pixel, state.sourceBlendFactor); + blendFactor(destFactor, oC, pixel, state.destBlendFactor); if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO) { @@ -2037,7 +2109,7 @@ namespace sw oC.y *= sourceFactor.y; oC.z *= sourceFactor.z; } - + if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO) { pixel.x *= destFactor.x; @@ -2089,14 +2161,14 @@ namespace sw ASSERT(false); } - blendFactorAlpha(r, sourceFactor, oC, pixel, state.sourceBlendFactorAlpha); - blendFactorAlpha(r, destFactor, oC, pixel, state.destBlendFactorAlpha); + blendFactorAlpha(sourceFactor, oC, pixel, state.sourceBlendFactorAlpha); + blendFactorAlpha(destFactor, oC, pixel, state.destBlendFactorAlpha); if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO) { oC.w *= sourceFactor.w; } - + if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO) { pixel.w *= destFactor.w; @@ -2114,10 +2186,10 @@ namespace sw pixel.w -= oC.w; oC.w = pixel.w; break; - case BLENDOP_MIN: + case BLENDOP_MIN: oC.w = Min(oC.w, pixel.w); break; - case BLENDOP_MAX: + case BLENDOP_MAX: oC.w = Max(oC.w, pixel.w); break; case BLENDOP_SOURCE: @@ -2134,19 +2206,38 @@ namespace sw } } - void PixelRoutine::writeColor(Registers &r, int index, Pointer &cBuffer, Int &x, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask) + void PixelRoutine::writeColor(int index, Pointer &cBuffer, Int &x, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask) { switch(state.targetFormat[index]) { case FORMAT_R32F: + case FORMAT_R32I: + case FORMAT_R32UI: + case FORMAT_R16I: + case FORMAT_R16UI: + case FORMAT_R8I: + case FORMAT_R8UI: break; case FORMAT_G32R32F: + case FORMAT_G32R32I: + case FORMAT_G32R32UI: + case FORMAT_G16R16I: + case FORMAT_G16R16UI: + case FORMAT_G8R8I: + case FORMAT_G8R8UI: oC.z = 
oC.x; oC.x = UnpackLow(oC.x, oC.y); oC.z = UnpackHigh(oC.z, oC.y); oC.y = oC.z; break; + case FORMAT_X32B32G32R32F: case FORMAT_A32B32G32R32F: + case FORMAT_A32B32G32R32I: + case FORMAT_A32B32G32R32UI: + case FORMAT_A16B16G16R16I: + case FORMAT_A16B16G16R16UI: + case FORMAT_A8B8G8R8I: + case FORMAT_A8B8G8R8UI: transpose4x4(oC.x, oC.y, oC.z, oC.w); break; default: @@ -2177,6 +2268,8 @@ namespace sw switch(state.targetFormat[index]) { case FORMAT_R32F: + case FORMAT_R32I: + case FORMAT_R32UI: if(rgbaWriteMask & 0x00000001) { buffer = cBuffer + 4 * x; @@ -2185,28 +2278,109 @@ namespace sw value.x = *Pointer(buffer + 0); value.y = *Pointer(buffer + 4); - buffer += *Pointer(r.data + OFFSET(DrawData,colorPitchB[index])); + buffer += *Pointer(data + OFFSET(DrawData,colorPitchB[index])); // FIXME: movhps value.z = *Pointer(buffer + 0); value.w = *Pointer(buffer + 4); - oC.x = As(As(oC.x) & *Pointer(r.constants + OFFSET(Constants,maskD4X) + xMask * 16, 16)); - value = As(As(value) & *Pointer(r.constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16)); + oC.x = As(As(oC.x) & *Pointer(constants + OFFSET(Constants,maskD4X) + xMask * 16, 16)); + value = As(As(value) & *Pointer(constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16)); oC.x = As(As(oC.x) | As(value)); // FIXME: movhps *Pointer(buffer + 0) = oC.x.z; *Pointer(buffer + 4) = oC.x.w; - buffer -= *Pointer(r.data + OFFSET(DrawData,colorPitchB[index])); + buffer -= *Pointer(data + OFFSET(DrawData,colorPitchB[index])); // FIXME: movlps *Pointer(buffer + 0) = oC.x.x; *Pointer(buffer + 4) = oC.x.y; } break; + case FORMAT_R16I: + case FORMAT_R16UI: + if(rgbaWriteMask & 0x00000001) + { + buffer = cBuffer + 2 * x; + + UShort4 xyzw; + xyzw = As(Insert(As(xyzw), *Pointer(buffer), 0)); + + buffer += *Pointer(data + OFFSET(DrawData, colorPitchB[index])); + + xyzw = As(Insert(As(xyzw), *Pointer(buffer), 1)); + value = As(Int4(xyzw)); + + oC.x = As(As(oC.x) & *Pointer(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16)); + 
value = As(As(value) & *Pointer(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16)); + oC.x = As(As(oC.x) | As(value)); + + if(state.targetFormat[index] == FORMAT_R16I) + { + Float component = oC.x.z; + *Pointer(buffer + 0) = Short(As(component)); + component = oC.x.w; + *Pointer(buffer + 2) = Short(As(component)); + + buffer -= *Pointer(data + OFFSET(DrawData, colorPitchB[index])); + + component = oC.x.x; + *Pointer(buffer + 0) = Short(As(component)); + component = oC.x.y; + *Pointer(buffer + 2) = Short(As(component)); + } + else // FORMAT_R16UI + { + Float component = oC.x.z; + *Pointer(buffer + 0) = UShort(As(component)); + component = oC.x.w; + *Pointer(buffer + 2) = UShort(As(component)); + + buffer -= *Pointer(data + OFFSET(DrawData, colorPitchB[index])); + + component = oC.x.x; + *Pointer(buffer + 0) = UShort(As(component)); + component = oC.x.y; + *Pointer(buffer + 2) = UShort(As(component)); + } + } + break; + case FORMAT_R8I: + case FORMAT_R8UI: + if(rgbaWriteMask & 0x00000001) + { + buffer = cBuffer + x; + + UInt xyzw, packedCol; + + xyzw = UInt(*Pointer(buffer)) & 0xFFFF; + buffer += *Pointer(data + OFFSET(DrawData, colorPitchB[index])); + xyzw |= UInt(*Pointer(buffer)) << 16; + + Short4 tmpCol = Short4(As(oC.x)); + if(state.targetFormat[index] == FORMAT_R8I) + { + tmpCol = As(Pack(tmpCol, tmpCol)); + } + else + { + tmpCol = As(Pack(As(tmpCol), As(tmpCol))); + } + packedCol = Extract(As(tmpCol), 0); + + packedCol = (packedCol & *Pointer(constants + OFFSET(Constants, maskB4Q) + 8 * xMask)) | + (xyzw & *Pointer(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask)); + + *Pointer(buffer) = UShort(packedCol >> 16); + buffer -= *Pointer(data + OFFSET(DrawData, colorPitchB[index])); + *Pointer(buffer) = UShort(packedCol); + } + break; case FORMAT_G32R32F: + case FORMAT_G32R32I: + case FORMAT_G32R32UI: buffer = cBuffer + 8 * x; value = *Pointer(buffer); @@ -2214,17 +2388,17 @@ namespace sw if((rgbaWriteMask & 0x00000003) != 0x00000003) { Float4 
masked = value; - oC.x = As(As(oC.x) & *Pointer(r.constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0]))); - masked = As(As(masked) & *Pointer(r.constants + OFFSET(Constants,invMaskD01X[rgbaWriteMask & 0x3][0]))); + oC.x = As(As(oC.x) & *Pointer(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0]))); + masked = As(As(masked) & *Pointer(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0]))); oC.x = As(As(oC.x) | As(masked)); } - oC.x = As(As(oC.x) & *Pointer(r.constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16)); - value = As(As(value) & *Pointer(r.constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16)); + oC.x = As(As(oC.x) & *Pointer(constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16)); + value = As(As(value) & *Pointer(constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16)); oC.x = As(As(oC.x) | As(value)); *Pointer(buffer) = oC.x; - buffer += *Pointer(r.data + OFFSET(DrawData,colorPitchB[index])); + buffer += *Pointer(data + OFFSET(DrawData,colorPitchB[index])); value = *Pointer(buffer); @@ -2233,17 +2407,86 @@ namespace sw Float4 masked; masked = value; - oC.y = As(As(oC.y) & *Pointer(r.constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0]))); - masked = As(As(masked) & *Pointer(r.constants + OFFSET(Constants,invMaskD01X[rgbaWriteMask & 0x3][0]))); + oC.y = As(As(oC.y) & *Pointer(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0]))); + masked = As(As(masked) & *Pointer(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0]))); oC.y = As(As(oC.y) | As(masked)); } - oC.y = As(As(oC.y) & *Pointer(r.constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16)); - value = As(As(value) & *Pointer(r.constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16)); + oC.y = As(As(oC.y) & *Pointer(constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16)); + value = As(As(value) & *Pointer(constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16)); oC.y = As(As(oC.y) | As(value)); *Pointer(buffer) = 
oC.y; break; + case FORMAT_G16R16I: + case FORMAT_G16R16UI: + if((rgbaWriteMask & 0x00000003) != 0x0) + { + buffer = cBuffer + 4 * x; + + UInt2 rgbaMask; + UShort4 packedCol = UShort4(As(oC.x)); + UShort4 value = *Pointer(buffer); + UInt2 mergedMask = *Pointer(constants + OFFSET(Constants, maskD01Q) + xMask * 8); + if((rgbaWriteMask & 0x3) != 0x3) + { + Int tmpMask = *Pointer(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0])); + rgbaMask = As(Int2(tmpMask, tmpMask)); + mergedMask &= rgbaMask; + } + *Pointer(buffer) = (As(packedCol) & mergedMask) | (As(value) & ~mergedMask); + + buffer += *Pointer(data + OFFSET(DrawData, colorPitchB[index])); + + packedCol = UShort4(As(oC.y)); + value = *Pointer(buffer); + mergedMask = *Pointer(constants + OFFSET(Constants, maskD23Q) + xMask * 8); + if((rgbaWriteMask & 0x3) != 0x3) + { + mergedMask &= rgbaMask; + } + *Pointer(buffer) = (As(packedCol) & mergedMask) | (As(value) & ~mergedMask); + } + break; + case FORMAT_G8R8I: + case FORMAT_G8R8UI: + if((rgbaWriteMask & 0x00000003) != 0x0) + { + buffer = cBuffer + 2 * x; + + Int2 xyzw, packedCol; + + xyzw = Insert(xyzw, *Pointer(buffer), 0); + buffer += *Pointer(data + OFFSET(DrawData, colorPitchB[index])); + xyzw = Insert(xyzw, *Pointer(buffer), 1); + + if(state.targetFormat[index] == FORMAT_G8R8I) + { + packedCol = As(Pack(Short4(As(oC.x)), Short4(As(oC.y)))); + } + else + { + packedCol = As(Pack(UShort4(As(oC.x)), UShort4(As(oC.y)))); + } + + UInt2 mergedMask = *Pointer(constants + OFFSET(Constants, maskW4Q) + xMask * 8); + if((rgbaWriteMask & 0x3) != 0x3) + { + Int tmpMask = *Pointer(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0])); + UInt2 rgbaMask = As(Int2(tmpMask, tmpMask)); + mergedMask &= rgbaMask; + } + + packedCol = As((As(packedCol) & mergedMask) | (As(xyzw) & ~mergedMask)); + + *Pointer(buffer) = As(Extract(packedCol, 1)); + buffer -= *Pointer(data + OFFSET(DrawData, colorPitchB[index])); + *Pointer(buffer) = As(Extract(packedCol, 0)); 
+ } + break; + case FORMAT_X32B32G32R32F: case FORMAT_A32B32G32R32F: + case FORMAT_A32B32G32R32I: + case FORMAT_A32B32G32R32UI: buffer = cBuffer + 16 * x; { @@ -2252,13 +2495,13 @@ namespace sw if(rgbaWriteMask != 0x0000000F) { Float4 masked = value; - oC.x = As(As(oC.x) & *Pointer(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); - masked = As(As(masked) & *Pointer(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); + oC.x = As(As(oC.x) & *Pointer(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); + masked = As(As(masked) & *Pointer(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); oC.x = As(As(oC.x) | As(masked)); } - - oC.x = As(As(oC.x) & *Pointer(r.constants + OFFSET(Constants,maskX0X) + xMask * 16, 16)); - value = As(As(value) & *Pointer(r.constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16)); + + oC.x = As(As(oC.x) & *Pointer(constants + OFFSET(Constants,maskX0X) + xMask * 16, 16)); + value = As(As(value) & *Pointer(constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16)); oC.x = As(As(oC.x) | As(value)); *Pointer(buffer, 16) = oC.x; } @@ -2267,20 +2510,20 @@ namespace sw value = *Pointer(buffer + 16, 16); if(rgbaWriteMask != 0x0000000F) - { + { Float4 masked = value; - oC.y = As(As(oC.y) & *Pointer(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); - masked = As(As(masked) & *Pointer(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); + oC.y = As(As(oC.y) & *Pointer(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); + masked = As(As(masked) & *Pointer(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); oC.y = As(As(oC.y) | As(masked)); } - oC.y = As(As(oC.y) & *Pointer(r.constants + OFFSET(Constants,maskX1X) + xMask * 16, 16)); - value = As(As(value) & *Pointer(r.constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16)); + oC.y = As(As(oC.y) & *Pointer(constants + OFFSET(Constants,maskX1X) + xMask * 16, 16)); + value = As(As(value) & *Pointer(constants + 
OFFSET(Constants,invMaskX1X) + xMask * 16, 16)); oC.y = As(As(oC.y) | As(value)); *Pointer(buffer + 16, 16) = oC.y; } - buffer += *Pointer(r.data + OFFSET(DrawData,colorPitchB[index])); + buffer += *Pointer(data + OFFSET(DrawData,colorPitchB[index])); { value = *Pointer(buffer, 16); @@ -2288,13 +2531,13 @@ namespace sw if(rgbaWriteMask != 0x0000000F) { Float4 masked = value; - oC.z = As(As(oC.z) & *Pointer(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); - masked = As(As(masked) & *Pointer(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); + oC.z = As(As(oC.z) & *Pointer(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); + masked = As(As(masked) & *Pointer(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); oC.z = As(As(oC.z) | As(masked)); } - oC.z = As(As(oC.z) & *Pointer(r.constants + OFFSET(Constants,maskX2X) + xMask * 16, 16)); - value = As(As(value) & *Pointer(r.constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16)); + oC.z = As(As(oC.z) & *Pointer(constants + OFFSET(Constants,maskX2X) + xMask * 16, 16)); + value = As(As(value) & *Pointer(constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16)); oC.z = As(As(oC.z) | As(value)); *Pointer(buffer, 16) = oC.z; } @@ -2305,17 +2548,90 @@ namespace sw if(rgbaWriteMask != 0x0000000F) { Float4 masked = value; - oC.w = As(As(oC.w) & *Pointer(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); - masked = As(As(masked) & *Pointer(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); + oC.w = As(As(oC.w) & *Pointer(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0]))); + masked = As(As(masked) & *Pointer(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0]))); oC.w = As(As(oC.w) | As(masked)); } - oC.w = As(As(oC.w) & *Pointer(r.constants + OFFSET(Constants,maskX3X) + xMask * 16, 16)); - value = As(As(value) & *Pointer(r.constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16)); + oC.w = As(As(oC.w) & *Pointer(constants + 
OFFSET(Constants,maskX3X) + xMask * 16, 16)); + value = As(As(value) & *Pointer(constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16)); oC.w = As(As(oC.w) | As(value)); *Pointer(buffer + 16, 16) = oC.w; } break; + case FORMAT_A16B16G16R16I: + case FORMAT_A16B16G16R16UI: + if((rgbaWriteMask & 0x0000000F) != 0x0) + { + buffer = cBuffer + 8 * x; + + UInt4 rgbaMask; + UShort8 value = *Pointer(buffer); + UShort8 packedCol = UShort8(UShort4(As(oC.x)), UShort4(As(oC.y))); + UInt4 mergedMask = *Pointer(constants + OFFSET(Constants, maskQ01X) + xMask * 16); + if((rgbaWriteMask & 0xF) != 0xF) + { + UInt2 tmpMask = *Pointer(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0])); + rgbaMask = UInt4(tmpMask, tmpMask); + mergedMask &= rgbaMask; + } + *Pointer(buffer) = (As(packedCol) & mergedMask) | (As(value) & ~mergedMask); + + buffer += *Pointer(data + OFFSET(DrawData, colorPitchB[index])); + + value = *Pointer(buffer); + packedCol = UShort8(UShort4(As(oC.z)), UShort4(As(oC.w))); + mergedMask = *Pointer(constants + OFFSET(Constants, maskQ23X) + xMask * 16); + if((rgbaWriteMask & 0xF) != 0xF) + { + mergedMask &= rgbaMask; + } + *Pointer(buffer) = (As(packedCol) & mergedMask) | (As(value) & ~mergedMask); + } + break; + case FORMAT_A8B8G8R8I: + case FORMAT_A8B8G8R8UI: + if((rgbaWriteMask & 0x0000000F) != 0x0) + { + UInt2 value, packedCol, mergedMask; + + buffer = cBuffer + 4 * x; + + if(state.targetFormat[index] == FORMAT_A8B8G8R8I) + { + packedCol = As(Pack(Short4(As(oC.x)), Short4(As(oC.y)))); + } + else + { + packedCol = As(Pack(UShort4(As(oC.x)), UShort4(As(oC.y)))); + } + value = *Pointer(buffer, 16); + mergedMask = *Pointer(constants + OFFSET(Constants, maskD01Q) + xMask * 8); + if(rgbaWriteMask != 0xF) + { + mergedMask &= *Pointer(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0])); + } + *Pointer(buffer) = (packedCol & mergedMask) | (value & ~mergedMask); + + buffer += *Pointer(data + OFFSET(DrawData, colorPitchB[index])); + + if(state.targetFormat[index] 
== FORMAT_A8B8G8R8I) + { + packedCol = As(Pack(Short4(As(oC.z)), Short4(As(oC.w)))); + } + else + { + packedCol = As(Pack(UShort4(As(oC.z)), UShort4(As(oC.w)))); + } + value = *Pointer(buffer, 16); + mergedMask = *Pointer(constants + OFFSET(Constants, maskD23Q) + xMask * 8); + if(rgbaWriteMask != 0xF) + { + mergedMask &= *Pointer(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0])); + } + *Pointer(buffer) = (packedCol & mergedMask) | (value & ~mergedMask); + } + break; default: ASSERT(false); } @@ -2326,18 +2642,18 @@ namespace sw return UShort4(cf * Float4(0xFFFF), saturate); } - void PixelRoutine::sRGBtoLinear16_12_16(Registers &r, Vector4s &c) + void PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c) { c.x = As(c.x) >> 4; c.y = As(c.y) >> 4; c.z = As(c.z) >> 4; - sRGBtoLinear12_16(r, c); + sRGBtoLinear12_16(c); } - void PixelRoutine::sRGBtoLinear12_16(Registers &r, Vector4s &c) + void PixelRoutine::sRGBtoLinear12_16(Vector4s &c) { - Pointer LUT = r.constants + OFFSET(Constants,sRGBtoLinear12_16); + Pointer LUT = constants + OFFSET(Constants,sRGBtoLinear12_16); c.x = Insert(c.x, *Pointer(LUT + 2 * Int(Extract(c.x, 0))), 0); c.x = Insert(c.x, *Pointer(LUT + 2 * Int(Extract(c.x, 1))), 1); @@ -2355,18 +2671,18 @@ namespace sw c.z = Insert(c.z, *Pointer(LUT + 2 * Int(Extract(c.z, 3))), 3); } - void PixelRoutine::linearToSRGB16_12_16(Registers &r, Vector4s &c) + void PixelRoutine::linearToSRGB16_12_16(Vector4s &c) { c.x = As(c.x) >> 4; c.y = As(c.y) >> 4; c.z = As(c.z) >> 4; - linearToSRGB12_16(r, c); + linearToSRGB12_16(c); } - void PixelRoutine::linearToSRGB12_16(Registers &r, Vector4s &c) + void PixelRoutine::linearToSRGB12_16(Vector4s &c) { - Pointer LUT = r.constants + OFFSET(Constants,linearToSRGB12_16); + Pointer LUT = constants + OFFSET(Constants,linearToSRGB12_16); c.x = Insert(c.x, *Pointer(LUT + 2 * Int(Extract(c.x, 0))), 0); c.x = Insert(c.x, *Pointer(LUT + 2 * Int(Extract(c.x, 1))), 1);