OSDN Git Service

Add support for D16_UNORM depth test & write
authorChris Forbes <chrisforbes@google.com>
Tue, 12 Mar 2019 21:50:55 +0000 (14:50 -0700)
committerChris Forbes <chrisforbes@google.com>
Wed, 13 Mar 2019 04:04:26 +0000 (04:04 +0000)
D16_UNORM depth operations are performed in fixed point to avoid the quirks
of a ushort <-> float round trip.

Some slight oddities to work around missing primitives:
- We only have CmpEQ, CmpGT. This matches SSE hardware reality.
  Everything else is made out of negations and operand swaps.
- We only have *signed* CmpGT. Make an unsigned CmpGT by biasing both
  16-bit operands by -0x8000.
- In non-quad layout we don't have a good way to separate the .xy and .zw
  halves, so we read via masked Short4 loads and write via individual
  component stores.

Bug: b/128363727
Test: dEQP-VK.pipeline.depth.format.*
Change-Id: I9c8b249470a9c91589c91135988dc4d1a58bbc5f
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/27030
Tested-by: Chris Forbes <chrisforbes@google.com>
Presubmit-Ready: Chris Forbes <chrisforbes@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
src/Pipeline/PixelRoutine.cpp
src/Pipeline/PixelRoutine.hpp

index b27fab9..6c528d9 100644 (file)
@@ -363,13 +363,8 @@ namespace sw
                }
        }
 
-       Bool PixelRoutine::depthTest(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
+       Bool PixelRoutine::depthTest32F(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
        {
-               if(!state.depthTestActive)
-               {
-                       return true;
-               }
-
                Float4 Z = z;
 
                if(spirvShader && spirvShader->getModes().DepthReplacing)
@@ -464,6 +459,119 @@ namespace sw
                return zMask != 0;
        }
 
+       Bool PixelRoutine::depthTest16(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
+       {  // Depth test for 16-bit depth buffers: stores the mask of passing lanes in zMask and returns whether any lane passed.
+               Short4 Z = convertFixed16(z, true);  // incoming depth as 16-bit fixed point
+               // A shader that replaces depth supplies oDepth, which overrides the incoming z.
+               if(spirvShader && spirvShader->getModes().DepthReplacing)
+               {
+                       Z = convertFixed16(oDepth, true);
+               }
+               // Locate the stored depth values; pitch is only loaded (and only used) in the non-quad layout.
+               Pointer<Byte> buffer;
+               Int pitch;
+
+               if(!state.quadLayoutDepthBuffer)
+               {
+                       buffer = zBuffer + 2 * x;  // 2 bytes per 16-bit depth value
+                       pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
+               }
+               else
+               {
+                       buffer = zBuffer + 4 * x;  // presumably each x step covers half of an 8-byte 2x2 quad - TODO confirm
+               }
+               // NOTE(review): q looks like a sample/slice index; each step is depthSliceB bytes further on - confirm.
+               if(q > 0)
+               {
+                       buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
+               }
+
+               Short4 zValue;
+               // NOTE(review): ALWAYS satisfies the first clause, so the buffer is loaded although no compare uses it; NEVER with depth write enabled skips the load, yet zValue is still biased below - confirm this condition is intended.
+               if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
+               {
+                       if(!state.quadLayoutDepthBuffer)
+                       {
+                               // FIXME: Check that this pair of masked loads optimizes well.
+                               zValue = *Pointer<Short4>(buffer) & Short4(-1, -1, 0, 0);  // lanes 0,1: two values at buffer
+                               zValue = zValue | (*Pointer<Short4>(buffer + pitch - 4) & Short4(0, 0, -1, -1));  // load starts 4 bytes early so lanes 2,3 hold the two values at buffer + pitch
+                       }
+                       else
+                       {
+                               zValue = *Pointer<Short4>(buffer, 8);  // all four values are contiguous; 8 is presumably the alignment - TODO confirm
+                       }
+               }
+
+               Int4 zTest;
+
+               // Reactor (mirroring SSE) only offers signed compares; biasing both operands by 0x8000 makes a signed compare order them as unsigned values
+               zValue = zValue - Short4(0x8000);
+               Z = Z - Short4(0x8000);
+               // Build zTest from the incoming Z versus the stored zValue; only CmpEQ/CmpGT are used, so the other relations come from negation and operand swaps.
+               switch(state.depthCompareMode)
+               {
+               case VK_COMPARE_OP_ALWAYS:
+                       // Optimized: no compare needed, zMask is set directly in the switch below
+                       break;
+               case VK_COMPARE_OP_NEVER:
+                       // Optimized: no compare needed, zMask is set directly in the switch below
+                       break;
+               case VK_COMPARE_OP_EQUAL:
+                       zTest = Int4(CmpEQ(zValue, Z));
+                       break;
+               case VK_COMPARE_OP_NOT_EQUAL:
+                       zTest = ~Int4(CmpEQ(zValue, Z));
+                       break;
+               case VK_COMPARE_OP_LESS:
+                       zTest = Int4(CmpGT(zValue, Z));  // Z < zValue
+                       break;
+               case VK_COMPARE_OP_GREATER_OR_EQUAL:
+                       zTest = ~Int4(CmpGT(zValue, Z));  // !(Z < zValue)
+                       break;
+               case VK_COMPARE_OP_LESS_OR_EQUAL:
+                       zTest = ~Int4(CmpGT(Z, zValue));  // !(Z > zValue)
+                       break;
+               case VK_COMPARE_OP_GREATER:
+                       zTest = Int4(CmpGT(Z, zValue));  // Z > zValue
+                       break;
+               default:
+                       ASSERT(false);
+               }
+               // Turn the compare result into zMask, restricted to the lanes set in cMask.
+               switch(state.depthCompareMode)
+               {
+               case VK_COMPARE_OP_ALWAYS:
+                       zMask = cMask;
+                       break;
+               case VK_COMPARE_OP_NEVER:
+                       zMask = 0x0;
+                       break;
+               default:
+                       zMask = SignMask(zTest) & cMask;  // presumably one bit per lane of zTest - TODO confirm
+                       break;
+               }
+               // With stencil active, a lane must also be set in sMask to remain in zMask.
+               if(state.stencilActive)
+               {
+                       zMask &= sMask;
+               }
+
+               return zMask != 0;
+       }
+
+       Bool PixelRoutine::depthTest(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
+       {  // Depth-test entry point: handles the inactive case, then dispatches on the depth buffer format.
+               if(!state.depthTestActive)
+               {
+                       return true;  // depth test inactive: report a pass without modifying zMask
+               }
+               // D16_UNORM uses the 16-bit path; every other format falls through to the 32-bit float path.
+               if (state.depthFormat == VK_FORMAT_D16_UNORM)
+                       return depthTest16(zBuffer, q, x, z, sMask, zMask, cMask);
+               else
+                       return depthTest32F(zBuffer, q, x, z, sMask, zMask, cMask);
+       }
+
        void PixelRoutine::alphaToCoverage(Int cMask[4], Float4 &alpha)
        {
                Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c0)));
@@ -482,13 +590,8 @@ namespace sw
                cMask[3] &= aMask3;
        }
 
-       void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
+       void PixelRoutine::writeDepth32F(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
        {
-               if(!state.depthWriteEnable)
-               {
-                       return;
-               }
-
                Float4 Z = z;
 
                if(spirvShader && spirvShader->getModes().DepthReplacing)
@@ -546,6 +649,80 @@ namespace sw
                }
        }
 
+       void PixelRoutine::writeDepth16(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
+       {  // Writes depth to a 16-bit depth buffer: lanes selected by zMask take the new value, the rest keep the loaded one.
+               Short4 Z = As<Short4>(convertFixed16(z, true));  // incoming depth as 16-bit fixed point
+               // A shader that replaces depth supplies oDepth, which overrides the incoming z.
+               if(spirvShader && spirvShader->getModes().DepthReplacing)
+               {
+                       Z = As<Short4>(convertFixed16(oDepth, true));
+               }
+               // Locate the stored depth values; pitch is only loaded (and only used) in the non-quad layout.
+               Pointer<Byte> buffer;
+               Int pitch;
+
+               if(!state.quadLayoutDepthBuffer)
+               {
+                       buffer = zBuffer + 2 * x;  // 2 bytes per 16-bit depth value
+                       pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
+               }
+               else
+               {
+                       buffer = zBuffer + 4 * x;  // presumably each x step covers half of an 8-byte 2x2 quad - TODO confirm
+               }
+               // NOTE(review): q looks like a sample/slice index; each step is depthSliceB bytes further on - confirm.
+               if(q > 0)
+               {
+                       buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
+               }
+
+               Short4 zValue;
+               // NOTE(review): writeDepth() only calls this with depthWriteEnable set, so the second clause cannot hold here; with NEVER the load is skipped but zValue is still merged below - confirm callers never reach this with NEVER.
+               if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
+               {
+                       if(!state.quadLayoutDepthBuffer)
+                       {
+                               // FIXME: Check that this pair of masked loads optimizes well.
+                               zValue = *Pointer<Short4>(buffer) & Short4(-1, -1, 0, 0);  // lanes 0,1: two values at buffer
+                               zValue = zValue | (*Pointer<Short4>(buffer + pitch - 4) & Short4(0, 0, -1, -1));  // load starts 4 bytes early so lanes 2,3 hold the two values at buffer + pitch
+                       }
+                       else
+                       {
+                               zValue = *Pointer<Short4>(buffer, 8);  // all four values are contiguous; 8 is presumably the alignment - TODO confirm
+                       }
+               }
+               // Merge new and old depth. Both tables are indexed by zMask * 8 bytes - presumably one Short4 lane mask per zMask value; confirm against Constants.
+               Z = Z & *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q) + zMask * 8, 8);
+               zValue = zValue & *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q) + zMask * 8, 8);
+               Z = Z | zValue;
+               // Store the merged values back using the same addressing as the loads above.
+               if(!state.quadLayoutDepthBuffer)
+               {
+                       // FIXME: Check that these four scalar stores optimize well.
+                       *Pointer<Short>(buffer) = Extract(Z, 0);
+                       *Pointer<Short>(buffer+2) = Extract(Z, 1);
+                       *Pointer<Short>(buffer+pitch) = Extract(Z, 2);  // lanes 2,3 go one pitch further on
+                       *Pointer<Short>(buffer+pitch+2) = Extract(Z, 3);
+               }
+               else
+               {
+                       *Pointer<Short4>(buffer, 8) = Z;  // quad layout: one 8-byte store covers all four lanes
+               }
+       }
+
+       void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
+       {  // Depth-write entry point: does nothing when depth writes are disabled, otherwise dispatches on the depth buffer format.
+               if(!state.depthWriteEnable)
+               {
+                       return;
+               }
+               // D16_UNORM uses the 16-bit path; every other format falls through to the 32-bit float path.
+               if (state.depthFormat == VK_FORMAT_D16_UNORM)
+                       writeDepth16(zBuffer, q, x, z, zMask);
+               else
+                       writeDepth32F(zBuffer, q, x, z, zMask);
+       }
+
        void PixelRoutine::writeStencil(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask)
        {
                if(!state.stencilActive)
index 7b8f780..ff2044f 100644 (file)
@@ -82,6 +82,12 @@ namespace sw
                void linearToSRGB16_12_16(Vector4s &c);
                Float4 sRGBtoLinear(const Float4 &x);
 
+               Bool depthTest32F(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask);  // float depth path, selected by depthTest()
+               Bool depthTest16(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask);  // VK_FORMAT_D16_UNORM path, selected by depthTest()
+               // Format-specific depth writes, selected by writeDepth().
+               void writeDepth32F(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask);
+               void writeDepth16(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask);
+
                bool colorUsed();
        };
 }