1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "PixelRoutine.hpp"
17 #include "SamplerCore.hpp"
18 #include "Constants.hpp"
19 #include "Device/Renderer.hpp"
20 #include "Device/QuadRasterizer.hpp"
21 #include "Device/Surface.hpp"
22 #include "Device/Primitive.hpp"
23 #include "System/Debug.hpp"
// Global configuration flags, declared extern here (their definitions are not in the
// visible portion of this file). Within this file they are read by the constructor
// (forceClearRegisters), the depth routines (complementaryDepthBuffer), and the
// colour read/write routines (postBlendSRGB, exactColorRounding).
27 extern bool complementaryDepthBuffer;
28 extern bool postBlendSRGB;
29 extern bool exactColorRounding;
30 extern bool forceClearRegisters;
// Constructs the pixel routine from the pixel processor state and an optional shader.
// The base QuadRasterizer receives both; the input register array `v` is constructed
// with a flag that is true only when a shader exists and reports indirectAddressableInput.
32 PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader)
33 : QuadRasterizer(state, shader), v(shader && shader->indirectAddressableInput)
// Zero every fragment input when there is no shader, when getShaderModel() is below
// 0x0200, or when forceClearRegisters is set.
35 if(!shader || shader->getShaderModel() < 0x0200 || forceClearRegisters)
37 for(int i = 0; i < MAX_FRAGMENT_INPUTS; i++)
39 v[i].x = Float4(0.0f);
40 v[i].y = Float4(0.0f);
41 v[i].z = Float4(0.0f);
42 v[i].w = Float4(0.0f);
// Destructor. NOTE(review): its body is not visible in this listing.
47 PixelRoutine::~PixelRoutine()
// Emits the per-quad pixel pipeline. In the order visible here: stencil test, depth
// interpolation and depth test, interpolant evaluation, a timed shader section followed
// by the alpha test, depth write and raster operation, and finally the stencil write.
//   cBuffer, zBuffer, sBuffer - colour, depth and stencil buffer pointers.
//   cMask                     - one coverage mask per sample.
//   x, y                      - added to Primitive::xQuad / yQuad to form per-pixel coordinates.
// NOTE(review): this listing omits short lines (braces, simple declarations and some
// guarding conditions), so the exact nesting of the statements below cannot be confirmed here.
51 void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y)
// Start of the whole-pipeline timing; accumulated into cycles[PERF_PIPE] at the end.
54 Long pipeTime = Ticks();
// Depth is tested early only when depth is not overridden and alpha testing is inactive.
57 const bool earlyDepthTest = !state.depthOverride && !state.alphaTestActive();
59 Int zMask[4]; // Depth mask
60 Int sMask[4]; // Stencil mask
62 for(unsigned int q = 0; q < state.multiSample; q++)
// Stencil test for every sample; the result lands in sMask[q].
68 for(unsigned int q = 0; q < state.multiSample; q++)
70 stencilTest(sBuffer, q, x, sMask[q], cMask[q]);
// Per-pixel x coordinates of the quad.
76 Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16);
// Interpolate depth per sample; with multisampling the x coordinate is first offset by
// the per-sample entry of Constants::X.
80 for(unsigned int q = 0; q < state.multiSample; q++)
84 if(state.multiSample > 1)
86 x -= *Pointer<Float4>(constants + OFFSET(Constants,X) + q * sizeof(float4));
89 z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive,z), false, false, state.depthClamp);
// depthPass accumulates whether any sample passed the depth test.
93 Bool depthPass = false;
97 for(unsigned int q = 0; q < state.multiSample; q++)
99 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
// Continue when some sample passed, or unconditionally when depth is not tested early.
103 If(depthPass || Bool(!earlyDepthTest))
106 Long interpTime = Ticks();
// Per-pixel y coordinates of the quad.
109 Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16);
111 // Centroid locations
112 Float4 XXXX = Float4(0.0f);
113 Float4 YYYY = Float4(0.0f);
// WWWW starts from a tiny non-zero value rather than zero.
// NOTE(review): presumably to keep a later division well-defined — that use is not visible here.
117 Float4 WWWW(1.0e-9f);
// Accumulate sample positions and weights from constant tables indexed by each sample's coverage mask.
119 for(unsigned int q = 0; q < state.multiSample; q++)
121 XXXX += *Pointer<Float4>(constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]);
122 YYYY += *Pointer<Float4>(constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]);
123 WWWW += *Pointer<Float4>(constants + OFFSET(Constants,weight) + 16 * cMask[q]);
// Interpolate w at the pixel positions and take its reciprocal into rhw.
136 w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive,w), false, false, false);
137 rhw = reciprocal(w, false, false, true);
// Same quantity evaluated at the centroid position.
141 rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive,w), false, false));
// Evaluate every enabled interpolant component, at the pixel position or at the
// centroid depending on state.interpolant[].centroid.
145 for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++)
147 for(int component = 0; component < 4; component++)
149 if(state.interpolant[interpolant].component & (1 << component))
151 if(!state.interpolant[interpolant].centroid)
153 v[interpolant][component] = interpolate(xxxx, Dv[interpolant][component], rhw, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective, false);
157 v[interpolant][component] = interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective);
// Projective divide selected by state.interpolant[].project: the visible cases divide
// x by y, then x/y by z, then x/y/z by w (the case labels are not visible in this listing).
164 switch(state.interpolant[interpolant].project)
169 rcp = reciprocal(v[interpolant].y);
170 v[interpolant].x = v[interpolant].x * rcp;
173 rcp = reciprocal(v[interpolant].z);
174 v[interpolant].x = v[interpolant].x * rcp;
175 v[interpolant].y = v[interpolant].y * rcp;
178 rcp = reciprocal(v[interpolant].w);
179 v[interpolant].x = v[interpolant].x * rcp;
180 v[interpolant].y = v[interpolant].y * rcp;
181 v[interpolant].z = v[interpolant].z * rcp;
// Fog is interpolated only when a fog component is enabled.
186 if(state.fog.component)
188 f = interpolate(xxxx, Df, rhw, primitive + OFFSET(Primitive,f), state.fog.flat & 0x01, state.perspective, false);
191 setBuiltins(x, y, z, w);
194 cycles[PERF_INTERP] += Ticks() - interpTime;
197 Bool alphaPass = true;
// Timed shader section. NOTE(review): the call made between these two timestamps is not visible here.
202 Long shaderTime = Ticks();
208 cycles[PERF_SHADER] += Ticks() - shaderTime;
211 alphaPass = alphaTest(cMask);
// When the shader contains a kill or alpha testing is active, fold the coverage mask
// into the depth and stencil masks.
213 if((shader && shader->containsKill()) || state.alphaTestActive())
215 for(unsigned int q = 0; q < state.multiSample; q++)
217 zMask[q] &= cMask[q];
218 sMask[q] &= cMask[q];
// Second depth-test loop. NOTE(review): presumably the late test used when
// earlyDepthTest is false — the guarding condition is not visible here.
227 for(unsigned int q = 0; q < state.multiSample; q++)
229 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
234 Long ropTime = Ticks();
237 If(depthPass || Bool(earlyDepthTest))
// Only samples enabled in multiSampleMask write depth; with occlusion queries enabled the
// counter is advanced from a table indexed by the combined depth and stencil mask.
239 for(unsigned int q = 0; q < state.multiSample; q++)
241 if(state.multiSampleMask & (1 << q))
243 writeDepth(zBuffer, q, x, z[q], zMask[q]);
245 if(state.occlusionEnabled)
247 occlusion += *Pointer<UInt>(constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q]));
255 AddAtomic(Pointer<Long>(&profiler.ropOperations), 4);
258 rasterOperation(f, cBuffer, x, sMask, zMask, cMask);
263 cycles[PERF_ROP] += Ticks() - ropTime;
// Stencil write for every sample enabled in multiSampleMask.
268 for(unsigned int q = 0; q < state.multiSample; q++)
270 if(state.multiSampleMask & (1 << q))
272 writeStencil(sBuffer, q, x, sMask[q], zMask[q], cMask[q]);
277 cycles[PERF_PIPE] += Ticks() - pipeTime;
// Evaluates a plane equation at the centroid position (x, y): the result starts from the
// constant term C and has x*A + y*B added to it.
// NOTE(review): how `flat`, `perspective` and `rhw` affect the result is not visible in
// this listing (the corresponding conditions and the return statement are omitted).
281 Float4 PixelRoutine::interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
283 Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16);
287 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16) +
288 y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,B), 16);
// Runs the stencil test for sample q of the quad at x and stores the outcome, restricted
// to the covered pixels in cMask, into sMask.
299 void PixelRoutine::stencilTest(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &cMask)
// NOTE(review): the body of this check is not visible; presumably it short-circuits when stencil is inactive.
301 if(!state.stencilActive)
306 // (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask)
// The stencil buffer is addressed with a stride of 2 bytes per unit of x.
308 Pointer<Byte> buffer = sBuffer + 2 * x;
// Advance to the slice of sample q (the guarding condition is not visible here).
312 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
// Keep an unmasked copy for the counter-clockwise (stencil[1]) test.
315 Byte8 value = *Pointer<Byte8>(buffer);
316 Byte8 valueCCW = value;
// Apply the stencil[0] test mask unless the state says no mask is needed.
318 if(!state.noStencilMask)
320 value &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].testMaskQ));
323 stencilTest(value, state.stencilCompareMode, false);
// Two-sided stencil: test again with the stencil[1] state, then keep each result only for
// the primitive's clockwise / counter-clockwise mask respectively.
325 if(state.twoSidedStencil)
327 if(!state.noStencilMaskCCW)
329 valueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].testMaskQ));
332 stencilTest(valueCCW, state.stencilCompareModeCCW, true);
334 value &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
335 valueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
// Collapse the per-byte result into a bit mask and limit it to covered pixels.
339 sMask = SignMask(value) & cMask;
// Compares the stencil bytes in `value` against the reference data for the selected face
// (stencil[CCW]) using stencilCompareMode, and leaves the per-byte comparison result in `value`.
// The ordered comparisons add 0x80 to each byte and compare as signed bytes against
// referenceMaskedSignedQ. NOTE(review): presumably this biases unsigned values into
// signed order so the signed CmpGT can be used — confirm where referenceMaskedSignedQ is set.
// NOTE(review): short lines (e.g. `break;` and the temporary's declaration) are not visible here.
342 void PixelRoutine::stencilTest(Byte8 &value, VkCompareOp stencilCompareMode, bool CCW)
346 switch(stencilCompareMode)
348 case VK_COMPARE_OP_ALWAYS:
349 value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
351 case VK_COMPARE_OP_NEVER:
352 value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
354 case VK_COMPARE_OP_LESS: // a < b ~ b > a
355 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
356 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
358 case VK_COMPARE_OP_EQUAL:
359 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
361 case VK_COMPARE_OP_NOT_EQUAL: // a != b ~ !(a == b)
362 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
// Invert the equality mask.
363 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
365 case VK_COMPARE_OP_LESS_OR_EQUAL: // a <= b ~ (b > a) || (a == b)
367 equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
368 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
369 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
372 case VK_COMPARE_OP_GREATER: // a > b
// Here the operands are swapped: the signed reference is compared against the biased value.
373 equal = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ));
374 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
375 equal = CmpGT(As<SByte8>(equal), As<SByte8>(value));
378 case VK_COMPARE_OP_GREATER_OR_EQUAL: // a >= b ~ !(a < b) ~ !(b > a)
379 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
380 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
381 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
// Depth-tests sample q of the quad at x against the depth buffer and writes the passing
// pixels, limited to cMask, into zMask. Returns a Bool.
// NOTE(review): the return statements and the handling after the final stencilActive check
// are not visible in this listing.
388 Bool PixelRoutine::depthTest(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
390 if(!state.depthTestActive)
// When the shader overrides depth, oDepth is used; it is complemented (1 - oDepth) for a
// complementary depth buffer.
397 if(shader && shader->depthOverride())
399 if(complementaryDepthBuffer)
401 Z = Float4(1.0f) - oDepth;
409 Pointer<Byte> buffer;
// Linear layout: 4 bytes per unit of x, with the row pitch loaded for the second row.
// Quad layout: 8 bytes per unit of x.
412 if(!state.quadLayoutDepthBuffer)
414 buffer = zBuffer + 4 * x;
415 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
419 buffer = zBuffer + 8 * x;
// Advance to the slice of sample q (the guarding condition is not visible here).
424 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
// Load the stored depth values when this condition on compare mode and write enable holds.
429 if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
431 if(!state.quadLayoutDepthBuffer)
433 // FIXME: Properly optimizes?
// .xy come from the current row; loading at (pitch - 8) places the first two floats of the
// next row into the .zw lanes.
434 zValue.xy = *Pointer<Float4>(buffer);
435 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
439 zValue = *Pointer<Float4>(buffer, 16);
// Compare stored depth (zValue) with the new depth (Z). For the ordered modes the
// comparison direction is reversed when complementaryDepthBuffer is set.
445 switch(state.depthCompareMode)
447 case VK_COMPARE_OP_ALWAYS:
450 case VK_COMPARE_OP_NEVER:
453 case VK_COMPARE_OP_EQUAL:
454 zTest = CmpEQ(zValue, Z);
456 case VK_COMPARE_OP_NOT_EQUAL:
457 zTest = CmpNEQ(zValue, Z);
459 case VK_COMPARE_OP_LESS:
460 if(complementaryDepthBuffer)
462 zTest = CmpLT(zValue, Z);
466 zTest = CmpNLE(zValue, Z);
469 case VK_COMPARE_OP_GREATER_OR_EQUAL:
470 if(complementaryDepthBuffer)
472 zTest = CmpNLT(zValue, Z);
476 zTest = CmpLE(zValue, Z);
479 case VK_COMPARE_OP_LESS_OR_EQUAL:
480 if(complementaryDepthBuffer)
482 zTest = CmpLE(zValue, Z);
486 zTest = CmpNLT(zValue, Z);
489 case VK_COMPARE_OP_GREATER:
490 if(complementaryDepthBuffer)
492 zTest = CmpNLE(zValue, Z);
496 zTest = CmpLT(zValue, Z);
// Second switch: ALWAYS and NEVER are handled separately (their bodies are not visible);
// the remaining visible statement derives zMask from the comparison result and coverage.
503 switch(state.depthCompareMode)
505 case VK_COMPARE_OP_ALWAYS:
508 case VK_COMPARE_OP_NEVER:
512 zMask = SignMask(zTest) & cMask;
516 if(state.stencilActive)
// Computes the alpha-test mask aMask by comparing `alpha` against the reference value
// factor.alphaReference4 using state.alphaCompareMode. Each comparison yields a Short4
// mask that is packed and reduced to a bit mask with SignMask.
// NOTE(review): the ALWAYS / NEVER bodies and the `break;` lines are not visible in this listing.
524 void PixelRoutine::alphaTest(Int &aMask, Short4 &alpha)
529 switch(state.alphaCompareMode)
531 case VK_COMPARE_OP_ALWAYS:
534 case VK_COMPARE_OP_NEVER:
537 case VK_COMPARE_OP_EQUAL:
538 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
539 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
541 case VK_COMPARE_OP_NOT_EQUAL: // a != b ~ !(a == b)
542 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME
543 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
545 case VK_COMPARE_OP_LESS: // a < b ~ b > a
546 cmp = CmpGT(*Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)), alpha);
547 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
549 case VK_COMPARE_OP_GREATER_OR_EQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate
// Both the equality and the greater-than masks are computed here; the statement that
// combines them is not visible in this listing.
550 equal = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
551 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
553 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
555 case VK_COMPARE_OP_LESS_OR_EQUAL: // a <= b ~ !(a > b)
556 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME
557 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
559 case VK_COMPARE_OP_GREATER: // a > b
560 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
561 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
// Derives four per-sample masks from alpha: alpha is compared (not-less-than) against the
// four thresholds a2c0..a2c3 in DrawData, and each comparison is reduced to a bit mask.
// NOTE(review): the statements that apply aMask0..aMask3 to cMask are not visible in this listing.
568 void PixelRoutine::alphaToCoverage(Int cMask[4], Float4 &alpha)
570 Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c0)));
571 Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c1)));
572 Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c2)));
573 Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c3)));
575 Int aMask0 = SignMask(coverage0);
576 Int aMask1 = SignMask(coverage1);
577 Int aMask2 = SignMask(coverage2);
578 Int aMask3 = SignMask(coverage3);
// Writes the depth values of sample q for the quad at x, updating only the pixels selected
// by zMask and preserving the stored value elsewhere.
586 void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
// NOTE(review): the body of this check is not visible; presumably nothing is written when
// depth writes are disabled.
588 if(!state.depthWriteEnable)
// When the shader overrides depth, oDepth is used; it is complemented (1 - oDepth) for a
// complementary depth buffer.
595 if(shader && shader->depthOverride())
597 if(complementaryDepthBuffer)
599 Z = Float4(1.0f) - oDepth;
607 Pointer<Byte> buffer;
// Linear layout: 4 bytes per unit of x plus a row pitch. Quad layout: 8 bytes per unit of x.
610 if(!state.quadLayoutDepthBuffer)
612 buffer = zBuffer + 4 * x;
613 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
617 buffer = zBuffer + 8 * x;
// Advance to the slice of sample q (the guarding condition is not visible here).
622 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
// Load the currently stored depth when this condition holds (same condition as in depthTest).
627 if(state.depthCompareMode != VK_COMPARE_OP_NEVER || (state.depthCompareMode != VK_COMPARE_OP_ALWAYS && !state.depthWriteEnable))
629 if(!state.quadLayoutDepthBuffer)
631 // FIXME: Properly optimizes?
// .xy come from the current row; loading at (pitch - 8) places the first two floats of the
// next row into the .zw lanes.
632 zValue.xy = *Pointer<Float4>(buffer);
633 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
637 zValue = *Pointer<Float4>(buffer, 16);
// Merge: keep the new depth under maskD4X[zMask] and the stored depth under
// invMaskD4X[zMask], then OR the two together.
641 Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + zMask * 16, 16));
642 zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16));
643 Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue));
// Store: two Float2 rows for the linear layout, a single aligned Float4 for the quad layout.
645 if(!state.quadLayoutDepthBuffer)
647 // FIXME: Properly optimizes?
648 *Pointer<Float2>(buffer) = Float2(Z.xy);
649 *Pointer<Float2>(buffer + pitch) = Float2(Z.zw);
653 *Pointer<Float4>(buffer, 16) = Z;
// Applies the configured stencil operations to sample q of the quad at x and stores the
// result, limited to covered pixels (cMask), back into the stencil buffer.
// NOTE(review): the bodies of the three leading checks are not visible in this listing;
// presumably each is an early return for a case where nothing needs to be written.
657 void PixelRoutine::writeStencil(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask)
659 if(!state.stencilActive)
// All three operations are KEEP for the front state, and either stencil is one-sided or
// the same holds for the CCW state.
664 if(state.stencilPassOperation == VK_STENCIL_OP_KEEP && state.stencilZFailOperation == VK_STENCIL_OP_KEEP && state.stencilFailOperation == VK_STENCIL_OP_KEEP)
666 if(!state.twoSidedStencil || (state.stencilPassOperationCCW == VK_STENCIL_OP_KEEP && state.stencilZFailOperationCCW == VK_STENCIL_OP_KEEP && state.stencilFailOperationCCW == VK_STENCIL_OP_KEEP))
672 if(state.stencilWriteMasked && (!state.twoSidedStencil || state.stencilWriteMaskedCCW))
// The stencil buffer is addressed with a stride of 2 bytes per unit of x.
677 Pointer<Byte> buffer = sBuffer + 2 * x;
// Advance to the slice of sample q (the guarding condition is not visible here).
681 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
684 Byte8 bufferValue = *Pointer<Byte8>(buffer);
// Compute the new value for the stencil[0] state.
687 stencilOperation(newValue, bufferValue, state.stencilPassOperation, state.stencilZFailOperation, state.stencilFailOperation, false, zMask, sMask);
// Apply the write mask: new bits under writeMaskQ, stored bits under invWriteMaskQ.
689 if(!state.noStencilWriteMask)
691 Byte8 maskedValue = bufferValue;
692 newValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].writeMaskQ));
693 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].invWriteMaskQ));
694 newValue |= maskedValue;
// Two-sided stencil: repeat with the stencil[1] state and select per pixel using the
// primitive's clockwise / counter-clockwise masks.
697 if(state.twoSidedStencil)
701 stencilOperation(newValueCCW, bufferValue, state.stencilPassOperationCCW, state.stencilZFailOperationCCW, state.stencilFailOperationCCW, true, zMask, sMask);
703 if(!state.noStencilWriteMaskCCW)
705 Byte8 maskedValue = bufferValue;
706 newValueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].writeMaskQ));
707 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].invWriteMaskQ));
708 newValueCCW |= maskedValue;
711 newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
712 newValueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
713 newValue |= newValueCCW;
// Keep the new value only for covered pixels; uncovered pixels retain the stored value.
716 newValue &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * cMask);
717 bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask);
718 newValue |= bufferValue;
// Only the low four bytes are written back.
720 *Pointer<Byte4>(buffer) = Byte4(newValue);
// Computes the new stencil value in newValue by evaluating the pass, depth-fail and
// stencil-fail operations and selecting between them with zMask and sMask.
// Operations that equal an already evaluated one are not evaluated again.
// NOTE(review): the declarations of zFail/fail and the statements that OR the selected
// values together are not visible in this listing.
723 void PixelRoutine::stencilOperation(Byte8 &newValue, Byte8 &bufferValue, VkStencilOp stencilPassOperation, VkStencilOp stencilZFailOperation, VkStencilOp stencilFailOperation, bool CCW, Int &zMask, Int &sMask)
// `pass` aliases the output, so the pass result is written directly into newValue.
725 Byte8 &pass = newValue;
729 stencilOperation(pass, bufferValue, stencilPassOperation, CCW);
731 if(stencilZFailOperation != stencilPassOperation)
733 stencilOperation(zFail, bufferValue, stencilZFailOperation, CCW);
736 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
738 stencilOperation(fail, bufferValue, stencilFailOperation, CCW);
741 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
743 if(state.depthTestActive && stencilZFailOperation != stencilPassOperation) // zMask valid and values not the same
// Pixels that passed depth keep `pass`; the others keep `zFail`.
745 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * zMask);
746 zFail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask);
// Pixels that passed stencil keep `pass`; the others keep `fail`.
750 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * sMask);
751 fail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask);
// Applies a single stencil operation to bufferValue and stores the result in output.
// CCW selects which stencil state (stencil[CCW]) supplies the reference for REPLACE.
// The clamp variants use saturating add/subtract; the wrap variants use plain add/subtract.
// NOTE(review): the `switch` line and the `break;` lines are not visible in this listing.
756 void PixelRoutine::stencilOperation(Byte8 &output, Byte8 &bufferValue, VkStencilOp operation, bool CCW)
760 case VK_STENCIL_OP_KEEP:
761 output = bufferValue;
763 case VK_STENCIL_OP_ZERO:
764 output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
766 case VK_STENCIL_OP_REPLACE:
767 output = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceQ));
769 case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
770 output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
772 case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
773 output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
775 case VK_STENCIL_OP_INVERT:
776 output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
778 case VK_STENCIL_OP_INCREMENT_AND_WRAP:
779 output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1);
781 case VK_STENCIL_OP_DECREMENT_AND_WRAP:
782 output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1);
// Fills the colour channels (x, y, z) of blendFactor for the given VkBlendFactor, using the
// source colour `current`, the destination colour `pixel`, or the blend constants in DrawData.
// "One minus" factors are formed as 0xFFFF - value on 16-bit channels.
// NOTE(review): the second parameter reads `¤t` in this listing; the body refers to
// `current`, so this is presumably a mis-encoded `&current` — confirm against the original file.
// NOTE(review): the ZERO / ONE bodies and the `break;` lines are not visible in this listing.
789 void PixelRoutine::blendFactor(Vector4s &blendFactor, const Vector4s ¤t, const Vector4s &pixel, VkBlendFactor blendFactorActive)
791 switch(blendFactorActive)
793 case VK_BLEND_FACTOR_ZERO:
796 case VK_BLEND_FACTOR_ONE:
799 case VK_BLEND_FACTOR_SRC_COLOR:
800 blendFactor.x = current.x;
801 blendFactor.y = current.y;
802 blendFactor.z = current.z;
804 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
805 blendFactor.x = Short4(0xFFFFu) - current.x;
806 blendFactor.y = Short4(0xFFFFu) - current.y;
807 blendFactor.z = Short4(0xFFFFu) - current.z;
809 case VK_BLEND_FACTOR_DST_COLOR:
810 blendFactor.x = pixel.x;
811 blendFactor.y = pixel.y;
812 blendFactor.z = pixel.z;
814 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
815 blendFactor.x = Short4(0xFFFFu) - pixel.x;
816 blendFactor.y = Short4(0xFFFFu) - pixel.y;
817 blendFactor.z = Short4(0xFFFFu) - pixel.z;
819 case VK_BLEND_FACTOR_SRC_ALPHA:
820 blendFactor.x = current.w;
821 blendFactor.y = current.w;
822 blendFactor.z = current.w;
824 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
825 blendFactor.x = Short4(0xFFFFu) - current.w;
826 blendFactor.y = Short4(0xFFFFu) - current.w;
827 blendFactor.z = Short4(0xFFFFu) - current.w;
829 case VK_BLEND_FACTOR_DST_ALPHA:
830 blendFactor.x = pixel.w;
831 blendFactor.y = pixel.w;
832 blendFactor.z = pixel.w;
834 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
835 blendFactor.x = Short4(0xFFFFu) - pixel.w;
836 blendFactor.y = Short4(0xFFFFu) - pixel.w;
837 blendFactor.z = Short4(0xFFFFu) - pixel.w;
839 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
// min(source alpha, 0xFFFF - destination alpha), compared as unsigned, replicated to all three channels.
840 blendFactor.x = Short4(0xFFFFu) - pixel.w;
841 blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w));
842 blendFactor.y = blendFactor.x;
843 blendFactor.z = blendFactor.x;
845 case VK_BLEND_FACTOR_CONSTANT_COLOR:
846 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[0]));
847 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[1]));
848 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[2]));
850 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
851 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[0]));
852 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[1]));
853 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[2]));
855 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
856 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
857 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
858 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
860 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
861 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
862 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
863 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
// Fills the alpha channel (w) of blendFactor for the given VkBlendFactor. Colour and alpha
// variants of each factor both resolve to the alpha channel here; SRC_ALPHA_SATURATE yields 0xFFFF.
// NOTE(review): the second parameter reads `¤t` in this listing; the body refers to
// `current`, so this is presumably a mis-encoded `&current` — confirm against the original file.
// NOTE(review): the ZERO / ONE bodies and the `break;` lines are not visible in this listing.
870 void PixelRoutine::blendFactorAlpha(Vector4s &blendFactor, const Vector4s ¤t, const Vector4s &pixel, VkBlendFactor blendFactorAlphaActive)
872 switch(blendFactorAlphaActive)
874 case VK_BLEND_FACTOR_ZERO:
877 case VK_BLEND_FACTOR_ONE:
880 case VK_BLEND_FACTOR_SRC_COLOR:
881 blendFactor.w = current.w;
883 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
884 blendFactor.w = Short4(0xFFFFu) - current.w;
886 case VK_BLEND_FACTOR_DST_COLOR:
887 blendFactor.w = pixel.w;
889 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
890 blendFactor.w = Short4(0xFFFFu) - pixel.w;
892 case VK_BLEND_FACTOR_SRC_ALPHA:
893 blendFactor.w = current.w;
895 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
896 blendFactor.w = Short4(0xFFFFu) - current.w;
898 case VK_BLEND_FACTOR_DST_ALPHA:
899 blendFactor.w = pixel.w;
901 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
902 blendFactor.w = Short4(0xFFFFu) - pixel.w;
904 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
905 blendFactor.w = Short4(0xFFFFu);
// The two constant cases share one body: both read the alpha blend constant (index 3).
907 case VK_BLEND_FACTOR_CONSTANT_COLOR:
908 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
909 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
911 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
912 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
913 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
// Returns whether the format of render target `index` is an sRGB format, as reported by
// Surface::isSRGBformat.
920 bool PixelRoutine::isSRGB(int index) const
922 return Surface::isSRGBformat(state.targetFormat[index]);
// Reads the quad at x from colour buffer `index` into `pixel`, expanding the target format
// into four 16-bit channel vectors. Each format reads from two rows: the second row is
// reached by adding colorPitchB[index]. Channels a format does not store are filled with
// constants (0x0000 or 0xFFFF) as shown per case.
// If the target is sRGB (or postBlendSRGB with writeSRGB), the result is converted with
// sRGBtoLinear16_12_16.
// NOTE(review): short lines (temporaries' declarations, simple copies such as register
// moves, `break;`, `default:`) are not visible in this listing, so the unpack sequences
// below are incomplete as shown.
925 void PixelRoutine::readPixel(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &pixel)
929 Pointer<Byte> buffer;
930 Pointer<Byte> buffer2;
932 switch(state.targetFormat[index])
934 case VK_FORMAT_R5G6B5_UNORM_PACK16:
// 2 bytes per pixel; one Int (two pixels) is read from each of the two rows.
935 buffer = cBuffer + 2 * x;
936 buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
937 c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
// Isolate each field and shift it to the top of the 16-bit channel.
939 pixel.x = c01 & Short4(0xF800u);
940 pixel.y = (c01 & Short4(0x07E0u)) << 5;
941 pixel.z = (c01 & Short4(0x001Fu)) << 11;
942 pixel.w = Short4(0xFFFFu);
944 case VK_FORMAT_B8G8R8A8_UNORM:
// 4 bytes per pixel; two pixels are read from each of the two rows.
945 buffer = cBuffer + 4 * x;
946 c01 = *Pointer<Short4>(buffer);
947 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
948 c23 = *Pointer<Short4>(buffer);
951 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
952 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
954 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
955 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
// Unpacking a register with itself duplicates every byte, widening 8-bit values to 16 bits.
958 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
959 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
960 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
961 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
963 case VK_FORMAT_R8G8B8A8_UNORM:
964 case VK_FORMAT_R8G8B8A8_SRGB:
965 buffer = cBuffer + 4 * x;
966 c01 = *Pointer<Short4>(buffer);
967 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
968 c23 = *Pointer<Short4>(buffer);
971 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
972 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
974 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
975 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
// Compared with the B8G8R8A8 case, the x and z outputs take their sources from the
// opposite registers here.
978 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
979 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
980 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
981 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
983 case VK_FORMAT_R8_UNORM:
// 1 byte per pixel; one Short (two pixels) is read from each of the two rows.
984 buffer = cBuffer + 1 * x;
985 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 0);
986 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
987 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 1);
988 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
989 pixel.y = Short4(0x0000);
990 pixel.z = Short4(0x0000);
991 pixel.w = Short4(0xFFFFu);
993 case VK_FORMAT_R8G8_UNORM:
// 2 bytes per pixel; one Int (two pixels) is read from each of the two rows.
994 buffer = cBuffer + 2 * x;
995 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0));
996 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
997 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1));
// Replicate the low byte into both halves for x, and the high byte into both halves for y.
998 pixel.x = (c01 & Short4(0x00FFu)) | (c01 << 8);
999 pixel.y = (c01 & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c01) >> 8);
1000 pixel.z = Short4(0x0000u);
1001 pixel.w = Short4(0xFFFFu);
1003 case VK_FORMAT_R16G16B16A16_UNORM:
// 8 bytes per pixel; two pixels per row are loaded and then transposed into channel vectors.
1005 pixel.x = *Pointer<Short4>(buffer + 8 * x);
1006 pixel.y = *Pointer<Short4>(buffer + 8 * x + 8);
1007 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1008 pixel.z = *Pointer<Short4>(buffer + 8 * x);
1009 pixel.w = *Pointer<Short4>(buffer + 8 * x + 8);
1010 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
1012 case VK_FORMAT_R16G16_UNORM:
// 4 bytes per pixel; two pixels are read from each of the two rows.
1014 pixel.x = *Pointer<Short4>(buffer + 4 * x);
1015 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1016 pixel.y = *Pointer<Short4>(buffer + 4 * x);
1018 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y));
1019 pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y));
1021 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z));
1022 pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z));
1023 pixel.z = Short4(0xFFFFu);
1024 pixel.w = Short4(0xFFFFu);
// Convert to linear when the stored values are sRGB-encoded.
1030 if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
1032 sRGBtoLinear16_12_16(pixel);
// Blends the source colour `current` with the destination pixel read from colour buffer
// `index` at x, leaving the result in `current`. Colour channels (x, y, z) and alpha (w)
// are handled separately, each with its own factors and blend operation.
// Source and destination are multiplied by their factors with an unsigned 16-bit MulHigh,
// except when the factor is ONE or ZERO, for which the multiply is skipped.
// NOTE(review): the third parameter reads `¤t` in this listing; the body refers to
// `current`, so this is presumably a mis-encoded `&current` — confirm against the original file.
// NOTE(review): the `break;` lines, the SRC_EXT bodies, and whatever handles the ZERO
// factor are not visible in this listing.
1036 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4s ¤t, Int &x)
1038 if(!state.alphaBlendActive)
1044 readPixel(index, cBuffer, x, pixel);
1046 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
1047 Vector4s sourceFactor;
1048 Vector4s destFactor;
1050 blendFactor(sourceFactor, current, pixel, state.sourceBlendFactor);
1051 blendFactor(destFactor, current, pixel, state.destBlendFactor);
1053 if(state.sourceBlendFactor != VK_BLEND_FACTOR_ONE && state.sourceBlendFactor != VK_BLEND_FACTOR_ZERO)
1055 current.x = MulHigh(As<UShort4>(current.x), As<UShort4>(sourceFactor.x));
1056 current.y = MulHigh(As<UShort4>(current.y), As<UShort4>(sourceFactor.y));
1057 current.z = MulHigh(As<UShort4>(current.z), As<UShort4>(sourceFactor.z));
1060 if(state.destBlendFactor != VK_BLEND_FACTOR_ONE && state.destBlendFactor != VK_BLEND_FACTOR_ZERO)
1062 pixel.x = MulHigh(As<UShort4>(pixel.x), As<UShort4>(destFactor.x));
1063 pixel.y = MulHigh(As<UShort4>(pixel.y), As<UShort4>(destFactor.y));
1064 pixel.z = MulHigh(As<UShort4>(pixel.z), As<UShort4>(destFactor.z));
// Combine the weighted colours; add and subtract saturate on unsigned 16-bit values.
1067 switch(state.blendOperation)
1069 case VK_BLEND_OP_ADD:
1070 current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1071 current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1072 current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
1074 case VK_BLEND_OP_SUBTRACT:
1075 current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1076 current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1077 current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
1079 case VK_BLEND_OP_REVERSE_SUBTRACT:
1080 current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x));
1081 current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y));
1082 current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z));
1084 case VK_BLEND_OP_MIN:
1085 current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x));
1086 current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y));
1087 current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z));
1089 case VK_BLEND_OP_MAX:
1090 current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x));
1091 current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y));
1092 current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z));
1094 case VK_BLEND_OP_SRC_EXT:
1097 case VK_BLEND_OP_DST_EXT:
1098 current.x = pixel.x;
1099 current.y = pixel.y;
1100 current.z = pixel.z;
1102 case VK_BLEND_OP_ZERO_EXT:
1103 current.x = Short4(0x0000);
1104 current.y = Short4(0x0000);
1105 current.z = Short4(0x0000);
// Alpha channel: same scheme using the alpha factors and the alpha blend operation.
1111 blendFactorAlpha(sourceFactor, current, pixel, state.sourceBlendFactorAlpha);
1112 blendFactorAlpha(destFactor, current, pixel, state.destBlendFactorAlpha);
1114 if(state.sourceBlendFactorAlpha != VK_BLEND_FACTOR_ONE && state.sourceBlendFactorAlpha != VK_BLEND_FACTOR_ZERO)
1116 current.w = MulHigh(As<UShort4>(current.w), As<UShort4>(sourceFactor.w));
1119 if(state.destBlendFactorAlpha != VK_BLEND_FACTOR_ONE && state.destBlendFactorAlpha != VK_BLEND_FACTOR_ZERO)
1121 pixel.w = MulHigh(As<UShort4>(pixel.w), As<UShort4>(destFactor.w));
1124 switch(state.blendOperationAlpha)
1126 case VK_BLEND_OP_ADD:
1127 current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
1129 case VK_BLEND_OP_SUBTRACT:
1130 current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
1132 case VK_BLEND_OP_REVERSE_SUBTRACT:
1133 current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w));
1135 case VK_BLEND_OP_MIN:
1136 current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w));
1138 case VK_BLEND_OP_MAX:
1139 current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w));
1141 case VK_BLEND_OP_SRC_EXT:
1144 case VK_BLEND_OP_DST_EXT:
1145 current.w = pixel.w;
1147 case VK_BLEND_OP_ZERO_EXT:
1148 current.w = Short4(0x0000);
// Applies the bitwise logic operation selected by state.logicalOperation between the source
// colour `current` and the destination pixel read from colour buffer `index` at x, storing
// the result in the x, y and z channels of `current`. The w channel is not modified by any
// visible statement.
// VK_LOGIC_OP_COPY is checked up front, before the destination is read; reaching its case
// in the switch is asserted as impossible.
// NOTE(review): the third parameter reads `¤t` in this listing; the body refers to
// `current`, so this is presumably a mis-encoded `&current` — confirm against the original file.
// NOTE(review): the body of the COPY check and the `break;` lines are not visible in this listing.
1155 void PixelRoutine::logicOperation(int index, Pointer<Byte> &cBuffer, Vector4s ¤t, Int &x)
1157 if(state.logicalOperation == VK_LOGIC_OP_COPY)
1163 readPixel(index, cBuffer, x, pixel);
1165 switch(state.logicalOperation)
1167 case VK_LOGIC_OP_CLEAR:
1168 current.x = UShort4(0);
1169 current.y = UShort4(0);
1170 current.z = UShort4(0);
1172 case VK_LOGIC_OP_SET:
1173 current.x = UShort4(0xFFFFu);
1174 current.y = UShort4(0xFFFFu);
1175 current.z = UShort4(0xFFFFu);
1177 case VK_LOGIC_OP_COPY:
1178 ASSERT(false); // Optimized out
1180 case VK_LOGIC_OP_COPY_INVERTED:
1181 current.x = ~current.x;
1182 current.y = ~current.y;
1183 current.z = ~current.z;
1185 case VK_LOGIC_OP_NO_OP:
// The result is the destination value, i.e. the stored pixel is kept.
1186 current.x = pixel.x;
1187 current.y = pixel.y;
1188 current.z = pixel.z;
1190 case VK_LOGIC_OP_INVERT:
1191 current.x = ~pixel.x;
1192 current.y = ~pixel.y;
1193 current.z = ~pixel.z;
1195 case VK_LOGIC_OP_AND:
1196 current.x = pixel.x & current.x;
1197 current.y = pixel.y & current.y;
1198 current.z = pixel.z & current.z;
1200 case VK_LOGIC_OP_NAND:
1201 current.x = ~(pixel.x & current.x);
1202 current.y = ~(pixel.y & current.y);
1203 current.z = ~(pixel.z & current.z);
1205 case VK_LOGIC_OP_OR:
1206 current.x = pixel.x | current.x;
1207 current.y = pixel.y | current.y;
1208 current.z = pixel.z | current.z;
1210 case VK_LOGIC_OP_NOR:
1211 current.x = ~(pixel.x | current.x);
1212 current.y = ~(pixel.y | current.y);
1213 current.z = ~(pixel.z | current.z);
1215 case VK_LOGIC_OP_XOR:
1216 current.x = pixel.x ^ current.x;
1217 current.y = pixel.y ^ current.y;
1218 current.z = pixel.z ^ current.z;
1220 case VK_LOGIC_OP_EQUIVALENT:
1221 current.x = ~(pixel.x ^ current.x);
1222 current.y = ~(pixel.y ^ current.y);
1223 current.z = ~(pixel.z ^ current.z);
1225 case VK_LOGIC_OP_AND_REVERSE:
// source AND (NOT destination)
1226 current.x = ~pixel.x & current.x;
1227 current.y = ~pixel.y & current.y;
1228 current.z = ~pixel.z & current.z;
1230 case VK_LOGIC_OP_AND_INVERTED:
// (NOT source) AND destination
1231 current.x = pixel.x & ~current.x;
1232 current.y = pixel.y & ~current.y;
1233 current.z = pixel.z & ~current.z;
1235 case VK_LOGIC_OP_OR_REVERSE:
// source OR (NOT destination)
1236 current.x = ~pixel.x | current.x;
1237 current.y = ~pixel.y | current.y;
1238 current.z = ~pixel.z | current.z;
1240 case VK_LOGIC_OP_OR_INVERTED:
// (NOT source) OR destination
1241 current.x = pixel.x | ~current.x;
1242 current.y = pixel.y | ~current.y;
1243 current.z = pixel.z | ~current.z;
1250 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s ¤t, Int &sMask, Int &zMask, Int &cMask)
1252 if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
1254 linearToSRGB16_12_16(current);
1257 if(exactColorRounding)
1259 switch(state.targetFormat[index])
1261 case VK_FORMAT_R5G6B5_UNORM_PACK16:
1262 current.x = AddSat(As<UShort4>(current.x), UShort4(0x0400));
1263 current.y = AddSat(As<UShort4>(current.y), UShort4(0x0200));
1264 current.z = AddSat(As<UShort4>(current.z), UShort4(0x0400));
1266 case VK_FORMAT_B8G8R8A8_UNORM:
1267 case VK_FORMAT_R8G8B8A8_UNORM:
1268 case VK_FORMAT_R8G8B8A8_SRGB:
1269 case VK_FORMAT_R8G8_UNORM:
1270 case VK_FORMAT_R8_UNORM:
1271 current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080);
1272 current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080);
1273 current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080);
1274 current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080);
1281 int rgbaWriteMask = state.colorWriteActive(index);
1282 int bgraWriteMask = (rgbaWriteMask & 0x0000000A) | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2;
1284 switch(state.targetFormat[index])
1286 case VK_FORMAT_R5G6B5_UNORM_PACK16:
1288 current.x = current.x & Short4(0xF800u);
1289 current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5;
1290 current.z = As<UShort4>(current.z) >> 11;
1292 current.x = current.x | current.y | current.z;
1295 case VK_FORMAT_B8G8R8A8_UNORM:
1296 if(rgbaWriteMask == 0x7)
1298 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1299 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1300 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1302 current.z = As<Short4>(PackUnsigned(current.z, current.x));
1303 current.y = As<Short4>(PackUnsigned(current.y, current.y));
1305 current.x = current.z;
1306 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1307 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1308 current.y = current.z;
1309 current.z = As<Short4>(UnpackLow(current.z, current.x));
1310 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1314 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1315 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1316 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1317 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1319 current.z = As<Short4>(PackUnsigned(current.z, current.x));
1320 current.y = As<Short4>(PackUnsigned(current.y, current.w));
1322 current.x = current.z;
1323 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1324 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1325 current.y = current.z;
1326 current.z = As<Short4>(UnpackLow(current.z, current.x));
1327 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1330 case VK_FORMAT_R8G8B8A8_UNORM:
1331 case VK_FORMAT_R8G8B8A8_SRGB:
1332 if(rgbaWriteMask == 0x7)
1334 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1335 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1336 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1338 current.z = As<Short4>(PackUnsigned(current.x, current.z));
1339 current.y = As<Short4>(PackUnsigned(current.y, current.y));
1341 current.x = current.z;
1342 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1343 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1344 current.y = current.z;
1345 current.z = As<Short4>(UnpackLow(current.z, current.x));
1346 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1350 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1351 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1352 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1353 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1355 current.z = As<Short4>(PackUnsigned(current.x, current.z));
1356 current.y = As<Short4>(PackUnsigned(current.y, current.w));
1358 current.x = current.z;
1359 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1360 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1361 current.y = current.z;
1362 current.z = As<Short4>(UnpackLow(current.z, current.x));
1363 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1366 case VK_FORMAT_R8G8_UNORM:
1367 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1368 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1369 current.x = As<Short4>(PackUnsigned(current.x, current.x));
1370 current.y = As<Short4>(PackUnsigned(current.y, current.y));
1371 current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y));
1373 case VK_FORMAT_R8_UNORM:
1374 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1375 current.x = As<Short4>(PackUnsigned(current.x, current.x));
1377 case VK_FORMAT_R16G16_UNORM:
1378 current.z = current.x;
1379 current.x = As<Short4>(UnpackLow(current.x, current.y));
1380 current.z = As<Short4>(UnpackHigh(current.z, current.y));
1381 current.y = current.z;
1383 case VK_FORMAT_R16G16B16A16_UNORM:
1384 transpose4x4(current.x, current.y, current.z, current.w);
1390 Short4 c01 = current.z;
1391 Short4 c23 = current.y;
1393 Int xMask; // Combination of all masks
1395 if(state.depthTestActive)
1404 if(state.stencilActive)
1409 switch(state.targetFormat[index])
1411 case VK_FORMAT_R5G6B5_UNORM_PACK16:
1413 Pointer<Byte> buffer = cBuffer + 2 * x;
1414 Int value = *Pointer<Int>(buffer);
1416 Int c01 = Extract(As<Int2>(current.x), 0);
1418 if((bgraWriteMask & 0x00000007) != 0x00000007)
1421 c01 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
1422 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0]));
1426 c01 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8);
1427 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][0]) + xMask * 8);
1429 *Pointer<Int>(buffer) = c01;
1431 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1432 value = *Pointer<Int>(buffer);
1434 Int c23 = Extract(As<Int2>(current.x), 1);
1436 if((bgraWriteMask & 0x00000007) != 0x00000007)
1439 c23 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
1440 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0]));
1444 c23 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8);
1445 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][2]) + xMask * 8);
1447 *Pointer<Int>(buffer) = c23;
1450 case VK_FORMAT_B8G8R8A8_UNORM:
1452 Pointer<Byte> buffer = cBuffer + x * 4;
1453 Short4 value = *Pointer<Short4>(buffer);
1455 if(state.targetFormat[index] == VK_FORMAT_B8G8R8A8_UNORM && bgraWriteMask != 0x0000000F) // FIXME: Need for masking when XRGB && Fh?
1457 Short4 masked = value;
1458 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1459 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
1463 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1464 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
1466 *Pointer<Short4>(buffer) = c01;
1468 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1469 value = *Pointer<Short4>(buffer);
1471 if(state.targetFormat[index] == VK_FORMAT_B8G8R8A8_UNORM && bgraWriteMask != 0x0000000F) // FIXME: Need for masking when XRGB && Fh?
1473 Short4 masked = value;
1474 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1475 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
1479 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1480 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
1482 *Pointer<Short4>(buffer) = c23;
1485 case VK_FORMAT_R8G8B8A8_UNORM:
1486 case VK_FORMAT_R8G8B8A8_SRGB:
1488 Pointer<Byte> buffer = cBuffer + x * 4;
1489 Short4 value = *Pointer<Short4>(buffer);
1491 bool masked = ((state.targetFormat[index] == VK_FORMAT_R8G8B8A8_UNORM || state.targetFormat[index] == VK_FORMAT_R8G8B8A8_SRGB) && rgbaWriteMask != 0x0000000F); // FIXME: Need for masking when XBGR && Fh?
1495 Short4 masked = value;
1496 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1497 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
1501 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1502 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
1504 *Pointer<Short4>(buffer) = c01;
1506 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1507 value = *Pointer<Short4>(buffer);
1511 Short4 masked = value;
1512 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1513 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
1517 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1518 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
1520 *Pointer<Short4>(buffer) = c23;
1523 case VK_FORMAT_R8G8_UNORM:
1524 if((rgbaWriteMask & 0x00000003) != 0x0)
1526 Pointer<Byte> buffer = cBuffer + 2 * x;
1528 value = Insert(value, *Pointer<Int>(buffer), 0);
1529 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1530 value = Insert(value, *Pointer<Int>(buffer + pitch), 1);
1532 Int2 packedCol = As<Int2>(current.x);
1534 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
1535 if((rgbaWriteMask & 0x3) != 0x3)
1537 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
1538 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
1539 mergedMask &= rgbaMask;
1542 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask));
1544 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
1545 *Pointer<UInt>(buffer + pitch) = As<UInt>(Extract(packedCol, 1));
1548 case VK_FORMAT_R8_UNORM:
1549 if(rgbaWriteMask & 0x00000001)
1551 Pointer<Byte> buffer = cBuffer + 1 * x;
1553 value = Insert(value, *Pointer<Short>(buffer), 0);
1554 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1555 value = Insert(value, *Pointer<Short>(buffer + pitch), 1);
1557 current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask);
1558 value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask);
1561 *Pointer<Short>(buffer) = Extract(current.x, 0);
1562 *Pointer<Short>(buffer + pitch) = Extract(current.x, 1);
1565 case VK_FORMAT_R16G16_UNORM:
1567 Pointer<Byte> buffer = cBuffer + 4 * x;
1569 Short4 value = *Pointer<Short4>(buffer);
1571 if((rgbaWriteMask & 0x00000003) != 0x00000003)
1573 Short4 masked = value;
1574 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
1575 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0]));
1576 current.x |= masked;
1579 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1580 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
1582 *Pointer<Short4>(buffer) = current.x;
1584 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1586 value = *Pointer<Short4>(buffer);
1588 if((rgbaWriteMask & 0x00000003) != 0x00000003)
1590 Short4 masked = value;
1591 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
1592 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0]));
1593 current.y |= masked;
1596 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1597 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
1599 *Pointer<Short4>(buffer) = current.y;
1602 case VK_FORMAT_R16G16B16A16_UNORM:
1604 Pointer<Byte> buffer = cBuffer + 8 * x;
1607 Short4 value = *Pointer<Short4>(buffer);
1609 if(rgbaWriteMask != 0x0000000F)
1611 Short4 masked = value;
1612 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1613 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1614 current.x |= masked;
1617 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ0Q) + xMask * 8);
1618 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8);
1620 *Pointer<Short4>(buffer) = current.x;
1624 Short4 value = *Pointer<Short4>(buffer + 8);
1626 if(rgbaWriteMask != 0x0000000F)
1628 Short4 masked = value;
1629 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1630 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1631 current.y |= masked;
1634 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ1Q) + xMask * 8);
1635 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8);
1637 *Pointer<Short4>(buffer + 8) = current.y;
1640 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1643 Short4 value = *Pointer<Short4>(buffer);
1645 if(rgbaWriteMask != 0x0000000F)
1647 Short4 masked = value;
1648 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1649 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1650 current.z |= masked;
1653 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ2Q) + xMask * 8);
1654 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8);
1656 *Pointer<Short4>(buffer) = current.z;
1660 Short4 value = *Pointer<Short4>(buffer + 8);
1662 if(rgbaWriteMask != 0x0000000F)
1664 Short4 masked = value;
1665 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1666 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1667 current.w |= masked;
1670 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ3Q) + xMask * 8);
1671 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8);
1673 *Pointer<Short4>(buffer + 8) = current.w;
// Computes the x, y and z components of a floating-point blend factor for the
// given VkBlendFactor.
//   blendFactor       - out: receives the factor (only x/y/z are written here)
//   oC                - first color operand (alphaBlend passes its in/out color)
//   pixel             - second color operand (alphaBlend passes the value it
//                       loaded from the color buffer)
//   blendFactorActive - selects which factor to produce
// NOTE(review): this listing omits short lines (braces, break statements).
// No statement is visible for ZERO and ONE; alphaBlend skips its multiply for
// those two factors, so their output does not appear to be consumed there.
1682 void PixelRoutine::blendFactor(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, VkBlendFactor blendFactorActive)
1684 switch(blendFactorActive)
1686 case VK_BLEND_FACTOR_ZERO:
1689 case VK_BLEND_FACTOR_ONE:
// Per-channel factor taken from oC.
1692 case VK_BLEND_FACTOR_SRC_COLOR:
1693 blendFactor.x = oC.x;
1694 blendFactor.y = oC.y;
1695 blendFactor.z = oC.z;
1697 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
1698 blendFactor.x = Float4(1.0f) - oC.x;
1699 blendFactor.y = Float4(1.0f) - oC.y;
1700 blendFactor.z = Float4(1.0f) - oC.z;
// Per-channel factor taken from pixel.
1702 case VK_BLEND_FACTOR_DST_COLOR:
1703 blendFactor.x = pixel.x;
1704 blendFactor.y = pixel.y;
1705 blendFactor.z = pixel.z;
1707 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
1708 blendFactor.x = Float4(1.0f) - pixel.x;
1709 blendFactor.y = Float4(1.0f) - pixel.y;
1710 blendFactor.z = Float4(1.0f) - pixel.z;
// oC.w replicated to all three channels.
1712 case VK_BLEND_FACTOR_SRC_ALPHA:
1713 blendFactor.x = oC.w;
1714 blendFactor.y = oC.w;
1715 blendFactor.z = oC.w;
1717 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
1718 blendFactor.x = Float4(1.0f) - oC.w;
1719 blendFactor.y = Float4(1.0f) - oC.w;
1720 blendFactor.z = Float4(1.0f) - oC.w;
// pixel.w replicated to all three channels.
1722 case VK_BLEND_FACTOR_DST_ALPHA:
1723 blendFactor.x = pixel.w;
1724 blendFactor.y = pixel.w;
1725 blendFactor.z = pixel.w;
1727 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
1728 blendFactor.x = Float4(1.0f) - pixel.w;
1729 blendFactor.y = Float4(1.0f) - pixel.w;
1730 blendFactor.z = Float4(1.0f) - pixel.w;
// min(oC.w, 1 - pixel.w), replicated to all three channels.
1732 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
1733 blendFactor.x = Float4(1.0f) - pixel.w;
1734 blendFactor.x = Min(blendFactor.x, oC.w);
1735 blendFactor.y = blendFactor.x;
1736 blendFactor.z = blendFactor.x;
// Factor loaded from DrawData::factor.blendConstant4F[0..2].
1738 case VK_BLEND_FACTOR_CONSTANT_COLOR:
1739 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[0]));
1740 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[1]));
1741 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[2]));
// Factor loaded from DrawData::factor.invBlendConstant4F[0..2] (presumably
// precomputed as one minus the blend constant -- TODO confirm).
1743 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
1744 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[0]));
1745 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[1]));
1746 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[2]));
// Computes the w component of a floating-point blend factor for the given
// VkBlendFactor. Only blendFactor.w is written by the visible statements.
//   blendFactor            - out: receives the factor in .w
//   oC                     - first color operand; only oC.w is read
//   pixel                  - second color operand; only pixel.w is read
//   blendFactorAlphaActive - selects which factor to produce
// The *_COLOR factors read the same .w component as their *_ALPHA
// counterparts.
// NOTE(review): this listing omits short lines (braces, break statements).
// No statement is visible for ZERO and ONE; alphaBlend skips its multiply for
// those two factors.
1753 void PixelRoutine::blendFactorAlpha(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, VkBlendFactor blendFactorAlphaActive)
1755 switch(blendFactorAlphaActive)
1757 case VK_BLEND_FACTOR_ZERO:
1760 case VK_BLEND_FACTOR_ONE:
1763 case VK_BLEND_FACTOR_SRC_COLOR:
1764 blendFactor.w = oC.w;
1766 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
1767 blendFactor.w = Float4(1.0f) - oC.w;
1769 case VK_BLEND_FACTOR_DST_COLOR:
1770 blendFactor.w = pixel.w;
1772 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
1773 blendFactor.w = Float4(1.0f) - pixel.w;
1775 case VK_BLEND_FACTOR_SRC_ALPHA:
1776 blendFactor.w = oC.w;
1778 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
1779 blendFactor.w = Float4(1.0f) - oC.w;
1781 case VK_BLEND_FACTOR_DST_ALPHA:
1782 blendFactor.w = pixel.w;
1784 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
1785 blendFactor.w = Float4(1.0f) - pixel.w;
// For the w component this factor is the constant 1.0.
1787 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
1788 blendFactor.w = Float4(1.0f);
// Fourth element of the blend constant / inverse blend constant in DrawData.
1790 case VK_BLEND_FACTOR_CONSTANT_COLOR:
1791 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
1793 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
1794 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
// Blends the floating-point color oC with the value currently stored in the
// color buffer for 32-bit-per-channel target formats.
// Visible stages:
//   1. load `pixel` from two buffer locations (the second one is reached by
//      adding DrawData::colorPitchB[index]),
//   2. optional sRGB -> linear conversion of pixel.x/y/z,
//   3. color channels: compute factors, scale oC and pixel, apply
//      state.blendOperation,
//   4. w channel: same with the *Alpha state fields.
//   index   - selects state.targetFormat[index] and colorPitchB[index]
//   cBuffer - color buffer base pointer (the assignment to `buffer` is on
//             lines not visible here -- presumably buffer = cBuffer)
//   oC      - in/out: incoming color, overwritten with the blended result
//   x       - pixel offset, scaled by the format's byte size
// NOTE(review): this listing omits short lines (braces, break statements,
// declarations of `pixel` and `one`, and several short assignments), so some
// cases below show no statements.
1801 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4f &oC, Int &x)
// The guarded statement is not visible (presumably an early return).
1803 if(!state.alphaBlendActive)
1808 Pointer<Byte> buffer;
// `one` is the value used to fill channels the target format does not store.
// The float-format assignment is not visible; for non-normalized integer
// formats it is the bit pattern 0xFFFFFFFF (unsigned) or 0x7FFFFFFF (signed).
1816 if(Surface::isFloatFormat(state.targetFormat[index]))
1820 else if(Surface::isNonNormalizedInteger(state.targetFormat[index]))
1822 one = As<Float4>(Surface::isUnsignedComponent(state.targetFormat[index], 0) ? Int4(0xFFFFFFFF) : Int4(0x7FFFFFFF));
1825 switch(state.targetFormat[index])
1827 case VK_FORMAT_R32_SINT:
1828 case VK_FORMAT_R32_UINT:
1829 case VK_FORMAT_R32_SFLOAT:
// One 32-bit channel: two adjacent values from each of the two locations
// fill the four lanes of pixel.x; y, z and w are set to `one`.
1832 pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0);
1833 pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4);
1834 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1836 pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0);
1837 pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4);
1838 pixel.y = pixel.z = pixel.w = one;
1840 case VK_FORMAT_R32G32_SINT:
1841 case VK_FORMAT_R32G32_UINT:
1842 case VK_FORMAT_R32G32_SFLOAT:
// Two 32-bit channels: load 16 bytes from each location, then shuffle with
// selectors 0x88 and 0xDD to separate the interleaved values; z and w are
// set to `one`.
1844 pixel.x = *Pointer<Float4>(buffer + 8 * x, 16);
1845 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1846 pixel.y = *Pointer<Float4>(buffer + 8 * x, 16);
1848 pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x88);
1849 pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0xDD);
1851 pixel.z = pixel.w = one;
1853 case VK_FORMAT_R32G32B32A32_SFLOAT:
1854 case VK_FORMAT_R32G32B32A32_SINT:
1855 case VK_FORMAT_R32G32B32A32_UINT:
// Four 32-bit channels: load four 16-byte pixels (two per location) and
// transpose so that x/y/z/w each hold one channel.
1857 pixel.x = *Pointer<Float4>(buffer + 16 * x, 16);
1858 pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16);
1859 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
1860 pixel.z = *Pointer<Float4>(buffer + 16 * x, 16);
1861 pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16);
1862 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
// Convert the loaded color channels to linear space; w is not converted.
1868 if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
1870 sRGBtoLinear(pixel.x);
1871 sRGBtoLinear(pixel.y);
1872 sRGBtoLinear(pixel.z);
1875 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
1876 Vector4f sourceFactor;
1877 Vector4f destFactor;
1879 blendFactor(sourceFactor, oC, pixel, state.sourceBlendFactor);
1880 blendFactor(destFactor, oC, pixel, state.destBlendFactor);
// The multiply is skipped for ONE and ZERO factors.
1882 if(state.sourceBlendFactor != VK_BLEND_FACTOR_ONE && state.sourceBlendFactor != VK_BLEND_FACTOR_ZERO)
1884 oC.x *= sourceFactor.x;
1885 oC.y *= sourceFactor.y;
1886 oC.z *= sourceFactor.z;
1889 if(state.destBlendFactor != VK_BLEND_FACTOR_ONE && state.destBlendFactor != VK_BLEND_FACTOR_ZERO)
1891 pixel.x *= destFactor.x;
1892 pixel.y *= destFactor.y;
1893 pixel.z *= destFactor.z;
// Combine the scaled operands for the color channels. The statements of the
// ADD, SUBTRACT, SRC_EXT and DST_EXT cases are not visible in this listing.
1896 switch(state.blendOperation)
1898 case VK_BLEND_OP_ADD:
1903 case VK_BLEND_OP_SUBTRACT:
1908 case VK_BLEND_OP_REVERSE_SUBTRACT:
1909 oC.x = pixel.x - oC.x;
1910 oC.y = pixel.y - oC.y;
1911 oC.z = pixel.z - oC.z;
1913 case VK_BLEND_OP_MIN:
1914 oC.x = Min(oC.x, pixel.x);
1915 oC.y = Min(oC.y, pixel.y);
1916 oC.z = Min(oC.z, pixel.z);
1918 case VK_BLEND_OP_MAX:
1919 oC.x = Max(oC.x, pixel.x);
1920 oC.y = Max(oC.y, pixel.y);
1921 oC.z = Max(oC.z, pixel.z);
1923 case VK_BLEND_OP_SRC_EXT:
1926 case VK_BLEND_OP_DST_EXT:
1931 case VK_BLEND_OP_ZERO_EXT:
1932 oC.x = Float4(0.0f);
1933 oC.y = Float4(0.0f);
1934 oC.z = Float4(0.0f);
// Same procedure for the w channel, using the *Alpha state fields. Note that
// oC.x/y/z already hold blended values when these factors are computed;
// blendFactorAlpha reads only the .w components.
1940 blendFactorAlpha(sourceFactor, oC, pixel, state.sourceBlendFactorAlpha);
1941 blendFactorAlpha(destFactor, oC, pixel, state.destBlendFactorAlpha);
1943 if(state.sourceBlendFactorAlpha != VK_BLEND_FACTOR_ONE && state.sourceBlendFactorAlpha != VK_BLEND_FACTOR_ZERO)
1945 oC.w *= sourceFactor.w;
1948 if(state.destBlendFactorAlpha != VK_BLEND_FACTOR_ONE && state.destBlendFactorAlpha != VK_BLEND_FACTOR_ZERO)
1950 pixel.w *= destFactor.w;
// Only the MIN, MAX and ZERO_EXT statements are visible in this listing.
1953 switch(state.blendOperationAlpha)
1955 case VK_BLEND_OP_ADD:
1958 case VK_BLEND_OP_SUBTRACT:
1961 case VK_BLEND_OP_REVERSE_SUBTRACT:
1965 case VK_BLEND_OP_MIN:
1966 oC.w = Min(oC.w, pixel.w);
1968 case VK_BLEND_OP_MAX:
1969 oC.w = Max(oC.w, pixel.w);
1971 case VK_BLEND_OP_SRC_EXT:
1974 case VK_BLEND_OP_DST_EXT:
1977 case VK_BLEND_OP_ZERO_EXT:
1978 oC.w = Float4(0.0f);
1985 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask)
1987 switch(state.targetFormat[index])
1989 case VK_FORMAT_R32_SFLOAT:
1990 case VK_FORMAT_R32_SINT:
1991 case VK_FORMAT_R32_UINT:
1992 case VK_FORMAT_R16_SINT:
1993 case VK_FORMAT_R16_UINT:
1994 case VK_FORMAT_R8_SINT:
1995 case VK_FORMAT_R8_UINT:
1997 case VK_FORMAT_R32G32_SFLOAT:
1998 case VK_FORMAT_R32G32_SINT:
1999 case VK_FORMAT_R32G32_UINT:
2000 case VK_FORMAT_R16G16_SINT:
2001 case VK_FORMAT_R16G16_UINT:
2002 case VK_FORMAT_R8G8_SINT:
2003 case VK_FORMAT_R8G8_UINT:
2005 oC.x = UnpackLow(oC.x, oC.y);
2006 oC.z = UnpackHigh(oC.z, oC.y);
2009 case VK_FORMAT_R32G32B32A32_SFLOAT:
2010 case VK_FORMAT_R32G32B32A32_SINT:
2011 case VK_FORMAT_R32G32B32A32_UINT:
2012 case VK_FORMAT_R16G16B16A16_SINT:
2013 case VK_FORMAT_R16G16B16A16_UINT:
2014 case VK_FORMAT_R8G8B8A8_SINT:
2015 case VK_FORMAT_R8G8B8A8_UINT:
2016 transpose4x4(oC.x, oC.y, oC.z, oC.w);
2022 int rgbaWriteMask = state.colorWriteActive(index);
2024 Int xMask; // Combination of all masks
2026 if(state.depthTestActive)
2035 if(state.stencilActive)
2040 Pointer<Byte> buffer;
2043 switch(state.targetFormat[index])
2045 case VK_FORMAT_R32_SFLOAT:
2046 case VK_FORMAT_R32_SINT:
2047 case VK_FORMAT_R32_UINT:
2048 if(rgbaWriteMask & 0x00000001)
2050 buffer = cBuffer + 4 * x;
2053 value.x = *Pointer<Float>(buffer + 0);
2054 value.y = *Pointer<Float>(buffer + 4);
2056 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
2059 value.z = *Pointer<Float>(buffer + 0);
2060 value.w = *Pointer<Float>(buffer + 4);
2062 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + xMask * 16, 16));
2063 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16));
2064 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2067 *Pointer<Float>(buffer + 0) = oC.x.z;
2068 *Pointer<Float>(buffer + 4) = oC.x.w;
2070 buffer -= *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
2073 *Pointer<Float>(buffer + 0) = oC.x.x;
2074 *Pointer<Float>(buffer + 4) = oC.x.y;
2077 case VK_FORMAT_R16_SINT:
2078 case VK_FORMAT_R16_UINT:
2079 if(rgbaWriteMask & 0x00000001)
2081 buffer = cBuffer + 2 * x;
2084 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0));
2086 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2088 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1));
2089 value = As<Float4>(Int4(xyzw));
2091 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
2092 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
2093 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2095 if(state.targetFormat[index] == VK_FORMAT_R16_SINT)
2097 Float component = oC.x.z;
2098 *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
2100 *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
2102 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2105 *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
2107 *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
2109 else // VK_FORMAT_R16_UINT
2111 Float component = oC.x.z;
2112 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2114 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2116 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2119 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2121 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2125 case VK_FORMAT_R8_SINT:
2126 case VK_FORMAT_R8_UINT:
2127 if(rgbaWriteMask & 0x00000001)
2129 buffer = cBuffer + x;
2131 UInt xyzw, packedCol;
2133 xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF;
2134 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2135 xyzw |= UInt(*Pointer<UShort>(buffer)) << 16;
2137 Short4 tmpCol = Short4(As<Int4>(oC.x));
2138 if(state.targetFormat[index] == VK_FORMAT_R8_SINT)
2140 tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol));
2144 tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol));
2146 packedCol = Extract(As<Int2>(tmpCol), 0);
2148 packedCol = (packedCol & *Pointer<UInt>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask)) |
2149 (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask));
2151 *Pointer<UShort>(buffer) = UShort(packedCol >> 16);
2152 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2153 *Pointer<UShort>(buffer) = UShort(packedCol);
2156 case VK_FORMAT_R32G32_SFLOAT:
2157 case VK_FORMAT_R32G32_SINT:
2158 case VK_FORMAT_R32G32_UINT:
2159 buffer = cBuffer + 8 * x;
2161 value = *Pointer<Float4>(buffer);
2163 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2165 Float4 masked = value;
2166 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
2167 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0])));
2168 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
2171 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16));
2172 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16));
2173 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2174 *Pointer<Float4>(buffer) = oC.x;
2176 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
2178 value = *Pointer<Float4>(buffer);
2180 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2185 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
2186 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0])));
2187 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
2190 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16));
2191 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16));
2192 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2193 *Pointer<Float4>(buffer) = oC.y;
2195 case VK_FORMAT_R16G16_SINT:
2196 case VK_FORMAT_R16G16_UINT:
2197 if((rgbaWriteMask & 0x00000003) != 0x0)
2199 buffer = cBuffer + 4 * x;
2202 UShort4 packedCol = UShort4(As<Int4>(oC.x));
2203 UShort4 value = *Pointer<UShort4>(buffer);
2204 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2205 if((rgbaWriteMask & 0x3) != 0x3)
2207 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
2208 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2209 mergedMask &= rgbaMask;
2211 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2213 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2215 packedCol = UShort4(As<Int4>(oC.y));
2216 value = *Pointer<UShort4>(buffer);
2217 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2218 if((rgbaWriteMask & 0x3) != 0x3)
2220 mergedMask &= rgbaMask;
2222 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2225 case VK_FORMAT_R8G8_SINT:
2226 case VK_FORMAT_R8G8_UINT:
2227 if((rgbaWriteMask & 0x00000003) != 0x0)
2229 buffer = cBuffer + 2 * x;
2231 Int2 xyzw, packedCol;
2233 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0);
2234 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2235 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1);
2237 if(state.targetFormat[index] == VK_FORMAT_R8G8_SINT)
2239 packedCol = As<Int2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
2243 packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
2246 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
2247 if((rgbaWriteMask & 0x3) != 0x3)
2249 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
2250 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2251 mergedMask &= rgbaMask;
2254 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask));
2256 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1));
2257 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2258 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
2261 case VK_FORMAT_R32G32B32A32_SFLOAT:
2262 case VK_FORMAT_R32G32B32A32_SINT:
2263 case VK_FORMAT_R32G32B32A32_UINT:
2264 buffer = cBuffer + 16 * x;
2267 value = *Pointer<Float4>(buffer, 16);
2269 if(rgbaWriteMask != 0x0000000F)
2271 Float4 masked = value;
2272 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2273 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
2274 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
2277 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskX0X) + xMask * 16, 16));
2278 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16));
2279 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2280 *Pointer<Float4>(buffer, 16) = oC.x;
2284 value = *Pointer<Float4>(buffer + 16, 16);
2286 if(rgbaWriteMask != 0x0000000F)
2288 Float4 masked = value;
2289 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2290 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
2291 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
2294 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskX1X) + xMask * 16, 16));
2295 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16));
2296 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2297 *Pointer<Float4>(buffer + 16, 16) = oC.y;
2300 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
2303 value = *Pointer<Float4>(buffer, 16);
2305 if(rgbaWriteMask != 0x0000000F)
2307 Float4 masked = value;
2308 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2309 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
2310 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked));
2313 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskX2X) + xMask * 16, 16));
2314 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16));
2315 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value));
2316 *Pointer<Float4>(buffer, 16) = oC.z;
2320 value = *Pointer<Float4>(buffer + 16, 16);
2322 if(rgbaWriteMask != 0x0000000F)
2324 Float4 masked = value;
2325 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2326 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
2327 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked));
2330 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskX3X) + xMask * 16, 16));
2331 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16));
2332 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value));
2333 *Pointer<Float4>(buffer + 16, 16) = oC.w;
2336 case VK_FORMAT_R16G16B16A16_SINT:
2337 case VK_FORMAT_R16G16B16A16_UINT:
2338 if((rgbaWriteMask & 0x0000000F) != 0x0)
2340 buffer = cBuffer + 8 * x;
2343 UShort8 value = *Pointer<UShort8>(buffer);
2344 UShort8 packedCol = UShort8(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y)));
2345 UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
2346 if((rgbaWriteMask & 0xF) != 0xF)
2348 UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
2349 rgbaMask = UInt4(tmpMask, tmpMask);
2350 mergedMask &= rgbaMask;
2352 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
2354 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2356 value = *Pointer<UShort8>(buffer);
2357 packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w)));
2358 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
2359 if((rgbaWriteMask & 0xF) != 0xF)
2361 mergedMask &= rgbaMask;
2363 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
2366 case VK_FORMAT_R8G8B8A8_SINT:
2367 case VK_FORMAT_R8G8B8A8_UINT:
2368 if((rgbaWriteMask & 0x0000000F) != 0x0)
2370 UInt2 value, packedCol, mergedMask;
2372 buffer = cBuffer + 4 * x;
2374 if(state.targetFormat[index] == VK_FORMAT_R8G8B8A8_SINT)
2376 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
2380 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
2382 value = *Pointer<UInt2>(buffer, 16);
2383 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2384 if(rgbaWriteMask != 0xF)
2386 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
2388 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
2390 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2392 if(state.targetFormat[index] == VK_FORMAT_R8G8B8A8_SINT)
2394 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
2398 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
2400 value = *Pointer<UInt2>(buffer, 16);
2401 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2402 if(rgbaWriteMask != 0xF)
2404 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
2406 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
2414 UShort4 PixelRoutine::convertFixed16(Float4 &cf, bool saturate)
2416 return UShort4(cf * Float4(0xFFFF), saturate);
2419 void PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c)
2421 Pointer<Byte> LUT = constants + OFFSET(Constants,sRGBtoLinear12_16);
2423 c.x = As<UShort4>(c.x) >> 4;
2424 c.y = As<UShort4>(c.y) >> 4;
2425 c.z = As<UShort4>(c.z) >> 4;
2427 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2428 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2429 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2430 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
2432 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2433 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2434 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2435 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
2437 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2438 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2439 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2440 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
2443 void PixelRoutine::linearToSRGB16_12_16(Vector4s &c)
2445 c.x = As<UShort4>(c.x) >> 4;
2446 c.y = As<UShort4>(c.y) >> 4;
2447 c.z = As<UShort4>(c.z) >> 4;
2449 linearToSRGB12_16(c);
2452 void PixelRoutine::linearToSRGB12_16(Vector4s &c)
2454 Pointer<Byte> LUT = constants + OFFSET(Constants,linearToSRGB12_16);
2456 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2457 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2458 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2459 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
2461 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2462 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2463 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2464 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
2466 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2467 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2468 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2469 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
2472 Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2
2474 Float4 linear = x * x;
2475 linear = linear * Float4(0.73f) + linear * x * Float4(0.27f);
2477 return Min(Max(linear, Float4(0.0f)), Float4(1.0f));
2480 bool PixelRoutine::colorUsed()
2482 return state.colorWriteMask || state.alphaTestActive() || state.shaderContainsKill;