1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "PixelPipeline.hpp"
16 #include "SamplerCore.hpp"
17 #include "Renderer/Renderer.hpp"
// Global flag declared here and defined elsewhere. In this file it is read by
// PixelPipeline::rasterOperation(): when it is false and state.writeSRGB is set,
// the color is converted with linearToSRGB12_16() before fog and blending are applied.
21 extern bool postBlendSRGB;
// Initializes the fixed-point 'diffuse' and 'specular' colors from the interpolants v[0] and v[1].
// For each of the four components, the matching bit of state.color[n].component selects between
// convertFixed12() of the interpolated value and a constant default.
// The parameters x, y, z and w are not referenced by any line visible in this listing.
23 void PixelPipeline::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w)
// Diffuse defaults to 0x1000 per component; elsewhere in this file 0x1000 is the upper clamp bound
// for "[0, 1]" and alphaTest() scales by 1.0f / 0x1000, so this corresponds to 1.0.
25 if(state.color[0].component & 0x1) diffuse.x = convertFixed12(v[0].x); else diffuse.x = Short4(0x1000);
26 if(state.color[0].component & 0x2) diffuse.y = convertFixed12(v[0].y); else diffuse.y = Short4(0x1000);
27 if(state.color[0].component & 0x4) diffuse.z = convertFixed12(v[0].z); else diffuse.z = Short4(0x1000);
28 if(state.color[0].component & 0x8) diffuse.w = convertFixed12(v[0].w); else diffuse.w = Short4(0x1000);
// Specular defaults to zero per component.
30 if(state.color[1].component & 0x1) specular.x = convertFixed12(v[1].x); else specular.x = Short4(0x0000);
31 if(state.color[1].component & 0x2) specular.y = convertFixed12(v[1].y); else specular.y = Short4(0x0000);
32 if(state.color[1].component & 0x4) specular.z = convertFixed12(v[1].z); else specular.z = Short4(0x0000);
33 if(state.color[1].component & 0x8) specular.w = convertFixed12(v[1].w); else specular.w = Short4(0x0000);
// Fixed-function path: runs the texture stage cascade through blendTexture() and then
// calls specularPixel() on the result.
// NOTE(review): this listing omits some original lines (brace-only lines, the statement guarded by
// the STAGE_DISABLE test, the declaration of 'texture'); the comments describe only what is visible.
36 void PixelPipeline::fixedFunction()
// Register handed to every blendTexture() call as 'temp'; starts out all zero.
39 Vector4s temp(0x0000, 0x0000, 0x0000, 0x0000);
// Visit up to 8 texture stages in ascending order.
41 for(int stage = 0; stage < 8; stage++)
// Test for a disabled stage. The guarded statement is not visible here;
// presumably it ends the cascade -- TODO confirm against the full file.
43 if(state.textureStage[stage].stageOperation == TextureStage::STAGE_DISABLE)
// Sample only when the stage state says it uses its texture; both arguments are the stage index.
50 if(state.textureStage[stage].usesTexture)
52 texture = sampleTexture(stage, stage);
// Combine this stage's arguments (blendTexture reads and updates 'current').
55 blendTexture(temp, texture, stage);
// Final step after the stage loop: pass 'current' and 'specular' to specularPixel().
58 specularPixel(current, specular);
// Walks the pixel shader's instructions in order, dispatches on each opcode to the matching
// operation, post-processes the result (shift, saturate) and writes it to the destination register.
// cMask holds one coverage mask per sample and is handed to the TEXKILL operations.
// NOTE(review): this listing omits some original lines (brace-only lines, several guards, 'break'
// statements, the declarations of d/s0/s1/s2); comments marked "presumably" rely on that missing
// context and should be confirmed against the full file.
61 void PixelPipeline::applyShader(Int cMask[4])
69 int pad = 0; // Count number of texm3x3pad instructions
70 Vector4s dPairing; // Destination for first pairing instruction
72 for(size_t i = 0; i < shader->getLength(); i++)
74 const Shader::Instruction *instruction = shader->getInstruction(i);
75 Shader::Opcode opcode = instruction->opcode;
77 // #ifndef NDEBUG // FIXME: Centralize debug output control
78 // shader->printInstruction(i, "debug.txt");
// Declarations and constant definitions are tested for here; the guarded statement is not
// visible in this listing (presumably these instructions are skipped).
81 if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
86 const Dst &dst = instruction->dst;
87 const Src &src0 = instruction->src[0];
88 const Src &src1 = instruction->src[1];
89 const Src &src2 = instruction->src[2];
91 unsigned short shaderModel = shader->getShaderModel();
// 'pairing' is derived from the NEXT instruction's coissue flag, 'coissue' from this one's.
// The bounds check keeps getInstruction(i + 1) from reading past the last instruction.
92 bool pairing = i + 1 < shader->getLength() && shader->getInstruction(i + 1)->coissue; // First instruction of pair
93 bool coissue = instruction->coissue; // Second instruction of pair
// Fetch only the source operands this instruction actually has.
100 if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0);
101 if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1);
102 if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2);
// Texture coordinates are read from v[2 + n] (v[0] and v[1] feed diffuse/specular in setBuiltins).
// For shader models below 0x0104 the set is selected by the destination register index,
// otherwise by the index of the first source operand.
104 Float4 x = shaderModel < 0x0104 ? v[2 + dst.index].x : v[2 + src0.index].x;
105 Float4 y = shaderModel < 0x0104 ? v[2 + dst.index].y : v[2 + src0.index].y;
106 Float4 z = shaderModel < 0x0104 ? v[2 + dst.index].z : v[2 + src0.index].z;
107 Float4 w = shaderModel < 0x0104 ? v[2 + dst.index].w : v[2 + src0.index].w;
// Opcode dispatch. Version tokens, DEF, NOP, PHASE and END perform no operation.
111 case Shader::OPCODE_PS_1_0: break;
112 case Shader::OPCODE_PS_1_1: break;
113 case Shader::OPCODE_PS_1_2: break;
114 case Shader::OPCODE_PS_1_3: break;
115 case Shader::OPCODE_PS_1_4: break;
117 case Shader::OPCODE_DEF: break;
119 case Shader::OPCODE_NOP: break;
// Arithmetic instructions.
120 case Shader::OPCODE_MOV: MOV(d, s0); break;
121 case Shader::OPCODE_ADD: ADD(d, s0, s1); break;
122 case Shader::OPCODE_SUB: SUB(d, s0, s1); break;
123 case Shader::OPCODE_MAD: MAD(d, s0, s1, s2); break;
124 case Shader::OPCODE_MUL: MUL(d, s0, s1); break;
125 case Shader::OPCODE_DP3: DP3(d, s0, s1); break;
126 case Shader::OPCODE_DP4: DP4(d, s0, s1); break;
127 case Shader::OPCODE_LRP: LRP(d, s0, s1, s2); break;
// Texture coordinate read: TEXCOORD below model 0x0104, TEXCRD otherwise.
128 case Shader::OPCODE_TEXCOORD:
129 if(shaderModel < 0x0104)
131 TEXCOORD(d, x, y, z, dst.index);
// The swizzle test picks the third coordinate: z for .xyz, w in the other TEXCRD call.
// The last argument is true when the source carries the DZ or DW modifier.
135 if((src0.swizzle & 0x30) == 0x20) // .xyz
137 TEXCRD(d, x, y, z, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
141 TEXCRD(d, x, y, w, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
// TEXKILL operates on the coordinates, except for model 0x0104 with a non-texture
// destination, where it operates on register rs[dst.index].
145 case Shader::OPCODE_TEXKILL:
146 if(shaderModel < 0x0104)
148 TEXKILL(cMask, x, y, z);
150 else if(shaderModel == 0x0104)
152 if(dst.type == Shader::PARAMETER_TEXTURE)
154 TEXKILL(cMask, x, y, z);
158 TEXKILL(cMask, rs[dst.index]);
// Texture sampling: TEX with interpolated coordinates, or TEXLD with the fetched source
// register (the second form is presumably the non-texture-source branch for model 0x0104).
163 case Shader::OPCODE_TEX:
164 if(shaderModel < 0x0104)
166 TEX(d, x, y, z, dst.index, false);
168 else if(shaderModel == 0x0104)
170 if(src0.type == Shader::PARAMETER_TEXTURE)
172 if((src0.swizzle & 0x30) == 0x20) // .xyz
174 TEX(d, x, y, z, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
178 TEX(d, x, y, w, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
183 TEXLD(d, s0, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
// Remaining texture addressing instructions. Where present, the trailing boolean is true
// when the source operand carries the SIGN modifier.
188 case Shader::OPCODE_TEXBEM: TEXBEM(d, s0, x, y, z, dst.index); break;
189 case Shader::OPCODE_TEXBEML: TEXBEML(d, s0, x, y, z, dst.index); break;
190 case Shader::OPCODE_TEXREG2AR: TEXREG2AR(d, s0, dst.index); break;
191 case Shader::OPCODE_TEXREG2GB: TEXREG2GB(d, s0, dst.index); break;
192 case Shader::OPCODE_TEXM3X2PAD: TEXM3X2PAD(x, y, z, s0, 0, src0.modifier == Shader::MODIFIER_SIGN); break;
193 case Shader::OPCODE_TEXM3X2TEX: TEXM3X2TEX(d, x, y, z, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
// 'pad++ % 2' makes the fifth argument alternate 0, 1, 0, 1, ... across successive texm3x3pad instructions.
194 case Shader::OPCODE_TEXM3X3PAD: TEXM3X3PAD(x, y, z, s0, pad++ % 2, src0.modifier == Shader::MODIFIER_SIGN); break;
195 case Shader::OPCODE_TEXM3X3TEX: TEXM3X3TEX(d, x, y, z, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
196 case Shader::OPCODE_TEXM3X3SPEC: TEXM3X3SPEC(d, x, y, z, dst.index, s0, s1); break;
197 case Shader::OPCODE_TEXM3X3VSPEC: TEXM3X3VSPEC(d, x, y, z, dst.index, s0); break;
198 case Shader::OPCODE_CND: CND(d, s0, s1, s2); break;
199 case Shader::OPCODE_TEXREG2RGB: TEXREG2RGB(d, s0, dst.index); break;
200 case Shader::OPCODE_TEXDP3TEX: TEXDP3TEX(d, x, y, z, dst.index, s0); break;
201 case Shader::OPCODE_TEXM3X2DEPTH: TEXM3X2DEPTH(d, x, y, z, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
202 case Shader::OPCODE_TEXDP3: TEXDP3(d, x, y, z, s0); break;
203 case Shader::OPCODE_TEXM3X3: TEXM3X3(d, x, y, z, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
204 case Shader::OPCODE_TEXDEPTH: TEXDEPTH(); break;
// OPCODE_CMP0 is handled by the CMP operation.
205 case Shader::OPCODE_CMP0: CMP(d, s0, s1, s2); break;
206 case Shader::OPCODE_BEM: BEM(d, s0, s1, dst.index); break;
207 case Shader::OPCODE_PHASE: break;
208 case Shader::OPCODE_END: break;
// Result post-processing and write-back; not done for instructions without a destination or for TEXKILL.
213 if(dst.type != Shader::PARAMETER_VOID && opcode != Shader::OPCODE_TEXKILL)
// Left shift of the masked components (presumably guarded by dst.shift > 0, given the
// 'else if(dst.shift < 0)' below): each saturating self-add doubles the value, giving
// one doubling for shift 1, two for shift 2 and three for shift 3 or more.
217 if(dst.mask & 0x1) { d.x = AddSat(d.x, d.x); if(dst.shift > 1) d.x = AddSat(d.x, d.x); if(dst.shift > 2) d.x = AddSat(d.x, d.x); }
218 if(dst.mask & 0x2) { d.y = AddSat(d.y, d.y); if(dst.shift > 1) d.y = AddSat(d.y, d.y); if(dst.shift > 2) d.y = AddSat(d.y, d.y); }
219 if(dst.mask & 0x4) { d.z = AddSat(d.z, d.z); if(dst.shift > 1) d.z = AddSat(d.z, d.z); if(dst.shift > 2) d.z = AddSat(d.z, d.z); }
220 if(dst.mask & 0x8) { d.w = AddSat(d.w, d.w); if(dst.shift > 1) d.w = AddSat(d.w, d.w); if(dst.shift > 2) d.w = AddSat(d.w, d.w); }
// Negative shift: shift the masked components right by -dst.shift.
222 else if(dst.shift < 0)
224 if(dst.mask & 0x1) d.x = d.x >> -dst.shift;
225 if(dst.mask & 0x2) d.y = d.y >> -dst.shift;
226 if(dst.mask & 0x4) d.z = d.z >> -dst.shift;
227 if(dst.mask & 0x8) d.w = d.w >> -dst.shift;
// Clamp the masked components to [0x0000, 0x1000]. The guard is not visible in this
// listing; presumably this is the destination's saturate modifier -- TODO confirm.
232 if(dst.mask & 0x1) { d.x = Min(d.x, Short4(0x1000)); d.x = Max(d.x, Short4(0x0000)); }
233 if(dst.mask & 0x2) { d.y = Min(d.y, Short4(0x1000)); d.y = Max(d.y, Short4(0x0000)); }
234 if(dst.mask & 0x4) { d.z = Min(d.z, Short4(0x1000)); d.z = Max(d.z, Short4(0x0000)); }
235 if(dst.mask & 0x8) { d.w = Min(d.w, Short4(0x1000)); d.w = Max(d.w, Short4(0x0000)); }
// Copy the masked components into dPairing (presumably only when 'pairing' is set; guard
// not visible), so the first result of a pair can be written later.
240 if(dst.mask & 0x1) dPairing.x = d.x;
241 if(dst.mask & 0x2) dPairing.y = d.y;
242 if(dst.mask & 0x4) dPairing.z = d.z;
243 if(dst.mask & 0x8) dPairing.w = d.w;
// Write the held-back result to the destination of the PREVIOUS instruction (i - 1).
// NOTE(review): this 'dst' redeclares the name bound above; presumably it lives in a nested
// scope guarded by 'coissue' -- the enclosing braces/guard are not visible here.
248 const Dst &dst = shader->getInstruction(i - 1)->dst;
250 writeDestination(dPairing, dst);
// Write this instruction's own result (presumably skipped for the first instruction of a
// pair, whose result was stored in dPairing instead; guard not visible).
255 writeDestination(d, dst);
// Clamps 'current' and applies the alpha test or alpha-to-coverage, updating the per-sample
// coverage masks in cMask.
// NOTE(review): the return statements and the declarations of 'aMask' and 'pass' are not visible
// in this listing; presumably the result reports whether any sample is still covered.
261 Bool PixelPipeline::alphaTest(Int cMask[4])
// Clamp every channel of 'current' to [0x0000, 0x0FFF]. Note the upper bound is 0x0FFF here,
// not the 0x1000 used by the clamps elsewhere in this file.
263 current.x = Min(current.x, Short4(0x0FFF)); current.x = Max(current.x, Short4(0x0000));
264 current.y = Min(current.y, Short4(0x0FFF)); current.y = Max(current.y, Short4(0x0000));
265 current.z = Min(current.z, Short4(0x0FFF)); current.z = Max(current.z, Short4(0x0000));
266 current.w = Min(current.w, Short4(0x0FFF)); current.w = Max(current.w, Short4(0x0000));
// Test for the alpha test being inactive; the guarded statement is not visible
// (presumably an early return) -- TODO confirm.
268 if(!state.alphaTestActive())
// No transparency antialiasing: the base-class alpha test is called with aMask and the alpha channel.
275 if(state.transparencyAntialiasing == TRANSPARENCY_NONE)
277 PixelRoutine::alphaTest(aMask, current.w);
// Per-sample loop; its body is not visible here (presumably applies aMask to each cMask[q]).
279 for(unsigned int q = 0; q < state.multiSample; q++)
// Alpha-to-coverage: convert the fixed-point alpha to float by scaling with 1 / 0x1000,
// then hand it to alphaToCoverage() together with cMask.
284 else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
286 Float4 alpha = Float4(current.w) * Float4(1.0f / 0x1000);
288 alphaToCoverage(cMask, alpha);
// OR the coverage masks of samples 1..multiSample-1 into 'pass'. The loop starts at q = 1,
// so 'pass' is presumably initialized from cMask[0] on a line not shown.
294 for(unsigned int q = 1; q < state.multiSample; q++)
296 pass = pass | cMask[q];
// Writes the pipeline result 'current' to color target 0: format-dependent conversion, fog
// blending, then alpha blend and write for each enabled sample.
// fog: fog factor passed to the fog blend; cBuffer: color buffer pointers (only cBuffer[0] is used
// in the visible lines); x: passed through to the blend/write helpers; sMask/zMask/cMask: per-sample
// masks forwarded to writeColor().
// NOTE(review): this listing omits some original lines (brace-only lines, some case labels, 'break'
// statements, an 'else' branch after the sRGB test, the declaration of the float 'color').
302 void PixelPipeline::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
// Test for color writes to target 0 being disabled; the guarded statement is not visible
// (presumably an early return).
304 if(!state.colorWriteActive(0))
311 switch(state.targetFormat[0])
// Integer target formats: processed in fixed point using 'current'.
314 case FORMAT_X8R8G8B8:
315 case FORMAT_X8B8G8R8:
316 case FORMAT_A8R8G8B8:
317 case FORMAT_A8B8G8R8:
320 case FORMAT_A16B16G16R16:
// Convert to sRGB here, before fog and blending, only when post-blend conversion is off
// and sRGB writes are requested.
321 if(!postBlendSRGB && state.writeSRGB)
323 linearToSRGB12_16(current);
// R5G6B5 target: keep only the top 5 / 6 / 5 bits of x / y / z.
333 if(state.targetFormat[0] == FORMAT_R5G6B5)
335 current.x &= Short4(0xF800u);
336 current.y &= Short4(0xFC00u);
337 current.z &= Short4(0xF800u);
340 fogBlend(current, fog);
342 for(unsigned int q = 0; q < state.multiSample; q++)
// Address for sample q: base of target 0 plus q times the colorSliceB[0] value read from DrawData.
344 Pointer<Byte> buffer = cBuffer[0] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[0]));
// Each sample works on its own copy of 'current'.
345 Vector4s color = current;
// Only samples enabled in the multisample mask are blended and written.
347 if(state.multiSampleMask & (1 << q))
349 alphaBlend(0, buffer, color, x);
350 logicOperation(0, buffer, color, x);
351 writeColor(0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
// Floating-point target formats: 'current' is converted into oC with convertSigned12() and the
// base-class fogBlend is used. No logicOperation() call appears in this path.
357 case FORMAT_X32B32G32R32F:
358 case FORMAT_A32B32G32R32F:
359 // case FORMAT_X32B32G32R32F_UNSIGNED: // Not renderable in any fixed-function API.
360 convertSigned12(oC, current);
361 PixelRoutine::fogBlend(oC, fog);
363 for(unsigned int q = 0; q < state.multiSample; q++)
365 Pointer<Byte> buffer = cBuffer[0] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[0]));
// 'color' is declared on a line not shown in this listing (presumably a per-sample copy of oC).
368 if(state.multiSampleMask & (1 << q))
370 alphaBlend(0, buffer, color, x);
371 writeColor(0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
380 void PixelPipeline::blendTexture(Vector4s &temp, Vector4s &texture, int stage)
382 Vector4s *arg1 = nullptr;
383 Vector4s *arg2 = nullptr;
384 Vector4s *arg3 = nullptr;
390 const TextureStage::State &textureStage = state.textureStage[stage];
392 if(textureStage.firstArgument == TextureStage::SOURCE_CONSTANT ||
393 textureStage.firstArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
394 textureStage.secondArgument == TextureStage::SOURCE_CONSTANT ||
395 textureStage.secondArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
396 textureStage.thirdArgument == TextureStage::SOURCE_CONSTANT ||
397 textureStage.thirdArgumentAlpha == TextureStage::SOURCE_CONSTANT)
399 constant.x = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[0]));
400 constant.y = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[1]));
401 constant.z = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[2]));
402 constant.w = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[3]));
405 if(textureStage.firstArgument == TextureStage::SOURCE_TFACTOR ||
406 textureStage.firstArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
407 textureStage.secondArgument == TextureStage::SOURCE_TFACTOR ||
408 textureStage.secondArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
409 textureStage.thirdArgument == TextureStage::SOURCE_TFACTOR ||
410 textureStage.thirdArgumentAlpha == TextureStage::SOURCE_TFACTOR)
412 tfactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[0]));
413 tfactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[1]));
414 tfactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[2]));
415 tfactor.w = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]));
419 if(stage > 0 && textureStage.usesTexture)
421 if(state.textureStage[stage - 1].stageOperation == TextureStage::STAGE_PREMODULATE)
423 current.x = MulHigh(current.x, texture.x) << 4;
424 current.y = MulHigh(current.y, texture.y) << 4;
425 current.z = MulHigh(current.z, texture.z) << 4;
428 if(state.textureStage[stage - 1].stageOperationAlpha == TextureStage::STAGE_PREMODULATE)
430 current.w = MulHigh(current.w, texture.w) << 4;
436 texture.x = MulHigh(texture.x, L) << 4;
437 texture.y = MulHigh(texture.y, L) << 4;
438 texture.z = MulHigh(texture.z, L) << 4;
443 switch(textureStage.firstArgument)
445 case TextureStage::SOURCE_TEXTURE: arg1 = &texture; break;
446 case TextureStage::SOURCE_CONSTANT: arg1 = &constant; break;
447 case TextureStage::SOURCE_CURRENT: arg1 = ¤t; break;
448 case TextureStage::SOURCE_DIFFUSE: arg1 = &diffuse; break;
449 case TextureStage::SOURCE_SPECULAR: arg1 = &specular; break;
450 case TextureStage::SOURCE_TEMP: arg1 = &temp; break;
451 case TextureStage::SOURCE_TFACTOR: arg1 = &tfactor; break;
456 switch(textureStage.secondArgument)
458 case TextureStage::SOURCE_TEXTURE: arg2 = &texture; break;
459 case TextureStage::SOURCE_CONSTANT: arg2 = &constant; break;
460 case TextureStage::SOURCE_CURRENT: arg2 = ¤t; break;
461 case TextureStage::SOURCE_DIFFUSE: arg2 = &diffuse; break;
462 case TextureStage::SOURCE_SPECULAR: arg2 = &specular; break;
463 case TextureStage::SOURCE_TEMP: arg2 = &temp; break;
464 case TextureStage::SOURCE_TFACTOR: arg2 = &tfactor; break;
469 switch(textureStage.thirdArgument)
471 case TextureStage::SOURCE_TEXTURE: arg3 = &texture; break;
472 case TextureStage::SOURCE_CONSTANT: arg3 = &constant; break;
473 case TextureStage::SOURCE_CURRENT: arg3 = ¤t; break;
474 case TextureStage::SOURCE_DIFFUSE: arg3 = &diffuse; break;
475 case TextureStage::SOURCE_SPECULAR: arg3 = &specular; break;
476 case TextureStage::SOURCE_TEMP: arg3 = &temp; break;
477 case TextureStage::SOURCE_TFACTOR: arg3 = &tfactor; break;
486 switch(textureStage.firstModifier)
488 case TextureStage::MODIFIER_COLOR:
490 case TextureStage::MODIFIER_INVCOLOR:
491 mod1.x = SubSat(Short4(0x1000), arg1->x);
492 mod1.y = SubSat(Short4(0x1000), arg1->y);
493 mod1.z = SubSat(Short4(0x1000), arg1->z);
494 mod1.w = SubSat(Short4(0x1000), arg1->w);
498 case TextureStage::MODIFIER_ALPHA:
506 case TextureStage::MODIFIER_INVALPHA:
507 mod1.x = SubSat(Short4(0x1000), arg1->w);
508 mod1.y = SubSat(Short4(0x1000), arg1->w);
509 mod1.z = SubSat(Short4(0x1000), arg1->w);
510 mod1.w = SubSat(Short4(0x1000), arg1->w);
518 switch(textureStage.secondModifier)
520 case TextureStage::MODIFIER_COLOR:
522 case TextureStage::MODIFIER_INVCOLOR:
523 mod2.x = SubSat(Short4(0x1000), arg2->x);
524 mod2.y = SubSat(Short4(0x1000), arg2->y);
525 mod2.z = SubSat(Short4(0x1000), arg2->z);
526 mod2.w = SubSat(Short4(0x1000), arg2->w);
530 case TextureStage::MODIFIER_ALPHA:
538 case TextureStage::MODIFIER_INVALPHA:
539 mod2.x = SubSat(Short4(0x1000), arg2->w);
540 mod2.y = SubSat(Short4(0x1000), arg2->w);
541 mod2.z = SubSat(Short4(0x1000), arg2->w);
542 mod2.w = SubSat(Short4(0x1000), arg2->w);
550 switch(textureStage.thirdModifier)
552 case TextureStage::MODIFIER_COLOR:
554 case TextureStage::MODIFIER_INVCOLOR:
555 mod3.x = SubSat(Short4(0x1000), arg3->x);
556 mod3.y = SubSat(Short4(0x1000), arg3->y);
557 mod3.z = SubSat(Short4(0x1000), arg3->z);
558 mod3.w = SubSat(Short4(0x1000), arg3->w);
562 case TextureStage::MODIFIER_ALPHA:
570 case TextureStage::MODIFIER_INVALPHA:
571 mod3.x = SubSat(Short4(0x1000), arg3->w);
572 mod3.y = SubSat(Short4(0x1000), arg3->w);
573 mod3.z = SubSat(Short4(0x1000), arg3->w);
574 mod3.w = SubSat(Short4(0x1000), arg3->w);
582 switch(textureStage.stageOperation)
584 case TextureStage::STAGE_DISABLE:
586 case TextureStage::STAGE_SELECTARG1: // Arg1
591 case TextureStage::STAGE_SELECTARG2: // Arg2
596 case TextureStage::STAGE_SELECTARG3: // Arg3
601 case TextureStage::STAGE_MODULATE: // Arg1 * Arg2
602 res.x = MulHigh(arg1->x, arg2->x) << 4;
603 res.y = MulHigh(arg1->y, arg2->y) << 4;
604 res.z = MulHigh(arg1->z, arg2->z) << 4;
606 case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2
607 res.x = MulHigh(arg1->x, arg2->x) << 5;
608 res.y = MulHigh(arg1->y, arg2->y) << 5;
609 res.z = MulHigh(arg1->z, arg2->z) << 5;
611 case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4
612 res.x = MulHigh(arg1->x, arg2->x) << 6;
613 res.y = MulHigh(arg1->y, arg2->y) << 6;
614 res.z = MulHigh(arg1->z, arg2->z) << 6;
616 case TextureStage::STAGE_ADD: // Arg1 + Arg2
617 res.x = AddSat(arg1->x, arg2->x);
618 res.y = AddSat(arg1->y, arg2->y);
619 res.z = AddSat(arg1->z, arg2->z);
621 case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5
622 res.x = AddSat(arg1->x, arg2->x);
623 res.y = AddSat(arg1->y, arg2->y);
624 res.z = AddSat(arg1->z, arg2->z);
626 res.x = SubSat(res.x, Short4(0x0800));
627 res.y = SubSat(res.y, Short4(0x0800));
628 res.z = SubSat(res.z, Short4(0x0800));
630 case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1
631 res.x = AddSat(arg1->x, arg2->x);
632 res.y = AddSat(arg1->y, arg2->y);
633 res.z = AddSat(arg1->z, arg2->z);
635 res.x = SubSat(res.x, Short4(0x0800));
636 res.y = SubSat(res.y, Short4(0x0800));
637 res.z = SubSat(res.z, Short4(0x0800));
639 res.x = AddSat(res.x, res.x);
640 res.y = AddSat(res.y, res.y);
641 res.z = AddSat(res.z, res.z);
643 case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2
644 res.x = SubSat(arg1->x, arg2->x);
645 res.y = SubSat(arg1->y, arg2->y);
646 res.z = SubSat(arg1->z, arg2->z);
648 case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2
652 tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(arg1->x, arg2->x); res.x = SubSat(res.x, tmp);
653 tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(arg1->y, arg2->y); res.y = SubSat(res.y, tmp);
654 tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(arg1->z, arg2->z); res.z = SubSat(res.z, tmp);
657 case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2
658 res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg3->x);
659 res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg3->y);
660 res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg3->z);
662 case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2
663 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, arg3->x) << 4; res.x = AddSat(res.x, arg2->x);
664 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, arg3->y) << 4; res.y = AddSat(res.y, arg2->y);
665 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, arg3->z) << 4; res.z = AddSat(res.z, arg2->z);
667 case TextureStage::STAGE_DOT3: // 2 * (Arg1.x - 0.5) * 2 * (Arg2.x - 0.5) + 2 * (Arg1.y - 0.5) * 2 * (Arg2.y - 0.5) + 2 * (Arg1.z - 0.5) * 2 * (Arg2.z - 0.5)
671 res.x = SubSat(arg1->x, Short4(0x0800)); tmp = SubSat(arg2->x, Short4(0x0800)); res.x = MulHigh(res.x, tmp);
672 res.y = SubSat(arg1->y, Short4(0x0800)); tmp = SubSat(arg2->y, Short4(0x0800)); res.y = MulHigh(res.y, tmp);
673 res.z = SubSat(arg1->z, Short4(0x0800)); tmp = SubSat(arg2->z, Short4(0x0800)); res.z = MulHigh(res.z, tmp);
679 res.x = AddSat(res.x, res.y);
680 res.x = AddSat(res.x, res.z);
683 res.x = Max(res.x, Short4(0x0000));
684 res.x = Min(res.x, Short4(0x1000));
691 case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2
692 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, current.w) << 4; res.x = AddSat(res.x, arg2->x);
693 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, current.w) << 4; res.y = AddSat(res.y, arg2->y);
694 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, current.w) << 4; res.z = AddSat(res.z, arg2->z);
696 case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Alpha * (Arg1 - Arg2) + Arg2
697 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, diffuse.w) << 4; res.x = AddSat(res.x, arg2->x);
698 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, diffuse.w) << 4; res.y = AddSat(res.y, arg2->y);
699 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, diffuse.w) << 4; res.z = AddSat(res.z, arg2->z);
701 case TextureStage::STAGE_BLENDFACTORALPHA: // Alpha * (Arg1 - Arg2) + Arg2
702 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.x = AddSat(res.x, arg2->x);
703 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.y = AddSat(res.y, arg2->y);
704 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.z = AddSat(res.z, arg2->z);
706 case TextureStage::STAGE_BLENDTEXTUREALPHA: // Alpha * (Arg1 - Arg2) + Arg2
707 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, texture.w) << 4; res.x = AddSat(res.x, arg2->x);
708 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, texture.w) << 4; res.y = AddSat(res.y, arg2->y);
709 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, texture.w) << 4; res.z = AddSat(res.z, arg2->z);
711 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha)
712 res.x = SubSat(Short4(0x1000), texture.w); res.x = MulHigh(res.x, arg2->x) << 4; res.x = AddSat(res.x, arg1->x);
713 res.y = SubSat(Short4(0x1000), texture.w); res.y = MulHigh(res.y, arg2->y) << 4; res.y = AddSat(res.y, arg1->y);
714 res.z = SubSat(Short4(0x1000), texture.w); res.z = MulHigh(res.z, arg2->z) << 4; res.z = AddSat(res.z, arg1->z);
716 case TextureStage::STAGE_PREMODULATE:
721 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: // Arg1 + Arg1.w * Arg2
722 res.x = MulHigh(arg1->w, arg2->x) << 4; res.x = AddSat(res.x, arg1->x);
723 res.y = MulHigh(arg1->w, arg2->y) << 4; res.y = AddSat(res.y, arg1->y);
724 res.z = MulHigh(arg1->w, arg2->z) << 4; res.z = AddSat(res.z, arg1->z);
726 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: // Arg1 * Arg2 + Arg1.w
727 res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg1->w);
728 res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg1->w);
729 res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg1->w);
731 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: // (1 - Arg1.w) * Arg2 + Arg1
735 res.x = AddSat(arg1->x, arg2->x); tmp = MulHigh(arg1->w, arg2->x) << 4; res.x = SubSat(res.x, tmp);
736 res.y = AddSat(arg1->y, arg2->y); tmp = MulHigh(arg1->w, arg2->y) << 4; res.y = SubSat(res.y, tmp);
737 res.z = AddSat(arg1->z, arg2->z); tmp = MulHigh(arg1->w, arg2->z) << 4; res.z = SubSat(res.z, tmp);
740 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: // (1 - Arg1) * Arg2 + Arg1.w
744 res.x = AddSat(arg1->w, arg2->x); tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = SubSat(res.x, tmp);
745 res.y = AddSat(arg1->w, arg2->y); tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = SubSat(res.y, tmp);
746 res.z = AddSat(arg1->w, arg2->z); tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = SubSat(res.z, tmp);
749 case TextureStage::STAGE_BUMPENVMAP:
751 du = Float4(texture.x) * Float4(1.0f / 0x0FE0);
752 dv = Float4(texture.y) * Float4(1.0f / 0x0FE0);
759 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
760 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
762 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
763 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
774 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
776 du = Float4(texture.x) * Float4(1.0f / 0x0FE0);
777 dv = Float4(texture.y) * Float4(1.0f / 0x0FE0);
785 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
786 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
788 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
789 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
795 L = MulHigh(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceScale4)));
797 L = AddSat(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceOffset4)));
798 L = Max(L, Short4(0x0000));
799 L = Min(L, Short4(0x1000));
813 if(textureStage.stageOperation != TextureStage::STAGE_DOT3)
815 switch(textureStage.firstArgumentAlpha)
817 case TextureStage::SOURCE_TEXTURE: arg1 = &texture; break;
818 case TextureStage::SOURCE_CONSTANT: arg1 = &constant; break;
819 case TextureStage::SOURCE_CURRENT: arg1 = ¤t; break;
820 case TextureStage::SOURCE_DIFFUSE: arg1 = &diffuse; break;
821 case TextureStage::SOURCE_SPECULAR: arg1 = &specular; break;
822 case TextureStage::SOURCE_TEMP: arg1 = &temp; break;
823 case TextureStage::SOURCE_TFACTOR: arg1 = &tfactor; break;
828 switch(textureStage.secondArgumentAlpha)
830 case TextureStage::SOURCE_TEXTURE: arg2 = &texture; break;
831 case TextureStage::SOURCE_CONSTANT: arg2 = &constant; break;
832 case TextureStage::SOURCE_CURRENT: arg2 = ¤t; break;
833 case TextureStage::SOURCE_DIFFUSE: arg2 = &diffuse; break;
834 case TextureStage::SOURCE_SPECULAR: arg2 = &specular; break;
835 case TextureStage::SOURCE_TEMP: arg2 = &temp; break;
836 case TextureStage::SOURCE_TFACTOR: arg2 = &tfactor; break;
841 switch(textureStage.thirdArgumentAlpha)
843 case TextureStage::SOURCE_TEXTURE: arg3 = &texture; break;
844 case TextureStage::SOURCE_CONSTANT: arg3 = &constant; break;
845 case TextureStage::SOURCE_CURRENT: arg3 = ¤t; break;
846 case TextureStage::SOURCE_DIFFUSE: arg3 = &diffuse; break;
847 case TextureStage::SOURCE_SPECULAR: arg3 = &specular; break;
848 case TextureStage::SOURCE_TEMP: arg3 = &temp; break;
849 case TextureStage::SOURCE_TFACTOR: arg3 = &tfactor; break;
854 switch(textureStage.firstModifierAlpha) // FIXME: Check if actually used
856 case TextureStage::MODIFIER_COLOR:
858 case TextureStage::MODIFIER_INVCOLOR:
859 mod1.w = SubSat(Short4(0x1000), arg1->w);
863 case TextureStage::MODIFIER_ALPHA:
866 case TextureStage::MODIFIER_INVALPHA:
867 mod1.w = SubSat(Short4(0x1000), arg1->w);
875 switch(textureStage.secondModifierAlpha) // FIXME: Check if actually used
877 case TextureStage::MODIFIER_COLOR:
879 case TextureStage::MODIFIER_INVCOLOR:
880 mod2.w = SubSat(Short4(0x1000), arg2->w);
884 case TextureStage::MODIFIER_ALPHA:
887 case TextureStage::MODIFIER_INVALPHA:
888 mod2.w = SubSat(Short4(0x1000), arg2->w);
896 switch(textureStage.thirdModifierAlpha) // FIXME: Check if actually used
898 case TextureStage::MODIFIER_COLOR:
900 case TextureStage::MODIFIER_INVCOLOR:
901 mod3.w = SubSat(Short4(0x1000), arg3->w);
905 case TextureStage::MODIFIER_ALPHA:
908 case TextureStage::MODIFIER_INVALPHA:
909 mod3.w = SubSat(Short4(0x1000), arg3->w);
917 switch(textureStage.stageOperationAlpha)
919 case TextureStage::STAGE_DISABLE:
921 case TextureStage::STAGE_SELECTARG1: // Arg1
924 case TextureStage::STAGE_SELECTARG2: // Arg2
927 case TextureStage::STAGE_SELECTARG3: // Arg3
930 case TextureStage::STAGE_MODULATE: // Arg1 * Arg2
931 res.w = MulHigh(arg1->w, arg2->w) << 4;
933 case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2
934 res.w = MulHigh(arg1->w, arg2->w) << 5;
936 case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4
937 res.w = MulHigh(arg1->w, arg2->w) << 6;
939 case TextureStage::STAGE_ADD: // Arg1 + Arg2
940 res.w = AddSat(arg1->w, arg2->w);
942 case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5
943 res.w = AddSat(arg1->w, arg2->w);
944 res.w = SubSat(res.w, Short4(0x0800));
946 case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1
947 res.w = AddSat(arg1->w, arg2->w);
948 res.w = SubSat(res.w, Short4(0x0800));
949 res.w = AddSat(res.w, res.w);
951 case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2
952 res.w = SubSat(arg1->w, arg2->w);
954 case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2
958 tmp = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(arg1->w, arg2->w); res.w = SubSat(res.w, tmp);
961 case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2
962 res.w = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(res.w, arg3->w);
964 case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2
965 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, arg3->w) << 4; res.w = AddSat(res.w, arg2->w);
967 case TextureStage::STAGE_DOT3:
968 break; // Already computed in color channel
969 case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2
970 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, current.w) << 4; res.w = AddSat(res.w, arg2->w);
972 case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha)
973 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, diffuse.w) << 4; res.w = AddSat(res.w, arg2->w);
975 case TextureStage::STAGE_BLENDFACTORALPHA:
976 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.w = AddSat(res.w, arg2->w);
978 case TextureStage::STAGE_BLENDTEXTUREALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha)
979 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, texture.w) << 4; res.w = AddSat(res.w, arg2->w);
981 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha)
982 res.w = SubSat(Short4(0x1000), texture.w); res.w = MulHigh(res.w, arg2->w) << 4; res.w = AddSat(res.w, arg1->w);
984 case TextureStage::STAGE_PREMODULATE:
987 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
988 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
989 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
990 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
991 case TextureStage::STAGE_BUMPENVMAP:
992 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
993 break; // Invalid alpha operations
999 // Clamp result to [0, 1]
1001 switch(textureStage.stageOperation)
1003 case TextureStage::STAGE_DISABLE:
1004 case TextureStage::STAGE_SELECTARG1:
1005 case TextureStage::STAGE_SELECTARG2:
1006 case TextureStage::STAGE_SELECTARG3:
1007 case TextureStage::STAGE_MODULATE:
1008 case TextureStage::STAGE_MODULATE2X:
1009 case TextureStage::STAGE_MODULATE4X:
1010 case TextureStage::STAGE_ADD:
1011 case TextureStage::STAGE_MULTIPLYADD:
1012 case TextureStage::STAGE_LERP:
1013 case TextureStage::STAGE_BLENDCURRENTALPHA:
1014 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1015 case TextureStage::STAGE_BLENDFACTORALPHA:
1016 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1017 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1018 case TextureStage::STAGE_DOT3: // Already clamped
1019 case TextureStage::STAGE_PREMODULATE:
1020 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1021 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1022 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1023 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1024 case TextureStage::STAGE_BUMPENVMAP:
1025 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1026 if(state.textureStage[stage].cantUnderflow)
1028 break; // Can't go below zero
1030 case TextureStage::STAGE_ADDSIGNED:
1031 case TextureStage::STAGE_ADDSIGNED2X:
1032 case TextureStage::STAGE_SUBTRACT:
1033 case TextureStage::STAGE_ADDSMOOTH:
1034 res.x = Max(res.x, Short4(0x0000));
1035 res.y = Max(res.y, Short4(0x0000));
1036 res.z = Max(res.z, Short4(0x0000));
1042 switch(textureStage.stageOperationAlpha)
1044 case TextureStage::STAGE_DISABLE:
1045 case TextureStage::STAGE_SELECTARG1:
1046 case TextureStage::STAGE_SELECTARG2:
1047 case TextureStage::STAGE_SELECTARG3:
1048 case TextureStage::STAGE_MODULATE:
1049 case TextureStage::STAGE_MODULATE2X:
1050 case TextureStage::STAGE_MODULATE4X:
1051 case TextureStage::STAGE_ADD:
1052 case TextureStage::STAGE_MULTIPLYADD:
1053 case TextureStage::STAGE_LERP:
1054 case TextureStage::STAGE_BLENDCURRENTALPHA:
1055 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1056 case TextureStage::STAGE_BLENDFACTORALPHA:
1057 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1058 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1059 case TextureStage::STAGE_DOT3: // Already clamped
1060 case TextureStage::STAGE_PREMODULATE:
1061 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1062 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1063 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1064 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1065 case TextureStage::STAGE_BUMPENVMAP:
1066 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1067 if(state.textureStage[stage].cantUnderflow)
1069 break; // Can't go below zero
1071 case TextureStage::STAGE_ADDSIGNED:
1072 case TextureStage::STAGE_ADDSIGNED2X:
1073 case TextureStage::STAGE_SUBTRACT:
1074 case TextureStage::STAGE_ADDSMOOTH:
1075 res.w = Max(res.w, Short4(0x0000));
1081 switch(textureStage.stageOperation)
1083 case TextureStage::STAGE_DISABLE:
1084 case TextureStage::STAGE_SELECTARG1:
1085 case TextureStage::STAGE_SELECTARG2:
1086 case TextureStage::STAGE_SELECTARG3:
1087 case TextureStage::STAGE_MODULATE:
1088 case TextureStage::STAGE_SUBTRACT:
1089 case TextureStage::STAGE_ADDSMOOTH:
1090 case TextureStage::STAGE_LERP:
1091 case TextureStage::STAGE_BLENDCURRENTALPHA:
1092 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1093 case TextureStage::STAGE_BLENDFACTORALPHA:
1094 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1095 case TextureStage::STAGE_DOT3: // Already clamped
1096 case TextureStage::STAGE_PREMODULATE:
1097 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1098 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1099 case TextureStage::STAGE_BUMPENVMAP:
1100 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1101 break; // Can't go above one
1102 case TextureStage::STAGE_MODULATE2X:
1103 case TextureStage::STAGE_MODULATE4X:
1104 case TextureStage::STAGE_ADD:
1105 case TextureStage::STAGE_ADDSIGNED:
1106 case TextureStage::STAGE_ADDSIGNED2X:
1107 case TextureStage::STAGE_MULTIPLYADD:
1108 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1109 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1110 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1111 res.x = Min(res.x, Short4(0x1000));
1112 res.y = Min(res.y, Short4(0x1000));
1113 res.z = Min(res.z, Short4(0x1000));
1119 switch(textureStage.stageOperationAlpha)
1121 case TextureStage::STAGE_DISABLE:
1122 case TextureStage::STAGE_SELECTARG1:
1123 case TextureStage::STAGE_SELECTARG2:
1124 case TextureStage::STAGE_SELECTARG3:
1125 case TextureStage::STAGE_MODULATE:
1126 case TextureStage::STAGE_SUBTRACT:
1127 case TextureStage::STAGE_ADDSMOOTH:
1128 case TextureStage::STAGE_LERP:
1129 case TextureStage::STAGE_BLENDCURRENTALPHA:
1130 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1131 case TextureStage::STAGE_BLENDFACTORALPHA:
1132 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1133 case TextureStage::STAGE_DOT3: // Already clamped
1134 case TextureStage::STAGE_PREMODULATE:
1135 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1136 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1137 case TextureStage::STAGE_BUMPENVMAP:
1138 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1139 break; // Can't go above one
1140 case TextureStage::STAGE_MODULATE2X:
1141 case TextureStage::STAGE_MODULATE4X:
1142 case TextureStage::STAGE_ADD:
1143 case TextureStage::STAGE_ADDSIGNED:
1144 case TextureStage::STAGE_ADDSIGNED2X:
1145 case TextureStage::STAGE_MULTIPLYADD:
1146 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1147 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1148 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1149 res.w = Min(res.w, Short4(0x1000));
1155 switch(textureStage.destinationArgument)
1157 case TextureStage::DESTINATION_CURRENT:
1163 case TextureStage::DESTINATION_TEMP:
// Blends fog into the x, y and z channels of 'current' using the fog factor 'f':
// each channel is scaled by the 16-bit fog factor, then the fog color scaled by
// (0xFFFF - fog) is added. The w channel is not modified by the visible code.
// Fix: the first parameter was mis-encoded as "¤t"; it is the reference '&current'
// that the body reads and writes.
// NOTE(review): the statements guarded by the two conditions below are elided in this
// excerpt; presumably the first returns early when fog is inactive -- confirm.
1174 void PixelPipeline::fogBlend(Vector4s &current, Float4 &f)
1176 if(!state.fogActive)
1181 if(state.pixelFogMode != FOG_NONE)
// Convert the floating-point fog factor to unsigned 16-bit fixed point.
1186 UShort4 fog = convertFixed16(f, true);
// current.rgb *= fog
1188 current.x = As<Short4>(MulHigh(As<UShort4>(current.x), fog));
1189 current.y = As<Short4>(MulHigh(As<UShort4>(current.y), fog));
1190 current.z = As<Short4>(MulHigh(As<UShort4>(current.z), fog));
// Complement of the fog factor, used to weight the fog color.
1192 UShort4 invFog = UShort4(0xFFFFu) - fog;
// current.rgb += invFog * fog color (read per channel from DrawData::fog.color4)
1194 current.x += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[0]))));
1195 current.y += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[1]))));
1196 current.z += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[2]))));
// Adds the specular color to the x, y and z channels of 'current' with saturation.
// The w channel is left untouched by the visible code.
// Fix: the first parameter was mis-encoded as "¤t"; it is the reference '&current'
// that the body reads and writes.
// NOTE(review): the statement guarded by the condition below is elided in this excerpt;
// presumably it returns early when specular addition is disabled -- confirm.
1199 void PixelPipeline::specularPixel(Vector4s &current, Vector4s &specular)
1201 if(!state.specularAdd)
1206 current.x = AddSat(current.x, specular.x);
1207 current.y = AddSat(current.y, specular.y);
1208 current.z = AddSat(current.z, specular.z);
// Samples the texture bound to 'stage' using interpolant v[2 + coordinates] as the
// texture coordinate, and returns the result of the Float4-coordinate overload.
// NOTE(review): lines between the coordinate loads and the return are elided in this
// excerpt, so any adjustment of x/y/z/w made there is not documented here.
1211 Vector4s PixelPipeline::sampleTexture(int coordinates, int stage, bool project)
// Texture coordinate interpolants start at index 2 of 'v'.
1213 Float4 x = v[2 + coordinates].x;
1214 Float4 y = v[2 + coordinates].y;
1215 Float4 z = v[2 + coordinates].z;
1216 Float4 w = v[2 + coordinates].w;
1226 return sampleTexture(stage, x, y, z, w, project);
// Samples the texture of 'stage' at coordinates (u, v, w) with q as the fourth coordinate.
// Two sampling paths are visible: one passing u, v, w directly, and one that first
// multiplies them by reciprocal(q).
// NOTE(review): the conditions selecting between the paths, the declarations of 'c',
// 'dsx' and 'dsy', and the return statement are elided in this excerpt; presumably
// 'project' selects the divide-by-q path -- confirm.
1229 Vector4s PixelPipeline::sampleTexture(int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project)
// Start timestamp for the texture-sampling cycle counter.
1234 Long texTime = Ticks();
// Address of this stage's Texture record inside the draw data.
1240 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + stage * sizeof(Texture);
1244 c = SamplerCore(constants, state.sampler[stage]).sampleTexture(texture, u, v, w, q, q, dsx, dsy);
// Divide the coordinates by q before sampling.
1248 Float4 rq = reciprocal(q);
1250 Float4 u_q = u * rq;
1251 Float4 v_q = v * rq;
1252 Float4 w_q = w * rq;
1254 c = SamplerCore(constants, state.sampler[stage]).sampleTexture(texture, u_q, v_q, w_q, q, q, dsx, dsy);
// Accumulate the elapsed ticks into the texture performance counter.
1258 cycles[PERF_TEX] += Ticks() - texTime;
// Converts four floats to 16-bit fixed point by scaling with 0x1000 and rounding,
// so that 1.0f maps to 0x1000.
1264 Short4 PixelPipeline::convertFixed12(RValue<Float4> cf)
1266 return RoundShort4(cf * Float4(0x1000));
// Converts each component of the float vector 'cf' to fixed point (1.0f -> 0x1000)
// and stores the result in the matching component of 'cs'.
1269 void PixelPipeline::convertFixed12(Vector4s &cs, Vector4f &cf)
1271 cs.x = convertFixed12(cf.x);
1272 cs.y = convertFixed12(cf.y);
1273 cs.z = convertFixed12(cf.z);
1274 cs.w = convertFixed12(cf.w);
// Converts four 16-bit fixed-point values to floats by multiplying with 1 / 0x0FFE.
// NOTE(review): the divisor is 0x0FFE, not the 0x1000 scale used by convertFixed12;
// the reason is not evident from this file section -- confirm before changing.
1277 Float4 PixelPipeline::convertSigned12(Short4 &cs)
1279 return Float4(cs) * Float4(1.0f / 0x0FFE);
// Converts each component of the fixed-point vector 'cs' to float via the scalar
// convertSigned12 overload and stores the result in the matching component of 'cf'.
1282 void PixelPipeline::convertSigned12(Vector4f &cf, Vector4s &cs)
1284 cf.x = convertSigned12(cs.x);
1285 cf.y = convertSigned12(cs.y);
1286 cf.z = convertSigned12(cs.z);
1287 cf.w = convertSigned12(cs.w);
// Writes the components of 'd' selected by dst.mask (bit 0 = x, bit 1 = y, bit 2 = z,
// bit 3 = w) into the register file entry addressed by dst.index.
// NOTE(review): the switch header and the per-case break statements are elided in this
// excerpt; presumably the switch is on dst.type -- confirm.
1290 void PixelPipeline::writeDestination(Vector4s &d, const Dst &dst)
// Temporary registers live in 'rs'.
1294 case Shader::PARAMETER_TEMP:
1295 if(dst.mask & 0x1) rs[dst.index].x = d.x;
1296 if(dst.mask & 0x2) rs[dst.index].y = d.y;
1297 if(dst.mask & 0x4) rs[dst.index].z = d.z;
1298 if(dst.mask & 0x8) rs[dst.index].w = d.w;
// Input registers live in 'vs'.
1300 case Shader::PARAMETER_INPUT:
1301 if(dst.mask & 0x1) vs[dst.index].x = d.x;
1302 if(dst.mask & 0x2) vs[dst.index].y = d.y;
1303 if(dst.mask & 0x4) vs[dst.index].z = d.z;
1304 if(dst.mask & 0x8) vs[dst.index].w = d.w;
// Constants are never a valid destination.
1306 case Shader::PARAMETER_CONST: ASSERT(false); break;
// Texture registers live in 'ts'.
1307 case Shader::PARAMETER_TEXTURE:
1308 if(dst.mask & 0x1) ts[dst.index].x = d.x;
1309 if(dst.mask & 0x2) ts[dst.index].y = d.y;
1310 if(dst.mask & 0x4) ts[dst.index].z = d.z;
1311 if(dst.mask & 0x8) ts[dst.index].w = d.w;
// Color outputs are written to 'vs', the same array used for PARAMETER_INPUT.
1313 case Shader::PARAMETER_COLOROUT:
1314 if(dst.mask & 0x1) vs[dst.index].x = d.x;
1315 if(dst.mask & 0x2) vs[dst.index].y = d.y;
1316 if(dst.mask & 0x4) vs[dst.index].z = d.z;
1317 if(dst.mask & 0x8) vs[dst.index].w = d.w;
// Reads the source operand described by 'src': selects the register, applies the
// swizzle, then applies the source modifier.
// NOTE(review): several lines are elided in this excerpt, including the declarations of
// 'reg', 'c', 'mod' and 'i', the switch on the register type, the bodies of
// MODIFIER_NONE / MODIFIER_NEGATE / MODIFIER_DZ / MODIFIER_DW, and the return statement.
1324 Vector4s PixelPipeline::fetchRegister(const Src &src)
// Constants are loaded from the draw data (ps.cW) into the local 'c'.
1331 if(src.type == Shader::PARAMETER_CONST)
1333 c.x = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][0]));
1334 c.y = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][1]));
1335 c.z = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][2]));
1336 c.w = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][3]));
// Pick the register the operand refers to; void and literal operands return rs[0] as a placeholder.
1341 case Shader::PARAMETER_TEMP: reg = &rs[i]; break;
1342 case Shader::PARAMETER_INPUT: reg = &vs[i]; break;
1343 case Shader::PARAMETER_CONST: reg = &c; break;
1344 case Shader::PARAMETER_TEXTURE: reg = &ts[i]; break;
1345 case Shader::PARAMETER_VOID: return rs[0]; // Dummy
1346 case Shader::PARAMETER_FLOAT4LITERAL: return rs[0]; // Dummy
1347 default: ASSERT(false); return rs[0];
// Swizzle: two bits of src.swizzle per output component select the source component.
1350 const Short4 &x = (*reg)[(src.swizzle >> 0) & 0x3];
1351 const Short4 &y = (*reg)[(src.swizzle >> 2) & 0x3];
1352 const Short4 &z = (*reg)[(src.swizzle >> 4) & 0x3];
1353 const Short4 &w = (*reg)[(src.swizzle >> 6) & 0x3];
// Apply the source modifier; results go to 'mod'.
1357 switch(src.modifier)
1359 case Shader::MODIFIER_NONE:
// Bias: value - 0x0800 (0x0800 is half of the 0x1000 fixed-point unit).
1365 case Shader::MODIFIER_BIAS:
1366 mod.x = SubSat(x, Short4(0x0800));
1367 mod.y = SubSat(y, Short4(0x0800));
1368 mod.z = SubSat(z, Short4(0x0800));
1369 mod.w = SubSat(w, Short4(0x0800));
// Negated bias: 0x0800 - value.
1371 case Shader::MODIFIER_BIAS_NEGATE:
1372 mod.x = SubSat(Short4(0x0800), x);
1373 mod.y = SubSat(Short4(0x0800), y);
1374 mod.z = SubSat(Short4(0x0800), z);
1375 mod.w = SubSat(Short4(0x0800), w);
// Complement: 0x1000 - value.
1377 case Shader::MODIFIER_COMPLEMENT:
1378 mod.x = SubSat(Short4(0x1000), x);
1379 mod.y = SubSat(Short4(0x1000), y);
1380 mod.z = SubSat(Short4(0x1000), z);
1381 mod.w = SubSat(Short4(0x1000), w);
1383 case Shader::MODIFIER_NEGATE:
// Times two, with saturation.
1389 case Shader::MODIFIER_X2:
1390 mod.x = AddSat(x, x);
1391 mod.y = AddSat(y, y);
1392 mod.z = AddSat(z, z);
1393 mod.w = AddSat(w, w);
// Negated times two.
1395 case Shader::MODIFIER_X2_NEGATE:
1396 mod.x = -AddSat(x, x);
1397 mod.y = -AddSat(y, y);
1398 mod.z = -AddSat(z, z);
1399 mod.w = -AddSat(w, w);
// Sign: 2 * (value - 0x0800), with saturation at each step.
1401 case Shader::MODIFIER_SIGN:
1402 mod.x = SubSat(x, Short4(0x0800));
1403 mod.y = SubSat(y, Short4(0x0800));
1404 mod.z = SubSat(z, Short4(0x0800));
1405 mod.w = SubSat(w, Short4(0x0800));
1406 mod.x = AddSat(mod.x, mod.x);
1407 mod.y = AddSat(mod.y, mod.y);
1408 mod.z = AddSat(mod.z, mod.z);
1409 mod.w = AddSat(mod.w, mod.w);
// Negated sign: 2 * (0x0800 - value), with saturation at each step.
1411 case Shader::MODIFIER_SIGN_NEGATE:
1412 mod.x = SubSat(Short4(0x0800), x);
1413 mod.y = SubSat(Short4(0x0800), y);
1414 mod.z = SubSat(Short4(0x0800), z);
1415 mod.w = SubSat(Short4(0x0800), w);
1416 mod.x = AddSat(mod.x, mod.x);
1417 mod.y = AddSat(mod.y, mod.y);
1418 mod.z = AddSat(mod.z, mod.z);
1419 mod.w = AddSat(mod.w, mod.w);
1421 case Shader::MODIFIER_DZ:
1426 // Projection performed by texture sampler
1428 case Shader::MODIFIER_DW:
1433 // Projection performed by texture sampler
// Doubled constants are clamped back to the range [-0x1000, 0x1000].
1439 if(src.type == Shader::PARAMETER_CONST && (src.modifier == Shader::MODIFIER_X2 || src.modifier == Shader::MODIFIER_X2_NEGATE))
1441 mod.x = Min(mod.x, Short4(0x1000)); mod.x = Max(mod.x, Short4(-0x1000));
1442 mod.y = Min(mod.y, Short4(0x1000)); mod.y = Max(mod.y, Short4(-0x1000));
1443 mod.z = Min(mod.z, Short4(0x1000)); mod.z = Max(mod.z, Short4(-0x1000));
1444 mod.w = Min(mod.w, Short4(0x1000)); mod.w = Max(mod.w, Short4(-0x1000));
// MOV instruction handler taking a destination and one source vector.
// NOTE(review): the body is elided in this excerpt; presumably it copies src0 into dst
// component by component -- confirm.
1450 void PixelPipeline::MOV(Vector4s &dst, Vector4s &src0)
// Component-wise saturating addition: dst = src0 + src1.
1458 void PixelPipeline::ADD(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1460 dst.x = AddSat(src0.x, src1.x);
1461 dst.y = AddSat(src0.y, src1.y);
1462 dst.z = AddSat(src0.z, src1.z);
1463 dst.w = AddSat(src0.w, src1.w);
// Component-wise saturating subtraction: dst = src0 - src1.
1466 void PixelPipeline::SUB(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1468 dst.x = SubSat(src0.x, src1.x);
1469 dst.y = SubSat(src0.y, src1.y);
1470 dst.z = SubSat(src0.z, src1.z);
1471 dst.w = SubSat(src0.w, src1.w);
// Component-wise multiply-add: dst = src0 * src1 + src2.
// Each product is formed with MulHigh and then doubled four times with saturating adds
// (a saturating multiply by 16) to rescale it, before src2 is added with saturation.
1474 void PixelPipeline::MAD(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1476 // FIXME: Long fixed-point multiply fixup
1477 { dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x); }
1478 { dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y); }
1479 { dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z); }
1480 { dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w); }
// Component-wise multiply: dst = src0 * src1.
// Each product is formed with MulHigh and then doubled four times with saturating adds
// (a saturating multiply by 16) to rescale it.
1483 void PixelPipeline::MUL(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1485 // FIXME: Long fixed-point multiply fixup
1486 { dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); }
1487 { dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); }
1488 { dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); }
1489 { dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); }
// Three-component dot product of src0 and src1, accumulated in t0 with saturating adds.
// Each product uses MulHigh followed by four saturating doublings (multiply by 16).
// NOTE(review): the declarations of t0/t1 and the stores of the result into dst are
// elided in this excerpt.
1492 void PixelPipeline::DP3(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1497 // FIXME: Long fixed-point multiply fixup
1498 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0);
1499 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1500 t0 = AddSat(t0, t1);
1501 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1502 t0 = AddSat(t0, t1);
// Four-component dot product of src0 and src1, accumulated in t0 with saturating adds.
// Each product uses MulHigh followed by four saturating doublings (multiply by 16).
// NOTE(review): the declarations of t0/t1 and the stores of the result into dst are
// elided in this excerpt.
1510 void PixelPipeline::DP4(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1515 // FIXME: Long fixed-point multiply fixup
1516 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0);
1517 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1518 t0 = AddSat(t0, t1);
1519 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1520 t0 = AddSat(t0, t1);
1521 t1 = MulHigh(src0.w, src1.w); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1522 t0 = AddSat(t0, t1);
// Component-wise linear interpolation: dst = src2 + src0 * (src1 - src2).
// The difference is multiplied with MulHigh and rescaled by four saturating doublings
// (multiply by 16) before src2 is added back with saturation.
1530 void PixelPipeline::LRP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1532 // FIXME: Long fixed-point multiply fixup
1533 { dst.x = SubSat(src1.x, src2.x); dst.x = MulHigh(dst.x, src0.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x); }
1535 dst.y = SubSat(src1.y, src2.y); dst.y = MulHigh(dst.y, src0.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y);
1537 {dst.z = SubSat(src1.z, src2.z); dst.z = MulHigh(dst.z, src0.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z); }
1538 {dst.w = SubSat(src1.w, src2.w); dst.w = MulHigh(dst.w, src0.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w); }
// Writes texture coordinate (u, v, s) to dst as fixed point, clamped to [0, 1].
// A component is only converted when its bit is set in the interpolant's component mask
// (0x01 = u, 0x02 = v, 0x04 = s); otherwise 0 is written. dst.w is always 0x1000 (1.0).
// NOTE(review): the declarations of uw/vw/sw and the else keywords are elided in this excerpt.
1541 void PixelPipeline::TEXCOORD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate)
1547 if(state.interpolant[2 + coordinate].component & 0x01)
1549 uw = Max(u, Float4(0.0f));
1550 uw = Min(uw, Float4(1.0f));
1551 dst.x = convertFixed12(uw);
1555 dst.x = Short4(0x0000);
1558 if(state.interpolant[2 + coordinate].component & 0x02)
1560 vw = Max(v, Float4(0.0f));
1561 vw = Min(vw, Float4(1.0f));
1562 dst.y = convertFixed12(vw);
1566 dst.y = Short4(0x0000);
1569 if(state.interpolant[2 + coordinate].component & 0x04)
1571 sw = Max(s, Float4(0.0f));
1572 sw = Min(sw, Float4(1.0f));
1573 dst.z = convertFixed12(sw);
1577 dst.z = Short4(0x0000);
1580 dst.w = Short4(0x1000);
// Writes a texture coordinate to dst as fixed point: each enabled component is scaled by
// 0x1000, clamped to the signed 16-bit range [-0x8000, 0x7FFF] and rounded.
// A component whose bit is clear in the interpolant's component mask is written as 0.
// NOTE(review): the initialization of uw/vw/sw (and any use of 'project') and the else
// keywords are elided in this excerpt; no write to dst.w is visible.
1583 void PixelPipeline::TEXCRD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate, bool project)
1595 if(state.interpolant[2 + coordinate].component & 0x01)
1597 uw *= Float4(0x1000);
1598 uw = Max(uw, Float4(-0x8000));
1599 uw = Min(uw, Float4(0x7FFF));
1600 dst.x = RoundShort4(uw);
1604 dst.x = Short4(0x0000);
1607 if(state.interpolant[2 + coordinate].component & 0x02)
1609 vw *= Float4(0x1000);
1610 vw = Max(vw, Float4(-0x8000));
1611 vw = Min(vw, Float4(0x7FFF));
1612 dst.y = RoundShort4(vw);
1616 dst.y = Short4(0x0000);
1619 if(state.interpolant[2 + coordinate].component & 0x04)
1621 sw *= Float4(0x1000);
1622 sw = Max(sw, Float4(-0x8000));
1623 sw = Min(sw, Float4(0x7FFF));
1624 dst.z = RoundShort4(sw);
1628 dst.z = Short4(0x0000);
// Computes the dot product of (u, v, s) with 'src' via TEXM3X3PAD (component 0, unsigned
// scaling), which leaves the result in u_, then converts it to fixed point in t0.
// NOTE(review): the stores of t0 into dst are elided in this excerpt.
1632 void PixelPipeline::TEXDP3(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src)
1634 TEXM3X3PAD(u, v, s, src, 0, false);
1636 Short4 t0 = RoundShort4(u_ * Float4(0x1000));
// Computes the dot product of (u, v, s) with 'src0' via TEXM3X3PAD (result in u_), then
// samples the texture of 'stage' at (u_, v_, w_).
// NOTE(review): lines between the two calls are elided in this excerpt; presumably they
// set v_ and w_ -- confirm.
1644 void PixelPipeline::TEXDP3TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0)
1646 TEXM3X3PAD(u, v, s, src0, 0, false);
1651 dst = sampleTexture(stage, u_, v_, w_, w_);
// Builds a per-pixel mask from float coordinates: a bit is set only when u, v and s are
// all not less than zero for that pixel.
// NOTE(review): the loop body is elided in this excerpt; presumably it ANDs 'kill' into
// cMask[q] for each of the state.multiSample samples -- confirm.
1654 void PixelPipeline::TEXKILL(Int cMask[4], Float4 &u, Float4 &v, Float4 &s)
1656 Int kill = SignMask(CmpNLT(u, Float4(0.0f))) &
1657 SignMask(CmpNLT(v, Float4(0.0f))) &
1658 SignMask(CmpNLT(s, Float4(0.0f)));
1660 for(unsigned int q = 0; q < state.multiSample; q++)
// Builds a per-pixel mask from a fixed-point register: x, y and z are ORed so the sign
// bit is set if any of them is negative; the packed sign mask is then inverted in its
// low four bits, leaving a bit set only for pixels with no negative component.
// NOTE(review): the loop body is elided in this excerpt; presumably it ANDs 'kill' into
// cMask[q] for each of the state.multiSample samples -- confirm.
1666 void PixelPipeline::TEXKILL(Int cMask[4], Vector4s &src)
1668 Short4 test = src.x | src.y | src.z;
1669 Int kill = SignMask(PackSigned(test, test)) ^ 0x0000000F;
1671 for(unsigned int q = 0; q < state.multiSample; q++)
// Samples 'sampler' at (u, v, s), passing s also as the fourth coordinate, and stores
// the result in dst. 'project' is forwarded to sampleTexture.
1677 void PixelPipeline::TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int sampler, bool project)
1679 dst = sampleTexture(sampler, u, v, s, s, project);
// Samples 'sampler' using the x, y, z components of the fixed-point register 'src' as
// coordinates, after converting them to float with the 1 / 0x0FFE scale.
1682 void PixelPipeline::TEXLD(Vector4s &dst, Vector4s &src, int sampler, bool project)
1684 Float4 u = Float4(src.x) * Float4(1.0f / 0x0FFE);
1685 Float4 v = Float4(src.y) * Float4(1.0f / 0x0FFE);
1686 Float4 s = Float4(src.z) * Float4(1.0f / 0x0FFE);
1688 dst = sampleTexture(sampler, u, v, s, s, project);
// Bump environment-map lookup: src.x and src.y are converted to float perturbations,
// multiplied by the stage's 2x2 bump-map matrix (bumpmapMatrix4F), and the texture of
// 'stage' is sampled at the resulting (u_, v_, s).
// NOTE(review): the declarations of du2/dv2 and the computation of u_ and v_ from the
// products are elided in this excerpt.
1691 void PixelPipeline::TEXBEM(Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)
1693 Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE);
1694 Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE);
1699 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
1700 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
1702 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
1703 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
1709 dst = sampleTexture(stage, u_, v_, s, s);
// Bump environment-map lookup with luminance: performs the same matrix-perturbed sample
// as TEXBEM, then scales the x, y, z channels of the sampled color by a luminance term L.
// L is multiplied by the stage's luminanceScale4, offset by luminanceOffset4 and clamped
// to [0, 0x1000] before use.
// NOTE(review): the declarations of du2/dv2, the computation of u_/v_, the initial value
// of L and a statement between the scale and offset steps are elided in this excerpt.
1712 void PixelPipeline::TEXBEML(Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)
1714 Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE);
1715 Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE);
1720 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
1721 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
1723 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
1724 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
1730 dst = sampleTexture(stage, u_, v_, s, s);
// Luminance: scale, offset, then clamp to [0, 1] in fixed point.
1735 L = MulHigh(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceScale4)));
1737 L = AddSat(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceOffset4)));
1738 L = Max(L, Short4(0x0000));
1739 L = Min(L, Short4(0x1000));
// Modulate the sampled color by L; the shift by 4 rescales the MulHigh product.
1741 dst.x = MulHigh(dst.x, L); dst.x = dst.x << 4;
1742 dst.y = MulHigh(dst.y, L); dst.y = dst.y << 4;
1743 dst.z = MulHigh(dst.z, L); dst.z = dst.z << 4;
// Samples the texture of 'stage' using components of src0 as coordinates:
// u = src0.w (alpha), v = src0.x (red), s = src0.z, each converted with the 1 / 0x0FFE scale.
1746 void PixelPipeline::TEXREG2AR(Vector4s &dst, Vector4s &src0, int stage)
1748 Float4 u = Float4(src0.w) * Float4(1.0f / 0x0FFE);
1749 Float4 v = Float4(src0.x) * Float4(1.0f / 0x0FFE);
1750 Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);
1752 dst = sampleTexture(stage, u, v, s, s);
// Samples the texture of 'stage' using components of src0 as coordinates:
// u = src0.y (green), v = src0.z (blue), each converted with the 1 / 0x0FFE scale.
// NOTE(review): the definition of 's' is elided in this excerpt.
1755 void PixelPipeline::TEXREG2GB(Vector4s &dst, Vector4s &src0, int stage)
1757 Float4 u = Float4(src0.y) * Float4(1.0f / 0x0FFE);
1758 Float4 v = Float4(src0.z) * Float4(1.0f / 0x0FFE);
1761 dst = sampleTexture(stage, u, v, s, s);
// Samples the texture of 'stage' using src0.x, src0.y, src0.z as the u, v, s coordinates,
// each converted with the 1 / 0x0FFE scale.
1764 void PixelPipeline::TEXREG2RGB(Vector4s &dst, Vector4s &src0, int stage)
1766 Float4 u = Float4(src0.x) * Float4(1.0f / 0x0FFE);
1767 Float4 v = Float4(src0.y) * Float4(1.0f / 0x0FFE);
1768 Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);
1770 dst = sampleTexture(stage, u, v, s, s);
// Completes a two-row matrix multiply (row 1 via TEXM3X2PAD, result in v_) and divides
// u_ by v_ using the approximate reciprocal Rcp_pp.
// NOTE(review): the remaining statements (presumably storing the quotient as the output
// depth) are elided in this excerpt; 'dst' is not written by the visible code.
1773 void PixelPipeline::TEXM3X2DEPTH(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src, bool signedScaling)
1775 TEXM3X2PAD(u, v, s, src, 1, signedScaling);
1778 u_ *= Rcp_pp(v_); // FIXME: Set result to 1.0 when division by zero
// Computes one row of a matrix multiply by forwarding all arguments unchanged to TEXM3X3PAD.
1783 void PixelPipeline::TEXM3X2PAD(Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling)
1785 TEXM3X3PAD(u, v, s, src0, component, signedScaling);
// Completes a two-row matrix multiply (row 1 via TEXM3X2PAD, result in v_) and samples
// the texture of 'stage' at (u_, v_, w_).
// NOTE(review): a statement between the two calls is elided in this excerpt; presumably
// it sets w_ -- confirm.
1788 void PixelPipeline::TEXM3X2TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling)
1790 TEXM3X2PAD(u, v, s, src0, 1, signedScaling);
1794 dst = sampleTexture(stage, u_, v_, w_, w_);
// Completes a three-row matrix multiply (row 2 via TEXM3X3PAD, result in w_) and writes
// the vector (u_, v_, w_) to dst as fixed point (scaled by 0x1000); dst.w is set to 0x1000.
1797 void PixelPipeline::TEXM3X3(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, bool signedScaling)
1799 TEXM3X3PAD(u, v, s, src0, 2, signedScaling);
1801 dst.x = RoundShort4(u_ * Float4(0x1000));
1802 dst.y = RoundShort4(v_ * Float4(0x1000));
1803 dst.z = RoundShort4(w_ * Float4(0x1000));
1804 dst.w = Short4(0x1000);
// Computes one row of a matrix multiply: x = (U * u + V * v + W * s) / 0x1000, and stores
// it in u_, v_ or w_ according to 'component' (0, 1 or 2; anything else asserts).
// NOTE(review): the body of the first condition is elided in this excerpt; presumably it
// (re)loads U, V, W from src0 when starting a new matrix or when the scaling mode
// changed -- confirm.
1807 void PixelPipeline::TEXM3X3PAD(Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling)
1809 if(component == 0 || previousScaling != signedScaling) // FIXME: Other source modifiers?
// Remember the scaling mode used, for the comparison on the next call.
1815 previousScaling = signedScaling;
1818 Float4 x = U * u + V * v + W * s;
1820 x *= Float4(1.0f / 0x1000);
1824 case 0: u_ = x; break;
1825 case 1: v_ = x; break;
1826 case 2: w_ = x; break;
1827 default: ASSERT(false);
// Completes a three-row matrix multiply (row 2 via TEXM3X3PAD), takes the eye vector E
// from src1 (converted with the 1 / 0x0FFE scale) and samples the texture of 'stage' at
// the vector (u__, v__, w__) described by the formula comment below.
// NOTE(review): the computation of u__, v__ and w__ is elided in this excerpt.
1831 void PixelPipeline::TEXM3X3SPEC(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, Vector4s &src1)
1833 TEXM3X3PAD(u, v, s, src0, 2, false);
1835 Float4 E[3]; // Eye vector
1837 E[0] = Float4(src1.x) * Float4(1.0f / 0x0FFE);
1838 E[1] = Float4(src1.y) * Float4(1.0f / 0x0FFE);
1839 E[2] = Float4(src1.z) * Float4(1.0f / 0x0FFE);
1846 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
1865 dst = sampleTexture(stage, u__, v__, w__, w__);
// Completes a three-row matrix multiply (row 2 via TEXM3X3PAD, result in w_) and samples
// the texture of 'stage' at (u_, v_, w_), passing w_ also as the fourth coordinate.
1868 void PixelPipeline::TEXM3X3TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling)
1870 TEXM3X3PAD(u, v, s, src0, 2, signedScaling);
1872 dst = sampleTexture(stage, u_, v_, w_, w_);
// Like TEXM3X3SPEC, but the eye vector E is taken from the w components of the three
// texture coordinate interpolants for stages (stage - 2), (stage - 1) and stage.
// The texture of 'stage' is sampled at the vector (u__, v__, w__) described by the
// formula comment below.
// NOTE(review): the computation of u__, v__ and w__ is elided in this excerpt.
1875 void PixelPipeline::TEXM3X3VSPEC(Vector4s &dst, Float4 &x, Float4 &y, Float4 &z, int stage, Vector4s &src0)
1877 TEXM3X3PAD(x, y, z, src0, 2, false);
1879 Float4 E[3]; // Eye vector
1881 E[0] = v[2 + stage - 2].w;
1882 E[1] = v[2 + stage - 1].w;
1883 E[2] = v[2 + stage - 0].w;
1890 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
1909 dst = sampleTexture(stage, u__, v__, w__, w__);
// Loads the x and y components of temporary register rs[5] as floats into u_ and v_,
// then divides u_ by v_ using the approximate reciprocal Rcp_pp.
// NOTE(review): statements between and after the visible lines are elided in this
// excerpt (presumably a rescale of u_/v_ and the store of the quotient as depth) -- confirm.
1912 void PixelPipeline::TEXDEPTH()
1914 u_ = Float4(rs[5].x);
1915 v_ = Float4(rs[5].y);
1918 u_ *= Rcp_pp(v_); // FIXME: Set result to 1.0 when division by zero
// Conditional select per component: dst = (src0 > 0x0800) ? src1 : src2.
// 0x0800 is half of the 0x1000 fixed-point unit, i.e. the comparison is against 0.5.
// The select is done branch-free: the compare mask keeps src1 where true and src2 elsewhere.
1923 void PixelPipeline::CND(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1925 {Short4 t0; t0 = src0.x; t0 = CmpGT(t0, Short4(0x0800)); Short4 t1; t1 = src1.x; t1 = t1 & t0; t0 = ~t0 & src2.x; t0 = t0 | t1; dst.x = t0; };
1926 {Short4 t0; t0 = src0.y; t0 = CmpGT(t0, Short4(0x0800)); Short4 t1; t1 = src1.y; t1 = t1 & t0; t0 = ~t0 & src2.y; t0 = t0 | t1; dst.y = t0; };
1927 {Short4 t0; t0 = src0.z; t0 = CmpGT(t0, Short4(0x0800)); Short4 t1; t1 = src1.z; t1 = t1 & t0; t0 = ~t0 & src2.z; t0 = t0 | t1; dst.z = t0; };
1928 {Short4 t0; t0 = src0.w; t0 = CmpGT(t0, Short4(0x0800)); Short4 t1; t1 = src1.w; t1 = t1 & t0; t0 = ~t0 & src2.w; t0 = t0 | t1; dst.w = t0; };
// Compare-and-select per component: dst = (src0 < 0) ? src2 : src1.
// The select is done branch-free: the compare mask (0 > src0) keeps src2 where true and
// src1 elsewhere.
1931 void PixelPipeline::CMP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1933 {Short4 t0 = CmpGT(Short4(0x0000), src0.x); Short4 t1; t1 = src2.x; t1 &= t0; t0 = ~t0 & src1.x; t0 |= t1; dst.x = t0; };
1934 {Short4 t0 = CmpGT(Short4(0x0000), src0.y); Short4 t1; t1 = src2.y; t1 &= t0; t0 = ~t0 & src1.y; t0 |= t1; dst.y = t0; };
1935 {Short4 t0 = CmpGT(Short4(0x0000), src0.z); Short4 t1; t1 = src2.z; t1 &= t0; t0 = ~t0 & src1.z; t0 |= t1; dst.z = t0; };
1936 {Short4 t0 = CmpGT(Short4(0x0000), src0.w); Short4 t1; t1 = src2.w; t1 &= t0; t0 = ~t0 & src1.w; t0 |= t1; dst.w = t0; };
// Bump environment-map transform in fixed point: src1.x and src1.y are multiplied by the
// stage's 2x2 bump-map matrix (bumpmapMatrix4W) and added to src0.x / src0.y with
// saturation, following the two formula comments below. The shift by 4 after each
// MulHigh rescales the product.
// NOTE(review): the declarations of t0/t1 and the stores of t0 into dst.x / dst.y are
// elided in this excerpt, and the end of the function may lie past the visible lines.
1939 void PixelPipeline::BEM(Vector4s &dst, Vector4s &src0, Vector4s &src1, int stage)
1944 // dst.x = src0.x + BUMPENVMAT00(stage) * src1.x + BUMPENVMAT10(stage) * src1.y
1945 t0 = MulHigh(src1.x, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[0][0]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard.
1946 t1 = MulHigh(src1.y, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[1][0]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard.
1947 t0 = AddSat(t0, t1);
1948 t0 = AddSat(t0, src0.x);
1951 // dst.y = src0.y + BUMPENVMAT01(stage) * src1.x + BUMPENVMAT11(stage) * src1.y
1952 t0 = MulHigh(src1.x, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[0][1]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard.
1953 t1 = MulHigh(src1.y, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[1][1]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard.
1954 t0 = AddSat(t0, t1);
1955 t0 = AddSat(t0, src0.y);