1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "PixelPipeline.hpp"
16 #include "Renderer.hpp"
17 #include "SamplerCore.hpp"
// Flag defined in another translation unit (declared extern here).
// In this file it is read by rasterOperation(), where the linearToSRGB12_16()
// conversion of `current` follows a test of (!postBlendSRGB && state.writeSRGB).
// NOTE(review): the name suggests the sRGB conversion is otherwise applied
// after blending elsewhere - confirm at the definition site.
21 extern bool postBlendSRGB;
// Fills the `diffuse` and `specular` color vectors from v[0] and v[1], one
// component at a time.
// Each bit of state.color[n].component (0x1 = x, 0x2 = y, 0x4 = z, 0x8 = w)
// selects whether that component is converted from v[n] with convertFixed12()
// or replaced by a constant default.
// The x, y, z and w parameters are not referenced by any of the statements
// visible here.
23 void PixelPipeline::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w)
// Diffuse color: components not flagged in state.color[0].component default to 0x1000.
// NOTE(review): 0x1000 presumably encodes 1.0 in the 12-bit fixed-point format
// (alphaTest() rescales with 1.0f / 0x1000) - confirm against convertFixed12().
25 if(state.color[0].component & 0x1) diffuse.x = convertFixed12(v[0].x); else diffuse.x = Short4(0x1000);
26 if(state.color[0].component & 0x2) diffuse.y = convertFixed12(v[0].y); else diffuse.y = Short4(0x1000);
27 if(state.color[0].component & 0x4) diffuse.z = convertFixed12(v[0].z); else diffuse.z = Short4(0x1000);
28 if(state.color[0].component & 0x8) diffuse.w = convertFixed12(v[0].w); else diffuse.w = Short4(0x1000);
// Specular color: components not flagged in state.color[1].component default to zero.
30 if(state.color[1].component & 0x1) specular.x = convertFixed12(v[1].x); else specular.x = Short4(0x0000, 0x0000, 0x0000, 0x0000);
31 if(state.color[1].component & 0x2) specular.y = convertFixed12(v[1].y); else specular.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
32 if(state.color[1].component & 0x4) specular.z = convertFixed12(v[1].z); else specular.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
33 if(state.color[1].component & 0x8) specular.w = convertFixed12(v[1].w); else specular.w = Short4(0x0000, 0x0000, 0x0000, 0x0000);
// Fixed-function path: walks the texture stages in order, sampling and
// blending each one, then applies the specular color to the result.
// NOTE(review): brace lines and some statements of this function are not
// present in this copy of the file; the comments below describe only the
// statements that are visible.
36 void PixelPipeline::fixedFunction()
// Scratch register handed to every blendTexture() call; starts out as all zeros.
39 Vector4s temp(0x0000, 0x0000, 0x0000, 0x0000);
// Up to eight texture stages (indices 0..7) are considered.
41 for(int stage = 0; stage < 8; stage++)
// A stage whose operation is STAGE_DISABLE is tested for first.
// NOTE(review): the statement guarded by this test is not visible here;
// presumably it stops processing of this and all later stages - confirm.
43 if(state.textureStage[stage].stageOperation == TextureStage::STAGE_DISABLE)
// Only stages that reference a texture are sampled; the stage index is passed
// for both the second and third arguments of sampleTexture().
50 if(state.textureStage[stage].usesTexture)
52 sampleTexture(texture, stage, stage);
// Combine this stage's inputs according to its configured operation.
55 blendTexture(temp, texture, stage);
// After the stage loop: combine `specular` into `current`.
58 specularPixel(current, specular);
// Executes the pixel shader by walking its instruction list and dispatching
// each opcode to the matching helper (MOV, ADD, TEX, TEXM3X3PAD, ...).
// For instructions with a destination it then applies the destination shift,
// clamps where required and writes the result back, with special handling for
// co-issued instruction pairs.
//
// cMask: per-sample coverage masks; in the visible code it is only passed on
//        to TEXKILL.
//
// NOTE(review): brace lines, the `switch` header, `else` lines and the
// declarations of d, s0, s1 and s2 are not present in this copy of the file;
// the comments below describe only the statements that are visible.
61 void PixelPipeline::applyShader(Int cMask[4])
69 int pad = 0; // Count number of texm3x3pad instructions
70 Vector4s dPairing; // Destination for first pairing instruction
// Main loop: one iteration per shader instruction.
72 for(size_t i = 0; i < shader->getLength(); i++)
74 const Shader::Instruction *instruction = shader->getInstruction(i);
75 Shader::Opcode opcode = instruction->opcode;
77 // #ifndef NDEBUG // FIXME: Centralize debug output control
78 // shader->printInstruction(i, "debug.txt");
// Declarations and constant definitions are tested for up front.
// NOTE(review): the guarded statement is not visible; presumably these
// opcodes are skipped - confirm.
81 if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
// Operands of the current instruction.
86 const Dst &dst = instruction->dst;
87 const Src &src0 = instruction->src[0];
88 const Src &src1 = instruction->src[1];
89 const Src &src2 = instruction->src[2];
91 unsigned short version = shader->getVersion();
// `pairing` is true when the NEXT instruction has its coissue flag set;
// `coissue` is true when THIS instruction has it set.
92 bool pairing = i + 1 < shader->getLength() && shader->getInstruction(i + 1)->coissue; // First instruction of pair
93 bool coissue = instruction->coissue; // Second instruction of pair
// Fetch only the source operands that are actually present.
100 if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0);
101 if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1);
102 if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2);
// Texture coordinate for this instruction, read from v[2 + n]: n is the
// destination index for shader versions below 0x0104 and the first source
// index otherwise. (v[0] and v[1] are the inputs read by setBuiltins().)
104 Float4 x = version < 0x0104 ? v[2 + dst.index].x : v[2 + src0.index].x;
105 Float4 y = version < 0x0104 ? v[2 + dst.index].y : v[2 + src0.index].y;
106 Float4 z = version < 0x0104 ? v[2 + dst.index].z : v[2 + src0.index].z;
107 Float4 w = version < 0x0104 ? v[2 + dst.index].w : v[2 + src0.index].w;
// Opcode dispatch. Version tokens, DEF, NOP, PHASE and END need no work.
111 case Shader::OPCODE_PS_1_0: break;
112 case Shader::OPCODE_PS_1_1: break;
113 case Shader::OPCODE_PS_1_2: break;
114 case Shader::OPCODE_PS_1_3: break;
115 case Shader::OPCODE_PS_1_4: break;
117 case Shader::OPCODE_DEF: break;
119 case Shader::OPCODE_NOP: break;
// Arithmetic instructions.
120 case Shader::OPCODE_MOV: MOV(d, s0); break;
121 case Shader::OPCODE_ADD: ADD(d, s0, s1); break;
122 case Shader::OPCODE_SUB: SUB(d, s0, s1); break;
123 case Shader::OPCODE_MAD: MAD(d, s0, s1, s2); break;
124 case Shader::OPCODE_MUL: MUL(d, s0, s1); break;
125 case Shader::OPCODE_DP3: DP3(d, s0, s1); break;
126 case Shader::OPCODE_DP4: DP4(d, s0, s1); break;
127 case Shader::OPCODE_LRP: LRP(d, s0, s1, s2); break;
// TEXCOORD: one call keyed on dst.index, two calls keyed on src0.index.
// The src0 swizzle test picks z (".xyz") or w as the third coordinate, and the
// last argument is true when the source carries the DZ or DW modifier.
// NOTE(review): the version tests selecting between the forms are not visible.
128 case Shader::OPCODE_TEXCOORD:
131 TEXCOORD(d, x, y, z, dst.index);
135 if((src0.swizzle & 0x30) == 0x20) // .xyz
137 TEXCRD(d, x, y, z, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
141 TEXCRD(d, x, y, w, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
// TEXKILL: operates either on the texture coordinate or, for version 0x0104
// with a non-texture destination, on register rs[dst.index].
145 case Shader::OPCODE_TEXKILL:
148 TEXKILL(cMask, x, y, z);
150 else if(version == 0x0104)
152 if(dst.type == Shader::PARAMETER_TEXTURE)
154 TEXKILL(cMask, x, y, z);
158 TEXKILL(cMask, rs[dst.index]);
// TEX: sampling is always keyed on dst.index. For version 0x0104 a texture
// source uses the interpolated coordinate (z or w chosen by the swizzle as
// above); the remaining form samples with the fetched register s0 via TEXLD.
163 case Shader::OPCODE_TEX:
166 TEX(d, x, y, z, dst.index, false);
168 else if(version == 0x0104)
170 if(src0.type == Shader::PARAMETER_TEXTURE)
172 if((src0.swizzle & 0x30) == 0x20) // .xyz
174 TEX(d, x, y, z, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
178 TEX(d, x, y, w, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
183 TEXLD(d, s0, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
// Remaining texture-addressing instructions. Where present, the trailing
// boolean tells the helper whether src0 carries the SIGN modifier.
188 case Shader::OPCODE_TEXBEM: TEXBEM(d, s0, x, y, z, dst.index); break;
189 case Shader::OPCODE_TEXBEML: TEXBEML(d, s0, x, y, z, dst.index); break;
190 case Shader::OPCODE_TEXREG2AR: TEXREG2AR(d, s0, dst.index); break;
191 case Shader::OPCODE_TEXREG2GB: TEXREG2GB(d, s0, dst.index); break;
192 case Shader::OPCODE_TEXM3X2PAD: TEXM3X2PAD(x, y, z, s0, 0, src0.modifier == Shader::MODIFIER_SIGN); break;
193 case Shader::OPCODE_TEXM3X2TEX: TEXM3X2TEX(d, x, y, z, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
// `pad++ % 2` alternates between 0 and 1 on successive TEXM3X3PAD instructions.
194 case Shader::OPCODE_TEXM3X3PAD: TEXM3X3PAD(x, y, z, s0, pad++ % 2, src0.modifier == Shader::MODIFIER_SIGN); break;
195 case Shader::OPCODE_TEXM3X3TEX: TEXM3X3TEX(d, x, y, z, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
196 case Shader::OPCODE_TEXM3X3SPEC: TEXM3X3SPEC(d, x, y, z, dst.index, s0, s1); break;
197 case Shader::OPCODE_TEXM3X3VSPEC: TEXM3X3VSPEC(d, x, y, z, dst.index, s0); break;
198 case Shader::OPCODE_CND: CND(d, s0, s1, s2); break;
199 case Shader::OPCODE_TEXREG2RGB: TEXREG2RGB(d, s0, dst.index); break;
200 case Shader::OPCODE_TEXDP3TEX: TEXDP3TEX(d, x, y, z, dst.index, s0); break;
201 case Shader::OPCODE_TEXM3X2DEPTH: TEXM3X2DEPTH(d, x, y, z, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
202 case Shader::OPCODE_TEXDP3: TEXDP3(d, x, y, z, s0); break;
203 case Shader::OPCODE_TEXM3X3: TEXM3X3(d, x, y, z, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
204 case Shader::OPCODE_TEXDEPTH: TEXDEPTH(); break;
// OPCODE_CMP0 is handled by the CMP helper.
205 case Shader::OPCODE_CMP0: CMP(d, s0, s1, s2); break;
206 case Shader::OPCODE_BEM: BEM(d, s0, s1, dst.index); break;
207 case Shader::OPCODE_PHASE: break;
208 case Shader::OPCODE_END: break;
// Post-processing of the result; TEXKILL and instructions without a
// destination are excluded.
213 if(dst.type != Shader::PARAMETER_VOID && opcode != Shader::OPCODE_TEXKILL)
// Upward shift: each saturating self-addition doubles the value, so the masked
// components are doubled once, twice (shift > 1) or three times (shift > 2).
// NOTE(review): the test guarding these four lines is not visible; presumably
// dst.shift > 0, mirroring the `else if(dst.shift < 0)` below.
217 if(dst.mask & 0x1) { d.x = AddSat(d.x, d.x); if(dst.shift > 1) d.x = AddSat(d.x, d.x); if(dst.shift > 2) d.x = AddSat(d.x, d.x); }
218 if(dst.mask & 0x2) { d.y = AddSat(d.y, d.y); if(dst.shift > 1) d.y = AddSat(d.y, d.y); if(dst.shift > 2) d.y = AddSat(d.y, d.y); }
219 if(dst.mask & 0x4) { d.z = AddSat(d.z, d.z); if(dst.shift > 1) d.z = AddSat(d.z, d.z); if(dst.shift > 2) d.z = AddSat(d.z, d.z); }
220 if(dst.mask & 0x8) { d.w = AddSat(d.w, d.w); if(dst.shift > 1) d.w = AddSat(d.w, d.w); if(dst.shift > 2) d.w = AddSat(d.w, d.w); }
// Downward shift: right-shift the masked components by -dst.shift bits.
222 else if(dst.shift < 0)
224 if(dst.mask & 0x1) d.x = d.x >> -dst.shift;
225 if(dst.mask & 0x2) d.y = d.y >> -dst.shift;
226 if(dst.mask & 0x4) d.z = d.z >> -dst.shift;
227 if(dst.mask & 0x8) d.w = d.w >> -dst.shift;
// Clamp the masked components to the range [0x0000, 0x1000].
// NOTE(review): the condition guarding this clamp is not visible; presumably
// it is the destination's saturate modifier - confirm.
232 if(dst.mask & 0x1) { d.x = Min(d.x, Short4(0x1000)); d.x = Max(d.x, Short4(0x0000, 0x0000, 0x0000, 0x0000)); }
233 if(dst.mask & 0x2) { d.y = Min(d.y, Short4(0x1000)); d.y = Max(d.y, Short4(0x0000, 0x0000, 0x0000, 0x0000)); }
234 if(dst.mask & 0x4) { d.z = Min(d.z, Short4(0x1000)); d.z = Max(d.z, Short4(0x0000, 0x0000, 0x0000, 0x0000)); }
235 if(dst.mask & 0x8) { d.w = Min(d.w, Short4(0x1000)); d.w = Max(d.w, Short4(0x0000, 0x0000, 0x0000, 0x0000)); }
// Result of the first instruction of a pair is parked in dPairing.
// NOTE(review): presumably guarded by `pairing`, so that the second
// instruction still reads the old register contents - confirm.
240 if(dst.mask & 0x1) dPairing.x = d.x;
241 if(dst.mask & 0x2) dPairing.y = d.y;
242 if(dst.mask & 0x4) dPairing.z = d.z;
243 if(dst.mask & 0x8) dPairing.w = d.w;
// The parked result is written to the PREVIOUS instruction's destination;
// note that this local `dst` shadows the outer one.
// NOTE(review): presumably guarded by `coissue` - confirm.
248 const Dst &dst = shader->getInstruction(i - 1)->dst;
250 writeDestination(dPairing, dst);
// Write this instruction's own result to its destination.
255 writeDestination(d, dst);
// Clamps the current color and performs the alpha test / alpha-to-coverage
// step, updating the per-sample coverage masks in cMask.
// Returns a Bool; in the visible code a value named `pass` accumulates the
// OR of the coverage masks of all samples.
// NOTE(review): the return statements, the declarations of `aMask` and `pass`
// and all brace lines are not present in this copy of the file; the comments
// below describe only the statements that are visible.
261 Bool PixelPipeline::alphaTest(Int cMask[4])
// Clamp every component of `current` to the range [0x0000, 0x0FFF].
263 current.x = Min(current.x, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.x = Max(current.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));
264 current.y = Min(current.y, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.y = Max(current.y, Short4(0x0000, 0x0000, 0x0000, 0x0000));
265 current.z = Min(current.z, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.z = Max(current.z, Short4(0x0000, 0x0000, 0x0000, 0x0000));
266 current.w = Min(current.w, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.w = Max(current.w, Short4(0x0000, 0x0000, 0x0000, 0x0000));
// NOTE(review): the statement guarded by this test is not visible; presumably
// the function returns early when no alpha test is active - confirm.
268 if(!state.alphaTestActive())
// Plain alpha test: the base-class routine computes aMask from current.w.
275 if(state.transparencyAntialiasing == TRANSPARENCY_NONE)
277 PixelRoutine::alphaTest(aMask, current.w);
// NOTE(review): loop body not visible; presumably applies aMask to each
// sample's cMask entry - confirm.
279 for(unsigned int q = 0; q < state.multiSample; q++)
// Alpha-to-coverage: current.w is converted to float by scaling with
// 1 / 0x1000 and passed to alphaToCoverage() together with cMask.
284 else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
286 Float4 alpha = Float4(current.w) * Float4(1.0f / 0x1000);
288 alphaToCoverage(cMask, alpha);
// OR the masks of samples 1..multiSample-1 into `pass`.
// NOTE(review): `pass` is presumably initialized from cMask[0] on a line that
// is not visible here, since this loop starts at q = 1 - confirm.
294 for(unsigned int q = 1; q < state.multiSample; q++)
296 pass = pass | cMask[q];
// Final output stage for render target 0: applies fog, then for every sample
// blends the pixel with the color buffer and writes it out.
//
// fog:     fog factor handed to fogBlend().
// cBuffer: color buffer pointers; only cBuffer[0] is used in the visible code.
// x:       passed through to alphaBlend(), logicOperation() and writeColor().
// sMask, zMask, cMask: per-sample masks passed through to writeColor().
//
// NOTE(review): several case labels, `break`s, declarations (e.g. of `oC` and
// of `color` in the float path) and all brace lines are not present in this
// copy of the file; the comments below describe only the visible statements.
302 void PixelPipeline::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
// NOTE(review): the guarded statement is not visible; presumably the function
// returns early when color writes to target 0 are disabled - confirm.
304 if(!state.colorWriteActive(0))
// Two processing paths follow, selected by the format of render target 0.
311 switch(state.targetFormat[0])
// Path 1: the formats listed here are processed on the 16-bit fixed-point
// `current` value.
314 case FORMAT_X8R8G8B8:
315 case FORMAT_X8B8G8R8:
316 case FORMAT_A8R8G8B8:
317 case FORMAT_A8B8G8R8:
320 case FORMAT_A16B16G16R16:
// Convert to sRGB here only when sRGB writes are enabled and the global
// postBlendSRGB flag is false.
321 if(!postBlendSRGB && state.writeSRGB)
323 linearToSRGB12_16(current);
// For R5G6B5 targets, keep only the top 5 / 6 / 5 bits of x / y / z
// (0xF800 and 0xFC00 clear the low 11 and low 10 bits respectively).
333 if(state.targetFormat[0] == FORMAT_R5G6B5)
335 current.x &= Short4(0xF800u);
336 current.y &= Short4(0xFC00u);
337 current.z &= Short4(0xF800u);
340 fogBlend(current, fog);
// One pass per multisample slice.
342 for(unsigned int q = 0; q < state.multiSample; q++)
// Address of slice q: base pointer plus q times the colorSliceB[0] value read
// from DrawData.
344 Pointer<Byte> buffer = cBuffer[0] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[0]));
// Work on a per-sample copy of the color.
345 Vector4s color = current;
// Samples whose bit is clear in multiSampleMask are not written.
347 if(state.multiSampleMask & (1 << q))
349 alphaBlend(0, buffer, color, x);
350 logicOperation(0, buffer, color, x);
351 writeColor(0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
// Path 2: 32-bit float targets. `current` is converted into oC with
// convertSigned12() and fogged with the base-class fogBlend(); unlike path 1,
// no logicOperation() call is visible here.
357 case FORMAT_X32B32G32R32F:
358 case FORMAT_A32B32G32R32F:
359 convertSigned12(oC, current);
360 PixelRoutine::fogBlend(oC, fog);
362 for(unsigned int q = 0; q < state.multiSample; q++)
364 Pointer<Byte> buffer = cBuffer[0] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[0]));
367 if(state.multiSampleMask & (1 << q))
369 alphaBlend(0, buffer, color, x);
370 writeColor(0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
379 void PixelPipeline::blendTexture(Vector4s &temp, Vector4s &texture, int stage)
381 Vector4s *arg1 = nullptr;
382 Vector4s *arg2 = nullptr;
383 Vector4s *arg3 = nullptr;
389 const TextureStage::State &textureStage = state.textureStage[stage];
391 if(textureStage.firstArgument == TextureStage::SOURCE_CONSTANT ||
392 textureStage.firstArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
393 textureStage.secondArgument == TextureStage::SOURCE_CONSTANT ||
394 textureStage.secondArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
395 textureStage.thirdArgument == TextureStage::SOURCE_CONSTANT ||
396 textureStage.thirdArgumentAlpha == TextureStage::SOURCE_CONSTANT)
398 constant.x = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[0]));
399 constant.y = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[1]));
400 constant.z = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[2]));
401 constant.w = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[3]));
404 if(textureStage.firstArgument == TextureStage::SOURCE_TFACTOR ||
405 textureStage.firstArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
406 textureStage.secondArgument == TextureStage::SOURCE_TFACTOR ||
407 textureStage.secondArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
408 textureStage.thirdArgument == TextureStage::SOURCE_TFACTOR ||
409 textureStage.thirdArgumentAlpha == TextureStage::SOURCE_TFACTOR)
411 tfactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[0]));
412 tfactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[1]));
413 tfactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[2]));
414 tfactor.w = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]));
418 if(stage > 0 && textureStage.usesTexture)
420 if(state.textureStage[stage - 1].stageOperation == TextureStage::STAGE_PREMODULATE)
422 current.x = MulHigh(current.x, texture.x) << 4;
423 current.y = MulHigh(current.y, texture.y) << 4;
424 current.z = MulHigh(current.z, texture.z) << 4;
427 if(state.textureStage[stage - 1].stageOperationAlpha == TextureStage::STAGE_PREMODULATE)
429 current.w = MulHigh(current.w, texture.w) << 4;
435 texture.x = MulHigh(texture.x, L) << 4;
436 texture.y = MulHigh(texture.y, L) << 4;
437 texture.z = MulHigh(texture.z, L) << 4;
442 switch(textureStage.firstArgument)
444 case TextureStage::SOURCE_TEXTURE: arg1 = &texture; break;
445 case TextureStage::SOURCE_CONSTANT: arg1 = &constant; break;
446 case TextureStage::SOURCE_CURRENT: arg1 = ¤t; break;
447 case TextureStage::SOURCE_DIFFUSE: arg1 = &diffuse; break;
448 case TextureStage::SOURCE_SPECULAR: arg1 = &specular; break;
449 case TextureStage::SOURCE_TEMP: arg1 = &temp; break;
450 case TextureStage::SOURCE_TFACTOR: arg1 = &tfactor; break;
455 switch(textureStage.secondArgument)
457 case TextureStage::SOURCE_TEXTURE: arg2 = &texture; break;
458 case TextureStage::SOURCE_CONSTANT: arg2 = &constant; break;
459 case TextureStage::SOURCE_CURRENT: arg2 = ¤t; break;
460 case TextureStage::SOURCE_DIFFUSE: arg2 = &diffuse; break;
461 case TextureStage::SOURCE_SPECULAR: arg2 = &specular; break;
462 case TextureStage::SOURCE_TEMP: arg2 = &temp; break;
463 case TextureStage::SOURCE_TFACTOR: arg2 = &tfactor; break;
468 switch(textureStage.thirdArgument)
470 case TextureStage::SOURCE_TEXTURE: arg3 = &texture; break;
471 case TextureStage::SOURCE_CONSTANT: arg3 = &constant; break;
472 case TextureStage::SOURCE_CURRENT: arg3 = ¤t; break;
473 case TextureStage::SOURCE_DIFFUSE: arg3 = &diffuse; break;
474 case TextureStage::SOURCE_SPECULAR: arg3 = &specular; break;
475 case TextureStage::SOURCE_TEMP: arg3 = &temp; break;
476 case TextureStage::SOURCE_TFACTOR: arg3 = &tfactor; break;
485 switch(textureStage.firstModifier)
487 case TextureStage::MODIFIER_COLOR:
489 case TextureStage::MODIFIER_INVCOLOR:
490 mod1.x = SubSat(Short4(0x1000), arg1->x);
491 mod1.y = SubSat(Short4(0x1000), arg1->y);
492 mod1.z = SubSat(Short4(0x1000), arg1->z);
493 mod1.w = SubSat(Short4(0x1000), arg1->w);
497 case TextureStage::MODIFIER_ALPHA:
505 case TextureStage::MODIFIER_INVALPHA:
506 mod1.x = SubSat(Short4(0x1000), arg1->w);
507 mod1.y = SubSat(Short4(0x1000), arg1->w);
508 mod1.z = SubSat(Short4(0x1000), arg1->w);
509 mod1.w = SubSat(Short4(0x1000), arg1->w);
517 switch(textureStage.secondModifier)
519 case TextureStage::MODIFIER_COLOR:
521 case TextureStage::MODIFIER_INVCOLOR:
522 mod2.x = SubSat(Short4(0x1000), arg2->x);
523 mod2.y = SubSat(Short4(0x1000), arg2->y);
524 mod2.z = SubSat(Short4(0x1000), arg2->z);
525 mod2.w = SubSat(Short4(0x1000), arg2->w);
529 case TextureStage::MODIFIER_ALPHA:
537 case TextureStage::MODIFIER_INVALPHA:
538 mod2.x = SubSat(Short4(0x1000), arg2->w);
539 mod2.y = SubSat(Short4(0x1000), arg2->w);
540 mod2.z = SubSat(Short4(0x1000), arg2->w);
541 mod2.w = SubSat(Short4(0x1000), arg2->w);
549 switch(textureStage.thirdModifier)
551 case TextureStage::MODIFIER_COLOR:
553 case TextureStage::MODIFIER_INVCOLOR:
554 mod3.x = SubSat(Short4(0x1000), arg3->x);
555 mod3.y = SubSat(Short4(0x1000), arg3->y);
556 mod3.z = SubSat(Short4(0x1000), arg3->z);
557 mod3.w = SubSat(Short4(0x1000), arg3->w);
561 case TextureStage::MODIFIER_ALPHA:
569 case TextureStage::MODIFIER_INVALPHA:
570 mod3.x = SubSat(Short4(0x1000), arg3->w);
571 mod3.y = SubSat(Short4(0x1000), arg3->w);
572 mod3.z = SubSat(Short4(0x1000), arg3->w);
573 mod3.w = SubSat(Short4(0x1000), arg3->w);
581 switch(textureStage.stageOperation)
583 case TextureStage::STAGE_DISABLE:
585 case TextureStage::STAGE_SELECTARG1: // Arg1
590 case TextureStage::STAGE_SELECTARG2: // Arg2
595 case TextureStage::STAGE_SELECTARG3: // Arg3
600 case TextureStage::STAGE_MODULATE: // Arg1 * Arg2
601 res.x = MulHigh(arg1->x, arg2->x) << 4;
602 res.y = MulHigh(arg1->y, arg2->y) << 4;
603 res.z = MulHigh(arg1->z, arg2->z) << 4;
605 case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2
606 res.x = MulHigh(arg1->x, arg2->x) << 5;
607 res.y = MulHigh(arg1->y, arg2->y) << 5;
608 res.z = MulHigh(arg1->z, arg2->z) << 5;
610 case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4
611 res.x = MulHigh(arg1->x, arg2->x) << 6;
612 res.y = MulHigh(arg1->y, arg2->y) << 6;
613 res.z = MulHigh(arg1->z, arg2->z) << 6;
615 case TextureStage::STAGE_ADD: // Arg1 + Arg2
616 res.x = AddSat(arg1->x, arg2->x);
617 res.y = AddSat(arg1->y, arg2->y);
618 res.z = AddSat(arg1->z, arg2->z);
620 case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5
621 res.x = AddSat(arg1->x, arg2->x);
622 res.y = AddSat(arg1->y, arg2->y);
623 res.z = AddSat(arg1->z, arg2->z);
625 res.x = SubSat(res.x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
626 res.y = SubSat(res.y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
627 res.z = SubSat(res.z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
629 case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1
630 res.x = AddSat(arg1->x, arg2->x);
631 res.y = AddSat(arg1->y, arg2->y);
632 res.z = AddSat(arg1->z, arg2->z);
634 res.x = SubSat(res.x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
635 res.y = SubSat(res.y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
636 res.z = SubSat(res.z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
638 res.x = AddSat(res.x, res.x);
639 res.y = AddSat(res.y, res.y);
640 res.z = AddSat(res.z, res.z);
642 case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2
643 res.x = SubSat(arg1->x, arg2->x);
644 res.y = SubSat(arg1->y, arg2->y);
645 res.z = SubSat(arg1->z, arg2->z);
647 case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2
651 tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(arg1->x, arg2->x); res.x = SubSat(res.x, tmp);
652 tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(arg1->y, arg2->y); res.y = SubSat(res.y, tmp);
653 tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(arg1->z, arg2->z); res.z = SubSat(res.z, tmp);
656 case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2
657 res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg3->x);
658 res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg3->y);
659 res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg3->z);
661 case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2
662 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, arg3->x) << 4; res.x = AddSat(res.x, arg2->x);
663 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, arg3->y) << 4; res.y = AddSat(res.y, arg2->y);
664 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, arg3->z) << 4; res.z = AddSat(res.z, arg2->z);
666 case TextureStage::STAGE_DOT3: // 2 * (Arg1.x - 0.5) * 2 * (Arg2.x - 0.5) + 2 * (Arg1.y - 0.5) * 2 * (Arg2.y - 0.5) + 2 * (Arg1.z - 0.5) * 2 * (Arg2.z - 0.5)
670 res.x = SubSat(arg1->x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.x = MulHigh(res.x, tmp);
671 res.y = SubSat(arg1->y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.y = MulHigh(res.y, tmp);
672 res.z = SubSat(arg1->z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.z = MulHigh(res.z, tmp);
678 res.x = AddSat(res.x, res.y);
679 res.x = AddSat(res.x, res.z);
682 res.x = Max(res.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));
683 res.x = Min(res.x, Short4(0x1000));
690 case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2
691 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, current.w) << 4; res.x = AddSat(res.x, arg2->x);
692 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, current.w) << 4; res.y = AddSat(res.y, arg2->y);
693 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, current.w) << 4; res.z = AddSat(res.z, arg2->z);
695 case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Alpha * (Arg1 - Arg2) + Arg2
696 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, diffuse.w) << 4; res.x = AddSat(res.x, arg2->x);
697 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, diffuse.w) << 4; res.y = AddSat(res.y, arg2->y);
698 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, diffuse.w) << 4; res.z = AddSat(res.z, arg2->z);
700 case TextureStage::STAGE_BLENDFACTORALPHA: // Alpha * (Arg1 - Arg2) + Arg2
701 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.x = AddSat(res.x, arg2->x);
702 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.y = AddSat(res.y, arg2->y);
703 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.z = AddSat(res.z, arg2->z);
705 case TextureStage::STAGE_BLENDTEXTUREALPHA: // Alpha * (Arg1 - Arg2) + Arg2
706 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, texture.w) << 4; res.x = AddSat(res.x, arg2->x);
707 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, texture.w) << 4; res.y = AddSat(res.y, arg2->y);
708 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, texture.w) << 4; res.z = AddSat(res.z, arg2->z);
710 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha)
711 res.x = SubSat(Short4(0x1000), texture.w); res.x = MulHigh(res.x, arg2->x) << 4; res.x = AddSat(res.x, arg1->x);
712 res.y = SubSat(Short4(0x1000), texture.w); res.y = MulHigh(res.y, arg2->y) << 4; res.y = AddSat(res.y, arg1->y);
713 res.z = SubSat(Short4(0x1000), texture.w); res.z = MulHigh(res.z, arg2->z) << 4; res.z = AddSat(res.z, arg1->z);
715 case TextureStage::STAGE_PREMODULATE:
720 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: // Arg1 + Arg1.w * Arg2
721 res.x = MulHigh(arg1->w, arg2->x) << 4; res.x = AddSat(res.x, arg1->x);
722 res.y = MulHigh(arg1->w, arg2->y) << 4; res.y = AddSat(res.y, arg1->y);
723 res.z = MulHigh(arg1->w, arg2->z) << 4; res.z = AddSat(res.z, arg1->z);
725 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: // Arg1 * Arg2 + Arg1.w
726 res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg1->w);
727 res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg1->w);
728 res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg1->w);
730 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: // (1 - Arg1.w) * Arg2 + Arg1
734 res.x = AddSat(arg1->x, arg2->x); tmp = MulHigh(arg1->w, arg2->x) << 4; res.x = SubSat(res.x, tmp);
735 res.y = AddSat(arg1->y, arg2->y); tmp = MulHigh(arg1->w, arg2->y) << 4; res.y = SubSat(res.y, tmp);
736 res.z = AddSat(arg1->z, arg2->z); tmp = MulHigh(arg1->w, arg2->z) << 4; res.z = SubSat(res.z, tmp);
739 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: // (1 - Arg1) * Arg2 + Arg1.w
743 res.x = AddSat(arg1->w, arg2->x); tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = SubSat(res.x, tmp);
744 res.y = AddSat(arg1->w, arg2->y); tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = SubSat(res.y, tmp);
745 res.z = AddSat(arg1->w, arg2->z); tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = SubSat(res.z, tmp);
748 case TextureStage::STAGE_BUMPENVMAP:
750 du = Float4(texture.x) * Float4(1.0f / 0x0FE0);
751 dv = Float4(texture.y) * Float4(1.0f / 0x0FE0);
758 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
759 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
761 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
762 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
773 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
775 du = Float4(texture.x) * Float4(1.0f / 0x0FE0);
776 dv = Float4(texture.y) * Float4(1.0f / 0x0FE0);
784 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
785 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
787 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
788 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
794 L = MulHigh(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceScale4)));
796 L = AddSat(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceOffset4)));
797 L = Max(L, Short4(0x0000, 0x0000, 0x0000, 0x0000));
798 L = Min(L, Short4(0x1000));
812 if(textureStage.stageOperation != TextureStage::STAGE_DOT3)
814 switch(textureStage.firstArgumentAlpha)
816 case TextureStage::SOURCE_TEXTURE: arg1 = &texture; break;
817 case TextureStage::SOURCE_CONSTANT: arg1 = &constant; break;
818 case TextureStage::SOURCE_CURRENT: arg1 = ¤t; break;
819 case TextureStage::SOURCE_DIFFUSE: arg1 = &diffuse; break;
820 case TextureStage::SOURCE_SPECULAR: arg1 = &specular; break;
821 case TextureStage::SOURCE_TEMP: arg1 = &temp; break;
822 case TextureStage::SOURCE_TFACTOR: arg1 = &tfactor; break;
827 switch(textureStage.secondArgumentAlpha)
829 case TextureStage::SOURCE_TEXTURE: arg2 = &texture; break;
830 case TextureStage::SOURCE_CONSTANT: arg2 = &constant; break;
831 case TextureStage::SOURCE_CURRENT: arg2 = ¤t; break;
832 case TextureStage::SOURCE_DIFFUSE: arg2 = &diffuse; break;
833 case TextureStage::SOURCE_SPECULAR: arg2 = &specular; break;
834 case TextureStage::SOURCE_TEMP: arg2 = &temp; break;
835 case TextureStage::SOURCE_TFACTOR: arg2 = &tfactor; break;
840 switch(textureStage.thirdArgumentAlpha)
842 case TextureStage::SOURCE_TEXTURE: arg3 = &texture; break;
843 case TextureStage::SOURCE_CONSTANT: arg3 = &constant; break;
844 case TextureStage::SOURCE_CURRENT: arg3 = ¤t; break;
845 case TextureStage::SOURCE_DIFFUSE: arg3 = &diffuse; break;
846 case TextureStage::SOURCE_SPECULAR: arg3 = &specular; break;
847 case TextureStage::SOURCE_TEMP: arg3 = &temp; break;
848 case TextureStage::SOURCE_TFACTOR: arg3 = &tfactor; break;
853 switch(textureStage.firstModifierAlpha) // FIXME: Check if actually used
855 case TextureStage::MODIFIER_COLOR:
857 case TextureStage::MODIFIER_INVCOLOR:
858 mod1.w = SubSat(Short4(0x1000), arg1->w);
862 case TextureStage::MODIFIER_ALPHA:
865 case TextureStage::MODIFIER_INVALPHA:
866 mod1.w = SubSat(Short4(0x1000), arg1->w);
874 switch(textureStage.secondModifierAlpha) // FIXME: Check if actually used
876 case TextureStage::MODIFIER_COLOR:
878 case TextureStage::MODIFIER_INVCOLOR:
879 mod2.w = SubSat(Short4(0x1000), arg2->w);
883 case TextureStage::MODIFIER_ALPHA:
886 case TextureStage::MODIFIER_INVALPHA:
887 mod2.w = SubSat(Short4(0x1000), arg2->w);
895 switch(textureStage.thirdModifierAlpha) // FIXME: Check if actually used
897 case TextureStage::MODIFIER_COLOR:
899 case TextureStage::MODIFIER_INVCOLOR:
900 mod3.w = SubSat(Short4(0x1000), arg3->w);
904 case TextureStage::MODIFIER_ALPHA:
907 case TextureStage::MODIFIER_INVALPHA:
908 mod3.w = SubSat(Short4(0x1000), arg3->w);
916 switch(textureStage.stageOperationAlpha)
918 case TextureStage::STAGE_DISABLE:
920 case TextureStage::STAGE_SELECTARG1: // Arg1
923 case TextureStage::STAGE_SELECTARG2: // Arg2
926 case TextureStage::STAGE_SELECTARG3: // Arg3
929 case TextureStage::STAGE_MODULATE: // Arg1 * Arg2
930 res.w = MulHigh(arg1->w, arg2->w) << 4;
932 case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2
933 res.w = MulHigh(arg1->w, arg2->w) << 5;
935 case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4
936 res.w = MulHigh(arg1->w, arg2->w) << 6;
938 case TextureStage::STAGE_ADD: // Arg1 + Arg2
939 res.w = AddSat(arg1->w, arg2->w);
941 case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5
942 res.w = AddSat(arg1->w, arg2->w);
943 res.w = SubSat(res.w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
945 case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1
946 res.w = AddSat(arg1->w, arg2->w);
947 res.w = SubSat(res.w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
948 res.w = AddSat(res.w, res.w);
950 case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2
951 res.w = SubSat(arg1->w, arg2->w);
953 case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2
957 tmp = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(arg1->w, arg2->w); res.w = SubSat(res.w, tmp);
960 case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2
961 res.w = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(res.w, arg3->w);
963 case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2
964 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, arg3->w) << 4; res.w = AddSat(res.w, arg2->w);
966 case TextureStage::STAGE_DOT3:
967 break; // Already computed in color channel
968 case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2
969 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, current.w) << 4; res.w = AddSat(res.w, arg2->w);
971 case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha)
972 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, diffuse.w) << 4; res.w = AddSat(res.w, arg2->w);
974 case TextureStage::STAGE_BLENDFACTORALPHA:
975 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.w = AddSat(res.w, arg2->w);
977 case TextureStage::STAGE_BLENDTEXTUREALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha)
978 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, texture.w) << 4; res.w = AddSat(res.w, arg2->w);
980 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha)
981 res.w = SubSat(Short4(0x1000), texture.w); res.w = MulHigh(res.w, arg2->w) << 4; res.w = AddSat(res.w, arg1->w);
983 case TextureStage::STAGE_PREMODULATE:
986 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
987 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
988 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
989 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
990 case TextureStage::STAGE_BUMPENVMAP:
991 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
992 break; // Invalid alpha operations
998 // Clamp result to [0, 1]
1000 switch(textureStage.stageOperation)
1002 case TextureStage::STAGE_DISABLE:
1003 case TextureStage::STAGE_SELECTARG1:
1004 case TextureStage::STAGE_SELECTARG2:
1005 case TextureStage::STAGE_SELECTARG3:
1006 case TextureStage::STAGE_MODULATE:
1007 case TextureStage::STAGE_MODULATE2X:
1008 case TextureStage::STAGE_MODULATE4X:
1009 case TextureStage::STAGE_ADD:
1010 case TextureStage::STAGE_MULTIPLYADD:
1011 case TextureStage::STAGE_LERP:
1012 case TextureStage::STAGE_BLENDCURRENTALPHA:
1013 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1014 case TextureStage::STAGE_BLENDFACTORALPHA:
1015 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1016 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1017 case TextureStage::STAGE_DOT3: // Already clamped
1018 case TextureStage::STAGE_PREMODULATE:
1019 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1020 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1021 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1022 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1023 case TextureStage::STAGE_BUMPENVMAP:
1024 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1025 if(state.textureStage[stage].cantUnderflow)
1027 break; // Can't go below zero
1029 case TextureStage::STAGE_ADDSIGNED:
1030 case TextureStage::STAGE_ADDSIGNED2X:
1031 case TextureStage::STAGE_SUBTRACT:
1032 case TextureStage::STAGE_ADDSMOOTH:
1033 res.x = Max(res.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1034 res.y = Max(res.y, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1035 res.z = Max(res.z, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1041 switch(textureStage.stageOperationAlpha)
1043 case TextureStage::STAGE_DISABLE:
1044 case TextureStage::STAGE_SELECTARG1:
1045 case TextureStage::STAGE_SELECTARG2:
1046 case TextureStage::STAGE_SELECTARG3:
1047 case TextureStage::STAGE_MODULATE:
1048 case TextureStage::STAGE_MODULATE2X:
1049 case TextureStage::STAGE_MODULATE4X:
1050 case TextureStage::STAGE_ADD:
1051 case TextureStage::STAGE_MULTIPLYADD:
1052 case TextureStage::STAGE_LERP:
1053 case TextureStage::STAGE_BLENDCURRENTALPHA:
1054 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1055 case TextureStage::STAGE_BLENDFACTORALPHA:
1056 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1057 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1058 case TextureStage::STAGE_DOT3: // Already clamped
1059 case TextureStage::STAGE_PREMODULATE:
1060 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1061 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1062 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1063 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1064 case TextureStage::STAGE_BUMPENVMAP:
1065 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1066 if(state.textureStage[stage].cantUnderflow)
1068 break; // Can't go below zero
1070 case TextureStage::STAGE_ADDSIGNED:
1071 case TextureStage::STAGE_ADDSIGNED2X:
1072 case TextureStage::STAGE_SUBTRACT:
1073 case TextureStage::STAGE_ADDSMOOTH:
1074 res.w = Max(res.w, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1080 switch(textureStage.stageOperation)
1082 case TextureStage::STAGE_DISABLE:
1083 case TextureStage::STAGE_SELECTARG1:
1084 case TextureStage::STAGE_SELECTARG2:
1085 case TextureStage::STAGE_SELECTARG3:
1086 case TextureStage::STAGE_MODULATE:
1087 case TextureStage::STAGE_SUBTRACT:
1088 case TextureStage::STAGE_ADDSMOOTH:
1089 case TextureStage::STAGE_LERP:
1090 case TextureStage::STAGE_BLENDCURRENTALPHA:
1091 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1092 case TextureStage::STAGE_BLENDFACTORALPHA:
1093 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1094 case TextureStage::STAGE_DOT3: // Already clamped
1095 case TextureStage::STAGE_PREMODULATE:
1096 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1097 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1098 case TextureStage::STAGE_BUMPENVMAP:
1099 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1100 break; // Can't go above one
1101 case TextureStage::STAGE_MODULATE2X:
1102 case TextureStage::STAGE_MODULATE4X:
1103 case TextureStage::STAGE_ADD:
1104 case TextureStage::STAGE_ADDSIGNED:
1105 case TextureStage::STAGE_ADDSIGNED2X:
1106 case TextureStage::STAGE_MULTIPLYADD:
1107 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1108 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1109 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1110 res.x = Min(res.x, Short4(0x1000));
1111 res.y = Min(res.y, Short4(0x1000));
1112 res.z = Min(res.z, Short4(0x1000));
1118 switch(textureStage.stageOperationAlpha)
1120 case TextureStage::STAGE_DISABLE:
1121 case TextureStage::STAGE_SELECTARG1:
1122 case TextureStage::STAGE_SELECTARG2:
1123 case TextureStage::STAGE_SELECTARG3:
1124 case TextureStage::STAGE_MODULATE:
1125 case TextureStage::STAGE_SUBTRACT:
1126 case TextureStage::STAGE_ADDSMOOTH:
1127 case TextureStage::STAGE_LERP:
1128 case TextureStage::STAGE_BLENDCURRENTALPHA:
1129 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1130 case TextureStage::STAGE_BLENDFACTORALPHA:
1131 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1132 case TextureStage::STAGE_DOT3: // Already clamped
1133 case TextureStage::STAGE_PREMODULATE:
1134 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1135 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1136 case TextureStage::STAGE_BUMPENVMAP:
1137 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1138 break; // Can't go above one
1139 case TextureStage::STAGE_MODULATE2X:
1140 case TextureStage::STAGE_MODULATE4X:
1141 case TextureStage::STAGE_ADD:
1142 case TextureStage::STAGE_ADDSIGNED:
1143 case TextureStage::STAGE_ADDSIGNED2X:
1144 case TextureStage::STAGE_MULTIPLYADD:
1145 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1146 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1147 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1148 res.w = Min(res.w, Short4(0x1000));
1154 switch(textureStage.destinationArgument)
1156 case TextureStage::DESTINATION_CURRENT:
1162 case TextureStage::DESTINATION_TEMP:
// Blends the fog colour into the x/y/z channels of the current colour.
// Each channel becomes (current * fog) + (fogColor * (0xFFFF - fog)), where
// both products are unsigned 16-bit high-half multiplies. The w channel is
// not written by any visible line.
// NOTE(review): "¤t" in the signature looks like a mis-decoded "&current"
// (the body uses `current`) - restore the parameter declaration.
// NOTE(review): the bodies of the two guards below are not visible in this
// listing; presumably the first one returns early - confirm against the
// complete file.
1173 void PixelPipeline::fogBlend(Vector4s ¤t, Float4 &f)
1175 if(!state.fogActive)
1180 if(state.pixelFogMode != FOG_NONE)
// Fog factor as an unsigned 16-bit fixed-point value.
1185 UShort4 fog = convertFixed16(f, true);
// Scale the existing colour by the fog factor.
1187 current.x = As<Short4>(MulHigh(As<UShort4>(current.x), fog));
1188 current.y = As<Short4>(MulHigh(As<UShort4>(current.y), fog));
1189 current.z = As<Short4>(MulHigh(As<UShort4>(current.z), fog));
// Complement of the fog factor, used as the weight of the fog colour.
1191 UShort4 invFog = UShort4(0xFFFFu) - fog;
// Add the fog colour (read from DrawData::fog.color4) weighted by invFog.
1193 current.x += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[0]))));
1194 current.y += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[1]))));
1195 current.z += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[2]))));
1198 void PixelPipeline::specularPixel(Vector4s ¤t, Vector4s &specular)
1200 if(!state.specularAdd)
1205 current.x = AddSat(current.x, specular.x);
1206 current.y = AddSat(current.y, specular.y);
1207 current.z = AddSat(current.z, specular.z);
// Samples texture stage `stage` using interpolant v[2 + coordinates] as the
// texture coordinate and forwards to the explicit-coordinate overload.
// Interpolants v[0] and v[1] carry the diffuse and specular colours, so
// texture coordinate sets start at index 2.
// NOTE(review): several lines between the coordinate loads and the final
// call are not visible in this listing - confirm what they do to x/y.
1210 void PixelPipeline::sampleTexture(Vector4s &c, int coordinates, int stage, bool project)
1212 Float4 x = v[2 + coordinates].x;
1213 Float4 y = v[2 + coordinates].y;
1214 Float4 z = v[2 + coordinates].z;
1215 Float4 w = v[2 + coordinates].w;
1225 sampleTexture(c, stage, x, y, z, w, project);
// Samples texture stage `stage` at (u, v, w) with fourth coordinate q and
// writes the fixed-point result to c.
// Two sampler invocations are visible: one with the coordinates as given and
// one with u, v and w pre-multiplied by reciprocal(q).
// NOTE(review): the condition selecting between them is not visible in this
// listing; presumably it is `project` - confirm.
// NOTE(review): the Ticks()/cycles lines look like profiling code that may be
// conditionally compiled - the guards are not visible here.
1228 void PixelPipeline::sampleTexture(Vector4s &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project)
1231 Long texTime = Ticks();
// Address of this stage's texture description inside the draw data.
1237 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + stage * sizeof(Texture);
1241 sampler[stage]->sampleTexture(texture, c, u, v, w, q, dsx, dsy);
// Divide the coordinates by q before sampling.
1245 Float4 rq = reciprocal(q);
1247 Float4 u_q = u * rq;
1248 Float4 v_q = v * rq;
1249 Float4 w_q = w * rq;
1251 sampler[stage]->sampleTexture(texture, c, u_q, v_q, w_q, q, dsx, dsy);
// Accumulate the elapsed ticks into the texture-sampling counter.
1255 cycles[PERF_TEX] += Ticks() - texTime;
1259 Short4 PixelPipeline::convertFixed12(RValue<Float4> cf)
1261 return RoundShort4(cf * Float4(0x1000));
1264 void PixelPipeline::convertFixed12(Vector4s &cs, Vector4f &cf)
1266 cs.x = convertFixed12(cf.x);
1267 cs.y = convertFixed12(cf.y);
1268 cs.z = convertFixed12(cf.z);
1269 cs.w = convertFixed12(cf.w);
1272 Float4 PixelPipeline::convertSigned12(Short4 &cs)
1274 return Float4(cs) * Float4(1.0f / 0x0FFE);
1277 void PixelPipeline::convertSigned12(Vector4f &cf, Vector4s &cs)
1279 cf.x = convertSigned12(cs.x);
1280 cf.y = convertSigned12(cs.y);
1281 cf.z = convertSigned12(cs.z);
1282 cf.w = convertSigned12(cs.w);
// Stores the components of d into the register selected by dst.
// Only the components enabled in dst.mask are written: bit 0x1 = x,
// 0x2 = y, 0x4 = z, 0x8 = w. The register file is chosen per case label and
// indexed by dst.index.
// NOTE(review): the switch header, the `break` statements and any default
// case are not visible in this listing; presumably the switch is on the
// destination's parameter type - confirm.
1285 void PixelPipeline::writeDestination(Vector4s &d, const Dst &dst)
// Temporary registers live in rs[].
1289 case Shader::PARAMETER_TEMP:
1290 if(dst.mask & 0x1) rs[dst.index].x = d.x;
1291 if(dst.mask & 0x2) rs[dst.index].y = d.y;
1292 if(dst.mask & 0x4) rs[dst.index].z = d.z;
1293 if(dst.mask & 0x8) rs[dst.index].w = d.w;
// Input registers live in vs[].
1295 case Shader::PARAMETER_INPUT:
1296 if(dst.mask & 0x1) vs[dst.index].x = d.x;
1297 if(dst.mask & 0x2) vs[dst.index].y = d.y;
1298 if(dst.mask & 0x4) vs[dst.index].z = d.z;
1299 if(dst.mask & 0x8) vs[dst.index].w = d.w;
// Constants are never a valid destination.
1301 case Shader::PARAMETER_CONST: ASSERT(false); break;
// Texture registers live in ts[].
1302 case Shader::PARAMETER_TEXTURE:
1303 if(dst.mask & 0x1) ts[dst.index].x = d.x;
1304 if(dst.mask & 0x2) ts[dst.index].y = d.y;
1305 if(dst.mask & 0x4) ts[dst.index].z = d.z;
1306 if(dst.mask & 0x8) ts[dst.index].w = d.w;
// Colour outputs are written to vs[], the same array the input case uses.
1308 case Shader::PARAMETER_COLOROUT:
1309 if(dst.mask & 0x1) vs[dst.index].x = d.x;
1310 if(dst.mask & 0x2) vs[dst.index].y = d.y;
1311 if(dst.mask & 0x4) vs[dst.index].z = d.z;
1312 if(dst.mask & 0x8) vs[dst.index].w = d.w;
// Reads the source register described by src, applies its swizzle and then
// its source modifier, working on 16-bit fixed-point components in which
// 0x1000 represents 1.0 and 0x0800 represents 0.5.
// NOTE(review): the local declarations (reg, i, c, mod), the switch on the
// register type, several case bodies and the final return are not visible in
// this listing; comments below describe only the visible lines.
1319 Vector4s PixelPipeline::fetchRegister(const Src &src)
// Constants are loaded from the draw data (ps.cW) into the local vector c.
1326 if(src.type == Shader::PARAMETER_CONST)
1328 c.x = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][0]));
1329 c.y = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][1]));
1330 c.z = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][2]));
1331 c.w = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][3]));
// Select the register file; void and literal sources return rs[0] as a dummy.
1336 case Shader::PARAMETER_TEMP: reg = &rs[i]; break;
1337 case Shader::PARAMETER_INPUT: reg = &vs[i]; break;
1338 case Shader::PARAMETER_CONST: reg = &c; break;
1339 case Shader::PARAMETER_TEXTURE: reg = &ts[i]; break;
1340 case Shader::PARAMETER_VOID: return rs[0]; // Dummy
1341 case Shader::PARAMETER_FLOAT4LITERAL: return rs[0]; // Dummy
1342 default: ASSERT(false); return rs[0];
// Swizzle: two bits per output component select the source component.
1345 const Short4 &x = (*reg)[(src.swizzle >> 0) & 0x3];
1346 const Short4 &y = (*reg)[(src.swizzle >> 2) & 0x3];
1347 const Short4 &z = (*reg)[(src.swizzle >> 4) & 0x3];
1348 const Short4 &w = (*reg)[(src.swizzle >> 6) & 0x3];
1352 switch(src.modifier)
// NOTE(review): body not visible; presumably a plain copy - confirm.
1354 case Shader::MODIFIER_NONE:
// Bias: x - 0.5
1360 case Shader::MODIFIER_BIAS:
1361 mod.x = SubSat(x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1362 mod.y = SubSat(y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1363 mod.z = SubSat(z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1364 mod.w = SubSat(w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
// Negated bias: 0.5 - x
1366 case Shader::MODIFIER_BIAS_NEGATE:
1367 mod.x = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), x);
1368 mod.y = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), y);
1369 mod.z = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), z);
1370 mod.w = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), w);
// Complement: 1 - x
1372 case Shader::MODIFIER_COMPLEMENT:
1373 mod.x = SubSat(Short4(0x1000), x);
1374 mod.y = SubSat(Short4(0x1000), y);
1375 mod.z = SubSat(Short4(0x1000), z);
1376 mod.w = SubSat(Short4(0x1000), w);
// NOTE(review): body not visible; presumably negates each component - confirm.
1378 case Shader::MODIFIER_NEGATE:
// Times two, with saturation: x + x
1384 case Shader::MODIFIER_X2:
1385 mod.x = AddSat(x, x);
1386 mod.y = AddSat(y, y);
1387 mod.z = AddSat(z, z);
1388 mod.w = AddSat(w, w);
// Negated times two: -(x + x)
1390 case Shader::MODIFIER_X2_NEGATE:
1391 mod.x = -AddSat(x, x);
1392 mod.y = -AddSat(y, y);
1393 mod.z = -AddSat(z, z);
1394 mod.w = -AddSat(w, w);
// Signed scaling: 2 * (x - 0.5)
1396 case Shader::MODIFIER_SIGN:
1397 mod.x = SubSat(x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1398 mod.y = SubSat(y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1399 mod.z = SubSat(z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1400 mod.w = SubSat(w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1401 mod.x = AddSat(mod.x, mod.x);
1402 mod.y = AddSat(mod.y, mod.y);
1403 mod.z = AddSat(mod.z, mod.z);
1404 mod.w = AddSat(mod.w, mod.w);
// Negated signed scaling: 2 * (0.5 - x)
1406 case Shader::MODIFIER_SIGN_NEGATE:
1407 mod.x = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), x);
1408 mod.y = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), y);
1409 mod.z = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), z);
1410 mod.w = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), w);
1411 mod.x = AddSat(mod.x, mod.x);
1412 mod.y = AddSat(mod.y, mod.y);
1413 mod.z = AddSat(mod.z, mod.z);
1414 mod.w = AddSat(mod.w, mod.w);
// NOTE(review): the statements of the DZ and DW cases are not visible here.
1416 case Shader::MODIFIER_DZ:
1421 // Projection performed by texture sampler
1423 case Shader::MODIFIER_DW:
1428 // Projection performed by texture sampler
// A doubled constant is additionally clamped to [-0x1000, 0x1000], i.e. [-1, 1].
1434 if(src.type == Shader::PARAMETER_CONST && (src.modifier == Shader::MODIFIER_X2 || src.modifier == Shader::MODIFIER_X2_NEGATE))
1436 mod.x = Min(mod.x, Short4(0x1000)); mod.x = Max(mod.x, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
1437 mod.y = Min(mod.y, Short4(0x1000)); mod.y = Max(mod.y, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
1438 mod.z = Min(mod.z, Short4(0x1000)); mod.z = Max(mod.z, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
1439 mod.w = Min(mod.w, Short4(0x1000)); mod.w = Max(mod.w, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
// NOTE(review): the body of MOV is not visible in this listing. Going by the
// name and signature it presumably copies src0 into dst component by
// component - confirm against the complete file.
1445 void PixelPipeline::MOV(Vector4s &dst, Vector4s &src0)
1453 void PixelPipeline::ADD(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1455 dst.x = AddSat(src0.x, src1.x);
1456 dst.y = AddSat(src0.y, src1.y);
1457 dst.z = AddSat(src0.z, src1.z);
1458 dst.w = AddSat(src0.w, src1.w);
1461 void PixelPipeline::SUB(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1463 dst.x = SubSat(src0.x, src1.x);
1464 dst.y = SubSat(src0.y, src1.y);
1465 dst.z = SubSat(src0.z, src1.z);
1466 dst.w = SubSat(src0.w, src1.w);
1469 void PixelPipeline::MAD(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1471 // FIXME: Long fixed-point multiply fixup
1472 { dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x); }
1474 dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y);
1476 {dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z); }
1477 {dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w); }
1480 void PixelPipeline::MUL(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1482 // FIXME: Long fixed-point multiply fixup
1483 { dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); }
1485 dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y);
1487 {dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); }
1488 {dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); }
// Three-component dot product of src0 and src1 in fixed point.
// Each product is the high half of a 16x16 multiply, scaled back up by four
// saturating doublings, and the three terms are summed with saturating adds
// into t0.
// NOTE(review): the declarations of t0/t1 and the stores of the result into
// dst are not visible in this listing - confirm which components receive it.
1491 void PixelPipeline::DP3(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1496 // FIXME: Long fixed-point multiply fixup
// x term
1497 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0);
// y term, accumulated into t0
1498 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1499 t0 = AddSat(t0, t1);
// z term, accumulated into t0
1500 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1501 t0 = AddSat(t0, t1);
// Four-component dot product of src0 and src1 in fixed point.
// Each product is the high half of a 16x16 multiply, scaled back up by four
// saturating doublings, and the four terms are summed with saturating adds
// into t0.
// NOTE(review): the declarations of t0/t1 and the stores of the result into
// dst are not visible in this listing - confirm which components receive it.
1509 void PixelPipeline::DP4(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1514 // FIXME: Long fixed-point multiply fixup
// x term
1515 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0);
// y term, accumulated into t0
1516 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1517 t0 = AddSat(t0, t1);
// z term, accumulated into t0
1518 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1519 t0 = AddSat(t0, t1);
// w term, accumulated into t0
1520 t1 = MulHigh(src0.w, src1.w); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1521 t0 = AddSat(t0, t1);
1529 void PixelPipeline::LRP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1531 // FIXME: Long fixed-point multiply fixup
1532 { dst.x = SubSat(src1.x, src2.x); dst.x = MulHigh(dst.x, src0.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x); }
1534 dst.y = SubSat(src1.y, src2.y); dst.y = MulHigh(dst.y, src0.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y);
1536 {dst.z = SubSat(src1.z, src2.z); dst.z = MulHigh(dst.z, src0.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z); }
1537 {dst.w = SubSat(src1.w, src2.w); dst.w = MulHigh(dst.w, src0.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w); }
// Writes texture coordinate set `coordinate` to dst as a colour.
// For each of u, v and s whose bit is set in the interpolant's component
// mask (0x01, 0x02, 0x04) the value is clamped to [0, 1] and converted to
// 12-bit fixed point. dst.w is always set to 0x1000 (1.0).
// NOTE(review): the declarations of uw/vw/sw and the `else` lines are not
// visible in this listing; the zero assignments are presumably the else
// branches of the mask tests - confirm.
1540 void PixelPipeline::TEXCOORD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate)
1546 if(state.interpolant[2 + coordinate].component & 0x01)
1548 uw = Max(u, Float4(0.0f));
1549 uw = Min(uw, Float4(1.0f));
1550 dst.x = convertFixed12(uw);
1554 dst.x = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1557 if(state.interpolant[2 + coordinate].component & 0x02)
1559 vw = Max(v, Float4(0.0f));
1560 vw = Min(vw, Float4(1.0f));
1561 dst.y = convertFixed12(vw);
1565 dst.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1568 if(state.interpolant[2 + coordinate].component & 0x04)
1570 sw = Max(s, Float4(0.0f));
1571 sw = Min(sw, Float4(1.0f));
1572 dst.z = convertFixed12(sw);
1576 dst.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1579 dst.w = Short4(0x1000);
// Writes texture coordinate set `coordinate` to dst without clamping to
// [0, 1]. For each of u, v and s whose bit is set in the interpolant's
// component mask the value is scaled by 0x1000, clamped to the signed
// 16-bit range [-0x8000, 0x7FFF] and rounded.
// NOTE(review): the lines that initialise uw/vw/sw (and any use of
// `project`) as well as the `else` lines are not visible in this listing;
// the zero assignments are presumably the else branches - confirm.
// No visible line writes dst.w.
1582 void PixelPipeline::TEXCRD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate, bool project)
1594 if(state.interpolant[2 + coordinate].component & 0x01)
1596 uw *= Float4(0x1000);
1597 uw = Max(uw, Float4(-0x8000));
1598 uw = Min(uw, Float4(0x7FFF));
1599 dst.x = RoundShort4(uw);
1603 dst.x = Short4(0x0000);
1606 if(state.interpolant[2 + coordinate].component & 0x02)
1608 vw *= Float4(0x1000);
1609 vw = Max(vw, Float4(-0x8000));
1610 vw = Min(vw, Float4(0x7FFF));
1611 dst.y = RoundShort4(vw);
1615 dst.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1618 if(state.interpolant[2 + coordinate].component & 0x04)
1620 sw *= Float4(0x1000);
1621 sw = Max(sw, Float4(-0x8000));
1622 sw = Min(sw, Float4(0x7FFF));
1623 dst.z = RoundShort4(sw);
1627 dst.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
// Computes the dot product of the texture coordinate (u, v, s) with src via
// TEXM3X3PAD (row 0, which stores it in u_) and converts it to 12-bit fixed
// point.
// NOTE(review): the stores of t0 into dst are not visible in this listing -
// confirm which components receive it.
1631 void PixelPipeline::TEXDP3(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src)
1633 TEXM3X3PAD(u, v, s, src, 0, false);
1635 Short4 t0 = RoundShort4(u_ * Float4(0x1000));
// Computes the dot product of the texture coordinate (u, v, s) with src0 via
// TEXM3X3PAD (row 0, which stores it in u_) and samples stage `stage` at
// (u_, v_, w_).
// NOTE(review): lines between the two calls are not visible in this listing;
// they presumably initialise v_ and w_ - confirm.
1643 void PixelPipeline::TEXDP3TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0)
1645 TEXM3X3PAD(u, v, s, src0, 0, false);
1650 sampleTexture(dst, stage, u_, v_, w_, w_);
// Texture-kill test on float coordinates.
// `kill` has a bit set for each pixel whose u, v and s are all not less than
// zero, i.e. the pixels that survive.
// NOTE(review): the loop body is not visible in this listing; presumably it
// combines `kill` into cMask[q] for every sample - confirm.
1653 void PixelPipeline::TEXKILL(Int cMask[4], Float4 &u, Float4 &v, Float4 &s)
1655 Int kill = SignMask(CmpNLT(u, Float4(0.0f))) &
1656 SignMask(CmpNLT(v, Float4(0.0f))) &
1657 SignMask(CmpNLT(s, Float4(0.0f)));
1659 for(unsigned int q = 0; q < state.multiSample; q++)
// Texture-kill test on a fixed-point register.
// OR-ing x, y and z leaves the sign bit set where any of them is negative;
// the sign mask is then inverted in its low four bits (XOR 0xF), so a set
// bit marks a pixel with no negative component.
// NOTE(review): the loop body is not visible in this listing; presumably it
// combines `kill` into cMask[q] for every sample - confirm.
1665 void PixelPipeline::TEXKILL(Int cMask[4], Vector4s &src)
1667 Short4 test = src.x | src.y | src.z;
1668 Int kill = SignMask(Pack(test, test)) ^ 0x0000000F;
1670 for(unsigned int q = 0; q < state.multiSample; q++)
1676 void PixelPipeline::TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int sampler, bool project)
1678 sampleTexture(dst, sampler, u, v, s, s, project);
1681 void PixelPipeline::TEXLD(Vector4s &dst, Vector4s &src, int sampler, bool project)
1683 Float4 u = Float4(src.x) * Float4(1.0f / 0x0FFE);
1684 Float4 v = Float4(src.y) * Float4(1.0f / 0x0FFE);
1685 Float4 s = Float4(src.z) * Float4(1.0f / 0x0FFE);
1687 sampleTexture(dst, sampler, u, v, s, s, project);
// Bump environment mapping: perturbs the texture coordinate with the x/y
// components of src, transformed by this stage's 2x2 bump matrix
// (DrawData::textureStage[stage].bumpmapMatrix4F), then samples the stage at
// (u_, v_, s).
// NOTE(review): the declarations of du2/dv2 and the lines that combine the
// four products with u and v into u_ and v_ are not visible in this listing.
1690 void PixelPipeline::TEXBEM(Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)
// Convert the fixed-point perturbation to float.
1692 Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE);
1693 Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE);
// Products with matrix entries [0][0] and [1][0].
1698 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
1699 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
// Products with matrix entries [1][1] and [0][1].
1701 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
1702 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
1708 sampleTexture(dst, stage, u_, v_, s, s);
// Bump environment mapping with luminance: same coordinate perturbation and
// sampling as TEXBEM, after which the x/y/z channels of the sampled colour
// are multiplied by a luminance value L.
// L is scaled by luminanceScale4, offset by luminanceOffset4 and clamped to
// [0, 0x1000] (0..1 in 12-bit fixed point).
// NOTE(review): the declarations of du2/dv2, the computation of u_ and v_,
// and the initial value of L are not visible in this listing.
1711 void PixelPipeline::TEXBEML(Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)
// Convert the fixed-point perturbation to float.
1713 Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE);
1714 Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE);
// Products with matrix entries [0][0] and [1][0].
1719 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
1720 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
// Products with matrix entries [1][1] and [0][1].
1722 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
1723 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
1729 sampleTexture(dst, stage, u_, v_, s, s);
// Luminance: scale, offset, then clamp to [0, 1].
1734 L = MulHigh(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceScale4)));
1736 L = AddSat(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceOffset4)));
1737 L = Max(L, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1738 L = Min(L, Short4(0x1000));
// Modulate the colour channels by L; << 4 rescales the high-half product.
1740 dst.x = MulHigh(dst.x, L); dst.x = dst.x << 4;
1741 dst.y = MulHigh(dst.y, L); dst.y = dst.y << 4;
1742 dst.z = MulHigh(dst.z, L); dst.z = dst.z << 4;
1745 void PixelPipeline::TEXREG2AR(Vector4s &dst, Vector4s &src0, int stage)
1747 Float4 u = Float4(src0.w) * Float4(1.0f / 0x0FFE);
1748 Float4 v = Float4(src0.x) * Float4(1.0f / 0x0FFE);
1749 Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);
1751 sampleTexture(dst, stage, u, v, s, s);
// Samples stage `stage` using components of a fixed-point register as the
// texture coordinate: u comes from src0.y and v from src0.z.
// NOTE(review): the declaration of `s` is not visible in this listing -
// confirm what the third coordinate is set to.
1754 void PixelPipeline::TEXREG2GB(Vector4s &dst, Vector4s &src0, int stage)
1756 Float4 u = Float4(src0.y) * Float4(1.0f / 0x0FFE);
1757 Float4 v = Float4(src0.z) * Float4(1.0f / 0x0FFE);
1760 sampleTexture(dst, stage, u, v, s, s);
1763 void PixelPipeline::TEXREG2RGB(Vector4s &dst, Vector4s &src0, int stage)
1765 Float4 u = Float4(src0.x) * Float4(1.0f / 0x0FFE);
1766 Float4 v = Float4(src0.y) * Float4(1.0f / 0x0FFE);
1767 Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);
1769 sampleTexture(dst, stage, u, v, s, s);
// Completes a 3x2 matrix multiply (row 1, stored in v_ by TEXM3X3PAD) and
// divides u_ by v_ using the approximate reciprocal Rcp_pp.
// NOTE(review): the line that consumes the quotient is not visible in this
// listing; by the function name it presumably becomes the output depth -
// confirm.
1772 void PixelPipeline::TEXM3X2DEPTH(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src, bool signedScaling)
1774 TEXM3X2PAD(u, v, s, src, 1, signedScaling);
1777 u_ *= Rcp_pp(v_); // FIXME: Set result to 1.0 when division by zero
1782 void PixelPipeline::TEXM3X2PAD(Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling)
1784 TEXM3X3PAD(u, v, s, src0, component, signedScaling);
// Completes a 3x2 matrix multiply (row 1, stored in v_ by TEXM3X3PAD) and
// samples stage `stage` at (u_, v_, w_).
// NOTE(review): a line between the two calls is not visible in this listing;
// it presumably initialises w_ - confirm.
1787 void PixelPipeline::TEXM3X2TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling)
1789 TEXM3X2PAD(u, v, s, src0, 1, signedScaling);
1793 sampleTexture(dst, stage, u_, v_, w_, w_);
1796 void PixelPipeline::TEXM3X3(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, bool signedScaling)
1798 TEXM3X3PAD(u, v, s, src0, 2, signedScaling);
1800 dst.x = RoundShort4(u_ * Float4(0x1000));
1801 dst.y = RoundShort4(v_ * Float4(0x1000));
1802 dst.z = RoundShort4(w_ * Float4(0x1000));
1803 dst.w = Short4(0x1000);
// Computes one row of a matrix multiply: the dot product of (U, V, W) with
// the texture coordinate (u, v, s), scaled by 1/0x1000, stored in u_, v_ or
// w_ for component 0, 1 or 2 respectively. Any other component asserts.
// NOTE(review): the statements guarded by the `if` are not visible in this
// listing; presumably they derive U, V and W from src0 - confirm. The switch
// header is also not visible.
1806 void PixelPipeline::TEXM3X3PAD(Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling)
// Taken for the first row, or whenever the scaling mode changed since the
// previous call.
1808 if(component == 0 || previousScaling != signedScaling) // FIXME: Other source modifiers?
1814 previousScaling = signedScaling;
1817 Float4 x = U * u + V * v + W * s;
1819 x *= Float4(1.0f / 0x1000);
1823 case 0: u_ = x; break;
1824 case 1: v_ = x; break;
1825 case 2: w_ = x; break;
1826 default: ASSERT(false);
// Completes a 3x3 matrix multiply (row 2), treats the result as a normal N,
// reflects the eye vector E (taken from the x/y/z components of src1) about
// it, and samples stage `stage` with the reflected vector (u__, v__, w__).
// NOTE(review): the statements that evaluate the reflection formula are not
// visible in this listing; only the formula comment below documents them.
1830 void PixelPipeline::TEXM3X3SPEC(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, Vector4s &src1)
1832 TEXM3X3PAD(u, v, s, src0, 2, false);
1834 Float4 E[3]; // Eye vector
// Convert the fixed-point eye vector to float.
1836 E[0] = Float4(src1.x) * Float4(1.0f / 0x0FFE);
1837 E[1] = Float4(src1.y) * Float4(1.0f / 0x0FFE);
1838 E[2] = Float4(src1.z) * Float4(1.0f / 0x0FFE);
1845 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
1864 sampleTexture(dst, stage, u__, v__, w__, w__);
1867 void PixelPipeline::TEXM3X3TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling)
1869 TEXM3X3PAD(u, v, s, src0, 2, signedScaling);
1871 sampleTexture(dst, stage, u_, v_, w_, w_);
// Like TEXM3X3SPEC, but the eye vector E is assembled from the w components
// of three consecutive texture coordinate interpolants (stages stage - 2,
// stage - 1 and stage) instead of coming from a register.
// NOTE(review): the statements that evaluate the reflection formula are not
// visible in this listing; only the formula comment below documents them.
1874 void PixelPipeline::TEXM3X3VSPEC(Vector4s &dst, Float4 &x, Float4 &y, Float4 &z, int stage, Vector4s &src0)
1876 TEXM3X3PAD(x, y, z, src0, 2, false);
1878 Float4 E[3]; // Eye vector
1880 E[0] = v[2 + stage - 2].w;
1881 E[1] = v[2 + stage - 1].w;
1882 E[2] = v[2 + stage - 0].w;
1889 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
1908 sampleTexture(dst, stage, u__, v__, w__, w__);
// Divides the x component of temporary register rs[5] by its y component,
// using the approximate reciprocal Rcp_pp, leaving the quotient in u_.
// NOTE(review): the line that consumes the quotient is not visible in this
// listing; by the function name it presumably becomes the output depth -
// confirm.
1911 void PixelPipeline::TEXDEPTH()
1913 u_ = Float4(rs[5].x);
1914 v_ = Float4(rs[5].y);
1917 u_ *= Rcp_pp(v_); // FIXME: Set result to 1.0 when division by zero
1922 void PixelPipeline::CND(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1924 { Short4 t0; t0 = src0.x; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.x; t1 = t1 & t0; t0 = ~t0 & src2.x; t0 = t0 | t1; dst.x = t0; };
1925 {Short4 t0; t0 = src0.y; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.y; t1 = t1 & t0; t0 = ~t0 & src2.y; t0 = t0 | t1; dst.y = t0; };
1926 {Short4 t0; t0 = src0.z; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.z; t1 = t1 & t0; t0 = ~t0 & src2.z; t0 = t0 | t1; dst.z = t0; };
1927 {Short4 t0; t0 = src0.w; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.w; t1 = t1 & t0; t0 = ~t0 & src2.w; t0 = t0 | t1; dst.w = t0; };
1930 void PixelPipeline::CMP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1932 { Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.x); Short4 t1; t1 = src2.x; t1 &= t0; t0 = ~t0 & src1.x; t0 |= t1; dst.x = t0; };
1933 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.y); Short4 t1; t1 = src2.y; t1 &= t0; t0 = ~t0 & src1.y; t0 |= t1; dst.y = t0; };
1934 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.z); Short4 t1; t1 = src2.z; t1 &= t0; t0 = ~t0 & src1.z; t0 |= t1; dst.z = t0; };
1935 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.w); Short4 t1; t1 = src2.w; t1 &= t0; t0 = ~t0 & src1.w; t0 |= t1; dst.w = t0; };
1938 void PixelPipeline::BEM(Vector4s &dst, Vector4s &src0, Vector4s &src1, int stage)
1943 // dst.x = src0.x + BUMPENVMAT00(stage) * src1.x + BUMPENVMAT10(stage) * src1.y
1944 t0 = MulHigh(src1.x, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[0][0]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard.
1945 t1 = MulHigh(src1.y, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[1][0]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard.
1946 t0 = AddSat(t0, t1);
1947 t0 = AddSat(t0, src0.x);
1950 // dst.y = src0.y + BUMPENVMAT01(stage) * src1.x + BUMPENVMAT11(stage) * src1.y
1951 t0 = MulHigh(src1.x, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[0][1]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard.
1952 t1 = MulHigh(src1.y, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[1][1]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard.
1953 t0 = AddSat(t0, t1);
1954 t0 = AddSat(t0, src0.y);