OSDN Git Service

e6237358a6a4a6244cb16adb196c5cd18809ce48
[android-x86/external-swiftshader.git] / src / Shader / PixelPipeline.cpp
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "PixelPipeline.hpp"
16 #include "SamplerCore.hpp"
17 #include "Renderer/Renderer.hpp"
18
19 namespace sw
20 {
21         extern bool postBlendSRGB;   // Declared here, defined outside this listing; when true, rasterOperation() never calls linearToSRGB12_16()
22
23         void PixelPipeline::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w)
24         {
25                 if(state.color[0].component & 0x1) diffuse.x = convertFixed12(v[0].x); else diffuse.x = Short4(0x1000);
26                 if(state.color[0].component & 0x2) diffuse.y = convertFixed12(v[0].y); else diffuse.y = Short4(0x1000);
27                 if(state.color[0].component & 0x4) diffuse.z = convertFixed12(v[0].z); else diffuse.z = Short4(0x1000);
28                 if(state.color[0].component & 0x8) diffuse.w = convertFixed12(v[0].w); else diffuse.w = Short4(0x1000);
29
30                 if(state.color[1].component & 0x1) specular.x = convertFixed12(v[1].x); else specular.x = Short4(0x0000);
31                 if(state.color[1].component & 0x2) specular.y = convertFixed12(v[1].y); else specular.y = Short4(0x0000);
32                 if(state.color[1].component & 0x4) specular.z = convertFixed12(v[1].z); else specular.z = Short4(0x0000);
33                 if(state.color[1].component & 0x8) specular.w = convertFixed12(v[1].w); else specular.w = Short4(0x0000);
34         }
35
36         void PixelPipeline::fixedFunction()
37         {
38                 current = diffuse;
39                 Vector4s temp(0x0000, 0x0000, 0x0000, 0x0000);
40
41                 for(int stage = 0; stage < 8; stage++)
42                 {
43                         if(state.textureStage[stage].stageOperation == TextureStage::STAGE_DISABLE)
44                         {
45                                 break;
46                         }
47
48                         Vector4s texture;
49
50                         if(state.textureStage[stage].usesTexture)
51                         {
52                                 texture = sampleTexture(stage, stage);
53                         }
54
55                         blendTexture(temp, texture, stage);
56                 }
57
58                 specularPixel(current, specular);
59         }
60
61         void PixelPipeline::applyShader(Int cMask[4])   // Processes each shader instruction in order; falls back to fixedFunction() when 'shader' is null
62         {
63                 if(!shader)
64                 {
65                         fixedFunction();
66                         return;
67                 }
68
69                 int pad = 0;        // Count number of texm3x3pad instructions; its parity (pad % 2) is the slot passed to TEXM3X3PAD
70                 Vector4s dPairing;   // Result of the first instruction of a pair, held until the co-issued instruction has been processed
71
72                 for(size_t i = 0; i < shader->getLength(); i++)
73                 {
74                         const Shader::Instruction *instruction = shader->getInstruction(i);
75                         Shader::Opcode opcode = instruction->opcode;
76
77                         //      #ifndef NDEBUG   // FIXME: Centralize debug output control
78                         //              shader->printInstruction(i, "debug.txt");
79                         //      #endif
80                         // Declaration and constant-definition opcodes are skipped entirely
81                         if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
82                         {
83                                 continue;
84                         }
85
86                         const Dst &dst = instruction->dst;
87                         const Src &src0 = instruction->src[0];
88                         const Src &src1 = instruction->src[1];
89                         const Src &src2 = instruction->src[2];
90
91                         unsigned short shaderModel = shader->getShaderModel();
92                         bool pairing = i + 1 < shader->getLength() && shader->getInstruction(i + 1)->coissue;   // First instruction of pair
93                         bool coissue = instruction->coissue;                                                              // Second instruction of pair
94
95                         Vector4s d;
96                         Vector4s s0;
97                         Vector4s s1;
98                         Vector4s s2;
99                         // Fetch only the source operands that are present (type other than PARAMETER_VOID)
100                         if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0);
101                         if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1);
102                         if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2);
103                         // Components of v[2 + index], passed as coordinates to the texture opcodes below: destination index below shader model 0x0104, first source index otherwise
104                         Float4 x = shaderModel < 0x0104 ? v[2 + dst.index].x : v[2 + src0.index].x;
105                         Float4 y = shaderModel < 0x0104 ? v[2 + dst.index].y : v[2 + src0.index].y;
106                         Float4 z = shaderModel < 0x0104 ? v[2 + dst.index].z : v[2 + src0.index].z;
107                         Float4 w = shaderModel < 0x0104 ? v[2 + dst.index].w : v[2 + src0.index].w;
108
109                         switch(opcode)
110                         {
111                         case Shader::OPCODE_PS_1_0: break;
112                         case Shader::OPCODE_PS_1_1: break;
113                         case Shader::OPCODE_PS_1_2: break;
114                         case Shader::OPCODE_PS_1_3: break;
115                         case Shader::OPCODE_PS_1_4: break;
116
117                         case Shader::OPCODE_DEF:    break;   // Not reached: OPCODE_DEF already hit the 'continue' above
118
119                         case Shader::OPCODE_NOP:    break;
120                         case Shader::OPCODE_MOV: MOV(d, s0);         break;
121                         case Shader::OPCODE_ADD: ADD(d, s0, s1);     break;
122                         case Shader::OPCODE_SUB: SUB(d, s0, s1);     break;
123                         case Shader::OPCODE_MAD: MAD(d, s0, s1, s2); break;
124                         case Shader::OPCODE_MUL: MUL(d, s0, s1);     break;
125                         case Shader::OPCODE_DP3: DP3(d, s0, s1);     break;
126                         case Shader::OPCODE_DP4: DP4(d, s0, s1);     break;
127                         case Shader::OPCODE_LRP: LRP(d, s0, s1, s2); break;
128                         case Shader::OPCODE_TEXCOORD:
129                                 if(shaderModel < 0x0104)
130                                 {
131                                         TEXCOORD(d, x, y, z, dst.index);
132                         }
133                                 else
134                                 {
135                                         if((src0.swizzle & 0x30) == 0x20)   // .xyz
136                                         {
137                                                 TEXCRD(d, x, y, z, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
138                                         }
139                                         else   // .xyw
140                                         {
141                                                 TEXCRD(d, x, y, w, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
142                                         }
143                                 }
144                                 break;
145                         case Shader::OPCODE_TEXKILL:
146                                 if(shaderModel < 0x0104)
147                                 {
148                                         TEXKILL(cMask, x, y, z);
149                                 }
150                                 else if(shaderModel == 0x0104)
151                                 {
152                                         if(dst.type == Shader::PARAMETER_TEXTURE)
153                                         {
154                                                 TEXKILL(cMask, x, y, z);
155                                         }
156                                         else
157                                         {
158                                                 TEXKILL(cMask, rs[dst.index]);
159                                         }
160                                 }
161                                 else ASSERT(false);   // Shader models above 0x0104 are not expected here
162                                 break;
163                         case Shader::OPCODE_TEX:
164                                 if(shaderModel < 0x0104)
165                                 {
166                                         TEX(d, x, y, z, dst.index, false);
167                                 }
168                                 else if(shaderModel == 0x0104)
169                                 {
170                                         if(src0.type == Shader::PARAMETER_TEXTURE)
171                                         {
172                                                 if((src0.swizzle & 0x30) == 0x20)   // .xyz
173                                                 {
174                                                         TEX(d, x, y, z, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
175                                                 }
176                                                 else   // .xyw
177                                                 {
178                                                         TEX(d, x, y, w, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
179                                                 }
180                                         }
181                                         else
182                                         {
183                                                 TEXLD(d, s0, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
184                                         }
185                                 }
186                                 else ASSERT(false);   // Shader models above 0x0104 are not expected here
187                                 break;
188                         case Shader::OPCODE_TEXBEM:       TEXBEM(d, s0, x, y, z, dst.index);                                             break;
189                         case Shader::OPCODE_TEXBEML:      TEXBEML(d, s0, x, y, z, dst.index);                                            break;
190                         case Shader::OPCODE_TEXREG2AR:    TEXREG2AR(d, s0, dst.index);                                                   break;
191                         case Shader::OPCODE_TEXREG2GB:    TEXREG2GB(d, s0, dst.index);                                                   break;
192                         case Shader::OPCODE_TEXM3X2PAD:   TEXM3X2PAD(x, y, z, s0, 0, src0.modifier == Shader::MODIFIER_SIGN);            break;
193                         case Shader::OPCODE_TEXM3X2TEX:   TEXM3X2TEX(d, x, y, z, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
194                         case Shader::OPCODE_TEXM3X3PAD:   TEXM3X3PAD(x, y, z, s0, pad++ % 2, src0.modifier == Shader::MODIFIER_SIGN);    break;
195                         case Shader::OPCODE_TEXM3X3TEX:   TEXM3X3TEX(d, x, y, z, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
196                         case Shader::OPCODE_TEXM3X3SPEC:  TEXM3X3SPEC(d, x, y, z, dst.index, s0, s1);                                    break;
197                         case Shader::OPCODE_TEXM3X3VSPEC: TEXM3X3VSPEC(d, x, y, z, dst.index, s0);                                       break;
198                         case Shader::OPCODE_CND:          CND(d, s0, s1, s2);                                                            break;
199                         case Shader::OPCODE_TEXREG2RGB:   TEXREG2RGB(d, s0, dst.index);                                                  break;
200                         case Shader::OPCODE_TEXDP3TEX:    TEXDP3TEX(d, x, y, z, dst.index, s0);                                          break;
201                         case Shader::OPCODE_TEXM3X2DEPTH: TEXM3X2DEPTH(d, x, y, z, s0, src0.modifier == Shader::MODIFIER_SIGN);          break;
202                         case Shader::OPCODE_TEXDP3:       TEXDP3(d, x, y, z, s0);                                                        break;
203                         case Shader::OPCODE_TEXM3X3:      TEXM3X3(d, x, y, z, s0, src0.modifier == Shader::MODIFIER_SIGN);               break;
204                         case Shader::OPCODE_TEXDEPTH:     TEXDEPTH();                                                                    break;
205                         case Shader::OPCODE_CMP0:         CMP(d, s0, s1, s2);                                                            break;
206                         case Shader::OPCODE_BEM:          BEM(d, s0, s1, dst.index);                                                     break;
207                         case Shader::OPCODE_PHASE:                                                                                       break;
208                         case Shader::OPCODE_END:                                                                                         break;
209                         default:
210                                 ASSERT(false);
211                         }
212                         // Destination modifiers and write-back; skipped for void destinations and for TEXKILL
213                         if(dst.type != Shader::PARAMETER_VOID && opcode != Shader::OPCODE_TEXKILL)
214                         {
215                                 if(dst.shift > 0)   // Positive shift: saturating doubling, applied once, twice or three times to the masked components
216                                 {
217                                         if(dst.mask & 0x1) { d.x = AddSat(d.x, d.x); if(dst.shift > 1) d.x = AddSat(d.x, d.x); if(dst.shift > 2) d.x = AddSat(d.x, d.x); }
218                                         if(dst.mask & 0x2) { d.y = AddSat(d.y, d.y); if(dst.shift > 1) d.y = AddSat(d.y, d.y); if(dst.shift > 2) d.y = AddSat(d.y, d.y); }
219                                         if(dst.mask & 0x4) { d.z = AddSat(d.z, d.z); if(dst.shift > 1) d.z = AddSat(d.z, d.z); if(dst.shift > 2) d.z = AddSat(d.z, d.z); }
220                                         if(dst.mask & 0x8) { d.w = AddSat(d.w, d.w); if(dst.shift > 1) d.w = AddSat(d.w, d.w); if(dst.shift > 2) d.w = AddSat(d.w, d.w); }
221                                 }
222                                 else if(dst.shift < 0)   // Negative shift: right shift of the masked components by -dst.shift
223                                 {
224                                         if(dst.mask & 0x1) d.x = d.x >> -dst.shift;
225                                         if(dst.mask & 0x2) d.y = d.y >> -dst.shift;
226                                         if(dst.mask & 0x4) d.z = d.z >> -dst.shift;
227                                         if(dst.mask & 0x8) d.w = d.w >> -dst.shift;
228                                 }
229
230                                 if(dst.saturate)   // Clamp the masked components to [0x0000, 0x1000]
231                                 {
232                                         if(dst.mask & 0x1) { d.x = Min(d.x, Short4(0x1000)); d.x = Max(d.x, Short4(0x0000)); }
233                                         if(dst.mask & 0x2) { d.y = Min(d.y, Short4(0x1000)); d.y = Max(d.y, Short4(0x0000)); }
234                                         if(dst.mask & 0x4) { d.z = Min(d.z, Short4(0x1000)); d.z = Max(d.z, Short4(0x0000)); }
235                                         if(dst.mask & 0x8) { d.w = Min(d.w, Short4(0x1000)); d.w = Max(d.w, Short4(0x0000)); }
236                                 }
237
238                                 if(pairing)   // First of a pair: keep the masked result in dPairing instead of writing it now
239                                 {
240                                         if(dst.mask & 0x1) dPairing.x = d.x;
241                                         if(dst.mask & 0x2) dPairing.y = d.y;
242                                         if(dst.mask & 0x4) dPairing.z = d.z;
243                                         if(dst.mask & 0x8) dPairing.w = d.w;
244                                 }
245
246                                 if(coissue)   // Second of a pair: write the held result to the previous instruction's destination first
247                                 {
248                                         const Dst &dst = shader->getInstruction(i - 1)->dst;   // Shadows the outer 'dst' in this block only. NOTE(review): assumes i > 0 - confirm a co-issued instruction can never be first
249
250                                         writeDestination(dPairing, dst);
251                                 }
252
253                                 if(!pairing)   // Not the first of a pair: write this instruction's own result immediately
254                                 {
255                                         writeDestination(d, dst);
256                                 }
257                         }
258                 }
259         }
260
261         Bool PixelPipeline::alphaTest(Int cMask[4])
262         {
263                 current.x = Min(current.x, Short4(0x0FFF)); current.x = Max(current.x, Short4(0x0000));
264                 current.y = Min(current.y, Short4(0x0FFF)); current.y = Max(current.y, Short4(0x0000));
265                 current.z = Min(current.z, Short4(0x0FFF)); current.z = Max(current.z, Short4(0x0000));
266                 current.w = Min(current.w, Short4(0x0FFF)); current.w = Max(current.w, Short4(0x0000));
267
268                 if(!state.alphaTestActive())
269                 {
270                         return true;
271                 }
272
273                 Int aMask;
274
275                 if(state.transparencyAntialiasing == TRANSPARENCY_NONE)
276                 {
277                         PixelRoutine::alphaTest(aMask, current.w);
278
279                         for(unsigned int q = 0; q < state.multiSample; q++)
280                         {
281                                 cMask[q] &= aMask;
282                         }
283                 }
284                 else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
285                 {
286                         Float4 alpha = Float4(current.w) * Float4(1.0f / 0x1000);
287
288                         alphaToCoverage(cMask, alpha);
289                 }
290                 else ASSERT(false);
291
292                 Int pass = cMask[0];
293
294                 for(unsigned int q = 1; q < state.multiSample; q++)
295                 {
296                         pass = pass | cMask[q];
297                 }
298
299                 return pass != 0x0;
300         }
301
302         void PixelPipeline::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
303         {
304                 if(!state.colorWriteActive(0))
305                 {
306                         return;
307                 }
308
309                 Vector4f oC;
310
311                 switch(state.targetFormat[0])
312                 {
313                 case FORMAT_R5G6B5:
314                 case FORMAT_X8R8G8B8:
315                 case FORMAT_X8B8G8R8:
316                 case FORMAT_A8R8G8B8:
317                 case FORMAT_A8B8G8R8:
318                 case FORMAT_A8:
319                 case FORMAT_G16R16:
320                 case FORMAT_A16B16G16R16:
321                         if(!postBlendSRGB && state.writeSRGB)
322                         {
323                                 linearToSRGB12_16(current);
324                         }
325                         else
326                         {
327                                 current.x <<= 4;
328                                 current.y <<= 4;
329                                 current.z <<= 4;
330                                 current.w <<= 4;
331                         }
332
333                         if(state.targetFormat[0] == FORMAT_R5G6B5)
334                         {
335                                 current.x &= Short4(0xF800u);
336                                 current.y &= Short4(0xFC00u);
337                                 current.z &= Short4(0xF800u);
338                         }
339
340                         fogBlend(current, fog);
341
342                         for(unsigned int q = 0; q < state.multiSample; q++)
343                         {
344                                 Pointer<Byte> buffer = cBuffer[0] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[0]));
345                                 Vector4s color = current;
346
347                                 if(state.multiSampleMask & (1 << q))
348                                 {
349                                         alphaBlend(0, buffer, color, x);
350                                         logicOperation(0, buffer, color, x);
351                                         writeColor(0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
352                                 }
353                         }
354                         break;
355                 case FORMAT_R32F:
356                 case FORMAT_G32R32F:
357                 case FORMAT_X32B32G32R32F:
358                 case FORMAT_A32B32G32R32F:
359         //      case FORMAT_X32B32G32R32F_UNSIGNED:   // Not renderable in any fixed-function API.
360                         convertSigned12(oC, current);
361                         PixelRoutine::fogBlend(oC, fog);
362
363                         for(unsigned int q = 0; q < state.multiSample; q++)
364                         {
365                                 Pointer<Byte> buffer = cBuffer[0] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[0]));
366                                 Vector4f color = oC;
367
368                                 if(state.multiSampleMask & (1 << q))
369                                 {
370                                         alphaBlend(0, buffer, color, x);
371                                         writeColor(0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
372                                 }
373                         }
374                         break;
375                 default:
376                         ASSERT(false);
377                 }
378         }
379
380         void PixelPipeline::blendTexture(Vector4s &temp, Vector4s &texture, int stage)
381         {
382                 Vector4s *arg1 = nullptr;
383                 Vector4s *arg2 = nullptr;
384                 Vector4s *arg3 = nullptr;
385                 Vector4s res;
386
387                 Vector4s constant;
388                 Vector4s tfactor;
389
390                 const TextureStage::State &textureStage = state.textureStage[stage];
391
392                 if(textureStage.firstArgument == TextureStage::SOURCE_CONSTANT ||
393                    textureStage.firstArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
394                    textureStage.secondArgument == TextureStage::SOURCE_CONSTANT ||
395                    textureStage.secondArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
396                    textureStage.thirdArgument == TextureStage::SOURCE_CONSTANT ||
397                    textureStage.thirdArgumentAlpha == TextureStage::SOURCE_CONSTANT)
398                 {
399                         constant.x = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[0]));
400                         constant.y = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[1]));
401                         constant.z = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[2]));
402                         constant.w = *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].constantColor4[3]));
403                 }
404
405                 if(textureStage.firstArgument == TextureStage::SOURCE_TFACTOR ||
406                    textureStage.firstArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
407                    textureStage.secondArgument == TextureStage::SOURCE_TFACTOR ||
408                    textureStage.secondArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
409                    textureStage.thirdArgument == TextureStage::SOURCE_TFACTOR ||
410                    textureStage.thirdArgumentAlpha == TextureStage::SOURCE_TFACTOR)
411                 {
412                         tfactor.x = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[0]));
413                         tfactor.y = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[1]));
414                         tfactor.z = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[2]));
415                         tfactor.w = *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]));
416                 }
417
418                 // Premodulate
419                 if(stage > 0 && textureStage.usesTexture)
420                 {
421                         if(state.textureStage[stage - 1].stageOperation == TextureStage::STAGE_PREMODULATE)
422                         {
423                                 current.x = MulHigh(current.x, texture.x) << 4;
424                                 current.y = MulHigh(current.y, texture.y) << 4;
425                                 current.z = MulHigh(current.z, texture.z) << 4;
426                         }
427
428                         if(state.textureStage[stage - 1].stageOperationAlpha == TextureStage::STAGE_PREMODULATE)
429                         {
430                                 current.w = MulHigh(current.w, texture.w) << 4;
431                         }
432                 }
433
434                 if(luminance)
435                 {
436                         texture.x = MulHigh(texture.x, L) << 4;
437                         texture.y = MulHigh(texture.y, L) << 4;
438                         texture.z = MulHigh(texture.z, L) << 4;
439
440                         luminance = false;
441                 }
442
443                 switch(textureStage.firstArgument)
444                 {
445                 case TextureStage::SOURCE_TEXTURE:      arg1 = &texture;    break;
446                 case TextureStage::SOURCE_CONSTANT:     arg1 = &constant;   break;
447                 case TextureStage::SOURCE_CURRENT:      arg1 = &current;  break;
448                 case TextureStage::SOURCE_DIFFUSE:      arg1 = &diffuse;  break;
449                 case TextureStage::SOURCE_SPECULAR:     arg1 = &specular; break;
450                 case TextureStage::SOURCE_TEMP:         arg1 = &temp;       break;
451                 case TextureStage::SOURCE_TFACTOR:      arg1 = &tfactor;    break;
452                 default:
453                         ASSERT(false);
454                 }
455
456                 switch(textureStage.secondArgument)
457                 {
458                 case TextureStage::SOURCE_TEXTURE:      arg2 = &texture;    break;
459                 case TextureStage::SOURCE_CONSTANT:     arg2 = &constant;   break;
460                 case TextureStage::SOURCE_CURRENT:      arg2 = &current;  break;
461                 case TextureStage::SOURCE_DIFFUSE:      arg2 = &diffuse;  break;
462                 case TextureStage::SOURCE_SPECULAR:     arg2 = &specular; break;
463                 case TextureStage::SOURCE_TEMP:         arg2 = &temp;       break;
464                 case TextureStage::SOURCE_TFACTOR:      arg2 = &tfactor;    break;
465                 default:
466                         ASSERT(false);
467                 }
468
469                 switch(textureStage.thirdArgument)
470                 {
471                 case TextureStage::SOURCE_TEXTURE:      arg3 = &texture;    break;
472                 case TextureStage::SOURCE_CONSTANT:     arg3 = &constant;   break;
473                 case TextureStage::SOURCE_CURRENT:      arg3 = &current;  break;
474                 case TextureStage::SOURCE_DIFFUSE:      arg3 = &diffuse;  break;
475                 case TextureStage::SOURCE_SPECULAR:     arg3 = &specular; break;
476                 case TextureStage::SOURCE_TEMP:         arg3 = &temp;       break;
477                 case TextureStage::SOURCE_TFACTOR:      arg3 = &tfactor;    break;
478                 default:
479                         ASSERT(false);
480                 }
481
482                 Vector4s mod1;
483                 Vector4s mod2;
484                 Vector4s mod3;
485
486                 switch(textureStage.firstModifier)
487                 {
488                 case TextureStage::MODIFIER_COLOR:
489                         break;
490                 case TextureStage::MODIFIER_INVCOLOR:
491                         mod1.x = SubSat(Short4(0x1000), arg1->x);
492                         mod1.y = SubSat(Short4(0x1000), arg1->y);
493                         mod1.z = SubSat(Short4(0x1000), arg1->z);
494                         mod1.w = SubSat(Short4(0x1000), arg1->w);
495
496                         arg1 = &mod1;
497                         break;
498                 case TextureStage::MODIFIER_ALPHA:
499                         mod1.x = arg1->w;
500                         mod1.y = arg1->w;
501                         mod1.z = arg1->w;
502                         mod1.w = arg1->w;
503
504                         arg1 = &mod1;
505                         break;
506                 case TextureStage::MODIFIER_INVALPHA:
507                         mod1.x = SubSat(Short4(0x1000), arg1->w);
508                         mod1.y = SubSat(Short4(0x1000), arg1->w);
509                         mod1.z = SubSat(Short4(0x1000), arg1->w);
510                         mod1.w = SubSat(Short4(0x1000), arg1->w);
511
512                         arg1 = &mod1;
513                         break;
514                 default:
515                         ASSERT(false);
516                 }
517
518                 switch(textureStage.secondModifier)
519                 {
520                 case TextureStage::MODIFIER_COLOR:
521                         break;
522                 case TextureStage::MODIFIER_INVCOLOR:
523                         mod2.x = SubSat(Short4(0x1000), arg2->x);
524                         mod2.y = SubSat(Short4(0x1000), arg2->y);
525                         mod2.z = SubSat(Short4(0x1000), arg2->z);
526                         mod2.w = SubSat(Short4(0x1000), arg2->w);
527
528                         arg2 = &mod2;
529                         break;
530                 case TextureStage::MODIFIER_ALPHA:
531                         mod2.x = arg2->w;
532                         mod2.y = arg2->w;
533                         mod2.z = arg2->w;
534                         mod2.w = arg2->w;
535
536                         arg2 = &mod2;
537                         break;
538                 case TextureStage::MODIFIER_INVALPHA:
539                         mod2.x = SubSat(Short4(0x1000), arg2->w);
540                         mod2.y = SubSat(Short4(0x1000), arg2->w);
541                         mod2.z = SubSat(Short4(0x1000), arg2->w);
542                         mod2.w = SubSat(Short4(0x1000), arg2->w);
543
544                         arg2 = &mod2;
545                         break;
546                 default:
547                         ASSERT(false);
548                 }
549
550                 switch(textureStage.thirdModifier)
551                 {
552                 case TextureStage::MODIFIER_COLOR:
553                         break;
554                 case TextureStage::MODIFIER_INVCOLOR:
555                         mod3.x = SubSat(Short4(0x1000), arg3->x);
556                         mod3.y = SubSat(Short4(0x1000), arg3->y);
557                         mod3.z = SubSat(Short4(0x1000), arg3->z);
558                         mod3.w = SubSat(Short4(0x1000), arg3->w);
559
560                         arg3 = &mod3;
561                         break;
562                 case TextureStage::MODIFIER_ALPHA:
563                         mod3.x = arg3->w;
564                         mod3.y = arg3->w;
565                         mod3.z = arg3->w;
566                         mod3.w = arg3->w;
567
568                         arg3 = &mod3;
569                         break;
570                 case TextureStage::MODIFIER_INVALPHA:
571                         mod3.x = SubSat(Short4(0x1000), arg3->w);
572                         mod3.y = SubSat(Short4(0x1000), arg3->w);
573                         mod3.z = SubSat(Short4(0x1000), arg3->w);
574                         mod3.w = SubSat(Short4(0x1000), arg3->w);
575
576                         arg3 = &mod3;
577                         break;
578                 default:
579                         ASSERT(false);
580                 }
581
582                 switch(textureStage.stageOperation)
583                 {
584                 case TextureStage::STAGE_DISABLE:
585                         break;
586                 case TextureStage::STAGE_SELECTARG1: // Arg1
587                         res.x = arg1->x;
588                         res.y = arg1->y;
589                         res.z = arg1->z;
590                         break;
591                 case TextureStage::STAGE_SELECTARG2: // Arg2
592                         res.x = arg2->x;
593                         res.y = arg2->y;
594                         res.z = arg2->z;
595                         break;
596                 case TextureStage::STAGE_SELECTARG3: // Arg3
597                         res.x = arg3->x;
598                         res.y = arg3->y;
599                         res.z = arg3->z;
600                         break;
601                 case TextureStage::STAGE_MODULATE: // Arg1 * Arg2
602                         res.x = MulHigh(arg1->x, arg2->x) << 4;
603                         res.y = MulHigh(arg1->y, arg2->y) << 4;
604                         res.z = MulHigh(arg1->z, arg2->z) << 4;
605                         break;
606                 case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2
607                         res.x = MulHigh(arg1->x, arg2->x) << 5;
608                         res.y = MulHigh(arg1->y, arg2->y) << 5;
609                         res.z = MulHigh(arg1->z, arg2->z) << 5;
610                         break;
611                 case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4
612                         res.x = MulHigh(arg1->x, arg2->x) << 6;
613                         res.y = MulHigh(arg1->y, arg2->y) << 6;
614                         res.z = MulHigh(arg1->z, arg2->z) << 6;
615                         break;
616                 case TextureStage::STAGE_ADD: // Arg1 + Arg2
617                         res.x = AddSat(arg1->x, arg2->x);
618                         res.y = AddSat(arg1->y, arg2->y);
619                         res.z = AddSat(arg1->z, arg2->z);
620                         break;
621                 case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5
622                         res.x = AddSat(arg1->x, arg2->x);
623                         res.y = AddSat(arg1->y, arg2->y);
624                         res.z = AddSat(arg1->z, arg2->z);
625
626                         res.x = SubSat(res.x, Short4(0x0800));
627                         res.y = SubSat(res.y, Short4(0x0800));
628                         res.z = SubSat(res.z, Short4(0x0800));
629                         break;
630                 case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1
631                         res.x = AddSat(arg1->x, arg2->x);
632                         res.y = AddSat(arg1->y, arg2->y);
633                         res.z = AddSat(arg1->z, arg2->z);
634
635                         res.x = SubSat(res.x, Short4(0x0800));
636                         res.y = SubSat(res.y, Short4(0x0800));
637                         res.z = SubSat(res.z, Short4(0x0800));
638
639                         res.x = AddSat(res.x, res.x);
640                         res.y = AddSat(res.y, res.y);
641                         res.z = AddSat(res.z, res.z);
642                         break;
643                 case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2
644                         res.x = SubSat(arg1->x, arg2->x);
645                         res.y = SubSat(arg1->y, arg2->y);
646                         res.z = SubSat(arg1->z, arg2->z);
647                         break;
648                 case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2
649                         {
650                                 Short4 tmp;
651
652                                 tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(arg1->x, arg2->x); res.x = SubSat(res.x, tmp);
653                                 tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(arg1->y, arg2->y); res.y = SubSat(res.y, tmp);
654                                 tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(arg1->z, arg2->z); res.z = SubSat(res.z, tmp);
655                         }
656                         break;
657                 case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2
658                         res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg3->x);
659                         res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg3->y);
660                         res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg3->z);
661                         break;
662                 case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2
663                         res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, arg3->x) << 4; res.x = AddSat(res.x, arg2->x);
664                         res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, arg3->y) << 4; res.y = AddSat(res.y, arg2->y);
665                         res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, arg3->z) << 4; res.z = AddSat(res.z, arg2->z);
666                         break;
667                 case TextureStage::STAGE_DOT3: // 2 * (Arg1.x - 0.5) * 2 * (Arg2.x - 0.5) + 2 * (Arg1.y - 0.5) * 2 * (Arg2.y - 0.5) + 2 * (Arg1.z - 0.5) * 2 * (Arg2.z - 0.5)
668                         {
669                                 Short4 tmp;
670
671                                 res.x = SubSat(arg1->x, Short4(0x0800)); tmp = SubSat(arg2->x, Short4(0x0800)); res.x = MulHigh(res.x, tmp);
672                                 res.y = SubSat(arg1->y, Short4(0x0800)); tmp = SubSat(arg2->y, Short4(0x0800)); res.y = MulHigh(res.y, tmp);
673                                 res.z = SubSat(arg1->z, Short4(0x0800)); tmp = SubSat(arg2->z, Short4(0x0800)); res.z = MulHigh(res.z, tmp);
674
675                                 res.x = res.x << 6;
676                                 res.y = res.y << 6;
677                                 res.z = res.z << 6;
678
679                                 res.x = AddSat(res.x, res.y);
680                                 res.x = AddSat(res.x, res.z);
681
682                                 // Clamp to [0, 1]
683                                 res.x = Max(res.x, Short4(0x0000));
684                                 res.x = Min(res.x, Short4(0x1000));
685
686                                 res.y = res.x;
687                                 res.z = res.x;
688                                 res.w = res.x;
689                         }
690                         break;
691                 case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2
692                         res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, current.w) << 4; res.x = AddSat(res.x, arg2->x);
693                         res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, current.w) << 4; res.y = AddSat(res.y, arg2->y);
694                         res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, current.w) << 4; res.z = AddSat(res.z, arg2->z);
695                         break;
696                 case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Alpha * (Arg1 - Arg2) + Arg2
697                         res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, diffuse.w) << 4; res.x = AddSat(res.x, arg2->x);
698                         res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, diffuse.w) << 4; res.y = AddSat(res.y, arg2->y);
699                         res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, diffuse.w) << 4; res.z = AddSat(res.z, arg2->z);
700                         break;
701                 case TextureStage::STAGE_BLENDFACTORALPHA: // Alpha * (Arg1 - Arg2) + Arg2
702                         res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.x = AddSat(res.x, arg2->x);
703                         res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.y = AddSat(res.y, arg2->y);
704                         res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.z = AddSat(res.z, arg2->z);
705                         break;
706                 case TextureStage::STAGE_BLENDTEXTUREALPHA: // Alpha * (Arg1 - Arg2) + Arg2
707                         res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, texture.w) << 4; res.x = AddSat(res.x, arg2->x);
708                         res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, texture.w) << 4; res.y = AddSat(res.y, arg2->y);
709                         res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, texture.w) << 4; res.z = AddSat(res.z, arg2->z);
710                         break;
711                 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha)
712                         res.x = SubSat(Short4(0x1000), texture.w); res.x = MulHigh(res.x, arg2->x) << 4; res.x = AddSat(res.x, arg1->x);
713                         res.y = SubSat(Short4(0x1000), texture.w); res.y = MulHigh(res.y, arg2->y) << 4; res.y = AddSat(res.y, arg1->y);
714                         res.z = SubSat(Short4(0x1000), texture.w); res.z = MulHigh(res.z, arg2->z) << 4; res.z = AddSat(res.z, arg1->z);
715                         break;
716                 case TextureStage::STAGE_PREMODULATE:
717                         res.x = arg1->x;
718                         res.y = arg1->y;
719                         res.z = arg1->z;
720                         break;
721                 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: // Arg1 + Arg1.w * Arg2
722                         res.x = MulHigh(arg1->w, arg2->x) << 4; res.x = AddSat(res.x, arg1->x);
723                         res.y = MulHigh(arg1->w, arg2->y) << 4; res.y = AddSat(res.y, arg1->y);
724                         res.z = MulHigh(arg1->w, arg2->z) << 4; res.z = AddSat(res.z, arg1->z);
725                         break;
726                 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: // Arg1 * Arg2 + Arg1.w
727                         res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg1->w);
728                         res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg1->w);
729                         res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg1->w);
730                         break;
731                 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: // (1 - Arg1.w) * Arg2 + Arg1
732                         {
733                                 Short4 tmp;
734
735                                 res.x = AddSat(arg1->x, arg2->x); tmp = MulHigh(arg1->w, arg2->x) << 4; res.x = SubSat(res.x, tmp);
736                                 res.y = AddSat(arg1->y, arg2->y); tmp = MulHigh(arg1->w, arg2->y) << 4; res.y = SubSat(res.y, tmp);
737                                 res.z = AddSat(arg1->z, arg2->z); tmp = MulHigh(arg1->w, arg2->z) << 4; res.z = SubSat(res.z, tmp);
738                         }
739                         break;
740                 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: // (1 - Arg1) * Arg2 + Arg1.w
741                         {
742                                 Short4 tmp;
743
744                                 res.x = AddSat(arg1->w, arg2->x); tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = SubSat(res.x, tmp);
745                                 res.y = AddSat(arg1->w, arg2->y); tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = SubSat(res.y, tmp);
746                                 res.z = AddSat(arg1->w, arg2->z); tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = SubSat(res.z, tmp);
747                         }
748                         break;
749                 case TextureStage::STAGE_BUMPENVMAP:
750                         {
751                                 du = Float4(texture.x) * Float4(1.0f / 0x0FE0);
752                                 dv = Float4(texture.y) * Float4(1.0f / 0x0FE0);
753
754                                 Float4 du2;
755                                 Float4 dv2;
756
757                                 du2 = du;
758                                 dv2 = dv;
759                                 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
760                                 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
761                                 du += dv2;
762                                 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
763                                 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
764                                 dv += du2;
765
766                                 perturbate = true;
767
768                                 res.x = current.x;
769                                 res.y = current.y;
770                                 res.z = current.z;
771                                 res.w = current.w;
772                         }
773                         break;
774                 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
775                         {
776                                 du = Float4(texture.x) * Float4(1.0f / 0x0FE0);
777                                 dv = Float4(texture.y) * Float4(1.0f / 0x0FE0);
778
779                                 Float4 du2;
780                                 Float4 dv2;
781
782                                 du2 = du;
783                                 dv2 = dv;
784
785                                 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
786                                 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
787                                 du += dv2;
788                                 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
789                                 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
790                                 dv += du2;
791
792                                 perturbate = true;
793
794                                 L = texture.z;
795                                 L = MulHigh(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceScale4)));
796                                 L = L << 4;
797                                 L = AddSat(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceOffset4)));
798                                 L = Max(L, Short4(0x0000));
799                                 L = Min(L, Short4(0x1000));
800
801                                 luminance = true;
802
803                                 res.x = current.x;
804                                 res.y = current.y;
805                                 res.z = current.z;
806                                 res.w = current.w;
807                         }
808                         break;
809                 default:
810                         ASSERT(false);
811                 }
812
813                 if(textureStage.stageOperation != TextureStage::STAGE_DOT3)
814                 {
815                         switch(textureStage.firstArgumentAlpha)
816                         {
817                         case TextureStage::SOURCE_TEXTURE:      arg1 = &texture;                break;
818                         case TextureStage::SOURCE_CONSTANT:     arg1 = &constant;               break;
819                         case TextureStage::SOURCE_CURRENT:      arg1 = &current;                break;
820                         case TextureStage::SOURCE_DIFFUSE:      arg1 = &diffuse;                break;
821                         case TextureStage::SOURCE_SPECULAR:     arg1 = &specular;               break;
822                         case TextureStage::SOURCE_TEMP:         arg1 = &temp;                   break;
823                         case TextureStage::SOURCE_TFACTOR:      arg1 = &tfactor;                break;
824                         default:
825                                 ASSERT(false);
826                         }
827
828                         switch(textureStage.secondArgumentAlpha)
829                         {
830                         case TextureStage::SOURCE_TEXTURE:      arg2 = &texture;                break;
831                         case TextureStage::SOURCE_CONSTANT:     arg2 = &constant;               break;
832                         case TextureStage::SOURCE_CURRENT:      arg2 = &current;                break;
833                         case TextureStage::SOURCE_DIFFUSE:      arg2 = &diffuse;                break;
834                         case TextureStage::SOURCE_SPECULAR:     arg2 = &specular;               break;
835                         case TextureStage::SOURCE_TEMP:         arg2 = &temp;                   break;
836                         case TextureStage::SOURCE_TFACTOR:      arg2 = &tfactor;                break;
837                         default:
838                                 ASSERT(false);
839                         }
840
841                         switch(textureStage.thirdArgumentAlpha)
842                         {
843                         case TextureStage::SOURCE_TEXTURE:      arg3 = &texture;                break;
844                         case TextureStage::SOURCE_CONSTANT:     arg3 = &constant;               break;
845                         case TextureStage::SOURCE_CURRENT:      arg3 = &current;                break;
846                         case TextureStage::SOURCE_DIFFUSE:      arg3 = &diffuse;                break;
847                         case TextureStage::SOURCE_SPECULAR:     arg3 = &specular;               break;
848                         case TextureStage::SOURCE_TEMP:         arg3 = &temp;                   break;
849                         case TextureStage::SOURCE_TFACTOR:      arg3 = &tfactor;                break;
850                         default:
851                                 ASSERT(false);
852                         }
853
854                         switch(textureStage.firstModifierAlpha)   // FIXME: Check if actually used
855                         {
856                         case TextureStage::MODIFIER_COLOR:
857                                 break;
858                         case TextureStage::MODIFIER_INVCOLOR:
859                                 mod1.w = SubSat(Short4(0x1000), arg1->w);
860
861                                 arg1 = &mod1;
862                                 break;
863                         case TextureStage::MODIFIER_ALPHA:
864                                 // Redundant
865                                 break;
866                         case TextureStage::MODIFIER_INVALPHA:
867                                 mod1.w = SubSat(Short4(0x1000), arg1->w);
868
869                                 arg1 = &mod1;
870                                 break;
871                         default:
872                                 ASSERT(false);
873                         }
874
875                         switch(textureStage.secondModifierAlpha)   // FIXME: Check if actually used
876                         {
877                         case TextureStage::MODIFIER_COLOR:
878                                 break;
879                         case TextureStage::MODIFIER_INVCOLOR:
880                                 mod2.w = SubSat(Short4(0x1000), arg2->w);
881
882                                 arg2 = &mod2;
883                                 break;
884                         case TextureStage::MODIFIER_ALPHA:
885                                 // Redundant
886                                 break;
887                         case TextureStage::MODIFIER_INVALPHA:
888                                 mod2.w = SubSat(Short4(0x1000), arg2->w);
889
890                                 arg2 = &mod2;
891                                 break;
892                         default:
893                                 ASSERT(false);
894                         }
895
896                         switch(textureStage.thirdModifierAlpha)   // FIXME: Check if actually used
897                         {
898                         case TextureStage::MODIFIER_COLOR:
899                                 break;
900                         case TextureStage::MODIFIER_INVCOLOR:
901                                 mod3.w = SubSat(Short4(0x1000), arg3->w);
902
903                                 arg3 = &mod3;
904                                 break;
905                         case TextureStage::MODIFIER_ALPHA:
906                                 // Redundant
907                                 break;
908                         case TextureStage::MODIFIER_INVALPHA:
909                                 mod3.w = SubSat(Short4(0x1000), arg3->w);
910
911                                 arg3 = &mod3;
912                                 break;
913                         default:
914                                 ASSERT(false);
915                         }
916
917                         switch(textureStage.stageOperationAlpha)
918                         {
919                         case TextureStage::STAGE_DISABLE:
920                                 break;
921                         case TextureStage::STAGE_SELECTARG1: // Arg1
922                                 res.w = arg1->w;
923                                 break;
924                         case TextureStage::STAGE_SELECTARG2: // Arg2
925                                 res.w = arg2->w;
926                                 break;
927                         case TextureStage::STAGE_SELECTARG3: // Arg3
928                                 res.w = arg3->w;
929                                 break;
930                         case TextureStage::STAGE_MODULATE: // Arg1 * Arg2
931                                 res.w = MulHigh(arg1->w, arg2->w) << 4;
932                                 break;
933                         case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2
934                                 res.w = MulHigh(arg1->w, arg2->w) << 5;
935                                 break;
936                         case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4
937                                 res.w = MulHigh(arg1->w, arg2->w) << 6;
938                                 break;
939                         case TextureStage::STAGE_ADD: // Arg1 + Arg2
940                                 res.w = AddSat(arg1->w, arg2->w);
941                                 break;
942                         case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5
943                                 res.w = AddSat(arg1->w, arg2->w);
944                                 res.w = SubSat(res.w, Short4(0x0800));
945                                 break;
946                         case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1
947                                 res.w = AddSat(arg1->w, arg2->w);
948                                 res.w = SubSat(res.w, Short4(0x0800));
949                                 res.w = AddSat(res.w, res.w);
950                                 break;
951                         case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2
952                                 res.w = SubSat(arg1->w, arg2->w);
953                                 break;
954                         case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2
955                                 {
956                                         Short4 tmp;
957
958                                         tmp = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(arg1->w, arg2->w); res.w = SubSat(res.w, tmp);
959                                 }
960                                 break;
961                         case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2
962                                 res.w = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(res.w, arg3->w);
963                                 break;
964                         case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2
965                                 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, arg3->w) << 4; res.w = AddSat(res.w, arg2->w);
966                                 break;
967                         case TextureStage::STAGE_DOT3:
968                                 break;   // Already computed in color channel
969                         case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2
970                                 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, current.w) << 4; res.w = AddSat(res.w, arg2->w);
971                                 break;
972                         case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha)
973                                 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, diffuse.w) << 4; res.w = AddSat(res.w, arg2->w);
974                                 break;
975                         case TextureStage::STAGE_BLENDFACTORALPHA:
976                                 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, *Pointer<Short4>(data + OFFSET(DrawData, factor.textureFactor4[3]))) << 4; res.w = AddSat(res.w, arg2->w);
977                                 break;
978                         case TextureStage::STAGE_BLENDTEXTUREALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha)
979                                 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, texture.w) << 4; res.w = AddSat(res.w, arg2->w);
980                                 break;
981                         case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha)
982                                 res.w = SubSat(Short4(0x1000), texture.w); res.w = MulHigh(res.w, arg2->w) << 4; res.w = AddSat(res.w, arg1->w);
983                                 break;
984                         case TextureStage::STAGE_PREMODULATE:
985                                 res.w = arg1->w;
986                                 break;
987                         case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
988                         case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
989                         case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
990                         case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
991                         case TextureStage::STAGE_BUMPENVMAP:
992                         case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
993                                 break;   // Invalid alpha operations
994                         default:
995                                 ASSERT(false);
996                         }
997                 }
998
999                 // Clamp result to [0, 1]
1000
1001                 switch(textureStage.stageOperation)
1002                 {
1003                 case TextureStage::STAGE_DISABLE:
1004                 case TextureStage::STAGE_SELECTARG1:
1005                 case TextureStage::STAGE_SELECTARG2:
1006                 case TextureStage::STAGE_SELECTARG3:
1007                 case TextureStage::STAGE_MODULATE:
1008                 case TextureStage::STAGE_MODULATE2X:
1009                 case TextureStage::STAGE_MODULATE4X:
1010                 case TextureStage::STAGE_ADD:
1011                 case TextureStage::STAGE_MULTIPLYADD:
1012                 case TextureStage::STAGE_LERP:
1013                 case TextureStage::STAGE_BLENDCURRENTALPHA:
1014                 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1015                 case TextureStage::STAGE_BLENDFACTORALPHA:
1016                 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1017                 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1018                 case TextureStage::STAGE_DOT3:   // Already clamped
1019                 case TextureStage::STAGE_PREMODULATE:
1020                 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1021                 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1022                 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1023                 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1024                 case TextureStage::STAGE_BUMPENVMAP:
1025                 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1026                         if(state.textureStage[stage].cantUnderflow)
1027                         {
1028                                 break;   // Can't go below zero
1029                         }
1030                 case TextureStage::STAGE_ADDSIGNED:
1031                 case TextureStage::STAGE_ADDSIGNED2X:
1032                 case TextureStage::STAGE_SUBTRACT:
1033                 case TextureStage::STAGE_ADDSMOOTH:
1034                         res.x = Max(res.x, Short4(0x0000));
1035                         res.y = Max(res.y, Short4(0x0000));
1036                         res.z = Max(res.z, Short4(0x0000));
1037                         break;
1038                 default:
1039                         ASSERT(false);
1040                 }
1041
1042                 switch(textureStage.stageOperationAlpha)
1043                 {
1044                 case TextureStage::STAGE_DISABLE:
1045                 case TextureStage::STAGE_SELECTARG1:
1046                 case TextureStage::STAGE_SELECTARG2:
1047                 case TextureStage::STAGE_SELECTARG3:
1048                 case TextureStage::STAGE_MODULATE:
1049                 case TextureStage::STAGE_MODULATE2X:
1050                 case TextureStage::STAGE_MODULATE4X:
1051                 case TextureStage::STAGE_ADD:
1052                 case TextureStage::STAGE_MULTIPLYADD:
1053                 case TextureStage::STAGE_LERP:
1054                 case TextureStage::STAGE_BLENDCURRENTALPHA:
1055                 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1056                 case TextureStage::STAGE_BLENDFACTORALPHA:
1057                 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1058                 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1059                 case TextureStage::STAGE_DOT3:   // Already clamped
1060                 case TextureStage::STAGE_PREMODULATE:
1061                 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1062                 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1063                 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1064                 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1065                 case TextureStage::STAGE_BUMPENVMAP:
1066                 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1067                         if(state.textureStage[stage].cantUnderflow)
1068                         {
1069                                 break;   // Can't go below zero
1070                         }
1071                 case TextureStage::STAGE_ADDSIGNED:
1072                 case TextureStage::STAGE_ADDSIGNED2X:
1073                 case TextureStage::STAGE_SUBTRACT:
1074                 case TextureStage::STAGE_ADDSMOOTH:
1075                         res.w = Max(res.w, Short4(0x0000));
1076                         break;
1077                 default:
1078                         ASSERT(false);
1079                 }
1080
1081                 switch(textureStage.stageOperation)
1082                 {
1083                 case TextureStage::STAGE_DISABLE:
1084                 case TextureStage::STAGE_SELECTARG1:
1085                 case TextureStage::STAGE_SELECTARG2:
1086                 case TextureStage::STAGE_SELECTARG3:
1087                 case TextureStage::STAGE_MODULATE:
1088                 case TextureStage::STAGE_SUBTRACT:
1089                 case TextureStage::STAGE_ADDSMOOTH:
1090                 case TextureStage::STAGE_LERP:
1091                 case TextureStage::STAGE_BLENDCURRENTALPHA:
1092                 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1093                 case TextureStage::STAGE_BLENDFACTORALPHA:
1094                 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1095                 case TextureStage::STAGE_DOT3:   // Already clamped
1096                 case TextureStage::STAGE_PREMODULATE:
1097                 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1098                 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1099                 case TextureStage::STAGE_BUMPENVMAP:
1100                 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1101                         break;   // Can't go above one
1102                 case TextureStage::STAGE_MODULATE2X:
1103                 case TextureStage::STAGE_MODULATE4X:
1104                 case TextureStage::STAGE_ADD:
1105                 case TextureStage::STAGE_ADDSIGNED:
1106                 case TextureStage::STAGE_ADDSIGNED2X:
1107                 case TextureStage::STAGE_MULTIPLYADD:
1108                 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1109                 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1110                 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1111                         res.x = Min(res.x, Short4(0x1000));
1112                         res.y = Min(res.y, Short4(0x1000));
1113                         res.z = Min(res.z, Short4(0x1000));
1114                         break;
1115                 default:
1116                         ASSERT(false);
1117                 }
1118
1119                 switch(textureStage.stageOperationAlpha)
1120                 {
1121                 case TextureStage::STAGE_DISABLE:
1122                 case TextureStage::STAGE_SELECTARG1:
1123                 case TextureStage::STAGE_SELECTARG2:
1124                 case TextureStage::STAGE_SELECTARG3:
1125                 case TextureStage::STAGE_MODULATE:
1126                 case TextureStage::STAGE_SUBTRACT:
1127                 case TextureStage::STAGE_ADDSMOOTH:
1128                 case TextureStage::STAGE_LERP:
1129                 case TextureStage::STAGE_BLENDCURRENTALPHA:
1130                 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1131                 case TextureStage::STAGE_BLENDFACTORALPHA:
1132                 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1133                 case TextureStage::STAGE_DOT3:   // Already clamped
1134                 case TextureStage::STAGE_PREMODULATE:
1135                 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1136                 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1137                 case TextureStage::STAGE_BUMPENVMAP:
1138                 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1139                         break;   // Can't go above one
1140                 case TextureStage::STAGE_MODULATE2X:
1141                 case TextureStage::STAGE_MODULATE4X:
1142                 case TextureStage::STAGE_ADD:
1143                 case TextureStage::STAGE_ADDSIGNED:
1144                 case TextureStage::STAGE_ADDSIGNED2X:
1145                 case TextureStage::STAGE_MULTIPLYADD:
1146                 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1147                 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1148                 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1149                         res.w = Min(res.w, Short4(0x1000));
1150                         break;
1151                 default:
1152                         ASSERT(false);
1153                 }
1154
1155                 switch(textureStage.destinationArgument)
1156                 {
1157                 case TextureStage::DESTINATION_CURRENT:
1158                         current.x = res.x;
1159                         current.y = res.y;
1160                         current.z = res.z;
1161                         current.w = res.w;
1162                         break;
1163                 case TextureStage::DESTINATION_TEMP:
1164                         temp.x = res.x;
1165                         temp.y = res.y;
1166                         temp.z = res.z;
1167                         temp.w = res.w;
1168                         break;
1169                 default:
1170                         ASSERT(false);
1171                 }
1172         }
1173
1174         void PixelPipeline::fogBlend(Vector4s &current, Float4 &f)
1175         {
1176                 if(!state.fogActive)
1177                 {
1178                         return;
1179                 }
1180
1181                 if(state.pixelFogMode != FOG_NONE)
1182                 {
1183                         pixelFog(f);
1184                 }
1185
1186                 UShort4 fog = convertFixed16(f, true);
1187
1188                 current.x = As<Short4>(MulHigh(As<UShort4>(current.x), fog));
1189                 current.y = As<Short4>(MulHigh(As<UShort4>(current.y), fog));
1190                 current.z = As<Short4>(MulHigh(As<UShort4>(current.z), fog));
1191
1192                 UShort4 invFog = UShort4(0xFFFFu) - fog;
1193
1194                 current.x += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[0]))));
1195                 current.y += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[1]))));
1196                 current.z += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(data + OFFSET(DrawData, fog.color4[2]))));
1197         }
1198
1199         void PixelPipeline::specularPixel(Vector4s &current, Vector4s &specular)
1200         {
1201                 if(!state.specularAdd)
1202                 {
1203                         return;
1204                 }
1205
1206                 current.x = AddSat(current.x, specular.x);
1207                 current.y = AddSat(current.y, specular.y);
1208                 current.z = AddSat(current.z, specular.z);
1209         }
1210
1211         Vector4s PixelPipeline::sampleTexture(int coordinates, int stage, bool project)
1212         {
1213                 Float4 x = v[2 + coordinates].x;
1214                 Float4 y = v[2 + coordinates].y;
1215                 Float4 z = v[2 + coordinates].z;
1216                 Float4 w = v[2 + coordinates].w;
1217
1218                 if(perturbate)
1219                 {
1220                         x += du;
1221                         y += dv;
1222
1223                         perturbate = false;
1224                 }
1225
1226                 return sampleTexture(stage, x, y, z, w, project);
1227         }
1228
1229         Vector4s PixelPipeline::sampleTexture(int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project)
1230         {
1231                 Vector4s c;
1232
1233                 #if PERF_PROFILE
1234                         Long texTime = Ticks();
1235                 #endif
1236
1237                 Vector4f dsx;
1238                 Vector4f dsy;
1239
1240                 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + stage * sizeof(Texture);
1241
1242                 if(!project)
1243                 {
1244                         c = SamplerCore(constants, state.sampler[stage]).sampleTexture(texture, u, v, w, q, q, dsx, dsy);
1245                 }
1246                 else
1247                 {
1248                         Float4 rq = reciprocal(q);
1249
1250                         Float4 u_q = u * rq;
1251                         Float4 v_q = v * rq;
1252                         Float4 w_q = w * rq;
1253
1254                         c = SamplerCore(constants, state.sampler[stage]).sampleTexture(texture, u_q, v_q, w_q, q, q, dsx, dsy);
1255                 }
1256
1257                 #if PERF_PROFILE
1258                         cycles[PERF_TEX] += Ticks() - texTime;
1259                 #endif
1260
1261                 return c;
1262         }
1263
1264         Short4 PixelPipeline::convertFixed12(RValue<Float4> cf)
1265         {
1266                 return RoundShort4(cf * Float4(0x1000));
1267         }
1268
1269         void PixelPipeline::convertFixed12(Vector4s &cs, Vector4f &cf)
1270         {
1271                 cs.x = convertFixed12(cf.x);
1272                 cs.y = convertFixed12(cf.y);
1273                 cs.z = convertFixed12(cf.z);
1274                 cs.w = convertFixed12(cf.w);
1275         }
1276
1277         Float4 PixelPipeline::convertSigned12(Short4 &cs)
1278         {
1279                 return Float4(cs) * Float4(1.0f / 0x0FFE);
1280         }
1281
1282         void PixelPipeline::convertSigned12(Vector4f &cf, Vector4s &cs)
1283         {
1284                 cf.x = convertSigned12(cs.x);
1285                 cf.y = convertSigned12(cs.y);
1286                 cf.z = convertSigned12(cs.z);
1287                 cf.w = convertSigned12(cs.w);
1288         }
1289
1290         void PixelPipeline::writeDestination(Vector4s &d, const Dst &dst)
1291         {
1292                 switch(dst.type)
1293                 {
1294                 case Shader::PARAMETER_TEMP:
1295                         if(dst.mask & 0x1) rs[dst.index].x = d.x;
1296                         if(dst.mask & 0x2) rs[dst.index].y = d.y;
1297                         if(dst.mask & 0x4) rs[dst.index].z = d.z;
1298                         if(dst.mask & 0x8) rs[dst.index].w = d.w;
1299                         break;
1300                 case Shader::PARAMETER_INPUT:
1301                         if(dst.mask & 0x1) vs[dst.index].x = d.x;
1302                         if(dst.mask & 0x2) vs[dst.index].y = d.y;
1303                         if(dst.mask & 0x4) vs[dst.index].z = d.z;
1304                         if(dst.mask & 0x8) vs[dst.index].w = d.w;
1305                         break;
1306                 case Shader::PARAMETER_CONST: ASSERT(false); break;
1307                 case Shader::PARAMETER_TEXTURE:
1308                         if(dst.mask & 0x1) ts[dst.index].x = d.x;
1309                         if(dst.mask & 0x2) ts[dst.index].y = d.y;
1310                         if(dst.mask & 0x4) ts[dst.index].z = d.z;
1311                         if(dst.mask & 0x8) ts[dst.index].w = d.w;
1312                         break;
1313                 case Shader::PARAMETER_COLOROUT:
1314                         if(dst.mask & 0x1) vs[dst.index].x = d.x;
1315                         if(dst.mask & 0x2) vs[dst.index].y = d.y;
1316                         if(dst.mask & 0x4) vs[dst.index].z = d.z;
1317                         if(dst.mask & 0x8) vs[dst.index].w = d.w;
1318                         break;
1319                 default:
1320                         ASSERT(false);
1321                 }
1322         }
1323
1324         Vector4s PixelPipeline::fetchRegister(const Src &src)
1325         {
1326                 Vector4s *reg;
1327                 int i = src.index;
1328
1329                 Vector4s c;
1330
1331                 if(src.type == Shader::PARAMETER_CONST)
1332                 {
1333                         c.x = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][0]));
1334                         c.y = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][1]));
1335                         c.z = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][2]));
1336                         c.w = *Pointer<Short4>(data + OFFSET(DrawData, ps.cW[i][3]));
1337                 }
1338
1339                 switch(src.type)
1340                 {
1341                 case Shader::PARAMETER_TEMP:          reg = &rs[i]; break;
1342                 case Shader::PARAMETER_INPUT:         reg = &vs[i]; break;
1343                 case Shader::PARAMETER_CONST:         reg = &c;       break;
1344                 case Shader::PARAMETER_TEXTURE:       reg = &ts[i]; break;
1345                 case Shader::PARAMETER_VOID:          return rs[0]; // Dummy
1346                 case Shader::PARAMETER_FLOAT4LITERAL: return rs[0]; // Dummy
1347                 default: ASSERT(false); return rs[0];
1348                 }
1349
1350                 const Short4 &x = (*reg)[(src.swizzle >> 0) & 0x3];
1351                 const Short4 &y = (*reg)[(src.swizzle >> 2) & 0x3];
1352                 const Short4 &z = (*reg)[(src.swizzle >> 4) & 0x3];
1353                 const Short4 &w = (*reg)[(src.swizzle >> 6) & 0x3];
1354
1355                 Vector4s mod;
1356
1357                 switch(src.modifier)
1358                 {
1359                 case Shader::MODIFIER_NONE:
1360                         mod.x = x;
1361                         mod.y = y;
1362                         mod.z = z;
1363                         mod.w = w;
1364                         break;
1365                 case Shader::MODIFIER_BIAS:
1366                         mod.x = SubSat(x, Short4(0x0800));
1367                         mod.y = SubSat(y, Short4(0x0800));
1368                         mod.z = SubSat(z, Short4(0x0800));
1369                         mod.w = SubSat(w, Short4(0x0800));
1370                         break;
1371                 case Shader::MODIFIER_BIAS_NEGATE:
1372                         mod.x = SubSat(Short4(0x0800), x);
1373                         mod.y = SubSat(Short4(0x0800), y);
1374                         mod.z = SubSat(Short4(0x0800), z);
1375                         mod.w = SubSat(Short4(0x0800), w);
1376                         break;
1377                 case Shader::MODIFIER_COMPLEMENT:
1378                         mod.x = SubSat(Short4(0x1000), x);
1379                         mod.y = SubSat(Short4(0x1000), y);
1380                         mod.z = SubSat(Short4(0x1000), z);
1381                         mod.w = SubSat(Short4(0x1000), w);
1382                         break;
1383                 case Shader::MODIFIER_NEGATE:
1384                         mod.x = -x;
1385                         mod.y = -y;
1386                         mod.z = -z;
1387                         mod.w = -w;
1388                         break;
1389                 case Shader::MODIFIER_X2:
1390                         mod.x = AddSat(x, x);
1391                         mod.y = AddSat(y, y);
1392                         mod.z = AddSat(z, z);
1393                         mod.w = AddSat(w, w);
1394                         break;
1395                 case Shader::MODIFIER_X2_NEGATE:
1396                         mod.x = -AddSat(x, x);
1397                         mod.y = -AddSat(y, y);
1398                         mod.z = -AddSat(z, z);
1399                         mod.w = -AddSat(w, w);
1400                         break;
1401                 case Shader::MODIFIER_SIGN:
1402                         mod.x = SubSat(x, Short4(0x0800));
1403                         mod.y = SubSat(y, Short4(0x0800));
1404                         mod.z = SubSat(z, Short4(0x0800));
1405                         mod.w = SubSat(w, Short4(0x0800));
1406                         mod.x = AddSat(mod.x, mod.x);
1407                         mod.y = AddSat(mod.y, mod.y);
1408                         mod.z = AddSat(mod.z, mod.z);
1409                         mod.w = AddSat(mod.w, mod.w);
1410                         break;
1411                 case Shader::MODIFIER_SIGN_NEGATE:
1412                         mod.x = SubSat(Short4(0x0800), x);
1413                         mod.y = SubSat(Short4(0x0800), y);
1414                         mod.z = SubSat(Short4(0x0800), z);
1415                         mod.w = SubSat(Short4(0x0800), w);
1416                         mod.x = AddSat(mod.x, mod.x);
1417                         mod.y = AddSat(mod.y, mod.y);
1418                         mod.z = AddSat(mod.z, mod.z);
1419                         mod.w = AddSat(mod.w, mod.w);
1420                         break;
1421                 case Shader::MODIFIER_DZ:
1422                         mod.x = x;
1423                         mod.y = y;
1424                         mod.z = z;
1425                         mod.w = w;
1426                         // Projection performed by texture sampler
1427                         break;
1428                 case Shader::MODIFIER_DW:
1429                         mod.x = x;
1430                         mod.y = y;
1431                         mod.z = z;
1432                         mod.w = w;
1433                         // Projection performed by texture sampler
1434                         break;
1435                 default:
1436                         ASSERT(false);
1437                 }
1438
1439                 if(src.type == Shader::PARAMETER_CONST && (src.modifier == Shader::MODIFIER_X2 || src.modifier == Shader::MODIFIER_X2_NEGATE))
1440                 {
1441                         mod.x = Min(mod.x, Short4(0x1000)); mod.x = Max(mod.x, Short4(-0x1000));
1442                         mod.y = Min(mod.y, Short4(0x1000)); mod.y = Max(mod.y, Short4(-0x1000));
1443                         mod.z = Min(mod.z, Short4(0x1000)); mod.z = Max(mod.z, Short4(-0x1000));
1444                         mod.w = Min(mod.w, Short4(0x1000)); mod.w = Max(mod.w, Short4(-0x1000));
1445                 }
1446
1447                 return mod;
1448         }
1449
1450         void PixelPipeline::MOV(Vector4s &dst, Vector4s &src0)
1451         {
1452                 dst.x = src0.x;
1453                 dst.y = src0.y;
1454                 dst.z = src0.z;
1455                 dst.w = src0.w;
1456         }
1457
1458         void PixelPipeline::ADD(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1459         {
1460                 dst.x = AddSat(src0.x, src1.x);
1461                 dst.y = AddSat(src0.y, src1.y);
1462                 dst.z = AddSat(src0.z, src1.z);
1463                 dst.w = AddSat(src0.w, src1.w);
1464         }
1465
1466         void PixelPipeline::SUB(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1467         {
1468                 dst.x = SubSat(src0.x, src1.x);
1469                 dst.y = SubSat(src0.y, src1.y);
1470                 dst.z = SubSat(src0.z, src1.z);
1471                 dst.w = SubSat(src0.w, src1.w);
1472         }
1473
1474         void PixelPipeline::MAD(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1475         {
1476                 // FIXME: Long fixed-point multiply fixup
1477                 { dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x); }
1478                 { dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y); }
1479                 { dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z); }
1480                 { dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w); }
1481         }
1482
1483         void PixelPipeline::MUL(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1484         {
1485                 // FIXME: Long fixed-point multiply fixup
1486                 { dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); }
1487                 { dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); }
1488                 { dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); }
1489                 { dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); }
1490         }
1491
1492         void PixelPipeline::DP3(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1493         {
1494                 Short4 t0;
1495                 Short4 t1;
1496
1497                 // FIXME: Long fixed-point multiply fixup
1498                 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0);
1499                 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1500                 t0 = AddSat(t0, t1);
1501                 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1502                 t0 = AddSat(t0, t1);
1503
1504                 dst.x = t0;
1505                 dst.y = t0;
1506                 dst.z = t0;
1507                 dst.w = t0;
1508         }
1509
1510         void PixelPipeline::DP4(Vector4s &dst, Vector4s &src0, Vector4s &src1)
1511         {
1512                 Short4 t0;
1513                 Short4 t1;
1514
1515                 // FIXME: Long fixed-point multiply fixup
1516                 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0);
1517                 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1518                 t0 = AddSat(t0, t1);
1519                 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1520                 t0 = AddSat(t0, t1);
1521                 t1 = MulHigh(src0.w, src1.w); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
1522                 t0 = AddSat(t0, t1);
1523
1524                 dst.x = t0;
1525                 dst.y = t0;
1526                 dst.z = t0;
1527                 dst.w = t0;
1528         }
1529
1530         void PixelPipeline::LRP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1531         {
1532                 // FIXME: Long fixed-point multiply fixup
1533                 { dst.x = SubSat(src1.x, src2.x); dst.x = MulHigh(dst.x, src0.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x); }
1534                 {
1535                 dst.y = SubSat(src1.y, src2.y); dst.y = MulHigh(dst.y, src0.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y);
1536         }
1537                 {dst.z = SubSat(src1.z, src2.z); dst.z = MulHigh(dst.z, src0.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z); }
1538                 {dst.w = SubSat(src1.w, src2.w); dst.w = MulHigh(dst.w, src0.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w); }
1539         }
1540
1541         void PixelPipeline::TEXCOORD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate)
1542         {
1543                 Float4 uw;
1544                 Float4 vw;
1545                 Float4 sw;
1546
1547                 if(state.interpolant[2 + coordinate].component & 0x01)
1548                 {
1549                         uw = Max(u, Float4(0.0f));
1550                         uw = Min(uw, Float4(1.0f));
1551                         dst.x = convertFixed12(uw);
1552                 }
1553                 else
1554                 {
1555                         dst.x = Short4(0x0000);
1556                 }
1557
1558                 if(state.interpolant[2 + coordinate].component & 0x02)
1559                 {
1560                         vw = Max(v, Float4(0.0f));
1561                         vw = Min(vw, Float4(1.0f));
1562                         dst.y = convertFixed12(vw);
1563                 }
1564                 else
1565                 {
1566                         dst.y = Short4(0x0000);
1567                 }
1568
1569                 if(state.interpolant[2 + coordinate].component & 0x04)
1570                 {
1571                         sw = Max(s, Float4(0.0f));
1572                         sw = Min(sw, Float4(1.0f));
1573                         dst.z = convertFixed12(sw);
1574                 }
1575                 else
1576                 {
1577                         dst.z = Short4(0x0000);
1578                 }
1579
1580                 dst.w = Short4(0x1000);
1581         }
1582
1583         void PixelPipeline::TEXCRD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate, bool project)
1584         {
1585                 Float4 uw = u;
1586                 Float4 vw = v;
1587                 Float4 sw = s;
1588
1589                 if(project)
1590                 {
1591                         uw *= Rcp_pp(s);
1592                         vw *= Rcp_pp(s);
1593                 }
1594
1595                 if(state.interpolant[2 + coordinate].component & 0x01)
1596                 {
1597                         uw *= Float4(0x1000);
1598                         uw = Max(uw, Float4(-0x8000));
1599                         uw = Min(uw, Float4(0x7FFF));
1600                         dst.x = RoundShort4(uw);
1601                 }
1602                 else
1603                 {
1604                         dst.x = Short4(0x0000);
1605                 }
1606
1607                 if(state.interpolant[2 + coordinate].component & 0x02)
1608                 {
1609                         vw *= Float4(0x1000);
1610                         vw = Max(vw, Float4(-0x8000));
1611                         vw = Min(vw, Float4(0x7FFF));
1612                         dst.y = RoundShort4(vw);
1613                 }
1614                 else
1615                 {
1616                         dst.y = Short4(0x0000);
1617                 }
1618
1619                 if(state.interpolant[2 + coordinate].component & 0x04)
1620                 {
1621                         sw *= Float4(0x1000);
1622                         sw = Max(sw, Float4(-0x8000));
1623                         sw = Min(sw, Float4(0x7FFF));
1624                         dst.z = RoundShort4(sw);
1625                 }
1626                 else
1627                 {
1628                         dst.z = Short4(0x0000);
1629                 }
1630         }
1631
1632         void PixelPipeline::TEXDP3(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src)
1633         {
1634                 TEXM3X3PAD(u, v, s, src, 0, false);
1635
1636                 Short4 t0 = RoundShort4(u_ * Float4(0x1000));
1637
1638                 dst.x = t0;
1639                 dst.y = t0;
1640                 dst.z = t0;
1641                 dst.w = t0;
1642         }
1643
1644         void PixelPipeline::TEXDP3TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0)
1645         {
1646                 TEXM3X3PAD(u, v, s, src0, 0, false);
1647
1648                 v_ = Float4(0.0f);
1649                 w_ = Float4(0.0f);
1650
1651                 dst = sampleTexture(stage, u_, v_, w_, w_);
1652         }
1653
1654         void PixelPipeline::TEXKILL(Int cMask[4], Float4 &u, Float4 &v, Float4 &s)
1655         {
1656                 Int kill = SignMask(CmpNLT(u, Float4(0.0f))) &
1657                         SignMask(CmpNLT(v, Float4(0.0f))) &
1658                         SignMask(CmpNLT(s, Float4(0.0f)));
1659
1660                 for(unsigned int q = 0; q < state.multiSample; q++)
1661                 {
1662                         cMask[q] &= kill;
1663                 }
1664         }
1665
1666         void PixelPipeline::TEXKILL(Int cMask[4], Vector4s &src)
1667         {
1668                 Short4 test = src.x | src.y | src.z;
1669                 Int kill = SignMask(PackSigned(test, test)) ^ 0x0000000F;
1670
1671                 for(unsigned int q = 0; q < state.multiSample; q++)
1672                 {
1673                         cMask[q] &= kill;
1674                 }
1675         }
1676
1677         void PixelPipeline::TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int sampler, bool project)
1678         {
1679                 dst = sampleTexture(sampler, u, v, s, s, project);
1680         }
1681
1682         void PixelPipeline::TEXLD(Vector4s &dst, Vector4s &src, int sampler, bool project)
1683         {
1684                 Float4 u = Float4(src.x) * Float4(1.0f / 0x0FFE);
1685                 Float4 v = Float4(src.y) * Float4(1.0f / 0x0FFE);
1686                 Float4 s = Float4(src.z) * Float4(1.0f / 0x0FFE);
1687
1688                 dst = sampleTexture(sampler, u, v, s, s, project);
1689         }
1690
1691         void PixelPipeline::TEXBEM(Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)
1692         {
1693                 Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE);
1694                 Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE);
1695
1696                 Float4 du2 = du;
1697                 Float4 dv2 = dv;
1698
1699                 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
1700                 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
1701                 du += dv2;
1702                 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
1703                 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
1704                 dv += du2;
1705
1706                 Float4 u_ = u + du;
1707                 Float4 v_ = v + dv;
1708
1709                 dst = sampleTexture(stage, u_, v_, s, s);
1710         }
1711
1712         void PixelPipeline::TEXBEML(Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)
1713         {
1714                 Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE);
1715                 Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE);
1716
1717                 Float4 du2 = du;
1718                 Float4 dv2 = dv;
1719
1720                 du *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][0]));
1721                 dv2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][0]));
1722                 du += dv2;
1723                 dv *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[1][1]));
1724                 du2 *= *Pointer<Float4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4F[0][1]));
1725                 dv += du2;
1726
1727                 Float4 u_ = u + du;
1728                 Float4 v_ = v + dv;
1729
1730                 dst = sampleTexture(stage, u_, v_, s, s);
1731
1732                 Short4 L;
1733
1734                 L = src.z;
1735                 L = MulHigh(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceScale4)));
1736                 L = L << 4;
1737                 L = AddSat(L, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].luminanceOffset4)));
1738                 L = Max(L, Short4(0x0000));
1739                 L = Min(L, Short4(0x1000));
1740
1741                 dst.x = MulHigh(dst.x, L); dst.x = dst.x << 4;
1742                 dst.y = MulHigh(dst.y, L); dst.y = dst.y << 4;
1743                 dst.z = MulHigh(dst.z, L); dst.z = dst.z << 4;
1744         }
1745
1746         void PixelPipeline::TEXREG2AR(Vector4s &dst, Vector4s &src0, int stage)
1747         {
1748                 Float4 u = Float4(src0.w) * Float4(1.0f / 0x0FFE);
1749                 Float4 v = Float4(src0.x) * Float4(1.0f / 0x0FFE);
1750                 Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);
1751
1752                 dst = sampleTexture(stage, u, v, s, s);
1753         }
1754
1755         void PixelPipeline::TEXREG2GB(Vector4s &dst, Vector4s &src0, int stage)
1756         {
1757                 Float4 u = Float4(src0.y) * Float4(1.0f / 0x0FFE);
1758                 Float4 v = Float4(src0.z) * Float4(1.0f / 0x0FFE);
1759                 Float4 s = v;
1760
1761                 dst = sampleTexture(stage, u, v, s, s);
1762         }
1763
1764         void PixelPipeline::TEXREG2RGB(Vector4s &dst, Vector4s &src0, int stage)
1765         {
1766                 Float4 u = Float4(src0.x) * Float4(1.0f / 0x0FFE);
1767                 Float4 v = Float4(src0.y) * Float4(1.0f / 0x0FFE);
1768                 Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);
1769
1770                 dst = sampleTexture(stage, u, v, s, s);
1771         }
1772
1773         void PixelPipeline::TEXM3X2DEPTH(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src, bool signedScaling)
1774         {
1775                 TEXM3X2PAD(u, v, s, src, 1, signedScaling);
1776
1777                 // z / w
1778                 u_ *= Rcp_pp(v_);   // FIXME: Set result to 1.0 when division by zero
1779
1780                 oDepth = u_;
1781         }
1782
1783         void PixelPipeline::TEXM3X2PAD(Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling)
1784         {
1785                 TEXM3X3PAD(u, v, s, src0, component, signedScaling);
1786         }
1787
1788         void PixelPipeline::TEXM3X2TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling)
1789         {
1790                 TEXM3X2PAD(u, v, s, src0, 1, signedScaling);
1791
1792                 w_ = Float4(0.0f);
1793
1794                 dst = sampleTexture(stage, u_, v_, w_, w_);
1795         }
1796
1797         void PixelPipeline::TEXM3X3(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, bool signedScaling)
1798         {
1799                 TEXM3X3PAD(u, v, s, src0, 2, signedScaling);
1800
1801                 dst.x = RoundShort4(u_ * Float4(0x1000));
1802                 dst.y = RoundShort4(v_ * Float4(0x1000));
1803                 dst.z = RoundShort4(w_ * Float4(0x1000));
1804                 dst.w = Short4(0x1000);
1805         }
1806
1807         void PixelPipeline::TEXM3X3PAD(Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling)
1808         {
1809                 if(component == 0 || previousScaling != signedScaling)   // FIXME: Other source modifiers?
1810                 {
1811                         U = Float4(src0.x);
1812                         V = Float4(src0.y);
1813                         W = Float4(src0.z);
1814
1815                         previousScaling = signedScaling;
1816                 }
1817
1818                 Float4 x = U * u + V * v + W * s;
1819
1820                 x *= Float4(1.0f / 0x1000);
1821
1822                 switch(component)
1823                 {
1824                 case 0: u_ = x; break;
1825                 case 1: v_ = x; break;
1826                 case 2: w_ = x; break;
1827                 default: ASSERT(false);
1828                 }
1829         }
1830
1831         void PixelPipeline::TEXM3X3SPEC(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, Vector4s &src1)
1832         {
1833                 TEXM3X3PAD(u, v, s, src0, 2, false);
1834
1835                 Float4 E[3];   // Eye vector
1836
1837                 E[0] = Float4(src1.x) * Float4(1.0f / 0x0FFE);
1838                 E[1] = Float4(src1.y) * Float4(1.0f / 0x0FFE);
1839                 E[2] = Float4(src1.z) * Float4(1.0f / 0x0FFE);
1840
1841                 // Reflection
1842                 Float4 u__;
1843                 Float4 v__;
1844                 Float4 w__;
1845
1846                 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
1847                 u__ = u_ * E[0];
1848                 v__ = v_ * E[1];
1849                 w__ = w_ * E[2];
1850                 u__ += v__ + w__;
1851                 u__ += u__;
1852                 v__ = u__;
1853                 w__ = u__;
1854                 u__ *= u_;
1855                 v__ *= v_;
1856                 w__ *= w_;
1857                 u_ *= u_;
1858                 v_ *= v_;
1859                 w_ *= w_;
1860                 u_ += v_ + w_;
1861                 u__ -= E[0] * u_;
1862                 v__ -= E[1] * u_;
1863                 w__ -= E[2] * u_;
1864
1865                 dst = sampleTexture(stage, u__, v__, w__, w__);
1866         }
1867
1868         void PixelPipeline::TEXM3X3TEX(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling)
1869         {
1870                 TEXM3X3PAD(u, v, s, src0, 2, signedScaling);
1871
1872                 dst = sampleTexture(stage, u_, v_, w_, w_);
1873         }
1874
1875         void PixelPipeline::TEXM3X3VSPEC(Vector4s &dst, Float4 &x, Float4 &y, Float4 &z, int stage, Vector4s &src0)
1876         {
1877                 TEXM3X3PAD(x, y, z, src0, 2, false);
1878
1879                 Float4 E[3];   // Eye vector
1880
1881                 E[0] = v[2 + stage - 2].w;
1882                 E[1] = v[2 + stage - 1].w;
1883                 E[2] = v[2 + stage - 0].w;
1884
1885                 // Reflection
1886                 Float4 u__;
1887                 Float4 v__;
1888                 Float4 w__;
1889
1890                 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
1891                 u__ = u_ * E[0];
1892                 v__ = v_ * E[1];
1893                 w__ = w_ * E[2];
1894                 u__ += v__ + w__;
1895                 u__ += u__;
1896                 v__ = u__;
1897                 w__ = u__;
1898                 u__ *= u_;
1899                 v__ *= v_;
1900                 w__ *= w_;
1901                 u_ *= u_;
1902                 v_ *= v_;
1903                 w_ *= w_;
1904                 u_ += v_ + w_;
1905                 u__ -= E[0] * u_;
1906                 v__ -= E[1] * u_;
1907                 w__ -= E[2] * u_;
1908
1909                 dst = sampleTexture(stage, u__, v__, w__, w__);
1910         }
1911
1912         void PixelPipeline::TEXDEPTH()
1913         {
1914                 u_ = Float4(rs[5].x);
1915                 v_ = Float4(rs[5].y);
1916
1917                 // z / w
1918                 u_ *= Rcp_pp(v_);   // FIXME: Set result to 1.0 when division by zero
1919
1920                 oDepth = u_;
1921         }
1922
1923         void PixelPipeline::CND(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1924         {
1925                 {Short4 t0; t0 = src0.x; t0 = CmpGT(t0, Short4(0x0800)); Short4 t1; t1 = src1.x; t1 = t1 & t0; t0 = ~t0 & src2.x; t0 = t0 | t1; dst.x = t0; };
1926                 {Short4 t0; t0 = src0.y; t0 = CmpGT(t0, Short4(0x0800)); Short4 t1; t1 = src1.y; t1 = t1 & t0; t0 = ~t0 & src2.y; t0 = t0 | t1; dst.y = t0; };
1927                 {Short4 t0; t0 = src0.z; t0 = CmpGT(t0, Short4(0x0800)); Short4 t1; t1 = src1.z; t1 = t1 & t0; t0 = ~t0 & src2.z; t0 = t0 | t1; dst.z = t0; };
1928                 {Short4 t0; t0 = src0.w; t0 = CmpGT(t0, Short4(0x0800)); Short4 t1; t1 = src1.w; t1 = t1 & t0; t0 = ~t0 & src2.w; t0 = t0 | t1; dst.w = t0; };
1929         }
1930
1931         void PixelPipeline::CMP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
1932         {
1933                 {Short4 t0 = CmpGT(Short4(0x0000), src0.x); Short4 t1; t1 = src2.x; t1 &= t0; t0 = ~t0 & src1.x; t0 |= t1; dst.x = t0; };
1934                 {Short4 t0 = CmpGT(Short4(0x0000), src0.y); Short4 t1; t1 = src2.y; t1 &= t0; t0 = ~t0 & src1.y; t0 |= t1; dst.y = t0; };
1935                 {Short4 t0 = CmpGT(Short4(0x0000), src0.z); Short4 t1; t1 = src2.z; t1 &= t0; t0 = ~t0 & src1.z; t0 |= t1; dst.z = t0; };
1936                 {Short4 t0 = CmpGT(Short4(0x0000), src0.w); Short4 t1; t1 = src2.w; t1 &= t0; t0 = ~t0 & src1.w; t0 |= t1; dst.w = t0; };
1937         }
1938
1939         void PixelPipeline::BEM(Vector4s &dst, Vector4s &src0, Vector4s &src1, int stage)
1940         {
1941                 Short4 t0;
1942                 Short4 t1;
1943
1944                 // dst.x = src0.x + BUMPENVMAT00(stage) * src1.x + BUMPENVMAT10(stage) * src1.y
1945                 t0 = MulHigh(src1.x, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[0][0]))); t0 = t0 << 4;   // FIXME: Matrix components range? Overflow hazard.
1946                 t1 = MulHigh(src1.y, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[1][0]))); t1 = t1 << 4;   // FIXME: Matrix components range? Overflow hazard.
1947                 t0 = AddSat(t0, t1);
1948                 t0 = AddSat(t0, src0.x);
1949                 dst.x = t0;
1950
1951                 // dst.y = src0.y + BUMPENVMAT01(stage) * src1.x + BUMPENVMAT11(stage) * src1.y
1952                 t0 = MulHigh(src1.x, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[0][1]))); t0 = t0 << 4;   // FIXME: Matrix components range? Overflow hazard.
1953                 t1 = MulHigh(src1.y, *Pointer<Short4>(data + OFFSET(DrawData, textureStage[stage].bumpmapMatrix4W[1][1]))); t1 = t1 << 4;   // FIXME: Matrix components range? Overflow hazard.
1954                 t0 = AddSat(t0, t1);
1955                 t0 = AddSat(t0, src0.y);
1956                 dst.y = t0;
1957         }
1958 }
1959