1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "VertexRoutine.hpp"
17 #include "VertexShader.hpp"
20 #include "Renderer.hpp"
21 #include "Constants.hpp"
// Global rendering switches declared extern here; their definitions are not in this listing.
// Both are read by VertexRoutine::postTransform() when it adjusts the output position register:
// - halfIntegerCoordinates == false (and not pre-transformed) adds a w-scaled half-pixel offset to x/y.
// - symmetricNormalizedDepth == true (and not fixed-function) remaps z to (z + w) * 0.5.
26 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates
27 extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1]
// Constructs the routine for the given vertex processor state and (possibly null) shader.
// Members `v` and `o` are each initialised with a boolean that is true only when a shader
// is supplied and it reports dynamically indexed inputs / outputs respectively.
// NOTE(review): presumably that flag selects a dynamically indexable register array - confirm
// against the declaration of `v` and `o`.
// NOTE(review): the initializer list continues past `o(...)` (trailing comma) and the
// constructor body is not visible in this listing.
29 VertexRoutine::VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader)
30 : v(shader && shader->dynamicallyIndexedInput),
31 o(shader && shader->dynamicallyIndexedOutput),
// Destructor. NOTE(review): the body is not visible in this listing.
36 VertexRoutine::~VertexRoutine()
// Builds the per-batch vertex loop. As far as this listing shows, each iteration reads one
// index from `batch`, compares it with the tag stored for its cache slot, refreshes the cached
// group of vertices on a mismatch, and then copies the cached vertex to `vertex`.
// NOTE(review): this listing has gaps (the embedded line numbers skip, e.g. 51-53, 61-66 and
// 77-78), so the loop opener, the work done between the tag update and writeCache(), and the
// update of vertexCount are not visible here. `task`, `data`, `batch` and `vertex` are also
// defined outside the visible lines.
40 void VertexRoutine::generate()
42 const bool textureSampling = state.textureSampling;
// Byte pointers to the task's vertex cache and to its vertex and tag arrays.
44 Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache);
45 Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex);
46 Pointer<Byte> tagCache = cache + OFFSET(VertexCache,tag);
48 UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask,vertexCount));
// Cached as a member so the stream readers and clip-flag code can reach the constant tables.
50 constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants));
54 UInt index = *Pointer<UInt>(batch);
// Bits 2..5 of the index: a multiple of 4 in [0, 60]. It is used both as the byte offset of a
// 4-byte tag and as the slot number of the first vertex of a group of four.
55 UInt tagIndex = index & 0x0000003C;
// Tag value: the index with its low two bits cleared, or the unmodified index when texture sampling is on.
56 UInt indexQ = !textureSampling ? UInt(index & 0xFFFFFFFC) : index; // FIXME: TEXLDL hack to have independent LODs, hurts performance.
// Cache miss: the stored tag differs from the tag of the requested index.
58 If(*Pointer<UInt>(tagCache + tagIndex) != indexQ)
60 *Pointer<UInt>(tagCache + tagIndex) = indexQ;
// writeCache() stores into four consecutive Vertex slots starting at this address.
67 Pointer<Byte> cacheLine0 = vertexCache + tagIndex * UInt((int)sizeof(Vertex));
68 writeCache(cacheLine0);
// Low six bits of the index select the individual cached vertex (64 slots).
71 UInt cacheIndex = index & 0x0000003F;
72 Pointer<Byte> cacheLine = vertexCache + cacheIndex * UInt((int)sizeof(Vertex));
73 writeVertex(vertex, cacheLine);
// Advance to the next output vertex and the next index in the batch.
75 vertex += sizeof(Vertex);
76 batch += sizeof(unsigned int);
// Loop terminates once vertexCount is zero (its update is not visible in this listing).
79 Until(vertexCount == 0)
// Loads every vertex input register for the vertices starting at `index`.
// For each of the MAX_VERTEX_INPUTS streams it fetches the stream's base pointer and stride
// from the DrawData arrays and stores the result of readStream() in v[i].
// index: passed through to readStream(), which uses it as the element index into the stream.
84 void VertexRoutine::readInput(UInt &index)
86 for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
// i-th entry of DrawData::input (array of pointers) and DrawData::stride (array of unsigned int).
88 Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,input) + sizeof(void*) * i);
89 UInt stride = *Pointer<UInt>(data + OFFSET(DrawData,stride) + sizeof(unsigned int) * i);
91 v[i] = readStream(input, stride, state.input[i], index);
// Computes `clipFlags` from the output position register o[state.positionRegister].
// Each comparison yields a per-lane mask; SignMask() condenses it to a small integer that
// indexes a table of 4-byte entries in Constants, and the table values are OR-ed together.
// writeCache() later splits clipFlags into four bytes, one per vertex slot.
// NOTE(review): the declarations of `flags` and `clipFlags` are not visible in this listing.
95 void VertexRoutine::computeClipFlags()
97 int pos = state.positionRegister;
// Upper bounds: set where w < x, w < y, w < z.
99 Int4 maxX = CmpLT(o[pos].w, o[pos].x);
100 Int4 maxY = CmpLT(o[pos].w, o[pos].y);
101 Int4 maxZ = CmpLT(o[pos].w, o[pos].z);
// Lower bounds: set where NOT (-w <= x), NOT (-w <= y), NOT (0 <= z).
// Note that z is compared against 0 rather than -w.
103 Int4 minX = CmpNLE(-o[pos].w, o[pos].x);
104 Int4 minY = CmpNLE(-o[pos].w, o[pos].y);
105 Int4 minZ = CmpNLE(Float4(0.0f), o[pos].z);
// Table lookups: `flags * 4` is the byte offset of a 4-byte Int entry.
109 flags = SignMask(maxX);
110 clipFlags = *Pointer<Int>(constants + OFFSET(Constants,maxX) + flags * 4); // FIXME: Array indexing
111 flags = SignMask(maxY);
112 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxY) + flags * 4);
113 flags = SignMask(maxZ);
114 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxZ) + flags * 4);
115 flags = SignMask(minX);
116 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minX) + flags * 4);
117 flags = SignMask(minY);
118 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minY) + flags * 4);
119 flags = SignMask(minZ);
120 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minZ) + flags * 4);
// Finiteness test: |x|, |y| and |z| must all be <= Constants::maxPos.
122 Int4 finiteX = CmpLE(Abs(o[pos].x), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
123 Int4 finiteY = CmpLE(Abs(o[pos].y), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
124 Int4 finiteZ = CmpLE(Abs(o[pos].z), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
126 flags = SignMask(finiteX & finiteY & finiteZ);
127 clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,fini) + flags * 4);
129 if(state.preTransformed)
// 0xFB clears bit 2 (0x04) in each of the four per-vertex bytes.
131 clipFlags &= 0xFBFBFBFB; // Don't clip against far clip plane
// Reads one input stream for four vertices and converts it to floating point.
// buffer: base address of the stream data.
// stride: byte distance between consecutive elements.
// stream: describes the element type, component count and whether integer data is normalized.
// index:  element index of the first vertex; its address is buffer + index * stride.
// Returns a Vector4f named `v`; after the transposes, each of v.x/v.y/v.z/v.w holds one
// component with one lane per vertex (lane x <- source0 ... lane w <- source3).
// NOTE(review): this listing has gaps. The declaration of `v`, the `switch` on the stream type,
// the `break`s, several `else`s, one `case` label and the final `return` are not visible, so
// the comments below describe only the statements that are shown.
135 Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index)
137 const bool textureSampling = state.textureSampling;
// Addresses of the four elements. Without texture sampling they are `stride` bytes apart;
// with texture sampling the added offset is 0, so all four pointers equal source0.
141 Pointer<Byte> source0 = buffer + index * stride;
142 Pointer<Byte> source1 = source0 + (!textureSampling ? stride : 0);
143 Pointer<Byte> source2 = source1 + (!textureSampling ? stride : 0);
144 Pointer<Byte> source3 = source2 + (!textureSampling ? stride : 0);
// 32-bit float data: loaded without conversion.
148 case STREAMTYPE_FLOAT:
150 if(stream.count == 0)
152 // Null stream, all default components
154 else if(stream.count == 1)
// Single component: one scalar per vertex goes straight into the lanes of v.x.
156 v.x.x = *Pointer<Float>(source0);
157 v.x.y = *Pointer<Float>(source1);
158 v.x.z = *Pointer<Float>(source2);
159 v.x.w = *Pointer<Float>(source3);
// Otherwise (the `else` is not visible here) a full Float4 is loaded per vertex.
// NOTE(review): this reads 16 bytes per element regardless of stream.count - confirm the
// buffers are padded accordingly.
163 v.x = *Pointer<Float4>(source0);
164 v.y = *Pointer<Float4>(source1);
165 v.z = *Pointer<Float4>(source2);
166 v.w = *Pointer<Float4>(source3);
// Presumably turns the four per-vertex rows into per-component columns for the first
// stream.count components - confirm against transpose4xN.
168 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
// Unsigned 8-bit data: converted to float, optionally scaled by Constants::unscaleByte.
172 case STREAMTYPE_BYTE:
174 v.x = Float4(*Pointer<Byte4>(source0));
175 v.y = Float4(*Pointer<Byte4>(source1));
176 v.z = Float4(*Pointer<Byte4>(source2));
177 v.w = Float4(*Pointer<Byte4>(source3));
179 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
181 if(stream.normalized)
// Only the components that are actually present are scaled.
183 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
184 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
185 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
186 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
// Signed 8-bit data: same pattern, scaled by Constants::unscaleSByte when normalized.
190 case STREAMTYPE_SBYTE:
192 v.x = Float4(*Pointer<SByte4>(source0));
193 v.y = Float4(*Pointer<SByte4>(source1));
194 v.z = Float4(*Pointer<SByte4>(source2));
195 v.w = Float4(*Pointer<SByte4>(source3));
197 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
199 if(stream.normalized)
201 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
202 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
203 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
204 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
// Four unsigned bytes per vertex, always scaled by Constants::unscaleByte and always
// transposed as a full 4x4.
// NOTE(review): embedded lines 216-222 are missing from this listing, so any further
// processing of this case (e.g. channel reordering) cannot be confirmed from here.
208 case STREAMTYPE_COLOR:
210 v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
211 v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
212 v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
213 v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
215 transpose4x4(v.x, v.y, v.z, v.w);
// Signed 16-bit data, scaled by Constants::unscaleShort when normalized.
223 case STREAMTYPE_SHORT:
225 v.x = Float4(*Pointer<Short4>(source0));
226 v.y = Float4(*Pointer<Short4>(source1));
227 v.z = Float4(*Pointer<Short4>(source2));
228 v.w = Float4(*Pointer<Short4>(source3));
230 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
232 if(stream.normalized)
234 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
235 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
236 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
237 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
// Unsigned 16-bit data, scaled by Constants::unscaleUShort when normalized.
241 case STREAMTYPE_USHORT:
243 v.x = Float4(*Pointer<UShort4>(source0));
244 v.y = Float4(*Pointer<UShort4>(source1));
245 v.z = Float4(*Pointer<UShort4>(source2));
246 v.w = Float4(*Pointer<UShort4>(source3));
248 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
250 if(stream.normalized)
252 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
253 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
254 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
255 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
// 32-bit signed integer data (Int4 loads, Constants::unscaleInt).
// NOTE(review): the case label for this section (embedded line 259) is missing from this
// listing - presumably STREAMTYPE_INT, mirroring the UINT case below; confirm.
// Normalized: convert to float, then scale by Constants::unscaleInt.
261 if(stream.normalized)
263 v.x = Float4(*Pointer<Int4>(source0));
264 v.y = Float4(*Pointer<Int4>(source1));
265 v.z = Float4(*Pointer<Int4>(source2));
266 v.w = Float4(*Pointer<Int4>(source3));
268 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
270 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
271 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
272 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
273 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
// Presumably the non-normalized branch (the `else` is not visible here): As<Float4> is used
// instead of a Float4 conversion, which presumably keeps the integer bit pattern - confirm.
277 v.x = As<Float4>(*Pointer<Int4>(source0));
278 v.y = As<Float4>(*Pointer<Int4>(source1));
279 v.z = As<Float4>(*Pointer<Int4>(source2));
280 v.w = As<Float4>(*Pointer<Int4>(source3));
282 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
// 32-bit unsigned integer data: same structure with UInt4 loads and Constants::unscaleUInt.
286 case STREAMTYPE_UINT:
288 if(stream.normalized)
290 v.x = Float4(*Pointer<UInt4>(source0));
291 v.y = Float4(*Pointer<UInt4>(source1));
292 v.z = Float4(*Pointer<UInt4>(source2));
293 v.w = Float4(*Pointer<UInt4>(source3));
295 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
297 if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
298 if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
299 if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
300 if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
// Presumably the non-normalized branch (the `else` is not visible here).
304 v.x = As<Float4>(*Pointer<UInt4>(source0));
305 v.y = As<Float4>(*Pointer<UInt4>(source1));
306 v.z = As<Float4>(*Pointer<UInt4>(source2));
307 v.w = As<Float4>(*Pointer<UInt4>(source3));
309 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
// Three 10-bit fields packed in one 32-bit word per vertex (bits 0-9, 10-19, 20-29).
// Each field is masked in place and converted to float; the second and third components are
// brought down to the 0..1023 range afterwards by the multiplications at the end of the case.
// NOTE(review): the declaration of x, y, z is not visible in this listing.
313 case STREAMTYPE_UDEC3:
319 x = y = z = *Pointer<Int>(source0);
321 v.x.x = Float(x & 0x000003FF);
322 v.x.y = Float(y & 0x000FFC00);
323 v.x.z = Float(z & 0x3FF00000);
329 x = y = z = *Pointer<Int>(source1);
331 v.y.x = Float(x & 0x000003FF);
332 v.y.y = Float(y & 0x000FFC00);
333 v.y.z = Float(z & 0x3FF00000);
339 x = y = z = *Pointer<Int>(source2);
341 v.z.x = Float(x & 0x000003FF);
342 v.z.y = Float(y & 0x000FFC00);
343 v.z.z = Float(z & 0x3FF00000);
349 x = y = z = *Pointer<Int>(source3);
351 v.w.x = Float(x & 0x000003FF);
352 v.w.y = Float(y & 0x000FFC00);
353 v.w.z = Float(z & 0x3FF00000);
356 transpose4x3(v.x, v.y, v.z, v.w);
// Divide by 2^10 and 2^20 to undo the in-place bit positions of the y and z fields.
358 v.y *= Float4(1.0f / 0x00000400);
359 v.z *= Float4(1.0f / 0x00100000);
// Three 10-bit fields per 32-bit word, normalized. Each field is shifted so that it occupies
// the top ten bits (mask 0xFFC00000) before the Int-to-Float conversion, and the result is
// scaled by 1 / 2^22 / 511 afterwards.
362 case STREAMTYPE_DEC3N:
368 x = y = z = *Pointer<Int>(source0);
370 v.x.x = Float((x << 22) & 0xFFC00000);
371 v.x.y = Float((y << 12) & 0xFFC00000);
372 v.x.z = Float((z << 2) & 0xFFC00000);
378 x = y = z = *Pointer<Int>(source1);
380 v.y.x = Float((x << 22) & 0xFFC00000);
381 v.y.y = Float((y << 12) & 0xFFC00000);
382 v.y.z = Float((z << 2) & 0xFFC00000);
388 x = y = z = *Pointer<Int>(source2);
390 v.z.x = Float((x << 22) & 0xFFC00000);
391 v.z.y = Float((y << 12) & 0xFFC00000);
392 v.z.z = Float((z << 2) & 0xFFC00000);
398 x = y = z = *Pointer<Int>(source3);
400 v.w.x = Float((x << 22) & 0xFFC00000);
401 v.w.y = Float((y << 12) & 0xFFC00000);
402 v.w.z = Float((z << 2) & 0xFFC00000);
405 transpose4x3(v.x, v.y, v.z, v.w);
407 v.x *= Float4(1.0f / 0x00400000 / 511.0f);
408 v.y *= Float4(1.0f / 0x00400000 / 511.0f);
409 v.z *= Float4(1.0f / 0x00400000 / 511.0f);
// 32-bit fixed-point data: converted from Int4 and scaled by Constants::unscaleFixed.
412 case STREAMTYPE_FIXED:
414 v.x = Float4(*Pointer<Int4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed));
415 v.y = Float4(*Pointer<Int4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed));
416 v.z = Float4(*Pointer<Int4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed));
417 v.w = Float4(*Pointer<Int4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleFixed));
419 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
// 16-bit half floats: each raw 16-bit value indexes the Constants::half2float table of
// 4-byte floats. Components sit at byte offsets 0, 2, 4 and 6 within an element, and only
// the components present in the stream are read.
422 case STREAMTYPE_HALF:
424 if(stream.count >= 1)
426 UShort x0 = *Pointer<UShort>(source0 + 0);
427 UShort x1 = *Pointer<UShort>(source1 + 0);
428 UShort x2 = *Pointer<UShort>(source2 + 0);
429 UShort x3 = *Pointer<UShort>(source3 + 0);
431 v.x.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x0) * 4);
432 v.x.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x1) * 4);
433 v.x.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x2) * 4);
434 v.x.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x3) * 4);
437 if(stream.count >= 2)
439 UShort y0 = *Pointer<UShort>(source0 + 2);
440 UShort y1 = *Pointer<UShort>(source1 + 2);
441 UShort y2 = *Pointer<UShort>(source2 + 2);
442 UShort y3 = *Pointer<UShort>(source3 + 2);
444 v.y.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y0) * 4);
445 v.y.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y1) * 4);
446 v.y.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y2) * 4);
447 v.y.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y3) * 4);
450 if(stream.count >= 3)
452 UShort z0 = *Pointer<UShort>(source0 + 4);
453 UShort z1 = *Pointer<UShort>(source1 + 4);
454 UShort z2 = *Pointer<UShort>(source2 + 4);
455 UShort z3 = *Pointer<UShort>(source3 + 4);
457 v.z.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z0) * 4);
458 v.z.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z1) * 4);
459 v.z.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z2) * 4);
460 v.z.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z3) * 4);
463 if(stream.count >= 4)
465 UShort w0 = *Pointer<UShort>(source0 + 6);
466 UShort w1 = *Pointer<UShort>(source1 + 6);
467 UShort w2 = *Pointer<UShort>(source2 + 6);
468 UShort w3 = *Pointer<UShort>(source3 + 6);
470 v.w.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w0) * 4);
471 v.w.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w1) * 4);
472 v.w.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w2) * 4);
473 v.w.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w3) * 4);
// One 32-bit value per vertex, loaded through a Float pointer into the lanes of v.x.
477 case STREAMTYPE_INDICES:
479 v.x.x = *Pointer<Float>(source0);
480 v.x.y = *Pointer<Float>(source1);
481 v.x.z = *Pointer<Float>(source2);
482 v.x.w = *Pointer<Float>(source3);
// Packed signed 2:10:10:10. The four 32-bit words are gathered into one vector `src` (its
// declaration is not visible here). Each 10-bit field is moved to the top of the word and
// shifted back down by 22; w is the top two bits.
// NOTE(review): this relies on `>>` on Int4 being an arithmetic (sign-extending) shift - confirm.
485 case STREAMTYPE_2_10_10_10_INT:
488 src = Insert(src, *Pointer<Int>(source0), 0);
489 src = Insert(src, *Pointer<Int>(source1), 1);
490 src = Insert(src, *Pointer<Int>(source2), 2);
491 src = Insert(src, *Pointer<Int>(source3), 3);
493 v.x = Float4((src << 22) >> 22);
494 v.y = Float4((src << 12) >> 22);
495 v.z = Float4((src << 02) >> 22);
496 v.w = Float4(src >> 30);
498 if(stream.normalized)
// Scale x/y/z by 1/511 and clamp every component from below at -1.
500 v.x = Max(v.x * Float4(1.0f / 0x1FF), Float4(-1.0f));
501 v.y = Max(v.y * Float4(1.0f / 0x1FF), Float4(-1.0f));
502 v.z = Max(v.z * Float4(1.0f / 0x1FF), Float4(-1.0f));
503 v.w = Max(v.w, Float4(-1.0f));
// Packed unsigned 2:10:10:10: fields are shifted down and masked (10, 10, 10 and 2 bits).
507 case STREAMTYPE_2_10_10_10_UINT:
510 src = Insert(src, *Pointer<Int>(source0), 0);
511 src = Insert(src, *Pointer<Int>(source1), 1);
512 src = Insert(src, *Pointer<Int>(source2), 2);
513 src = Insert(src, *Pointer<Int>(source3), 3);
515 v.x = Float4(src & Int4(0x3FF));
516 v.y = Float4((src >> 10) & Int4(0x3FF));
517 v.z = Float4((src >> 20) & Int4(0x3FF));
518 v.w = Float4((src >> 30) & Int4(0x3));
520 if(stream.normalized)
// Scale by 1/1023 (x, y, z) and 1/3 (w).
522 v.x *= Float4(1.0f / 0x3FF);
523 v.y *= Float4(1.0f / 0x3FF);
524 v.z *= Float4(1.0f / 0x3FF);
525 v.w *= Float4(1.0f / 0x3);
// Components the stream does not provide get defaults: x, y, z = 0 and w = 1.
533 if(stream.count < 1) v.x = Float4(0.0f);
534 if(stream.count < 2) v.y = Float4(0.0f);
535 if(stream.count < 3) v.z = Float4(0.0f);
536 if(stream.count < 4) v.w = Float4(1.0f);
// Adjusts the output position register o[state.positionRegister] in place, depending on the
// processor state and the two global flags halfIntegerCoordinates / symmetricNormalizedDepth.
// NOTE(review): embedded line 558 is missing from this listing, so the pre-transformed branch
// may contain a statement that is not shown here.
541 void VertexRoutine::postTransform()
543 int pos = state.positionRegister;
// Pre-transformed positions: subtract the X0/Y0 offset, divide by W/H, and multiply x, y, z by 1/w.
546 if(state.preTransformed)
548 Float4 rhw = Float4(1.0f) / o[pos].w;
// DrawData stores these four values multiplied by 16 (per their names); scale them back by 1/16.
550 Float4 W = *Pointer<Float4>(data + OFFSET(DrawData,Wx16)) * Float4(1.0f / 16.0f);
551 Float4 H = *Pointer<Float4>(data + OFFSET(DrawData,Hx16)) * Float4(1.0f / 16.0f);
552 Float4 L = *Pointer<Float4>(data + OFFSET(DrawData,X0x16)) * Float4(1.0f / 16.0f);
553 Float4 T = *Pointer<Float4>(data + OFFSET(DrawData,Y0x16)) * Float4(1.0f / 16.0f);
555 o[pos].x = (o[pos].x - L) / W * rhw;
556 o[pos].y = (o[pos].y - T) / H * rhw;
557 o[pos].z = o[pos].z * rhw;
// Half-pixel offset, scaled by w because the position is still in homogeneous form.
561 if(!halfIntegerCoordinates && !state.preTransformed)
563 o[pos].x = o[pos].x + *Pointer<Float4>(data + OFFSET(DrawData,halfPixelX)) * o[pos].w;
564 o[pos].y = o[pos].y + *Pointer<Float4>(data + OFFSET(DrawData,halfPixelY)) * o[pos].w;
// Supersampling: an additional w-scaled offset taken from DrawData::XXXX / YYYY.
567 if(state.superSampling)
569 o[pos].x = o[pos].x + *Pointer<Float4>(data + OFFSET(DrawData,XXXX)) * o[pos].w;
570 o[pos].y = o[pos].y + *Pointer<Float4>(data + OFFSET(DrawData,YYYY)) * o[pos].w;
// Remap depth: z' = (z + w) / 2, which maps z = -w to 0 and z = +w to w.
573 if(symmetricNormalizedDepth && !state.fixedFunction)
575 o[pos].z = (o[pos].z + o[pos].w) * Float4(0.5f);
// Stores the results for four vertices into four consecutive Vertex slots starting at cacheLine.
// Slot k is addressed as cacheLine + sizeof(Vertex) * k. Written per slot: every enabled output
// register, one byte of clipFlags, and the projected position at Vertex::X.
// NOTE(review): this listing has gaps. The declaration of the local `v` and the statements that
// load it from the output registers (embedded lines 587-590 and 649-652), plus embedded lines
// 659-660, are not visible, so the origin of `v` in each section is presumed, not shown.
579 void VertexRoutine::writeCache(Pointer<Byte> &cacheLine)
583 for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
// Registers with a zero write mask are skipped.
585 if(state.output[i].write)
// Optional per-component clamp to [0, 1].
592 if(state.output[i].xClamp)
594 v.x = Max(v.x, Float4(0.0f));
595 v.x = Min(v.x, Float4(1.0f));
598 if(state.output[i].yClamp)
600 v.y = Max(v.y, Float4(0.0f));
601 v.y = Min(v.y, Float4(1.0f));
604 if(state.output[i].zClamp)
606 v.z = Max(v.z, Float4(0.0f));
607 v.z = Min(v.z, Float4(1.0f));
610 if(state.output[i].wClamp)
612 v.w = Max(v.w, Float4(0.0f));
613 v.w = Min(v.w, Float4(1.0f));
// Write mask 0x01 (x only): one scalar per vertex slot, no transpose needed.
616 if(state.output[i].write == 0x01)
618 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 0) = v.x.x;
619 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 1) = v.x.y;
620 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 2) = v.x.z;
621 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 3) = v.x.w;
// Other masks (the `else` lines are not visible here): transpose so each of v.x..v.w holds one
// vertex, using the 2-row variant when the mask is 0x02 and presumably the full 4x4 otherwise.
625 if(state.output[i].write == 0x02)
627 transpose2x4(v.x, v.y, v.z, v.w);
631 transpose4x4(v.x, v.y, v.z, v.w);
// 16-byte aligned stores, one full register per vertex slot.
634 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 0, 16) = v.x;
635 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 1, 16) = v.y;
636 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 2, 16) = v.z;
637 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 3, 16) = v.w;
// clipFlags packs one byte per vertex; unpack byte k into slot k.
642 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 0) = (clipFlags >> 0) & 0x0000000FF;
643 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 1) = (clipFlags >> 8) & 0x0000000FF;
644 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 2) = (clipFlags >> 16) & 0x0000000FF;
645 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 3) = (clipFlags >> 24) & 0x0000000FF;
647 int pos = state.positionRegister;
// Guard the division: in lanes where w compares equal to 0, OR in the bit pattern of 1.0f so
// that the divisor is non-zero.
654 Float4 w = As<Float4>(As<Int4>(v.w) | (As<Int4>(CmpEQ(v.w, Float4(0.0f))) & As<Int4>(Float4(1.0f))));
655 Float4 rhw = Float4(1.0f) / w;
// x and y become rounded integers (X0x16 + x/w * Wx16, and the Y equivalent) whose bits are
// kept in a Float4 via As<>.
657 v.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,X0x16)) + v.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Wx16))));
658 v.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,Y0x16)) + v.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData,Hx16))));
// Transpose to one vertex per register and store at Vertex::X of each slot.
662 transpose4x4(v.x, v.y, v.z, v.w);
664 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 0, 16) = v.x;
665 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 1, 16) = v.y;
666 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 2, 16) = v.z;
667 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 3, 16) = v.w;
// Copies one vertex from a cache slot to the output vertex.
// vertex: destination Vertex.
// cache:  source Vertex slot in the vertex cache.
// Only the output registers enabled in state.output[i].write are copied, followed by the
// position at Vertex::X and the clip flags.
670 void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cache)
672 for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
674 if(state.output[i].write)
// Whole 16-byte register copied as Int4 with 16-byte alignment, regardless of which
// components the write mask selects.
676 *Pointer<Int4>(vertex + OFFSET(Vertex,v[i]), 16) = *Pointer<Int4>(cache + OFFSET(Vertex,v[i]), 16);
// Unlike the register copies above, this copy does not pass an alignment argument.
680 *Pointer<Int4>(vertex + OFFSET(Vertex,X)) = *Pointer<Int4>(cache + OFFSET(Vertex,X));
681 *Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cache + OFFSET(Vertex,clipFlags));