1 // SwiftShader Software Renderer
3 // Copyright(c) 2005-2012 TransGaming Inc.
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
12 #include "VertexRoutine.hpp"
14 #include "VertexShader.hpp"
17 #include "Renderer.hpp"
18 #include "Constants.hpp"
// Coordinate-convention switches, declared extern here and defined elsewhere.
// Both are consulted by VertexRoutine::postTransform() when adjusting the position register.
23 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates
24 extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1]
// Creates a routine generator for one vertex processor state and one vertex shader.
// Both arguments are stored in the members of the same name through the initializer list.
// NOTE(review): the constructor body is not visible in this view.
26 VertexRoutine::VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader) : state(state), shader(shader)
// Destructor.
// NOTE(review): the body is not visible in this view; confirm whether any cleanup is performed.
31 VertexRoutine::~VertexRoutine()
// Emits the code of the vertex processing function and stores the result in `routine`.
// The generated function returns Void and takes four byte pointers (vertex, batch, task, data).
// It walks the batch of vertex indices and keeps a tag-checked vertex cache inside the task;
// every output vertex is copied from a cache entry by writeVertex().
// NOTE(review): this view omits some original lines (see the gaps in the embedded numbering),
// including the declaration of `r` and the block delimiters; confirm scopes against the full file.
35 void VertexRoutine::generate()
37 Function<Void, Pointer<Byte>, Pointer<Byte>, Pointer<Byte>, Pointer<Byte> > function;
// Arguments of the generated function, in order: the output vertex pointer, the batch of
// vertex indices (read as UInt below), the VertexTask and the DrawData.
39 Pointer<Byte> vertex(function.arg(0));
40 Pointer<Byte> batch(function.arg(1));
41 Pointer<Byte> task(function.arg(2));
42 Pointer<Byte> data(function.arg(3));
// Generation-time flag: when set, cache tags use the exact index instead of the group index (see indexQ).
44 const bool texldl = state.shaderContainsTexldl;
// The vertex cache lives in the task: Vertex entries (`vertex`) plus UInt tags (`tag`).
46 Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache);
47 Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex);
48 Pointer<Byte> tagCache = cache + OFFSET(VertexCache,tag);
// Number of vertices to process, read from the task.
50 UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask,vertexCount));
// The constant tables are reached through a pointer stored in DrawData.
54 r.constants = *Pointer<Pointer<Byte> >(data + OFFSET(DrawData,constants));
// Fetch the next vertex index from the batch.
58 UInt index = *Pointer<UInt>(batch);
// tagIndex keeps bits 2..5 of the index, i.e. a multiple of 4 in [0, 60]. It serves both as the
// byte offset of a 4-byte tag and, multiplied by sizeof(Vertex), as the offset of a group of
// four consecutive cache entries.
59 UInt tagIndex = index & 0x0000003C;
// Tag value: the index rounded down to a multiple of four, or the exact index when texldl is set.
60 UInt indexQ = !texldl ? UInt(index & 0xFFFFFFFC) : index; // FIXME: TEXLDL hack to have independent LODs, hurts performance.
// Cache miss test: the stored tag differs from the tag of the requested group.
// NOTE(review): the extent of this If block is not visible here; the statements up to
// writeCache() appear to form the miss path — confirm.
62 If(*Pointer<UInt>(tagCache + tagIndex) != indexQ)
// Record the new tag for this cache group.
64 *Pointer<UInt>(tagCache + tagIndex) = indexQ;
// First entry of the four-vertex cache group; writeCache() stores four vertices starting here.
71 Pointer<Byte> cacheLine0 = vertexCache + tagIndex * UInt((int)sizeof(Vertex));
72 writeCache(cacheLine0, r);
// Bits 0..5 of the index select the individual cache entry (0..63) that is copied to the output.
75 UInt cacheIndex = index & 0x0000003F;
76 Pointer<Byte> cacheLine = vertexCache + cacheIndex * UInt((int)sizeof(Vertex));
77 writeVertex(vertex, cacheLine);
// Advance to the next output vertex and to the next index in the batch.
79 vertex += sizeof(Vertex);
80 batch += sizeof(unsigned int);
// Loop termination test.
// NOTE(review): the statement that updates vertexCount is not visible in this view.
83 Until(vertexCount == 0)
// Finalize the function into `routine`; the name is formatted from state.shaderID (%0.8X).
88 routine = function(L"VertexRoutine_%0.8X", state.shaderID);
// Accessor returning a Routine pointer.
// NOTE(review): the body is not visible in this view; presumably it returns the `routine`
// member assigned at the end of generate() — confirm.
91 Routine *VertexRoutine::getRoutine()
// Fills the 16 input registers r.v[0..15] from the vertex streams for the vertices at `index`.
//   r     - register file; r.data must point to the DrawData, r.v[] receives the results
//   index - vertex index forwarded to readStream()
96 void VertexRoutine::readInput(Registers &r, UInt &index)
98 for(int i = 0; i < 16; i++)
// Base address of stream i: element i of the pointer array DrawData::input.
100 Pointer<Byte> input = *Pointer<Pointer<Byte> >(r.data + OFFSET(DrawData,input) + sizeof(void*) * i);
// Stride of stream i: element i of the unsigned int array DrawData::stride.
101 UInt stride = *Pointer<UInt>(r.data + OFFSET(DrawData,stride) + sizeof(unsigned int) * i);
// Decode the stream according to its description in state.input[i].
103 r.v[i] = readStream(r, input, stride, state.input[i], index);
// Computes r.clipFlags from the position output register r.o[state.positionRegister].
// Each of the four lanes (vertices) is compared against the clip volume and the resulting lane
// masks are translated into flag bits through lookup tables in Constants.
// For pre-transformed vertices the position is first converted using the viewport values in DrawData.
// NOTE(review): some original lines are missing from this view (e.g. the declaration of `flags`).
107 void VertexRoutine::computeClipFlags(Registers &r)
109 int pos = state.positionRegister;
// Pre-transformed input: rescale x/y relative to the viewport and multiply x, y, z by 1/w.
112 if(state.preTransformed)
114 Float4 rhw = Float4(1.0f) / r.o[pos].w;
// DrawData stores these viewport values multiplied by 16; undo that scale here.
116 Float4 W = *Pointer<Float4>(r.data + OFFSET(DrawData,Wx16)) * Float4(1.0f / 16.0f);
117 Float4 H = *Pointer<Float4>(r.data + OFFSET(DrawData,Hx16)) * Float4(1.0f / 16.0f);
118 Float4 L = *Pointer<Float4>(r.data + OFFSET(DrawData,X0x16)) * Float4(1.0f / 16.0f);
119 Float4 T = *Pointer<Float4>(r.data + OFFSET(DrawData,Y0x16)) * Float4(1.0f / 16.0f);
121 r.o[pos].x = (r.o[pos].x - L) / W * rhw;
122 r.o[pos].y = (r.o[pos].y - T) / H * rhw;
123 r.o[pos].z = r.o[pos].z * rhw;
// Supersampling: shift x/y by the DrawData offsets XXXX/YYYY, scaled by w.
127 if(state.superSampling)
129 r.o[pos].x = r.o[pos].x + *Pointer<Float4>(r.data + OFFSET(DrawData,XXXX)) * r.o[pos].w;
130 r.o[pos].y = r.o[pos].y + *Pointer<Float4>(r.data + OFFSET(DrawData,YYYY)) * r.o[pos].w;
// Coordinates used for the x/y clip tests (may be offset below without changing the position).
133 Float4 clipX = r.o[pos].x;
134 Float4 clipY = r.o[pos].y;
136 if(state.multiSampling) // Clip at pixel edges instead of pixel centers
138 clipX += *Pointer<Float4>(r.data + OFFSET(DrawData,halfPixelX)) * r.o[pos].w;
139 clipY += *Pointer<Float4>(r.data + OFFSET(DrawData,halfPixelY)) * r.o[pos].w;
// Upper bounds: lane is outside when w < coordinate.
142 Int4 maxX = CmpLT(r.o[pos].w, clipX);
143 Int4 maxY = CmpLT(r.o[pos].w, clipY);
144 Int4 maxZ = CmpLT(r.o[pos].w, r.o[pos].z);
// Lower bounds: lane is outside when NOT(-w <= coordinate); z is tested against 0 instead of -w.
146 Int4 minX = CmpNLE(-r.o[pos].w, clipX);
147 Int4 minY = CmpNLE(-r.o[pos].w, clipY);
148 Int4 minZ = CmpNLE(Float4(0.0f), r.o[pos].z);
// For every plane: reduce the lane mask to an integer with SignMask() and use it as an index
// into a table of 4-byte entries in Constants; the entries are OR-ed into r.clipFlags.
152 flags = SignMask(maxX);
153 r.clipFlags = *Pointer<Int>(r.constants + OFFSET(Constants,maxX) + flags * 4); // FIXME: Array indexing
154 flags = SignMask(maxY);
155 r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,maxY) + flags * 4);
156 flags = SignMask(maxZ);
157 r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,maxZ) + flags * 4);
158 flags = SignMask(minX);
159 r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,minX) + flags * 4);
160 flags = SignMask(minY);
161 r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,minY) + flags * 4);
162 flags = SignMask(minZ);
163 r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,minZ) + flags * 4);
// Finiteness test: |x|, |y| and |z| must all be <= Constants::maxPos.
165 Int4 finiteX = CmpLE(Abs(r.o[pos].x), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos)));
166 Int4 finiteY = CmpLE(Abs(r.o[pos].y), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos)));
167 Int4 finiteZ = CmpLE(Abs(r.o[pos].z), *Pointer<Float4>(r.constants + OFFSET(Constants,maxPos)));
// The combined mask is translated through the Constants::fini table like the plane masks above.
169 flags = SignMask(finiteX & finiteY & finiteZ);
170 r.clipFlags |= *Pointer<Int>(r.constants + OFFSET(Constants,fini) + flags * 4);
// r.clipFlags packs one byte per vertex (see writeCache); 0xFB clears bit 2 in each of the four bytes.
172 if(state.preTransformed)
174 r.clipFlags &= 0xFBFBFBFB; // Don't clip against far clip plane
// Reads one vertex attribute for four vertices at once and returns it component-wise:
// v.x holds the x component of the four vertices (one per lane), v.y the y component, and so on.
//   r      - register file; r.constants gives access to the conversion tables
//   buffer - base address of the stream
//   stride - distance in bytes between consecutive vertices
//   stream - element type, component count and normalization flag
//   index  - index of the first vertex to read
// Components the stream does not provide are set to 0 (x, y, z) or 1 (w) at the end.
// NOTE(review): the declaration of `v`, the switch header and the else/break lines are not
// visible in this view; the comments below follow the visible case labels.
178 Vector4f VertexRoutine::readStream(Registers &r, Pointer<Byte> &buffer, UInt &stride, const Stream &stream, const UInt &index)
180 const bool texldl = state.shaderContainsTexldl;
// Addresses of the four source vertices. With texldl the added stride is 0, so source1..3
// alias source0 and all four lanes read the same vertex.
184 Pointer<Byte> source0 = buffer + index * stride;
185 Pointer<Byte> source1 = source0 + (!texldl ? stride : 0);
186 Pointer<Byte> source2 = source1 + (!texldl ? stride : 0);
187 Pointer<Byte> source3 = source2 + (!texldl ? stride : 0);
// 32-bit float components.
191 case STREAMTYPE_FLOAT:
193 if(stream.count == 0)
195 // Null stream, all default components
197 else if(stream.count == 1)
// Single component: gather one float per vertex directly into the lanes of v.x.
199 v.x.x = *Pointer<Float>(source0);
200 v.x.y = *Pointer<Float>(source1);
201 v.x.z = *Pointer<Float>(source2);
202 v.x.w = *Pointer<Float>(source3);
// Otherwise: load a full Float4 per vertex, then transpose so that each of v.x..v.w holds one component.
206 v.x = *Pointer<Float4>(source0);
207 v.y = *Pointer<Float4>(source1);
208 v.z = *Pointer<Float4>(source2);
209 v.w = *Pointer<Float4>(source3);
211 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
// Four unsigned bytes per vertex, converted to float.
215 case STREAMTYPE_BYTE:
217 v.x = Float4(*Pointer<Byte4>(source0));
218 v.y = Float4(*Pointer<Byte4>(source1));
219 v.z = Float4(*Pointer<Byte4>(source2));
220 v.w = Float4(*Pointer<Byte4>(source3));
222 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
// Normalized streams: scale each present component by Constants::unscaleByte.
224 if(stream.normalized)
226 if(stream.count >= 1) v.x *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
227 if(stream.count >= 2) v.y *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
228 if(stream.count >= 3) v.z *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
229 if(stream.count >= 4) v.w *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
// Four signed bytes per vertex; same scheme as above with Constants::unscaleSByte.
233 case STREAMTYPE_SBYTE:
235 v.x = Float4(*Pointer<SByte4>(source0));
236 v.y = Float4(*Pointer<SByte4>(source1));
237 v.z = Float4(*Pointer<SByte4>(source2));
238 v.w = Float4(*Pointer<SByte4>(source3));
240 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
242 if(stream.normalized)
244 if(stream.count >= 1) v.x *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleSByte));
245 if(stream.count >= 2) v.y *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleSByte));
246 if(stream.count >= 3) v.z *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleSByte));
247 if(stream.count >= 4) v.w *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleSByte));
// Packed color: four unsigned bytes, always scaled by unscaleByte and always four components.
251 case STREAMTYPE_COLOR:
253 v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
254 v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
255 v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
256 v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleByte));
258 transpose4x4(v.x, v.y, v.z, v.w);
// Four signed 16-bit integers per vertex.
266 case STREAMTYPE_SHORT:
268 v.x = Float4(*Pointer<Short4>(source0));
269 v.y = Float4(*Pointer<Short4>(source1));
270 v.z = Float4(*Pointer<Short4>(source2));
271 v.w = Float4(*Pointer<Short4>(source3));
273 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
275 if(stream.normalized)
277 if(stream.count >= 1) v.x *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleShort));
278 if(stream.count >= 2) v.y *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleShort));
279 if(stream.count >= 3) v.z *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleShort));
280 if(stream.count >= 4) v.w *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleShort));
// Four unsigned 16-bit integers per vertex.
284 case STREAMTYPE_USHORT:
286 v.x = Float4(*Pointer<UShort4>(source0));
287 v.y = Float4(*Pointer<UShort4>(source1));
288 v.z = Float4(*Pointer<UShort4>(source2));
289 v.w = Float4(*Pointer<UShort4>(source3));
291 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
293 if(stream.normalized)
295 if(stream.count >= 1) v.x *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleUShort));
296 if(stream.count >= 2) v.y *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleUShort));
297 if(stream.count >= 3) v.z *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleUShort));
298 if(stream.count >= 4) v.w *= *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleUShort));
// Three unsigned 10-bit fields packed into one 32-bit word (bits 0-9, 10-19, 20-29).
// The fields are masked in place (not shifted) and converted to float; the position of
// the y and z fields is compensated after the transpose by the factors 1/2^10 and 1/2^20.
302 case STREAMTYPE_UDEC3:
// Vertex 0
308 x = y = z = *Pointer<Int>(source0);
310 v.x.x = Float(x & 0x000003FF);
311 v.x.y = Float(y & 0x000FFC00);
312 v.x.z = Float(z & 0x3FF00000);
// Vertex 1
318 x = y = z = *Pointer<Int>(source1);
320 v.y.x = Float(x & 0x000003FF);
321 v.y.y = Float(y & 0x000FFC00);
322 v.y.z = Float(z & 0x3FF00000);
// Vertex 2
328 x = y = z = *Pointer<Int>(source2);
330 v.z.x = Float(x & 0x000003FF);
331 v.z.y = Float(y & 0x000FFC00);
332 v.z.z = Float(z & 0x3FF00000);
// Vertex 3
338 x = y = z = *Pointer<Int>(source3);
340 v.w.x = Float(x & 0x000003FF);
341 v.w.y = Float(y & 0x000FFC00);
342 v.w.z = Float(z & 0x3FF00000);
345 transpose4x3(v.x, v.y, v.z, v.w);
// 0x400 = 2^10 and 0x100000 = 2^20: bring the y and z fields back to the range of a 10-bit value.
347 v.y *= Float4(1.0f / 0x00000400);
348 v.z *= Float4(1.0f / 0x00100000);
// Three signed, normalized 10-bit fields packed into one 32-bit word.
// Each field is shifted into the top 10 bits (mask 0xFFC00000) so its sign bit becomes the
// sign bit of the Int before the conversion to float; the scale below removes the 2^22 shift
// and divides by 511.
351 case STREAMTYPE_DEC3N:
// Vertex 0
357 x = y = z = *Pointer<Int>(source0);
359 v.x.x = Float((x << 22) & 0xFFC00000);
360 v.x.y = Float((y << 12) & 0xFFC00000);
361 v.x.z = Float((z << 2) & 0xFFC00000);
// Vertex 1
367 x = y = z = *Pointer<Int>(source1);
369 v.y.x = Float((x << 22) & 0xFFC00000);
370 v.y.y = Float((y << 12) & 0xFFC00000);
371 v.y.z = Float((z << 2) & 0xFFC00000);
// Vertex 2
377 x = y = z = *Pointer<Int>(source2);
379 v.z.x = Float((x << 22) & 0xFFC00000);
380 v.z.y = Float((y << 12) & 0xFFC00000);
381 v.z.z = Float((z << 2) & 0xFFC00000);
// Vertex 3
387 x = y = z = *Pointer<Int>(source3);
389 v.w.x = Float((x << 22) & 0xFFC00000);
390 v.w.y = Float((y << 12) & 0xFFC00000);
391 v.w.z = Float((z << 2) & 0xFFC00000);
394 transpose4x3(v.x, v.y, v.z, v.w);
// 0x00400000 = 2^22 undoes the shift; 511 is the largest positive 10-bit signed value.
396 v.x *= Float4(1.0f / 0x00400000 / 511.0f);
397 v.y *= Float4(1.0f / 0x00400000 / 511.0f);
398 v.z *= Float4(1.0f / 0x00400000 / 511.0f);
// Four 32-bit integers per vertex, converted to float and scaled by Constants::unscaleFixed.
401 case STREAMTYPE_FIXED:
403 v.x = Float4(*Pointer<Int4>(source0)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleFixed));
404 v.y = Float4(*Pointer<Int4>(source1)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleFixed));
405 v.z = Float4(*Pointer<Int4>(source2)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleFixed));
406 v.w = Float4(*Pointer<Int4>(source3)) * *Pointer<Float4>(r.constants + OFFSET(Constants,unscaleFixed));
408 transpose4xN(v.x, v.y, v.z, v.w, stream.count);
// 16-bit half floats: every value is used as an index into the Constants::half2float table
// (4-byte Float entries). Components are read at byte offsets 0, 2, 4 and 6 of each vertex.
411 case STREAMTYPE_HALF:
413 if(stream.count >= 1)
415 UShort x0 = *Pointer<UShort>(source0 + 0);
416 UShort x1 = *Pointer<UShort>(source1 + 0);
417 UShort x2 = *Pointer<UShort>(source2 + 0);
418 UShort x3 = *Pointer<UShort>(source3 + 0);
420 v.x.x = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(x0) * 4);
421 v.x.y = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(x1) * 4);
422 v.x.z = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(x2) * 4);
423 v.x.w = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(x3) * 4);
426 if(stream.count >= 2)
428 UShort y0 = *Pointer<UShort>(source0 + 2);
429 UShort y1 = *Pointer<UShort>(source1 + 2);
430 UShort y2 = *Pointer<UShort>(source2 + 2);
431 UShort y3 = *Pointer<UShort>(source3 + 2);
433 v.y.x = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(y0) * 4);
434 v.y.y = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(y1) * 4);
435 v.y.z = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(y2) * 4);
436 v.y.w = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(y3) * 4);
439 if(stream.count >= 3)
441 UShort z0 = *Pointer<UShort>(source0 + 4);
442 UShort z1 = *Pointer<UShort>(source1 + 4);
443 UShort z2 = *Pointer<UShort>(source2 + 4);
444 UShort z3 = *Pointer<UShort>(source3 + 4);
446 v.z.x = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(z0) * 4);
447 v.z.y = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(z1) * 4);
448 v.z.z = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(z2) * 4);
449 v.z.w = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(z3) * 4);
452 if(stream.count >= 4)
454 UShort w0 = *Pointer<UShort>(source0 + 6);
455 UShort w1 = *Pointer<UShort>(source1 + 6);
456 UShort w2 = *Pointer<UShort>(source2 + 6);
457 UShort w3 = *Pointer<UShort>(source3 + 6);
459 v.w.x = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(w0) * 4);
460 v.w.y = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(w1) * 4);
461 v.w.z = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(w2) * 4);
462 v.w.w = *Pointer<Float>(r.constants + OFFSET(Constants,half2float) + Int(w3) * 4);
// Index stream: one 32-bit value per vertex is loaded (as Float) into the lanes of v.x.
466 case STREAMTYPE_INDICES:
468 v.x.x = *Pointer<Float>(source0);
469 v.x.y = *Pointer<Float>(source1);
470 v.x.z = *Pointer<Float>(source2);
471 v.x.w = *Pointer<Float>(source3);
// Default values for components the stream does not supply: (0, 0, 0, 1).
478 if(stream.count < 1) v.x = Float4(0.0f);
479 if(stream.count < 2) v.y = Float4(0.0f);
480 if(stream.count < 3) v.z = Float4(0.0f);
481 if(stream.count < 4) v.w = Float4(1.0f);
// Adjusts the position output register r.o[state.positionRegister] according to the global
// coordinate conventions (halfIntegerCoordinates, symmetricNormalizedDepth).
// Both flags are evaluated while the routine is generated, not by the generated code.
486 void VertexRoutine::postTransform(Registers &r)
488 int pos = state.positionRegister;
// Shift x/y by DrawData::halfPixelX/Y. The offset is multiplied by w; presumably because the
// division by w happens later (see writeCache) — confirm.
490 if(halfIntegerCoordinates)
492 r.o[pos].x = r.o[pos].x - *Pointer<Float4>(r.data + OFFSET(DrawData,halfPixelX)) * r.o[pos].w;
493 r.o[pos].y = r.o[pos].y - *Pointer<Float4>(r.data + OFFSET(DrawData,halfPixelY)) * r.o[pos].w;
// z' = (z + w) / 2, so that z'/w = (z/w + 1) / 2: remaps a depth range of [-1, 1] to [0, 1].
496 if(symmetricNormalizedDepth)
498 r.o[pos].z = (r.o[pos].z + r.o[pos].w) * Float4(0.5f);
// Stores the results for four vertices into four consecutive Vertex entries of the cache,
// starting at `cacheLine`: the written output registers, the clip flags and the projected position.
//   cacheLine - address of the first of the four Vertex entries
//   r         - register file holding the shader outputs, r.clipFlags and r.data
// NOTE(review): the lines that declare `v` and load it from the registers are not visible in this view.
502 void VertexRoutine::writeCache(Pointer<Byte> &cacheLine, Registers &r)
// Output registers 0..11; only those with a non-zero write mask are stored.
506 for(int i = 0; i < 12; i++)
508 if(state.output[i].write)
// Optional per-component clamp to [0, 1].
515 if(state.output[i].xClamp)
517 v.x = Max(v.x, Float4(0.0f));
518 v.x = Min(v.x, Float4(1.0f));
521 if(state.output[i].yClamp)
523 v.y = Max(v.y, Float4(0.0f));
524 v.y = Min(v.y, Float4(1.0f));
527 if(state.output[i].zClamp)
529 v.z = Max(v.z, Float4(0.0f));
530 v.z = Min(v.z, Float4(1.0f));
533 if(state.output[i].wClamp)
535 v.w = Max(v.w, Float4(0.0f));
536 v.w = Min(v.w, Float4(1.0f));
// Write mask 0x01 (x only): store one scalar per vertex, lane k of v.x goes to vertex k.
539 if(state.output[i].write == 0x01)
541 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 0) = v.x.x;
542 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 1) = v.x.y;
543 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 2) = v.x.z;
544 *Pointer<Float>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 3) = v.x.w;
// Other masks: transpose from component-wise to per-vertex layout (the 2x4 variant is used for
// mask 0x02), then store one Float4 per vertex. The second Pointer argument (16) is presumably
// the alignment — confirm.
548 if(state.output[i].write == 0x02)
550 transpose2x4(v.x, v.y, v.z, v.w);
554 transpose4x4(v.x, v.y, v.z, v.w);
557 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 0, 16) = v.x;
558 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 1, 16) = v.y;
559 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 2, 16) = v.z;
560 *Pointer<Float4>(cacheLine + OFFSET(Vertex,v[i]) + sizeof(Vertex) * 3, 16) = v.w;
// r.clipFlags packs the flags of the four vertices, one byte each; byte k goes to vertex k.
565 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 0) = (r.clipFlags >> 0) & 0x0000000FF; // FIXME: unsigned char Vertex::clipFlags
566 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 1) = (r.clipFlags >> 8) & 0x0000000FF;
567 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 2) = (r.clipFlags >> 16) & 0x0000000FF;
568 *Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 3) = (r.clipFlags >> 24) & 0x0000000FF;
// Projected position. NOTE(review): `v` is presumably loaded from r.o[pos] on lines not shown here — confirm.
570 int pos = state.positionRegister;
// Guard against division by zero: in lanes where w == 0 the bit pattern of 1.0f is OR-ed into w.
577 Float4 w = As<Float4>(As<Int4>(v.w) | (As<Int4>(CmpEQ(v.w, Float4(0.0f))) & As<Int4>(Float4(1.0f))));
578 Float4 rhw = Float4(1.0f) / w;
// Perspective divide and viewport mapping with DrawData::X0x16/Wx16 (Y0x16/Hx16), rounded to
// integers. As<Float4> only reinterprets the bits, so integer coordinates are kept in v.x / v.y.
580 v.x = As<Float4>(RoundInt(*Pointer<Float4>(r.data + OFFSET(DrawData,X0x16)) + v.x * rhw * *Pointer<Float4>(r.data + OFFSET(DrawData,Wx16))));
581 v.y = As<Float4>(RoundInt(*Pointer<Float4>(r.data + OFFSET(DrawData,Y0x16)) + v.y * rhw * *Pointer<Float4>(r.data + OFFSET(DrawData,Hx16))));
// Back to per-vertex layout, then store four values per vertex starting at Vertex::X.
585 transpose4x4(v.x, v.y, v.z, v.w);
587 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 0, 16) = v.x;
588 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 1, 16) = v.y;
589 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 2, 16) = v.z;
590 *Pointer<Float4>(cacheLine + OFFSET(Vertex,X) + sizeof(Vertex) * 3, 16) = v.w;
// Copies one vertex from a cache entry to the output vertex.
//   vertex - destination Vertex
//   cache  - source Vertex entry in the vertex cache
// Only the fields filled in by writeCache() are copied.
593 void VertexRoutine::writeVertex(Pointer<Byte> &vertex, Pointer<Byte> &cache)
// Output registers 0..11 that have a non-zero write mask, one Float4 each.
595 for(int i = 0; i < 12; i++)
597 if(state.output[i].write)
599 *Pointer<Float4>(vertex + OFFSET(Vertex,v[i])) = *Pointer<Float4>(cache + OFFSET(Vertex,v[i]));
// Clip flags (copied as an Int) and the four values stored at Vertex::X (projected position).
603 *Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cache + OFFSET(Vertex,clipFlags));
604 *Pointer<Float4>(vertex + OFFSET(Vertex,X)) = *Pointer<Float4>(cache + OFFSET(Vertex,X));