Pointer<Byte> tagCache = cache + OFFSET(VertexCache,tag);
UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask,vertexCount));
+ UInt primitiveNumber = *Pointer<UInt>(task + OFFSET(VertexTask, primitiveStart));
+ UInt indexInPrimitive = 0;
constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants));
*Pointer<UInt>(tagCache + tagIndex) = indexQ;
readInput(indexQ);
- pipeline();
+ pipeline(indexQ);
postTransform();
computeClipFlags();
Pointer<Byte> cacheLine = vertexCache + cacheIndex * UInt((int)sizeof(Vertex));
writeVertex(vertex, cacheLine);
+ if(state.transformFeedbackEnabled != 0)
+ {
+ transformFeedback(vertex, primitiveNumber, indexInPrimitive);
+
+ indexInPrimitive++;
+ If(indexInPrimitive == 3)
+ {
+ primitiveNumber++;
+ indexInPrimitive = 0;
+ }
+ }
+
vertex += sizeof(Vertex);
batch += sizeof(unsigned int);
vertexCount--;
void VertexRoutine::readInput(UInt &index)
{
- for(int i = 0; i < VERTEX_ATTRIBUTES; i++)
+ for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
{
Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,input) + sizeof(void*) * i);
UInt stride = *Pointer<UInt>(data + OFFSET(DrawData,stride) + sizeof(unsigned int) * i);
Int4 maxX = CmpLT(o[pos].w, o[pos].x);
Int4 maxY = CmpLT(o[pos].w, o[pos].y);
Int4 maxZ = CmpLT(o[pos].w, o[pos].z);
-
Int4 minX = CmpNLE(-o[pos].w, o[pos].x);
Int4 minY = CmpNLE(-o[pos].w, o[pos].y);
- Int4 minZ = CmpNLE(Float4(0.0f), o[pos].z);
-
- Int flags;
-
- flags = SignMask(maxX);
- clipFlags = *Pointer<Int>(constants + OFFSET(Constants,maxX) + flags * 4); // FIXME: Array indexing
- flags = SignMask(maxY);
- clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxY) + flags * 4);
- flags = SignMask(maxZ);
- clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxZ) + flags * 4);
- flags = SignMask(minX);
- clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minX) + flags * 4);
- flags = SignMask(minY);
- clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minY) + flags * 4);
- flags = SignMask(minZ);
- clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minZ) + flags * 4);
+ Int4 minZ = symmetricNormalizedDepth ? CmpNLE(-o[pos].w, o[pos].z) : CmpNLE(Float4(0.0f), o[pos].z);
+
+ clipFlags = *Pointer<Int>(constants + OFFSET(Constants,maxX) + SignMask(maxX) * 4); // FIXME: Array indexing
+ clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxY) + SignMask(maxY) * 4);
+ clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,maxZ) + SignMask(maxZ) * 4);
+ clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minX) + SignMask(minX) * 4);
+ clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minY) + SignMask(minY) * 4);
+ clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,minZ) + SignMask(minZ) * 4);
Int4 finiteX = CmpLE(Abs(o[pos].x), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
Int4 finiteY = CmpLE(Abs(o[pos].y), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
Int4 finiteZ = CmpLE(Abs(o[pos].z), *Pointer<Float4>(constants + OFFSET(Constants,maxPos)));
- flags = SignMask(finiteX & finiteY & finiteZ);
- clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,fini) + flags * 4);
+ Int4 finiteXYZ = finiteX & finiteY & finiteZ;
+ clipFlags |= *Pointer<Int>(constants + OFFSET(Constants,fini) + SignMask(finiteXYZ) * 4);
if(state.preTransformed)
{
Pointer<Byte> source2 = source1 + (!textureSampling ? stride : 0);
Pointer<Byte> source3 = source2 + (!textureSampling ? stride : 0);
+ bool isNativeFloatAttrib = (stream.attribType == VertexShader::ATTRIBTYPE_FLOAT) || stream.normalized;
+
switch(stream.type)
{
case STREAMTYPE_FLOAT:
{
// Null stream, all default components
}
- else if(stream.count == 1)
- {
- v.x.x = *Pointer<Float>(source0);
- v.x.y = *Pointer<Float>(source1);
- v.x.z = *Pointer<Float>(source2);
- v.x.w = *Pointer<Float>(source3);
- }
else
{
- v.x = *Pointer<Float4>(source0);
- v.y = *Pointer<Float4>(source1);
- v.z = *Pointer<Float4>(source2);
- v.w = *Pointer<Float4>(source3);
+ if(stream.count == 1)
+ {
+ v.x.x = *Pointer<Float>(source0);
+ v.x.y = *Pointer<Float>(source1);
+ v.x.z = *Pointer<Float>(source2);
+ v.x.w = *Pointer<Float>(source3);
+ }
+ else
+ {
+ v.x = *Pointer<Float4>(source0);
+ v.y = *Pointer<Float4>(source1);
+ v.z = *Pointer<Float4>(source2);
+ v.w = *Pointer<Float4>(source3);
- transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+ }
+
+ // Float stream feeding an integer shader attribute: convert every component the
+ // stream provides and keep the integer bit pattern in the Float4 register.
+ // Each component is written back to itself; routing all results into v.x would
+ // leave y/z/w unconverted and overwrite x with the last converted component.
+ switch(stream.attribType)
+ {
+ case VertexShader::ATTRIBTYPE_INT:
+ 	if(stream.count >= 1) v.x = As<Float4>(Int4(v.x));
+ 	if(stream.count >= 2) v.y = As<Float4>(Int4(v.y));
+ 	if(stream.count >= 3) v.z = As<Float4>(Int4(v.z));
+ 	if(stream.count >= 4) v.w = As<Float4>(Int4(v.w));
+ 	break;
+ case VertexShader::ATTRIBTYPE_UINT:
+ 	if(stream.count >= 1) v.x = As<Float4>(UInt4(v.x));
+ 	if(stream.count >= 2) v.y = As<Float4>(UInt4(v.y));
+ 	if(stream.count >= 3) v.z = As<Float4>(UInt4(v.z));
+ 	if(stream.count >= 4) v.w = As<Float4>(UInt4(v.w));
+ 	break;
+ default:
+ 	// Float attributes (and anything else) keep the loaded float values as-is.
+ 	break;
+ }
}
}
break;
case STREAMTYPE_BYTE:
+ if(isNativeFloatAttrib) // Stream: UByte, Shader attrib: Float
{
v.x = Float4(*Pointer<Byte4>(source0));
v.y = Float4(*Pointer<Byte4>(source1));
if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte));
}
}
+ else // Stream: UByte, Shader attrib: Int / UInt
+ {
+ v.x = As<Float4>(Int4(*Pointer<Byte4>(source0)));
+ v.y = As<Float4>(Int4(*Pointer<Byte4>(source1)));
+ v.z = As<Float4>(Int4(*Pointer<Byte4>(source2)));
+ v.w = As<Float4>(Int4(*Pointer<Byte4>(source3)));
+
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+ }
break;
case STREAMTYPE_SBYTE:
+ if(isNativeFloatAttrib) // Stream: SByte, Shader attrib: Float
{
v.x = Float4(*Pointer<SByte4>(source0));
v.y = Float4(*Pointer<SByte4>(source1));
if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte));
}
}
+ else // Stream: SByte, Shader attrib: Int / UInt
+ {
+ v.x = As<Float4>(Int4(*Pointer<SByte4>(source0)));
+ v.y = As<Float4>(Int4(*Pointer<SByte4>(source1)));
+ v.z = As<Float4>(Int4(*Pointer<SByte4>(source2)));
+ v.w = As<Float4>(Int4(*Pointer<SByte4>(source3)));
+
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+ }
break;
case STREAMTYPE_COLOR:
{
}
break;
case STREAMTYPE_SHORT:
+ if(isNativeFloatAttrib) // Stream: Short, Shader attrib: Float
{
v.x = Float4(*Pointer<Short4>(source0));
v.y = Float4(*Pointer<Short4>(source1));
if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort));
}
}
+ else // Stream: Short, Shader attrib: Int/UInt, no type conversion
+ {
+ v.x = As<Float4>(Int4(*Pointer<Short4>(source0)));
+ v.y = As<Float4>(Int4(*Pointer<Short4>(source1)));
+ v.z = As<Float4>(Int4(*Pointer<Short4>(source2)));
+ v.w = As<Float4>(Int4(*Pointer<Short4>(source3)));
+
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+ }
break;
case STREAMTYPE_USHORT:
+ if(isNativeFloatAttrib) // Stream: UShort, Shader attrib: Float
{
v.x = Float4(*Pointer<UShort4>(source0));
v.y = Float4(*Pointer<UShort4>(source1));
if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort));
}
}
+ else // Stream: UShort, Shader attrib: Int/UInt, no type conversion
+ {
+ v.x = As<Float4>(Int4(*Pointer<UShort4>(source0)));
+ v.y = As<Float4>(Int4(*Pointer<UShort4>(source1)));
+ v.z = As<Float4>(Int4(*Pointer<UShort4>(source2)));
+ v.w = As<Float4>(Int4(*Pointer<UShort4>(source3)));
+
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+ }
break;
case STREAMTYPE_INT:
+ if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float
{
- if(stream.normalized)
- {
- v.x = Float4(*Pointer<Int4>(source0));
- v.y = Float4(*Pointer<Int4>(source1));
- v.z = Float4(*Pointer<Int4>(source2));
- v.w = Float4(*Pointer<Int4>(source3));
+ v.x = Float4(*Pointer<Int4>(source0));
+ v.y = Float4(*Pointer<Int4>(source1));
+ v.z = Float4(*Pointer<Int4>(source2));
+ v.w = Float4(*Pointer<Int4>(source3));
- transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+ if(stream.normalized)
+ {
if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
}
- else
- {
- v.x = As<Float4>(*Pointer<Int4>(source0));
- v.y = As<Float4>(*Pointer<Int4>(source1));
- v.z = As<Float4>(*Pointer<Int4>(source2));
- v.w = As<Float4>(*Pointer<Int4>(source3));
+ }
+ else // Stream: Int, Shader attrib: Int/UInt, no type conversion
+ {
+ v.x = *Pointer<Float4>(source0);
+ v.y = *Pointer<Float4>(source1);
+ v.z = *Pointer<Float4>(source2);
+ v.w = *Pointer<Float4>(source3);
- transpose4xN(v.x, v.y, v.z, v.w, stream.count);
- }
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
}
break;
case STREAMTYPE_UINT:
+ if(isNativeFloatAttrib) // Stream: UInt, Shader attrib: Float
{
- if(stream.normalized)
- {
- v.x = Float4(*Pointer<UInt4>(source0));
- v.y = Float4(*Pointer<UInt4>(source1));
- v.z = Float4(*Pointer<UInt4>(source2));
- v.w = Float4(*Pointer<UInt4>(source3));
+ v.x = Float4(*Pointer<UInt4>(source0));
+ v.y = Float4(*Pointer<UInt4>(source1));
+ v.z = Float4(*Pointer<UInt4>(source2));
+ v.w = Float4(*Pointer<UInt4>(source3));
- transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+ if(stream.normalized)
+ {
if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
}
- else
- {
- v.x = As<Float4>(*Pointer<UInt4>(source0));
- v.y = As<Float4>(*Pointer<UInt4>(source1));
- v.z = As<Float4>(*Pointer<UInt4>(source2));
- v.w = As<Float4>(*Pointer<UInt4>(source3));
+ }
+ else // Stream: UInt, Shader attrib: Int/UInt, no type conversion
+ {
+ v.x = *Pointer<Float4>(source0);
+ v.y = *Pointer<Float4>(source1);
+ v.z = *Pointer<Float4>(source2);
+ v.w = *Pointer<Float4>(source3);
- transpose4xN(v.x, v.y, v.z, v.w, stream.count);
- }
+ transpose4xN(v.x, v.y, v.z, v.w, stream.count);
}
break;
case STREAMTYPE_UDEC3:
if(stream.count < 1) v.x = Float4(0.0f);
if(stream.count < 2) v.y = Float4(0.0f);
if(stream.count < 3) v.z = Float4(0.0f);
- if(stream.count < 4) v.w = Float4(1.0f);
+ // A w component the stream does not supply defaults to one. Integer attributes need the
+ // integer value 1 (not the float bit pattern, and not 0): omitted components read as (0, 0, 1).
+ if(stream.count < 4) v.w = isNativeFloatAttrib ? As<Float4>(Float4(1.0f)) : As<Float4>(Int4(1));
return v;
}
o[pos].x = o[pos].x + *Pointer<Float4>(data + OFFSET(DrawData,XXXX)) * o[pos].w;
o[pos].y = o[pos].y + *Pointer<Float4>(data + OFFSET(DrawData,YYYY)) * o[pos].w;
}
-
- if(symmetricNormalizedDepth && !state.fixedFunction)
- {
- o[pos].z = (o[pos].z + o[pos].w) * Float4(0.5f);
- }
}
void VertexRoutine::writeCache(Pointer<Byte> &cacheLine)
}
else
{
- if(state.output[i].write == 0x02)
+ if(state.output[i].write == 0x03)
{
transpose2x4(v.x, v.y, v.z, v.w);
}
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 2) = (clipFlags >> 16) & 0x0000000FF;
*Pointer<Int>(cacheLine + OFFSET(Vertex,clipFlags) + sizeof(Vertex) * 3) = (clipFlags >> 24) & 0x0000000FF;
+ // Viewport transform
int pos = state.positionRegister;
v.x = o[pos].x;
v.z = o[pos].z;
v.w = o[pos].w;
+ if(symmetricNormalizedDepth)
+ {
+ v.z = (v.z + v.w) * Float4(0.5f); // [-1, 1] -> [0, 1]
+ }
+
Float4 w = As<Float4>(As<Int4>(v.w) | (As<Int4>(CmpEQ(v.w, Float4(0.0f))) & As<Int4>(Float4(1.0f))));
Float4 rhw = Float4(1.0f) / w;
*Pointer<Int4>(vertex + OFFSET(Vertex,X)) = *Pointer<Int4>(cache + OFFSET(Vertex,X));
*Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cache + OFFSET(Vertex,clipFlags));
}
+
+ // Emits the code that copies one vertex's captured outputs into the transform feedback buffers.
+ //   vertex           - address of the Vertex record whose 'v' registers are read
+ //   primitiveNumber  - index of the primitive this vertex belongs to
+ //   indexInPrimitive - position of this vertex within that primitive
+ void VertexRoutine::transformFeedback(const Pointer<Byte> &vertex, const UInt &primitiveNumber, const UInt &indexInPrimitive)
+ {
+ 	// Vertices at positions >= state.verticesPerPrimitive are not written.
+ 	If(indexInPrimitive < state.verticesPerPrimitive)
+ 	{
+ 		// Linear index of this vertex among the vertices written so far.
+ 		UInt vertexSlot = primitiveNumber * state.verticesPerPrimitive + indexInPrimitive;
+
+ 		for(int varying = 0; varying < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; varying++)
+ 		{
+ 			// Plain C++ test on the routine state: entries whose mask bit is clear get no code.
+ 			if(!(state.transformFeedbackEnabled & (1ULL << varying)))
+ 			{
+ 				continue;
+ 			}
+
+ 			// Per-entry layout read from DrawData (presumably all expressed in floats - confirm against the code that fills vs.*).
+ 			UInt firstReg = *Pointer<UInt>(data + OFFSET(DrawData, vs.reg[varying]));
+ 			UInt rowCount = *Pointer<UInt>(data + OFFSET(DrawData, vs.row[varying]));
+ 			UInt colCount = *Pointer<UInt>(data + OFFSET(DrawData, vs.col[varying]));
+ 			UInt stride = *Pointer<UInt>(data + OFFSET(DrawData, vs.str[varying]));
+
+ 			// Destination: this entry's buffer pointer advanced to the current vertex's slot.
+ 			Pointer<Byte> dst = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, vs.t[varying])) + (vertexSlot * stride * sizeof(float));
+ 			// Source: the vertex's output registers, starting at the entry's first register.
+ 			Pointer<Byte> src = vertex + OFFSET(Vertex, v) + firstReg * sizeof(float);
+
+ 			For(UInt rowIndex = 0, rowIndex < rowCount, rowIndex++)
+ 			{
+ 				// Destination rows are packed (colCount floats each); source rows are one float4 apart.
+ 				UInt dstRowOffset = rowIndex * colCount * sizeof(float);
+ 				UInt srcRowOffset = rowIndex * sizeof(float4);
+
+ 				For(UInt colIndex = 0, colIndex < colCount, colIndex++)
+ 				{
+ 					UInt colOffset = colIndex * sizeof(float);
+ 					*Pointer<Float>(dst + dstRowOffset + colOffset) = *Pointer<Float>(src + srcRowOffset + colOffset);
+ 				}
+ 			}
+ 		}
+ 	}
+ }
}