OSDN Git Service

Make Blitter part of Renderer.
[android-x86/external-swiftshader.git] / src / Renderer / Renderer.cpp
index 6a5195d..a84423d 100644 (file)
@@ -1,13 +1,16 @@
-// SwiftShader Software Renderer
+// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
 //
-// Copyright(c) 2005-2012 TransGaming Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
 //
-// All rights reserved. No part of this software may be copied, distributed, transmitted,
-// transcribed, stored in a retrieval system, translated into any human or computer
-// language by any means, or disclosed to third parties without the explicit written
-// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
-// or implied, including but not limited to any patent rights, are granted to you.
+//    http://www.apache.org/licenses/LICENSE-2.0
 //
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include "Renderer.hpp"
 
 #include "Debug.hpp"
 #include "Reactor/Reactor.hpp"
 
-#include <malloc.h>
-#include <assert.h>
-#include <float.h>
-
 #undef max
 
 bool disableServer = true;
@@ -47,14 +46,20 @@ namespace sw
        extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
        extern bool booleanFaceRegister;
        extern bool fullPixelPositionRegister;
+       extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
+       extern bool secondaryColor;             // Specular lighting is applied after texturing
 
        extern bool forceWindowed;
        extern bool complementaryDepthBuffer;
        extern bool postBlendSRGB;
        extern bool exactColorRounding;
-       extern Context::TransparencyAntialiasing transparencyAntialiasing;
+       extern TransparencyAntialiasing transparencyAntialiasing;
        extern bool forceClearRegisters;
 
+       extern bool precacheVertex;
+       extern bool precacheSetup;
+       extern bool precachePixel;
+
        int batchSize = 128;
        int threadCount = 1;
        int unitCount = 1;
@@ -76,11 +81,11 @@ namespace sw
        {
                queries = 0;
 
-               vsDirtyConstF = 256 + 1;
+               vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
                vsDirtyConstI = 16;
                vsDirtyConstB = 16;
 
-               psDirtyConstF = 224;
+               psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
                psDirtyConstI = 16;
                psDirtyConstB = 16;
 
@@ -97,16 +102,19 @@ namespace sw
                deallocate(data);
        }
 
-       Renderer::Renderer(Context *context, bool halfIntegerCoordinates, bool symmetricNormalizedDepth, bool booleanFaceRegister, bool fullPixelPositionRegister, bool exactColorRounding) : context(context), VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), viewport()
+       Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
        {
-               sw::halfIntegerCoordinates = halfIntegerCoordinates;
-               sw::symmetricNormalizedDepth = symmetricNormalizedDepth;
-               sw::booleanFaceRegister = booleanFaceRegister;
-               sw::fullPixelPositionRegister = fullPixelPositionRegister;
+               sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
+               sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
+               sw::booleanFaceRegister = conventions.booleanFaceRegister;
+               sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
+               sw::leadingVertexFirst = conventions.leadingVertexFirst;
+               sw::secondaryColor = conventions.secondaryColor;
                sw::exactColorRounding = exactColorRounding;
 
                setRenderTarget(0, 0);
-               clipper = new Clipper();
+               clipper = new Clipper(symmetricNormalizedDepth);
+               blitter = new Blitter;
 
                updateViewMatrix = true;
                updateBaseMatrix = true;
@@ -170,7 +178,10 @@ namespace sw
                sync->destruct();
 
                delete clipper;
-               clipper = 0;
+               clipper = nullptr;
+
+               delete blitter;
+               blitter = nullptr;
 
                terminateThreads();
                delete resumeApp;
@@ -180,17 +191,22 @@ namespace sw
                        delete drawCall[draw];
                }
 
-               deleteBatches();
-
                delete swiftConfig;
        }
 
-       void Renderer::blit(Surface *source, const Rect &sRect, Surface *dest, const Rect &dRect, bool filter)
+       // Class-specific allocator: Renderer storage is obtained from sw::allocate() with 16 passed as the alignment, because this object has to be memory aligned
+       void* Renderer::operator new(size_t size)
+       {
+               ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class
+               return sw::allocate(sizeof(Renderer), 16);   // Requests sizeof(Renderer) rather than 'size'; the assert above checks that the two are equal
+       }
+
+       void Renderer::operator delete(void * mem)   // Counterpart of Renderer::operator new, which obtains the storage from sw::allocate()
        {
-               blitter.blit(source, sRect, dest, dRect, filter);
+               sw::deallocate(mem);   // Hands the block back through sw::deallocate()
        }
 
-       void Renderer::draw(Context::DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
+       void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
        {
                #ifndef NDEBUG
                        if(count < minPrimitives || count > maxPrimitives)
@@ -204,12 +220,12 @@ namespace sw
                updateConfiguration();
                updateClipper();
 
-               int ss = context->renderTarget[0]->getSuperSampleCount();
-               int ms = context->renderTarget[0]->getMultiSampleCount();
+               int ss = context->getSuperSampleCount();
+               int ms = context->getMultiSampleCount();
 
                for(int q = 0; q < ss; q++)
                {
-                       int oldMultiSampleMask = context->multiSampleMask;
+                       unsigned int oldMultiSampleMask = context->multiSampleMask;
                        context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
 
                        if(!context->multiSampleMask)
@@ -221,7 +237,7 @@ namespace sw
 
                        if(update || oldMultiSampleMask != context->multiSampleMask)
                        {
-                               vertexState = VertexProcessor::update();
+                               vertexState = VertexProcessor::update(drawType);
                                setupState = SetupProcessor::update();
                                pixelState = PixelProcessor::update();
 
@@ -232,31 +248,35 @@ namespace sw
 
                        int batch = batchSize / ms;
 
+                       int (Renderer::*setupPrimitives)(int batch, int count);
+
                        if(context->isDrawTriangle())
                        {
                                switch(context->fillMode)
                                {
-                               case Context::FILL_SOLID:
-                                       setupPrimitives = setupSolidTriangles;
+                               case FILL_SOLID:
+                                       setupPrimitives = &Renderer::setupSolidTriangles;
                                        break;
-                               case Context::FILL_WIREFRAME:
-                                       setupPrimitives = setupWireframeTriangle;
+                               case FILL_WIREFRAME:
+                                       setupPrimitives = &Renderer::setupWireframeTriangle;
                                        batch = 1;
                                        break;
-                               case Context::FILL_VERTEX:
-                                       setupPrimitives = setupVertexTriangle;
+                               case FILL_VERTEX:
+                                       setupPrimitives = &Renderer::setupVertexTriangle;
                                        batch = 1;
                                        break;
-                               default: ASSERT(false);
+                               default:
+                                       ASSERT(false);
+                                       return;
                                }
                        }
                        else if(context->isDrawLine())
                        {
-                               setupPrimitives = setupLines;
+                               setupPrimitives = &Renderer::setupLines;
                        }
                        else   // Point draw
                        {
-                               setupPrimitives = setupPoints;
+                               setupPrimitives = &Renderer::setupPoints;
                        }
 
                        DrawCall *draw = 0;
@@ -285,12 +305,17 @@ namespace sw
 
                        if(queries.size() != 0)
                        {
+                               draw->queries = new std::list<Query*>();
+                               bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
                                for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
                                {
-                                       InterlockedIncrement((volatile long*)&(*query)->reference);
+                                       Query* q = *query;
+                                       if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
+                                       {
+                                               atomicIncrement(&(q->reference));
+                                               draw->queries->push_back(q);
+                                       }
                                }
-
-                               draw->queries = new std::list<Query*>(queries);
                        }
 
                        draw->drawType = drawType;
@@ -303,13 +328,13 @@ namespace sw
                        draw->vertexRoutine = vertexRoutine;
                        draw->setupRoutine = setupRoutine;
                        draw->pixelRoutine = pixelRoutine;
-                       draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();;
+                       draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
                        draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
                        draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
                        draw->setupPrimitives = setupPrimitives;
                        draw->setupState = setupState;
 
-                       for(int i = 0; i < 16; i++)
+                       for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
                        {
                                draw->vertexStream[i] = context->input[i].resource;
                                data->input[i] = context->input[i].buffer;
@@ -328,12 +353,12 @@ namespace sw
 
                        draw->indexBuffer = context->indexBuffer;
 
-                       for(int sampler = 0; sampler < 20; sampler++)
+                       for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
                        {
                                draw->texture[sampler] = 0;
                        }
 
-                       for(int sampler = 0; sampler < 16; sampler++)
+                       for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
                        {
                                if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
                                {
@@ -364,8 +389,17 @@ namespace sw
                                        memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
                                        draw->psDirtyConstB = 0;
                                }
+
+                               PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
                        }
-                       
+                       else
+                       {
+                               for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
+                               {
+                                       draw->pUniformBuffers[i] = nullptr;
+                               }
+                       }
+
                        if(context->pixelShaderVersion() <= 0x0104)
                        {
                                for(int stage = 0; stage < 8; stage++)
@@ -382,14 +416,14 @@ namespace sw
                        {
                                if(context->vertexShader->getVersion() >= 0x0300)
                                {
-                                       for(int sampler = 0; sampler < 4; sampler++)
+                                       for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
                                        {
                                                if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
                                                {
-                                                       draw->texture[16 + sampler] = context->texture[16 + sampler];
-                                                       draw->texture[16 + sampler]->lock(PUBLIC, PRIVATE);
+                                                       draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
+                                                       draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
 
-                                                       data->mipmap[16 + sampler] = context->sampler[16 + sampler].getTextureData();
+                                                       data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
                                                }
                                        }
                                }
@@ -411,14 +445,32 @@ namespace sw
                                        memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
                                        draw->vsDirtyConstB = 0;
                                }
+
+                               if(context->vertexShader->isInstanceIdDeclared())
+                               {
+                                       data->instanceID = context->instanceID;
+                               }
+
+                               VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
+                               VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
                        }
                        else
                        {
                                data->ff = ff;
 
-                               draw->vsDirtyConstF = 256 + 1;
+                               draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
                                draw->vsDirtyConstI = 16;
                                draw->vsDirtyConstB = 16;
+
+                               for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
+                               {
+                                       draw->vUniformBuffers[i] = nullptr;
+                               }
+
+                               for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
+                               {
+                                       draw->transformFeedbackBuffers[i] = nullptr;
+                               }
                        }
 
                        if(pixelState.stencilActive)
@@ -437,11 +489,13 @@ namespace sw
                                data->point = point;
                        }
 
+                       data->lineWidth = context->lineWidth;
+
                        data->factor = factor;
 
-                       if(pixelState.transparencyAntialiasing == Context::TRANSPARENCY_ALPHA_TO_COVERAGE)
+                       if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
                        {
-                               float ref = (float)context->alphaReference * (1.0f / 255.0f);
+                               float ref = context->alphaReference * (1.0f / 255.0f);
                                float margin = sw::min(ref, 1.0f - ref);
 
                                if(ms == 4)
@@ -520,8 +574,8 @@ namespace sw
 
                                data->Wx16 = replicate(W * 16);
                                data->Hx16 = replicate(H * 16);
-                               data->X0x16 = replicate(X0 * 16);
-                               data->Y0x16 = replicate(Y0 * 16);
+                               data->X0x16 = replicate(X0 * 16 - 8);
+                               data->Y0x16 = replicate(Y0 * 16 - 8);
                                data->XXXX = replicate(X[s][q] / W);
                                data->YYYY = replicate(Y[s][q] / H);
                                data->halfPixelX = replicate(0.5f / W);
@@ -545,7 +599,7 @@ namespace sw
 
                        // Target
                        {
-                               for(int index = 0; index < 4; index++)
+                               for(int index = 0; index < RENDERTARGETS; index++)
                                {
                                        draw->renderTarget[index] = context->renderTarget[index];
 
@@ -557,17 +611,21 @@ namespace sw
                                        }
                                }
 
-                               draw->depthStencil = context->depthStencil;
+                               draw->depthBuffer = context->depthBuffer;
+                               draw->stencilBuffer = context->stencilBuffer;
 
-                               if(draw->depthStencil)
+                               if(draw->depthBuffer)
                                {
-                                       data->depthBuffer = (float*)context->depthStencil->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
-                                       data->depthPitchB = context->depthStencil->getInternalPitchB();
-                                       data->depthSliceB = context->depthStencil->getInternalSliceB();
+                                       data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
+                                       data->depthPitchB = context->depthBuffer->getInternalPitchB();
+                                       data->depthSliceB = context->depthBuffer->getInternalSliceB();
+                               }
 
-                                       data->stencilBuffer = (unsigned char*)context->depthStencil->lockStencil(q * ms, MANAGED);
-                                       data->stencilPitchB = context->depthStencil->getStencilPitchB();
-                                       data->stencilSliceB = context->depthStencil->getStencilSliceB();
+                               if(draw->stencilBuffer)
+                               {
+                                       data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, q * ms, MANAGED);
+                                       data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
+                                       data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
                                }
                        }
 
@@ -584,22 +642,49 @@ namespace sw
 
                        draw->references = (count + batch - 1) / batch;
 
-                       mutex.lock();
+                       schedulerMutex.lock();
                        nextDraw++;
-                       mutex.unlock();
+                       schedulerMutex.unlock();
 
-                       if(!threadsAwake)
+                       #ifndef NDEBUG
+                       if(threadCount == 1)   // Use main thread for draw execution
                        {
-                               suspend[0]->wait();
-
                                threadsAwake = 1;
                                task[0].type = Task::RESUME;
 
-                               resume[0]->signal();
+                               taskLoop(0);
+                       }
+                       else
+                       #endif
+                       {
+                               if(!threadsAwake)
+                               {
+                                       suspend[0]->wait();
+
+                                       threadsAwake = 1;
+                                       task[0].type = Task::RESUME;
+
+                                       resume[0]->signal();
+                               }
                        }
                }
        }
 
+       void Renderer::clear(void *pixel, Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)   // Thin wrapper around Blitter::clear()
+       {
+               blitter->clear(pixel, format, dest, dRect, rgbaMask);   // All arguments are passed unchanged to the Blitter that the Renderer constructor allocates
+       }
+
+       void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil)   // Thin wrapper around Blitter::blit()
+       {
+               blitter->blit(source, sRect, dest, dRect, filter, isStencil);   // Goes through the heap-allocated 'blitter' pointer; all arguments are passed unchanged
+       }
+
+       void Renderer::blit3D(Surface *source, Surface *dest)   // Thin wrapper around Blitter::blit3D()
+       {
+               blitter->blit3D(source, dest);   // Both surfaces are passed unchanged to the Blitter owned by this Renderer
+       }
+
        void Renderer::threadFunction(void *parameters)
        {
                Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
@@ -607,7 +692,8 @@ namespace sw
 
                if(logPrecision < IEEE)
                {
-                       _controlfp(_DN_FLUSH, _MCW_DN);
+                       CPUID::setFlushToZero(true);
+                       CPUID::setDenormalsAreZero(true);
                }
 
                renderer->threadLoop(threadIndex);
@@ -666,7 +752,7 @@ namespace sw
                                }
                        }
                }
-       
+
                // Find primitive tasks
                if(currentDraw == nextDraw)
                {
@@ -716,7 +802,7 @@ namespace sw
 
        void Renderer::scheduleTask(int threadIndex)
        {
-               mutex.lock();
+               schedulerMutex.lock();
 
                if((int)qSize < threadCount - threadsAwake + 1)
                {
@@ -753,7 +839,7 @@ namespace sw
                        threadsAwake--;
                }
 
-               mutex.unlock();
+               schedulerMutex.unlock();
        }
 
        void Renderer::executeTask(int threadIndex)
@@ -767,11 +853,11 @@ namespace sw
                case Task::PRIMITIVES:
                        {
                                int unit = task[threadIndex].primitiveUnit;
-                               
+
                                int input = primitiveProgress[unit].firstPrimitive;
                                int count = primitiveProgress[unit].primitiveCount;
                                DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
-                               int (*setupPrimitives)(Renderer *renderer, int batch, int count) = draw->setupPrimitives;
+                               int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;
 
                                processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
 
@@ -781,7 +867,12 @@ namespace sw
                                        startTick = time;
                                #endif
 
-                               int visible = setupPrimitives(this, unit, count);
+                               int visible = 0;
+
+                               if(!draw->setupState.rasterizerDiscard)
+                               {
+                                       visible = (this->*setupPrimitives)(unit, count);
+                               }
 
                                primitiveProgress[unit].visible = visible;
                                primitiveProgress[unit].references = clusterCount;
@@ -838,8 +929,9 @@ namespace sw
                DrawData &data = *draw.data;
                int primitive = primitiveProgress[unit].firstPrimitive;
                int count = primitiveProgress[unit].primitiveCount;
+               int processedPrimitives = primitive + count;
 
-               pixelProgress[cluster].processedPrimitives = primitive + count;
+               pixelProgress[cluster].processedPrimitives = processedPrimitives;
 
                if(pixelProgress[cluster].processedPrimitives >= draw.count)
                {
@@ -847,11 +939,11 @@ namespace sw
                        pixelProgress[cluster].processedPrimitives = 0;
                }
 
-               int ref = InterlockedDecrement((volatile long*)&primitiveProgress[unit].references);
+               int ref = atomicDecrement(&primitiveProgress[unit].references);
 
                if(ref == 0)
                {
-                       ref = InterlockedDecrement((volatile long*)&draw.references);
+                       ref = atomicDecrement(&draw.references);
 
                        if(ref == 0)
                        {
@@ -871,19 +963,29 @@ namespace sw
                                        {
                                                Query *query = *q;
 
-                                               for(int cluster = 0; cluster < clusterCount; cluster++)
+                                               switch(query->type)
                                                {
-                                                       InterlockedExchangeAdd((volatile long*)&query->data, data.occlusion[cluster]);
+                                               case Query::FRAGMENTS_PASSED:
+                                                       for(int cluster = 0; cluster < clusterCount; cluster++)
+                                                       {
+                                                               atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
+                                                       }
+                                                       break;
+                                               case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
+                                                       atomicAdd((volatile int*)&query->data, processedPrimitives);
+                                                       break;
+                                               default:
+                                                       break;
                                                }
 
-                                               InterlockedDecrement((volatile long*)&query->reference);
+                                               atomicDecrement(&query->reference);
                                        }
 
                                        delete draw.queries;
                                        draw.queries = 0;
                                }
 
-                               for(int i = 0; i < 4; i++)
+                               for(int i = 0; i < RENDERTARGETS; i++)
                                {
                                        if(draw.renderTarget[i])
                                        {
@@ -891,13 +993,17 @@ namespace sw
                                        }
                                }
 
-                               if(draw.depthStencil)
+                               if(draw.depthBuffer)
                                {
-                                       draw.depthStencil->unlockInternal();
-                                       draw.depthStencil->unlockStencil();
+                                       draw.depthBuffer->unlockInternal();
                                }
 
-                               for(int i = 0; i < 16 + 4; i++)
+                               if(draw.stencilBuffer)
+                               {
+                                       draw.stencilBuffer->unlockStencil();
+                               }
+
+                               for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
                                {
                                        if(draw.texture[i])
                                        {
@@ -905,7 +1011,7 @@ namespace sw
                                        }
                                }
 
-                               for(int i = 0; i < 16; i++)
+                               for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
                                {
                                        if(draw.vertexStream[i])
                                        {
@@ -918,6 +1024,26 @@ namespace sw
                                        draw.indexBuffer->unlock();
                                }
 
+                               for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
+                               {
+                                       if(draw.pUniformBuffers[i])
+                                       {
+                                               draw.pUniformBuffers[i]->unlock();
+                                       }
+                                       if(draw.vUniformBuffers[i])
+                                       {
+                                               draw.vUniformBuffers[i]->unlock();
+                                       }
+                               }
+
+                               for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
+                               {
+                                       if(draw.transformFeedbackBuffers[i])
+                                       {
+                                               draw.transformFeedbackBuffers[i]->unlock();
+                                       }
+                               }
+
                                draw.vertexRoutine->unbind();
                                draw.setupRoutine->unbind();
                                draw.pixelRoutine->unbind();
@@ -932,7 +1058,7 @@ namespace sw
                pixelProgress[cluster].executing = false;
        }
 
-       void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int count, unsigned int loop, int thread)
+       void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
        {
                Triangle *triangle = triangleBatch[unit];
                DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
@@ -952,11 +1078,11 @@ namespace sw
 
                switch(draw->drawType)
                {
-               case Context::DRAW_POINTLIST:
+               case DRAW_POINTLIST:
                        {
                                unsigned int index = start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index;
                                        batch[i][1] = index;
@@ -966,11 +1092,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_LINELIST:
+               case DRAW_LINELIST:
                        {
                                unsigned int index = 2 * start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index + 0;
                                        batch[i][1] = index + 1;
@@ -980,11 +1106,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_LINESTRIP:
+               case DRAW_LINESTRIP:
                        {
                                unsigned int index = start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index + 0;
                                        batch[i][1] = index + 1;
@@ -994,11 +1120,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_LINELOOP:
+               case DRAW_LINELOOP:
                        {
                                unsigned int index = start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = (index + 0) % loop;
                                        batch[i][1] = (index + 1) % loop;
@@ -1008,11 +1134,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_TRIANGLELIST:
+               case DRAW_TRIANGLELIST:
                        {
                                unsigned int index = 3 * start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index + 0;
                                        batch[i][1] = index + 1;
@@ -1022,11 +1148,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_TRIANGLESTRIP:
+               case DRAW_TRIANGLESTRIP:
                        {
                                unsigned int index = start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index + 0;
                                        batch[i][1] = index + (index & 1) + 1;
@@ -1036,11 +1162,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_TRIANGLEFAN:
+               case DRAW_TRIANGLEFAN:
                        {
                                unsigned int index = start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index + 1;
                                        batch[i][1] = index + 2;
@@ -1050,11 +1176,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDPOINTLIST8:
+               case DRAW_INDEXEDPOINTLIST8:
                        {
                                const unsigned char *index = (const unsigned char*)indices + start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = *index;
                                        batch[i][1] = *index;
@@ -1064,11 +1190,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDPOINTLIST16:
+               case DRAW_INDEXEDPOINTLIST16:
                        {
                                const unsigned short *index = (const unsigned short*)indices + start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = *index;
                                        batch[i][1] = *index;
@@ -1078,11 +1204,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDPOINTLIST32:
+               case DRAW_INDEXEDPOINTLIST32:
                        {
                                const unsigned int *index = (const unsigned int*)indices + start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = *index;
                                        batch[i][1] = *index;
@@ -1092,11 +1218,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDLINELIST8:
+               case DRAW_INDEXEDLINELIST8:
                        {
                                const unsigned char *index = (const unsigned char*)indices + 2 * start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index[0];
                                        batch[i][1] = index[1];
@@ -1106,11 +1232,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDLINELIST16:
+               case DRAW_INDEXEDLINELIST16:
                        {
                                const unsigned short *index = (const unsigned short*)indices + 2 * start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index[0];
                                        batch[i][1] = index[1];
@@ -1120,11 +1246,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDLINELIST32:
+               case DRAW_INDEXEDLINELIST32:
                        {
                                const unsigned int *index = (const unsigned int*)indices + 2 * start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index[0];
                                        batch[i][1] = index[1];
@@ -1134,11 +1260,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDLINESTRIP8:
+               case DRAW_INDEXEDLINESTRIP8:
                        {
                                const unsigned char *index = (const unsigned char*)indices + start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index[0];
                                        batch[i][1] = index[1];
@@ -1148,11 +1274,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDLINESTRIP16:
+               case DRAW_INDEXEDLINESTRIP16:
                        {
                                const unsigned short *index = (const unsigned short*)indices + start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index[0];
                                        batch[i][1] = index[1];
@@ -1162,11 +1288,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDLINESTRIP32:
+               case DRAW_INDEXEDLINESTRIP32:
                        {
                                const unsigned int *index = (const unsigned int*)indices + start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index[0];
                                        batch[i][1] = index[1];
@@ -1176,47 +1302,47 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDLINELOOP8:
+               case DRAW_INDEXEDLINELOOP8:
                        {
-                               const unsigned char *index = (const unsigned char*)indices + start;
+                               const unsigned char *index = (const unsigned char*)indices;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
-                                       batch[i][0] = index[(i + 0) % loop];
-                                       batch[i][1] = index[(i + 1) % loop];
-                                       batch[i][2] = index[(i + 1) % loop];
+                                       batch[i][0] = index[(start + i + 0) % loop];
+                                       batch[i][1] = index[(start + i + 1) % loop];
+                                       batch[i][2] = index[(start + i + 1) % loop];
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDLINELOOP16:
+               case DRAW_INDEXEDLINELOOP16:
                        {
-                               const unsigned short *index = (const unsigned short*)indices + start;
+                               const unsigned short *index = (const unsigned short*)indices;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
-                                       batch[i][0] = index[(i + 0) % loop];
-                                       batch[i][1] = index[(i + 1) % loop];
-                                       batch[i][2] = index[(i + 1) % loop];
+                                       batch[i][0] = index[(start + i + 0) % loop];
+                                       batch[i][1] = index[(start + i + 1) % loop];
+                                       batch[i][2] = index[(start + i + 1) % loop];
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDLINELOOP32:
+               case DRAW_INDEXEDLINELOOP32:
                        {
-                               const unsigned int *index = (const unsigned int*)indices + start;
+                               const unsigned int *index = (const unsigned int*)indices;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
-                                       batch[i][0] = index[(i + 0) % loop];
-                                       batch[i][1] = index[(i + 1) % loop];
-                                       batch[i][2] = index[(i + 1) % loop];
+                                       batch[i][0] = index[(start + i + 0) % loop];
+                                       batch[i][1] = index[(start + i + 1) % loop];
+                                       batch[i][2] = index[(start + i + 1) % loop];
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDTRIANGLELIST8:
+               case DRAW_INDEXEDTRIANGLELIST8:
                        {
                                const unsigned char *index = (const unsigned char*)indices + 3 * start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index[0];
                                        batch[i][1] = index[1];
@@ -1226,11 +1352,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDTRIANGLELIST16:
+               case DRAW_INDEXEDTRIANGLELIST16:
                        {
                                const unsigned short *index = (const unsigned short*)indices + 3 * start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index[0];
                                        batch[i][1] = index[1];
@@ -1240,11 +1366,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDTRIANGLELIST32:
+               case DRAW_INDEXEDTRIANGLELIST32:
                        {
                                const unsigned int *index = (const unsigned int*)indices + 3 * start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index[0];
                                        batch[i][1] = index[1];
@@ -1254,11 +1380,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDTRIANGLESTRIP8:
+               case DRAW_INDEXEDTRIANGLESTRIP8:
                        {
                                const unsigned char *index = (const unsigned char*)indices + start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index[0];
                                        batch[i][1] = index[((start + i) & 1) + 1];
@@ -1268,11 +1394,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDTRIANGLESTRIP16:
+               case DRAW_INDEXEDTRIANGLESTRIP16:
                        {
                                const unsigned short *index = (const unsigned short*)indices + start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index[0];
                                        batch[i][1] = index[((start + i) & 1) + 1];
@@ -1282,11 +1408,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDTRIANGLESTRIP32:
+               case DRAW_INDEXEDTRIANGLESTRIP32:
                        {
                                const unsigned int *index = (const unsigned int*)indices + start;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index[0];
                                        batch[i][1] = index[((start + i) & 1) + 1];
@@ -1296,11 +1422,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDTRIANGLEFAN8:
+               case DRAW_INDEXEDTRIANGLEFAN8:
                        {
                                const unsigned char *index = (const unsigned char*)indices;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index[start + i + 1];
                                        batch[i][1] = index[start + i + 2];
@@ -1308,11 +1434,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDTRIANGLEFAN16:
+               case DRAW_INDEXEDTRIANGLEFAN16:
                        {
                                const unsigned short *index = (const unsigned short*)indices;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index[start + i + 1];
                                        batch[i][1] = index[start + i + 2];
@@ -1320,11 +1446,11 @@ namespace sw
                                }
                        }
                        break;
-               case Context::DRAW_INDEXEDTRIANGLEFAN32:
+               case DRAW_INDEXEDTRIANGLEFAN32:
                        {
                                const unsigned int *index = (const unsigned int*)indices;
 
-                               for(unsigned int i = 0; i < count; i++)
+                               for(unsigned int i = 0; i < triangleCount; i++)
                                {
                                        batch[i][0] = index[start + i + 1];
                                        batch[i][1] = index[start + i + 2];
@@ -1332,20 +1458,40 @@ namespace sw
                                }
                        }
                        break;
+               case DRAW_QUADLIST:
+                       {
+                               unsigned int index = 4 * start / 2;
+
+                               for(unsigned int i = 0; i < triangleCount; i += 2)
+                               {
+                                       batch[i+0][0] = index + 0;
+                                       batch[i+0][1] = index + 1;
+                                       batch[i+0][2] = index + 2;
+
+                                       batch[i+1][0] = index + 0;
+                                       batch[i+1][1] = index + 2;
+                                       batch[i+1][2] = index + 3;
+
+                                       index += 4;
+                               }
+                       }
+                       break;
                default:
                        ASSERT(false);
+                       return;
                }
 
-               task->count = count * 3;
+               task->primitiveStart = start;
+               task->vertexCount = triangleCount * 3;
                vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
        }
 
-       int Renderer::setupSolidTriangles(Renderer *renderer, int unit, int count)
+       int Renderer::setupSolidTriangles(int unit, int count)
        {
-               Triangle *triangle = renderer->triangleBatch[unit];
-               Primitive *primitive = renderer->primitiveBatch[unit];
+               Triangle *triangle = triangleBatch[unit];
+               Primitive *primitive = primitiveBatch[unit];
 
-               DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
+               DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
                SetupProcessor::State &state = draw.setupState;
                const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
 
@@ -1368,7 +1514,7 @@ namespace sw
 
                                if(clipFlagsOr != Clipper::CLIP_FINITE)
                                {
-                                       if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
+                                       if(!clipper->clip(polygon, clipFlagsOr, draw))
                                        {
                                                continue;
                                        }
@@ -1385,15 +1531,14 @@ namespace sw
                return visible;
        }
 
-       int Renderer::setupWireframeTriangle(Renderer *renderer, int unit, int count)
+       int Renderer::setupWireframeTriangle(int unit, int count)
        {
-               Triangle *triangle = renderer->triangleBatch[unit];
-               Primitive *primitive = renderer->primitiveBatch[unit];
+               Triangle *triangle = triangleBatch[unit];
+               Primitive *primitive = primitiveBatch[unit];
                int visible = 0;
 
-               DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
+               DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
                SetupProcessor::State &state = draw.setupState;
-               SetupProcessor::RoutinePointer setupRoutine = draw.setupPointer;
 
                const Vertex &v0 = triangle[0].v0;
                const Vertex &v1 = triangle[0].v1;
@@ -1401,11 +1546,11 @@ namespace sw
 
                float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
 
-               if(state.cullMode == Context::CULL_CLOCKWISE)
+               if(state.cullMode == CULL_CLOCKWISE)
                {
                        if(d >= 0) return 0;
                }
-               else if(state.cullMode == Context::CULL_COUNTERCLOCKWISE)
+               else if(state.cullMode == CULL_COUNTERCLOCKWISE)
                {
                        if(d <= 0) return 0;
                }
@@ -1421,15 +1566,15 @@ namespace sw
                        for(int i = 0; i < 2; i++)
                        {
                                triangle[1].v0.C[i] = triangle[0].v0.C[i];
-                               triangle[1].v1.C[i] = triangle[0].v1.C[i];
+                               triangle[1].v1.C[i] = triangle[0].v0.C[i];
                                triangle[2].v0.C[i] = triangle[0].v0.C[i];
-                               triangle[2].v1.C[i] = triangle[0].v1.C[i];
+                               triangle[2].v1.C[i] = triangle[0].v0.C[i];
                        }
                }
 
                for(int i = 0; i < 3; i++)
                {
-                       if(setupLine(renderer, *primitive, *triangle, draw))
+                       if(setupLine(*primitive, *triangle, draw))
                        {
                                primitive->area = 0.5f * d;
 
@@ -1442,14 +1587,14 @@ namespace sw
 
                return visible;
        }
-       
-       int Renderer::setupVertexTriangle(Renderer *renderer, int unit, int count)
+
+       int Renderer::setupVertexTriangle(int unit, int count)
        {
-               Triangle *triangle = renderer->triangleBatch[unit];
-               Primitive *primitive = renderer->primitiveBatch[unit];
+               Triangle *triangle = triangleBatch[unit];
+               Primitive *primitive = primitiveBatch[unit];
                int visible = 0;
 
-               DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
+               DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
                SetupProcessor::State &state = draw.setupState;
 
                const Vertex &v0 = triangle[0].v0;
@@ -1458,11 +1603,11 @@ namespace sw
 
                float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
 
-               if(state.cullMode == Context::CULL_CLOCKWISE)
+               if(state.cullMode == CULL_CLOCKWISE)
                {
                        if(d >= 0) return 0;
                }
-               else if(state.cullMode == Context::CULL_COUNTERCLOCKWISE)
+               else if(state.cullMode == CULL_COUNTERCLOCKWISE)
                {
                        if(d <= 0) return 0;
                }
@@ -1473,7 +1618,7 @@ namespace sw
 
                for(int i = 0; i < 3; i++)
                {
-                       if(setupPoint(renderer, *primitive, *triangle, draw))
+                       if(setupPoint(*primitive, *triangle, draw))
                        {
                                primitive->area = 0.5f * d;
 
@@ -1487,20 +1632,20 @@ namespace sw
                return visible;
        }
 
-       int Renderer::setupLines(Renderer *renderer, int unit, int count)
+       int Renderer::setupLines(int unit, int count)
        {
-               Triangle *triangle = renderer->triangleBatch[unit];
-               Primitive *primitive = renderer->primitiveBatch[unit];
+               Triangle *triangle = triangleBatch[unit];
+               Primitive *primitive = primitiveBatch[unit];
                int visible = 0;
 
-               DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
+               DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
                SetupProcessor::State &state = draw.setupState;
 
                int ms = state.multiSample;
 
                for(int i = 0; i < count; i++)
                {
-                       if(setupLine(renderer, *primitive, *triangle, draw))
+                       if(setupLine(*primitive, *triangle, draw))
                        {
                                primitive += ms;
                                visible++;
@@ -1512,20 +1657,20 @@ namespace sw
                return visible;
        }
 
-       int Renderer::setupPoints(Renderer *renderer, int unit, int count)
+       int Renderer::setupPoints(int unit, int count)
        {
-               Triangle *triangle = renderer->triangleBatch[unit];
-               Primitive *primitive = renderer->primitiveBatch[unit];
+               Triangle *triangle = triangleBatch[unit];
+               Primitive *primitive = primitiveBatch[unit];
                int visible = 0;
 
-               DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
+               DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
                SetupProcessor::State &state = draw.setupState;
 
                int ms = state.multiSample;
 
                for(int i = 0; i < count; i++)
                {
-                       if(setupPoint(renderer, *primitive, *triangle, draw))
+                       if(setupPoint(*primitive, *triangle, draw))
                        {
                                primitive += ms;
                                visible++;
@@ -1537,12 +1682,14 @@ namespace sw
                return visible;
        }
 
-       bool Renderer::setupLine(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
+       bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
        {
                const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
                const SetupProcessor::State &state = draw.setupState;
                const DrawData &data = *draw.data;
 
+               float lineWidth = data.lineWidth;
+
                Vertex &v0 = triangle.v0;
                Vertex &v1 = triangle.v1;
 
@@ -1577,7 +1724,7 @@ namespace sw
                        P[2] = P1;
                        P[3] = P0;
 
-                       float scale = 0.5f / sqrt(dx*dx + dy*dy);
+                       float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
 
                        dx *= scale;
                        dy *= scale;
@@ -1594,19 +1741,19 @@ namespace sw
 
                        P[0].x += -dy0w + -dx0w;
                        P[0].y += -dx0h + +dy0h;
-                       C[0] = computeClipFlags(P[0], data);
+                       C[0] = clipper->computeClipFlags(P[0]);
 
                        P[1].x += -dy1w + +dx1w;
                        P[1].y += -dx1h + +dy1h;
-                       C[1] = computeClipFlags(P[1], data);
+                       C[1] = clipper->computeClipFlags(P[1]);
 
                        P[2].x += +dy1w + +dx1w;
                        P[2].y += +dx1h + -dy1h;
-                       C[2] = computeClipFlags(P[2], data);
+                       C[2] = clipper->computeClipFlags(P[2]);
 
                        P[3].x += +dy0w + -dx0w;
                        P[3].y += +dx0h + +dy0h;
-                       C[3] = computeClipFlags(P[3], data);
+                       C[3] = clipper->computeClipFlags(P[3]);
 
                        if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
                        {
@@ -1616,7 +1763,7 @@ namespace sw
 
                                if(clipFlagsOr != Clipper::CLIP_FINITE)
                                {
-                                       if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
+                                       if(!clipper->clip(polygon, clipFlagsOr, draw))
                                        {
                                                return false;
                                        }
@@ -1639,35 +1786,35 @@ namespace sw
                        P[6] = P1;
                        P[7] = P1;
 
-                       float dx0 = 0.5f * P0.w / W;
-                       float dy0 = 0.5f * P0.w / H;
+                       float dx0 = lineWidth * 0.5f * P0.w / W;
+                       float dy0 = lineWidth * 0.5f * P0.w / H;
 
-                       float dx1 = 0.5f * P1.w / W;
-                       float dy1 = 0.5f * P1.w / H;
+                       float dx1 = lineWidth * 0.5f * P1.w / W;
+                       float dy1 = lineWidth * 0.5f * P1.w / H;
 
                        P[0].x += -dx0;
-                       C[0] = computeClipFlags(P[0], data);
+                       C[0] = clipper->computeClipFlags(P[0]);
 
                        P[1].y += +dy0;
-                       C[1] = computeClipFlags(P[1], data);
+                       C[1] = clipper->computeClipFlags(P[1]);
 
                        P[2].x += +dx0;
-                       C[2] = computeClipFlags(P[2], data);
+                       C[2] = clipper->computeClipFlags(P[2]);
 
                        P[3].y += -dy0;
-                       C[3] = computeClipFlags(P[3], data);
+                       C[3] = clipper->computeClipFlags(P[3]);
 
                        P[4].x += -dx1;
-                       C[4] = computeClipFlags(P[4], data);
+                       C[4] = clipper->computeClipFlags(P[4]);
 
                        P[5].y += +dy1;
-                       C[5] = computeClipFlags(P[5], data);
+                       C[5] = clipper->computeClipFlags(P[5]);
 
                        P[6].x += +dx1;
-                       C[6] = computeClipFlags(P[6], data);
+                       C[6] = clipper->computeClipFlags(P[6]);
 
                        P[7].y += -dy1;
-                       C[7] = computeClipFlags(P[7], data);
+                       C[7] = clipper->computeClipFlags(P[7]);
 
                        if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
                        {
@@ -1722,7 +1869,7 @@ namespace sw
 
                                if(clipFlagsOr != Clipper::CLIP_FINITE)
                                {
-                                       if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
+                                       if(!clipper->clip(polygon, clipFlagsOr, draw))
                                        {
                                                return false;
                                        }
@@ -1735,7 +1882,7 @@ namespace sw
                return false;
        }
 
-       bool Renderer::setupPoint(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
+       bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)   // The explicit Renderer* argument is dropped; the body now uses 'clipper' unqualified instead of renderer->clipper.
        {
                const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;   // Setup routine taken from the draw call; invoked at the end of this function.
                const SetupProcessor::State &state = draw.setupState;
@@ -1747,7 +1894,7 @@ namespace sw
 
                int pts = state.pointSizeRegister;
 
-               if(state.pointSizeRegister != 0xF)
+               if(state.pointSizeRegister != Unused)   // The literal 0xF is replaced by the named constant Unused.
                {
                        pSize = v.v[pts].y;   // Point size is read from the y component of register 'pts' of the vertex.
                }
@@ -1773,19 +1920,19 @@ namespace sw
 
                P[0].x -= X;   // P[0] is offset by (-X, +Y); X and Y are computed in lines elided from this hunk.
                P[0].y += Y;
-               C[0] = computeClipFlags(P[0], data);
+               C[0] = clipper->computeClipFlags(P[0]);   // Clip flags now come from the clipper object; no DrawData argument is passed.
 
                P[1].x += X;   // P[1] is offset by (+X, +Y).
                P[1].y += Y;
-               C[1] = computeClipFlags(P[1], data);
+               C[1] = clipper->computeClipFlags(P[1]);
 
                P[2].x += X;   // P[2] is offset by (+X, -Y).
                P[2].y -= Y;
-               C[2] = computeClipFlags(P[2], data);
+               C[2] = clipper->computeClipFlags(P[2]);
 
                P[3].x -= X;   // P[3] is offset by (-X, -Y).
                P[3].y -= Y;
-               C[3] = computeClipFlags(P[3], data);
+               C[3] = clipper->computeClipFlags(P[3]);
 
                triangle.v1 = triangle.v0;   // All three triangle vertices are copies of v0.
                triangle.v2 = triangle.v0;
@@ -1801,40 +1948,22 @@ namespace sw
 
                        if(clipFlagsOr != Clipper::CLIP_FINITE)   // Clip only when the flags differ from plain CLIP_FINITE (clipFlagsOr is computed in elided lines).
                        {
-                               if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
+                               if(!clipper->clip(polygon, clipFlagsOr, draw))
                                {
                                        return false;   // clip() returned false, so setup is abandoned for this point.
                                }
                        }
-                       
+
                        return setupRoutine(&primitive, &triangle, &polygon, &data);   // Result of the setup routine is the function's result.
                }
 
                return false;   // Reached when the enclosing condition (not shown in this hunk) does not hold.
        }
 
-       unsigned int Renderer::computeClipFlags(const float4 &v, const DrawData &data)   // Removed by this change; the call sites visible in this diff use clipper->computeClipFlags(P[i]) instead.
-       {
-               float clX = v.x + data.halfPixelX[0] * v.w;   // x shifted by halfPixelX[0] scaled by w.
-               float clY = v.y + data.halfPixelY[0] * v.w;   // y shifted by halfPixelY[0] scaled by w.
-
-               return ((clX > v.w)  << 0) |   // bit 0: shifted x is greater than w
-                          ((clY > v.w)  << 1) |   // bit 1: shifted y is greater than w
-                          ((v.z > v.w)  << 2) |   // bit 2: z is greater than w
-                          ((clX < -v.w) << 3) |   // bit 3: shifted x is less than -w
-                      ((clY < -v.w) << 4) |   // bit 4: shifted y is less than -w
-                          ((v.z < 0)    << 5) |   // bit 5: z is negative
-                          Clipper::CLIP_FINITE;   // FIXME: xyz finite (the flag is OR-ed in unconditionally; finiteness is not tested here)
-       }
-
-       void Renderer::initializeThreads(int threads)
+       void Renderer::initializeThreads()
        {
-               terminateThreads();
-               deleteBatches();
-
-               threadCount = threads;
-               unitCount = ceilPow2(threads);
-               clusterCount = ceilPow2(threads);
+               unitCount = ceilPow2(threadCount);
+               clusterCount = ceilPow2(threadCount);
 
                for(int i = 0; i < unitCount; i++)
                {
@@ -1878,7 +2007,7 @@ namespace sw
                                exitThreads = true;
                                resume[thread]->signal();
                                worker[thread]->join();
-                               
+
                                delete worker[thread];
                                worker[thread] = 0;
                                delete resume[thread];
@@ -1886,14 +2015,11 @@ namespace sw
                                delete suspend[thread];
                                suspend[thread] = 0;
                        }
-               
+
                        deallocate(vertexTask[thread]);
                        vertexTask[thread] = 0;
                }
-       }
 
-       void Renderer::deleteBatches()
-       {
                for(int i = 0; i < 16; i++)
                {
                        deallocate(triangleBatch[i]);
@@ -1908,9 +2034,9 @@ namespace sw
        {
                if(!vertexShader) return;
 
-               int count = vertexShader->getLength();
+               size_t count = vertexShader->getLength();
 
-               for(int i = 0; i < count; i++)
+               for(size_t i = 0; i < count; i++)
                {
                        const Shader::Instruction *instruction = vertexShader->getInstruction(i);
 
@@ -1952,9 +2078,9 @@ namespace sw
        {
                if(!pixelShader) return;
 
-               int count = pixelShader->getLength();
+               size_t count = pixelShader->getLength();
 
-               for(int i = 0; i < count; i++)
+               for(size_t i = 0; i < count; i++)
                {
                        const Shader::Instruction *instruction = pixelShader->getInstruction(i);
 
@@ -2002,29 +2128,29 @@ namespace sw
                context->sampleMask = mask;
        }
 
-       void Renderer::setTransparencyAntialiasing(Context::TransparencyAntialiasing transparencyAntialiasing)
+       void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)   // Parameter type is no longer qualified with Context::.
        {
                sw::transparencyAntialiasing = transparencyAntialiasing;   // Writes the sw-namespace variable; the sw:: qualifier is needed because the parameter has the same name.
        }
 
        bool Renderer::isReadWriteTexture(int sampler)   // Returns true when the resource bound to texture slot 'sampler' is also the resource of a bound render target or of the depth buffer.
        {
-               for(int index = 0; index < 4; index++)
+               for(int index = 0; index < RENDERTARGETS; index++)   // Loop bound is the RENDERTARGETS constant instead of the literal 4.
                {
                        if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())   // Null render target slots are skipped; resources are compared by pointer.
                        {
                                return true;
                        }
                }
-       
-               if(context->depthStencil && context->texture[sampler] == context->depthStencil->getResource())
+
+               if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())   // Same pointer comparison against depthBuffer (the old code read depthStencil).
                {
                        return true;
                }
 
                return false;   // No render target or depth buffer shares the texture's resource.
        }
-       
+
        void Renderer::updateClipper()
        {
                if(updateClipPlanes)
@@ -2056,15 +2182,15 @@ namespace sw
 
        void Renderer::setTextureResource(unsigned int sampler, Resource *resource)   // Stores 'resource' in the context's texture slot 'sampler'.
        {
-               ASSERT(sampler < (16 + 4));
+               ASSERT(sampler < TOTAL_IMAGE_UNITS);   // Bound is the TOTAL_IMAGE_UNITS constant instead of the literal (16 + 4); its value is not visible here.
 
                context->texture[sampler] = resource;
        }
 
        void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)   // Forwards (face, level, surface, type) to setTextureLevel on context->sampler[sampler].
        {
-               ASSERT(sampler < (16 + 4) && face < 6 && level < MIPMAP_LEVELS);
-               
+               ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);   // Sampler bound is now TOTAL_IMAGE_UNITS; face < 6 presumably means cube-map faces (confirm).
+
                context->sampler[sampler].setTextureLevel(face, level, surface, type);
        }
 
@@ -2176,7 +2302,7 @@ namespace sw
                }
        }
 
-       void Renderer::setMaxAnisotropy(SamplerType type, int sampler, unsigned int maxAnisotropy)
+       void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
        {
                if(type == SAMPLER_PIXEL)
                {
@@ -2188,6 +2314,102 @@ namespace sw
                }
        }
 
+       void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)   // Forwards swizzleR for 'sampler' to PixelProcessor or VertexProcessor, selected by 'type'.
+       {
+               if(type == SAMPLER_PIXEL)
+               {
+                       PixelProcessor::setSwizzleR(sampler, swizzleR);   // Class-qualified call; presumably a base-class member (confirm in Renderer.hpp).
+               }
+               else   // Every type other than SAMPLER_PIXEL goes to the vertex processor.
+               {
+                       VertexProcessor::setSwizzleR(sampler, swizzleR);
+               }
+       }
+
+       void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)   // Forwards swizzleG for 'sampler' to PixelProcessor or VertexProcessor, selected by 'type'.
+       {
+               if(type == SAMPLER_PIXEL)
+               {
+                       PixelProcessor::setSwizzleG(sampler, swizzleG);   // Class-qualified call; presumably a base-class member (confirm in Renderer.hpp).
+               }
+               else   // Every type other than SAMPLER_PIXEL goes to the vertex processor.
+               {
+                       VertexProcessor::setSwizzleG(sampler, swizzleG);
+               }
+       }
+
+       void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)   // Forwards swizzleB for 'sampler' to PixelProcessor or VertexProcessor, selected by 'type'.
+       {
+               if(type == SAMPLER_PIXEL)
+               {
+                       PixelProcessor::setSwizzleB(sampler, swizzleB);   // Class-qualified call; presumably a base-class member (confirm in Renderer.hpp).
+               }
+               else   // Every type other than SAMPLER_PIXEL goes to the vertex processor.
+               {
+                       VertexProcessor::setSwizzleB(sampler, swizzleB);
+               }
+       }
+
+       void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)   // Forwards swizzleA for 'sampler' to PixelProcessor or VertexProcessor, selected by 'type'.
+       {
+               if(type == SAMPLER_PIXEL)
+               {
+                       PixelProcessor::setSwizzleA(sampler, swizzleA);   // Class-qualified call; presumably a base-class member (confirm in Renderer.hpp).
+               }
+               else   // Every type other than SAMPLER_PIXEL goes to the vertex processor.
+               {
+                       VertexProcessor::setSwizzleA(sampler, swizzleA);
+               }
+       }
+
+       void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel)   // Forwards baseLevel for 'sampler' to PixelProcessor or VertexProcessor, selected by 'type'.
+       {
+               if(type == SAMPLER_PIXEL)
+               {
+                       PixelProcessor::setBaseLevel(sampler, baseLevel);   // Value is passed through unchanged; no range check is done here.
+               }
+               else   // Every type other than SAMPLER_PIXEL goes to the vertex processor.
+               {
+                       VertexProcessor::setBaseLevel(sampler, baseLevel);
+               }
+       }
+
+       void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel)   // Forwards maxLevel for 'sampler' to PixelProcessor or VertexProcessor, selected by 'type'.
+       {
+               if(type == SAMPLER_PIXEL)
+               {
+                       PixelProcessor::setMaxLevel(sampler, maxLevel);   // Value is passed through unchanged; no range check is done here.
+               }
+               else   // Every type other than SAMPLER_PIXEL goes to the vertex processor.
+               {
+                       VertexProcessor::setMaxLevel(sampler, maxLevel);
+               }
+       }
+
+       void Renderer::setMinLod(SamplerType type, int sampler, float minLod)   // Forwards minLod for 'sampler' to PixelProcessor or VertexProcessor, selected by 'type'.
+       {
+               if(type == SAMPLER_PIXEL)
+               {
+                       PixelProcessor::setMinLod(sampler, minLod);   // Value is passed through unchanged; no clamping is done here.
+               }
+               else   // Every type other than SAMPLER_PIXEL goes to the vertex processor.
+               {
+                       VertexProcessor::setMinLod(sampler, minLod);
+               }
+       }
+
+       void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod)   // Forwards maxLod for 'sampler' to PixelProcessor or VertexProcessor, selected by 'type'.
+       {
+               if(type == SAMPLER_PIXEL)
+               {
+                       PixelProcessor::setMaxLod(sampler, maxLod);   // Value is passed through unchanged; no clamping is done here.
+               }
+               else   // Every type other than SAMPLER_PIXEL goes to the vertex processor.
+               {
+                       VertexProcessor::setMaxLod(sampler, maxLod);
+               }
+       }
+
        void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
        {
                context->setPointSpriteEnable(pointSpriteEnable);
@@ -2198,6 +2420,11 @@ namespace sw
                context->setPointScaleEnable(pointScaleEnable);
        }
 
+       void Renderer::setLineWidth(float width)   // Stores the requested line width in the context.
+       {
+               context->lineWidth = width;   // Stored as given; no validation or clamping happens here.
+       }
+
        void Renderer::setDepthBias(float bias)
        {
                depthBias = bias;
@@ -2208,6 +2435,11 @@ namespace sw
                slopeDepthBias = slopeBias;
        }
 
+       void Renderer::setRasterizerDiscard(bool rasterizerDiscard)   // Stores the rasterizer-discard flag in the context.
+       {
+               context->rasterizerDiscard = rasterizerDiscard;   // Only records the flag; how it is consumed is not visible in this chunk.
+       }
+
        void Renderer::setPixelShader(const PixelShader *shader)
        {
                context->pixelShader = shader;
@@ -2351,7 +2583,7 @@ namespace sw
        {
                queries.push_back(query);
        }
-       
+
        void Renderer::removeQuery(Query *query)
        {
                queries.remove(query);
@@ -2362,7 +2594,7 @@ namespace sw
                {
                        return threadCount;
                }
-               
+
                int64_t Renderer::getVertexTime(int thread)
                {
                        return vertexTime[thread];
@@ -2372,7 +2604,7 @@ namespace sw
                {
                        return setupTime[thread];
                }
-                       
+
                int64_t Renderer::getPixelTime(int thread)
                {
                        return pixelTime[thread];
@@ -2406,7 +2638,7 @@ namespace sw
 
        void Renderer::setClipPlane(unsigned int index, const float plane[4])
        {
-               if(index < 6)
+               if(index < MAX_CLIP_PLANES)
                {
                        userPlane[index] = plane;
                }
@@ -2423,26 +2655,30 @@ namespace sw
                {
                        terminateThreads();
 
-                       SwiftConfig::Configuration configuration = {0};
+                       SwiftConfig::Configuration configuration = {};
                        swiftConfig->getConfiguration(configuration);
 
+                       precacheVertex = !newConfiguration && configuration.precache;
+                       precacheSetup = !newConfiguration && configuration.precache;
+                       precachePixel = !newConfiguration && configuration.precache;
+
                        VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
                        PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
                        SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
 
                        switch(configuration.textureSampleQuality)
                        {
-                       case 0:  Sampler::setFilterQuality(FILTER_POINT);                       break;
-                       case 1:  Sampler::setFilterQuality(FILTER_LINEAR);                      break;
-                       case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
-                       default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
+                       case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
+                       case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
+                       case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
+                       default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
                        }
 
                        switch(configuration.mipmapQuality)
                        {
                        case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
                        case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
-                       default: Sampler::setMipmapQuality(MIPMAP_LINEAR);  break;
+                       default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
                        }
 
                        setPerspectiveCorrection(configuration.perspectiveCorrection);
@@ -2489,16 +2725,16 @@ namespace sw
 
                        switch(configuration.transparencyAntialiasing)
                        {
-                       case 0:         transparencyAntialiasing = Context::TRANSPARENCY_NONE;                          break;
-                       case 1:         transparencyAntialiasing = Context::TRANSPARENCY_ALPHA_TO_COVERAGE;     break;
-                       default:        transparencyAntialiasing = Context::TRANSPARENCY_NONE;                          break;
+                       case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
+                       case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
+                       default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
                        }
 
                        switch(configuration.threadCount)
                        {
-                       case -1: initializeThreads(CPUID::coreCount());        break;
-                       case 0:  initializeThreads(CPUID::processAffinity());  break;
-                       default: initializeThreads(configuration.threadCount); break;
+                       case -1: threadCount = CPUID::coreCount();        break;
+                       case 0:  threadCount = CPUID::processAffinity();  break;
+                       default: threadCount = configuration.threadCount; break;
                        }
 
                        CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
@@ -2523,5 +2759,10 @@ namespace sw
                        maxPrimitives = configuration.maxPrimitives;
                #endif
                }
+
+               if(!initialUpdate && !worker[0])
+               {
+                       initializeThreads();
+               }
        }
 }