1 // SwiftShader Software Renderer
3 // Copyright(c) 2005-2012 TransGaming Inc.
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
12 #include "Renderer.hpp"
14 #include "Clipper.hpp"
16 #include "FrameBuffer.hpp"
18 #include "Surface.hpp"
20 #include "Primitive.hpp"
21 #include "Polygon.hpp"
22 #include "SwiftConfig.hpp"
23 #include "MutexLock.hpp"
26 #include "Resource.hpp"
27 #include "Constants.hpp"
29 #include "Reactor/Reactor.hpp"
// Global tunables and convention flags used by the renderer.
// disableServer is passed to the SwiftConfig constructor in Renderer::Renderer().
33 bool disableServer = true;
// Renderer::draw() checks the requested primitive count against these two bounds.
36 unsigned int minPrimitives = 1;
37 unsigned int maxPrimitives = 1 << 21;
// Flags declared extern here; their definitions are not part of this excerpt.
// The Renderer constructor assigns several of them from its Conventions argument.
42 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates
43 extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1]
44 extern bool booleanFaceRegister;
45 extern bool fullPixelPositionRegister;
46 extern bool leadingVertexFirst; // Flat shading uses first vertex, else last
47 extern bool secondaryColor; // Specular lighting is applied after texturing
49 extern bool forceWindowed;
50 extern bool complementaryDepthBuffer;
51 extern bool postBlendSRGB;
52 extern bool exactColorRounding;
53 extern TransparencyAntialiasing transparencyAntialiasing;
54 extern bool forceClearRegisters;
56 extern bool precacheVertex;
57 extern bool precacheSetup;
58 extern bool precachePixel;
// Precision settings for transcendental operations. Renderer::threadFunction() enables
// flush-to-zero and denormals-are-zero when logPrecision is below IEEE.
65 TranscendentalPrecision logPrecision = ACCURATE;
66 TranscendentalPrecision expPrecision = ACCURATE;
67 TranscendentalPrecision rcpPrecision = ACCURATE;
68 TranscendentalPrecision rsqPrecision = ACCURATE;
69 bool perspectiveCorrection = true;
// NOTE(review): the enclosing function header is not visible in this excerpt; these look
// like initializer statements of the draw-call object used by Renderer::draw() - confirm.
// The dirty counters are used by draw() as element counts for the constant memcpy calls,
// so non-zero initial values cause the float constants to be copied on first use.
81 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
85 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
// Allocate the DrawData block and store the address of `constants` in it.
91 data = (DrawData*)allocate(sizeof(DrawData));
92 data->constants = &constants;
// Constructs the renderer.
//   context            - forwarded to the vertex, pixel and setup processor bases and stored.
//   conventions        - its fields are copied into the corresponding sw:: global flags.
//   exactColorRounding - stored into sw::exactColorRounding.
// Several statements of the original body are not visible in this excerpt.
102 Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
// Publish the API conventions through the global flags.
104 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
105 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
106 sw::booleanFaceRegister = conventions.booleanFaceRegister;
107 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
108 sw::leadingVertexFirst = conventions.leadingVertexFirst;
109 sw::secondaryColor = conventions.secondaryColor;
110 sw::exactColorRounding = exactColorRounding;
// NOTE(review): presumably resets render target 0 to "none" - confirm setRenderTarget semantics.
112 setRenderTarget(0, 0);
113 clipper = new Clipper();
// Flag all cached matrices and clip planes as needing an update.
115 updateViewMatrix = true;
116 updateBaseMatrix = true;
117 updateProjectionMatrix = true;
118 updateClipPlanes = true;
// (Loop body not visible in this excerpt.)
124 for(int i = 0; i < 16; i++)
134 resumeApp = new Event();
// Clear the 16 per-unit triangle and primitive batch pointers.
142 for(int i = 0; i < 16; i++)
144 triangleBatch[i] = 0;
145 primitiveBatch[i] = 0;
// Create DRAW_COUNT DrawCall objects and seed drawList with them; the destructor deletes them.
148 for(int draw = 0; draw < DRAW_COUNT; draw++)
150 drawCall[draw] = new DrawCall();
151 drawList[draw] = drawCall[draw];
// Initialize progress tracking for 16 primitive units and 16 pixel clusters.
154 for(int unit = 0; unit < 16; unit++)
156 primitiveProgress[unit].init();
159 for(int cluster = 0; cluster < 16; cluster++)
161 pixelProgress[cluster].init();
// Create the configuration object using the global disableServer flag, then apply it.
166 swiftConfig = new SwiftConfig(disableServer);
167 updateConfiguration(true);
// Resource locked by draw() (PRIVATE) and by synchronize() (PUBLIC).
169 sync = new Resource(0);
// Destructor. The visible part deletes every DrawCall created by the constructor;
// any other cleanup steps are not visible in this excerpt.
172 Renderer::~Renderer()
182 for(int draw = 0; draw < DRAW_COUNT; draw++)
184 delete drawCall[draw];
// Forwards a clear request to the blitter with all arguments unchanged.
190 void Renderer::clear(void *pixel, Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
192 blitter.clear(pixel, format, dest, dRect, rgbaMask);
// Forwards a rectangle-to-rectangle copy to the blitter with all arguments unchanged.
195 void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter)
197 blitter.blit(source, sRect, dest, dRect, filter);
// Forwards a surface-to-surface 3D blit to the blitter.
200 void Renderer::blit3D(Surface *source, Surface *dest)
202 blitter.blit3D(source, dest);
// Records one draw: refreshes processor state and routines, snapshots the context state
// into a DrawCall / DrawData pair, locks every resource the draw references, and marks
// task[0] as RESUME.
//   drawType    - stored in context->drawType and draw->drawType.
//   indexOffset - byte offset added to the locked index buffer pointer.
//   count       - primitive count; checked against minPrimitives / maxPrimitives.
//   update      - when true, forces the processor state and routines to be re-derived.
// Many statements (braces, else branches, assignments) are not visible in this excerpt.
205 void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
// Range check on the primitive count (the branch body is not visible here).
208 if(count < minPrimitives || count > maxPrimitives)
214 context->drawType = drawType;
216 updateConfiguration();
219 int ss = context->getSuperSampleCount();
220 int ms = context->getMultiSampleCount();
// One pass per super-sample; each pass selects ms consecutive bits of the sample mask.
222 for(int q = 0; q < ss; q++)
224 unsigned int oldMultiSampleMask = context->multiSampleMask;
225 context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
227 if(!context->multiSampleMask)
232 sync->lock(sw::PRIVATE);
234 Routine *vertexRoutine;
235 Routine *setupRoutine;
236 Routine *pixelRoutine;
// Re-derive state and routines when requested or when the multisample mask changed.
238 if(update || oldMultiSampleMask != context->multiSampleMask)
240 vertexState = VertexProcessor::update();
241 setupState = SetupProcessor::update();
242 pixelState = PixelProcessor::update();
244 vertexRoutine = VertexProcessor::routine(vertexState);
245 setupRoutine = SetupProcessor::routine(setupState);
246 pixelRoutine = PixelProcessor::routine(pixelState);
// The per-task batch size is the configured batch size divided by the multisample count.
249 int batch = batchSize / ms;
// Select the primitive setup function from the primitive kind and, for triangles, the fill mode.
251 int (*setupPrimitives)(Renderer *renderer, int batch, int count);
253 if(context->isDrawTriangle())
255 switch(context->fillMode)
258 setupPrimitives = setupSolidTriangles;
261 setupPrimitives = setupWireframeTriangle;
265 setupPrimitives = setupVertexTriangle;
268 default: ASSERT(false);
271 else if(context->isDrawLine())
273 setupPrimitives = setupLines;
277 setupPrimitives = setupPoints;
// Look for a DrawCall whose references == -1; finishRendering() stores -1 when a draw completes.
284 for(int i = 0; i < DRAW_COUNT; i++)
286 if(drawCall[i]->references == -1)
289 drawList[nextDraw % DRAW_COUNT] = draw;
302 DrawData *data = draw->data;
// Attach the active queries, taking a reference on each. TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN
// queries are only attached when both transform feedback and its query are enabled.
304 if(queries.size() != 0)
306 draw->queries = new std::list<Query*>();
307 bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
308 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
311 if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
313 atomicIncrement(&(q->reference));
314 draw->queries->push_back(q);
319 draw->drawType = drawType;
320 draw->batchSize = batch;
// bind() each routine for the duration of the draw; finishRendering() calls the matching unbind().
322 vertexRoutine->bind();
323 setupRoutine->bind();
324 pixelRoutine->bind();
326 draw->vertexRoutine = vertexRoutine;
327 draw->setupRoutine = setupRoutine;
328 draw->pixelRoutine = pixelRoutine;
329 draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
330 draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
331 draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
332 draw->setupPrimitives = setupPrimitives;
333 draw->setupState = setupState;
// Capture the vertex streams and lock the resources backing them.
335 for(int i = 0; i < VERTEX_ATTRIBUTES; i++)
337 draw->vertexStream[i] = context->input[i].resource;
338 data->input[i] = context->input[i].buffer;
339 data->stride[i] = context->input[i].stride;
341 if(draw->vertexStream[i])
343 draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
// Lock the index buffer, if any, and apply the byte offset.
347 if(context->indexBuffer)
349 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
352 draw->indexBuffer = context->indexBuffer;
// Clear every texture slot, then capture and lock the ones the pixel state samples from.
354 for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
356 draw->texture[sampler] = 0;
359 for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
361 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
363 draw->texture[sampler] = context->texture[sampler];
364 draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE); // If the texture is both read and written, use the same read/write lock as render targets
366 data->mipmap[sampler] = context->sampler[sampler].getTextureData();
// Copy only the dirty pixel shader constants, then reset the dirty counters.
370 if(context->pixelShader)
372 if(draw->psDirtyConstF)
// The cW copy is capped at 8 entries.
374 memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
375 memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
376 draw->psDirtyConstF = 0;
379 if(draw->psDirtyConstI)
381 memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
382 draw->psDirtyConstI = 0;
385 if(draw->psDirtyConstB)
387 memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
388 draw->psDirtyConstB = 0;
391 PixelProcessor::lockUniformBuffers(data->ps.u);
// For pixel shader versions up to 0x0104, copy the uniforms of each texture stage that is
// not disabled (or of every stage when a pixel shader is set).
394 if(context->pixelShaderVersion() <= 0x0104)
396 for(int stage = 0; stage < 8; stage++)
398 if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
400 data->textureStage[stage] = context->textureStage[stage].uniforms;
// Vertex shader path: vertex textures (version >= 0x0300), dirty constants, instance ID, uniform buffers.
406 if(context->vertexShader)
408 if(context->vertexShader->getVersion() >= 0x0300)
410 for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
412 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
// Vertex texture slots are stored after the TEXTURE_IMAGE_UNITS pixel texture slots.
414 draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
415 draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
417 data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
422 if(draw->vsDirtyConstF)
424 memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
425 draw->vsDirtyConstF = 0;
428 if(draw->vsDirtyConstI)
430 memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
431 draw->vsDirtyConstI = 0;
434 if(draw->vsDirtyConstB)
436 memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
437 draw->vsDirtyConstB = 0;
440 if(context->vertexShader->instanceIdDeclared)
442 data->instanceID = context->instanceID;
445 VertexProcessor::lockUniformBuffers(data->vs.u);
// NOTE(review): presumably the no-vertex-shader branch (the `else` is not visible) -
// marks all vertex shader constants dirty again.
451 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
452 draw->vsDirtyConstI = 16;
453 draw->vsDirtyConstB = 16;
// Copy the front and counter-clockwise stencil state when stencil testing is active.
456 if(pixelState.stencilActive)
458 data->stencil[0] = stencil;
459 data->stencil[1] = stencilCCW;
462 if(pixelState.fogActive)
467 if(setupState.isDrawPoint)
472 data->lineWidth = context->lineWidth;
474 data->factor = factor;
// Alpha-to-coverage thresholds spread around the alpha reference value.
// NOTE(review): the condition selecting the four-threshold vs. two-threshold variant is not visible.
476 if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
478 float ref = context->alphaReference * (1.0f / 255.0f);
479 float margin = sw::min(ref, 1.0f - ref);
483 data->a2c0 = replicate(ref - margin * 0.6f);
484 data->a2c1 = replicate(ref - margin * 0.2f);
485 data->a2c2 = replicate(ref + margin * 0.2f);
486 data->a2c3 = replicate(ref + margin * 0.6f);
490 data->a2c0 = replicate(ref - margin * 0.3f);
491 data->a2c1 = replicate(ref + margin * 0.3f);
// Reset per-cluster occlusion counters and performance timers.
496 if(pixelState.occlusionEnabled)
498 for(int cluster = 0; cluster < clusterCount; cluster++)
500 data->occlusion[cluster] = 0;
505 for(int cluster = 0; cluster < clusterCount; cluster++)
507 for(int i = 0; i < PERF_TIMERS; i++)
509 data->cycles[i][cluster] = 0;
// Viewport half extents (W, H), center (X0, Y0) and depth range (N, F).
516 float W = 0.5f * viewport.width;
517 float H = 0.5f * viewport.height;
518 float X0 = viewport.x0 + W;
519 float Y0 = viewport.y0 + H;
520 float N = viewport.minZ;
521 float F = viewport.maxZ;
524 if(context->isDrawTriangle(false))
529 if(complementaryDepthBuffer)
// Sub-pixel offset tables, indexed below as [log2(ss)][q].
535 static const float X[5][16] = // Fragment offsets
537 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample
538 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples
539 {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples
540 {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples
541 {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f} // 16 samples
544 static const float Y[5][16] = // Fragment offsets
546 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample
547 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples
548 {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples
549 {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples
550 {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f} // 16 samples
// Row index into the offset tables.
553 int s = sw::log2(ss);
// Viewport transform constants; the *x16 values are the viewport terms multiplied by 16,
// with 8 subtracted from the scaled center.
555 data->Wx16 = replicate(W * 16);
556 data->Hx16 = replicate(H * 16);
557 data->X0x16 = replicate(X0 * 16 - 8);
558 data->Y0x16 = replicate(Y0 * 16 - 8);
// Offset for this pass, expressed relative to the viewport half extents.
559 data->XXXX = replicate(X[s][q] / W);
560 data->YYYY = replicate(Y[s][q] / H);
561 data->halfPixelX = replicate(0.5f / W);
562 data->halfPixelY = replicate(0.5f / H);
563 data->viewportHeight = abs(viewport.height);
564 data->slopeDepthBias = slopeDepthBias;
565 data->depthRange = Z;
567 draw->clipFlags = clipFlags;
// Copy only the user clip planes whose flag is set.
571 if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
572 if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
573 if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
574 if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
575 if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
576 if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
// Lock the render targets (third lock argument is q * ms); finishRendering() unlocks them.
582 for(int index = 0; index < RENDERTARGETS; index++)
584 draw->renderTarget[index] = context->renderTarget[index];
586 if(draw->renderTarget[index])
588 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
589 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
590 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
// Lock the depth and stencil planes in the same way.
594 draw->depthStencil = context->depthStencil;
596 if(draw->depthStencil)
598 data->depthBuffer = (float*)context->depthStencil->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
599 data->depthPitchB = context->depthStencil->getInternalPitchB();
600 data->depthSliceB = context->depthStencil->getInternalSliceB();
602 data->stencilBuffer = (unsigned char*)context->depthStencil->lockStencil(q * ms, MANAGED);
603 data->stencilPitchB = context->depthStencil->getStencilPitchB();
604 data->stencilSliceB = context->depthStencil->getStencilSliceB();
610 data->scissorX0 = scissor.x0;
611 data->scissorX1 = scissor.x1;
612 data->scissorY0 = scissor.y0;
613 data->scissorY1 = scissor.y1;
// Number of batches for this draw, rounded up; finishRendering() decrements this counter.
619 draw->references = (count + batch - 1) / batch;
// (Statements executed while holding schedulerMutex are not visible in this excerpt.)
621 schedulerMutex.lock();
623 schedulerMutex.unlock();
// Both visible branches mark task[0] as RESUME; the code that follows each is not visible.
632 task[0].type = Task::RESUME;
637 else // Use main thread for draw execution
640 task[0].type = Task::RESUME;
// Thread entry point. `parameters` is cast to Parameters* and supplies the renderer and
// the thread index. When logPrecision is below IEEE, flush-to-zero and denormals-are-zero
// are enabled through CPUID before the thread loop is entered.
647 void Renderer::threadFunction(void *parameters)
649 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
650 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
652 if(logPrecision < IEEE)
654 CPUID::setFlushToZero(true);
655 CPUID::setDenormalsAreZero(true);
658 renderer->threadLoop(threadIndex);
// Runs taskLoop() for the thread, then signals suspend[threadIndex] and waits on
// resume[threadIndex]. Any enclosing loop or exit condition is not visible in this excerpt.
661 void Renderer::threadLoop(int threadIndex)
665 taskLoop(threadIndex);
667 suspend[threadIndex]->signal();
668 resume[threadIndex]->wait();
// Alternately schedules and executes tasks until this thread's task type becomes SUSPEND
// (scheduleTask() is what assigns task[threadIndex]).
672 void Renderer::taskLoop(int threadIndex)
674 while(task[threadIndex].type != Task::SUSPEND)
676 scheduleTask(threadIndex);
677 executeTask(threadIndex);
// Appends runnable tasks to taskQueue: first pixel tasks for idle clusters, then primitive
// tasks for free primitive units. It is called from scheduleTask() while schedulerMutex is
// held. Some statements (queue size updates, loop exits) are not visible in this excerpt.
681 void Renderer::findAvailableTasks()
// Pixel tasks: pair each idle cluster with a unit holding the batch it must render next.
684 for(int cluster = 0; cluster < clusterCount; cluster++)
686 if(!pixelProgress[cluster].executing)
688 for(int unit = 0; unit < unitCount; unit++)
690 if(primitiveProgress[unit].references > 0) // Contains processed primitives
// The unit's batch must belong to the draw this cluster is currently working on.
692 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
694 if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive) // Previous primitives have been rendered
696 Task &task = taskQueue[qHead];
697 task.type = Task::PIXELS;
698 task.primitiveUnit = unit;
699 task.pixelCluster = cluster;
701 pixelProgress[cluster].executing = true;
703 // Commit to the task queue
// qHead wraps at 32 entries.
704 qHead = (qHead + 1) % 32;
715 // Find primitive tasks
716 if(currentDraw == nextDraw)
718 return; // No more primitives to process
721 for(int unit = 0; unit < unitCount; unit++)
723 DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
// Current draw fully handed out: move on to the next one (the increment itself is not visible).
725 if(draw->primitive >= draw->count)
729 if(currentDraw == nextDraw)
731 return; // No more primitives to process
734 draw = drawList[currentDraw % DRAW_COUNT];
737 if(!primitiveProgress[unit].references) // Task not already being executed and not still in use by a pixel unit
739 int primitive = draw->primitive;
740 int count = draw->count;
741 int batch = draw->batchSize;
// Assign the next batch to this unit; the final batch of a draw may be shorter than `batch`.
743 primitiveProgress[unit].drawCall = currentDraw;
744 primitiveProgress[unit].firstPrimitive = primitive;
745 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
747 draw->primitive += batch;
749 Task &task = taskQueue[qHead];
750 task.type = Task::PRIMITIVES;
751 task.primitiveUnit = unit;
// -1 marks the unit as busy; executeTask() replaces it with clusterCount once setup is done.
753 primitiveProgress[unit].references = -1;
755 // Commit to the task queue
756 qHead = (qHead + 1) % 32;
// Picks the next task for `threadIndex` while holding schedulerMutex. The visible code
// refills the queue when it is short, takes an entry from the queue, marks suspended
// threads as RESUME when work is available, and can mark the calling thread as SUSPEND.
// The conditions joining these steps are only partly visible in this excerpt.
762 void Renderer::scheduleTask(int threadIndex)
764 schedulerMutex.lock();
// Look for more work when fewer tasks are queued than threadCount - threadsAwake + 1.
766 if((int)qSize < threadCount - threadsAwake + 1)
768 findAvailableTasks();
// Take the entry qSize slots behind qHead.
773 task[threadIndex] = taskQueue[(qHead - qSize) % 32];
776 if(threadsAwake != threadCount)
778 int wakeup = qSize - threadsAwake + 1;
780 for(int i = 0; i < threadCount && wakeup > 0; i++)
782 if(task[i].type == Task::SUSPEND)
785 task[i].type = Task::RESUME;
// NOTE(review): presumably reached when no task was available - the enclosing `else` is not visible.
796 task[threadIndex].type = Task::SUSPEND;
801 schedulerMutex.unlock();
// Executes the task assigned to `threadIndex`, dispatching on its type, and accumulates
// elapsed ticks into the per-thread vertex / setup / pixel timers.
// Only the PRIMITIVES case label is visible in this excerpt; other labels and any
// conditional compilation around the timing code are missing.
804 void Renderer::executeTask(int threadIndex)
807 int64_t startTick = Timer::ticks();
810 switch(task[threadIndex].type)
812 case Task::PRIMITIVES:
814 int unit = task[threadIndex].primitiveUnit;
816 int input = primitiveProgress[unit].firstPrimitive;
817 int count = primitiveProgress[unit].primitiveCount;
818 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
819 int (*setupPrimitives)(Renderer *renderer, int batch, int count) = draw->setupPrimitives;
// Run the vertex stage for this batch; draw->count is passed as the `loop` argument.
821 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
824 int64_t time = Timer::ticks();
825 vertexTime[threadIndex] += time - startTick;
// With rasterizer discard enabled nothing is set up and zero primitives are reported visible.
829 int visible = draw->setupState.rasterizerDiscard ? 0 : setupPrimitives(this, unit, count);
// Setting references to clusterCount makes the batch available to pixel tasks
// (findAvailableTasks() looks for references > 0).
831 primitiveProgress[unit].visible = visible;
832 primitiveProgress[unit].references = clusterCount;
835 setupTime[threadIndex] += Timer::ticks() - startTick;
// NOTE(review): presumably the Task::PIXELS case - its label is not visible in this excerpt.
841 int unit = task[threadIndex].primitiveUnit;
842 int visible = primitiveProgress[unit].visible;
846 int cluster = task[threadIndex].pixelCluster;
847 Primitive *primitive = primitiveBatch[unit];
848 DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
849 DrawData *data = draw->data;
850 PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
852 pixelRoutine(primitive, visible, cluster, data);
// Update progress counters and release resources when the draw is complete.
855 finishRendering(task[threadIndex]);
858 pixelTime[threadIndex] += Timer::ticks() - startTick;
// Locks the `sync` resource with PUBLIC access; draw() locks the same resource with PRIVATE
// access. The matching unlock is not visible in this excerpt.
871 void Renderer::synchronize()
873 sync->lock(sw::PUBLIC);
// Bookkeeping after a pixel task: records the cluster's progress, drops one reference on
// the primitive unit and on the draw, and releases the draw's resources.
// The conditions guarding the reference decrements and the release section are not visible
// in this excerpt.
//   pixelTask - the finished task; supplies the primitive unit and pixel cluster indices.
877 void Renderer::finishRendering(Task &pixelTask)
879 int unit = pixelTask.primitiveUnit;
880 int cluster = pixelTask.pixelCluster;
882 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
883 DrawData &data = *draw.data;
884 int primitive = primitiveProgress[unit].firstPrimitive;
885 int count = primitiveProgress[unit].primitiveCount;
// This cluster has now rendered everything up to the end of the batch.
887 pixelProgress[cluster].processedPrimitives = primitive + count;
889 int ref = atomicDecrement(&primitiveProgress[unit].references);
893 ref = atomicDecrement(&draw.references);
// Fold the per-cluster cycle counters into the profiler totals.
898 for(int cluster = 0; cluster < clusterCount; cluster++)
900 for(int i = 0; i < PERF_TIMERS; i++)
902 profiler.cycles[i] += data.cycles[i][cluster];
// Accumulate results into each attached query and drop the reference taken in draw().
909 for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
915 case Query::FRAGMENTS_PASSED:
916 for(int cluster = 0; cluster < clusterCount; cluster++)
918 atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
921 case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
// NOTE(review): `cluster` here is presumably the pixel task's cluster, since the loop
// variable above is out of scope - confirm against the full source.
922 atomicAdd((volatile int*)&query->data, pixelProgress[cluster].processedPrimitives);
928 atomicDecrement(&query->reference);
// Release everything draw() locked: render targets, depth/stencil, textures, vertex streams,
// index buffer, uniform buffers, and the bound routines.
935 for(int i = 0; i < RENDERTARGETS; i++)
937 if(draw.renderTarget[i])
939 draw.renderTarget[i]->unlockInternal();
943 if(draw.depthStencil)
945 draw.depthStencil->unlockInternal();
946 draw.depthStencil->unlockStencil();
949 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
953 draw.texture[i]->unlock();
957 for(int i = 0; i < VERTEX_ATTRIBUTES; i++)
959 if(draw.vertexStream[i])
961 draw.vertexStream[i]->unlock();
967 draw.indexBuffer->unlock();
970 PixelProcessor::unlockUniformBuffers();
971 VertexProcessor::unlockUniformBuffers();
973 draw.vertexRoutine->unbind();
974 draw.setupRoutine->unbind();
975 draw.pixelRoutine->unbind();
// -1 is the value draw() looks for when searching for a free DrawCall.
979 draw.references = -1;
// When this cluster has processed the whole draw, advance it to the next draw call.
984 if(pixelProgress[cluster].processedPrimitives >= draw.count)
986 pixelProgress[cluster].drawCall++;
987 pixelProgress[cluster].processedPrimitives = 0;
990 pixelProgress[cluster].executing = false;
// Builds the vertex index triple of every primitive in a batch according to the draw type,
// then runs the vertex routine on the result.
//   unit          - primitive unit; selects the triangle batch and the owning draw call.
//   start         - index of the first primitive of the batch within the draw.
//   triangleCount - number of primitives in the batch (points and lines also use three slots).
//   loop          - modulus used for line-loop wrap-around; executeTask() passes draw->count.
//   thread        - selects the VertexTask (and its vertex cache) to use.
// Several case labels, `break` statements and index increments are not visible in this excerpt.
993 void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
995 Triangle *triangle = triangleBatch[unit];
996 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
997 DrawData *data = draw->data;
998 VertexTask *task = vertexTask[thread];
1000 const void *indices = data->indices;
1001 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
// The vertex cache is cleared whenever this thread starts working on a different draw call.
1003 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
1005 task->vertexCache.clear();
1006 task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
// Fixed capacity of 128 primitives; triangleCount must not exceed it.
1009 unsigned int batch[128][3]; // FIXME: Adjust to dynamic batch size
1011 switch(draw->drawType)
// Non-indexed points: all three slots hold the same vertex index.
1013 case DRAW_POINTLIST:
1015 unsigned int index = start;
1017 for(unsigned int i = 0; i < triangleCount; i++)
1019 batch[i][0] = index;
1020 batch[i][1] = index;
1021 batch[i][2] = index;
// Two vertices per primitive (case label not visible); the second vertex is repeated in slot 2.
1029 unsigned int index = 2 * start;
1031 for(unsigned int i = 0; i < triangleCount; i++)
1033 batch[i][0] = index + 0;
1034 batch[i][1] = index + 1;
1035 batch[i][2] = index + 1;
// Line strip: each line starts at `index` and ends at `index + 1`.
1041 case DRAW_LINESTRIP:
1043 unsigned int index = start;
1045 for(unsigned int i = 0; i < triangleCount; i++)
1047 batch[i][0] = index + 0;
1048 batch[i][1] = index + 1;
1049 batch[i][2] = index + 1;
// Same pattern with wrap-around modulo `loop` (case label not visible).
1057 unsigned int index = start;
1059 for(unsigned int i = 0; i < triangleCount; i++)
1061 batch[i][0] = (index + 0) % loop;
1062 batch[i][1] = (index + 1) % loop;
1063 batch[i][2] = (index + 1) % loop;
// Triangle list: three consecutive vertices per triangle.
1069 case DRAW_TRIANGLELIST:
1071 unsigned int index = 3 * start;
1073 for(unsigned int i = 0; i < triangleCount; i++)
1075 batch[i][0] = index + 0;
1076 batch[i][1] = index + 1;
1077 batch[i][2] = index + 2;
// Triangle strip: slots 1 and 2 are swapped depending on whether `index` is odd or even.
1083 case DRAW_TRIANGLESTRIP:
1085 unsigned int index = start;
1087 for(unsigned int i = 0; i < triangleCount; i++)
1089 batch[i][0] = index + 0;
1090 batch[i][1] = index + (index & 1) + 1;
1091 batch[i][2] = index + (~index & 1) + 1;
// Triangle fan. NOTE(review): the assignment to slot 2 is not visible in this excerpt.
1097 case DRAW_TRIANGLEFAN:
1099 unsigned int index = start;
1101 for(unsigned int i = 0; i < triangleCount; i++)
1103 batch[i][0] = index + 1;
1104 batch[i][1] = index + 2;
// Indexed variants: same patterns as above, with vertex indices read from the index buffer
// as 8-, 16- or 32-bit unsigned values.
1111 case DRAW_INDEXEDPOINTLIST8:
1113 const unsigned char *index = (const unsigned char*)indices + start;
1115 for(unsigned int i = 0; i < triangleCount; i++)
1117 batch[i][0] = *index;
1118 batch[i][1] = *index;
1119 batch[i][2] = *index;
1125 case DRAW_INDEXEDPOINTLIST16:
1127 const unsigned short *index = (const unsigned short*)indices + start;
1129 for(unsigned int i = 0; i < triangleCount; i++)
1131 batch[i][0] = *index;
1132 batch[i][1] = *index;
1133 batch[i][2] = *index;
1139 case DRAW_INDEXEDPOINTLIST32:
1141 const unsigned int *index = (const unsigned int*)indices + start;
1143 for(unsigned int i = 0; i < triangleCount; i++)
1145 batch[i][0] = *index;
1146 batch[i][1] = *index;
1147 batch[i][2] = *index;
// Indexed line lists: the read pointer starts two indices per primitive into the buffer.
1153 case DRAW_INDEXEDLINELIST8:
1155 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1157 for(unsigned int i = 0; i < triangleCount; i++)
1159 batch[i][0] = index[0];
1160 batch[i][1] = index[1];
1161 batch[i][2] = index[1];
1167 case DRAW_INDEXEDLINELIST16:
1169 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1171 for(unsigned int i = 0; i < triangleCount; i++)
1173 batch[i][0] = index[0];
1174 batch[i][1] = index[1];
1175 batch[i][2] = index[1];
1181 case DRAW_INDEXEDLINELIST32:
1183 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1185 for(unsigned int i = 0; i < triangleCount; i++)
1187 batch[i][0] = index[0];
1188 batch[i][1] = index[1];
1189 batch[i][2] = index[1];
// Indexed line strips: the read pointer starts one index per primitive into the buffer.
1195 case DRAW_INDEXEDLINESTRIP8:
1197 const unsigned char *index = (const unsigned char*)indices + start;
1199 for(unsigned int i = 0; i < triangleCount; i++)
1201 batch[i][0] = index[0];
1202 batch[i][1] = index[1];
1203 batch[i][2] = index[1];
1209 case DRAW_INDEXEDLINESTRIP16:
1211 const unsigned short *index = (const unsigned short*)indices + start;
1213 for(unsigned int i = 0; i < triangleCount; i++)
1215 batch[i][0] = index[0];
1216 batch[i][1] = index[1];
1217 batch[i][2] = index[1];
1223 case DRAW_INDEXEDLINESTRIP32:
1225 const unsigned int *index = (const unsigned int*)indices + start;
1227 for(unsigned int i = 0; i < triangleCount; i++)
1229 batch[i][0] = index[0];
1230 batch[i][1] = index[1];
1231 batch[i][2] = index[1];
// Indexed line loops: positions in the index buffer wrap around modulo `loop`.
1237 case DRAW_INDEXEDLINELOOP8:
1239 const unsigned char *index = (const unsigned char*)indices;
1241 for(unsigned int i = 0; i < triangleCount; i++)
1243 batch[i][0] = index[(start + i + 0) % loop];
1244 batch[i][1] = index[(start + i + 1) % loop];
1245 batch[i][2] = index[(start + i + 1) % loop];
1249 case DRAW_INDEXEDLINELOOP16:
1251 const unsigned short *index = (const unsigned short*)indices;
1253 for(unsigned int i = 0; i < triangleCount; i++)
1255 batch[i][0] = index[(start + i + 0) % loop];
1256 batch[i][1] = index[(start + i + 1) % loop];
1257 batch[i][2] = index[(start + i + 1) % loop];
1261 case DRAW_INDEXEDLINELOOP32:
1263 const unsigned int *index = (const unsigned int*)indices;
1265 for(unsigned int i = 0; i < triangleCount; i++)
1267 batch[i][0] = index[(start + i + 0) % loop];
1268 batch[i][1] = index[(start + i + 1) % loop];
1269 batch[i][2] = index[(start + i + 1) % loop];
// Indexed triangle lists: the read pointer starts three indices per primitive into the buffer.
1273 case DRAW_INDEXEDTRIANGLELIST8:
1275 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1277 for(unsigned int i = 0; i < triangleCount; i++)
1279 batch[i][0] = index[0];
1280 batch[i][1] = index[1];
1281 batch[i][2] = index[2];
1287 case DRAW_INDEXEDTRIANGLELIST16:
1289 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1291 for(unsigned int i = 0; i < triangleCount; i++)
1293 batch[i][0] = index[0];
1294 batch[i][1] = index[1];
1295 batch[i][2] = index[2];
1301 case DRAW_INDEXEDTRIANGLELIST32:
1303 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1305 for(unsigned int i = 0; i < triangleCount; i++)
1307 batch[i][0] = index[0];
1308 batch[i][1] = index[1];
1309 batch[i][2] = index[2];
// Indexed triangle strips: slots 1 and 2 are swapped on the parity of (start + i).
1315 case DRAW_INDEXEDTRIANGLESTRIP8:
1317 const unsigned char *index = (const unsigned char*)indices + start;
1319 for(unsigned int i = 0; i < triangleCount; i++)
1321 batch[i][0] = index[0];
1322 batch[i][1] = index[((start + i) & 1) + 1];
1323 batch[i][2] = index[(~(start + i) & 1) + 1];
1329 case DRAW_INDEXEDTRIANGLESTRIP16:
1331 const unsigned short *index = (const unsigned short*)indices + start;
1333 for(unsigned int i = 0; i < triangleCount; i++)
1335 batch[i][0] = index[0];
1336 batch[i][1] = index[((start + i) & 1) + 1];
1337 batch[i][2] = index[(~(start + i) & 1) + 1];
1343 case DRAW_INDEXEDTRIANGLESTRIP32:
1345 const unsigned int *index = (const unsigned int*)indices + start;
1347 for(unsigned int i = 0; i < triangleCount; i++)
1349 batch[i][0] = index[0];
1350 batch[i][1] = index[((start + i) & 1) + 1];
1351 batch[i][2] = index[(~(start + i) & 1) + 1];
// Indexed triangle fans: slot 2 always holds the first index in the buffer.
1357 case DRAW_INDEXEDTRIANGLEFAN8:
1359 const unsigned char *index = (const unsigned char*)indices;
1361 for(unsigned int i = 0; i < triangleCount; i++)
1363 batch[i][0] = index[start + i + 1];
1364 batch[i][1] = index[start + i + 2];
1365 batch[i][2] = index[0];
1369 case DRAW_INDEXEDTRIANGLEFAN16:
1371 const unsigned short *index = (const unsigned short*)indices;
1373 for(unsigned int i = 0; i < triangleCount; i++)
1375 batch[i][0] = index[start + i + 1];
1376 batch[i][1] = index[start + i + 2];
1377 batch[i][2] = index[0];
1381 case DRAW_INDEXEDTRIANGLEFAN32:
1383 const unsigned int *index = (const unsigned int*)indices;
1385 for(unsigned int i = 0; i < triangleCount; i++)
1387 batch[i][0] = index[start + i + 1];
1388 batch[i][1] = index[start + i + 2];
1389 batch[i][2] = index[0];
// Triangles emitted in pairs from four vertices: (0, 1, 2) and (0, 2, 3).
// The case label is not visible in this excerpt.
1395 unsigned int index = 4 * start / 2;
1397 for(unsigned int i = 0; i < triangleCount; i += 2)
1399 batch[i+0][0] = index + 0;
1400 batch[i+0][1] = index + 1;
1401 batch[i+0][2] = index + 2;
1403 batch[i+1][0] = index + 0;
1404 batch[i+1][1] = index + 2;
1405 batch[i+1][2] = index + 3;
// Three vertices per primitive are handed to the vertex routine, which writes into the
// triangle batch starting at the first triangle's v0.
1416 task->vertexCount = triangleCount * 3;
1417 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
// Primitive setup for solid triangles: walks the unit's triangle batch, clips each candidate
// where needed and runs the setup routine on it.
//   renderer - owning renderer (this is called through a function pointer).
//   unit     - primitive unit whose triangle / primitive batches are used.
//   count    - number of triangles in the batch.
// The statements that count visible primitives and produce the return value are not visible
// in this excerpt.
1420 int Renderer::setupSolidTriangles(Renderer *renderer, int unit, int count)
1422 Triangle *triangle = renderer->triangleBatch[unit];
1423 Primitive *primitive = renderer->primitiveBatch[unit];
1425 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1426 SetupProcessor::State &state = draw.setupState;
1427 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1429 int ms = state.multiSample;
1430 int pos = state.positionRegister;
1431 const DrawData *data = draw.data;
1434 for(int i = 0; i < count; i++, triangle++)
1436 Vertex &v0 = triangle->v0;
1437 Vertex &v1 = triangle->v1;
1438 Vertex &v2 = triangle->v2;
// Continue only when the AND of the three vertices' clip flags equals CLIP_FINITE.
1440 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1442 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
// Clipping is needed when any vertex flag or any draw-level clip flag beyond CLIP_FINITE is set.
1444 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1446 if(clipFlagsOr != Clipper::CLIP_FINITE)
1448 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1454 if(setupRoutine(primitive, triangle, &polygon, data))
// Primitive setup for wireframe fill mode: culls the triangle by its winding sign, expands
// it into edges stored in triangle[0..2], and sets each one up with setupLine().
// Parameters are as for setupSolidTriangles(). The return statement, the loop's pointer
// advances and the visible-primitive counting are not visible in this excerpt.
1465 int Renderer::setupWireframeTriangle(Renderer *renderer, int unit, int count)
1467 Triangle *triangle = renderer->triangleBatch[unit];
1468 Primitive *primitive = renderer->primitiveBatch[unit];
1471 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1472 SetupProcessor::State &state = draw.setupState;
1473 SetupProcessor::RoutinePointer setupRoutine = draw.setupPointer;
1475 const Vertex &v0 = triangle[0].v0;
1476 const Vertex &v1 = triangle[0].v1;
1477 const Vertex &v2 = triangle[0].v2;
// Determinant over the vertices' x, y and w; its sign is used for culling and half its
// value is stored as the primitive area below.
1479 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1481 if(state.cullMode == CULL_CLOCKWISE)
1483 if(d >= 0) return 0;
1485 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1487 if(d <= 0) return 0;
// triangle[0] keeps the edge v0-v1; triangle[1] and triangle[2] receive v1-v2 and v2-v0.
1491 triangle[1].v0 = v1;
1492 triangle[1].v1 = v2;
1493 triangle[2].v0 = v2;
1494 triangle[2].v1 = v0;
// With flat shading, both colors of the added edges are taken from triangle[0].v0.
1496 if(state.color[0][0].flat) // FIXME
1498 for(int i = 0; i < 2; i++)
1500 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1501 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1502 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1503 triangle[2].v1.C[i] = triangle[0].v0.C[i];
// Set up each of the three edges as a line.
1507 for(int i = 0; i < 3; i++)
1509 if(setupLine(renderer, *primitive, *triangle, draw))
1511 primitive->area = 0.5f * d;
// Primitive setup for point fill mode: culls the triangle by its winding sign, then sets up
// each of its three vertices with setupPoint().
// Parameters are as for setupSolidTriangles(). The return statement, the loop's pointer
// advances and the visible-primitive counting are not visible in this excerpt.
1523 int Renderer::setupVertexTriangle(Renderer *renderer, int unit, int count)
1525 Triangle *triangle = renderer->triangleBatch[unit];
1526 Primitive *primitive = renderer->primitiveBatch[unit];
1529 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1530 SetupProcessor::State &state = draw.setupState;
1532 const Vertex &v0 = triangle[0].v0;
1533 const Vertex &v1 = triangle[0].v1;
1534 const Vertex &v2 = triangle[0].v2;
// Same determinant as in setupWireframeTriangle(); its sign drives culling.
1536 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1538 if(state.cullMode == CULL_CLOCKWISE)
1540 if(d >= 0) return 0;
1542 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1544 if(d <= 0) return 0;
// triangle[0].v0 already holds the first vertex; copy the other two into triangle[1] and triangle[2].
1548 triangle[1].v0 = v1;
1549 triangle[2].v0 = v2;
1551 for(int i = 0; i < 3; i++)
1553 if(setupPoint(renderer, *primitive, *triangle, draw))
1555 primitive->area = 0.5f * d;
// Runs setupLine() over the 'count' line primitives batched for 'unit', using
// the draw call selected by primitiveProgress[unit].drawCall % DRAW_COUNT.
// NOTE(review): the statements that advance triangle/primitive, use 'ms' and
// produce the return value fall into line-number gaps of this listing —
// confirm against the full source.
1567 int Renderer::setupLines(Renderer *renderer, int unit, int count)
1569 Triangle *triangle = renderer->triangleBatch[unit];
1570 Primitive *primitive = renderer->primitiveBatch[unit];
1573 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1574 SetupProcessor::State &state = draw.setupState;
1576 int ms = state.multiSample;
1578 for(int i = 0; i < count; i++)
1580 if(setupLine(renderer, *primitive, *triangle, draw))
// Runs setupPoint() over the 'count' point primitives batched for 'unit', using
// the draw call selected by primitiveProgress[unit].drawCall % DRAW_COUNT.
// NOTE(review): the statements that advance triangle/primitive, use 'ms' and
// produce the return value fall into line-number gaps of this listing —
// confirm against the full source.
1592 int Renderer::setupPoints(Renderer *renderer, int unit, int count)
1594 Triangle *triangle = renderer->triangleBatch[unit];
1595 Primitive *primitive = renderer->primitiveBatch[unit];
1598 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1599 SetupProcessor::State &state = draw.setupState;
1601 int ms = state.multiSample;
1603 for(int i = 0; i < count; i++)
1605 if(setupPoint(renderer, *primitive, *triangle, draw))
// Builds a polygon around the line segment from triangle.v0 to triangle.v1
// (positions read from register state.positionRegister), clips it when any
// clip flag is set, and passes it to the draw call's setup routine. The visible
// return statements forward the setup routine's result.
// NOTE(review): many lines of this function fall into line-number gaps of this
// listing (declarations of P, C and L, the early-out bodies, the per-vertex
// offsets of the diamond path), so only the visible steps are described.
1617 bool Renderer::setupLine(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1619 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1620 const SetupProcessor::State &state = draw.setupState;
1621 const DrawData &data = *draw.data;
1623 float lineWidth = data.lineWidth;
1625 Vertex &v0 = triangle.v0;
1626 Vertex &v1 = triangle.v1;
1628 int pos = state.positionRegister;
1630 const float4 &P0 = v0.v[pos];
1631 const float4 &P1 = v1.v[pos];
// Both endpoints have non-positive w; the action taken is not shown here.
1633 if(P0.w <= 0 && P1.w <= 0)
// W and H are data.Wx16[0] and data.Hx16[0] scaled down by 1/16.
1638 const float W = data.Wx16[0] * (1.0f / 16.0f);
1639 const float H = data.Hx16[0] * (1.0f / 16.0f);
// Endpoint difference after the divide by w, scaled by W and H.
1641 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1642 float dy = H * (P1.y / P1.w - P0.y / P0.w);
// Zero-length line; the action taken is not shown here.
1644 if(dx == 0 && dy == 0)
// The rectangle path is disabled by the constant-false condition, which leaves
// the diamond path at line 1707 as the active one (assuming that else pairs
// with this if — braces are not visible in this listing).
1649 if(false) // Rectangle
1659 float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
// dx/dy multiplied by each endpoint's w and divided by W or H.
1664 float dx0w = dx * P0.w / W;
1665 float dy0h = dy * P0.w / H;
1666 float dx0h = dx * P0.w / H;
1667 float dy0w = dy * P0.w / W;
1669 float dx1w = dx * P1.w / W;
1670 float dy1h = dy * P1.w / H;
1671 float dx1h = dx * P1.w / H;
1672 float dy1w = dy * P1.w / W;
// Offset the four corners P[0..3] and compute clip flags for each.
1674 P[0].x += -dy0w + -dx0w;
1675 P[0].y += -dx0h + +dy0h;
1676 C[0] = computeClipFlags(P[0], data);
1678 P[1].x += -dy1w + +dx1w;
1679 P[1].y += -dx1h + +dy1h;
1680 C[1] = computeClipFlags(P[1], data);
1682 P[2].x += +dy1w + +dx1w;
1683 P[2].y += +dx1h + -dy1h;
1684 C[2] = computeClipFlags(P[2], data);
1686 P[3].x += +dy0w + -dx0w;
1687 P[3].y += +dx0h + +dy0h;
1688 C[3] = computeClipFlags(P[3], data);
// Proceed only when the AND of all corner flags equals CLIP_FINITE.
1690 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1692 Polygon polygon(P, 4);
// Clipping is needed when the OR of the flags (plus the draw call's own clip
// flags) contains anything beyond CLIP_FINITE.
1694 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1696 if(clipFlagsOr != Clipper::CLIP_FINITE)
1698 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1704 return setupRoutine(&primitive, &triangle, &polygon, &data);
1707 else // Diamond test convention
// Half the line width, multiplied by each endpoint's w and divided by W or H.
1721 float dx0 = lineWidth * 0.5f * P0.w / W;
1722 float dy0 = lineWidth * 0.5f * P0.w / H;
1724 float dx1 = lineWidth * 0.5f * P1.w / W;
1725 float dy1 = lineWidth * 0.5f * P1.w / H;
// Clip flags for the eight points P[0..7]; the statements that position those
// points are not shown in this listing.
1728 C[0] = computeClipFlags(P[0], data);
1731 C[1] = computeClipFlags(P[1], data);
1734 C[2] = computeClipFlags(P[2], data);
1737 C[3] = computeClipFlags(P[3], data);
1740 C[4] = computeClipFlags(P[4], data);
1743 C[5] = computeClipFlags(P[5], data);
1746 C[6] = computeClipFlags(P[6], data);
1749 C[7] = computeClipFlags(P[7], data);
1751 if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
// First of the direction cases that fill L; the remaining cases and the
// construction of L are not shown.
1757 if(dx > dy) // Right
// The polygon handed to setup has six vertices taken from L.
1798 Polygon polygon(L, 6);
1800 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1802 if(clipFlagsOr != Clipper::CLIP_FINITE)
1804 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1810 return setupRoutine(&primitive, &triangle, &polygon, &data);
// Builds a four-vertex polygon for the point stored in triangle.v0, clips it
// when any clip flag is set, and passes it to the draw call's setup routine.
// The visible return statement forwards the setup routine's result.
// NOTE(review): the declarations of pSize, P and C, the per-corner offsets and
// the early-out bodies fall into line-number gaps of this listing.
1817 bool Renderer::setupPoint(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1819 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1820 const SetupProcessor::State &state = draw.setupState;
1821 const DrawData &data = *draw.data;
1823 Vertex &v = triangle.v0;
1827 int pts = state.pointSizeRegister;
// 0xF is compared against the point size register index; presumably it marks
// "no per-vertex point size", with data.point.pointSize[0] as the alternative
// source — the branch structure is not visible, confirm.
1829 if(state.pointSizeRegister != 0xF)
1835 pSize = data.point.pointSize[0];
// The size is clamped to the [pointSizeMin, pointSizeMax] range from the draw data.
1838 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1843 int pos = state.positionRegister;
// Point size multiplied by P[0].w and by data.halfPixelX[0] / data.halfPixelY[0].
1850 const float X = pSize * P[0].w * data.halfPixelX[0];
1851 const float Y = pSize * P[0].w * data.halfPixelY[0];
// Clip flags for the four corners P[0..3].
1855 C[0] = computeClipFlags(P[0], data);
1859 C[1] = computeClipFlags(P[1], data);
1863 C[2] = computeClipFlags(P[2], data);
1867 C[3] = computeClipFlags(P[3], data);
// v1 and v2 start as copies of v0 and are then shifted by iround(16 * 0.5f * pSize)
// in X and Y respectively; the Y direction follows the sign of data.Hx16[0].
1869 triangle.v1 = triangle.v0;
1870 triangle.v2 = triangle.v0;
1872 triangle.v1.X += iround(16 * 0.5f * pSize);
1873 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1875 Polygon polygon(P, 4);
// Proceed only when the AND of all corner flags equals CLIP_FINITE.
1877 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1879 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1881 if(clipFlagsOr != Clipper::CLIP_FINITE)
1883 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1889 return setupRoutine(&primitive, &triangle, &polygon, &data);
// Returns a bit mask describing on which side of the clip volume 'v' lies:
// bit 0: x > w, bit 1: y > w, bit 2: z > w, bit 3: x < -w, bit 4: y < -w,
// always OR-ed with Clipper::CLIP_FINITE.
// NOTE(review): original line 1902 is missing from this listing, so one more
// term of the expression is not shown. 'data' is not used by the visible lines.
1895 unsigned int Renderer::computeClipFlags(const float4 &v, const DrawData &data)
1897 return ((v.x > v.w) << 0) |
1898 ((v.y > v.w) << 1) |
1899 ((v.z > v.w) << 2) |
1900 ((v.x < -v.w) << 3) |
1901 ((v.y < -v.w) << 4) |
1903 Clipper::CLIP_FINITE; // FIXME: xyz finite
// Allocates the per-unit batches and starts the worker threads.
// unitCount and clusterCount are both set to ceilPow2(threadCount).
// NOTE(review): the embedded line numbers have gaps (e.g. 1934), so some
// statements of this function are not shown in this listing.
1906 void Renderer::initializeThreads()
1908 unitCount = ceilPow2(threadCount);
1909 clusterCount = ceilPow2(threadCount);
// One triangle batch and one primitive batch of batchSize entries per unit.
1911 for(int i = 0; i < unitCount; i++)
1913 triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1914 primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
// Per-thread state: vertex task, initial SUSPEND task, resume/suspend events
// and the worker thread itself.
1917 for(int i = 0; i < threadCount; i++)
1919 vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1920 vertexTask[i]->vertexCache.drawCall = -1;
1922 task[i].type = Task::SUSPEND;
1924 resume[i] = new Event();
1925 suspend[i] = new Event();
1927 Parameters parameters;
1928 parameters.threadIndex = i;
1929 parameters.renderer = this;
1931 exitThreads = false;
// The thread receives the address of the loop-local 'parameters'.
// NOTE(review): this is only safe if the new thread has read it before the
// iteration ends; presumably the Thread constructor or the line elided at 1934
// guarantees that — confirm.
1932 worker[i] = new Thread(threadFunction, &parameters);
1935 suspend[i]->signal();
// Stops the worker threads and releases the per-thread and per-unit storage.
// NOTE(review): the body of the wait loop and several per-thread statements
// fall into line-number gaps of this listing.
1939 void Renderer::terminateThreads()
// Loops while any thread is still awake; the loop body is not shown.
1941 while(threadsAwake != 0)
1946 for(int thread = 0; thread < threadCount; thread++)
// Signal the thread's resume event, then join it before destroying its objects.
1951 resume[thread]->signal();
1952 worker[thread]->join();
1954 delete worker[thread];
1956 delete resume[thread];
1958 delete suspend[thread];
1959 suspend[thread] = 0;
1962 deallocate(vertexTask[thread]);
1963 vertexTask[thread] = 0;
// Iterates over a fixed 16 slots rather than unitCount; presumably 16 is the
// capacity of the batch arrays — confirm against the class declaration.
1966 for(int i = 0; i < 16; i++)
1968 deallocate(triangleBatch[i]);
1969 triangleBatch[i] = 0;
1971 deallocate(primitiveBatch[i]);
1972 primitiveBatch[i] = 0;
// Applies the constants defined inside a vertex shader: every DEF, DEFI and
// DEFB instruction is forwarded to the matching setVertexShaderConstant* call.
// Does nothing when vertexShader is null.
// NOTE(review): the declarations of value[] and integer[] fall into
// line-number gaps of this listing.
1976 void Renderer::loadConstants(const VertexShader *vertexShader)
1978 if(!vertexShader) return;
1980 size_t count = vertexShader->getLength();
1982 for(size_t i = 0; i < count; i++)
1984 const Shader::Instruction *instruction = vertexShader->getInstruction(i);
// DEF: four floats from src[0].value go to the float constant at dst.index.
1986 if(instruction->opcode == Shader::OPCODE_DEF)
1988 int index = instruction->dst.index;
1991 value[0] = instruction->src[0].value[0];
1992 value[1] = instruction->src[0].value[1];
1993 value[2] = instruction->src[0].value[2];
1994 value[3] = instruction->src[0].value[3];
1996 setVertexShaderConstantF(index, value);
// DEFI: four integers from src[0].integer go to the integer constant at dst.index.
1998 else if(instruction->opcode == Shader::OPCODE_DEFI)
2000 int index = instruction->dst.index;
2003 integer[0] = instruction->src[0].integer[0];
2004 integer[1] = instruction->src[0].integer[1];
2005 integer[2] = instruction->src[0].integer[2];
2006 integer[3] = instruction->src[0].integer[3];
2008 setVertexShaderConstantI(index, integer);
// DEFB: src[0].boolean[0] goes to the boolean constant at dst.index.
2010 else if(instruction->opcode == Shader::OPCODE_DEFB)
2012 int index = instruction->dst.index;
2013 int boolean = instruction->src[0].boolean[0];
2015 setVertexShaderConstantB(index, &boolean);
// Applies the constants defined inside a pixel shader: every DEF, DEFI and
// DEFB instruction is forwarded to the matching setPixelShaderConstant* call.
// Does nothing when pixelShader is null.
// NOTE(review): the declarations of value[] and integer[] fall into
// line-number gaps of this listing.
2020 void Renderer::loadConstants(const PixelShader *pixelShader)
2022 if(!pixelShader) return;
2024 size_t count = pixelShader->getLength();
2026 for(size_t i = 0; i < count; i++)
2028 const Shader::Instruction *instruction = pixelShader->getInstruction(i);
// DEF: four floats from src[0].value go to the float constant at dst.index.
2030 if(instruction->opcode == Shader::OPCODE_DEF)
2032 int index = instruction->dst.index;
2035 value[0] = instruction->src[0].value[0];
2036 value[1] = instruction->src[0].value[1];
2037 value[2] = instruction->src[0].value[2];
2038 value[3] = instruction->src[0].value[3];
2040 setPixelShaderConstantF(index, value);
// DEFI: four integers from src[0].integer go to the integer constant at dst.index.
2042 else if(instruction->opcode == Shader::OPCODE_DEFI)
2044 int index = instruction->dst.index;
2047 integer[0] = instruction->src[0].integer[0];
2048 integer[1] = instruction->src[0].integer[1];
2049 integer[2] = instruction->src[0].integer[2];
2050 integer[3] = instruction->src[0].integer[3];
2052 setPixelShaderConstantI(index, integer);
// DEFB: src[0].boolean[0] goes to the boolean constant at dst.index.
2054 else if(instruction->opcode == Shader::OPCODE_DEFB)
2056 int index = instruction->dst.index;
2057 int boolean = instruction->src[0].boolean[0];
2059 setPixelShaderConstantB(index, &boolean);
// Stores the given index buffer resource in the rendering context.
2064 void Renderer::setIndexBuffer(Resource *indexBuffer)
2066 context->indexBuffer = indexBuffer;
// Stores the multisample mask in the rendering context's sampleMask.
2069 void Renderer::setMultiSampleMask(unsigned int mask)
2071 context->sampleMask = mask;
// Writes the requested mode to the sw::transparencyAntialiasing global; the
// sw:: qualification is needed because the parameter shadows that name.
2074 void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2076 sw::transparencyAntialiasing = transparencyAntialiasing;
// Checks whether the texture bound to 'sampler' shares its resource with one of
// the current render targets or with the depth-stencil surface.
// NOTE(review): the return statements fall into line-number gaps of this
// listing; presumably a match returns true and the fall-through false — confirm.
2079 bool Renderer::isReadWriteTexture(int sampler)
2081 for(int index = 0; index < RENDERTARGETS; index++)
2083 if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2089 if(context->depthStencil && context->texture[sampler] == context->depthStencil->getResource())
// Recomputes clipPlane[0..5] from userPlane[0..5] when updateClipPlanes is set,
// touching only the planes whose CLIP_PLANEn bit is present in clipFlags, and
// then clears the updateClipPlanes flag.
2097 void Renderer::updateClipper()
2099 if(updateClipPlanes)
// Fixed-function path: each enabled user plane is multiplied by the matrix
// returned by getViewTransform() before being stored.
2101 if(VertexProcessor::isFixedFunction()) // User plane in world space
2103 const Matrix &scissorWorld = getViewTransform();
2105 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2106 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2107 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2108 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2109 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2110 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
// Otherwise the enabled user planes are copied unchanged.
2112 else // User plane in clip space
2114 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2115 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2116 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2117 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2118 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2119 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2122 updateClipPlanes = false;
// Binds 'resource' as the texture resource of the given sampler slot.
// Asserts that sampler is below TOTAL_IMAGE_UNITS.
2126 void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2128 ASSERT(sampler < TOTAL_IMAGE_UNITS);
2130 context->texture[sampler] = resource;
// Forwards one texture surface (identified by face and mipmap level) to the
// context's sampler object for the given slot.
// Asserts sampler < TOTAL_IMAGE_UNITS, face < 6 and level < MIPMAP_LEVELS.
2133 void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2135 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2137 context->sampler[sampler].setTextureLevel(face, level, surface, type);
// Routes the texture filter for 'sampler' to PixelProcessor when type is
// SAMPLER_PIXEL; the VertexProcessor call presumably sits in an else branch
// whose keyword line is not shown in this listing.
2140 void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2142 if(type == SAMPLER_PIXEL)
2144 PixelProcessor::setTextureFilter(sampler, textureFilter);
2148 VertexProcessor::setTextureFilter(sampler, textureFilter);
// Routes the mipmap filter for 'sampler' to PixelProcessor when type is
// SAMPLER_PIXEL; the VertexProcessor call presumably sits in an else branch
// whose keyword line is not shown in this listing.
2152 void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2154 if(type == SAMPLER_PIXEL)
2156 PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2160 VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
// Routes the gather-enable flag for 'sampler' to PixelProcessor when type is
// SAMPLER_PIXEL; the VertexProcessor call presumably sits in an else branch
// whose keyword line is not shown in this listing.
2164 void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2166 if(type == SAMPLER_PIXEL)
2168 PixelProcessor::setGatherEnable(sampler, enable);
2172 VertexProcessor::setGatherEnable(sampler, enable);
// Routes the U addressing mode for 'sampler' to PixelProcessor when type is
// SAMPLER_PIXEL; the VertexProcessor call presumably sits in an else branch
// whose keyword line is not shown in this listing.
2176 void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2178 if(type == SAMPLER_PIXEL)
2180 PixelProcessor::setAddressingModeU(sampler, addressMode);
2184 VertexProcessor::setAddressingModeU(sampler, addressMode);
// Routes the V addressing mode for 'sampler' to PixelProcessor when type is
// SAMPLER_PIXEL; the VertexProcessor call presumably sits in an else branch
// whose keyword line is not shown in this listing.
2188 void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2190 if(type == SAMPLER_PIXEL)
2192 PixelProcessor::setAddressingModeV(sampler, addressMode);
2196 VertexProcessor::setAddressingModeV(sampler, addressMode);
// Routes the W addressing mode for 'sampler' to PixelProcessor when type is
// SAMPLER_PIXEL; the VertexProcessor call presumably sits in an else branch
// whose keyword line is not shown in this listing.
2200 void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2202 if(type == SAMPLER_PIXEL)
2204 PixelProcessor::setAddressingModeW(sampler, addressMode);
2208 VertexProcessor::setAddressingModeW(sampler, addressMode);
// Routes the sRGB-read flag for 'sampler' to PixelProcessor when type is
// SAMPLER_PIXEL; the VertexProcessor call presumably sits in an else branch
// whose keyword line is not shown in this listing.
2212 void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2214 if(type == SAMPLER_PIXEL)
2216 PixelProcessor::setReadSRGB(sampler, sRGB);
2220 VertexProcessor::setReadSRGB(sampler, sRGB);
// Routes the mipmap LOD bias for 'sampler' to PixelProcessor when type is
// SAMPLER_PIXEL; the VertexProcessor call presumably sits in an else branch
// whose keyword line is not shown in this listing.
2224 void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2226 if(type == SAMPLER_PIXEL)
2228 PixelProcessor::setMipmapLOD(sampler, bias);
2232 VertexProcessor::setMipmapLOD(sampler, bias);
// Routes the border color for 'sampler' to PixelProcessor when type is
// SAMPLER_PIXEL; the VertexProcessor call presumably sits in an else branch
// whose keyword line is not shown in this listing.
2236 void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2238 if(type == SAMPLER_PIXEL)
2240 PixelProcessor::setBorderColor(sampler, borderColor);
2244 VertexProcessor::setBorderColor(sampler, borderColor);
// Routes the maximum anisotropy for 'sampler' to PixelProcessor when type is
// SAMPLER_PIXEL; the VertexProcessor call presumably sits in an else branch
// whose keyword line is not shown in this listing.
2248 void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2250 if(type == SAMPLER_PIXEL)
2252 PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2256 VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
// Routes the red-channel swizzle for 'sampler' to PixelProcessor when type is
// SAMPLER_PIXEL; the VertexProcessor call presumably sits in an else branch
// whose keyword line is not shown in this listing.
2260 void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2262 if(type == SAMPLER_PIXEL)
2264 PixelProcessor::setSwizzleR(sampler, swizzleR);
2268 VertexProcessor::setSwizzleR(sampler, swizzleR);
// Routes the green-channel swizzle for 'sampler' to PixelProcessor when type is
// SAMPLER_PIXEL; the VertexProcessor call presumably sits in an else branch
// whose keyword line is not shown in this listing.
2272 void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2274 if(type == SAMPLER_PIXEL)
2276 PixelProcessor::setSwizzleG(sampler, swizzleG);
2280 VertexProcessor::setSwizzleG(sampler, swizzleG);
// Routes the blue-channel swizzle for 'sampler' to PixelProcessor when type is
// SAMPLER_PIXEL; the VertexProcessor call presumably sits in an else branch
// whose keyword line is not shown in this listing.
2284 void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2286 if(type == SAMPLER_PIXEL)
2288 PixelProcessor::setSwizzleB(sampler, swizzleB);
2292 VertexProcessor::setSwizzleB(sampler, swizzleB);
// Routes the alpha-channel swizzle for 'sampler' to PixelProcessor when type is
// SAMPLER_PIXEL; the VertexProcessor call presumably sits in an else branch
// whose keyword line is not shown in this listing.
2296 void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2298 if(type == SAMPLER_PIXEL)
2300 PixelProcessor::setSwizzleA(sampler, swizzleA);
2304 VertexProcessor::setSwizzleA(sampler, swizzleA);
// Forwards the point-sprite enable flag to the rendering context.
2308 void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2310 context->setPointSpriteEnable(pointSpriteEnable);
// Forwards the point-scale enable flag to the rendering context.
2313 void Renderer::setPointScaleEnable(bool pointScaleEnable)
2315 context->setPointScaleEnable(pointScaleEnable);
// Stores the line width in the rendering context.
2318 void Renderer::setLineWidth(float width)
2320 context->lineWidth = width;
// Sets the depth bias.
// NOTE(review): the body of this function falls into a line-number gap of this
// listing; presumably it stores 'bias' in a member — confirm.
2323 void Renderer::setDepthBias(float bias)
// Stores the slope-scaled depth bias in the slopeDepthBias member.
2328 void Renderer::setSlopeDepthBias(float slopeBias)
2330 slopeDepthBias = slopeBias;
// Stores the rasterizer-discard flag in the rendering context.
2333 void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
2335 context->rasterizerDiscard = rasterizerDiscard;
// Makes 'shader' the context's current pixel shader and applies the constants
// it defines via loadConstants() (which ignores a null shader).
2338 void Renderer::setPixelShader(const PixelShader *shader)
2340 context->pixelShader = shader;
2342 loadConstants(shader);
// Makes 'shader' the context's current vertex shader and applies the constants
// it defines via loadConstants() (which ignores a null shader).
2345 void Renderer::setVertexShader(const VertexShader *shader)
2347 context->vertexShader = shader;
2349 loadConstants(shader);
// Sets 'count' pixel shader float constants starting at 'index'.
// First raises psDirtyConstF of every drawCall[] entry to at least
// index + count, then forwards each constant to PixelProcessor.
// NOTE(review): as shown, 'value' is passed unchanged on every iteration; a
// statement advancing it may be among the lines elided from this listing
// (gap after 2364) — confirm.
2352 void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2354 for(int i = 0; i < DRAW_COUNT; i++)
2356 if(drawCall[i]->psDirtyConstF < index + count)
2358 drawCall[i]->psDirtyConstF = index + count;
2362 for(int i = 0; i < count; i++)
2364 PixelProcessor::setFloatConstant(index + i, value);
// Sets 'count' pixel shader integer constants starting at 'index'.
// First raises psDirtyConstI of every drawCall[] entry to at least
// index + count, then forwards each constant to PixelProcessor.
// NOTE(review): as shown, 'value' is passed unchanged on every iteration; a
// statement advancing it may be among the lines elided from this listing
// (gap after 2381) — confirm.
2369 void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2371 for(int i = 0; i < DRAW_COUNT; i++)
2373 if(drawCall[i]->psDirtyConstI < index + count)
2375 drawCall[i]->psDirtyConstI = index + count;
2379 for(int i = 0; i < count; i++)
2381 PixelProcessor::setIntegerConstant(index + i, value);
// Sets 'count' pixel shader boolean constants starting at 'index'.
// First raises psDirtyConstB of every drawCall[] entry to at least
// index + count, then forwards each constant to PixelProcessor.
// NOTE(review): as shown, '*boolean' is read from the same address on every
// iteration; a statement advancing the pointer may be among the lines elided
// from this listing (gap after 2398) — confirm.
2386 void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2388 for(int i = 0; i < DRAW_COUNT; i++)
2390 if(drawCall[i]->psDirtyConstB < index + count)
2392 drawCall[i]->psDirtyConstB = index + count;
2396 for(int i = 0; i < count; i++)
2398 PixelProcessor::setBooleanConstant(index + i, *boolean);
// Sets 'count' vertex shader float constants starting at 'index'.
// First raises vsDirtyConstF of every drawCall[] entry to at least
// index + count, then forwards each constant to VertexProcessor.
// NOTE(review): as shown, 'value' is passed unchanged on every iteration; a
// statement advancing it may be among the lines elided from this listing
// (gap after 2415) — confirm.
2403 void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2405 for(int i = 0; i < DRAW_COUNT; i++)
2407 if(drawCall[i]->vsDirtyConstF < index + count)
2409 drawCall[i]->vsDirtyConstF = index + count;
2413 for(int i = 0; i < count; i++)
2415 VertexProcessor::setFloatConstant(index + i, value);
// Sets 'count' vertex shader integer constants starting at 'index'.
// First raises vsDirtyConstI of every drawCall[] entry to at least
// index + count, then forwards each constant to VertexProcessor.
// NOTE(review): as shown, 'value' is passed unchanged on every iteration; a
// statement advancing it may be among the lines elided from this listing
// (gap after 2432) — confirm.
2420 void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2422 for(int i = 0; i < DRAW_COUNT; i++)
2424 if(drawCall[i]->vsDirtyConstI < index + count)
2426 drawCall[i]->vsDirtyConstI = index + count;
2430 for(int i = 0; i < count; i++)
2432 VertexProcessor::setIntegerConstant(index + i, value);
// Sets 'count' vertex shader boolean constants starting at 'index'.
// First raises vsDirtyConstB of every drawCall[] entry to at least
// index + count, then forwards each constant to VertexProcessor.
// NOTE(review): as shown, '*boolean' is read from the same address on every
// iteration; a statement advancing the pointer may be among the lines elided
// from this listing (gap after 2449) — confirm.
2437 void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2439 for(int i = 0; i < DRAW_COUNT; i++)
2441 if(drawCall[i]->vsDirtyConstB < index + count)
2443 drawCall[i]->vsDirtyConstB = index + count;
2447 for(int i = 0; i < count; i++)
2449 VertexProcessor::setBooleanConstant(index + i, *boolean);
// Forwards model matrix M for slot i to VertexProcessor. Unlike the view, base
// and projection setters, the visible code does not set updateClipPlanes.
2454 void Renderer::setModelMatrix(const Matrix &M, int i)
2456 VertexProcessor::setModelMatrix(M, i);
// Forwards the view matrix to VertexProcessor and flags the clip planes for
// recomputation.
2459 void Renderer::setViewMatrix(const Matrix &V)
2461 VertexProcessor::setViewMatrix(V);
2462 updateClipPlanes = true;
// Forwards the base matrix to VertexProcessor and flags the clip planes for
// recomputation.
2465 void Renderer::setBaseMatrix(const Matrix &B)
2467 VertexProcessor::setBaseMatrix(B);
2468 updateClipPlanes = true;
// Forwards the projection matrix to VertexProcessor and flags the clip planes
// for recomputation.
2471 void Renderer::setProjectionMatrix(const Matrix &P)
2473 VertexProcessor::setProjectionMatrix(P);
2474 updateClipPlanes = true;
// Appends 'query' to the renderer's list of queries.
2477 void Renderer::addQuery(Query *query)
2479 queries.push_back(query);
// Removes 'query' from the renderer's list of queries.
2482 void Renderer::removeQuery(Query *query)
2484 queries.remove(query);
// Returns the number of threads.
// NOTE(review): the body of this function falls into a line-number gap of this
// listing; presumably it returns threadCount — confirm.
2488 int Renderer::getThreadCount()
// Returns the vertexTime counter of the given thread. 'thread' is used as an
// array index without a range check.
2493 int64_t Renderer::getVertexTime(int thread)
2495 return vertexTime[thread];
// Returns the setupTime counter of the given thread. 'thread' is used as an
// array index without a range check.
2498 int64_t Renderer::getSetupTime(int thread)
2500 return setupTime[thread];
// Returns the pixelTime counter of the given thread. 'thread' is used as an
// array index without a range check.
2503 int64_t Renderer::getPixelTime(int thread)
2505 return pixelTime[thread];
// Zeroes the vertex, setup and pixel time counters of the first threadCount threads.
2508 void Renderer::resetTimers()
2510 for(int thread = 0; thread < threadCount; thread++)
2512 vertexTime[thread] = 0;
2513 setupTime[thread] = 0;
2514 pixelTime[thread] = 0;
// Stores a copy of the viewport; this-> is needed because the parameter
// shadows the member of the same name.
2519 void Renderer::setViewport(const Viewport &viewport)
2521 this->viewport = viewport;
// Stores a copy of the scissor rectangle; this-> is needed because the
// parameter shadows the member of the same name.
2524 void Renderer::setScissor(const Rect &scissor)
2526 this->scissor = scissor;
// Stores the user clip-plane enable flags, shifted left by 8 bits into the
// clipFlags member.
2529 void Renderer::setClipFlags(int flags)
2531 clipFlags = flags << 8; // Bottom 8 bits used by legacy frustum
// Stores a user clip plane (four coefficients) at 'index' when the index is
// below MAX_CLIP_PLANES, and flags the clip planes for recomputation.
// NOTE(review): lines between 2538 and 2542 are elided from this listing, so
// any handling of an out-of-range index is not shown.
2534 void Renderer::setClipPlane(unsigned int index, const float plane[4])
2536 if(index < MAX_CLIP_PLANES)
2538 userPlane[index] = plane;
2542 updateClipPlanes = true;
2545 void Renderer::updateConfiguration(bool initialUpdate)
2547 bool newConfiguration = swiftConfig->hasNewConfiguration();
2549 if(newConfiguration || initialUpdate)
2553 SwiftConfig::Configuration configuration = {};
2554 swiftConfig->getConfiguration(configuration);
2556 precacheVertex = !newConfiguration && configuration.precache;
2557 precacheSetup = !newConfiguration && configuration.precache;
2558 precachePixel = !newConfiguration && configuration.precache;
2560 VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2561 PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2562 SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2564 switch(configuration.textureSampleQuality)
2566 case 0: Sampler::setFilterQuality(FILTER_POINT); break;
2567 case 1: Sampler::setFilterQuality(FILTER_LINEAR); break;
2568 case 2: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2569 default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2572 switch(configuration.mipmapQuality)
2574 case 0: Sampler::setMipmapQuality(MIPMAP_POINT); break;
2575 case 1: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2576 default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2579 setPerspectiveCorrection(configuration.perspectiveCorrection);
2581 switch(configuration.transcendentalPrecision)
2584 logPrecision = APPROXIMATE;
2585 expPrecision = APPROXIMATE;
2586 rcpPrecision = APPROXIMATE;
2587 rsqPrecision = APPROXIMATE;
2590 logPrecision = PARTIAL;
2591 expPrecision = PARTIAL;
2592 rcpPrecision = PARTIAL;
2593 rsqPrecision = PARTIAL;
2596 logPrecision = ACCURATE;
2597 expPrecision = ACCURATE;
2598 rcpPrecision = ACCURATE;
2599 rsqPrecision = ACCURATE;
2602 logPrecision = WHQL;
2603 expPrecision = WHQL;
2604 rcpPrecision = WHQL;
2605 rsqPrecision = WHQL;
2608 logPrecision = IEEE;
2609 expPrecision = IEEE;
2610 rcpPrecision = IEEE;
2611 rsqPrecision = IEEE;
2614 logPrecision = ACCURATE;
2615 expPrecision = ACCURATE;
2616 rcpPrecision = ACCURATE;
2617 rsqPrecision = ACCURATE;
2621 switch(configuration.transparencyAntialiasing)
2623 case 0: transparencyAntialiasing = TRANSPARENCY_NONE; break;
2624 case 1: transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2625 default: transparencyAntialiasing = TRANSPARENCY_NONE; break;
2628 switch(configuration.threadCount)
2630 case -1: threadCount = CPUID::coreCount(); break;
2631 case 0: threadCount = CPUID::processAffinity(); break;
2632 default: threadCount = configuration.threadCount; break;
2635 CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2636 CPUID::setEnableSSSE3(configuration.enableSSSE3);
2637 CPUID::setEnableSSE3(configuration.enableSSE3);
2638 CPUID::setEnableSSE2(configuration.enableSSE2);
2639 CPUID::setEnableSSE(configuration.enableSSE);
2641 for(int pass = 0; pass < 10; pass++)
2643 optimization[pass] = configuration.optimization[pass];
2646 forceWindowed = configuration.forceWindowed;
2647 complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2648 postBlendSRGB = configuration.postBlendSRGB;
2649 exactColorRounding = configuration.exactColorRounding;
2650 forceClearRegisters = configuration.forceClearRegisters;
2653 minPrimitives = configuration.minPrimitives;
2654 maxPrimitives = configuration.maxPrimitives;
2658 if(!initialUpdate && !worker[0])
2660 initializeThreads();