1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "Renderer.hpp"
17 #include "Clipper.hpp"
19 #include "FrameBuffer.hpp"
21 #include "Surface.hpp"
23 #include "Primitive.hpp"
24 #include "Polygon.hpp"
25 #include "SwiftConfig.hpp"
26 #include "MutexLock.hpp"
29 #include "Resource.hpp"
30 #include "Constants.hpp"
32 #include "Reactor/Reactor.hpp"
// File-scope configuration used by the Renderer code in this file.
// disableServer is handed to the SwiftConfig constructor by Renderer::Renderer.
36 bool disableServer = true;
// Renderer::draw() tests its primitive count against these two bounds.
39 unsigned int minPrimitives = 1;
40 unsigned int maxPrimitives = 1 << 21;
// Flags declared extern here. The Renderer constructor assigns the first seven
// from its Conventions argument and exactColorRounding from its own parameter.
45 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates
46 extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1]
47 extern bool booleanFaceRegister;
48 extern bool fullPixelPositionRegister;
49 extern bool leadingVertexFirst; // Flat shading uses first vertex, else last
50 extern bool secondaryColor; // Specular lighting is applied after texturing
51 extern bool colorsDefaultToZero;
53 extern bool forceWindowed;
// Tested by Renderer::draw() while it prepares the viewport depth values.
54 extern bool complementaryDepthBuffer;
55 extern bool postBlendSRGB;
56 extern bool exactColorRounding;
57 extern TransparencyAntialiasing transparencyAntialiasing;
58 extern bool forceClearRegisters;
60 extern bool precacheVertex;
61 extern bool precacheSetup;
62 extern bool precachePixel;
// Precision settings, all defaulting to ACCURATE. Renderer::threadFunction
// compares logPrecision against IEEE to decide whether worker threads enable
// flush-to-zero and denormals-are-zero.
69 TranscendentalPrecision logPrecision = ACCURATE;
70 TranscendentalPrecision expPrecision = ACCURATE;
71 TranscendentalPrecision rcpPrecision = ACCURATE;
72 TranscendentalPrecision rsqPrecision = ACCURATE;
73 bool perspectiveCorrection = true;
85 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
89 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
95 data = (DrawData*)allocate(sizeof(DrawData));
96 data->constants = &constants;
// Constructs the renderer on top of the vertex, pixel and setup processors,
// all of which share the given context.
//   context            - rendering context forwarded to each processor base
//   conventions        - API convention flags copied into the sw:: globals
//   exactColorRounding - copied into the sw:: global of the same name
106 Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
// Publish the conventions through the global flags.
108 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
109 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
110 sw::booleanFaceRegister = conventions.booleanFaceRegister;
111 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
112 sw::leadingVertexFirst = conventions.leadingVertexFirst;
113 sw::secondaryColor = conventions.secondaryColor;
114 sw::colorsDefaultToZero = conventions.colorsDefaultToZero;
115 sw::exactColorRounding = exactColorRounding;
// Start with render target 0 unset, then create the clipper and blitter.
117 setRenderTarget(0, 0);
118 clipper = new Clipper(symmetricNormalizedDepth);
119 blitter = new Blitter;
// Raise every update flag for the matrices and clip planes.
121 updateViewMatrix = true;
122 updateBaseMatrix = true;
123 updateProjectionMatrix = true;
124 updateClipPlanes = true;
130 for(int i = 0; i < 16; i++)
// Event signalled by finishRendering() after it marks a draw call as free.
140 resumeApp = new Event();
// No triangle or primitive batch storage is attached yet.
148 for(int i = 0; i < 16; i++)
150 triangleBatch[i] = 0;
151 primitiveBatch[i] = 0;
// Allocate the DRAW_COUNT draw call records; drawList starts out in the same order.
154 for(int draw = 0; draw < DRAW_COUNT; draw++)
156 drawCall[draw] = new DrawCall();
157 drawList[draw] = drawCall[draw];
// Initialize the progress records of the 16 primitive units and 16 pixel clusters.
160 for(int unit = 0; unit < 16; unit++)
162 primitiveProgress[unit].init();
165 for(int cluster = 0; cluster < 16; cluster++)
167 pixelProgress[cluster].init();
// Create the configuration object and apply the configuration once.
172 swiftConfig = new SwiftConfig(disableServer);
173 updateConfiguration(true);
// Resource locked by draw() (PRIVATE) and synchronize() (PUBLIC).
175 sync = new Resource(0);
// Destructor. Among its cleanup it deletes every DrawCall that the
// constructor allocated.
178 Renderer::~Renderer()
191 for(int draw = 0; draw < DRAW_COUNT; draw++)
193 delete drawCall[draw];
// Class-specific allocation function: returns storage for exactly one
// Renderer obtained from sw::allocate with an alignment argument of 16.
199 // This object has to be mem aligned
200 void* Renderer::operator new(size_t size)
202 ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class
203 return sw::allocate(sizeof(Renderer), 16);
// Class-specific deallocation function, the counterpart of the aligned
// operator new of this class.
206 void Renderer::operator delete(void * mem)
// Records a draw call for the worker threads. For each super-sample pass it
// refreshes the processor states and routines, fills a free DrawCall and its
// DrawData with a snapshot of the context (streams, textures, shader
// constants, viewport, clip planes, buffers, scissor), locks the resources
// involved and hands the work to the scheduler.
//   drawType    - primitive topology, stored in the context and the draw call
//   indexOffset - byte offset added to the locked index buffer pointer
//   count       - primitive count, tested against minPrimitives/maxPrimitives
//   update      - when true, the processor states are recomputed
211 void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
214 if(count < minPrimitives || count > maxPrimitives)
220 context->drawType = drawType;
222 updateConfiguration();
225 int ss = context->getSuperSampleCount();
226 int ms = context->getMultiSampleCount();
// One pass per super-sample index q.
228 for(int q = 0; q < ss; q++)
// Select the ms bits of the sample mask that belong to this pass.
230 unsigned int oldMultiSampleMask = context->multiSampleMask;
231 context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
233 if(!context->multiSampleMask)
238 sync->lock(sw::PRIVATE);
// Recompute the states and look up their routines when requested or when the
// effective multisample mask changed.
240 if(update || oldMultiSampleMask != context->multiSampleMask)
242 vertexState = VertexProcessor::update(drawType);
243 setupState = SetupProcessor::update();
244 pixelState = PixelProcessor::update();
246 vertexRoutine = VertexProcessor::routine(vertexState);
247 setupRoutine = SetupProcessor::routine(setupState);
248 pixelRoutine = PixelProcessor::routine(pixelState);
// Primitives per batch shrink with the multisample count.
251 int batch = batchSize / ms;
// Pick the setup member function that matches the primitive kind and fill mode.
253 int (Renderer::*setupPrimitives)(int batch, int count);
255 if(context->isDrawTriangle())
257 switch(context->fillMode)
260 setupPrimitives = &Renderer::setupSolidTriangles;
263 setupPrimitives = &Renderer::setupWireframeTriangle;
267 setupPrimitives = &Renderer::setupVertexTriangle;
275 else if(context->isDrawLine())
277 setupPrimitives = &Renderer::setupLines;
281 setupPrimitives = &Renderer::setupPoints;
// Look for a free draw call record; finishRendering() marks a record free by
// setting its references to -1.
288 for(int i = 0; i < DRAW_COUNT; i++)
290 if(drawCall[i]->references == -1)
293 drawList[nextDraw % DRAW_COUNT] = draw;
306 DrawData *data = draw->data;
// Attach the active queries. Transform feedback queries are attached only when
// the vertex state has both the query and transform feedback enabled. Each
// attached query gains a reference that finishRendering() releases.
308 if(queries.size() != 0)
310 draw->queries = new std::list<Query*>();
311 bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
312 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
315 if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
317 atomicIncrement(&(q->reference));
318 draw->queries->push_back(q);
323 draw->drawType = drawType;
324 draw->batchSize = batch;
// Bind the routines for the duration of the draw; finishRendering() unbinds them.
326 vertexRoutine->bind();
327 setupRoutine->bind();
328 pixelRoutine->bind();
330 draw->vertexRoutine = vertexRoutine;
331 draw->setupRoutine = setupRoutine;
332 draw->pixelRoutine = pixelRoutine;
333 draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
334 draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
335 draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
336 draw->setupPrimitives = setupPrimitives;
337 draw->setupState = setupState;
// Capture and lock the vertex input streams.
339 for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
341 draw->vertexStream[i] = context->input[i].resource;
342 data->input[i] = context->input[i].buffer;
343 data->stride[i] = context->input[i].stride;
345 if(draw->vertexStream[i])
347 draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
// Lock the index buffer; indexOffset is applied in bytes.
351 if(context->indexBuffer)
353 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
356 draw->indexBuffer = context->indexBuffer;
// Clear all texture slots, then capture and lock the textures that the pixel
// state actually samples.
358 for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
360 draw->texture[sampler] = 0;
363 for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
365 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
367 draw->texture[sampler] = context->texture[sampler];
368 draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE); // If the texture is both read and written, use the same read/write lock as render targets
370 data->mipmap[sampler] = context->sampler[sampler].getTextureData();
// Pixel shader constants: each dirty count gives the number of constants to
// copy and is cleared after the copy. The cW copy is capped at 8 entries.
374 if(context->pixelShader)
376 if(draw->psDirtyConstF)
378 memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
379 memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
380 draw->psDirtyConstF = 0;
383 if(draw->psDirtyConstI)
385 memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
386 draw->psDirtyConstI = 0;
389 if(draw->psDirtyConstB)
391 memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
392 draw->psDirtyConstB = 0;
395 PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
// NOTE(review): presumably the path without a pixel shader - confirm.
// No pixel uniform buffers are referenced by this draw.
399 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
401 draw->pUniformBuffers[i] = nullptr;
// Texture stage uniforms are copied only for pixel shader versions up to 0x0104.
405 if(context->pixelShaderVersion() <= 0x0104)
407 for(int stage = 0; stage < 8; stage++)
409 if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
411 data->textureStage[stage] = context->textureStage[stage].uniforms;
// Vertex shader resources. Vertex textures occupy the slots after
// TEXTURE_IMAGE_UNITS and are considered for shader versions from 0x0300.
417 if(context->vertexShader)
419 if(context->vertexShader->getVersion() >= 0x0300)
421 for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
423 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
425 draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
426 draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
428 data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
// Vertex shader constants follow the same dirty-count scheme as the pixel shader.
433 if(draw->vsDirtyConstF)
435 memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
436 draw->vsDirtyConstF = 0;
439 if(draw->vsDirtyConstI)
441 memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
442 draw->vsDirtyConstI = 0;
445 if(draw->vsDirtyConstB)
447 memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
448 draw->vsDirtyConstB = 0;
451 if(context->vertexShader->isInstanceIdDeclared())
453 data->instanceID = context->instanceID;
456 VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
457 VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
// NOTE(review): presumably the path without a vertex shader - confirm.
// The dirty counts are set so that a later copy transfers
// VERTEX_UNIFORM_VECTORS + 1 float vectors and 16 integer and boolean
// constants, and no vertex uniform or transform feedback buffers are referenced.
463 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
464 draw->vsDirtyConstI = 16;
465 draw->vsDirtyConstB = 16;
467 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
469 draw->vUniformBuffers[i] = nullptr;
472 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
474 draw->transformFeedbackBuffers[i] = nullptr;
// Fixed-function state is copied only when the pixel or setup state uses it.
478 if(pixelState.stencilActive)
480 data->stencil[0] = stencil;
481 data->stencil[1] = stencilCCW;
484 if(pixelState.fogActive)
489 if(setupState.isDrawPoint)
494 data->lineWidth = context->lineWidth;
496 data->factor = factor;
// Alpha-to-coverage thresholds are spread around the alpha reference value.
// margin is the distance from ref to the nearer end of [0, 1].
498 if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
500 float ref = context->alphaReference * (1.0f / 255.0f);
501 float margin = sw::min(ref, 1.0f - ref);
// Four thresholds.
505 data->a2c0 = replicate(ref - margin * 0.6f);
506 data->a2c1 = replicate(ref - margin * 0.2f);
507 data->a2c2 = replicate(ref + margin * 0.2f);
508 data->a2c3 = replicate(ref + margin * 0.6f);
// Two thresholds.
512 data->a2c0 = replicate(ref - margin * 0.3f);
513 data->a2c1 = replicate(ref + margin * 0.3f);
// Reset the per-cluster occlusion counters and cycle counters.
518 if(pixelState.occlusionEnabled)
520 for(int cluster = 0; cluster < clusterCount; cluster++)
522 data->occlusion[cluster] = 0;
527 for(int cluster = 0; cluster < clusterCount; cluster++)
529 for(int i = 0; i < PERF_TIMERS; i++)
531 data->cycles[i][cluster] = 0;
// Viewport: W and H are half extents, (X0, Y0) is the center, N and F are the
// depth range limits.
538 float W = 0.5f * viewport.width;
539 float H = 0.5f * viewport.height;
540 float X0 = viewport.x0 + W;
541 float Y0 = viewport.y0 + H;
542 float N = viewport.minZ;
543 float F = viewport.maxZ;
546 if(context->isDrawTriangle(false))
551 if(complementaryDepthBuffer)
// Sample offset tables. The row is log2 of the super-sample count and the
// column is the pass index q.
557 static const float X[5][16] = // Fragment offsets
559 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample
560 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples
561 {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples
562 {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples
563 {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f} // 16 samples
566 static const float Y[5][16] = // Fragment offsets
568 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample
569 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples
570 {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples
571 {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples
572 {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f} // 16 samples
575 int s = sw::log2(ss);
// Viewport terms scaled by 16, with the centers shifted by 8; the sample
// offset and the half pixel are divided by the half extents.
577 data->Wx16 = replicate(W * 16);
578 data->Hx16 = replicate(H * 16);
579 data->X0x16 = replicate(X0 * 16 - 8);
580 data->Y0x16 = replicate(Y0 * 16 - 8);
581 data->XXXX = replicate(X[s][q] / W);
582 data->YYYY = replicate(Y[s][q] / H);
583 data->halfPixelX = replicate(0.5f / W);
584 data->halfPixelY = replicate(0.5f / H);
585 data->viewportHeight = abs(viewport.height);
586 data->slopeDepthBias = slopeDepthBias;
587 data->depthRange = Z;
// Copy only the user clip planes whose flag is set.
589 draw->clipFlags = clipFlags;
593 if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
594 if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
595 if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
596 if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
597 if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
598 if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
// Lock the color, depth and stencil buffers at slice q * ms and record their
// pitch and slice sizes. finishRendering() issues the matching unlocks.
604 for(int index = 0; index < RENDERTARGETS; index++)
606 draw->renderTarget[index] = context->renderTarget[index];
608 if(draw->renderTarget[index])
610 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
611 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
612 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
616 draw->depthBuffer = context->depthBuffer;
617 draw->stencilBuffer = context->stencilBuffer;
619 if(draw->depthBuffer)
621 data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
622 data->depthPitchB = context->depthBuffer->getInternalPitchB();
623 data->depthSliceB = context->depthBuffer->getInternalSliceB();
626 if(draw->stencilBuffer)
628 data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, q * ms, MANAGED);
629 data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
630 data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
636 data->scissorX0 = scissor.x0;
637 data->scissorX1 = scissor.x1;
638 data->scissorY0 = scissor.y0;
639 data->scissorY1 = scissor.y1;
// One reference per batch: count divided by batch, rounded up.
645 draw->references = (count + batch - 1) / batch;
// The draw is published while holding the scheduler mutex.
647 schedulerMutex.lock();
649 schedulerMutex.unlock();
652 if(threadCount == 1) // Use main thread for draw execution
655 task[0].type = Task::RESUME;
667 task[0].type = Task::RESUME;
// Clears clearRect in dest through the blitter. The rectangle is converted to
// a SliceRect and the blitter is invoked once for every slice index below
// dest->getDepth(), forwarding value, format and rgbaMask unchanged.
675 void Renderer::clear(void *value, Format format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask)
677 SliceRect rect = clearRect;
678 int samples = dest->getDepth();
680 for(rect.slice = 0; rect.slice < samples; rect.slice++)
682 blitter->clear(value, format, dest, rect, rgbaMask);
// Forwards a blit from sRect of source to dRect of dest to the blitter,
// passing the filter and isStencil flags through unchanged.
686 void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil)
688 blitter->blit(source, sRect, dest, dRect, filter, isStencil);
// Forwards a 3D blit from source to dest to the blitter.
691 void Renderer::blit3D(Surface *source, Surface *dest)
693 blitter->blit3D(source, dest);
// Worker thread entry point. parameters points to a Parameters record that
// carries the renderer and the index of this thread.
696 void Renderer::threadFunction(void *parameters)
698 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
699 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
// Below IEEE precision, enable flush-to-zero and denormals-are-zero.
701 if(logPrecision < IEEE)
703 CPUID::setFlushToZero(true);
704 CPUID::setDenormalsAreZero(true);
707 renderer->threadLoop(threadIndex);
// Body of a worker thread: runs the task loop, then signals the suspend event
// of this thread and waits on its resume event.
710 void Renderer::threadLoop(int threadIndex)
714 taskLoop(threadIndex);
716 suspend[threadIndex]->signal();
717 resume[threadIndex]->wait();
// Alternates between scheduling and executing a task for the given thread
// until the task slot of that thread holds a SUSPEND task.
721 void Renderer::taskLoop(int threadIndex)
723 while(task[threadIndex].type != Task::SUSPEND)
725 scheduleTask(threadIndex);
726 executeTask(threadIndex);
// Appends runnable tasks to the task queue, a ring whose head index wraps at
// 32. Pixel tasks are looked for first, then primitive tasks. It is called
// from scheduleTask() while the scheduler mutex is held.
730 void Renderer::findAvailableTasks()
// Pixel tasks: an idle cluster can take a primitive unit that holds processed
// primitives of the draw call the cluster is on, provided the unit starts at
// the primitive the cluster has reached.
733 for(int cluster = 0; cluster < clusterCount; cluster++)
735 if(!pixelProgress[cluster].executing)
737 for(int unit = 0; unit < unitCount; unit++)
739 if(primitiveProgress[unit].references > 0) // Contains processed primitives
741 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
743 if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive) // Previous primitives have been rendered
745 Task &task = taskQueue[qHead];
746 task.type = Task::PIXELS;
747 task.primitiveUnit = unit;
748 task.pixelCluster = cluster;
// Mark the cluster busy; finishRendering() clears this flag.
750 pixelProgress[cluster].executing = true;
752 // Commit to the task queue
753 qHead = (qHead + 1) % 32;
764 // Find primitive tasks
765 if(currentDraw == nextDraw)
767 return; // No more primitives to process
770 for(int unit = 0; unit < unitCount; unit++)
772 DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
// All primitives of the current draw call have already been handed out.
774 if(draw->primitive >= draw->count)
778 if(currentDraw == nextDraw)
780 return; // No more primitives to process
783 draw = drawList[currentDraw % DRAW_COUNT];
786 if(!primitiveProgress[unit].references) // Task not already being executed and not still in use by a pixel unit
788 int primitive = draw->primitive;
789 int count = draw->count;
790 int batch = draw->batchSize;
// Assign the next batch to this unit; the last batch may be shorter.
792 primitiveProgress[unit].drawCall = currentDraw;
793 primitiveProgress[unit].firstPrimitive = primitive;
794 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
796 draw->primitive += batch;
798 Task &task = taskQueue[qHead];
799 task.type = Task::PRIMITIVES;
800 task.primitiveUnit = unit;
// Nonzero and not positive: the unit is skipped by both searches above until
// executeTask() stores the cluster count here.
802 primitiveProgress[unit].references = -1;
804 // Commit to the task queue
805 qHead = (qHead + 1) % 32;
// Chooses the next task for the given thread while holding the scheduler mutex.
811 void Renderer::scheduleTask(int threadIndex)
813 schedulerMutex.lock();
// Look for more work when fewer tasks are queued than the number of threads
// that are not awake, plus one.
815 if((int)qSize < threadCount - threadsAwake + 1)
817 findAvailableTasks();
// Take the oldest queued entry, which sits qSize slots behind the head of the ring.
822 task[threadIndex] = taskQueue[(qHead - qSize) % 32];
// Some threads are not awake: switch up to `wakeup` suspended threads to RESUME.
825 if(threadsAwake != threadCount)
827 int wakeup = qSize - threadsAwake + 1;
829 for(int i = 0; i < threadCount && wakeup > 0; i++)
831 if(task[i].type == Task::SUSPEND)
834 task[i].type = Task::RESUME;
// NOTE(review): presumably reached when no task is available - confirm.
// A SUSPEND task ends the taskLoop() of this thread.
845 task[threadIndex].type = Task::SUSPEND;
850 schedulerMutex.unlock();
// Executes the task stored in the slot of the given thread. Tick counts taken
// from Timer::ticks() are accumulated into per-thread vertex, setup and pixel
// timers.
853 void Renderer::executeTask(int threadIndex)
856 int64_t startTick = Timer::ticks();
859 switch(task[threadIndex].type)
// Primitive task: run vertex processing for the batch of the unit, then
// primitive setup unless the setup state of the draw discards rasterization.
861 case Task::PRIMITIVES:
863 int unit = task[threadIndex].primitiveUnit;
865 int input = primitiveProgress[unit].firstPrimitive;
866 int count = primitiveProgress[unit].primitiveCount;
867 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
868 int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;
870 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
873 int64_t time = Timer::ticks();
874 vertexTime[threadIndex] += time - startTick;
880 if(!draw->setupState.rasterizerDiscard)
882 visible = (this->*setupPrimitives)(unit, count);
// Publish the setup result and give the unit one reference per pixel cluster;
// finishRendering() decrements this count.
885 primitiveProgress[unit].visible = visible;
886 primitiveProgress[unit].references = clusterCount;
889 setupTime[threadIndex] += Timer::ticks() - startTick;
// Pixel task: run the pixel routine of the draw call on the primitives of the
// unit for this cluster, then do the completion bookkeeping.
895 int unit = task[threadIndex].primitiveUnit;
896 int visible = primitiveProgress[unit].visible;
900 int cluster = task[threadIndex].pixelCluster;
901 Primitive *primitive = primitiveBatch[unit];
902 DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
903 DrawData *data = draw->data;
904 PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
906 pixelRoutine(primitive, visible, cluster, data);
909 finishRendering(task[threadIndex]);
912 pixelTime[threadIndex] += Timer::ticks() - startTick;
// Takes a PUBLIC lock on the sync resource, the same resource that draw()
// locks as PRIVATE.
925 void Renderer::synchronize()
927 sync->lock(sw::PUBLIC);
// Bookkeeping after a pixel task: advances the progress of the cluster, drops
// references on the primitive unit and the draw call, and contains the cleanup
// that releases everything draw() acquired for the draw call.
//   pixelTask - the finished task; names the primitive unit and pixel cluster
931 void Renderer::finishRendering(Task &pixelTask)
933 int unit = pixelTask.primitiveUnit;
934 int cluster = pixelTask.pixelCluster;
936 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
937 DrawData &data = *draw.data;
938 int primitive = primitiveProgress[unit].firstPrimitive;
939 int count = primitiveProgress[unit].primitiveCount;
940 int processedPrimitives = primitive + count;
942 pixelProgress[cluster].processedPrimitives = processedPrimitives;
// The cluster has rendered the whole draw call: move it to the next one.
944 if(pixelProgress[cluster].processedPrimitives >= draw.count)
946 pixelProgress[cluster].drawCall++;
947 pixelProgress[cluster].processedPrimitives = 0;
// Release one reference on the primitive unit, then one on the draw call.
// NOTE(review): the cleanup below is presumably reached only when the last
// reference is released - confirm against the guarding conditions.
950 int ref = atomicDecrement(&primitiveProgress[unit].references);
954 ref = atomicDecrement(&draw.references);
// Add the per-cluster cycle counters to the profiler totals.
959 for(int cluster = 0; cluster < clusterCount; cluster++)
961 for(int i = 0; i < PERF_TIMERS; i++)
963 profiler.cycles[i] += data.cycles[i][cluster];
// Update the attached queries and release the reference that draw() took.
970 for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
976 case Query::FRAGMENTS_PASSED:
977 for(int cluster = 0; cluster < clusterCount; cluster++)
979 atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
982 case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
983 atomicAdd((volatile int*)&query->data, processedPrimitives);
989 atomicDecrement(&query->reference);
// Unlock the buffers, textures, streams and uniform and transform feedback
// buffers recorded in the draw call.
996 for(int i = 0; i < RENDERTARGETS; i++)
998 if(draw.renderTarget[i])
1000 draw.renderTarget[i]->unlockInternal();
1004 if(draw.depthBuffer)
1006 draw.depthBuffer->unlockInternal();
1009 if(draw.stencilBuffer)
1011 draw.stencilBuffer->unlockStencil();
1014 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
1018 draw.texture[i]->unlock();
1022 for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
1024 if(draw.vertexStream[i])
1026 draw.vertexStream[i]->unlock();
1030 if(draw.indexBuffer)
1032 draw.indexBuffer->unlock();
1035 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
1037 if(draw.pUniformBuffers[i])
1039 draw.pUniformBuffers[i]->unlock();
1041 if(draw.vUniformBuffers[i])
1043 draw.vUniformBuffers[i]->unlock();
1047 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
1049 if(draw.transformFeedbackBuffers[i])
1051 draw.transformFeedbackBuffers[i]->unlock();
// Undo the bind() calls made by draw().
1055 draw.vertexRoutine->unbind();
1056 draw.setupRoutine->unbind();
1057 draw.pixelRoutine->unbind();
// -1 is the value draw() looks for when it searches for a free draw call.
1061 draw.references = -1;
1062 resumeApp->signal();
// The cluster can be given another pixel task by findAvailableTasks().
1066 pixelProgress[cluster].executing = false;
// Builds one triple of vertex indices per primitive of a batch, according to
// the draw type, and passes the triples to the vertex routine of the draw call.
//   unit          - primitive unit; selects the triangle batch and draw call
//   start         - index of the first primitive of the batch within the draw
//   triangleCount - number of primitives in the batch
//   loop          - modulus used to wrap line loop indices (the caller passes
//                   the primitive count of the draw)
//   thread        - selects the per-thread vertex task, including its cache
1069 void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
1071 Triangle *triangle = triangleBatch[unit];
1072 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1073 DrawData *data = draw->data;
1074 VertexTask *task = vertexTask[thread];
1076 const void *indices = data->indices;
1077 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
// The vertex cache is cleared whenever this thread moves to another draw call.
1079 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
1081 task->vertexCache.clear();
1082 task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
// Index triples for up to 128 primitives.
1085 unsigned int batch[128][3]; // FIXME: Adjust to dynamic batch size
1087 switch(draw->drawType)
// Non-indexed points: all three slots hold the same vertex index.
1089 case DRAW_POINTLIST:
1091 unsigned int index = start;
1093 for(unsigned int i = 0; i < triangleCount; i++)
1095 batch[i][0] = index;
1096 batch[i][1] = index;
1097 batch[i][2] = index;
// Non-indexed lines starting at vertex 2 * start; the third slot repeats the
// second endpoint.
1105 unsigned int index = 2 * start;
1107 for(unsigned int i = 0; i < triangleCount; i++)
1109 batch[i][0] = index + 0;
1110 batch[i][1] = index + 1;
1111 batch[i][2] = index + 1;
1117 case DRAW_LINESTRIP:
1119 unsigned int index = start;
1121 for(unsigned int i = 0; i < triangleCount; i++)
1123 batch[i][0] = index + 0;
1124 batch[i][1] = index + 1;
1125 batch[i][2] = index + 1;
// Same as a line strip, but the indices wrap around modulo loop.
1133 unsigned int index = start;
1135 for(unsigned int i = 0; i < triangleCount; i++)
1137 batch[i][0] = (index + 0) % loop;
1138 batch[i][1] = (index + 1) % loop;
1139 batch[i][2] = (index + 1) % loop;
1145 case DRAW_TRIANGLELIST:
1147 unsigned int index = 3 * start;
1149 for(unsigned int i = 0; i < triangleCount; i++)
1151 batch[i][0] = index + 0;
1152 batch[i][1] = index + 1;
1153 batch[i][2] = index + 2;
// Strip: the low bit of index decides which of the next two vertices fills
// the second slot and which fills the third.
1159 case DRAW_TRIANGLESTRIP:
1161 unsigned int index = start;
1163 for(unsigned int i = 0; i < triangleCount; i++)
1165 batch[i][0] = index + 0;
1166 batch[i][1] = index + (index & 1) + 1;
1167 batch[i][2] = index + (~index & 1) + 1;
1173 case DRAW_TRIANGLEFAN:
1175 unsigned int index = start;
1177 for(unsigned int i = 0; i < triangleCount; i++)
1179 batch[i][0] = index + 1;
1180 batch[i][1] = index + 2;
// Indexed variants: same patterns as above, but the vertex indices are read
// from the index buffer as 8, 16 or 32 bit unsigned values.
1187 case DRAW_INDEXEDPOINTLIST8:
1189 const unsigned char *index = (const unsigned char*)indices + start;
1191 for(unsigned int i = 0; i < triangleCount; i++)
1193 batch[i][0] = *index;
1194 batch[i][1] = *index;
1195 batch[i][2] = *index;
1201 case DRAW_INDEXEDPOINTLIST16:
1203 const unsigned short *index = (const unsigned short*)indices + start;
1205 for(unsigned int i = 0; i < triangleCount; i++)
1207 batch[i][0] = *index;
1208 batch[i][1] = *index;
1209 batch[i][2] = *index;
1215 case DRAW_INDEXEDPOINTLIST32:
1217 const unsigned int *index = (const unsigned int*)indices + start;
1219 for(unsigned int i = 0; i < triangleCount; i++)
1221 batch[i][0] = *index;
1222 batch[i][1] = *index;
1223 batch[i][2] = *index;
1229 case DRAW_INDEXEDLINELIST8:
1231 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1233 for(unsigned int i = 0; i < triangleCount; i++)
1235 batch[i][0] = index[0];
1236 batch[i][1] = index[1];
1237 batch[i][2] = index[1];
1243 case DRAW_INDEXEDLINELIST16:
1245 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1247 for(unsigned int i = 0; i < triangleCount; i++)
1249 batch[i][0] = index[0];
1250 batch[i][1] = index[1];
1251 batch[i][2] = index[1];
1257 case DRAW_INDEXEDLINELIST32:
1259 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1261 for(unsigned int i = 0; i < triangleCount; i++)
1263 batch[i][0] = index[0];
1264 batch[i][1] = index[1];
1265 batch[i][2] = index[1];
1271 case DRAW_INDEXEDLINESTRIP8:
1273 const unsigned char *index = (const unsigned char*)indices + start;
1275 for(unsigned int i = 0; i < triangleCount; i++)
1277 batch[i][0] = index[0];
1278 batch[i][1] = index[1];
1279 batch[i][2] = index[1];
1285 case DRAW_INDEXEDLINESTRIP16:
1287 const unsigned short *index = (const unsigned short*)indices + start;
1289 for(unsigned int i = 0; i < triangleCount; i++)
1291 batch[i][0] = index[0];
1292 batch[i][1] = index[1];
1293 batch[i][2] = index[1];
1299 case DRAW_INDEXEDLINESTRIP32:
1301 const unsigned int *index = (const unsigned int*)indices + start;
1303 for(unsigned int i = 0; i < triangleCount; i++)
1305 batch[i][0] = index[0];
1306 batch[i][1] = index[1];
1307 batch[i][2] = index[1];
// Indexed line loops address the index buffer from its start and wrap the
// position modulo loop.
1313 case DRAW_INDEXEDLINELOOP8:
1315 const unsigned char *index = (const unsigned char*)indices;
1317 for(unsigned int i = 0; i < triangleCount; i++)
1319 batch[i][0] = index[(start + i + 0) % loop];
1320 batch[i][1] = index[(start + i + 1) % loop];
1321 batch[i][2] = index[(start + i + 1) % loop];
1325 case DRAW_INDEXEDLINELOOP16:
1327 const unsigned short *index = (const unsigned short*)indices;
1329 for(unsigned int i = 0; i < triangleCount; i++)
1331 batch[i][0] = index[(start + i + 0) % loop];
1332 batch[i][1] = index[(start + i + 1) % loop];
1333 batch[i][2] = index[(start + i + 1) % loop];
1337 case DRAW_INDEXEDLINELOOP32:
1339 const unsigned int *index = (const unsigned int*)indices;
1341 for(unsigned int i = 0; i < triangleCount; i++)
1343 batch[i][0] = index[(start + i + 0) % loop];
1344 batch[i][1] = index[(start + i + 1) % loop];
1345 batch[i][2] = index[(start + i + 1) % loop];
1349 case DRAW_INDEXEDTRIANGLELIST8:
1351 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1353 for(unsigned int i = 0; i < triangleCount; i++)
1355 batch[i][0] = index[0];
1356 batch[i][1] = index[1];
1357 batch[i][2] = index[2];
1363 case DRAW_INDEXEDTRIANGLELIST16:
1365 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1367 for(unsigned int i = 0; i < triangleCount; i++)
1369 batch[i][0] = index[0];
1370 batch[i][1] = index[1];
1371 batch[i][2] = index[2];
1377 case DRAW_INDEXEDTRIANGLELIST32:
1379 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1381 for(unsigned int i = 0; i < triangleCount; i++)
1383 batch[i][0] = index[0];
1384 batch[i][1] = index[1];
1385 batch[i][2] = index[2];
// Indexed strips: the low bit of the primitive number (start + i) decides
// which of the next two indices fills the second slot and which the third.
1391 case DRAW_INDEXEDTRIANGLESTRIP8:
1393 const unsigned char *index = (const unsigned char*)indices + start;
1395 for(unsigned int i = 0; i < triangleCount; i++)
1397 batch[i][0] = index[0];
1398 batch[i][1] = index[((start + i) & 1) + 1];
1399 batch[i][2] = index[(~(start + i) & 1) + 1];
1405 case DRAW_INDEXEDTRIANGLESTRIP16:
1407 const unsigned short *index = (const unsigned short*)indices + start;
1409 for(unsigned int i = 0; i < triangleCount; i++)
1411 batch[i][0] = index[0];
1412 batch[i][1] = index[((start + i) & 1) + 1];
1413 batch[i][2] = index[(~(start + i) & 1) + 1];
1419 case DRAW_INDEXEDTRIANGLESTRIP32:
1421 const unsigned int *index = (const unsigned int*)indices + start;
1423 for(unsigned int i = 0; i < triangleCount; i++)
1425 batch[i][0] = index[0];
1426 batch[i][1] = index[((start + i) & 1) + 1];
1427 batch[i][2] = index[(~(start + i) & 1) + 1];
// Indexed fans: the third slot always holds the first entry of the index buffer.
1433 case DRAW_INDEXEDTRIANGLEFAN8:
1435 const unsigned char *index = (const unsigned char*)indices;
1437 for(unsigned int i = 0; i < triangleCount; i++)
1439 batch[i][0] = index[start + i + 1];
1440 batch[i][1] = index[start + i + 2];
1441 batch[i][2] = index[0];
1445 case DRAW_INDEXEDTRIANGLEFAN16:
1447 const unsigned short *index = (const unsigned short*)indices;
1449 for(unsigned int i = 0; i < triangleCount; i++)
1451 batch[i][0] = index[start + i + 1];
1452 batch[i][1] = index[start + i + 2];
1453 batch[i][2] = index[0];
1457 case DRAW_INDEXEDTRIANGLEFAN32:
1459 const unsigned int *index = (const unsigned int*)indices;
1461 for(unsigned int i = 0; i < triangleCount; i++)
1463 batch[i][0] = index[start + i + 1];
1464 batch[i][1] = index[start + i + 2];
1465 batch[i][2] = index[0];
// NOTE(review): presumably the quad list case - confirm. Primitives are
// emitted in pairs that share four vertices: (0, 1, 2) and (0, 2, 3).
1471 unsigned int index = 4 * start / 2;
1473 for(unsigned int i = 0; i < triangleCount; i += 2)
1475 batch[i+0][0] = index + 0;
1476 batch[i+0][1] = index + 1;
1477 batch[i+0][2] = index + 2;
1479 batch[i+1][0] = index + 0;
1480 batch[i+1][1] = index + 2;
1481 batch[i+1][2] = index + 3;
// Three vertices per primitive are passed to the vertex routine, which writes
// its output starting at the first vertex of the triangle batch.
1492 task->primitiveStart = start;
1493 task->vertexCount = triangleCount * 3;
1494 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
// Primitive setup for solid triangles: walks the triangles of a unit, clips
// those that need it and runs the setup routine of the draw call on each.
//   unit  - primitive unit whose triangle and primitive batches are used
//   count - number of triangles to process
1497 int Renderer::setupSolidTriangles(int unit, int count)
1499 Triangle *triangle = triangleBatch[unit];
1500 Primitive *primitive = primitiveBatch[unit];
1502 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1503 SetupProcessor::State &state = draw.setupState;
1504 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1506 int ms = state.multiSample;
1507 int pos = state.positionRegister;
1508 const DrawData *data = draw.data;
1511 for(int i = 0; i < count; i++, triangle++)
1513 Vertex &v0 = triangle->v0;
1514 Vertex &v1 = triangle->v1;
1515 Vertex &v2 = triangle->v2;
// Continue only when the clip flags common to all three vertices are exactly
// CLIP_FINITE.
1517 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1519 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
// Clip only when a vertex or the draw call raises a flag beyond CLIP_FINITE.
1521 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1523 if(clipFlagsOr != Clipper::CLIP_FINITE)
1525 if(!clipper->clip(polygon, clipFlagsOr, draw))
1531 if(setupRoutine(primitive, triangle, &polygon, data))
// Sets up a triangle drawn as a wireframe: the triangle is culled by
// orientation and then each of its three edges is set up as a line.
//   unit  - index into triangleBatch / primitiveBatch / primitiveProgress.
//   count - not referenced by the visible lines of this function.
// Returns 0 when the triangle is culled.
1542 int Renderer::setupWireframeTriangle(int unit, int count)
1544 Triangle *triangle = triangleBatch[unit];
1545 Primitive *primitive = primitiveBatch[unit];
1548 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1549 SetupProcessor::State &state = draw.setupState;
1551 const Vertex &v0 = triangle[0].v0;
1552 const Vertex &v1 = triangle[0].v1;
1553 const Vertex &v2 = triangle[0].v2;
// Signed determinant over (x, y, w) of the three vertices; its sign is used for culling below.
1555 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1557 if(state.cullMode == CULL_CLOCKWISE)
1559 if(d >= 0) return 0;
1561 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1563 if(d <= 0) return 0;
// triangle[0] already holds edge v0-v1; triangle[1] and triangle[2] receive edges v1-v2 and v2-v0.
1567 triangle[1].v0 = v1;
1568 triangle[1].v1 = v2;
1569 triangle[2].v0 = v2;
1570 triangle[2].v1 = v0;
// With flat shading, every edge endpoint takes the colours C[0] and C[1] of triangle[0].v0.
1572 if(state.color[0][0].flat) // FIXME
1574 for(int i = 0; i < 2; i++)
1576 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1577 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1578 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1579 triangle[2].v1.C[i] = triangle[0].v0.C[i];
// One setupLine call per edge; an accepted primitive records half the determinant as its area.
1583 for(int i = 0; i < 3; i++)
1585 if(setupLine(*primitive, *triangle, draw))
1587 primitive->area = 0.5f * d;
// Sets up a triangle drawn as three points: the triangle is culled by
// orientation and then each vertex is set up with setupPoint.
//   unit  - index into triangleBatch / primitiveBatch / primitiveProgress.
//   count - not referenced by the visible lines of this function.
// Returns 0 when the triangle is culled.
1599 int Renderer::setupVertexTriangle(int unit, int count)
1601 Triangle *triangle = triangleBatch[unit];
1602 Primitive *primitive = primitiveBatch[unit];
1605 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1606 SetupProcessor::State &state = draw.setupState;
1608 const Vertex &v0 = triangle[0].v0;
1609 const Vertex &v1 = triangle[0].v1;
1610 const Vertex &v2 = triangle[0].v2;
// Signed determinant over (x, y, w) of the three vertices; its sign is used for culling below.
1612 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1614 if(state.cullMode == CULL_CLOCKWISE)
1616 if(d >= 0) return 0;
1618 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1620 if(d <= 0) return 0;
// triangle[0].v0 is already v0; the other two vertices become v0 of triangle[1] and triangle[2].
1624 triangle[1].v0 = v1;
1625 triangle[2].v0 = v2;
// One setupPoint call per vertex; an accepted primitive records half the determinant as its area.
1627 for(int i = 0; i < 3; i++)
1629 if(setupPoint(*primitive, *triangle, draw))
1631 primitive->area = 0.5f * d;
// Sets up 'count' line primitives from the batch of processing unit 'unit'
// by calling setupLine once per loop iteration.
// Presumably returns the number of lines accepted - confirm.
1643 int Renderer::setupLines(int unit, int count)
1645 Triangle *triangle = triangleBatch[unit];
1646 Primitive *primitive = primitiveBatch[unit];
// The draw call is fetched from the drawList ring, indexed modulo DRAW_COUNT.
1649 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1650 SetupProcessor::State &state = draw.setupState;
1652 int ms = state.multiSample;
1654 for(int i = 0; i < count; i++)
1656 if(setupLine(*primitive, *triangle, draw))
// Sets up 'count' point primitives from the batch of processing unit 'unit'
// by calling setupPoint once per loop iteration.
// Presumably returns the number of points accepted - confirm.
1668 int Renderer::setupPoints(int unit, int count)
1670 Triangle *triangle = triangleBatch[unit];
1671 Primitive *primitive = primitiveBatch[unit];
// The draw call is fetched from the drawList ring, indexed modulo DRAW_COUNT.
1674 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1675 SetupProcessor::State &state = draw.setupState;
1677 int ms = state.multiSample;
1679 for(int i = 0; i < count; i++)
1681 if(setupPoint(*primitive, *triangle, draw))
// Sets up a single line whose endpoints are triangle.v0 and triangle.v1.
// The line is expanded into a polygon, clipped if needed, and passed to the
// draw call's setup routine.
//   primitive - receives the setup result.
//   triangle  - supplies the two endpoints.
//   draw      - supplies setup state, draw data and clip flags.
// Returns the setup routine's result when it is reached.
1693 bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1695 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1696 const SetupProcessor::State &state = draw.setupState;
1697 const DrawData &data = *draw.data;
1699 float lineWidth = data.lineWidth;
1701 Vertex &v0 = triangle.v0;
1702 Vertex &v1 = triangle.v1;
1704 int pos = state.positionRegister;
1706 const float4 &P0 = v0.v[pos];
1707 const float4 &P1 = v1.v[pos];
// Case where both endpoints have a non-positive w.
1709 if(P0.w <= 0 && P1.w <= 0)
// W and H are the first Wx16 / Hx16 entries divided by 16.
1714 const float W = data.Wx16[0] * (1.0f / 16.0f);
1715 const float H = data.Hx16[0] * (1.0f / 16.0f);
// Difference of the perspective-divided endpoint positions, scaled by W and H.
1717 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1718 float dy = H * (P1.y / P1.w - P0.y / P0.w);
// Case where the two endpoints coincide after the divide.
1720 if(dx == 0 && dy == 0)
// The rectangle expansion is compiled in but disabled by the constant condition.
1725 if(false) // Rectangle
// Half the line width divided by the length of (dx, dy).
1735 float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
// dx and dy multiplied by each endpoint's w and divided by W or H.
1740 float dx0w = dx * P0.w / W;
1741 float dy0h = dy * P0.w / H;
1742 float dx0h = dx * P0.w / H;
1743 float dy0w = dy * P0.w / W;
1745 float dx1w = dx * P1.w / W;
1746 float dy1h = dy * P1.w / H;
1747 float dx1h = dx * P1.w / H;
1748 float dy1w = dy * P1.w / W;
// Offset the four corners P[0..3] and compute clip flags for each.
1750 P[0].x += -dy0w + -dx0w;
1751 P[0].y += -dx0h + +dy0h;
1752 C[0] = clipper->computeClipFlags(P[0]);
1754 P[1].x += -dy1w + +dx1w;
1755 P[1].y += -dx1h + +dy1h;
1756 C[1] = clipper->computeClipFlags(P[1]);
1758 P[2].x += +dy1w + +dx1w;
1759 P[2].y += +dx1h + -dy1h;
1760 C[2] = clipper->computeClipFlags(P[2]);
1762 P[3].x += +dy0w + -dx0w;
1763 P[3].y += +dx0h + +dy0h;
1764 C[3] = clipper->computeClipFlags(P[3]);
// The AND of the four corners' clip flags must be exactly CLIP_FINITE.
1766 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1768 Polygon polygon(P, 4);
1770 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
// The clipper is only consulted when some flag beyond CLIP_FINITE is set.
1772 if(clipFlagsOr != Clipper::CLIP_FINITE)
1774 if(!clipper->clip(polygon, clipFlagsOr, draw))
1780 return setupRoutine(&primitive, &triangle, &polygon, &data);
1783 else // Diamond test convention
// Half the line width multiplied by each endpoint's w and divided by W or H.
1797 float dx0 = lineWidth * 0.5f * P0.w / W;
1798 float dy0 = lineWidth * 0.5f * P0.w / H;
1800 float dx1 = lineWidth * 0.5f * P1.w / W;
1801 float dy1 = lineWidth * 0.5f * P1.w / H;
// Clip flags for the eight points P[0..7].
1804 C[0] = clipper->computeClipFlags(P[0]);
1807 C[1] = clipper->computeClipFlags(P[1]);
1810 C[2] = clipper->computeClipFlags(P[2]);
1813 C[3] = clipper->computeClipFlags(P[3]);
1816 C[4] = clipper->computeClipFlags(P[4]);
1819 C[5] = clipper->computeClipFlags(P[5]);
1822 C[6] = clipper->computeClipFlags(P[6]);
1825 C[7] = clipper->computeClipFlags(P[7]);
// The AND of all eight clip flags must be exactly CLIP_FINITE.
1827 if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1833 if(dx > dy) // Right
// Six-vertex polygon built from L.
1874 Polygon polygon(L, 6);
1876 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1878 if(clipFlagsOr != Clipper::CLIP_FINITE)
1880 if(!clipper->clip(polygon, clipFlagsOr, draw))
1886 return setupRoutine(&primitive, &triangle, &polygon, &data);
// Sets up a single point located at triangle.v0. The point is expanded into
// a four-vertex polygon, clipped if needed, and passed to the draw call's
// setup routine.
//   primitive - receives the setup result.
//   triangle  - v0 is the point; v1 and v2 are overwritten here.
//   draw      - supplies setup state, draw data and clip flags.
// Returns the setup routine's result when it is reached.
1893 bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1895 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1896 const SetupProcessor::State &state = draw.setupState;
1897 const DrawData &data = *draw.data;
1899 Vertex &v = triangle.v0;
1903 int pts = state.pointSizeRegister;
// The point size source depends on whether a point size register is in use.
1905 if(state.pointSizeRegister != Unused)
1911 pSize = data.point.pointSize[0];
// Keep the size within the draw data's [pointSizeMin, pointSizeMax] limits.
1914 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1919 int pos = state.positionRegister;
// Point size multiplied by w and by the first halfPixelX / halfPixelY entry.
1926 const float X = pSize * P[0].w * data.halfPixelX[0];
1927 const float Y = pSize * P[0].w * data.halfPixelY[0];
// Clip flags for the four corners P[0..3].
1931 C[0] = clipper->computeClipFlags(P[0]);
1935 C[1] = clipper->computeClipFlags(P[1]);
1939 C[2] = clipper->computeClipFlags(P[2]);
1943 C[3] = clipper->computeClipFlags(P[3]);
// v1 and v2 start as copies of v0, then are displaced by half the point size
// scaled by 16 (presumably a 4-bit sub-pixel fixed-point scale - confirm).
1945 triangle.v1 = triangle.v0;
1946 triangle.v2 = triangle.v0;
1948 triangle.v1.X += iround(16 * 0.5f * pSize);
1949 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1951 Polygon polygon(P, 4);
// The AND of the four corners' clip flags must be exactly CLIP_FINITE.
1953 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1955 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
// The clipper is only consulted when some flag beyond CLIP_FINITE is set.
1957 if(clipFlagsOr != Clipper::CLIP_FINITE)
1959 if(!clipper->clip(polygon, clipFlagsOr, draw))
1965 return setupRoutine(&primitive, &triangle, &polygon, &data);
// Allocates the per-unit batches and per-thread state, then starts one worker
// thread per index in [0, threadCount).
1971 void Renderer::initializeThreads()
// Unit and cluster counts are both threadCount passed through ceilPow2.
1973 unitCount = ceilPow2(threadCount);
1974 clusterCount = ceilPow2(threadCount);
// One triangle batch and one primitive batch of batchSize entries per unit.
1976 for(int i = 0; i < unitCount; i++)
1978 triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1979 primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1982 for(int i = 0; i < threadCount; i++)
1984 vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
// The vertex cache's draw call is initialised to -1.
1985 vertexTask[i]->vertexCache.drawCall = -1;
1987 task[i].type = Task::SUSPEND;
1989 resume[i] = new Event();
1990 suspend[i] = new Event();
// Start-up arguments handed to the worker: its index and the owning renderer.
1992 Parameters parameters;
1993 parameters.threadIndex = i;
1994 parameters.renderer = this;
1996 exitThreads = false;
// Pass the address of the local 'parameters' object to the new thread.
1997 worker[i] = new Thread(threadFunction, &parameters);
2000 suspend[i]->signal();
// Stops the worker threads and releases the per-thread and per-unit
// allocations, clearing the corresponding pointers.
2004 void Renderer::terminateThreads()
// Loop while any thread is still counted as awake.
2006 while(threadsAwake != 0)
2011 for(int thread = 0; thread < threadCount; thread++)
// Signal the worker's resume event, then wait for it to finish.
2016 resume[thread]->signal();
2017 worker[thread]->join();
2019 delete worker[thread];
2021 delete resume[thread];
2023 delete suspend[thread];
2024 suspend[thread] = 0;
2027 deallocate(vertexTask[thread]);
2028 vertexTask[thread] = 0;
// NOTE(review): the bound is the literal 16 rather than unitCount - presumably
// the capacity of triangleBatch / primitiveBatch; confirm against the header.
2031 for(int i = 0; i < 16; i++)
2033 deallocate(triangleBatch[i]);
2034 triangleBatch[i] = 0;
2036 deallocate(primitiveBatch[i]);
2037 primitiveBatch[i] = 0;
// Scans a vertex shader for constant-definition instructions (DEF, DEFI, DEFB)
// and applies each one through the matching setVertexShaderConstant* call.
// A null shader is ignored.
2041 void Renderer::loadConstants(const VertexShader *vertexShader)
2043 if(!vertexShader) return;
2045 size_t count = vertexShader->getLength();
2047 for(size_t i = 0; i < count; i++)
2049 const Shader::Instruction *instruction = vertexShader->getInstruction(i);
// DEF: four float components taken from src[0].value.
2051 if(instruction->opcode == Shader::OPCODE_DEF)
2053 int index = instruction->dst.index;
2056 value[0] = instruction->src[0].value[0];
2057 value[1] = instruction->src[0].value[1];
2058 value[2] = instruction->src[0].value[2];
2059 value[3] = instruction->src[0].value[3];
2061 setVertexShaderConstantF(index, value);
// DEFI: four integer components taken from src[0].integer.
2063 else if(instruction->opcode == Shader::OPCODE_DEFI)
2065 int index = instruction->dst.index;
2068 integer[0] = instruction->src[0].integer[0];
2069 integer[1] = instruction->src[0].integer[1];
2070 integer[2] = instruction->src[0].integer[2];
2071 integer[3] = instruction->src[0].integer[3];
2073 setVertexShaderConstantI(index, integer);
// DEFB: a single boolean taken from src[0].boolean[0].
2075 else if(instruction->opcode == Shader::OPCODE_DEFB)
2077 int index = instruction->dst.index;
2078 int boolean = instruction->src[0].boolean[0];
2080 setVertexShaderConstantB(index, &boolean);
// Scans a pixel shader for constant-definition instructions (DEF, DEFI, DEFB)
// and applies each one through the matching setPixelShaderConstant* call.
// A null shader is ignored.
2085 void Renderer::loadConstants(const PixelShader *pixelShader)
2087 if(!pixelShader) return;
2089 size_t count = pixelShader->getLength();
2091 for(size_t i = 0; i < count; i++)
2093 const Shader::Instruction *instruction = pixelShader->getInstruction(i);
// DEF: four float components taken from src[0].value.
2095 if(instruction->opcode == Shader::OPCODE_DEF)
2097 int index = instruction->dst.index;
2100 value[0] = instruction->src[0].value[0];
2101 value[1] = instruction->src[0].value[1];
2102 value[2] = instruction->src[0].value[2];
2103 value[3] = instruction->src[0].value[3];
2105 setPixelShaderConstantF(index, value);
// DEFI: four integer components taken from src[0].integer.
2107 else if(instruction->opcode == Shader::OPCODE_DEFI)
2109 int index = instruction->dst.index;
2112 integer[0] = instruction->src[0].integer[0];
2113 integer[1] = instruction->src[0].integer[1];
2114 integer[2] = instruction->src[0].integer[2];
2115 integer[3] = instruction->src[0].integer[3];
2117 setPixelShaderConstantI(index, integer);
// DEFB: a single boolean taken from src[0].boolean[0].
2119 else if(instruction->opcode == Shader::OPCODE_DEFB)
2121 int index = instruction->dst.index;
2122 int boolean = instruction->src[0].boolean[0];
2124 setPixelShaderConstantB(index, &boolean);
// Stores the given index buffer resource pointer in the context.
2129 void Renderer::setIndexBuffer(Resource *indexBuffer)
2131 context->indexBuffer = indexBuffer;
// Stores the multisample mask in the context's sampleMask field.
2134 void Renderer::setMultiSampleMask(unsigned int mask)
2136 context->sampleMask = mask;
// Writes the requested mode to the namespace-level sw::transparencyAntialiasing
// variable (qualified because the parameter has the same name).
2139 void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2141 sw::transparencyAntialiasing = transparencyAntialiasing;
// Checks whether the texture bound to 'sampler' shares its resource with any
// bound render target or with the depth buffer.
// Presumably returns true on a match and false otherwise - confirm.
2144 bool Renderer::isReadWriteTexture(int sampler)
2146 for(int index = 0; index < RENDERTARGETS; index++)
// Unbound (null) render targets are not compared.
2148 if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
// Same comparison against the depth buffer, when one is bound.
2154 if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
// Recomputes clipPlane[0..5] from userPlane[0..5] when updateClipPlanes is
// set, then clears the flag. Only planes whose CLIP_PLANEn bit is set in
// clipFlags are rewritten.
2162 void Renderer::updateClipper()
2164 if(updateClipPlanes)
// Fixed-function path: each user plane is multiplied by the view transform.
2166 if(VertexProcessor::isFixedFunction()) // User plane in world space
2168 const Matrix &scissorWorld = getViewTransform();
2170 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2171 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2172 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2173 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2174 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2175 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
// Otherwise the user planes are copied unchanged.
2177 else // User plane in clip space
2179 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2180 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2181 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2182 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2183 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2184 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2187 updateClipPlanes = false;
// Binds 'resource' as the texture of the given sampler slot in the context.
// 'sampler' is asserted to be below TOTAL_IMAGE_UNITS.
2191 void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2193 ASSERT(sampler < TOTAL_IMAGE_UNITS);
2195 context->texture[sampler] = resource;
// Forwards one texture level (face, mip level, surface, type) to the context's
// sampler object for slot 'sampler'.
// Asserted: sampler < TOTAL_IMAGE_UNITS, face < 6, level < MIPMAP_LEVELS.
2198 void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2200 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2202 context->sampler[sampler].setTextureLevel(face, level, surface, type);
// Sets the texture filter of a sampler. SAMPLER_PIXEL is routed to
// PixelProcessor; the VertexProcessor call presumably serves every other type.
2205 void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2207 if(type == SAMPLER_PIXEL)
2209 PixelProcessor::setTextureFilter(sampler, textureFilter);
2213 VertexProcessor::setTextureFilter(sampler, textureFilter);
// Sets the mipmap filter of a sampler. SAMPLER_PIXEL is routed to
// PixelProcessor; the VertexProcessor call presumably serves every other type.
2217 void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2219 if(type == SAMPLER_PIXEL)
2221 PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2225 VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
// Enables or disables gather on a sampler. SAMPLER_PIXEL is routed to
// PixelProcessor; the VertexProcessor call presumably serves every other type.
2229 void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2231 if(type == SAMPLER_PIXEL)
2233 PixelProcessor::setGatherEnable(sampler, enable);
2237 VertexProcessor::setGatherEnable(sampler, enable);
// Sets the U addressing mode of a sampler. SAMPLER_PIXEL is routed to
// PixelProcessor; the VertexProcessor call presumably serves every other type.
2241 void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2243 if(type == SAMPLER_PIXEL)
2245 PixelProcessor::setAddressingModeU(sampler, addressMode);
2249 VertexProcessor::setAddressingModeU(sampler, addressMode);
// Sets the V addressing mode of a sampler. SAMPLER_PIXEL is routed to
// PixelProcessor; the VertexProcessor call presumably serves every other type.
2253 void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2255 if(type == SAMPLER_PIXEL)
2257 PixelProcessor::setAddressingModeV(sampler, addressMode);
2261 VertexProcessor::setAddressingModeV(sampler, addressMode);
// Sets the W addressing mode of a sampler. SAMPLER_PIXEL is routed to
// PixelProcessor; the VertexProcessor call presumably serves every other type.
2265 void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2267 if(type == SAMPLER_PIXEL)
2269 PixelProcessor::setAddressingModeW(sampler, addressMode);
2273 VertexProcessor::setAddressingModeW(sampler, addressMode);
// Sets the sRGB-read flag of a sampler. SAMPLER_PIXEL is routed to
// PixelProcessor; the VertexProcessor call presumably serves every other type.
2277 void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2279 if(type == SAMPLER_PIXEL)
2281 PixelProcessor::setReadSRGB(sampler, sRGB);
2285 VertexProcessor::setReadSRGB(sampler, sRGB);
// Passes the 'bias' value to setMipmapLOD for a sampler. SAMPLER_PIXEL is
// routed to PixelProcessor; the VertexProcessor call presumably serves every
// other type.
2289 void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2291 if(type == SAMPLER_PIXEL)
2293 PixelProcessor::setMipmapLOD(sampler, bias);
2297 VertexProcessor::setMipmapLOD(sampler, bias);
// Sets the border colour of a sampler. SAMPLER_PIXEL is routed to
// PixelProcessor; the VertexProcessor call presumably serves every other type.
2301 void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2303 if(type == SAMPLER_PIXEL)
2305 PixelProcessor::setBorderColor(sampler, borderColor);
2309 VertexProcessor::setBorderColor(sampler, borderColor);
// Sets the maximum anisotropy of a sampler. SAMPLER_PIXEL is routed to
// PixelProcessor; the VertexProcessor call presumably serves every other type.
2313 void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2315 if(type == SAMPLER_PIXEL)
2317 PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2321 VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
// Sets the high-precision-filtering flag of a sampler. SAMPLER_PIXEL is routed
// to PixelProcessor; the VertexProcessor call presumably serves every other type.
2325 void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering)
2327 if(type == SAMPLER_PIXEL)
2329 PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
2333 VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
// Sets the red-channel swizzle of a sampler. SAMPLER_PIXEL is routed to
// PixelProcessor; the VertexProcessor call presumably serves every other type.
2337 void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2339 if(type == SAMPLER_PIXEL)
2341 PixelProcessor::setSwizzleR(sampler, swizzleR);
2345 VertexProcessor::setSwizzleR(sampler, swizzleR);
// Sets the green-channel swizzle of a sampler. SAMPLER_PIXEL is routed to
// PixelProcessor; the VertexProcessor call presumably serves every other type.
2349 void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2351 if(type == SAMPLER_PIXEL)
2353 PixelProcessor::setSwizzleG(sampler, swizzleG);
2357 VertexProcessor::setSwizzleG(sampler, swizzleG);
// Sets the blue-channel swizzle of a sampler. SAMPLER_PIXEL is routed to
// PixelProcessor; the VertexProcessor call presumably serves every other type.
2361 void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2363 if(type == SAMPLER_PIXEL)
2365 PixelProcessor::setSwizzleB(sampler, swizzleB);
2369 VertexProcessor::setSwizzleB(sampler, swizzleB);
// Sets the alpha-channel swizzle of a sampler. SAMPLER_PIXEL is routed to
// PixelProcessor; the VertexProcessor call presumably serves every other type.
2373 void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2375 if(type == SAMPLER_PIXEL)
2377 PixelProcessor::setSwizzleA(sampler, swizzleA);
2381 VertexProcessor::setSwizzleA(sampler, swizzleA);
// Sets the base mip level of a sampler. SAMPLER_PIXEL is routed to
// PixelProcessor; the VertexProcessor call presumably serves every other type.
2385 void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel)
2387 if(type == SAMPLER_PIXEL)
2389 PixelProcessor::setBaseLevel(sampler, baseLevel);
2393 VertexProcessor::setBaseLevel(sampler, baseLevel);
// Sets the maximum mip level of a sampler. SAMPLER_PIXEL is routed to
// PixelProcessor; the VertexProcessor call presumably serves every other type.
2397 void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel)
2399 if(type == SAMPLER_PIXEL)
2401 PixelProcessor::setMaxLevel(sampler, maxLevel);
2405 VertexProcessor::setMaxLevel(sampler, maxLevel);
// Sets the minimum LOD of a sampler. SAMPLER_PIXEL is routed to
// PixelProcessor; the VertexProcessor call presumably serves every other type.
2409 void Renderer::setMinLod(SamplerType type, int sampler, float minLod)
2411 if(type == SAMPLER_PIXEL)
2413 PixelProcessor::setMinLod(sampler, minLod);
2417 VertexProcessor::setMinLod(sampler, minLod);
// Sets the maximum LOD of a sampler. SAMPLER_PIXEL is routed to
// PixelProcessor; the VertexProcessor call presumably serves every other type.
2421 void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod)
2423 if(type == SAMPLER_PIXEL)
2425 PixelProcessor::setMaxLod(sampler, maxLod);
2429 VertexProcessor::setMaxLod(sampler, maxLod);
// Forwards the point-sprite enable flag to the context.
2433 void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2435 context->setPointSpriteEnable(pointSpriteEnable);
// Forwards the point-scale enable flag to the context.
2438 void Renderer::setPointScaleEnable(bool pointScaleEnable)
2440 context->setPointScaleEnable(pointScaleEnable);
// Stores the line width in the context.
2443 void Renderer::setLineWidth(float width)
2445 context->lineWidth = width;
// Sets the depth bias - presumably stored in a depthBias member, mirroring
// setSlopeDepthBias; confirm.
2448 void Renderer::setDepthBias(float bias)
// Stores the slope-scaled depth bias in slopeDepthBias.
2453 void Renderer::setSlopeDepthBias(float slopeBias)
2455 slopeDepthBias = slopeBias;
// Stores the rasterizer-discard flag in the context.
2458 void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
2460 context->rasterizerDiscard = rasterizerDiscard;
// Makes 'shader' the context's pixel shader and loads the constants it defines.
2463 void Renderer::setPixelShader(const PixelShader *shader)
2465 context->pixelShader = shader;
2467 loadConstants(shader);
// Makes 'shader' the context's vertex shader and loads the constants it defines.
2470 void Renderer::setVertexShader(const VertexShader *shader)
2472 context->vertexShader = shader;
2474 loadConstants(shader);
// Sets 'count' pixel shader float constants starting at register 'index'.
2477 void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
// Raise every draw call's psDirtyConstF to index + count when it is lower.
2479 for(int i = 0; i < DRAW_COUNT; i++)
2481 if(drawCall[i]->psDirtyConstF < index + count)
2483 drawCall[i]->psDirtyConstF = index + count;
// Forward each register to PixelProcessor.
2487 for(int i = 0; i < count; i++)
2489 PixelProcessor::setFloatConstant(index + i, value);
// Sets 'count' pixel shader integer constants starting at register 'index'.
2494 void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
// Raise every draw call's psDirtyConstI to index + count when it is lower.
2496 for(int i = 0; i < DRAW_COUNT; i++)
2498 if(drawCall[i]->psDirtyConstI < index + count)
2500 drawCall[i]->psDirtyConstI = index + count;
// Forward each register to PixelProcessor.
2504 for(int i = 0; i < count; i++)
2506 PixelProcessor::setIntegerConstant(index + i, value);
// Sets 'count' pixel shader boolean constants starting at register 'index'.
2511 void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
// Raise every draw call's psDirtyConstB to index + count when it is lower.
2513 for(int i = 0; i < DRAW_COUNT; i++)
2515 if(drawCall[i]->psDirtyConstB < index + count)
2517 drawCall[i]->psDirtyConstB = index + count;
// Forward each register to PixelProcessor.
2521 for(int i = 0; i < count; i++)
2523 PixelProcessor::setBooleanConstant(index + i, *boolean);
// Sets 'count' vertex shader float constants starting at register 'index'.
2528 void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
// Raise every draw call's vsDirtyConstF to index + count when it is lower.
2530 for(int i = 0; i < DRAW_COUNT; i++)
2532 if(drawCall[i]->vsDirtyConstF < index + count)
2534 drawCall[i]->vsDirtyConstF = index + count;
// Forward each register to VertexProcessor.
2538 for(int i = 0; i < count; i++)
2540 VertexProcessor::setFloatConstant(index + i, value);
// Sets 'count' vertex shader integer constants starting at register 'index'.
2545 void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
// Raise every draw call's vsDirtyConstI to index + count when it is lower.
2547 for(int i = 0; i < DRAW_COUNT; i++)
2549 if(drawCall[i]->vsDirtyConstI < index + count)
2551 drawCall[i]->vsDirtyConstI = index + count;
// Forward each register to VertexProcessor.
2555 for(int i = 0; i < count; i++)
2557 VertexProcessor::setIntegerConstant(index + i, value);
// Sets 'count' vertex shader boolean constants starting at register 'index'.
2562 void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
// Raise every draw call's vsDirtyConstB to index + count when it is lower.
2564 for(int i = 0; i < DRAW_COUNT; i++)
2566 if(drawCall[i]->vsDirtyConstB < index + count)
2568 drawCall[i]->vsDirtyConstB = index + count;
// Forward each register to VertexProcessor.
2572 for(int i = 0; i < count; i++)
2574 VertexProcessor::setBooleanConstant(index + i, *boolean);
// Forwards model matrix M for index i to VertexProcessor.
2579 void Renderer::setModelMatrix(const Matrix &M, int i)
2581 VertexProcessor::setModelMatrix(M, i);
// Forwards the view matrix to VertexProcessor and marks the clip planes for
// recomputation.
2584 void Renderer::setViewMatrix(const Matrix &V)
2586 VertexProcessor::setViewMatrix(V);
2587 updateClipPlanes = true;
// Forwards the base matrix to VertexProcessor and marks the clip planes for
// recomputation.
2590 void Renderer::setBaseMatrix(const Matrix &B)
2592 VertexProcessor::setBaseMatrix(B);
2593 updateClipPlanes = true;
// Forwards the projection matrix to VertexProcessor and marks the clip planes
// for recomputation.
2596 void Renderer::setProjectionMatrix(const Matrix &P)
2598 VertexProcessor::setProjectionMatrix(P);
2599 updateClipPlanes = true;
// Appends 'query' to the renderer's query collection.
2602 void Renderer::addQuery(Query *query)
2604 queries.push_back(query);
// Removes 'query' from the renderer's query collection.
2607 void Renderer::removeQuery(Query *query)
2609 queries.remove(query);
// Returns the renderer's thread count - presumably the threadCount member; confirm.
2613 int Renderer::getThreadCount()
// Returns the vertexTime entry recorded for 'thread'. The index is not range-checked here.
2618 int64_t Renderer::getVertexTime(int thread)
2620 return vertexTime[thread];
// Returns the setupTime entry recorded for 'thread'. The index is not range-checked here.
2623 int64_t Renderer::getSetupTime(int thread)
2625 return setupTime[thread];
// Returns the pixelTime entry recorded for 'thread'. The index is not range-checked here.
2628 int64_t Renderer::getPixelTime(int thread)
2630 return pixelTime[thread];
// Zeroes the vertex, setup and pixel timers of every thread in [0, threadCount).
2633 void Renderer::resetTimers()
2635 for(int thread = 0; thread < threadCount; thread++)
2637 vertexTime[thread] = 0;
2638 setupTime[thread] = 0;
2639 pixelTime[thread] = 0;
// Copies 'viewport' into the member of the same name.
2644 void Renderer::setViewport(const Viewport &viewport)
2646 this->viewport = viewport;
// Copies 'scissor' into the member of the same name.
2649 void Renderer::setScissor(const Rect &scissor)
2651 this->scissor = scissor;
// Stores the clip flags, shifted left by eight bits.
2654 void Renderer::setClipFlags(int flags)
2656 clipFlags = flags << 8; // Bottom 8 bits used by legacy frustum
// Stores a user clip plane and marks the clip planes for recomputation.
//   index - plane slot; the store is guarded by index < MAX_CLIP_PLANES.
//   plane - four plane coefficients.
2659 void Renderer::setClipPlane(unsigned int index, const float plane[4])
2661 if(index < MAX_CLIP_PLANES)
2663 userPlane[index] = plane;
2667 updateClipPlanes = true;
2670 void Renderer::updateConfiguration(bool initialUpdate)
2672 bool newConfiguration = swiftConfig->hasNewConfiguration();
2674 if(newConfiguration || initialUpdate)
2678 SwiftConfig::Configuration configuration = {};
2679 swiftConfig->getConfiguration(configuration);
2681 precacheVertex = !newConfiguration && configuration.precache;
2682 precacheSetup = !newConfiguration && configuration.precache;
2683 precachePixel = !newConfiguration && configuration.precache;
2685 VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2686 PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2687 SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2689 switch(configuration.textureSampleQuality)
2691 case 0: Sampler::setFilterQuality(FILTER_POINT); break;
2692 case 1: Sampler::setFilterQuality(FILTER_LINEAR); break;
2693 case 2: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2694 default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2697 switch(configuration.mipmapQuality)
2699 case 0: Sampler::setMipmapQuality(MIPMAP_POINT); break;
2700 case 1: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2701 default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2704 setPerspectiveCorrection(configuration.perspectiveCorrection);
2706 switch(configuration.transcendentalPrecision)
2709 logPrecision = APPROXIMATE;
2710 expPrecision = APPROXIMATE;
2711 rcpPrecision = APPROXIMATE;
2712 rsqPrecision = APPROXIMATE;
2715 logPrecision = PARTIAL;
2716 expPrecision = PARTIAL;
2717 rcpPrecision = PARTIAL;
2718 rsqPrecision = PARTIAL;
2721 logPrecision = ACCURATE;
2722 expPrecision = ACCURATE;
2723 rcpPrecision = ACCURATE;
2724 rsqPrecision = ACCURATE;
2727 logPrecision = WHQL;
2728 expPrecision = WHQL;
2729 rcpPrecision = WHQL;
2730 rsqPrecision = WHQL;
2733 logPrecision = IEEE;
2734 expPrecision = IEEE;
2735 rcpPrecision = IEEE;
2736 rsqPrecision = IEEE;
2739 logPrecision = ACCURATE;
2740 expPrecision = ACCURATE;
2741 rcpPrecision = ACCURATE;
2742 rsqPrecision = ACCURATE;
2746 switch(configuration.transparencyAntialiasing)
2748 case 0: transparencyAntialiasing = TRANSPARENCY_NONE; break;
2749 case 1: transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2750 default: transparencyAntialiasing = TRANSPARENCY_NONE; break;
2753 switch(configuration.threadCount)
2755 case -1: threadCount = CPUID::coreCount(); break;
2756 case 0: threadCount = CPUID::processAffinity(); break;
2757 default: threadCount = configuration.threadCount; break;
2760 CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2761 CPUID::setEnableSSSE3(configuration.enableSSSE3);
2762 CPUID::setEnableSSE3(configuration.enableSSE3);
2763 CPUID::setEnableSSE2(configuration.enableSSE2);
2764 CPUID::setEnableSSE(configuration.enableSSE);
2766 for(int pass = 0; pass < 10; pass++)
2768 optimization[pass] = configuration.optimization[pass];
2771 forceWindowed = configuration.forceWindowed;
2772 complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2773 postBlendSRGB = configuration.postBlendSRGB;
2774 exactColorRounding = configuration.exactColorRounding;
2775 forceClearRegisters = configuration.forceClearRegisters;
2778 minPrimitives = configuration.minPrimitives;
2779 maxPrimitives = configuration.maxPrimitives;
2783 if(!initialUpdate && !worker[0])
2785 initializeThreads();