1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "Renderer.hpp"
17 #include "Clipper.hpp"
19 #include "FrameBuffer.hpp"
21 #include "Surface.hpp"
23 #include "Primitive.hpp"
24 #include "Polygon.hpp"
25 #include "SwiftConfig.hpp"
26 #include "MutexLock.hpp"
29 #include "Resource.hpp"
30 #include "Constants.hpp"
32 #include "Reactor/Reactor.hpp"
// Handed to the SwiftConfig constructor when a Renderer is created.
36 bool disableServer = true;
// Renderer::draw() compares its primitive count against these two bounds.
39 unsigned int minPrimitives = 1;
40 unsigned int maxPrimitives = 1 << 21;
// Settings declared here and defined outside this part of the file. The Renderer
// constructor assigns the six convention flags and exactColorRounding from its arguments.
45 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates
46 extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1]
47 extern bool booleanFaceRegister;
48 extern bool fullPixelPositionRegister;
49 extern bool leadingVertexFirst; // Flat shading uses first vertex, else last
50 extern bool secondaryColor; // Specular lighting is applied after texturing
52 extern bool forceWindowed;
53 extern bool complementaryDepthBuffer;
54 extern bool postBlendSRGB;
55 extern bool exactColorRounding;
56 extern TransparencyAntialiasing transparencyAntialiasing;
57 extern bool forceClearRegisters;
59 extern bool precacheVertex;
60 extern bool precacheSetup;
61 extern bool precachePixel;
// Precision selectors, all defaulting to ACCURATE. Renderer::threadFunction() turns on
// flush-to-zero and denormals-are-zero when logPrecision is below IEEE.
68 TranscendentalPrecision logPrecision = ACCURATE;
69 TranscendentalPrecision expPrecision = ACCURATE;
70 TranscendentalPrecision rcpPrecision = ACCURATE;
71 TranscendentalPrecision rsqPrecision = ACCURATE;
72 bool perspectiveCorrection = true;
// NOTE(review): the header of the enclosing definition is not visible here. The fields
// set below (vsDirtyConstF, psDirtyConstF, data) are the ones Renderer::draw() reads
// through a DrawCall pointer, so this is presumably the DrawCall constructor - confirm.
//
// A non-zero dirty count makes Renderer::draw() copy that many float constants into the
// draw data and then reset the count to 0.
84 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
88 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
// The per-draw data block comes from allocate() and is pointed at the constants object.
94 data = (DrawData*)allocate(sizeof(DrawData));
95 data->constants = &constants;
// Constructs the renderer on top of the vertex, pixel and setup processors.
//   context            - rendering context shared with the three processor base classes.
//   conventions        - API conventions; each field is copied into the matching sw:: global.
//   exactColorRounding - copied into sw::exactColorRounding.
// Also creates the clipper and blitter, flags the matrices and clip planes for an update,
// allocates DRAW_COUNT DrawCall objects, and creates the SwiftConfig and sync resource.
105 Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
// Publish the conventions through the global settings.
107 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
108 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
109 sw::booleanFaceRegister = conventions.booleanFaceRegister;
110 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
111 sw::leadingVertexFirst = conventions.leadingVertexFirst;
112 sw::secondaryColor = conventions.secondaryColor;
113 sw::exactColorRounding = exactColorRounding;
// Start with render target 0 unset.
115 setRenderTarget(0, 0);
116 clipper = new Clipper(symmetricNormalizedDepth);
117 blitter = new Blitter;
// Mark all derived transform state as needing an update.
119 updateViewMatrix = true;
120 updateBaseMatrix = true;
121 updateProjectionMatrix = true;
122 updateClipPlanes = true;
128 for(int i = 0; i < 16; i++)
138 resumeApp = new Event();
// The per-unit triangle and primitive batch pointers start out null.
146 for(int i = 0; i < 16; i++)
148 triangleBatch[i] = 0;
149 primitiveBatch[i] = 0;
// Allocate the draw calls; drawList initially refers to them in the same order.
152 for(int draw = 0; draw < DRAW_COUNT; draw++)
154 drawCall[draw] = new DrawCall();
155 drawList[draw] = drawCall[draw];
158 for(int unit = 0; unit < 16; unit++)
160 primitiveProgress[unit].init();
163 for(int cluster = 0; cluster < 16; cluster++)
165 pixelProgress[cluster].init();
170 swiftConfig = new SwiftConfig(disableServer);
171 updateConfiguration(true);
// Resource that draw() locks as PRIVATE and synchronize() locks as PUBLIC.
173 sync = new Resource(0);
// Destructor. The visible part deletes the DRAW_COUNT DrawCall objects that the
// constructor allocated into drawCall[].
176 Renderer::~Renderer()
189 for(int draw = 0; draw < DRAW_COUNT; draw++)
191 delete drawCall[draw];
// Class-specific allocation: storage is obtained from sw::allocate() with a second
// argument of 16 (presumably the alignment in bytes - confirm against sw::allocate).
197 // This object has to be memory-aligned
198 void* Renderer::operator new(size_t size)
200 ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class
201 return sw::allocate(sizeof(Renderer), 16);
// Class-specific deallocation paired with Renderer::operator new.
// NOTE(review): the body is not visible here; presumably it releases the block obtained
// from sw::allocate() - confirm.
204 void Renderer::operator delete(void * mem)
// Records a draw call for asynchronous execution.
//   drawType    - primitive topology; stored in the context and in the DrawCall.
//   indexOffset - byte offset added to the locked index buffer pointer.
//   count       - number of primitives; compared against minPrimitives/maxPrimitives.
//   update      - when true, processor states and routines are regenerated.
// For each supersample pass the function picks the vertex/setup/pixel routines, takes a
// DrawCall slot, copies shader constants, viewport, clip-plane and scissor state into its
// DrawData, locks the vertex, index, texture and render-target resources the draw uses,
// and finally sets task[0] to RESUME.
209 void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
212 if(count < minPrimitives || count > maxPrimitives)
218 context->drawType = drawType;
220 updateConfiguration();
223 int ss = context->getSuperSampleCount();
224 int ms = context->getMultiSampleCount();
226 for(int q = 0; q < ss; q++)
228 unsigned int oldMultiSampleMask = context->multiSampleMask;
// Select the ms bits of the sample mask that belong to supersample pass q.
229 context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
231 if(!context->multiSampleMask)
236 sync->lock(sw::PRIVATE);
// Regenerate states and routines on request, or when the effective sample mask changed.
238 if(update || oldMultiSampleMask != context->multiSampleMask)
240 vertexState = VertexProcessor::update(drawType);
241 setupState = SetupProcessor::update();
242 pixelState = PixelProcessor::update();
244 vertexRoutine = VertexProcessor::routine(vertexState);
245 setupRoutine = SetupProcessor::routine(setupState);
246 pixelRoutine = PixelProcessor::routine(pixelState);
// Primitives per batch shrinks with the multisample count.
249 int batch = batchSize / ms;
// Choose the primitive setup member function from the topology and fill mode.
251 int (Renderer::*setupPrimitives)(int batch, int count);
253 if(context->isDrawTriangle())
255 switch(context->fillMode)
258 setupPrimitives = &Renderer::setupSolidTriangles;
261 setupPrimitives = &Renderer::setupWireframeTriangle;
265 setupPrimitives = &Renderer::setupVertexTriangle;
273 else if(context->isDrawLine())
275 setupPrimitives = &Renderer::setupLines;
279 setupPrimitives = &Renderer::setupPoints;
// Look for a DrawCall whose references is -1, the value finishRendering() stores
// once a draw has completed.
286 for(int i = 0; i < DRAW_COUNT; i++)
288 if(drawCall[i]->references == -1)
291 drawList[nextDraw % DRAW_COUNT] = draw;
304 DrawData *data = draw->data;
// Attach the active queries; each attached query gets its reference count incremented
// here and decremented again in finishRendering().
306 if(queries.size() != 0)
308 draw->queries = new std::list<Query*>();
309 bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
310 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
313 if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
315 atomicIncrement(&(q->reference));
316 draw->queries->push_back(q);
321 draw->drawType = drawType;
322 draw->batchSize = batch;
// Bind the routines for the lifetime of the draw; finishRendering() unbinds them.
324 vertexRoutine->bind();
325 setupRoutine->bind();
326 pixelRoutine->bind();
328 draw->vertexRoutine = vertexRoutine;
329 draw->setupRoutine = setupRoutine;
330 draw->pixelRoutine = pixelRoutine;
331 draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
332 draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
333 draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
334 draw->setupPrimitives = setupPrimitives;
335 draw->setupState = setupState;
// Capture and lock the vertex input streams.
337 for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
339 draw->vertexStream[i] = context->input[i].resource;
340 data->input[i] = context->input[i].buffer;
341 data->stride[i] = context->input[i].stride;
343 if(draw->vertexStream[i])
345 draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
349 if(context->indexBuffer)
351 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
354 draw->indexBuffer = context->indexBuffer;
// Clear every texture slot, then fill in and lock the ones the pixel state samples.
356 for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
358 draw->texture[sampler] = 0;
361 for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
363 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
365 draw->texture[sampler] = context->texture[sampler];
366 draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE); // If the texture is both read and written, use the same read/write lock as render targets
368 data->mipmap[sampler] = context->sampler[sampler].getTextureData();
// Pixel shader constants: copy only the dirty range, then mark it clean.
372 if(context->pixelShader)
374 if(draw->psDirtyConstF)
// The cW copy is capped at 8 entries.
376 memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
377 memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
378 draw->psDirtyConstF = 0;
381 if(draw->psDirtyConstI)
383 memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
384 draw->psDirtyConstI = 0;
387 if(draw->psDirtyConstB)
389 memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
390 draw->psDirtyConstB = 0;
393 PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
397 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
399 draw->pUniformBuffers[i] = nullptr;
// Texture stage uniforms are only copied for pixel shader versions up to 0x0104.
403 if(context->pixelShaderVersion() <= 0x0104)
405 for(int stage = 0; stage < 8; stage++)
407 if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
409 data->textureStage[stage] = context->textureStage[stage].uniforms;
// Vertex shader textures (version 0x0300 and up) are stored after the pixel texture units.
415 if(context->vertexShader)
417 if(context->vertexShader->getVersion() >= 0x0300)
419 for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
421 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
423 draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
424 draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
426 data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
// Vertex shader constants: same dirty-range copy as for the pixel shader.
431 if(draw->vsDirtyConstF)
433 memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
434 draw->vsDirtyConstF = 0;
437 if(draw->vsDirtyConstI)
439 memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
440 draw->vsDirtyConstI = 0;
443 if(draw->vsDirtyConstB)
445 memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
446 draw->vsDirtyConstB = 0;
449 if(context->vertexShader->isInstanceIdDeclared())
451 data->instanceID = context->instanceID;
454 VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
455 VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
// NOTE(review): presumably the no-vertex-shader path - the dirty counts are set back to
// their full range and the buffer slots are cleared; confirm the enclosing branch.
461 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
462 draw->vsDirtyConstI = 16;
463 draw->vsDirtyConstB = 16;
465 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
467 draw->vUniformBuffers[i] = nullptr;
470 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
472 draw->transformFeedbackBuffers[i] = nullptr;
476 if(pixelState.stencilActive)
478 data->stencil[0] = stencil;
479 data->stencil[1] = stencilCCW;
482 if(pixelState.fogActive)
487 if(setupState.isDrawPoint)
492 data->lineWidth = context->lineWidth;
494 data->factor = factor;
// Alpha-to-coverage: thresholds are spread around the alpha reference, limited by its
// distance to the nearer end of [0, 1]. Four thresholds in one case, two in the other.
496 if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
498 float ref = context->alphaReference * (1.0f / 255.0f);
499 float margin = sw::min(ref, 1.0f - ref);
503 data->a2c0 = replicate(ref - margin * 0.6f);
504 data->a2c1 = replicate(ref - margin * 0.2f);
505 data->a2c2 = replicate(ref + margin * 0.2f);
506 data->a2c3 = replicate(ref + margin * 0.6f);
510 data->a2c0 = replicate(ref - margin * 0.3f);
511 data->a2c1 = replicate(ref + margin * 0.3f);
// Reset the per-cluster occlusion counters that finishRendering() later sums into queries.
516 if(pixelState.occlusionEnabled)
518 for(int cluster = 0; cluster < clusterCount; cluster++)
520 data->occlusion[cluster] = 0;
525 for(int cluster = 0; cluster < clusterCount; cluster++)
527 for(int i = 0; i < PERF_TIMERS; i++)
529 data->cycles[i][cluster] = 0;
// Viewport transform terms: half extents, center and depth range.
536 float W = 0.5f * viewport.width;
537 float H = 0.5f * viewport.height;
538 float X0 = viewport.x0 + W;
539 float Y0 = viewport.y0 + H;
540 float N = viewport.minZ;
541 float F = viewport.maxZ;
544 if(context->isDrawTriangle(false))
549 if(complementaryDepthBuffer)
// Per-pass sub-pixel offsets; the row is chosen by log2 of the supersample count and the
// column by the pass index q.
555 static const float X[5][16] = // Fragment offsets
557 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample
558 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples
559 {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples
560 {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples
561 {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f} // 16 samples
564 static const float Y[5][16] = // Fragment offsets
566 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample
567 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples
568 {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples
569 {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples
570 {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f} // 16 samples
573 int s = sw::log2(ss);
// Viewport terms are stored scaled by 16; the sample offsets and half-pixel sizes are
// divided by the half extents W and H.
575 data->Wx16 = replicate(W * 16);
576 data->Hx16 = replicate(H * 16);
577 data->X0x16 = replicate(X0 * 16 - 8);
578 data->Y0x16 = replicate(Y0 * 16 - 8);
579 data->XXXX = replicate(X[s][q] / W);
580 data->YYYY = replicate(Y[s][q] / H);
581 data->halfPixelX = replicate(0.5f / W);
582 data->halfPixelY = replicate(0.5f / H);
583 data->viewportHeight = abs(viewport.height);
584 data->slopeDepthBias = slopeDepthBias;
585 data->depthRange = Z;
587 draw->clipFlags = clipFlags;
// Copy only the user clip planes whose flag is set.
591 if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
592 if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
593 if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
594 if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
595 if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
596 if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
// Lock the color, depth and stencil surfaces at slice q * ms; finishRendering() unlocks them.
602 for(int index = 0; index < RENDERTARGETS; index++)
604 draw->renderTarget[index] = context->renderTarget[index];
606 if(draw->renderTarget[index])
608 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
609 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
610 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
614 draw->depthBuffer = context->depthBuffer;
615 draw->stencilBuffer = context->stencilBuffer;
617 if(draw->depthBuffer)
619 data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
620 data->depthPitchB = context->depthBuffer->getInternalPitchB();
621 data->depthSliceB = context->depthBuffer->getInternalSliceB();
624 if(draw->stencilBuffer)
626 data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, q * ms, MANAGED);
627 data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
628 data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
634 data->scissorX0 = scissor.x0;
635 data->scissorX1 = scissor.x1;
636 data->scissorY0 = scissor.y0;
637 data->scissorY1 = scissor.y1;
// Number of batches needed for count primitives, rounded up.
643 draw->references = (count + batch - 1) / batch;
645 schedulerMutex.lock();
647 schedulerMutex.unlock();
650 if(threadCount == 1) // Use main thread for draw execution
653 task[0].type = Task::RESUME;
665 task[0].type = Task::RESUME;
// Clears dRect of dest by forwarding all arguments unchanged to Blitter::clear().
673 void Renderer::clear(void *pixel, Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
675 blitter->clear(pixel, format, dest, dRect, rgbaMask);
// Copies sRect of source to dRect of dest by forwarding all arguments unchanged to Blitter::blit().
678 void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil)
680 blitter->blit(source, sRect, dest, dRect, filter, isStencil);
// Forwards both surfaces unchanged to Blitter::blit3D().
683 void Renderer::blit3D(Surface *source, Surface *dest)
685 blitter->blit3D(source, dest);
// Worker entry point. parameters points to a Parameters struct carrying the renderer and
// the thread index; control is handed to that renderer's threadLoop().
688 void Renderer::threadFunction(void *parameters)
690 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
691 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
// Below IEEE precision, enable flush-to-zero and denormals-are-zero on this thread's CPU state.
693 if(logPrecision < IEEE)
695 CPUID::setFlushToZero(true);
696 CPUID::setDenormalsAreZero(true);
699 renderer->threadLoop(threadIndex);
// Per-thread loop body: runs taskLoop() for this thread, then signals the thread's
// suspend event and waits on its resume event.
702 void Renderer::threadLoop(int threadIndex)
706 taskLoop(threadIndex);
708 suspend[threadIndex]->signal();
709 resume[threadIndex]->wait();
// Repeatedly schedules and executes a task for this thread until the thread's current
// task type becomes Task::SUSPEND.
713 void Renderer::taskLoop(int threadIndex)
715 while(task[threadIndex].type != Task::SUSPEND)
717 scheduleTask(threadIndex);
718 executeTask(threadIndex);
// Appends runnable tasks to taskQueue: first pixel tasks for idle clusters, then
// primitive tasks for free primitive units. scheduleTask() calls this while holding
// schedulerMutex.
722 void Renderer::findAvailableTasks()
// Pixel tasks: pair each non-executing cluster with a primitive unit that holds processed
// primitives of the same draw call, starting exactly where the cluster left off.
725 for(int cluster = 0; cluster < clusterCount; cluster++)
727 if(!pixelProgress[cluster].executing)
729 for(int unit = 0; unit < unitCount; unit++)
731 if(primitiveProgress[unit].references > 0) // Contains processed primitives
733 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
735 if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive) // Previous primitives have been rendered
737 Task &task = taskQueue[qHead];
738 task.type = Task::PIXELS;
739 task.primitiveUnit = unit;
740 task.pixelCluster = cluster;
742 pixelProgress[cluster].executing = true;
744 // Commit to the task queue
// The queue index wraps at 32 entries.
745 qHead = (qHead + 1) % 32;
756 // Find primitive tasks
757 if(currentDraw == nextDraw)
759 return; // No more primitives to process
762 for(int unit = 0; unit < unitCount; unit++)
764 DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
// All primitives of the current draw have been handed out.
766 if(draw->primitive >= draw->count)
770 if(currentDraw == nextDraw)
772 return; // No more primitives to process
775 draw = drawList[currentDraw % DRAW_COUNT];
778 if(!primitiveProgress[unit].references) // Task not already being executed and not still in use by a pixel unit
780 int primitive = draw->primitive;
781 int count = draw->count;
782 int batch = draw->batchSize;
784 primitiveProgress[unit].drawCall = currentDraw;
785 primitiveProgress[unit].firstPrimitive = primitive;
// The last batch of a draw may hold fewer than batch primitives.
786 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
788 draw->primitive += batch;
790 Task &task = taskQueue[qHead];
791 task.type = Task::PRIMITIVES;
792 task.primitiveUnit = unit;
// -1 keeps the unit out of both checks above until executeTask() stores clusterCount here.
794 primitiveProgress[unit].references = -1;
796 // Commit to the task queue
797 qHead = (qHead + 1) % 32;
// Chooses the next task for the given thread while holding schedulerMutex: refills the
// queue when it runs low, takes the oldest queued task, marks suspended threads as
// RESUME when there is spare work, and otherwise marks this thread's task as SUSPEND.
803 void Renderer::scheduleTask(int threadIndex)
805 schedulerMutex.lock();
// Refill when fewer tasks are queued than there are sleeping threads plus this one.
807 if((int)qSize < threadCount - threadsAwake + 1)
809 findAvailableTasks();
// The oldest entry sits qSize slots behind the head of the 32-entry queue.
814 task[threadIndex] = taskQueue[(qHead - qSize) % 32];
817 if(threadsAwake != threadCount)
819 int wakeup = qSize - threadsAwake + 1;
821 for(int i = 0; i < threadCount && wakeup > 0; i++)
823 if(task[i].type == Task::SUSPEND)
826 task[i].type = Task::RESUME;
837 task[threadIndex].type = Task::SUSPEND;
842 schedulerMutex.unlock();
// Runs the task currently assigned to the given thread and accumulates tick counts into
// the per-thread vertex, setup and pixel timers.
845 void Renderer::executeTask(int threadIndex)
848 int64_t startTick = Timer::ticks();
851 switch(task[threadIndex].type)
// PRIMITIVES: run vertex processing and primitive setup for one batch.
853 case Task::PRIMITIVES:
855 int unit = task[threadIndex].primitiveUnit;
857 int input = primitiveProgress[unit].firstPrimitive;
858 int count = primitiveProgress[unit].primitiveCount;
859 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
860 int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;
862 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
865 int64_t time = Timer::ticks();
866 vertexTime[threadIndex] += time - startTick;
// Setup only runs when rasterizer discard is off.
872 if(!draw->setupState.rasterizerDiscard)
874 visible = (this->*setupPrimitives)(unit, count);
877 primitiveProgress[unit].visible = visible;
// Every pixel cluster must consume this batch; finishRendering() decrements once per cluster.
878 primitiveProgress[unit].references = clusterCount;
881 setupTime[threadIndex] += Timer::ticks() - startTick;
// Pixel work: run the draw's pixel routine for one cluster over the batch's visible
// primitives, then let finishRendering() do the bookkeeping.
887 int unit = task[threadIndex].primitiveUnit;
888 int visible = primitiveProgress[unit].visible;
892 int cluster = task[threadIndex].pixelCluster;
893 Primitive *primitive = primitiveBatch[unit];
894 DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
895 DrawData *data = draw->data;
896 PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
898 pixelRoutine(primitive, visible, cluster, data);
901 finishRendering(task[threadIndex]);
904 pixelTime[threadIndex] += Timer::ticks() - startTick;
// Locks the sync resource as sw::PUBLIC; draw() locks the same resource as sw::PRIVATE.
// NOTE(review): presumably this makes the caller wait for outstanding draws - confirm
// against Resource::lock().
917 void Renderer::synchronize()
919 sync->lock(sw::PUBLIC);
// Bookkeeping after a pixel task: advances the cluster's progress, drops references on
// the primitive unit and the draw call, and on draw completion accumulates profiler and
// query results, unlocks every resource draw() locked, unbinds the routines, marks the
// DrawCall free and signals resumeApp.
//   pixelTask - the completed task; supplies the primitive unit and pixel cluster.
923 void Renderer::finishRendering(Task &pixelTask)
925 int unit = pixelTask.primitiveUnit;
926 int cluster = pixelTask.pixelCluster;
928 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
929 DrawData &data = *draw.data;
930 int primitive = primitiveProgress[unit].firstPrimitive;
931 int count = primitiveProgress[unit].primitiveCount;
932 int processedPrimitives = primitive + count;
934 pixelProgress[cluster].processedPrimitives = processedPrimitives;
// Once the whole draw has been processed, the cluster moves on to the next draw call.
936 if(pixelProgress[cluster].processedPrimitives >= draw.count)
938 pixelProgress[cluster].drawCall++;
939 pixelProgress[cluster].processedPrimitives = 0;
// Release this cluster's hold on the primitive unit (executeTask() set it to clusterCount).
942 int ref = atomicDecrement(&primitiveProgress[unit].references);
946 ref = atomicDecrement(&draw.references);
951 for(int cluster = 0; cluster < clusterCount; cluster++)
953 for(int i = 0; i < PERF_TIMERS; i++)
955 profiler.cycles[i] += data.cycles[i][cluster];
// Fold results into the queries attached by draw() and drop the reference taken there.
962 for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
968 case Query::FRAGMENTS_PASSED:
969 for(int cluster = 0; cluster < clusterCount; cluster++)
971 atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
974 case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
975 atomicAdd((volatile int*)&query->data, processedPrimitives);
981 atomicDecrement(&query->reference);
// Unlock everything draw() locked for this draw call.
988 for(int i = 0; i < RENDERTARGETS; i++)
990 if(draw.renderTarget[i])
992 draw.renderTarget[i]->unlockInternal();
998 draw.depthBuffer->unlockInternal();
1001 if(draw.stencilBuffer)
1003 draw.stencilBuffer->unlockStencil();
1006 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
1010 draw.texture[i]->unlock();
1014 for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
1016 if(draw.vertexStream[i])
1018 draw.vertexStream[i]->unlock();
1022 if(draw.indexBuffer)
1024 draw.indexBuffer->unlock();
1027 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
1029 if(draw.pUniformBuffers[i])
1031 draw.pUniformBuffers[i]->unlock();
1033 if(draw.vUniformBuffers[i])
1035 draw.vUniformBuffers[i]->unlock();
1039 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
1041 if(draw.transformFeedbackBuffers[i])
1043 draw.transformFeedbackBuffers[i]->unlock();
1047 draw.vertexRoutine->unbind();
1048 draw.setupRoutine->unbind();
1049 draw.pixelRoutine->unbind();
// -1 is the value draw() looks for when searching for a free DrawCall.
1053 draw.references = -1;
1054 resumeApp->signal();
1058 pixelProgress[cluster].executing = false;
// Builds the vertex index triples for one batch of primitives and runs the draw's vertex
// routine over them.
//   unit          - primitive unit whose triangle batch receives the output.
//   start         - index of the first primitive of the batch within the draw.
//   triangleCount - number of primitives in the batch (points and lines included).
//   loop          - wrap-around modulus for line loops; executeTask() passes draw->count.
//   thread        - worker index selecting the VertexTask and its vertex cache.
// Every primitive occupies three index slots: points repeat one index three times and
// lines repeat their second index.
1061 void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
1063 Triangle *triangle = triangleBatch[unit];
1064 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1065 DrawData *data = draw->data;
1066 VertexTask *task = vertexTask[thread];
1068 const void *indices = data->indices;
1069 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
// The vertex cache is tagged with a draw call number and cleared when that number changes.
1071 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
1073 task->vertexCache.clear();
1074 task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
1077 unsigned int batch[128][3]; // FIXME: Adjust to dynamic batch size
1079 switch(draw->drawType)
// Non-indexed topologies: indices are computed from the primitive number.
1081 case DRAW_POINTLIST:
1083 unsigned int index = start;
1085 for(unsigned int i = 0; i < triangleCount; i++)
1087 batch[i][0] = index;
1088 batch[i][1] = index;
1089 batch[i][2] = index;
// Two vertices per primitive (presumably the non-indexed line list case - confirm).
1097 unsigned int index = 2 * start;
1099 for(unsigned int i = 0; i < triangleCount; i++)
1101 batch[i][0] = index + 0;
1102 batch[i][1] = index + 1;
1103 batch[i][2] = index + 1;
1109 case DRAW_LINESTRIP:
1111 unsigned int index = start;
1113 for(unsigned int i = 0; i < triangleCount; i++)
1115 batch[i][0] = index + 0;
1116 batch[i][1] = index + 1;
1117 batch[i][2] = index + 1;
// Indices wrap modulo loop (presumably the non-indexed line loop case - confirm).
1125 unsigned int index = start;
1127 for(unsigned int i = 0; i < triangleCount; i++)
1129 batch[i][0] = (index + 0) % loop;
1130 batch[i][1] = (index + 1) % loop;
1131 batch[i][2] = (index + 1) % loop;
1137 case DRAW_TRIANGLELIST:
1139 unsigned int index = 3 * start;
1141 for(unsigned int i = 0; i < triangleCount; i++)
1143 batch[i][0] = index + 0;
1144 batch[i][1] = index + 1;
1145 batch[i][2] = index + 2;
// Strip: the low bit of index decides which of the next two vertices goes in slot 1 and
// which in slot 2.
1151 case DRAW_TRIANGLESTRIP:
1153 unsigned int index = start;
1155 for(unsigned int i = 0; i < triangleCount; i++)
1157 batch[i][0] = index + 0;
1158 batch[i][1] = index + (index & 1) + 1;
1159 batch[i][2] = index + (~index & 1) + 1;
1165 case DRAW_TRIANGLEFAN:
1167 unsigned int index = start;
1169 for(unsigned int i = 0; i < triangleCount; i++)
1171 batch[i][0] = index + 1;
1172 batch[i][1] = index + 2;
// Indexed topologies: the same patterns, reading 8-, 16- or 32-bit indices from data->indices.
1179 case DRAW_INDEXEDPOINTLIST8:
1181 const unsigned char *index = (const unsigned char*)indices + start;
1183 for(unsigned int i = 0; i < triangleCount; i++)
1185 batch[i][0] = *index;
1186 batch[i][1] = *index;
1187 batch[i][2] = *index;
1193 case DRAW_INDEXEDPOINTLIST16:
1195 const unsigned short *index = (const unsigned short*)indices + start;
1197 for(unsigned int i = 0; i < triangleCount; i++)
1199 batch[i][0] = *index;
1200 batch[i][1] = *index;
1201 batch[i][2] = *index;
1207 case DRAW_INDEXEDPOINTLIST32:
1209 const unsigned int *index = (const unsigned int*)indices + start;
1211 for(unsigned int i = 0; i < triangleCount; i++)
1213 batch[i][0] = *index;
1214 batch[i][1] = *index;
1215 batch[i][2] = *index;
1221 case DRAW_INDEXEDLINELIST8:
1223 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1225 for(unsigned int i = 0; i < triangleCount; i++)
1227 batch[i][0] = index[0];
1228 batch[i][1] = index[1];
1229 batch[i][2] = index[1];
1235 case DRAW_INDEXEDLINELIST16:
1237 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1239 for(unsigned int i = 0; i < triangleCount; i++)
1241 batch[i][0] = index[0];
1242 batch[i][1] = index[1];
1243 batch[i][2] = index[1];
1249 case DRAW_INDEXEDLINELIST32:
1251 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1253 for(unsigned int i = 0; i < triangleCount; i++)
1255 batch[i][0] = index[0];
1256 batch[i][1] = index[1];
1257 batch[i][2] = index[1];
1263 case DRAW_INDEXEDLINESTRIP8:
1265 const unsigned char *index = (const unsigned char*)indices + start;
1267 for(unsigned int i = 0; i < triangleCount; i++)
1269 batch[i][0] = index[0];
1270 batch[i][1] = index[1];
1271 batch[i][2] = index[1];
1277 case DRAW_INDEXEDLINESTRIP16:
1279 const unsigned short *index = (const unsigned short*)indices + start;
1281 for(unsigned int i = 0; i < triangleCount; i++)
1283 batch[i][0] = index[0];
1284 batch[i][1] = index[1];
1285 batch[i][2] = index[1];
1291 case DRAW_INDEXEDLINESTRIP32:
1293 const unsigned int *index = (const unsigned int*)indices + start;
1295 for(unsigned int i = 0; i < triangleCount; i++)
1297 batch[i][0] = index[0];
1298 batch[i][1] = index[1];
1299 batch[i][2] = index[1];
// Indexed line loops: the position in the index buffer wraps modulo loop.
1305 case DRAW_INDEXEDLINELOOP8:
1307 const unsigned char *index = (const unsigned char*)indices;
1309 for(unsigned int i = 0; i < triangleCount; i++)
1311 batch[i][0] = index[(start + i + 0) % loop];
1312 batch[i][1] = index[(start + i + 1) % loop];
1313 batch[i][2] = index[(start + i + 1) % loop];
1317 case DRAW_INDEXEDLINELOOP16:
1319 const unsigned short *index = (const unsigned short*)indices;
1321 for(unsigned int i = 0; i < triangleCount; i++)
1323 batch[i][0] = index[(start + i + 0) % loop];
1324 batch[i][1] = index[(start + i + 1) % loop];
1325 batch[i][2] = index[(start + i + 1) % loop];
1329 case DRAW_INDEXEDLINELOOP32:
1331 const unsigned int *index = (const unsigned int*)indices;
1333 for(unsigned int i = 0; i < triangleCount; i++)
1335 batch[i][0] = index[(start + i + 0) % loop];
1336 batch[i][1] = index[(start + i + 1) % loop];
1337 batch[i][2] = index[(start + i + 1) % loop];
1341 case DRAW_INDEXEDTRIANGLELIST8:
1343 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1345 for(unsigned int i = 0; i < triangleCount; i++)
1347 batch[i][0] = index[0];
1348 batch[i][1] = index[1];
1349 batch[i][2] = index[2];
1355 case DRAW_INDEXEDTRIANGLELIST16:
1357 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1359 for(unsigned int i = 0; i < triangleCount; i++)
1361 batch[i][0] = index[0];
1362 batch[i][1] = index[1];
1363 batch[i][2] = index[2];
1369 case DRAW_INDEXEDTRIANGLELIST32:
1371 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1373 for(unsigned int i = 0; i < triangleCount; i++)
1375 batch[i][0] = index[0];
1376 batch[i][1] = index[1];
1377 batch[i][2] = index[2];
// Indexed strips: the low bit of the primitive number (start + i) picks the order of the
// second and third vertex.
1383 case DRAW_INDEXEDTRIANGLESTRIP8:
1385 const unsigned char *index = (const unsigned char*)indices + start;
1387 for(unsigned int i = 0; i < triangleCount; i++)
1389 batch[i][0] = index[0];
1390 batch[i][1] = index[((start + i) & 1) + 1];
1391 batch[i][2] = index[(~(start + i) & 1) + 1];
1397 case DRAW_INDEXEDTRIANGLESTRIP16:
1399 const unsigned short *index = (const unsigned short*)indices + start;
1401 for(unsigned int i = 0; i < triangleCount; i++)
1403 batch[i][0] = index[0];
1404 batch[i][1] = index[((start + i) & 1) + 1];
1405 batch[i][2] = index[(~(start + i) & 1) + 1];
1411 case DRAW_INDEXEDTRIANGLESTRIP32:
1413 const unsigned int *index = (const unsigned int*)indices + start;
1415 for(unsigned int i = 0; i < triangleCount; i++)
1417 batch[i][0] = index[0];
1418 batch[i][1] = index[((start + i) & 1) + 1];
1419 batch[i][2] = index[(~(start + i) & 1) + 1];
// Indexed fans: the first entry of the index buffer is shared and stored in slot 2.
1425 case DRAW_INDEXEDTRIANGLEFAN8:
1427 const unsigned char *index = (const unsigned char*)indices;
1429 for(unsigned int i = 0; i < triangleCount; i++)
1431 batch[i][0] = index[start + i + 1];
1432 batch[i][1] = index[start + i + 2];
1433 batch[i][2] = index[0];
1437 case DRAW_INDEXEDTRIANGLEFAN16:
1439 const unsigned short *index = (const unsigned short*)indices;
1441 for(unsigned int i = 0; i < triangleCount; i++)
1443 batch[i][0] = index[start + i + 1];
1444 batch[i][1] = index[start + i + 2];
1445 batch[i][2] = index[0];
1449 case DRAW_INDEXEDTRIANGLEFAN32:
1451 const unsigned int *index = (const unsigned int*)indices;
1453 for(unsigned int i = 0; i < triangleCount; i++)
1455 batch[i][0] = index[start + i + 1];
1456 batch[i][1] = index[start + i + 2];
1457 batch[i][2] = index[0];
// Two triangles (0,1,2) and (0,2,3) per group of four vertices (presumably the quad list
// case - confirm).
1463 unsigned int index = 4 * start / 2;
1465 for(unsigned int i = 0; i < triangleCount; i += 2)
1467 batch[i+0][0] = index + 0;
1468 batch[i+0][1] = index + 1;
1469 batch[i+0][2] = index + 2;
1471 batch[i+1][0] = index + 0;
1472 batch[i+1][1] = index + 2;
1473 batch[i+1][2] = index + 3;
// Hand the index triples to the vertex routine; the output starts at the unit's first triangle.
1484 task->primitiveStart = start;
1485 task->vertexCount = triangleCount * 3;
1486 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
// Primitive setup for filled triangles: walks the unit's triangle batch, clips where
// needed and calls the draw's setup routine on each surviving polygon.
//   unit  - primitive unit whose triangle and primitive batches are used.
//   count - number of triangles in the batch.
// executeTask() stores the returned int as the unit's visible primitive count.
1489 int Renderer::setupSolidTriangles(int unit, int count)
1491 Triangle *triangle = triangleBatch[unit];
1492 Primitive *primitive = primitiveBatch[unit];
1494 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1495 SetupProcessor::State &state = draw.setupState;
1496 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1498 int ms = state.multiSample;
1499 int pos = state.positionRegister;
1500 const DrawData *data = draw.data;
1503 for(int i = 0; i < count; i++, triangle++)
1505 Vertex &v0 = triangle->v0;
1506 Vertex &v1 = triangle->v1;
1507 Vertex &v2 = triangle->v2;
// Only triangles whose combined (AND-ed) clip flags equal CLIP_FINITE are processed.
1509 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1511 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1513 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
// Any extra flag from a vertex or from the draw's clip planes sends the polygon through the clipper.
1515 if(clipFlagsOr != Clipper::CLIP_FINITE)
1517 if(!clipper->clip(polygon, clipFlagsOr, draw))
1523 if(setupRoutine(primitive, triangle, &polygon, data))
// Sets up the three edges of the first triangle of batch `unit` as lines.
// Returns 0 early when the triangle is culled by state.cullMode.
// NOTE(review): braces, pointer increments and the final return are not
// visible in this excerpt.
1534 int Renderer::setupWireframeTriangle(int unit, int count)
1536 Triangle *triangle = triangleBatch[unit];
1537 Primitive *primitive = primitiveBatch[unit];
1540 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1541 SetupProcessor::State &state = draw.setupState;
1543 const Vertex &v0 = triangle[0].v0;
1544 const Vertex &v1 = triangle[0].v1;
1545 const Vertex &v2 = triangle[0].v2;
// Signed quantity built from the x, y and w of the three vertices; its sign
// drives the culling tests below and half of it is stored as primitive->area.
1547 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1549 if(state.cullMode == CULL_CLOCKWISE)
1551 if(d >= 0) return 0;
1553 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1555 if(d <= 0) return 0;
// triangle[0] keeps the v0-v1 edge; triangle[1] and triangle[2] receive the
// v1-v2 and v2-v0 edges as their (v0, v1) pairs.
1559 triangle[1].v0 = v1;
1560 triangle[1].v1 = v2;
1561 triangle[2].v0 = v2;
1562 triangle[2].v1 = v0;
// With flat shading, both colors C[0..1] of the extra edges are taken from
// triangle[0].v0.
1564 if(state.color[0][0].flat) // FIXME
1566 for(int i = 0; i < 2; i++)
1568 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1569 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1570 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1571 triangle[2].v1.C[i] = triangle[0].v0.C[i];
// One setupLine call per edge.
1575 for(int i = 0; i < 3; i++)
1577 if(setupLine(*primitive, *triangle, draw))
1579 primitive->area = 0.5f * d;
// Sets up the three vertices of the first triangle of batch `unit` as points.
// Returns 0 early when the triangle is culled by state.cullMode.
// NOTE(review): braces, pointer increments and the final return are not
// visible in this excerpt.
1591 int Renderer::setupVertexTriangle(int unit, int count)
1593 Triangle *triangle = triangleBatch[unit];
1594 Primitive *primitive = primitiveBatch[unit];
1597 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1598 SetupProcessor::State &state = draw.setupState;
1600 const Vertex &v0 = triangle[0].v0;
1601 const Vertex &v1 = triangle[0].v1;
1602 const Vertex &v2 = triangle[0].v2;
// Same signed quantity as in setupWireframeTriangle; its sign drives culling.
1604 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1606 if(state.cullMode == CULL_CLOCKWISE)
1608 if(d >= 0) return 0;
1610 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1612 if(d <= 0) return 0;
// setupPoint reads triangle.v0, so v1 and v2 are moved into the v0 slot of
// the next two batch entries.
1616 triangle[1].v0 = v1;
1617 triangle[2].v0 = v2;
1619 for(int i = 0; i < 3; i++)
1621 if(setupPoint(*primitive, *triangle, draw))
1623 primitive->area = 0.5f * d;
// Calls setupLine for `count` entries of batch `unit`.
// NOTE(review): pointer increments, counters and the return are not visible
// in this excerpt.
1635 int Renderer::setupLines(int unit, int count)
1637 Triangle *triangle = triangleBatch[unit];
1638 Primitive *primitive = primitiveBatch[unit];
1641 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1642 SetupProcessor::State &state = draw.setupState;
1644 int ms = state.multiSample;
1646 for(int i = 0; i < count; i++)
1648 if(setupLine(*primitive, *triangle, draw))
// Calls setupPoint for `count` entries of batch `unit`.
// NOTE(review): pointer increments, counters and the return are not visible
// in this excerpt.
1660 int Renderer::setupPoints(int unit, int count)
1662 Triangle *triangle = triangleBatch[unit];
1663 Primitive *primitive = primitiveBatch[unit];
1666 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1667 SetupProcessor::State &state = draw.setupState;
1669 int ms = state.multiSample;
1671 for(int i = 0; i < count; i++)
1673 if(setupPoint(*primitive, *triangle, draw))
// Builds a polygon around the line from triangle.v0 to triangle.v1 (read from
// the setup state's position register), clips it when needed and returns the
// result of the draw call's setup routine.
// NOTE(review): many short lines (braces, early returns, array declarations,
// vertex assignments) are not visible in this excerpt; comments below describe
// only the statements that are shown.
1685 bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1687 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1688 const SetupProcessor::State &state = draw.setupState;
1689 const DrawData &data = *draw.data;
1691 float lineWidth = data.lineWidth;
1693 Vertex &v0 = triangle.v0;
1694 Vertex &v1 = triangle.v1;
1696 int pos = state.positionRegister;
1698 const float4 &P0 = v0.v[pos];
1699 const float4 &P1 = v1.v[pos];
// Both endpoints have w <= 0 (the consequent is not visible here).
1701 if(P0.w <= 0 && P1.w <= 0)
// Wx16 / Hx16 are scaled by 1/16 to obtain W and H.
1706 const float W = data.Wx16[0] * (1.0f / 16.0f);
1707 const float H = data.Hx16[0] * (1.0f / 16.0f);
// Difference of the w-divided endpoint coordinates, scaled by W and H.
1709 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1710 float dy = H * (P1.y / P1.w - P0.y / P0.w);
// Zero-length line (the consequent is not visible here).
1712 if(dx == 0 && dy == 0)
// The rectangle variant is disabled by the constant-false condition.
1717 if(false) // Rectangle
// Half the line width divided by the length of (dx, dy).
1727 float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
// Offsets for each endpoint: dx/dy multiplied by that endpoint's w and
// divided by W or H.
1732 float dx0w = dx * P0.w / W;
1733 float dy0h = dy * P0.w / H;
1734 float dx0h = dx * P0.w / H;
1735 float dy0w = dy * P0.w / W;
1737 float dx1w = dx * P1.w / W;
1738 float dy1h = dy * P1.w / H;
1739 float dx1h = dx * P1.w / H;
1740 float dy1w = dy * P1.w / W;
// Four corners P[0..3], each followed by its clip-flag computation.
1742 P[0].x += -dy0w + -dx0w;
1743 P[0].y += -dx0h + +dy0h;
1744 C[0] = clipper->computeClipFlags(P[0]);
1746 P[1].x += -dy1w + +dx1w;
1747 P[1].y += -dx1h + +dy1h;
1748 C[1] = clipper->computeClipFlags(P[1]);
1750 P[2].x += +dy1w + +dx1w;
1751 P[2].y += +dx1h + -dy1h;
1752 C[2] = clipper->computeClipFlags(P[2]);
1754 P[3].x += +dy0w + -dx0w;
1755 P[3].y += +dx0h + +dy0h;
1756 C[3] = clipper->computeClipFlags(P[3]);
// Proceed only when the AND of the corner flags is exactly CLIP_FINITE.
1758 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1760 Polygon polygon(P, 4);
1762 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1764 if(clipFlagsOr != Clipper::CLIP_FINITE)
1766 if(!clipper->clip(polygon, clipFlagsOr, draw))
1772 return setupRoutine(&primitive, &triangle, &polygon, &data);
1775 else // Diamond test convention
// Half the line width multiplied by each endpoint's w and divided by W or H.
1789 float dx0 = lineWidth * 0.5f * P0.w / W;
1790 float dy0 = lineWidth * 0.5f * P0.w / H;
1792 float dx1 = lineWidth * 0.5f * P1.w / W;
1793 float dy1 = lineWidth * 0.5f * P1.w / H;
// Clip flags for eight points P[0..7] (their construction is not visible
// in this excerpt).
1796 C[0] = clipper->computeClipFlags(P[0]);
1799 C[1] = clipper->computeClipFlags(P[1]);
1802 C[2] = clipper->computeClipFlags(P[2]);
1805 C[3] = clipper->computeClipFlags(P[3]);
1808 C[4] = clipper->computeClipFlags(P[4]);
1811 C[5] = clipper->computeClipFlags(P[5]);
1814 C[6] = clipper->computeClipFlags(P[6]);
1817 C[7] = clipper->computeClipFlags(P[7]);
1819 if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1825 if(dx > dy) // Right
// The polygon handed to setup is built from six vertices in L.
1866 Polygon polygon(L, 6);
1868 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1870 if(clipFlagsOr != Clipper::CLIP_FINITE)
1872 if(!clipper->clip(polygon, clipFlagsOr, draw))
1878 return setupRoutine(&primitive, &triangle, &polygon, &data);
// Builds a four-vertex polygon P for the point at triangle.v0, clips it when
// needed and returns the result of the draw call's setup routine.
// NOTE(review): braces, declarations, the P[i] assignments and the non-setup
// return are not visible in this excerpt.
1885 bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1887 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1888 const SetupProcessor::State &state = draw.setupState;
1889 const DrawData &data = *draw.data;
1891 Vertex &v = triangle.v0;
1895 int pts = state.pointSizeRegister;
// The point size depends on whether a point-size register is in use; only
// the data.point.pointSize[0] assignment is visible here.
1897 if(state.pointSizeRegister != Unused)
1903 pSize = data.point.pointSize[0];
1906 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1911 int pos = state.positionRegister;
// Point size multiplied by P[0].w and the half-pixel factors.
1918 const float X = pSize * P[0].w * data.halfPixelX[0];
1919 const float Y = pSize * P[0].w * data.halfPixelY[0];
1923 C[0] = clipper->computeClipFlags(P[0]);
1927 C[1] = clipper->computeClipFlags(P[1]);
1931 C[2] = clipper->computeClipFlags(P[2]);
1935 C[3] = clipper->computeClipFlags(P[3]);
// v1 and v2 start as copies of v0; v1.X is then raised by iround(8 * pSize)
// and v2.Y lowered by the same amount, sign-flipped when Hx16[0] is not
// positive.
1937 triangle.v1 = triangle.v0;
1938 triangle.v2 = triangle.v0;
1940 triangle.v1.X += iround(16 * 0.5f * pSize);
1941 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1943 Polygon polygon(P, 4);
// Proceed only when the AND of the corner flags is exactly CLIP_FINITE.
1945 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1947 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1949 if(clipFlagsOr != Clipper::CLIP_FINITE)
1951 if(!clipper->clip(polygon, clipFlagsOr, draw))
1957 return setupRoutine(&primitive, &triangle, &polygon, &data);
// Allocates the per-unit triangle and primitive batches, then creates for
// each of the threadCount workers its vertex task, resume/suspend events and
// Thread object.
// NOTE(review): braces and short statements are not visible in this excerpt.
1963 void Renderer::initializeThreads()
// Unit and cluster counts are both threadCount rounded by ceilPow2.
1965 unitCount = ceilPow2(threadCount);
1966 clusterCount = ceilPow2(threadCount);
1968 for(int i = 0; i < unitCount; i++)
1970 triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1971 primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1974 for(int i = 0; i < threadCount; i++)
1976 vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
// -1 marks the vertex cache as not belonging to any draw call yet.
1977 vertexTask[i]->vertexCache.drawCall = -1;
1979 task[i].type = Task::SUSPEND;
1981 resume[i] = new Event();
1982 suspend[i] = new Event();
1984 Parameters parameters;
1985 parameters.threadIndex = i;
1986 parameters.renderer = this;
1988 exitThreads = false;
// Pass the address of the local Parameters declared above. The original text
// had the mis-decoded token "¶meters" here (the "&para" of "&parameters" was
// read as an HTML entity).
1989 worker[i] = new Thread(threadFunction, &parameters);
1992 suspend[i]->signal();
// Shuts down the worker threads and releases what initializeThreads created:
// each thread's resume event is signalled, the worker joined and deleted, the
// events deleted and the vertex task deallocated; then the triangle and
// primitive batches are deallocated.
// NOTE(review): braces and short statements (e.g. the body of the wait loop)
// are not visible in this excerpt.
1996 void Renderer::terminateThreads()
// Loop while any thread is still awake (body not visible here).
1998 while(threadsAwake != 0)
2003 for(int thread = 0; thread < threadCount; thread++)
2008 resume[thread]->signal();
2009 worker[thread]->join();
2011 delete worker[thread];
2013 delete resume[thread];
2015 delete suspend[thread];
2016 suspend[thread] = 0;
2019 deallocate(vertexTask[thread]);
2020 vertexTask[thread] = 0;
// Batches are released for a fixed 16 slots rather than unitCount.
2023 for(int i = 0; i < 16; i++)
2025 deallocate(triangleBatch[i]);
2026 triangleBatch[i] = 0;
2028 deallocate(primitiveBatch[i]);
2029 primitiveBatch[i] = 0;
// Walks the vertex shader's instructions and uploads the constants they
// define: DEF -> setVertexShaderConstantF, DEFI -> setVertexShaderConstantI,
// DEFB -> setVertexShaderConstantB, each at the instruction's dst.index.
// Does nothing for a null shader.
// NOTE(review): the declarations of `value` and `integer` are not visible in
// this excerpt.
2033 void Renderer::loadConstants(const VertexShader *vertexShader)
2035 if(!vertexShader) return;
2037 size_t count = vertexShader->getLength();
2039 for(size_t i = 0; i < count; i++)
2041 const Shader::Instruction *instruction = vertexShader->getInstruction(i);
2043 if(instruction->opcode == Shader::OPCODE_DEF)
2045 int index = instruction->dst.index;
// Copy the four float components of the first source operand.
2048 value[0] = instruction->src[0].value[0];
2049 value[1] = instruction->src[0].value[1];
2050 value[2] = instruction->src[0].value[2];
2051 value[3] = instruction->src[0].value[3];
2053 setVertexShaderConstantF(index, value);
2055 else if(instruction->opcode == Shader::OPCODE_DEFI)
2057 int index = instruction->dst.index;
// Copy the four integer components of the first source operand.
2060 integer[0] = instruction->src[0].integer[0];
2061 integer[1] = instruction->src[0].integer[1];
2062 integer[2] = instruction->src[0].integer[2];
2063 integer[3] = instruction->src[0].integer[3];
2065 setVertexShaderConstantI(index, integer);
2067 else if(instruction->opcode == Shader::OPCODE_DEFB)
2069 int index = instruction->dst.index;
// Only the first boolean component is used.
2070 int boolean = instruction->src[0].boolean[0];
2072 setVertexShaderConstantB(index, &boolean);
// Walks the pixel shader's instructions and uploads the constants they
// define: DEF -> setPixelShaderConstantF, DEFI -> setPixelShaderConstantI,
// DEFB -> setPixelShaderConstantB, each at the instruction's dst.index.
// Does nothing for a null shader.
// NOTE(review): the declarations of `value` and `integer` are not visible in
// this excerpt.
2077 void Renderer::loadConstants(const PixelShader *pixelShader)
2079 if(!pixelShader) return;
2081 size_t count = pixelShader->getLength();
2083 for(size_t i = 0; i < count; i++)
2085 const Shader::Instruction *instruction = pixelShader->getInstruction(i);
2087 if(instruction->opcode == Shader::OPCODE_DEF)
2089 int index = instruction->dst.index;
// Copy the four float components of the first source operand.
2092 value[0] = instruction->src[0].value[0];
2093 value[1] = instruction->src[0].value[1];
2094 value[2] = instruction->src[0].value[2];
2095 value[3] = instruction->src[0].value[3];
2097 setPixelShaderConstantF(index, value);
2099 else if(instruction->opcode == Shader::OPCODE_DEFI)
2101 int index = instruction->dst.index;
// Copy the four integer components of the first source operand.
2104 integer[0] = instruction->src[0].integer[0];
2105 integer[1] = instruction->src[0].integer[1];
2106 integer[2] = instruction->src[0].integer[2];
2107 integer[3] = instruction->src[0].integer[3];
2109 setPixelShaderConstantI(index, integer);
2111 else if(instruction->opcode == Shader::OPCODE_DEFB)
2113 int index = instruction->dst.index;
// Only the first boolean component is used.
2114 int boolean = instruction->src[0].boolean[0];
2116 setPixelShaderConstantB(index, &boolean);
// Stores the index buffer resource in the context.
2121 void Renderer::setIndexBuffer(Resource *indexBuffer)
2123 context->indexBuffer = indexBuffer;
// Stores the multisample mask in the context's sampleMask.
2126 void Renderer::setMultiSampleMask(unsigned int mask)
2128 context->sampleMask = mask;
// Writes the namespace-level sw::transparencyAntialiasing setting; the
// qualification is needed because the parameter shadows the global's name.
2131 void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2133 sw::transparencyAntialiasing = transparencyAntialiasing;
// Compares the resource bound to texture unit `sampler` against the resource
// of every non-null render target and of the depth buffer.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably true on a match and false otherwise — confirm in the full file.
2136 bool Renderer::isReadWriteTexture(int sampler)
2138 for(int index = 0; index < RENDERTARGETS; index++)
2140 if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2146 if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
// When updateClipPlanes is set, refreshes clipPlane[0..5] for every user plane
// enabled in clipFlags and then resets updateClipPlanes to false.
2154 void Renderer::updateClipper()
2156 if(updateClipPlanes)
2158 if(VertexProcessor::isFixedFunction()) // User plane in world space
// Fixed-function path: each enabled plane is multiplied by the view transform.
2160 const Matrix &scissorWorld = getViewTransform();
2162 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2163 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2164 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2165 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2166 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2167 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2169 else // User plane in clip space
// Otherwise each enabled plane is copied unchanged.
2171 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2172 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2173 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2174 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2175 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2176 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2179 updateClipPlanes = false;
// Binds `resource` as the texture of unit `sampler` in the context.
// Asserts that sampler is below TOTAL_IMAGE_UNITS.
2183 void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2185 ASSERT(sampler < TOTAL_IMAGE_UNITS);
2187 context->texture[sampler] = resource;
// Forwards a (face, level) surface and texture type to the context's sampler
// `sampler`. Asserts sampler < TOTAL_IMAGE_UNITS, face < 6 and
// level < MIPMAP_LEVELS.
2190 void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2192 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2194 context->sampler[sampler].setTextureLevel(face, level, surface, type);
// Sets the texture filter of `sampler`: PixelProcessor is used when type is
// SAMPLER_PIXEL; the VertexProcessor call is presumably the else branch (the
// keyword is not visible in this excerpt).
2197 void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2199 if(type == SAMPLER_PIXEL)
2201 PixelProcessor::setTextureFilter(sampler, textureFilter);
2205 VertexProcessor::setTextureFilter(sampler, textureFilter);
// Sets the mipmap filter of `sampler`: PixelProcessor is used when type is
// SAMPLER_PIXEL; the VertexProcessor call is presumably the else branch (the
// keyword is not visible in this excerpt).
2209 void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2211 if(type == SAMPLER_PIXEL)
2213 PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2217 VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
// Sets the gather-enable flag of `sampler`: PixelProcessor is used when type
// is SAMPLER_PIXEL; the VertexProcessor call is presumably the else branch
// (the keyword is not visible in this excerpt).
2221 void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2223 if(type == SAMPLER_PIXEL)
2225 PixelProcessor::setGatherEnable(sampler, enable);
2229 VertexProcessor::setGatherEnable(sampler, enable);
// Sets the U addressing mode of `sampler`: PixelProcessor is used when type is
// SAMPLER_PIXEL; the VertexProcessor call is presumably the else branch (the
// keyword is not visible in this excerpt).
2233 void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2235 if(type == SAMPLER_PIXEL)
2237 PixelProcessor::setAddressingModeU(sampler, addressMode);
2241 VertexProcessor::setAddressingModeU(sampler, addressMode);
// Sets the V addressing mode of `sampler`: PixelProcessor is used when type is
// SAMPLER_PIXEL; the VertexProcessor call is presumably the else branch (the
// keyword is not visible in this excerpt).
2245 void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2247 if(type == SAMPLER_PIXEL)
2249 PixelProcessor::setAddressingModeV(sampler, addressMode);
2253 VertexProcessor::setAddressingModeV(sampler, addressMode);
// Sets the W addressing mode of `sampler`: PixelProcessor is used when type is
// SAMPLER_PIXEL; the VertexProcessor call is presumably the else branch (the
// keyword is not visible in this excerpt).
2257 void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2259 if(type == SAMPLER_PIXEL)
2261 PixelProcessor::setAddressingModeW(sampler, addressMode);
2265 VertexProcessor::setAddressingModeW(sampler, addressMode);
// Sets the sRGB-read flag of `sampler`: PixelProcessor is used when type is
// SAMPLER_PIXEL; the VertexProcessor call is presumably the else branch (the
// keyword is not visible in this excerpt).
2269 void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2271 if(type == SAMPLER_PIXEL)
2273 PixelProcessor::setReadSRGB(sampler, sRGB);
2277 VertexProcessor::setReadSRGB(sampler, sRGB);
// Passes the mipmap LOD `bias` of `sampler` on: PixelProcessor is used when
// type is SAMPLER_PIXEL; the VertexProcessor call is presumably the else
// branch (the keyword is not visible in this excerpt).
2281 void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2283 if(type == SAMPLER_PIXEL)
2285 PixelProcessor::setMipmapLOD(sampler, bias);
2289 VertexProcessor::setMipmapLOD(sampler, bias);
// Sets the border color of `sampler`: PixelProcessor is used when type is
// SAMPLER_PIXEL; the VertexProcessor call is presumably the else branch (the
// keyword is not visible in this excerpt).
2293 void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2295 if(type == SAMPLER_PIXEL)
2297 PixelProcessor::setBorderColor(sampler, borderColor);
2301 VertexProcessor::setBorderColor(sampler, borderColor);
// Sets the maximum anisotropy of `sampler`: PixelProcessor is used when type
// is SAMPLER_PIXEL; the VertexProcessor call is presumably the else branch
// (the keyword is not visible in this excerpt).
2305 void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2307 if(type == SAMPLER_PIXEL)
2309 PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2313 VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
// Sets the red-channel swizzle of `sampler`: PixelProcessor is used when type
// is SAMPLER_PIXEL; the VertexProcessor call is presumably the else branch
// (the keyword is not visible in this excerpt).
2317 void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2319 if(type == SAMPLER_PIXEL)
2321 PixelProcessor::setSwizzleR(sampler, swizzleR);
2325 VertexProcessor::setSwizzleR(sampler, swizzleR);
// Sets the green-channel swizzle of `sampler`: PixelProcessor is used when
// type is SAMPLER_PIXEL; the VertexProcessor call is presumably the else
// branch (the keyword is not visible in this excerpt).
2329 void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2331 if(type == SAMPLER_PIXEL)
2333 PixelProcessor::setSwizzleG(sampler, swizzleG);
2337 VertexProcessor::setSwizzleG(sampler, swizzleG);
// Sets the blue-channel swizzle of `sampler`: PixelProcessor is used when type
// is SAMPLER_PIXEL; the VertexProcessor call is presumably the else branch
// (the keyword is not visible in this excerpt).
2341 void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2343 if(type == SAMPLER_PIXEL)
2345 PixelProcessor::setSwizzleB(sampler, swizzleB);
2349 VertexProcessor::setSwizzleB(sampler, swizzleB);
// Sets the alpha-channel swizzle of `sampler`: PixelProcessor is used when
// type is SAMPLER_PIXEL; the VertexProcessor call is presumably the else
// branch (the keyword is not visible in this excerpt).
2353 void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2355 if(type == SAMPLER_PIXEL)
2357 PixelProcessor::setSwizzleA(sampler, swizzleA);
2361 VertexProcessor::setSwizzleA(sampler, swizzleA);
// Sets the base level of `sampler`: PixelProcessor is used when type is
// SAMPLER_PIXEL; the VertexProcessor call is presumably the else branch (the
// keyword is not visible in this excerpt).
2365 void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel)
2367 if(type == SAMPLER_PIXEL)
2369 PixelProcessor::setBaseLevel(sampler, baseLevel);
2373 VertexProcessor::setBaseLevel(sampler, baseLevel);
// Sets the maximum level of `sampler`: PixelProcessor is used when type is
// SAMPLER_PIXEL; the VertexProcessor call is presumably the else branch (the
// keyword is not visible in this excerpt).
2377 void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel)
2379 if(type == SAMPLER_PIXEL)
2381 PixelProcessor::setMaxLevel(sampler, maxLevel);
2385 VertexProcessor::setMaxLevel(sampler, maxLevel);
// Sets the minimum LOD of `sampler`: PixelProcessor is used when type is
// SAMPLER_PIXEL; the VertexProcessor call is presumably the else branch (the
// keyword is not visible in this excerpt).
2389 void Renderer::setMinLod(SamplerType type, int sampler, float minLod)
2391 if(type == SAMPLER_PIXEL)
2393 PixelProcessor::setMinLod(sampler, minLod);
2397 VertexProcessor::setMinLod(sampler, minLod);
// Sets the maximum LOD of `sampler`: PixelProcessor is used when type is
// SAMPLER_PIXEL; the VertexProcessor call is presumably the else branch (the
// keyword is not visible in this excerpt).
2401 void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod)
2403 if(type == SAMPLER_PIXEL)
2405 PixelProcessor::setMaxLod(sampler, maxLod);
2409 VertexProcessor::setMaxLod(sampler, maxLod);
// Forwards the point-sprite enable flag to the context.
2413 void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2415 context->setPointSpriteEnable(pointSpriteEnable);
// Forwards the point-scale enable flag to the context.
2418 void Renderer::setPointScaleEnable(bool pointScaleEnable)
2420 context->setPointScaleEnable(pointScaleEnable);
// Stores the line width in the context.
2423 void Renderer::setLineWidth(float width)
2425 context->lineWidth = width;
// Takes a depth bias value.
// NOTE(review): the body is not visible in this excerpt — confirm what it
// stores against the full file.
2428 void Renderer::setDepthBias(float bias)
// Stores the slope-scaled depth bias in slopeDepthBias.
2433 void Renderer::setSlopeDepthBias(float slopeBias)
2435 slopeDepthBias = slopeBias;
// Stores the rasterizer-discard flag in the context.
2438 void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
2440 context->rasterizerDiscard = rasterizerDiscard;
// Makes `shader` the context's pixel shader and uploads the constants it
// defines via loadConstants (a no-op for a null shader).
2443 void Renderer::setPixelShader(const PixelShader *shader)
2445 context->pixelShader = shader;
2447 loadConstants(shader);
// Makes `shader` the context's vertex shader and uploads the constants it
// defines via loadConstants (a no-op for a null shader).
2450 void Renderer::setVertexShader(const VertexShader *shader)
2452 context->vertexShader = shader;
2454 loadConstants(shader);
// Sets `count` pixel shader float constants starting at register `index`.
// NOTE(review): no advance of `value` between iterations is visible in this
// excerpt — confirm against the full file.
2457 void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
// Raise every draw call's dirty mark to at least index + count.
2459 for(int i = 0; i < DRAW_COUNT; i++)
2461 if(drawCall[i]->psDirtyConstF < index + count)
2463 drawCall[i]->psDirtyConstF = index + count;
2467 for(int i = 0; i < count; i++)
2469 PixelProcessor::setFloatConstant(index + i, value);
// Sets `count` pixel shader integer constants starting at register `index`.
// NOTE(review): no advance of `value` between iterations is visible in this
// excerpt — confirm against the full file.
2474 void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
// Raise every draw call's dirty mark to at least index + count.
2476 for(int i = 0; i < DRAW_COUNT; i++)
2478 if(drawCall[i]->psDirtyConstI < index + count)
2480 drawCall[i]->psDirtyConstI = index + count;
2484 for(int i = 0; i < count; i++)
2486 PixelProcessor::setIntegerConstant(index + i, value);
// Sets `count` pixel shader boolean constants starting at register `index`.
// NOTE(review): no advance of `boolean` between iterations is visible in this
// excerpt — confirm against the full file.
2491 void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
// Raise every draw call's dirty mark to at least index + count.
2493 for(int i = 0; i < DRAW_COUNT; i++)
2495 if(drawCall[i]->psDirtyConstB < index + count)
2497 drawCall[i]->psDirtyConstB = index + count;
2501 for(int i = 0; i < count; i++)
2503 PixelProcessor::setBooleanConstant(index + i, *boolean);
// Sets `count` vertex shader float constants starting at register `index`.
// NOTE(review): no advance of `value` between iterations is visible in this
// excerpt — confirm against the full file.
2508 void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
// Raise every draw call's dirty mark to at least index + count.
2510 for(int i = 0; i < DRAW_COUNT; i++)
2512 if(drawCall[i]->vsDirtyConstF < index + count)
2514 drawCall[i]->vsDirtyConstF = index + count;
2518 for(int i = 0; i < count; i++)
2520 VertexProcessor::setFloatConstant(index + i, value);
// Sets `count` vertex shader integer constants starting at register `index`.
// NOTE(review): no advance of `value` between iterations is visible in this
// excerpt — confirm against the full file.
2525 void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
// Raise every draw call's dirty mark to at least index + count.
2527 for(int i = 0; i < DRAW_COUNT; i++)
2529 if(drawCall[i]->vsDirtyConstI < index + count)
2531 drawCall[i]->vsDirtyConstI = index + count;
2535 for(int i = 0; i < count; i++)
2537 VertexProcessor::setIntegerConstant(index + i, value);
// Sets `count` vertex shader boolean constants starting at register `index`.
// NOTE(review): no advance of `boolean` between iterations is visible in this
// excerpt — confirm against the full file.
2542 void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
// Raise every draw call's dirty mark to at least index + count.
2544 for(int i = 0; i < DRAW_COUNT; i++)
2546 if(drawCall[i]->vsDirtyConstB < index + count)
2548 drawCall[i]->vsDirtyConstB = index + count;
2552 for(int i = 0; i < count; i++)
2554 VertexProcessor::setBooleanConstant(index + i, *boolean);
// Forwards model matrix `M` for index `i` to the VertexProcessor.
2559 void Renderer::setModelMatrix(const Matrix &M, int i)
2561 VertexProcessor::setModelMatrix(M, i);
// Forwards the view matrix to the VertexProcessor and flags the clip planes
// for recomputation in updateClipper.
2564 void Renderer::setViewMatrix(const Matrix &V)
2566 VertexProcessor::setViewMatrix(V);
2567 updateClipPlanes = true;
// Forwards the base matrix to the VertexProcessor and flags the clip planes
// for recomputation in updateClipper.
2570 void Renderer::setBaseMatrix(const Matrix &B)
2572 VertexProcessor::setBaseMatrix(B);
2573 updateClipPlanes = true;
// Forwards the projection matrix to the VertexProcessor and flags the clip
// planes for recomputation in updateClipper.
2576 void Renderer::setProjectionMatrix(const Matrix &P)
2578 VertexProcessor::setProjectionMatrix(P);
2579 updateClipPlanes = true;
// Appends `query` to the renderer's query list.
2582 void Renderer::addQuery(Query *query)
2584 queries.push_back(query);
// Removes `query` from the renderer's query list.
2587 void Renderer::removeQuery(Query *query)
2589 queries.remove(query);
// Returns an int thread count.
// NOTE(review): the body is not visible in this excerpt — presumably it
// returns threadCount; confirm against the full file.
2593 int Renderer::getThreadCount()
// Returns the vertexTime entry recorded for `thread`.
2598 int64_t Renderer::getVertexTime(int thread)
2600 return vertexTime[thread];
// Returns the setupTime entry recorded for `thread`.
2603 int64_t Renderer::getSetupTime(int thread)
2605 return setupTime[thread];
// Returns the pixelTime entry recorded for `thread`.
2608 int64_t Renderer::getPixelTime(int thread)
2610 return pixelTime[thread];
// Zeroes the vertex, setup and pixel timers of every thread below threadCount.
2613 void Renderer::resetTimers()
2615 for(int thread = 0; thread < threadCount; thread++)
2617 vertexTime[thread] = 0;
2618 setupTime[thread] = 0;
2619 pixelTime[thread] = 0;
// Stores a copy of `viewport` in the renderer.
2624 void Renderer::setViewport(const Viewport &viewport)
2626 this->viewport = viewport;
// Stores a copy of the `scissor` rectangle in the renderer.
2629 void Renderer::setScissor(const Rect &scissor)
2631 this->scissor = scissor;
// Stores the user clip flags shifted left by 8 bits, leaving the low 8 bits
// for the legacy frustum flags (per the inline comment).
2634 void Renderer::setClipFlags(int flags)
2636 clipFlags = flags << 8; // Bottom 8 bits used by legacy frustum
// Stores `plane` as user clip plane `index` when index < MAX_CLIP_PLANES and
// flags the clip planes for recomputation in updateClipper.
// NOTE(review): braces and any else-branch are not visible in this excerpt.
2639 void Renderer::setClipPlane(unsigned int index, const float plane[4])
2641 if(index < MAX_CLIP_PLANES)
2643 userPlane[index] = plane;
2647 updateClipPlanes = true;
2650 void Renderer::updateConfiguration(bool initialUpdate)
2652 bool newConfiguration = swiftConfig->hasNewConfiguration();
2654 if(newConfiguration || initialUpdate)
2658 SwiftConfig::Configuration configuration = {};
2659 swiftConfig->getConfiguration(configuration);
2661 precacheVertex = !newConfiguration && configuration.precache;
2662 precacheSetup = !newConfiguration && configuration.precache;
2663 precachePixel = !newConfiguration && configuration.precache;
2665 VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2666 PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2667 SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2669 switch(configuration.textureSampleQuality)
2671 case 0: Sampler::setFilterQuality(FILTER_POINT); break;
2672 case 1: Sampler::setFilterQuality(FILTER_LINEAR); break;
2673 case 2: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2674 default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2677 switch(configuration.mipmapQuality)
2679 case 0: Sampler::setMipmapQuality(MIPMAP_POINT); break;
2680 case 1: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2681 default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2684 setPerspectiveCorrection(configuration.perspectiveCorrection);
2686 switch(configuration.transcendentalPrecision)
2689 logPrecision = APPROXIMATE;
2690 expPrecision = APPROXIMATE;
2691 rcpPrecision = APPROXIMATE;
2692 rsqPrecision = APPROXIMATE;
2695 logPrecision = PARTIAL;
2696 expPrecision = PARTIAL;
2697 rcpPrecision = PARTIAL;
2698 rsqPrecision = PARTIAL;
2701 logPrecision = ACCURATE;
2702 expPrecision = ACCURATE;
2703 rcpPrecision = ACCURATE;
2704 rsqPrecision = ACCURATE;
2707 logPrecision = WHQL;
2708 expPrecision = WHQL;
2709 rcpPrecision = WHQL;
2710 rsqPrecision = WHQL;
2713 logPrecision = IEEE;
2714 expPrecision = IEEE;
2715 rcpPrecision = IEEE;
2716 rsqPrecision = IEEE;
2719 logPrecision = ACCURATE;
2720 expPrecision = ACCURATE;
2721 rcpPrecision = ACCURATE;
2722 rsqPrecision = ACCURATE;
2726 switch(configuration.transparencyAntialiasing)
2728 case 0: transparencyAntialiasing = TRANSPARENCY_NONE; break;
2729 case 1: transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2730 default: transparencyAntialiasing = TRANSPARENCY_NONE; break;
2733 switch(configuration.threadCount)
2735 case -1: threadCount = CPUID::coreCount(); break;
2736 case 0: threadCount = CPUID::processAffinity(); break;
2737 default: threadCount = configuration.threadCount; break;
2740 CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2741 CPUID::setEnableSSSE3(configuration.enableSSSE3);
2742 CPUID::setEnableSSE3(configuration.enableSSE3);
2743 CPUID::setEnableSSE2(configuration.enableSSE2);
2744 CPUID::setEnableSSE(configuration.enableSSE);
2746 for(int pass = 0; pass < 10; pass++)
2748 optimization[pass] = configuration.optimization[pass];
2751 forceWindowed = configuration.forceWindowed;
2752 complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2753 postBlendSRGB = configuration.postBlendSRGB;
2754 exactColorRounding = configuration.exactColorRounding;
2755 forceClearRegisters = configuration.forceClearRegisters;
2758 minPrimitives = configuration.minPrimitives;
2759 maxPrimitives = configuration.maxPrimitives;
2763 if(!initialUpdate && !worker[0])
2765 initializeThreads();