1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "Renderer.hpp"
17 #include "Clipper.hpp"
19 #include "FrameBuffer.hpp"
21 #include "Surface.hpp"
23 #include "Primitive.hpp"
24 #include "Polygon.hpp"
25 #include "SwiftConfig.hpp"
26 #include "MutexLock.hpp"
29 #include "Resource.hpp"
30 #include "Constants.hpp"
32 #include "Reactor/Reactor.hpp"
// File-level renderer settings.
// disableServer is passed to the SwiftConfig constructor by Renderer::Renderer.
36 bool disableServer = true;
// Bounds that Renderer::draw compares its primitive count against.
39 unsigned int minPrimitives = 1;
40 unsigned int maxPrimitives = 1 << 21;
// Flags defined in another translation unit. The first six, plus exactColorRounding,
// are assigned by the Renderer constructor from its arguments.
45 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates
46 extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1]
47 extern bool booleanFaceRegister;
48 extern bool fullPixelPositionRegister;
49 extern bool leadingVertexFirst; // Flat shading uses first vertex, else last
50 extern bool secondaryColor; // Specular lighting is applied after texturing
52 extern bool forceWindowed;
// Read by Renderer::draw when deriving the depth range for a draw.
53 extern bool complementaryDepthBuffer;
54 extern bool postBlendSRGB;
55 extern bool exactColorRounding;
56 extern TransparencyAntialiasing transparencyAntialiasing;
57 extern bool forceClearRegisters;
59 extern bool precacheVertex;
60 extern bool precacheSetup;
61 extern bool precachePixel;
// Precision selectors for transcendental functions. Renderer::threadFunction compares
// logPrecision against IEEE to decide whether to enable flush-to-zero / denormals-are-zero.
68 TranscendentalPrecision logPrecision = ACCURATE;
69 TranscendentalPrecision expPrecision = ACCURATE;
70 TranscendentalPrecision rcpPrecision = ACCURATE;
71 TranscendentalPrecision rsqPrecision = ACCURATE;
72 bool perspectiveCorrection = true;
// NOTE(review): the enclosing function header is not visible in this listing; these statements
// presumably belong to a DrawCall constructor - confirm against the full file.
// Initial dirty-constant counts. Renderer::draw copies that many float constants into the
// draw data when a shader is bound and then resets the count to 0.
84 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
88 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
// The DrawData block is obtained from allocate() (not new) and stores the address of `constants`.
94 data = (DrawData*)allocate(sizeof(DrawData));
95 data->constants = &constants;
// Constructs the renderer.
//   context            - forwarded to the VertexProcessor, PixelProcessor and SetupProcessor bases and kept as a member.
//   conventions        - API conventions copied into the sw:: global flags below.
//   exactColorRounding - copied into sw::exactColorRounding.
// Also creates the clipper, the DrawCall ring, the progress trackers, the configuration
// object and the sync resource.
105 Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
// Publish the caller's conventions to the sw:: global flags.
107 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
108 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
109 sw::booleanFaceRegister = conventions.booleanFaceRegister;
110 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
111 sw::leadingVertexFirst = conventions.leadingVertexFirst;
112 sw::secondaryColor = conventions.secondaryColor;
113 sw::exactColorRounding = exactColorRounding;
// NOTE(review): presumably resets render target 0 to "none" - confirm the argument meaning.
115 setRenderTarget(0, 0);
// The clipper is configured with the depth convention that was just published.
116 clipper = new Clipper(symmetricNormalizedDepth);
// Flag all cached matrices and the clip planes as needing an update.
118 updateViewMatrix = true;
119 updateBaseMatrix = true;
120 updateProjectionMatrix = true;
121 updateClipPlanes = true;
// (the body of this loop is not visible in this listing)
127 for(int i = 0; i < 16; i++)
// Event signalled by finishRendering once a draw has been fully released.
137 resumeApp = new Event();
// Per-unit batch buffers start out as null pointers.
145 for(int i = 0; i < 16; i++)
147 triangleBatch[i] = 0;
148 primitiveBatch[i] = 0;
// Allocate every DrawCall up front; drawList initially mirrors drawCall.
151 for(int draw = 0; draw < DRAW_COUNT; draw++)
153 drawCall[draw] = new DrawCall();
154 drawList[draw] = drawCall[draw];
// Initialise the primitive-unit and pixel-cluster progress trackers.
157 for(int unit = 0; unit < 16; unit++)
159 primitiveProgress[unit].init();
162 for(int cluster = 0; cluster < 16; cluster++)
164 pixelProgress[cluster].init();
// Create the configuration object and apply it.
// NOTE(review): the `true` argument presumably requests an initial/forced update - confirm.
169 swiftConfig = new SwiftConfig(disableServer);
170 updateConfiguration(true);
// Resource that draw() locks as PRIVATE and synchronize() locks as PUBLIC.
172 sync = new Resource(0);
// Destroys the renderer. Only the release of the DrawCall objects allocated by the
// constructor is visible in this listing; other teardown steps are not shown here.
175 Renderer::~Renderer()
185 for(int draw = 0; draw < DRAW_COUNT; draw++)
187 delete drawCall[draw];
// Clears region dRect of surface `dest`; all arguments (clear value pointer, its format,
// destination, rectangle and channel mask) are forwarded unchanged to the blitter.
193 void Renderer::clear(void *pixel, Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
195 blitter.clear(pixel, format, dest, dRect, rgbaMask);
// Copies sRect of `source` to dRect of `dest`; all arguments, including the `filter`
// flag, are forwarded unchanged to the blitter.
198 void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter)
200 blitter.blit(source, sRect, dest, dRect, filter);
// 3D blit from `source` to `dest`; forwarded unchanged to the blitter.
203 void Renderer::blit3D(Surface *source, Surface *dest)
205 blitter.blit3D(source, dest);
// Queues a draw of `count` primitives.
//   drawType    - primitive topology; stored in the context and in the DrawCall.
//   indexOffset - byte offset added to the locked index buffer pointer.
//   count       - primitive count; compared against minPrimitives/maxPrimitives and used to derive the batch count.
//   update      - when true, processor states and routines are refreshed even if the sample mask is unchanged.
// For every supersample pass the function snapshots state into a DrawCall/DrawData pair, locks the
// resources that pair references (finishRendering unlocks them) and enters the scheduler section.
208 void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
// (the statement guarded by this range check is not visible in this listing)
211 if(count < minPrimitives || count > maxPrimitives)
217 context->drawType = drawType;
219 updateConfiguration();
222 int ss = context->getSuperSampleCount();
223 int ms = context->getMultiSampleCount();
// One pass per supersample; each pass uses the ms-bit slice of the sample mask selected by q.
225 for(int q = 0; q < ss; q++)
227 unsigned int oldMultiSampleMask = context->multiSampleMask;
228 context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
// (the action taken for an empty mask is not visible in this listing)
230 if(!context->multiSampleMask)
// Taken as PRIVATE here; synchronize() takes the same resource as PUBLIC.
235 sync->lock(sw::PRIVATE);
// Rebuild state and routines only when requested or when the effective sample mask changed.
237 if(update || oldMultiSampleMask != context->multiSampleMask)
239 vertexState = VertexProcessor::update(drawType);
240 setupState = SetupProcessor::update();
241 pixelState = PixelProcessor::update();
243 vertexRoutine = VertexProcessor::routine(vertexState);
244 setupRoutine = SetupProcessor::routine(setupState);
245 pixelRoutine = PixelProcessor::routine(pixelState);
// Primitives per batch: the global batch size divided by the multisample count.
248 int batch = batchSize / ms;
// Member-function pointer selecting the primitive setup path for this draw.
250 int (Renderer::*setupPrimitives)(int batch, int count);
// Triangles pick a setup function by fill mode (case labels are not visible in this listing);
// lines and points each have a dedicated setup function.
252 if(context->isDrawTriangle())
254 switch(context->fillMode)
257 setupPrimitives = &Renderer::setupSolidTriangles;
260 setupPrimitives = &Renderer::setupWireframeTriangle;
264 setupPrimitives = &Renderer::setupVertexTriangle;
267 default: ASSERT(false);
270 else if(context->isDrawLine())
272 setupPrimitives = &Renderer::setupLines;
276 setupPrimitives = &Renderer::setupPoints;
// Look for a DrawCall whose references is -1, the value finishRendering stores once a draw
// has been released. (The statement that assigns `draw` is not visible in this listing.)
283 for(int i = 0; i < DRAW_COUNT; i++)
285 if(drawCall[i]->references == -1)
288 drawList[nextDraw % DRAW_COUNT] = draw;
301 DrawData *data = draw->data;
// Attach the active queries to the draw, taking a reference on each. Queries of type
// TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN are attached only when both transform feedback
// and its query are enabled in the vertex state.
303 if(queries.size() != 0)
305 draw->queries = new std::list<Query*>();
306 bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
307 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
310 if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
312 atomicIncrement(&(q->reference));
313 draw->queries->push_back(q);
318 draw->drawType = drawType;
319 draw->batchSize = batch;
// bind() the routines for the duration of the draw; finishRendering calls unbind() on them.
321 vertexRoutine->bind();
322 setupRoutine->bind();
323 pixelRoutine->bind();
// Record the routines and their entry points so worker threads can call them directly.
325 draw->vertexRoutine = vertexRoutine;
326 draw->setupRoutine = setupRoutine;
327 draw->pixelRoutine = pixelRoutine;
328 draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
329 draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
330 draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
331 draw->setupPrimitives = setupPrimitives;
332 draw->setupState = setupState;
// Snapshot the vertex inputs and lock every stream that has a backing resource.
334 for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
336 draw->vertexStream[i] = context->input[i].resource;
337 data->input[i] = context->input[i].buffer;
338 data->stride[i] = context->input[i].stride;
340 if(draw->vertexStream[i])
342 draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
// Lock the index buffer, if any, and apply the byte offset.
346 if(context->indexBuffer)
348 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
351 draw->indexBuffer = context->indexBuffer;
// Clear all texture slots, then fill and lock the pixel-stage ones that are actually sampled.
353 for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
355 draw->texture[sampler] = 0;
358 for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
360 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
362 draw->texture[sampler] = context->texture[sampler];
363 draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE); // If the texture is both read and written, use the same read/write lock as render targets
365 data->mipmap[sampler] = context->sampler[sampler].getTextureData();
// Pixel shader constants: copy only the dirty count of each constant array, then reset the
// count. The cW copy is capped at 8 entries.
369 if(context->pixelShader)
371 if(draw->psDirtyConstF)
373 memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
374 memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
375 draw->psDirtyConstF = 0;
378 if(draw->psDirtyConstI)
380 memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
381 draw->psDirtyConstI = 0;
384 if(draw->psDirtyConstB)
386 memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
387 draw->psDirtyConstB = 0;
390 PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
// NOTE(review): presumably the no-pixel-shader branch (its header is not visible): no uniform buffers are held.
394 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
396 draw->pUniformBuffers[i] = nullptr;
// For pixel shader versions up to 0x0104, copy the uniforms of the 8 texture stages that
// are enabled (or of all 8 when a pixel shader is bound).
400 if(context->pixelShaderVersion() <= 0x0104)
402 for(int stage = 0; stage < 8; stage++)
404 if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
406 data->textureStage[stage] = context->textureStage[stage].uniforms;
// Vertex shader: vertex textures (shader version >= 0x0300 only), dirty constants,
// instance ID, uniform buffers and transform feedback buffers.
412 if(context->vertexShader)
414 if(context->vertexShader->getVersion() >= 0x0300)
416 for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
418 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
// Vertex-stage textures occupy the slots after the TEXTURE_IMAGE_UNITS pixel-stage slots.
420 draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
421 draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
423 data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
428 if(draw->vsDirtyConstF)
430 memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
431 draw->vsDirtyConstF = 0;
434 if(draw->vsDirtyConstI)
436 memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
437 draw->vsDirtyConstI = 0;
440 if(draw->vsDirtyConstB)
442 memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
443 draw->vsDirtyConstB = 0;
446 if(context->vertexShader->instanceIdDeclared)
448 data->instanceID = context->instanceID;
451 VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
452 VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
// NOTE(review): presumably the no-vertex-shader branch (its header is not visible): mark all
// vertex constants dirty again and hold no uniform or transform feedback buffers.
458 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
459 draw->vsDirtyConstI = 16;
460 draw->vsDirtyConstB = 16;
462 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
464 draw->vUniformBuffers[i] = nullptr;
467 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
469 draw->transformFeedbackBuffers[i] = nullptr;
// Copy fixed-function state that the pixel/setup state marks as in use.
473 if(pixelState.stencilActive)
475 data->stencil[0] = stencil;
476 data->stencil[1] = stencilCCW;
// (the bodies of the fog and point branches are not visible in this listing)
479 if(pixelState.fogActive)
484 if(setupState.isDrawPoint)
489 data->lineWidth = context->lineWidth;
491 data->factor = factor;
// Alpha-to-coverage thresholds spread around the alpha reference; `margin` keeps them within [0, 1].
// Four thresholds are written in one branch and two in another (the branch conditions are not
// visible in this listing).
493 if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
495 float ref = context->alphaReference * (1.0f / 255.0f);
496 float margin = sw::min(ref, 1.0f - ref);
500 data->a2c0 = replicate(ref - margin * 0.6f);
501 data->a2c1 = replicate(ref - margin * 0.2f);
502 data->a2c2 = replicate(ref + margin * 0.2f);
503 data->a2c3 = replicate(ref + margin * 0.6f);
507 data->a2c0 = replicate(ref - margin * 0.3f);
508 data->a2c1 = replicate(ref + margin * 0.3f);
// Reset the per-cluster occlusion counters; finishRendering sums them into FRAGMENTS_PASSED queries.
513 if(pixelState.occlusionEnabled)
515 for(int cluster = 0; cluster < clusterCount; cluster++)
517 data->occlusion[cluster] = 0;
// Reset the per-cluster performance counters; finishRendering adds them to the profiler.
522 for(int cluster = 0; cluster < clusterCount; cluster++)
524 for(int i = 0; i < PERF_TIMERS; i++)
526 data->cycles[i][cluster] = 0;
// Viewport transform inputs: W/H are half extents, (X0, Y0) the viewport centre,
// N/F the viewport's minZ/maxZ.
533 float W = 0.5f * viewport.width;
534 float H = 0.5f * viewport.height;
535 float X0 = viewport.x0 + W;
536 float Y0 = viewport.y0 + H;
537 float N = viewport.minZ;
538 float F = viewport.maxZ;
// (the bodies of the next two conditions, and the definition of Z used below, are not visible in this listing)
541 if(context->isDrawTriangle(false))
546 if(complementaryDepthBuffer)
// Per-pass sub-pixel offsets, indexed as [log2(supersample count)][pass q]; unused entries are 0.
552 static const float X[5][16] = // Fragment offsets
554 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample
555 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples
556 {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples
557 {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples
558 {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f} // 16 samples
561 static const float Y[5][16] = // Fragment offsets
563 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample
564 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples
565 {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples
566 {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples
567 {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f} // 16 samples
// Row of the offset tables for the current supersample count.
570 int s = sw::log2(ss);
// Viewport scale and offset are stored multiplied by 16, with 8 subtracted from the offsets.
// NOTE(review): presumably 4-bit sub-pixel fixed point with a half-pixel bias - confirm.
572 data->Wx16 = replicate(W * 16);
573 data->Hx16 = replicate(H * 16);
574 data->X0x16 = replicate(X0 * 16 - 8);
575 data->Y0x16 = replicate(Y0 * 16 - 8);
// This pass's sample offset and the half-pixel size, both divided by the viewport half extents.
576 data->XXXX = replicate(X[s][q] / W);
577 data->YYYY = replicate(Y[s][q] / H);
578 data->halfPixelX = replicate(0.5f / W);
579 data->halfPixelY = replicate(0.5f / H);
580 data->viewportHeight = abs(viewport.height);
581 data->slopeDepthBias = slopeDepthBias;
582 data->depthRange = Z;
584 draw->clipFlags = clipFlags;
// Copy only the user clip planes whose flag is set.
588 if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
589 if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
590 if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
591 if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
592 if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
593 if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
// Lock colour, depth and stencil targets for read/write at slice q * ms and record their
// pitches; finishRendering releases them.
599 for(int index = 0; index < RENDERTARGETS; index++)
601 draw->renderTarget[index] = context->renderTarget[index];
603 if(draw->renderTarget[index])
605 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
606 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
607 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
611 draw->depthBuffer = context->depthBuffer;
612 draw->stencilBuffer = context->stencilBuffer;
614 if(draw->depthBuffer)
616 data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
617 data->depthPitchB = context->depthBuffer->getInternalPitchB();
618 data->depthSliceB = context->depthBuffer->getInternalSliceB();
621 if(draw->stencilBuffer)
623 data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(q * ms, MANAGED);
624 data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
625 data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
631 data->scissorX0 = scissor.x0;
632 data->scissorX1 = scissor.x1;
633 data->scissorY0 = scissor.y0;
634 data->scissorY1 = scissor.y1;
// Number of batches in this draw, rounded up; finishRendering decrements it as batches are released.
640 draw->references = (count + batch - 1) / batch;
// Scheduler state is modified under schedulerMutex (the guarded statements are not visible in this listing).
642 schedulerMutex.lock();
644 schedulerMutex.unlock();
// Mark task 0 as RESUME so that execution starts, either on worker threads or, in the
// branch below, on the calling thread.
653 task[0].type = Task::RESUME;
658 else // Use main thread for draw execution
661 task[0].type = Task::RESUME;
// Thread entry point. `parameters` is cast to Parameters* to obtain the renderer and the thread index.
668 void Renderer::threadFunction(void *parameters)
670 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
671 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
// When logarithm precision is below IEEE, enable flush-to-zero and denormals-are-zero.
673 if(logPrecision < IEEE)
675 CPUID::setFlushToZero(true);
676 CPUID::setDenormalsAreZero(true);
// Hand control to the renderer's loop for this thread index.
679 renderer->threadLoop(threadIndex);
// Worker thread body: runs the task loop, then signals suspend[threadIndex] and waits on
// resume[threadIndex]. NOTE(review): the enclosing loop construct is not visible in this listing.
682 void Renderer::threadLoop(int threadIndex)
686 taskLoop(threadIndex);
688 suspend[threadIndex]->signal();
689 resume[threadIndex]->wait();
// Alternately schedules and executes tasks for this thread until scheduleTask marks its
// task as Task::SUSPEND.
693 void Renderer::taskLoop(int threadIndex)
695 while(task[threadIndex].type != Task::SUSPEND)
697 scheduleTask(threadIndex);
698 executeTask(threadIndex);
// Appends runnable work to taskQueue. First, idle pixel clusters are paired with a primitive
// unit holding the batch that cluster must render next; then unprocessed primitive batches of
// the current draw are handed to free primitive units. Called from scheduleTask while
// schedulerMutex is held.
702 void Renderer::findAvailableTasks()
// Pixel tasks: a cluster takes a unit's batch only when both refer to the same draw and the
// cluster has finished every primitive before the batch's first one, keeping rendering in order.
705 for(int cluster = 0; cluster < clusterCount; cluster++)
707 if(!pixelProgress[cluster].executing)
709 for(int unit = 0; unit < unitCount; unit++)
711 if(primitiveProgress[unit].references > 0) // Contains processed primitives
713 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
715 if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive) // Previous primitives have been rendered
717 Task &task = taskQueue[qHead];
718 task.type = Task::PIXELS;
719 task.primitiveUnit = unit;
720 task.pixelCluster = cluster;
// Cleared again at the end of finishRendering.
722 pixelProgress[cluster].executing = true;
724 // Commit to the task queue
// The queue is a ring of 32 entries.
725 qHead = (qHead + 1) % 32;
736 // Find primitive tasks
737 if(currentDraw == nextDraw)
739 return; // No more primitives to process
742 for(int unit = 0; unit < unitCount; unit++)
744 DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
// The current draw has no primitives left (the statement that advances currentDraw is not
// visible in this listing); re-check for pending draws and reload `draw`.
746 if(draw->primitive >= draw->count)
750 if(currentDraw == nextDraw)
752 return; // No more primitives to process
755 draw = drawList[currentDraw % DRAW_COUNT];
758 if(!primitiveProgress[unit].references) // Task not already being executed and not still in use by a pixel unit
760 int primitive = draw->primitive;
761 int count = draw->count;
762 int batch = draw->batchSize;
764 primitiveProgress[unit].drawCall = currentDraw;
765 primitiveProgress[unit].firstPrimitive = primitive;
// The last batch of a draw may be shorter than the batch size.
766 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
768 draw->primitive += batch;
770 Task &task = taskQueue[qHead];
771 task.type = Task::PRIMITIVES;
772 task.primitiveUnit = unit;
// -1 is non-zero, so the unit is not picked again here, and not positive, so no pixel task
// takes it until executeTask sets references to clusterCount.
774 primitiveProgress[unit].references = -1;
776 // Commit to the task queue
777 qHead = (qHead + 1) % 32;
// Selects the next task for thread `threadIndex` while holding schedulerMutex. Several
// statements and branch headers of this function are not visible in this listing.
783 void Renderer::scheduleTask(int threadIndex)
785 schedulerMutex.lock();
// Refill the queue when it holds fewer tasks than the number of sleeping threads plus one.
787 if((int)qSize < threadCount - threadsAwake + 1)
789 findAvailableTasks();
// Take the entry qSize positions behind qHead in the 32-entry ring.
// NOTE(review): presumably the oldest pending task, assuming qSize counts pending entries.
794 task[threadIndex] = taskQueue[(qHead - qSize) % 32];
// Not every thread is awake: resume suspended threads while `wakeup` is positive.
797 if(threadsAwake != threadCount)
799 int wakeup = qSize - threadsAwake + 1;
801 for(int i = 0; i < threadCount && wakeup > 0; i++)
803 if(task[i].type == Task::SUSPEND)
806 task[i].type = Task::RESUME;
// NOTE(review): presumably the no-work branch (its header is not visible) - this thread suspends.
817 task[threadIndex].type = Task::SUSPEND;
822 schedulerMutex.unlock();
// Runs the task currently assigned to `threadIndex`: either vertex processing plus primitive
// setup for one batch, or pixel processing of one batch on one cluster. Tick differences are
// accumulated into the per-thread vertex/setup/pixel timers.
825 void Renderer::executeTask(int threadIndex)
828 int64_t startTick = Timer::ticks();
831 switch(task[threadIndex].type)
833 case Task::PRIMITIVES:
835 int unit = task[threadIndex].primitiveUnit;
837 int input = primitiveProgress[unit].firstPrimitive;
838 int count = primitiveProgress[unit].primitiveCount;
839 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
840 int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;
// draw->count is passed as the `loop` modulus used by the line-loop topologies.
842 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
845 int64_t time = Timer::ticks();
846 vertexTime[threadIndex] += time - startTick;
// Setup is skipped when rasterizer discard is enabled (the declaration of `visible` is not
// visible in this listing).
852 if(!draw->setupState.rasterizerDiscard)
854 visible = (this->*setupPrimitives)(unit, count);
// Publish the batch: each of the clusterCount clusters must release it (see finishRendering).
857 primitiveProgress[unit].visible = visible;
858 primitiveProgress[unit].references = clusterCount;
861 setupTime[threadIndex] += Timer::ticks() - startTick;
// NOTE(review): the lines below presumably belong to the Task::PIXELS case (its label is not visible).
867 int unit = task[threadIndex].primitiveUnit;
868 int visible = primitiveProgress[unit].visible;
872 int cluster = task[threadIndex].pixelCluster;
873 Primitive *primitive = primitiveBatch[unit];
874 DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
875 DrawData *data = draw->data;
876 PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
// Call the draw's pixel routine entry point for this cluster.
878 pixelRoutine(primitive, visible, cluster, data);
// Release the batch for this cluster and, when it is the last reference, the whole draw.
881 finishRendering(task[threadIndex]);
884 pixelTime[threadIndex] += Timer::ticks() - startTick;
// Acquires the sync resource with PUBLIC access; draw() acquires the same resource as PRIVATE
// for each pass. (Any matching unlock is not visible in this listing.)
897 void Renderer::synchronize()
899 sync->lock(sw::PUBLIC);
// Bookkeeping after a pixel task: advances the cluster's progress, drops this cluster's
// reference on the primitive batch and then on the draw, and releases what the draw holds
// (query references, surface/texture/buffer locks, routine bindings). The conditions guarding
// the release steps are not visible in this listing.
//   pixelTask - the pixel task that just ran; supplies the primitive unit and pixel cluster.
903 void Renderer::finishRendering(Task &pixelTask)
905 int unit = pixelTask.primitiveUnit;
906 int cluster = pixelTask.pixelCluster;
908 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
909 DrawData &data = *draw.data;
910 int primitive = primitiveProgress[unit].firstPrimitive;
911 int count = primitiveProgress[unit].primitiveCount;
912 int processedPrimitives = primitive + count;
// Record how far this cluster has rendered; findAvailableTasks matches it against a batch's firstPrimitive.
914 pixelProgress[cluster].processedPrimitives = processedPrimitives;
// The whole draw has been rendered on this cluster: move the cluster to the next draw.
916 if(pixelProgress[cluster].processedPrimitives >= draw.count)
918 pixelProgress[cluster].drawCall++;
919 pixelProgress[cluster].processedPrimitives = 0;
// One fewer cluster references this batch; then one fewer batch references the draw.
922 int ref = atomicDecrement(&primitiveProgress[unit].references);
926 ref = atomicDecrement(&draw.references);
// Add the draw's per-cluster cycle counters to the profiler.
931 for(int cluster = 0; cluster < clusterCount; cluster++)
933 for(int i = 0; i < PERF_TIMERS; i++)
935 profiler.cycles[i] += data.cycles[i][cluster];
// Fold results into the attached queries and drop the reference taken in draw().
942 for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
948 case Query::FRAGMENTS_PASSED:
949 for(int cluster = 0; cluster < clusterCount; cluster++)
951 atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
954 case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
955 atomicAdd((volatile int*)&query->data, processedPrimitives);
961 atomicDecrement(&query->reference);
// Release every lock taken in draw().
968 for(int i = 0; i < RENDERTARGETS; i++)
970 if(draw.renderTarget[i])
972 draw.renderTarget[i]->unlockInternal();
978 draw.depthBuffer->unlockInternal();
981 if(draw.stencilBuffer)
983 draw.stencilBuffer->unlockStencil();
986 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
990 draw.texture[i]->unlock();
994 for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
996 if(draw.vertexStream[i])
998 draw.vertexStream[i]->unlock();
1002 if(draw.indexBuffer)
1004 draw.indexBuffer->unlock();
1007 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
1009 if(draw.pUniformBuffers[i])
1011 draw.pUniformBuffers[i]->unlock();
1013 if(draw.vUniformBuffers[i])
1015 draw.vUniformBuffers[i]->unlock();
1019 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
1021 if(draw.transformFeedbackBuffers[i])
1023 draw.transformFeedbackBuffers[i]->unlock();
// Undo the bind() calls made in draw().
1027 draw.vertexRoutine->unbind();
1028 draw.setupRoutine->unbind();
1029 draw.pixelRoutine->unbind();
// -1 is the value draw() looks for when picking a DrawCall; then signal the resumeApp event.
1033 draw.references = -1;
1034 resumeApp->signal();
// The cluster may be given another pixel task by findAvailableTasks.
1038 pixelProgress[cluster].executing = false;
// Builds one vertex-index triple per primitive in batch[][] according to the draw's topology
// and then calls the draw's vertex routine on them.
//   unit          - primitive unit; selects the triangle batch and the progress record.
//   start         - index of the first primitive of this batch within the draw.
//   triangleCount - number of primitives in this batch (points and lines are also stored as triples).
//   loop          - modulus used by the line-loop topologies; executeTask passes draw->count.
//   thread        - selects the VertexTask (and its vertex cache) to use.
// Several case labels and the statements that advance `index` are not visible in this listing.
1041 void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
1043 Triangle *triangle = triangleBatch[unit];
1044 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1045 DrawData *data = draw->data;
1046 VertexTask *task = vertexTask[thread];
1048 const void *indices = data->indices;
1049 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
// The vertex cache is tagged with a draw number; clear it when this batch belongs to another draw.
1051 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
1053 task->vertexCache.clear();
1054 task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
// Fixed capacity of 128 primitives; no bounds check on triangleCount is visible in this listing.
1057 unsigned int batch[128][3]; // FIXME: Adjust to dynamic batch size
1059 switch(draw->drawType)
// Points: all three indices are the same vertex.
1061 case DRAW_POINTLIST:
1063 unsigned int index = start;
1065 for(unsigned int i = 0; i < triangleCount; i++)
1067 batch[i][0] = index;
1068 batch[i][1] = index;
1069 batch[i][2] = index;
// Two vertices per line starting at 2 * start (case label not visible); the third index repeats the second.
1077 unsigned int index = 2 * start;
1079 for(unsigned int i = 0; i < triangleCount; i++)
1081 batch[i][0] = index + 0;
1082 batch[i][1] = index + 1;
1083 batch[i][2] = index + 1;
// Line strip: consecutive vertices form a line.
1089 case DRAW_LINESTRIP:
1091 unsigned int index = start;
1093 for(unsigned int i = 0; i < triangleCount; i++)
1095 batch[i][0] = index + 0;
1096 batch[i][1] = index + 1;
1097 batch[i][2] = index + 1;
// As above but with indices wrapped modulo `loop` (case label not visible).
1105 unsigned int index = start;
1107 for(unsigned int i = 0; i < triangleCount; i++)
1109 batch[i][0] = (index + 0) % loop;
1110 batch[i][1] = (index + 1) % loop;
1111 batch[i][2] = (index + 1) % loop;
// Triangle list: three consecutive vertices per triangle.
1117 case DRAW_TRIANGLELIST:
1119 unsigned int index = 3 * start;
1121 for(unsigned int i = 0; i < triangleCount; i++)
1123 batch[i][0] = index + 0;
1124 batch[i][1] = index + 1;
1125 batch[i][2] = index + 2;
// Triangle strip: the parity of `index` swaps the second and third vertex.
// NOTE(review): presumably to keep a consistent winding order - confirm.
1131 case DRAW_TRIANGLESTRIP:
1133 unsigned int index = start;
1135 for(unsigned int i = 0; i < triangleCount; i++)
1137 batch[i][0] = index + 0;
1138 batch[i][1] = index + (index & 1) + 1;
1139 batch[i][2] = index + (~index & 1) + 1;
// Triangle fan (the assignment of batch[i][2] is not visible in this listing).
1145 case DRAW_TRIANGLEFAN:
1147 unsigned int index = start;
1149 for(unsigned int i = 0; i < triangleCount; i++)
1151 batch[i][0] = index + 1;
1152 batch[i][1] = index + 2;
// Indexed variants: same patterns as above, reading vertex numbers from the index buffer
// with 8-, 16- or 32-bit elements.
1159 case DRAW_INDEXEDPOINTLIST8:
1161 const unsigned char *index = (const unsigned char*)indices + start;
1163 for(unsigned int i = 0; i < triangleCount; i++)
1165 batch[i][0] = *index;
1166 batch[i][1] = *index;
1167 batch[i][2] = *index;
1173 case DRAW_INDEXEDPOINTLIST16:
1175 const unsigned short *index = (const unsigned short*)indices + start;
1177 for(unsigned int i = 0; i < triangleCount; i++)
1179 batch[i][0] = *index;
1180 batch[i][1] = *index;
1181 batch[i][2] = *index;
1187 case DRAW_INDEXEDPOINTLIST32:
1189 const unsigned int *index = (const unsigned int*)indices + start;
1191 for(unsigned int i = 0; i < triangleCount; i++)
1193 batch[i][0] = *index;
1194 batch[i][1] = *index;
1195 batch[i][2] = *index;
1201 case DRAW_INDEXEDLINELIST8:
1203 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1205 for(unsigned int i = 0; i < triangleCount; i++)
1207 batch[i][0] = index[0];
1208 batch[i][1] = index[1];
1209 batch[i][2] = index[1];
1215 case DRAW_INDEXEDLINELIST16:
1217 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1219 for(unsigned int i = 0; i < triangleCount; i++)
1221 batch[i][0] = index[0];
1222 batch[i][1] = index[1];
1223 batch[i][2] = index[1];
1229 case DRAW_INDEXEDLINELIST32:
1231 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1233 for(unsigned int i = 0; i < triangleCount; i++)
1235 batch[i][0] = index[0];
1236 batch[i][1] = index[1];
1237 batch[i][2] = index[1];
1243 case DRAW_INDEXEDLINESTRIP8:
1245 const unsigned char *index = (const unsigned char*)indices + start;
1247 for(unsigned int i = 0; i < triangleCount; i++)
1249 batch[i][0] = index[0];
1250 batch[i][1] = index[1];
1251 batch[i][2] = index[1];
1257 case DRAW_INDEXEDLINESTRIP16:
1259 const unsigned short *index = (const unsigned short*)indices + start;
1261 for(unsigned int i = 0; i < triangleCount; i++)
1263 batch[i][0] = index[0];
1264 batch[i][1] = index[1];
1265 batch[i][2] = index[1];
1271 case DRAW_INDEXEDLINESTRIP32:
1273 const unsigned int *index = (const unsigned int*)indices + start;
1275 for(unsigned int i = 0; i < triangleCount; i++)
1277 batch[i][0] = index[0];
1278 batch[i][1] = index[1];
1279 batch[i][2] = index[1];
// Indexed line loops address the index buffer from its start and wrap the position modulo `loop`.
1285 case DRAW_INDEXEDLINELOOP8:
1287 const unsigned char *index = (const unsigned char*)indices;
1289 for(unsigned int i = 0; i < triangleCount; i++)
1291 batch[i][0] = index[(start + i + 0) % loop];
1292 batch[i][1] = index[(start + i + 1) % loop];
1293 batch[i][2] = index[(start + i + 1) % loop];
1297 case DRAW_INDEXEDLINELOOP16:
1299 const unsigned short *index = (const unsigned short*)indices;
1301 for(unsigned int i = 0; i < triangleCount; i++)
1303 batch[i][0] = index[(start + i + 0) % loop];
1304 batch[i][1] = index[(start + i + 1) % loop];
1305 batch[i][2] = index[(start + i + 1) % loop];
1309 case DRAW_INDEXEDLINELOOP32:
1311 const unsigned int *index = (const unsigned int*)indices;
1313 for(unsigned int i = 0; i < triangleCount; i++)
1315 batch[i][0] = index[(start + i + 0) % loop];
1316 batch[i][1] = index[(start + i + 1) % loop];
1317 batch[i][2] = index[(start + i + 1) % loop];
1321 case DRAW_INDEXEDTRIANGLELIST8:
1323 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1325 for(unsigned int i = 0; i < triangleCount; i++)
1327 batch[i][0] = index[0];
1328 batch[i][1] = index[1];
1329 batch[i][2] = index[2];
1335 case DRAW_INDEXEDTRIANGLELIST16:
1337 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1339 for(unsigned int i = 0; i < triangleCount; i++)
1341 batch[i][0] = index[0];
1342 batch[i][1] = index[1];
1343 batch[i][2] = index[2];
1349 case DRAW_INDEXEDTRIANGLELIST32:
1351 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1353 for(unsigned int i = 0; i < triangleCount; i++)
1355 batch[i][0] = index[0];
1356 batch[i][1] = index[1];
1357 batch[i][2] = index[2];
// Indexed strips: the parity of the absolute primitive number (start + i) swaps the second and third vertex.
1363 case DRAW_INDEXEDTRIANGLESTRIP8:
1365 const unsigned char *index = (const unsigned char*)indices + start;
1367 for(unsigned int i = 0; i < triangleCount; i++)
1369 batch[i][0] = index[0];
1370 batch[i][1] = index[((start + i) & 1) + 1];
1371 batch[i][2] = index[(~(start + i) & 1) + 1];
1377 case DRAW_INDEXEDTRIANGLESTRIP16:
1379 const unsigned short *index = (const unsigned short*)indices + start;
1381 for(unsigned int i = 0; i < triangleCount; i++)
1383 batch[i][0] = index[0];
1384 batch[i][1] = index[((start + i) & 1) + 1];
1385 batch[i][2] = index[(~(start + i) & 1) + 1];
1391 case DRAW_INDEXEDTRIANGLESTRIP32:
1393 const unsigned int *index = (const unsigned int*)indices + start;
1395 for(unsigned int i = 0; i < triangleCount; i++)
1397 batch[i][0] = index[0];
1398 batch[i][1] = index[((start + i) & 1) + 1];
1399 batch[i][2] = index[(~(start + i) & 1) + 1];
// Indexed fans: the third vertex of every triangle is the first element of the index buffer.
1405 case DRAW_INDEXEDTRIANGLEFAN8:
1407 const unsigned char *index = (const unsigned char*)indices;
1409 for(unsigned int i = 0; i < triangleCount; i++)
1411 batch[i][0] = index[start + i + 1];
1412 batch[i][1] = index[start + i + 2];
1413 batch[i][2] = index[0];
1417 case DRAW_INDEXEDTRIANGLEFAN16:
1419 const unsigned short *index = (const unsigned short*)indices;
1421 for(unsigned int i = 0; i < triangleCount; i++)
1423 batch[i][0] = index[start + i + 1];
1424 batch[i][1] = index[start + i + 2];
1425 batch[i][2] = index[0];
1429 case DRAW_INDEXEDTRIANGLEFAN32:
1431 const unsigned int *index = (const unsigned int*)indices;
1433 for(unsigned int i = 0; i < triangleCount; i++)
1435 batch[i][0] = index[start + i + 1];
1436 batch[i][1] = index[start + i + 2];
1437 batch[i][2] = index[0];
// Each iteration emits two triangles (0,1,2) and (0,2,3) from four consecutive vertices
// (case label not visible; presumably a quad list).
1443 unsigned int index = 4 * start / 2;
1445 for(unsigned int i = 0; i < triangleCount; i += 2)
1447 batch[i+0][0] = index + 0;
1448 batch[i+0][1] = index + 1;
1449 batch[i+0][2] = index + 2;
1451 batch[i+1][0] = index + 0;
1452 batch[i+1][1] = index + 2;
1453 batch[i+1][2] = index + 3;
// Run the vertex routine over all triples; output starts at the first vertex of the unit's triangle batch.
1464 task->primitiveStart = start;
1465 task->vertexCount = triangleCount * 3;
1466 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
// Sets up filled triangles for primitive unit `unit`: clips each of the `count` triangles
// when needed and calls the draw's setup routine on it.
// The return statement is not visible in this listing; executeTask stores the result as the
// unit's `visible` count.
1469 int Renderer::setupSolidTriangles(int unit, int count)
1471 Triangle *triangle = triangleBatch[unit];
1472 Primitive *primitive = primitiveBatch[unit];
1474 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1475 SetupProcessor::State &state = draw.setupState;
1476 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1478 int ms = state.multiSample;
1479 int pos = state.positionRegister;
1480 const DrawData *data = draw.data;
1483 for(int i = 0; i < count; i++, triangle++)
1485 Vertex &v0 = triangle->v0;
1486 Vertex &v1 = triangle->v1;
1487 Vertex &v2 = triangle->v2;
// Proceed only when the flags common to all three vertices are exactly CLIP_FINITE.
// NOTE(review): presumably this means no single clip plane rejects the whole triangle - confirm.
1489 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1491 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
// Clip when any vertex, or the draw itself, has a flag beyond CLIP_FINITE.
1493 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1495 if(clipFlagsOr != Clipper::CLIP_FINITE)
1497 if(!clipper->clip(polygon, clipFlagsOr, draw))
// (the actions taken on clip failure and on setup success are not visible in this listing)
1503 if(setupRoutine(primitive, triangle, &polygon, data))
// Sets up one triangle as three lines (wireframe fill mode; draw() selects this function for
// FILL-mode triangles via setupPrimitives). Returns 0 when the triangle is culled; the
// remaining return statements are not visible in this listing.
1514 int Renderer::setupWireframeTriangle(int unit, int count)
1516 Triangle *triangle = triangleBatch[unit];
1517 Primitive *primitive = primitiveBatch[unit];
1520 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1521 SetupProcessor::State &state = draw.setupState;
1523 const Vertex &v0 = triangle[0].v0;
1524 const Vertex &v1 = triangle[0].v1;
1525 const Vertex &v2 = triangle[0].v2;
// Signed quantity built from the x, y and w of the three vertices; its sign drives culling
// and half of it is stored as the primitive area below.
1527 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1529 if(state.cullMode == CULL_CLOCKWISE)
1531 if(d >= 0) return 0;
1533 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1535 if(d <= 0) return 0;
// triangle[0] already holds the edge v0-v1 in its v0/v1 slots; store the edges v1-v2 and
// v2-v0 in the next two entries.
1539 triangle[1].v0 = v1;
1540 triangle[1].v1 = v2;
1541 triangle[2].v0 = v2;
1542 triangle[2].v1 = v0;
// Flat shading: give both ends of the two extra edges the colours C[0] and C[1] of triangle[0].v0.
1544 if(state.color[0][0].flat) // FIXME
1546 for(int i = 0; i < 2; i++)
1548 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1549 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1550 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1551 triangle[2].v1.C[i] = triangle[0].v0.C[i];
// Set up each edge as a line (the statements that advance primitive/triangle are not visible in this listing).
1555 for(int i = 0; i < 3; i++)
1557 if(setupLine(*primitive, *triangle, draw))
1559 primitive->area = 0.5f * d;
// Sets up the first triangle of batch 'unit' as three points (its vertices).
// Returns 0 early when the triangle is culled by the draw call's cull mode.
// NOTE(review): 'count' is not used by the visible code, and the final return
// value is not visible here - confirm.
1571 int Renderer::setupVertexTriangle(int unit, int count)
1573 Triangle *triangle = triangleBatch[unit];
1574 Primitive *primitive = primitiveBatch[unit];
1577 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1578 SetupProcessor::State &state = draw.setupState;
1580 const Vertex &v0 = triangle[0].v0;
1581 const Vertex &v1 = triangle[0].v1;
1582 const Vertex &v2 = triangle[0].v2;
// Determinant of the vertices' (x, y, w) coordinates; its sign is tested for culling
1584 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1586 if(state.cullMode == CULL_CLOCKWISE)
1588 if(d >= 0) return 0;
1590 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1592 if(d <= 0) return 0;
// triangle[0].v0 already holds the first vertex; place the other two in v0 of the next entries
1596 triangle[1].v0 = v1;
1597 triangle[2].v0 = v2;
// One point setup per vertex
1599 for(int i = 0; i < 3; i++)
1601 if(setupPoint(*primitive, *triangle, draw))
1603 primitive->area = 0.5f * d;
// Calls setupLine() for 'count' iterations over the batch belonging to 'unit'.
// NOTE(review): how 'triangle' and 'primitive' advance between iterations, the use
// of 'ms' and the returned value are not visible here - confirm.
1615 int Renderer::setupLines(int unit, int count)
1617 Triangle *triangle = triangleBatch[unit];
1618 Primitive *primitive = primitiveBatch[unit];
1621 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1622 SetupProcessor::State &state = draw.setupState;
1624 int ms = state.multiSample;
1626 for(int i = 0; i < count; i++)
1628 if(setupLine(*primitive, *triangle, draw))
// Calls setupPoint() for 'count' iterations over the batch belonging to 'unit'.
// NOTE(review): how 'triangle' and 'primitive' advance between iterations, the use
// of 'ms' and the returned value are not visible here - confirm.
1640 int Renderer::setupPoints(int unit, int count)
1642 Triangle *triangle = triangleBatch[unit];
1643 Primitive *primitive = primitiveBatch[unit];
1646 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1647 SetupProcessor::State &state = draw.setupState;
1649 int ms = state.multiSample;
1651 for(int i = 0; i < count; i++)
1653 if(setupPoint(*primitive, *triangle, draw))
// Sets up a line between triangle.v0 and triangle.v1 by expanding it into a polygon
// of width data.lineWidth, clipping that polygon when needed and passing it to the
// draw call's setup routine, whose result is returned.
// Two expansions exist: a four-point rectangle (disabled by 'if(false)') and the
// active diamond-test variant, which builds a six-point polygon.
// NOTE(review): the declarations and initialization of P, C and L, and the early
// exits taken by the w and zero-length tests, are not visible here - confirm.
1665 bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1667 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1668 const SetupProcessor::State &state = draw.setupState;
1669 const DrawData &data = *draw.data;
1671 float lineWidth = data.lineWidth;
1673 Vertex &v0 = triangle.v0;
1674 Vertex &v1 = triangle.v1;
1676 int pos = state.positionRegister;
1678 const float4 &P0 = v0.v[pos];
1679 const float4 &P1 = v1.v[pos];
// Both end points have a non-positive w
1681 if(P0.w <= 0 && P1.w <= 0)
// Wx16/Hx16 are scaled by 1/16 before use
1686 const float W = data.Wx16[0] * (1.0f / 16.0f);
1687 const float H = data.Hx16[0] * (1.0f / 16.0f);
// Difference between the end points after the division by w, scaled by W and H
1689 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1690 float dy = H * (P1.y / P1.w - P0.y / P0.w);
// Zero-length line
1692 if(dx == 0 && dy == 0)
1697 if(false) // Rectangle
// Scale factor that brings (dx, dy) to half the line width
1707 float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
// Offsets multiplied by each end point's w and divided by W or H
1712 float dx0w = dx * P0.w / W;
1713 float dy0h = dy * P0.w / H;
1714 float dx0h = dx * P0.w / H;
1715 float dy0w = dy * P0.w / W;
1717 float dx1w = dx * P1.w / W;
1718 float dy1h = dy * P1.w / H;
1719 float dx1h = dx * P1.w / H;
1720 float dy1w = dy * P1.w / W;
1722 P[0].x += -dy0w + -dx0w;
1723 P[0].y += -dx0h + +dy0h;
1724 C[0] = clipper->computeClipFlags(P[0]);
1726 P[1].x += -dy1w + +dx1w;
1727 P[1].y += -dx1h + +dy1h;
1728 C[1] = clipper->computeClipFlags(P[1]);
1730 P[2].x += +dy1w + +dx1w;
1731 P[2].y += +dx1h + -dy1h;
1732 C[2] = clipper->computeClipFlags(P[2]);
1734 P[3].x += +dy0w + -dx0w;
1735 P[3].y += +dx0h + +dy0h;
1736 C[3] = clipper->computeClipFlags(P[3]);
// Continue only when the flags common to all four corners are exactly CLIP_FINITE
1738 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1740 Polygon polygon(P, 4);
1742 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1744 if(clipFlagsOr != Clipper::CLIP_FINITE)
1746 if(!clipper->clip(polygon, clipFlagsOr, draw))
1752 return setupRoutine(&primitive, &triangle, &polygon, &data);
1755 else // Diamond test convention
// Half the line width, multiplied by each end point's w and divided by W or H
1769 float dx0 = lineWidth * 0.5f * P0.w / W;
1770 float dy0 = lineWidth * 0.5f * P0.w / H;
1772 float dx1 = lineWidth * 0.5f * P1.w / W;
1773 float dy1 = lineWidth * 0.5f * P1.w / H;
// Clip flags of the eight candidate points (the point construction is not visible here)
1776 C[0] = clipper->computeClipFlags(P[0]);
1779 C[1] = clipper->computeClipFlags(P[1]);
1782 C[2] = clipper->computeClipFlags(P[2]);
1785 C[3] = clipper->computeClipFlags(P[3]);
1788 C[4] = clipper->computeClipFlags(P[4]);
1791 C[5] = clipper->computeClipFlags(P[5]);
1794 C[6] = clipper->computeClipFlags(P[6]);
1797 C[7] = clipper->computeClipFlags(P[7]);
// Continue only when the flags common to all eight points are exactly CLIP_FINITE
1799 if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1805 if(dx > dy) // Right
1846 Polygon polygon(L, 6);
1848 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1850 if(clipFlagsOr != Clipper::CLIP_FINITE)
1852 if(!clipper->clip(polygon, clipFlagsOr, draw))
1858 return setupRoutine(&primitive, &triangle, &polygon, &data);
// Sets up the point stored in triangle.v0 as a four-corner polygon sized by the
// point size, clips it when needed and passes it to the draw call's setup routine,
// whose result is returned.
// NOTE(review): the declarations of pSize, P and C, the construction of the four
// corners and the source of the point size when pointSizeRegister is in use are
// not visible here - confirm.
1865 bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1867 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1868 const SetupProcessor::State &state = draw.setupState;
1869 const DrawData &data = *draw.data;
1871 Vertex &v = triangle.v0;
1875 int pts = state.pointSizeRegister;
1877 if(state.pointSizeRegister != Unused)
1883 pSize = data.point.pointSize[0];
// Keep the point size within the draw data's minimum and maximum
1886 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1891 int pos = state.positionRegister;
// Point size multiplied by w and by the half-pixel factors
1898 const float X = pSize * P[0].w * data.halfPixelX[0];
1899 const float Y = pSize * P[0].w * data.halfPixelY[0];
1903 C[0] = clipper->computeClipFlags(P[0]);
1907 C[1] = clipper->computeClipFlags(P[1]);
1911 C[2] = clipper->computeClipFlags(P[2]);
1915 C[3] = clipper->computeClipFlags(P[3]);
// v1 and v2 start as copies of v0, then are offset by half the point size times 16:
// v1 along X and v2 along Y
1917 triangle.v1 = triangle.v0;
1918 triangle.v2 = triangle.v0;
1920 triangle.v1.X += iround(16 * 0.5f * pSize);
1921 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1923 Polygon polygon(P, 4);
// Continue only when the flags common to all four corners are exactly CLIP_FINITE
1925 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1927 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1929 if(clipFlagsOr != Clipper::CLIP_FINITE)
1931 if(!clipper->clip(polygon, clipFlagsOr, draw))
1937 return setupRoutine(&primitive, &triangle, &polygon, &data);
// Allocates the per-unit batches and creates the worker threads.
// unitCount and clusterCount are both threadCount rounded by ceilPow2(). Every unit
// gets a triangle batch and a primitive batch of batchSize entries; every thread
// gets a vertex task, a task slot set to Task::SUSPEND, a resume and a suspend
// event, and a Thread object running threadFunction.
1943 void Renderer::initializeThreads()
1945 unitCount = ceilPow2(threadCount);
1946 clusterCount = ceilPow2(threadCount);
1948 for(int i = 0; i < unitCount; i++)
1950 triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1951 primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1954 for(int i = 0; i < threadCount; i++)
1956 vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
// Start with a vertex cache that is tagged with draw call -1
1957 vertexTask[i]->vertexCache.drawCall = -1;
1959 task[i].type = Task::SUSPEND;
1961 resume[i] = new Event();
1962 suspend[i] = new Event();
// Arguments handed to the new thread: its index and the owning renderer
1964 Parameters parameters;
1965 parameters.threadIndex = i;
1966 parameters.renderer = this;
1968 exitThreads = false;
// The thread receives the address of 'parameters' (this argument was previously
// garbled by a mis-decoded "&para" character entity).
// NOTE(review): 'parameters' is local to this function; assumes the new thread has
// finished reading it before it goes out of scope - confirm.
1969 worker[i] = new Thread(threadFunction, &parameters);
1972 suspend[i]->signal();
// Stops the worker threads and releases everything initializeThreads() created:
// each thread is resumed and joined, its Thread object and events are deleted, and
// its vertex task is deallocated; afterwards the triangle and primitive batches
// are deallocated.
// NOTE(review): the body of the threadsAwake wait loop and the statements that
// request thread exit are not visible here - confirm.
1976 void Renderer::terminateThreads()
// Wait until no thread is awake
1978 while(threadsAwake != 0)
1983 for(int thread = 0; thread < threadCount; thread++)
1988 resume[thread]->signal();
1989 worker[thread]->join();
1991 delete worker[thread];
1993 delete resume[thread];
1995 delete suspend[thread];
1996 suspend[thread] = 0;
1999 deallocate(vertexTask[thread]);
2000 vertexTask[thread] = 0;
// NOTE(review): iterates over a fixed 16 entries whereas initializeThreads() allocates
// unitCount batches - presumably 16 is the capacity of these arrays; confirm.
2003 for(int i = 0; i < 16; i++)
2005 deallocate(triangleBatch[i]);
2006 triangleBatch[i] = 0;
2008 deallocate(primitiveBatch[i]);
2009 primitiveBatch[i] = 0;
// Loads the constants defined inside a vertex shader. Every instruction is scanned;
// OPCODE_DEF, OPCODE_DEFI and OPCODE_DEFB instructions set the float, integer or
// boolean vertex shader constant at the destination register's index from the
// first source operand. Does nothing for a null shader.
2013 void Renderer::loadConstants(const VertexShader *vertexShader)
2015 if(!vertexShader) return;
2017 size_t count = vertexShader->getLength();
2019 for(size_t i = 0; i < count; i++)
2021 const Shader::Instruction *instruction = vertexShader->getInstruction(i);
// Float constant definition: four components
2023 if(instruction->opcode == Shader::OPCODE_DEF)
2025 int index = instruction->dst.index;
2028 value[0] = instruction->src[0].value[0];
2029 value[1] = instruction->src[0].value[1];
2030 value[2] = instruction->src[0].value[2];
2031 value[3] = instruction->src[0].value[3];
2033 setVertexShaderConstantF(index, value);
// Integer constant definition: four components
2035 else if(instruction->opcode == Shader::OPCODE_DEFI)
2037 int index = instruction->dst.index;
2040 integer[0] = instruction->src[0].integer[0];
2041 integer[1] = instruction->src[0].integer[1];
2042 integer[2] = instruction->src[0].integer[2];
2043 integer[3] = instruction->src[0].integer[3];
2045 setVertexShaderConstantI(index, integer);
// Boolean constant definition: a single value
2047 else if(instruction->opcode == Shader::OPCODE_DEFB)
2049 int index = instruction->dst.index;
2050 int boolean = instruction->src[0].boolean[0];
2052 setVertexShaderConstantB(index, &boolean);
// Loads the constants defined inside a pixel shader. Every instruction is scanned;
// OPCODE_DEF, OPCODE_DEFI and OPCODE_DEFB instructions set the float, integer or
// boolean pixel shader constant at the destination register's index from the
// first source operand. Does nothing for a null shader.
2057 void Renderer::loadConstants(const PixelShader *pixelShader)
2059 if(!pixelShader) return;
2061 size_t count = pixelShader->getLength();
2063 for(size_t i = 0; i < count; i++)
2065 const Shader::Instruction *instruction = pixelShader->getInstruction(i);
// Float constant definition: four components
2067 if(instruction->opcode == Shader::OPCODE_DEF)
2069 int index = instruction->dst.index;
2072 value[0] = instruction->src[0].value[0];
2073 value[1] = instruction->src[0].value[1];
2074 value[2] = instruction->src[0].value[2];
2075 value[3] = instruction->src[0].value[3];
2077 setPixelShaderConstantF(index, value);
// Integer constant definition: four components
2079 else if(instruction->opcode == Shader::OPCODE_DEFI)
2081 int index = instruction->dst.index;
2084 integer[0] = instruction->src[0].integer[0];
2085 integer[1] = instruction->src[0].integer[1];
2086 integer[2] = instruction->src[0].integer[2];
2087 integer[3] = instruction->src[0].integer[3];
2089 setPixelShaderConstantI(index, integer);
// Boolean constant definition: a single value
2091 else if(instruction->opcode == Shader::OPCODE_DEFB)
2093 int index = instruction->dst.index;
2094 int boolean = instruction->src[0].boolean[0];
2096 setPixelShaderConstantB(index, &boolean);
// Stores the index buffer resource in the context.
2101 void Renderer::setIndexBuffer(Resource *indexBuffer)
2103 context->indexBuffer = indexBuffer;
// Stores the multisample mask in the context's sampleMask.
2106 void Renderer::setMultiSampleMask(unsigned int mask)
2108 context->sampleMask = mask;
// Sets the sw::transparencyAntialiasing global (the parameter shadows its name,
// hence the explicit namespace qualification).
2111 void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2113 sw::transparencyAntialiasing = transparencyAntialiasing;
// Checks whether the texture bound to 'sampler' shares its resource with one of the
// render targets or with the depth buffer.
// NOTE(review): the return statements are not visible here - presumably true on a
// match and false otherwise; confirm.
2116 bool Renderer::isReadWriteTexture(int sampler)
2118 for(int index = 0; index < RENDERTARGETS; index++)
2120 if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2126 if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
// Refreshes the clip planes from the user planes when updateClipPlanes is set, then
// clears that flag. Only planes whose CLIP_PLANEn bit is set in clipFlags are
// updated. With fixed-function vertex processing the user planes are multiplied by
// the view transform; otherwise they are copied unchanged.
2134 void Renderer::updateClipper()
2136 if(updateClipPlanes)
2138 if(VertexProcessor::isFixedFunction()) // User plane in world space
2140 const Matrix &scissorWorld = getViewTransform();
2142 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2143 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2144 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2145 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2146 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2147 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2149 else // User plane in clip space
2151 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2152 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2153 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2154 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2155 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2156 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2159 updateClipPlanes = false;
// Binds 'resource' as the texture of 'sampler' in the context.
// 'sampler' must be below TOTAL_IMAGE_UNITS (asserted).
2163 void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2165 ASSERT(sampler < TOTAL_IMAGE_UNITS);
2167 context->texture[sampler] = resource;
// Forwards one texture level (face, level, surface, type) to the context's sampler.
// Asserted limits: sampler < TOTAL_IMAGE_UNITS, face < 6, level < MIPMAP_LEVELS.
2170 void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2172 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2174 context->sampler[sampler].setTextureLevel(face, level, surface, type);
// Forwards the texture filter of 'sampler' to PixelProcessor for SAMPLER_PIXEL
// samplers, or to VertexProcessor.
2177 void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2179 if(type == SAMPLER_PIXEL)
2181 PixelProcessor::setTextureFilter(sampler, textureFilter);
2185 VertexProcessor::setTextureFilter(sampler, textureFilter);
// Forwards the mipmap filter of 'sampler' to PixelProcessor for SAMPLER_PIXEL
// samplers, or to VertexProcessor.
2189 void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2191 if(type == SAMPLER_PIXEL)
2193 PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2197 VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
// Forwards the gather enable flag of 'sampler' to PixelProcessor for SAMPLER_PIXEL
// samplers, or to VertexProcessor.
2201 void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2203 if(type == SAMPLER_PIXEL)
2205 PixelProcessor::setGatherEnable(sampler, enable);
2209 VertexProcessor::setGatherEnable(sampler, enable);
// Forwards the U addressing mode of 'sampler' to PixelProcessor for SAMPLER_PIXEL
// samplers, or to VertexProcessor.
2213 void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2215 if(type == SAMPLER_PIXEL)
2217 PixelProcessor::setAddressingModeU(sampler, addressMode);
2221 VertexProcessor::setAddressingModeU(sampler, addressMode);
// Forwards the V addressing mode of 'sampler' to PixelProcessor for SAMPLER_PIXEL
// samplers, or to VertexProcessor.
2225 void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2227 if(type == SAMPLER_PIXEL)
2229 PixelProcessor::setAddressingModeV(sampler, addressMode);
2233 VertexProcessor::setAddressingModeV(sampler, addressMode);
// Forwards the W addressing mode of 'sampler' to PixelProcessor for SAMPLER_PIXEL
// samplers, or to VertexProcessor.
2237 void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2239 if(type == SAMPLER_PIXEL)
2241 PixelProcessor::setAddressingModeW(sampler, addressMode);
2245 VertexProcessor::setAddressingModeW(sampler, addressMode);
// Forwards the sRGB read flag of 'sampler' to PixelProcessor for SAMPLER_PIXEL
// samplers, or to VertexProcessor.
2249 void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2251 if(type == SAMPLER_PIXEL)
2253 PixelProcessor::setReadSRGB(sampler, sRGB);
2257 VertexProcessor::setReadSRGB(sampler, sRGB);
// Forwards the mipmap LOD bias of 'sampler' to PixelProcessor for SAMPLER_PIXEL
// samplers, or to VertexProcessor.
2261 void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2263 if(type == SAMPLER_PIXEL)
2265 PixelProcessor::setMipmapLOD(sampler, bias);
2269 VertexProcessor::setMipmapLOD(sampler, bias);
// Forwards the border color of 'sampler' to PixelProcessor for SAMPLER_PIXEL
// samplers, or to VertexProcessor.
2273 void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2275 if(type == SAMPLER_PIXEL)
2277 PixelProcessor::setBorderColor(sampler, borderColor);
2281 VertexProcessor::setBorderColor(sampler, borderColor);
// Forwards the maximum anisotropy of 'sampler' to PixelProcessor for SAMPLER_PIXEL
// samplers, or to VertexProcessor.
2285 void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2287 if(type == SAMPLER_PIXEL)
2289 PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2293 VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
// Forwards the red channel swizzle of 'sampler' to PixelProcessor for SAMPLER_PIXEL
// samplers, or to VertexProcessor.
2297 void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2299 if(type == SAMPLER_PIXEL)
2301 PixelProcessor::setSwizzleR(sampler, swizzleR);
2305 VertexProcessor::setSwizzleR(sampler, swizzleR);
// Forwards the green channel swizzle of 'sampler' to PixelProcessor for SAMPLER_PIXEL
// samplers, or to VertexProcessor.
2309 void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2311 if(type == SAMPLER_PIXEL)
2313 PixelProcessor::setSwizzleG(sampler, swizzleG);
2317 VertexProcessor::setSwizzleG(sampler, swizzleG);
// Forwards the blue channel swizzle of 'sampler' to PixelProcessor for SAMPLER_PIXEL
// samplers, or to VertexProcessor.
2321 void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2323 if(type == SAMPLER_PIXEL)
2325 PixelProcessor::setSwizzleB(sampler, swizzleB);
2329 VertexProcessor::setSwizzleB(sampler, swizzleB);
// Forwards the alpha channel swizzle of 'sampler' to PixelProcessor for SAMPLER_PIXEL
// samplers, or to VertexProcessor.
2333 void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2335 if(type == SAMPLER_PIXEL)
2337 PixelProcessor::setSwizzleA(sampler, swizzleA);
2341 VertexProcessor::setSwizzleA(sampler, swizzleA);
// Forwards the base mipmap level of 'sampler' to PixelProcessor for SAMPLER_PIXEL
// samplers, or to VertexProcessor.
2345 void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel)
2347 if(type == SAMPLER_PIXEL)
2349 PixelProcessor::setBaseLevel(sampler, baseLevel);
2353 VertexProcessor::setBaseLevel(sampler, baseLevel);
// Forwards the maximum mipmap level of 'sampler' to PixelProcessor for SAMPLER_PIXEL
// samplers, or to VertexProcessor.
2357 void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel)
2359 if(type == SAMPLER_PIXEL)
2361 PixelProcessor::setMaxLevel(sampler, maxLevel);
2365 VertexProcessor::setMaxLevel(sampler, maxLevel);
// Forwards the minimum LOD of 'sampler' to PixelProcessor for SAMPLER_PIXEL
// samplers, or to VertexProcessor.
2369 void Renderer::setMinLod(SamplerType type, int sampler, float minLod)
2371 if(type == SAMPLER_PIXEL)
2373 PixelProcessor::setMinLod(sampler, minLod);
2377 VertexProcessor::setMinLod(sampler, minLod);
// Forwards the maximum LOD of 'sampler' to PixelProcessor for SAMPLER_PIXEL
// samplers, or to VertexProcessor.
2381 void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod)
2383 if(type == SAMPLER_PIXEL)
2385 PixelProcessor::setMaxLod(sampler, maxLod);
2389 VertexProcessor::setMaxLod(sampler, maxLod);
// Forwards the point sprite enable flag to the context.
2393 void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2395 context->setPointSpriteEnable(pointSpriteEnable);
// Forwards the point scale enable flag to the context.
2398 void Renderer::setPointScaleEnable(bool pointScaleEnable)
2400 context->setPointScaleEnable(pointScaleEnable);
// Stores the line width in the context.
2403 void Renderer::setLineWidth(float width)
2405 context->lineWidth = width;
// Sets the depth bias.
// NOTE(review): the body is not visible here - presumably it stores 'bias' in a
// member, as setSlopeDepthBias() does for the slope bias; confirm.
2408 void Renderer::setDepthBias(float bias)
// Stores the slope-scaled depth bias in slopeDepthBias.
2413 void Renderer::setSlopeDepthBias(float slopeBias)
2415 slopeDepthBias = slopeBias;
// Stores the rasterizer discard flag in the context.
2418 void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
2420 context->rasterizerDiscard = rasterizerDiscard;
// Makes 'shader' the context's pixel shader and loads the constants it defines.
2423 void Renderer::setPixelShader(const PixelShader *shader)
2425 context->pixelShader = shader;
2427 loadConstants(shader);
// Makes 'shader' the context's vertex shader and loads the constants it defines.
2430 void Renderer::setVertexShader(const VertexShader *shader)
2432 context->vertexShader = shader;
2434 loadConstants(shader);
// Sets 'count' pixel shader float constants starting at 'index'.
// First raises psDirtyConstF of every draw call slot to at least index + count,
// then passes each constant to PixelProcessor::setFloatConstant().
// NOTE(review): any advance of 'value' between iterations is not visible here - confirm.
2437 void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2439 for(int i = 0; i < DRAW_COUNT; i++)
2441 if(drawCall[i]->psDirtyConstF < index + count)
2443 drawCall[i]->psDirtyConstF = index + count;
2447 for(int i = 0; i < count; i++)
2449 PixelProcessor::setFloatConstant(index + i, value);
// Sets 'count' pixel shader integer constants starting at 'index'.
// First raises psDirtyConstI of every draw call slot to at least index + count,
// then passes each constant to PixelProcessor::setIntegerConstant().
// NOTE(review): any advance of 'value' between iterations is not visible here - confirm.
2454 void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2456 for(int i = 0; i < DRAW_COUNT; i++)
2458 if(drawCall[i]->psDirtyConstI < index + count)
2460 drawCall[i]->psDirtyConstI = index + count;
2464 for(int i = 0; i < count; i++)
2466 PixelProcessor::setIntegerConstant(index + i, value);
// Sets 'count' pixel shader boolean constants starting at 'index'.
// First raises psDirtyConstB of every draw call slot to at least index + count,
// then passes each constant to PixelProcessor::setBooleanConstant().
// NOTE(review): any advance of 'boolean' between iterations is not visible here - confirm.
2471 void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2473 for(int i = 0; i < DRAW_COUNT; i++)
2475 if(drawCall[i]->psDirtyConstB < index + count)
2477 drawCall[i]->psDirtyConstB = index + count;
2481 for(int i = 0; i < count; i++)
2483 PixelProcessor::setBooleanConstant(index + i, *boolean);
// Sets 'count' vertex shader float constants starting at 'index'.
// First raises vsDirtyConstF of every draw call slot to at least index + count,
// then passes each constant to VertexProcessor::setFloatConstant().
// NOTE(review): any advance of 'value' between iterations is not visible here - confirm.
2488 void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2490 for(int i = 0; i < DRAW_COUNT; i++)
2492 if(drawCall[i]->vsDirtyConstF < index + count)
2494 drawCall[i]->vsDirtyConstF = index + count;
2498 for(int i = 0; i < count; i++)
2500 VertexProcessor::setFloatConstant(index + i, value);
// Sets 'count' vertex shader integer constants starting at 'index'.
// First raises vsDirtyConstI of every draw call slot to at least index + count,
// then passes each constant to VertexProcessor::setIntegerConstant().
// NOTE(review): any advance of 'value' between iterations is not visible here - confirm.
2505 void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2507 for(int i = 0; i < DRAW_COUNT; i++)
2509 if(drawCall[i]->vsDirtyConstI < index + count)
2511 drawCall[i]->vsDirtyConstI = index + count;
2515 for(int i = 0; i < count; i++)
2517 VertexProcessor::setIntegerConstant(index + i, value);
// Sets 'count' vertex shader boolean constants starting at 'index'.
// First raises vsDirtyConstB of every draw call slot to at least index + count,
// then passes each constant to VertexProcessor::setBooleanConstant().
// NOTE(review): any advance of 'boolean' between iterations is not visible here - confirm.
2522 void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2524 for(int i = 0; i < DRAW_COUNT; i++)
2526 if(drawCall[i]->vsDirtyConstB < index + count)
2528 drawCall[i]->vsDirtyConstB = index + count;
2532 for(int i = 0; i < count; i++)
2534 VertexProcessor::setBooleanConstant(index + i, *boolean);
// Forwards model matrix 'i' to the vertex processor.
2539 void Renderer::setModelMatrix(const Matrix &M, int i)
2541 VertexProcessor::setModelMatrix(M, i);
// Forwards the view matrix to the vertex processor and marks the clip planes for
// recomputation by updateClipper().
2544 void Renderer::setViewMatrix(const Matrix &V)
2546 VertexProcessor::setViewMatrix(V);
2547 updateClipPlanes = true;
// Forwards the base matrix to the vertex processor and marks the clip planes for
// recomputation by updateClipper().
2550 void Renderer::setBaseMatrix(const Matrix &B)
2552 VertexProcessor::setBaseMatrix(B);
2553 updateClipPlanes = true;
// Forwards the projection matrix to the vertex processor and marks the clip planes
// for recomputation by updateClipper().
2556 void Renderer::setProjectionMatrix(const Matrix &P)
2558 VertexProcessor::setProjectionMatrix(P);
2559 updateClipPlanes = true;
// Appends 'query' to the renderer's list of queries.
2562 void Renderer::addQuery(Query *query)
2564 queries.push_back(query);
// Removes 'query' from the renderer's list of queries.
2567 void Renderer::removeQuery(Query *query)
2569 queries.remove(query);
// Returns the thread count.
// NOTE(review): the body is not visible here - presumably it returns threadCount; confirm.
2573 int Renderer::getThreadCount()
// Returns the vertexTime entry of 'thread'. No range check is made on 'thread'.
2578 int64_t Renderer::getVertexTime(int thread)
2580 return vertexTime[thread];
// Returns the setupTime entry of 'thread'. No range check is made on 'thread'.
2583 int64_t Renderer::getSetupTime(int thread)
2585 return setupTime[thread];
// Returns the pixelTime entry of 'thread'. No range check is made on 'thread'.
2588 int64_t Renderer::getPixelTime(int thread)
2590 return pixelTime[thread];
// Zeroes the vertex, setup and pixel timers of all threadCount threads.
2593 void Renderer::resetTimers()
2595 for(int thread = 0; thread < threadCount; thread++)
2597 vertexTime[thread] = 0;
2598 setupTime[thread] = 0;
2599 pixelTime[thread] = 0;
// Stores a copy of the viewport in the renderer.
2604 void Renderer::setViewport(const Viewport &viewport)
2606 this->viewport = viewport;
// Stores a copy of the scissor rectangle in the renderer.
2609 void Renderer::setScissor(const Rect &scissor)
2611 this->scissor = scissor;
// Stores the clip flags shifted left by 8 bits.
2614 void Renderer::setClipFlags(int flags)
2616 clipFlags = flags << 8; // Bottom 8 bits used by legacy frustum
// Stores user clip plane 'index' when it is below MAX_CLIP_PLANES and marks the
// clip planes for recomputation by updateClipper().
// NOTE(review): the handling of an out-of-range index is not visible here - confirm.
2619 void Renderer::setClipPlane(unsigned int index, const float plane[4])
2621 if(index < MAX_CLIP_PLANES)
2623 userPlane[index] = plane;
2627 updateClipPlanes = true;
// Applies the SwiftConfig configuration to the renderer and to the global settings
// when a new configuration is available or when 'initialUpdate' is set.
// NOTE(review): the case labels of the transcendental precision switch and any
// statements between the visible lines are not shown here - confirm before editing.
2630 void Renderer::updateConfiguration(bool initialUpdate)
2632 bool newConfiguration = swiftConfig->hasNewConfiguration();
2634 if(newConfiguration || initialUpdate)
2638 SwiftConfig::Configuration configuration = {};
2639 swiftConfig->getConfiguration(configuration);
// Precaching is only enabled when this is not a new configuration
2641 precacheVertex = !newConfiguration && configuration.precache;
2642 precacheSetup = !newConfiguration && configuration.precache;
2643 precachePixel = !newConfiguration && configuration.precache;
2645 VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2646 PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2647 SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
// Unknown quality values fall back to anisotropic filtering
2649 switch(configuration.textureSampleQuality)
2651 case 0: Sampler::setFilterQuality(FILTER_POINT); break;
2652 case 1: Sampler::setFilterQuality(FILTER_LINEAR); break;
2653 case 2: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2654 default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
// Unknown quality values fall back to linear mipmapping
2657 switch(configuration.mipmapQuality)
2659 case 0: Sampler::setMipmapQuality(MIPMAP_POINT); break;
2660 case 1: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2661 default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2664 setPerspectiveCorrection(configuration.perspectiveCorrection);
// All four transcendental precisions are always set to the same level
2666 switch(configuration.transcendentalPrecision)
2669 logPrecision = APPROXIMATE;
2670 expPrecision = APPROXIMATE;
2671 rcpPrecision = APPROXIMATE;
2672 rsqPrecision = APPROXIMATE;
2675 logPrecision = PARTIAL;
2676 expPrecision = PARTIAL;
2677 rcpPrecision = PARTIAL;
2678 rsqPrecision = PARTIAL;
2681 logPrecision = ACCURATE;
2682 expPrecision = ACCURATE;
2683 rcpPrecision = ACCURATE;
2684 rsqPrecision = ACCURATE;
2687 logPrecision = WHQL;
2688 expPrecision = WHQL;
2689 rcpPrecision = WHQL;
2690 rsqPrecision = WHQL;
2693 logPrecision = IEEE;
2694 expPrecision = IEEE;
2695 rcpPrecision = IEEE;
2696 rsqPrecision = IEEE;
2699 logPrecision = ACCURATE;
2700 expPrecision = ACCURATE;
2701 rcpPrecision = ACCURATE;
2702 rsqPrecision = ACCURATE;
// Unknown values disable transparency antialiasing
2706 switch(configuration.transparencyAntialiasing)
2708 case 0: transparencyAntialiasing = TRANSPARENCY_NONE; break;
2709 case 1: transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2710 default: transparencyAntialiasing = TRANSPARENCY_NONE; break;
// -1 uses the core count, 0 the process affinity, any other value is taken as is
2713 switch(configuration.threadCount)
2715 case -1: threadCount = CPUID::coreCount(); break;
2716 case 0: threadCount = CPUID::processAffinity(); break;
2717 default: threadCount = configuration.threadCount; break;
2720 CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2721 CPUID::setEnableSSSE3(configuration.enableSSSE3);
2722 CPUID::setEnableSSE3(configuration.enableSSE3);
2723 CPUID::setEnableSSE2(configuration.enableSSE2);
2724 CPUID::setEnableSSE(configuration.enableSSE);
// Copy the ten optimization pass settings
2726 for(int pass = 0; pass < 10; pass++)
2728 optimization[pass] = configuration.optimization[pass];
2731 forceWindowed = configuration.forceWindowed;
2732 complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2733 postBlendSRGB = configuration.postBlendSRGB;
2734 exactColorRounding = configuration.exactColorRounding;
2735 forceClearRegisters = configuration.forceClearRegisters;
2738 minPrimitives = configuration.minPrimitives;
2739 maxPrimitives = configuration.maxPrimitives;
// Outside of the initial update, create the worker threads when worker[0] is null
2743 if(!initialUpdate && !worker[0])
2745 initializeThreads();