1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "Renderer.hpp"
17 #include "Clipper.hpp"
19 #include "FrameBuffer.hpp"
21 #include "Surface.hpp"
23 #include "Primitive.hpp"
24 #include "Polygon.hpp"
25 #include "SwiftConfig.hpp"
26 #include "MutexLock.hpp"
29 #include "Resource.hpp"
30 #include "Constants.hpp"
32 #include "Reactor/Reactor.hpp"
// Passed to the SwiftConfig constructor when a Renderer is created.
36 bool disableServer = true;
// Bounds used by Renderer::draw() to range-check its primitive count.
39 unsigned int minPrimitives = 1;
40 unsigned int maxPrimitives = 1 << 21;
// Convention and configuration flags defined in other translation units.
// The Renderer constructor assigns the first six (and exactColorRounding) from its arguments.
45 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates
46 extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1]
47 extern bool booleanFaceRegister;
48 extern bool fullPixelPositionRegister;
49 extern bool leadingVertexFirst; // Flat shading uses first vertex, else last
50 extern bool secondaryColor; // Specular lighting is applied after texturing
52 extern bool forceWindowed;
53 extern bool complementaryDepthBuffer;
54 extern bool postBlendSRGB;
55 extern bool exactColorRounding;
56 extern TransparencyAntialiasing transparencyAntialiasing;
57 extern bool forceClearRegisters;
59 extern bool precacheVertex;
60 extern bool precacheSetup;
61 extern bool precachePixel;
// Default precision of the transcendental approximations.
// Renderer::threadFunction() compares logPrecision against IEEE to decide whether to enable
// flush-to-zero / denormals-are-zero on worker threads.
68 TranscendentalPrecision logPrecision = ACCURATE;
69 TranscendentalPrecision expPrecision = ACCURATE;
70 TranscendentalPrecision rcpPrecision = ACCURATE;
71 TranscendentalPrecision rsqPrecision = ACCURATE;
72 bool perspectiveCorrection = true;
// Initial dirty-constant counts for a draw call (presumably the DrawCall constructor body — its header
// line is not visible here). Renderer::draw() uses these counters as the number of float constants to
// copy into the draw data, so non-zero values force a copy on first use.
84 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
88 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
// Allocate the DrawData block for this draw call and store the address of `constants` in it.
94 data = (DrawData*)allocate(sizeof(DrawData));
95 data->constants = &constants;
// Constructs a renderer for `context`.
//   context            - forwarded to the VertexProcessor, PixelProcessor and SetupProcessor bases and stored.
//   conventions        - coordinate/shading conventions copied into the sw:: global flags.
//   exactColorRounding - copied into sw::exactColorRounding.
105 Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
// Publish the caller's conventions through the sw:: globals.
107 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
108 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
109 sw::booleanFaceRegister = conventions.booleanFaceRegister;
110 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
111 sw::leadingVertexFirst = conventions.leadingVertexFirst;
112 sw::secondaryColor = conventions.secondaryColor;
113 sw::exactColorRounding = exactColorRounding;
// Presumably resets render target slot 0 to "none" — TODO confirm the meaning of the second argument.
115 setRenderTarget(0, 0);
116 clipper = new Clipper();
// Flag the view/base/projection matrices and the clip planes as needing an update.
118 updateViewMatrix = true;
119 updateBaseMatrix = true;
120 updateProjectionMatrix = true;
121 updateClipPlanes = true;
127 for(int i = 0; i < 16; i++)
// Event signalled by finishRendering() when a draw call completes.
137 resumeApp = new Event();
// Start with null triangle/primitive batch pointers for all 16 slots.
145 for(int i = 0; i < 16; i++)
147 triangleBatch[i] = 0;
148 primitiveBatch[i] = 0;
// Allocate the pool of DRAW_COUNT draw calls; drawList initially refers to them in the same order.
151 for(int draw = 0; draw < DRAW_COUNT; draw++)
153 drawCall[draw] = new DrawCall();
154 drawList[draw] = drawCall[draw];
// Reset the progress trackers of the 16 primitive units and 16 pixel clusters.
157 for(int unit = 0; unit < 16; unit++)
159 primitiveProgress[unit].init();
162 for(int cluster = 0; cluster < 16; cluster++)
164 pixelProgress[cluster].init();
// Create the configuration object and apply the configuration once.
169 swiftConfig = new SwiftConfig(disableServer);
170 updateConfiguration(true);
// Resource locked by draw() (PRIVATE) and synchronize() (PUBLIC).
172 sync = new Resource(0);
// Destroys the renderer; the visible part releases the DrawCall objects allocated by the constructor.
175 Renderer::~Renderer()
185 for(int draw = 0; draw < DRAW_COUNT; draw++)
187 delete drawCall[draw];
// Clears `dRect` of `dest`; all arguments are forwarded unchanged to the blitter.
193 void Renderer::clear(void *pixel, Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
195 blitter.clear(pixel, format, dest, dRect, rgbaMask);
// Copies `sRect` of `source` to `dRect` of `dest`; all arguments are forwarded unchanged to the blitter.
198 void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter)
200 blitter.blit(source, sRect, dest, dRect, filter);
// Copies `source` to `dest` through the blitter's 3D path.
203 void Renderer::blit3D(Surface *source, Surface *dest)
205 blitter.blit3D(source, dest);
// Records a draw call and hands it to the task scheduler.
//   drawType    - primitive topology / index format; stored in the context and in the draw call.
//   indexOffset - byte offset added to the locked index buffer pointer.
//   count       - number of primitives; range-checked against minPrimitives/maxPrimitives.
//   update      - when true, the processor states are refreshed even if the sample mask is unchanged.
208 void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
211 if(count < minPrimitives || count > maxPrimitives)
217 context->drawType = drawType;
219 updateConfiguration();
222 int ss = context->getSuperSampleCount();
223 int ms = context->getMultiSampleCount();
// One pass per super-sample; pass q uses the q-th group of `ms` bits of the sample mask.
225 for(int q = 0; q < ss; q++)
227 unsigned int oldMultiSampleMask = context->multiSampleMask;
// The right-hand mask keeps the low `ms` bits. Assumes 1 <= ms <= 32 (ms == 0 would shift by 32) — TODO confirm.
228 context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
230 if(!context->multiSampleMask)
235 sync->lock(sw::PRIVATE);
237 Routine *vertexRoutine;
238 Routine *setupRoutine;
239 Routine *pixelRoutine;
// Refresh the pipeline states when requested or when the sample mask changed for this pass.
// NOTE(review): the routine pointers declared above are only assigned below; confirm they cannot be
// used unassigned when this condition is false.
241 if(update || oldMultiSampleMask != context->multiSampleMask)
243 vertexState = VertexProcessor::update();
244 setupState = SetupProcessor::update();
245 pixelState = PixelProcessor::update();
247 vertexRoutine = VertexProcessor::routine(vertexState);
248 setupRoutine = SetupProcessor::routine(setupState);
249 pixelRoutine = PixelProcessor::routine(pixelState);
// Primitives per batch shrink with the multisample count.
252 int batch = batchSize / ms;
// Select the primitive setup function: triangles choose one of three functions by context->fillMode,
// lines use setupLines, and the remaining assignment selects setupPoints.
254 int (*setupPrimitives)(Renderer *renderer, int batch, int count);
256 if(context->isDrawTriangle())
258 switch(context->fillMode)
261 setupPrimitives = setupSolidTriangles;
264 setupPrimitives = setupWireframeTriangle;
268 setupPrimitives = setupVertexTriangle;
271 default: ASSERT(false);
274 else if(context->isDrawLine())
276 setupPrimitives = setupLines;
280 setupPrimitives = setupPoints;
// Look for a draw call that is not in flight: references == -1 is the value finishRendering() writes
// when a draw call has completed. The selected call is placed in the drawList ring at nextDraw.
287 for(int i = 0; i < DRAW_COUNT; i++)
289 if(drawCall[i]->references == -1)
292 drawList[nextDraw % DRAW_COUNT] = draw;
305 DrawData *data = draw->data;
// Attach the active queries to this draw. TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN queries are attached
// only when both transform feedback flags of the vertex state are set. Each attached query gets its
// reference count incremented; finishRendering() decrements it again.
307 if(queries.size() != 0)
309 draw->queries = new std::list<Query*>();
310 bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
311 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
314 if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
316 atomicIncrement(&(q->reference));
317 draw->queries->push_back(q);
322 draw->drawType = drawType;
323 draw->batchSize = batch;
// Bind the routines for the lifetime of the draw (unbound in finishRendering()) and cache their entry points.
325 vertexRoutine->bind();
326 setupRoutine->bind();
327 pixelRoutine->bind();
329 draw->vertexRoutine = vertexRoutine;
330 draw->setupRoutine = setupRoutine;
331 draw->pixelRoutine = pixelRoutine;
332 draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
333 draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
334 draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
335 draw->setupPrimitives = setupPrimitives;
336 draw->setupState = setupState;
// Snapshot the vertex inputs and lock every non-null stream resource; unlocked in finishRendering().
338 for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
340 draw->vertexStream[i] = context->input[i].resource;
341 data->input[i] = context->input[i].buffer;
342 data->stride[i] = context->input[i].stride;
344 if(draw->vertexStream[i])
346 draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
// Lock the index buffer (if any) and apply the byte offset.
350 if(context->indexBuffer)
352 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
355 draw->indexBuffer = context->indexBuffer;
// Clear all texture slots, then record and lock the textures of the pixel samplers that are in use.
357 for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
359 draw->texture[sampler] = 0;
362 for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
364 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
366 draw->texture[sampler] = context->texture[sampler];
367 draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE); // If the texture is both read and written, use the same read/write lock as render targets
369 data->mipmap[sampler] = context->sampler[sampler].getTextureData();
// Pixel shader constants: each dirty counter is the number of leading constants to copy, and is reset
// to zero afterwards. The cW copy is capped at 8 entries.
373 if(context->pixelShader)
375 if(draw->psDirtyConstF)
377 memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
378 memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
379 draw->psDirtyConstF = 0;
382 if(draw->psDirtyConstI)
384 memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
385 draw->psDirtyConstI = 0;
388 if(draw->psDirtyConstB)
390 memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
391 draw->psDirtyConstB = 0;
394 PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
// Presumably the no-pixel-shader path: no pixel uniform buffers are held by this draw.
398 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
400 draw->pUniformBuffers[i] = nullptr;
// Pixel shader versions up to 0x0104 also get the texture stage uniforms of the 8 stages.
404 if(context->pixelShaderVersion() <= 0x0104)
406 for(int stage = 0; stage < 8; stage++)
408 if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
410 data->textureStage[stage] = context->textureStage[stage].uniforms;
// Vertex shader resources: vertex textures (shader version >= 0x0300), dirty constants, instance ID,
// uniform buffers and transform feedback buffers.
416 if(context->vertexShader)
418 if(context->vertexShader->getVersion() >= 0x0300)
420 for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
422 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
// Vertex samplers occupy the slots after the TEXTURE_IMAGE_UNITS pixel samplers.
424 draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
425 draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
427 data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
432 if(draw->vsDirtyConstF)
434 memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
435 draw->vsDirtyConstF = 0;
438 if(draw->vsDirtyConstI)
440 memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
441 draw->vsDirtyConstI = 0;
444 if(draw->vsDirtyConstB)
446 memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
447 draw->vsDirtyConstB = 0;
450 if(context->vertexShader->instanceIdDeclared)
452 data->instanceID = context->instanceID;
455 VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
456 VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
// Presumably the no-vertex-shader path: mark all vertex constants dirty again and hold no vertex
// uniform or transform feedback buffers.
462 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
463 draw->vsDirtyConstI = 16;
464 draw->vsDirtyConstB = 16;
466 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
468 draw->vUniformBuffers[i] = nullptr;
471 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
473 draw->transformFeedbackBuffers[i] = nullptr;
// Fixed-function state that is only copied when the pixel/setup state says it is in use.
477 if(pixelState.stencilActive)
479 data->stencil[0] = stencil;
480 data->stencil[1] = stencilCCW;
483 if(pixelState.fogActive)
488 if(setupState.isDrawPoint)
493 data->lineWidth = context->lineWidth;
495 data->factor = factor;
// Alpha-to-coverage thresholds, spread symmetrically around the alpha reference (normalised to [0, 1]).
// `margin` is the distance from the reference to the nearer end of [0, 1].
497 if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
499 float ref = context->alphaReference * (1.0f / 255.0f);
500 float margin = sw::min(ref, 1.0f - ref);
// Four thresholds (presumably the 4-sample case; the selecting condition is not visible here).
504 data->a2c0 = replicate(ref - margin * 0.6f);
505 data->a2c1 = replicate(ref - margin * 0.2f);
506 data->a2c2 = replicate(ref + margin * 0.2f);
507 data->a2c3 = replicate(ref + margin * 0.6f);
// Two thresholds (presumably the 2-sample case).
511 data->a2c0 = replicate(ref - margin * 0.3f);
512 data->a2c1 = replicate(ref + margin * 0.3f);
// Reset the per-cluster occlusion counters that finishRendering() sums into FRAGMENTS_PASSED queries.
517 if(pixelState.occlusionEnabled)
519 for(int cluster = 0; cluster < clusterCount; cluster++)
521 data->occlusion[cluster] = 0;
// Reset the per-cluster cycle counters that finishRendering() adds to the profiler.
526 for(int cluster = 0; cluster < clusterCount; cluster++)
528 for(int i = 0; i < PERF_TIMERS; i++)
530 data->cycles[i][cluster] = 0;
// Viewport transform: W/H are half the viewport extents, (X0, Y0) is the viewport centre,
// N/F are the depth range limits.
537 float W = 0.5f * viewport.width;
538 float H = 0.5f * viewport.height;
539 float X0 = viewport.x0 + W;
540 float Y0 = viewport.y0 + H;
541 float N = viewport.minZ;
542 float F = viewport.maxZ;
545 if(context->isDrawTriangle(false))
550 if(complementaryDepthBuffer)
// Per-pass sub-pixel offsets, indexed [log2(ss)][q] below; rows are for 1, 2, 4, 8 and 16 samples.
556 static const float X[5][16] = // Fragment offsets
558 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample
559 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples
560 {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples
561 {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples
562 {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f} // 16 samples
565 static const float Y[5][16] = // Fragment offsets
567 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample
568 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples
569 {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples
570 {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples
571 {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f} // 16 samples
// Row of the offset tables for the current super-sample count.
574 int s = sw::log2(ss);
// Viewport scale and centre multiplied by 16 (presumably for 4-bit sub-pixel fixed point — TODO confirm),
// plus the sample offset and half-pixel size expressed relative to the half extents.
576 data->Wx16 = replicate(W * 16);
577 data->Hx16 = replicate(H * 16);
578 data->X0x16 = replicate(X0 * 16 - 8);
579 data->Y0x16 = replicate(Y0 * 16 - 8);
580 data->XXXX = replicate(X[s][q] / W);
581 data->YYYY = replicate(Y[s][q] / H);
582 data->halfPixelX = replicate(0.5f / W);
583 data->halfPixelY = replicate(0.5f / H);
584 data->viewportHeight = abs(viewport.height);
585 data->slopeDepthBias = slopeDepthBias;
586 data->depthRange = Z;
// Record which clip planes are active and copy only those planes.
588 draw->clipFlags = clipFlags;
592 if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
593 if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
594 if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
595 if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
596 if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
597 if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
// Lock the colour, depth and stencil buffers for read/write at slice q * ms; they are unlocked in
// finishRendering().
603 for(int index = 0; index < RENDERTARGETS; index++)
605 draw->renderTarget[index] = context->renderTarget[index];
607 if(draw->renderTarget[index])
609 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
610 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
611 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
615 draw->depthBuffer = context->depthBuffer;
616 draw->stencilBuffer = context->stencilBuffer;
618 if(draw->depthBuffer)
620 data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
621 data->depthPitchB = context->depthBuffer->getInternalPitchB();
622 data->depthSliceB = context->depthBuffer->getInternalSliceB();
625 if(draw->stencilBuffer)
627 data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(q * ms, MANAGED);
628 data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
629 data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
635 data->scissorX0 = scissor.x0;
636 data->scissorX1 = scissor.x1;
637 data->scissorY0 = scissor.y0;
638 data->scissorY1 = scissor.y1;
// Number of batches in this draw (count / batch, rounded up); finishRendering() decrements it and
// releases the draw's resources once the decrement returns zero.
644 draw->references = (count + batch - 1) / batch;
// The scheduler state is updated under schedulerMutex.
646 schedulerMutex.lock();
648 schedulerMutex.unlock();
// Kick off execution through task slot 0.
657 task[0].type = Task::RESUME;
662 else // Use main thread for draw execution
665 task[0].type = Task::RESUME;
// Worker thread entry point. `parameters` points to a Parameters struct carrying the renderer and the
// index of this thread.
672 void Renderer::threadFunction(void *parameters)
674 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
675 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
// Below IEEE precision, enable flush-to-zero and denormals-are-zero for this thread.
677 if(logPrecision < IEEE)
679 CPUID::setFlushToZero(true);
680 CPUID::setDenormalsAreZero(true);
683 renderer->threadLoop(threadIndex);
// Body of a worker thread: runs the task loop, then signals this thread's `suspend` event and waits on
// its `resume` event.
686 void Renderer::threadLoop(int threadIndex)
690 taskLoop(threadIndex);
692 suspend[threadIndex]->signal();
693 resume[threadIndex]->wait();
// Repeatedly schedules and executes tasks for `threadIndex` until its task slot is set to SUSPEND.
697 void Renderer::taskLoop(int threadIndex)
699 while(task[threadIndex].type != Task::SUSPEND)
701 scheduleTask(threadIndex);
702 executeTask(threadIndex);
// Adds runnable tasks to the task queue (a ring of 32 entries written at qHead): first pixel tasks for
// idle clusters, then primitive tasks for free primitive units. Called from scheduleTask() while
// schedulerMutex is held.
706 void Renderer::findAvailableTasks()
// Pixel tasks: an idle cluster may render a primitive unit's batch when the unit holds processed
// primitives of the draw call the cluster is working on and the batch starts where the cluster stopped.
709 for(int cluster = 0; cluster < clusterCount; cluster++)
711 if(!pixelProgress[cluster].executing)
713 for(int unit = 0; unit < unitCount; unit++)
715 if(primitiveProgress[unit].references > 0) // Contains processed primitives
717 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
719 if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive) // Previous primitives have been rendered
721 Task &task = taskQueue[qHead];
722 task.type = Task::PIXELS;
723 task.primitiveUnit = unit;
724 task.pixelCluster = cluster;
// Marked busy here; finishRendering() clears the flag.
726 pixelProgress[cluster].executing = true;
728 // Commit to the task queue
729 qHead = (qHead + 1) % 32;
740 // Find primitive tasks
741 if(currentDraw == nextDraw)
743 return; // No more primitives to process
746 for(int unit = 0; unit < unitCount; unit++)
748 DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
// The current draw has handed out all its primitives; continue with the following draw, if any.
750 if(draw->primitive >= draw->count)
754 if(currentDraw == nextDraw)
756 return; // No more primitives to process
759 draw = drawList[currentDraw % DRAW_COUNT];
762 if(!primitiveProgress[unit].references) // Task not already being executed and not still in use by a pixel unit
764 int primitive = draw->primitive;
765 int count = draw->count;
766 int batch = draw->batchSize;
// Assign the next batch of the draw to this unit; the final batch may be shorter than `batch`.
768 primitiveProgress[unit].drawCall = currentDraw;
769 primitiveProgress[unit].firstPrimitive = primitive;
770 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
772 draw->primitive += batch;
774 Task &task = taskQueue[qHead];
775 task.type = Task::PRIMITIVES;
776 task.primitiveUnit = unit;
// -1 marks the unit as busy with primitive processing; executeTask() later sets it to clusterCount.
778 primitiveProgress[unit].references = -1;
780 // Commit to the task queue
781 qHead = (qHead + 1) % 32;
// Picks the next task for `threadIndex` under schedulerMutex: tops up the queue when it holds fewer
// entries than needed, takes the oldest queued task, wakes suspended threads for the remaining work,
// or sets this thread's task to SUSPEND.
787 void Renderer::scheduleTask(int threadIndex)
789 schedulerMutex.lock();
// Search for more work only when the queue cannot feed this thread plus every sleeping thread.
791 if((int)qSize < threadCount - threadsAwake + 1)
793 findAvailableTasks();
// Oldest entry of the ring. Relies on unsigned wrap-around when qHead < qSize — assumes both are
// unsigned; TODO confirm.
798 task[threadIndex] = taskQueue[(qHead - qSize) % 32];
// Resume suspended threads while queued work remains for them.
801 if(threadsAwake != threadCount)
803 int wakeup = qSize - threadsAwake + 1;
805 for(int i = 0; i < threadCount && wakeup > 0; i++)
807 if(task[i].type == Task::SUSPEND)
810 task[i].type = Task::RESUME;
// Presumably the nothing-to-do path: this thread leaves taskLoop() on its next check.
821 task[threadIndex].type = Task::SUSPEND;
826 schedulerMutex.unlock();
// Executes the task assigned to `threadIndex`: a PRIMITIVES task runs vertex processing and primitive
// setup for one batch; the pixel branch runs the pixel routine for one cluster and then calls
// finishRendering(). Elapsed ticks are added to the per-thread vertex/setup/pixel timers.
829 void Renderer::executeTask(int threadIndex)
832 int64_t startTick = Timer::ticks();
835 switch(task[threadIndex].type)
837 case Task::PRIMITIVES:
839 int unit = task[threadIndex].primitiveUnit;
841 int input = primitiveProgress[unit].firstPrimitive;
842 int count = primitiveProgress[unit].primitiveCount;
843 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
844 int (*setupPrimitives)(Renderer *renderer, int batch, int count) = draw->setupPrimitives;
846 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
849 int64_t time = Timer::ticks();
850 vertexTime[threadIndex] += time - startTick;
// With rasterizer discard nothing is set up, so no primitive is visible.
854 int visible = draw->setupState.rasterizerDiscard ? 0 : setupPrimitives(this, unit, count);
// Publish the batch: every cluster must render it, so the unit starts with clusterCount references.
856 primitiveProgress[unit].visible = visible;
857 primitiveProgress[unit].references = clusterCount;
860 setupTime[threadIndex] += Timer::ticks() - startTick;
// Pixel task (case label not visible here): render the unit's visible primitives on one cluster.
866 int unit = task[threadIndex].primitiveUnit;
867 int visible = primitiveProgress[unit].visible;
871 int cluster = task[threadIndex].pixelCluster;
872 Primitive *primitive = primitiveBatch[unit];
873 DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
874 DrawData *data = draw->data;
875 PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
877 pixelRoutine(primitive, visible, cluster, data);
880 finishRendering(task[threadIndex]);
883 pixelTime[threadIndex] += Timer::ticks() - startTick;
// Takes a PUBLIC lock on the `sync` resource, which draw() locks as PRIVATE for every queued pass
// (presumably this blocks until pending draws have finished — TODO confirm against Resource).
896 void Renderer::synchronize()
898 sync->lock(sw::PUBLIC);
// Bookkeeping after a pixel task: advances the cluster's progress, drops one reference on the
// primitive unit and, as the reference counts are decremented, accumulates profiling and query
// results, unlocks every resource draw() locked, unbinds the routines and marks the draw call free.
902 void Renderer::finishRendering(Task &pixelTask)
904 int unit = pixelTask.primitiveUnit;
905 int cluster = pixelTask.pixelCluster;
907 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
908 DrawData &data = *draw.data;
909 int primitive = primitiveProgress[unit].firstPrimitive;
910 int count = primitiveProgress[unit].primitiveCount;
911 int processedPrimitives = primitive + count;
913 pixelProgress[cluster].processedPrimitives = processedPrimitives;
// The cluster has rendered the whole draw; move it on to the next draw call.
915 if(pixelProgress[cluster].processedPrimitives >= draw.count)
917 pixelProgress[cluster].drawCall++;
918 pixelProgress[cluster].processedPrimitives = 0;
// One reference per cluster on the unit's batch, then one reference per batch on the draw call.
921 int ref = atomicDecrement(&primitiveProgress[unit].references);
925 ref = atomicDecrement(&draw.references);
// Fold the per-cluster cycle counters into the profiler.
930 for(int cluster = 0; cluster < clusterCount; cluster++)
932 for(int i = 0; i < PERF_TIMERS; i++)
934 profiler.cycles[i] += data.cycles[i][cluster];
// Add this draw's results to the attached queries and release the references taken in draw().
941 for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
947 case Query::FRAGMENTS_PASSED:
948 for(int cluster = 0; cluster < clusterCount; cluster++)
950 atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
953 case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
954 atomicAdd((volatile int*)&query->data, processedPrimitives);
960 atomicDecrement(&query->reference);
// Unlock everything draw() locked: render targets, depth/stencil, textures, vertex streams, index
// buffer, uniform buffers and transform feedback buffers.
967 for(int i = 0; i < RENDERTARGETS; i++)
969 if(draw.renderTarget[i])
971 draw.renderTarget[i]->unlockInternal();
977 draw.depthBuffer->unlockInternal();
980 if(draw.stencilBuffer)
982 draw.stencilBuffer->unlockStencil();
985 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
989 draw.texture[i]->unlock();
993 for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
995 if(draw.vertexStream[i])
997 draw.vertexStream[i]->unlock();
1001 if(draw.indexBuffer)
1003 draw.indexBuffer->unlock();
1006 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
1008 if(draw.pUniformBuffers[i])
1010 draw.pUniformBuffers[i]->unlock();
1012 if(draw.vUniformBuffers[i])
1014 draw.vUniformBuffers[i]->unlock();
1018 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
1020 if(draw.transformFeedbackBuffers[i])
1022 draw.transformFeedbackBuffers[i]->unlock();
1026 draw.vertexRoutine->unbind();
1027 draw.setupRoutine->unbind();
1028 draw.pixelRoutine->unbind();
// -1 is the value draw() looks for when searching for a free draw call; then signal resumeApp.
1032 draw.references = -1;
1033 resumeApp->signal();
// The cluster can be given a new pixel task by findAvailableTasks().
1037 pixelProgress[cluster].executing = false;
// Builds the vertex index triples for one batch of primitives and runs the vertex routine on them.
//   unit          - primitive unit whose triangle batch receives the output.
//   start         - index of the first primitive of the batch within the draw.
//   triangleCount - number of primitives in the batch (all topologies are expressed as index triples).
//   loop          - total primitive count of the draw; used as the modulus for line loops.
//   thread        - index of the executing thread; selects the VertexTask.
1040 void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
1042 Triangle *triangle = triangleBatch[unit];
1043 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1044 DrawData *data = draw->data;
1045 VertexTask *task = vertexTask[thread];
1047 const void *indices = data->indices;
1048 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
// The vertex cache is only valid within one draw call; clear it when the thread switches draws.
1050 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
1052 task->vertexCache.clear();
1053 task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
// NOTE(review): fixed capacity of 128 primitives; no bound check of triangleCount is visible here.
1056 unsigned int batch[128][3]; // FIXME: Adjust to dynamic batch size
1058 switch(draw->drawType)
// Points: all three indices refer to the same vertex.
1060 case DRAW_POINTLIST:
1062 unsigned int index = start;
1064 for(unsigned int i = 0; i < triangleCount; i++)
1066 batch[i][0] = index;
1067 batch[i][1] = index;
1068 batch[i][2] = index;
// Two vertices per primitive (case label not visible here; presumably the line list). Lines repeat
// the second vertex as the third index.
1076 unsigned int index = 2 * start;
1078 for(unsigned int i = 0; i < triangleCount; i++)
1080 batch[i][0] = index + 0;
1081 batch[i][1] = index + 1;
1082 batch[i][2] = index + 1;
1088 case DRAW_LINESTRIP:
1090 unsigned int index = start;
1092 for(unsigned int i = 0; i < triangleCount; i++)
1094 batch[i][0] = index + 0;
1095 batch[i][1] = index + 1;
1096 batch[i][2] = index + 1;
// Indices wrap around modulo `loop` (case label not visible here; presumably the line loop).
1104 unsigned int index = start;
1106 for(unsigned int i = 0; i < triangleCount; i++)
1108 batch[i][0] = (index + 0) % loop;
1109 batch[i][1] = (index + 1) % loop;
1110 batch[i][2] = (index + 1) % loop;
1116 case DRAW_TRIANGLELIST:
1118 unsigned int index = 3 * start;
1120 for(unsigned int i = 0; i < triangleCount; i++)
1122 batch[i][0] = index + 0;
1123 batch[i][1] = index + 1;
1124 batch[i][2] = index + 2;
// Strip: the second and third vertex swap on odd primitives.
1130 case DRAW_TRIANGLESTRIP:
1132 unsigned int index = start;
1134 for(unsigned int i = 0; i < triangleCount; i++)
1136 batch[i][0] = index + 0;
1137 batch[i][1] = index + (index & 1) + 1;
1138 batch[i][2] = index + (~index & 1) + 1;
1144 case DRAW_TRIANGLEFAN:
1146 unsigned int index = start;
1148 for(unsigned int i = 0; i < triangleCount; i++)
1150 batch[i][0] = index + 1;
1151 batch[i][1] = index + 2;
// Indexed variants: same patterns as above, with vertex numbers read from the index buffer as
// 8-, 16- or 32-bit values.
1158 case DRAW_INDEXEDPOINTLIST8:
1160 const unsigned char *index = (const unsigned char*)indices + start;
1162 for(unsigned int i = 0; i < triangleCount; i++)
1164 batch[i][0] = *index;
1165 batch[i][1] = *index;
1166 batch[i][2] = *index;
1172 case DRAW_INDEXEDPOINTLIST16:
1174 const unsigned short *index = (const unsigned short*)indices + start;
1176 for(unsigned int i = 0; i < triangleCount; i++)
1178 batch[i][0] = *index;
1179 batch[i][1] = *index;
1180 batch[i][2] = *index;
1186 case DRAW_INDEXEDPOINTLIST32:
1188 const unsigned int *index = (const unsigned int*)indices + start;
1190 for(unsigned int i = 0; i < triangleCount; i++)
1192 batch[i][0] = *index;
1193 batch[i][1] = *index;
1194 batch[i][2] = *index;
1200 case DRAW_INDEXEDLINELIST8:
1202 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1204 for(unsigned int i = 0; i < triangleCount; i++)
1206 batch[i][0] = index[0];
1207 batch[i][1] = index[1];
1208 batch[i][2] = index[1];
1214 case DRAW_INDEXEDLINELIST16:
1216 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1218 for(unsigned int i = 0; i < triangleCount; i++)
1220 batch[i][0] = index[0];
1221 batch[i][1] = index[1];
1222 batch[i][2] = index[1];
1228 case DRAW_INDEXEDLINELIST32:
1230 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1232 for(unsigned int i = 0; i < triangleCount; i++)
1234 batch[i][0] = index[0];
1235 batch[i][1] = index[1];
1236 batch[i][2] = index[1];
1242 case DRAW_INDEXEDLINESTRIP8:
1244 const unsigned char *index = (const unsigned char*)indices + start;
1246 for(unsigned int i = 0; i < triangleCount; i++)
1248 batch[i][0] = index[0];
1249 batch[i][1] = index[1];
1250 batch[i][2] = index[1];
1256 case DRAW_INDEXEDLINESTRIP16:
1258 const unsigned short *index = (const unsigned short*)indices + start;
1260 for(unsigned int i = 0; i < triangleCount; i++)
1262 batch[i][0] = index[0];
1263 batch[i][1] = index[1];
1264 batch[i][2] = index[1];
1270 case DRAW_INDEXEDLINESTRIP32:
1272 const unsigned int *index = (const unsigned int*)indices + start;
1274 for(unsigned int i = 0; i < triangleCount; i++)
1276 batch[i][0] = index[0];
1277 batch[i][1] = index[1];
1278 batch[i][2] = index[1];
// Indexed line loops address the index buffer absolutely and wrap modulo `loop`.
1284 case DRAW_INDEXEDLINELOOP8:
1286 const unsigned char *index = (const unsigned char*)indices;
1288 for(unsigned int i = 0; i < triangleCount; i++)
1290 batch[i][0] = index[(start + i + 0) % loop];
1291 batch[i][1] = index[(start + i + 1) % loop];
1292 batch[i][2] = index[(start + i + 1) % loop];
1296 case DRAW_INDEXEDLINELOOP16:
1298 const unsigned short *index = (const unsigned short*)indices;
1300 for(unsigned int i = 0; i < triangleCount; i++)
1302 batch[i][0] = index[(start + i + 0) % loop];
1303 batch[i][1] = index[(start + i + 1) % loop];
1304 batch[i][2] = index[(start + i + 1) % loop];
1308 case DRAW_INDEXEDLINELOOP32:
1310 const unsigned int *index = (const unsigned int*)indices;
1312 for(unsigned int i = 0; i < triangleCount; i++)
1314 batch[i][0] = index[(start + i + 0) % loop];
1315 batch[i][1] = index[(start + i + 1) % loop];
1316 batch[i][2] = index[(start + i + 1) % loop];
1320 case DRAW_INDEXEDTRIANGLELIST8:
1322 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1324 for(unsigned int i = 0; i < triangleCount; i++)
1326 batch[i][0] = index[0];
1327 batch[i][1] = index[1];
1328 batch[i][2] = index[2];
1334 case DRAW_INDEXEDTRIANGLELIST16:
1336 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1338 for(unsigned int i = 0; i < triangleCount; i++)
1340 batch[i][0] = index[0];
1341 batch[i][1] = index[1];
1342 batch[i][2] = index[2];
1348 case DRAW_INDEXEDTRIANGLELIST32:
1350 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1352 for(unsigned int i = 0; i < triangleCount; i++)
1354 batch[i][0] = index[0];
1355 batch[i][1] = index[1];
1356 batch[i][2] = index[2];
// Indexed strips: the parity of the absolute primitive number (start + i) selects the vertex order.
1362 case DRAW_INDEXEDTRIANGLESTRIP8:
1364 const unsigned char *index = (const unsigned char*)indices + start;
1366 for(unsigned int i = 0; i < triangleCount; i++)
1368 batch[i][0] = index[0];
1369 batch[i][1] = index[((start + i) & 1) + 1];
1370 batch[i][2] = index[(~(start + i) & 1) + 1];
1376 case DRAW_INDEXEDTRIANGLESTRIP16:
1378 const unsigned short *index = (const unsigned short*)indices + start;
1380 for(unsigned int i = 0; i < triangleCount; i++)
1382 batch[i][0] = index[0];
1383 batch[i][1] = index[((start + i) & 1) + 1];
1384 batch[i][2] = index[(~(start + i) & 1) + 1];
1390 case DRAW_INDEXEDTRIANGLESTRIP32:
1392 const unsigned int *index = (const unsigned int*)indices + start;
1394 for(unsigned int i = 0; i < triangleCount; i++)
1396 batch[i][0] = index[0];
1397 batch[i][1] = index[((start + i) & 1) + 1];
1398 batch[i][2] = index[(~(start + i) & 1) + 1];
// Indexed fans: the first index of the buffer is the third vertex of every triangle.
1404 case DRAW_INDEXEDTRIANGLEFAN8:
1406 const unsigned char *index = (const unsigned char*)indices;
1408 for(unsigned int i = 0; i < triangleCount; i++)
1410 batch[i][0] = index[start + i + 1];
1411 batch[i][1] = index[start + i + 2];
1412 batch[i][2] = index[0];
1416 case DRAW_INDEXEDTRIANGLEFAN16:
1418 const unsigned short *index = (const unsigned short*)indices;
1420 for(unsigned int i = 0; i < triangleCount; i++)
1422 batch[i][0] = index[start + i + 1];
1423 batch[i][1] = index[start + i + 2];
1424 batch[i][2] = index[0];
1428 case DRAW_INDEXEDTRIANGLEFAN32:
1430 const unsigned int *index = (const unsigned int*)indices;
1432 for(unsigned int i = 0; i < triangleCount; i++)
1434 batch[i][0] = index[start + i + 1];
1435 batch[i][1] = index[start + i + 2];
1436 batch[i][2] = index[0];
// Four vertices produce two triangles, (0, 1, 2) and (0, 2, 3) (case label not visible here;
// presumably the quad list). Two batch entries are written per iteration.
1442 unsigned int index = 4 * start / 2;
1444 for(unsigned int i = 0; i < triangleCount; i += 2)
1446 batch[i+0][0] = index + 0;
1447 batch[i+0][1] = index + 1;
1448 batch[i+0][2] = index + 2;
1450 batch[i+1][0] = index + 0;
1451 batch[i+1][1] = index + 2;
1452 batch[i+1][2] = index + 3;
// Describe the batch to the vertex routine: three index slots per primitive.
1463 task->vertexStart = start * 3;
1464 task->vertexCount = triangleCount * 3;
1465 // Note: Quads aren't handled for verticesPerPrimitive, but verticesPerPrimitive is used for transform feedback,
1466 // which is an OpenGL ES 3.0 feature, and OpenGL ES 3.0 doesn't support quads as a primitive type.
// The low four bits of the draw type are compared against DRAW_LINELIST and DRAW_TRIANGLELIST to
// yield 1, 2 or 3 vertices per primitive.
1467 DrawType type = static_cast<DrawType>(static_cast<unsigned int>(draw->drawType) & 0xF);
1468 task->verticesPerPrimitive = 1 + (type >= DRAW_LINELIST) + (type >= DRAW_TRIANGLELIST);
1469 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
// Sets up the filled triangles of primitive unit `unit`: skips triangles whose vertices share a
// non-finite clip flag combination, clips the rest when any clip flag is set, and runs the setup
// routine on each surviving polygon. `count` is the number of triangles in the batch.
1472 int Renderer::setupSolidTriangles(Renderer *renderer, int unit, int count)
1474 Triangle *triangle = renderer->triangleBatch[unit];
1475 Primitive *primitive = renderer->primitiveBatch[unit];
1477 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1478 SetupProcessor::State &state = draw.setupState;
1479 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1481 int ms = state.multiSample;
1482 int pos = state.positionRegister;
1483 const DrawData *data = draw.data;
1486 for(int i = 0; i < count; i++, triangle++)
1488 Vertex &v0 = triangle->v0;
1489 Vertex &v1 = triangle->v1;
1490 Vertex &v2 = triangle->v2;
// Process the triangle only if the AND of the three vertices' clip flags is exactly CLIP_FINITE.
1492 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1494 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
// Clipping is needed when any vertex, or the draw's user clip planes, sets a flag beyond CLIP_FINITE.
1496 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1498 if(clipFlagsOr != Clipper::CLIP_FINITE)
1500 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1506 if(setupRoutine(primitive, triangle, &polygon, data))
// Sets up the first triangle of batch `unit` as line primitives: after an optional
// cull test, vertex pairs are copied into triangle[1] and triangle[2] and setupLine
// is called in a three-iteration loop.
1517 int Renderer::setupWireframeTriangle(Renderer *renderer, int unit, int count)
1519 Triangle *triangle = renderer->triangleBatch[unit];
1520 Primitive *primitive = renderer->primitiveBatch[unit];
1523 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1524 SetupProcessor::State &state = draw.setupState;
1525 SetupProcessor::RoutinePointer setupRoutine = draw.setupPointer;
1527 const Vertex &v0 = triangle[0].v0;
1528 const Vertex &v1 = triangle[0].v1;
1529 const Vertex &v2 = triangle[0].v2;
// Signed quantity built from the (x, y, w) of the three vertices (a 3x3 determinant
// up to sign); its sign drives culling and half of it is stored as primitive->area
1531 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
// Return 0 without setting anything up when the sign of d matches the culled winding
1533 if(state.cullMode == CULL_CLOCKWISE)
1535 if(d >= 0) return 0;
1537 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1539 if(d <= 0) return 0;
// triangle[0] already holds (v0, v1); the other two entries get (v1, v2) and (v2, v0)
1543 triangle[1].v0 = v1;
1544 triangle[1].v1 = v2;
1545 triangle[2].v0 = v2;
1546 triangle[2].v1 = v0;
// With flat color interpolation, both colors of every copied vertex are taken from
// triangle[0].v0
1548 if(state.color[0][0].flat) // FIXME
1550 for(int i = 0; i < 2; i++)
1552 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1553 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1554 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1555 triangle[2].v1.C[i] = triangle[0].v0.C[i];
1559 for(int i = 0; i < 3; i++)
1561 if(setupLine(renderer, *primitive, *triangle, draw))
1563 primitive->area = 0.5f * d;
// Sets up the first triangle of batch `unit` as point primitives: after an optional
// cull test, v1 and v2 are copied into triangle[1].v0 and triangle[2].v0 and
// setupPoint is called in a three-iteration loop.
1575 int Renderer::setupVertexTriangle(Renderer *renderer, int unit, int count)
1577 Triangle *triangle = renderer->triangleBatch[unit];
1578 Primitive *primitive = renderer->primitiveBatch[unit];
1581 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1582 SetupProcessor::State &state = draw.setupState;
1584 const Vertex &v0 = triangle[0].v0;
1585 const Vertex &v1 = triangle[0].v1;
1586 const Vertex &v2 = triangle[0].v2;
// Same signed (x, y, w) expression as in setupWireframeTriangle; its sign drives culling
1588 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1590 if(state.cullMode == CULL_CLOCKWISE)
1592 if(d >= 0) return 0;
1594 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1596 if(d <= 0) return 0;
// setupPoint reads only v0 of each triangle, so one vertex per entry is enough
1600 triangle[1].v0 = v1;
1601 triangle[2].v0 = v2;
1603 for(int i = 0; i < 3; i++)
1605 if(setupPoint(renderer, *primitive, *triangle, draw))
1607 primitive->area = 0.5f * d;
// Calls setupLine for `count` line primitives of batch `unit`, using the draw call
// selected by primitiveProgress[unit].drawCall modulo DRAW_COUNT.
1619 int Renderer::setupLines(Renderer *renderer, int unit, int count)
1621 Triangle *triangle = renderer->triangleBatch[unit];
1622 Primitive *primitive = renderer->primitiveBatch[unit];
1625 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1626 SetupProcessor::State &state = draw.setupState;
// Multisample count from the setup state; presumably the primitive stride — TODO confirm
1628 int ms = state.multiSample;
1630 for(int i = 0; i < count; i++)
1632 if(setupLine(renderer, *primitive, *triangle, draw))
// Calls setupPoint for `count` point primitives of batch `unit`, using the draw call
// selected by primitiveProgress[unit].drawCall modulo DRAW_COUNT.
1644 int Renderer::setupPoints(Renderer *renderer, int unit, int count)
1646 Triangle *triangle = renderer->triangleBatch[unit];
1647 Primitive *primitive = renderer->primitiveBatch[unit];
1650 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1651 SetupProcessor::State &state = draw.setupState;
// Multisample count from the setup state; presumably the primitive stride — TODO confirm
1653 int ms = state.multiSample;
1655 for(int i = 0; i < count; i++)
1657 if(setupPoint(renderer, *primitive, *triangle, draw))
// Expands the line from triangle.v0 to triangle.v1 into a polygon, clips it when
// needed, and hands it to the draw call's setup routine; the routine's result is returned.
// Two expansions exist: a rectangle (disabled by if(false)) and the diamond test convention.
1669 bool Renderer::setupLine(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1671 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1672 const SetupProcessor::State &state = draw.setupState;
1673 const DrawData &data = *draw.data;
1675 float lineWidth = data.lineWidth;
1677 Vertex &v0 = triangle.v0;
1678 Vertex &v1 = triangle.v1;
1680 int pos = state.positionRegister;
1682 const float4 &P0 = v0.v[pos];
1683 const float4 &P1 = v1.v[pos];
// Both endpoints have non-positive w
1685 if(P0.w <= 0 && P1.w <= 0)
// Wx16 / Hx16 are divided by 16 to obtain the scale factors W and H
1690 const float W = data.Wx16[0] * (1.0f / 16.0f);
1691 const float H = data.Hx16[0] * (1.0f / 16.0f);
// Endpoint delta after the perspective divide, scaled by W and H
1693 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1694 float dy = H * (P1.y / P1.w - P0.y / P0.w);
// Degenerate line: both deltas are exactly zero
1696 if(dx == 0 && dy == 0)
1701 if(false) // Rectangle
// Half the line width divided by the length of (dx, dy)
1711 float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
// Offsets re-multiplied by each endpoint's w and divided by W or H, so they can be
// added to the undivided positions
1716 float dx0w = dx * P0.w / W;
1717 float dy0h = dy * P0.w / H;
1718 float dx0h = dx * P0.w / H;
1719 float dy0w = dy * P0.w / W;
1721 float dx1w = dx * P1.w / W;
1722 float dy1h = dy * P1.w / H;
1723 float dx1h = dx * P1.w / H;
1724 float dy1w = dy * P1.w / W;
// Four rectangle corners, each followed by its clip flags
1726 P[0].x += -dy0w + -dx0w;
1727 P[0].y += -dx0h + +dy0h;
1728 C[0] = computeClipFlags(P[0], data);
1730 P[1].x += -dy1w + +dx1w;
1731 P[1].y += -dx1h + +dy1h;
1732 C[1] = computeClipFlags(P[1], data);
1734 P[2].x += +dy1w + +dx1w;
1735 P[2].y += +dx1h + -dy1h;
1736 C[2] = computeClipFlags(P[2], data);
1738 P[3].x += +dy0w + -dx0w;
1739 P[3].y += +dx0h + +dy0h;
1740 C[3] = computeClipFlags(P[3], data);
// Continue only when the AND of all corner flags is exactly CLIP_FINITE
1742 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1744 Polygon polygon(P, 4);
1746 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
// Clip only when some flag other than CLIP_FINITE is present
1748 if(clipFlagsOr != Clipper::CLIP_FINITE)
1750 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1756 return setupRoutine(&primitive, &triangle, &polygon, &data);
1759 else // Diamond test convention
// Half-width offsets per endpoint, scaled by that endpoint's w
1773 float dx0 = lineWidth * 0.5f * P0.w / W;
1774 float dy0 = lineWidth * 0.5f * P0.w / H;
1776 float dx1 = lineWidth * 0.5f * P1.w / W;
1777 float dy1 = lineWidth * 0.5f * P1.w / H;
// Clip flags for the eight points P[0..7]
1780 C[0] = computeClipFlags(P[0], data);
1783 C[1] = computeClipFlags(P[1], data);
1786 C[2] = computeClipFlags(P[2], data);
1789 C[3] = computeClipFlags(P[3], data);
1792 C[4] = computeClipFlags(P[4], data);
1795 C[5] = computeClipFlags(P[5], data);
1798 C[6] = computeClipFlags(P[6], data);
1801 C[7] = computeClipFlags(P[7], data);
1803 if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1809 if(dx > dy) // Right
// The polygon handed to setup has six vertices taken from L
1850 Polygon polygon(L, 6);
1852 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1854 if(clipFlagsOr != Clipper::CLIP_FINITE)
1856 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1862 return setupRoutine(&primitive, &triangle, &polygon, &data);
// Expands the point at triangle.v0 into a four-vertex polygon sized by the clamped
// point size, clips it when needed, and returns the setup routine's result.
1869 bool Renderer::setupPoint(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1871 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1872 const SetupProcessor::State &state = draw.setupState;
1873 const DrawData &data = *draw.data;
1875 Vertex &v = triangle.v0;
1879 int pts = state.pointSizeRegister;
// One branch applies when a point size register is in use; the other takes the
// draw data's point size
1881 if(state.pointSizeRegister != Unused)
1887 pSize = data.point.pointSize[0];
1890 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1895 int pos = state.positionRegister;
// Corner offsets: point size times w times the halfPixelX / halfPixelY factors
1902 const float X = pSize * P[0].w * data.halfPixelX[0];
1903 const float Y = pSize * P[0].w * data.halfPixelY[0];
1907 C[0] = computeClipFlags(P[0], data);
1911 C[1] = computeClipFlags(P[1], data);
1915 C[2] = computeClipFlags(P[2], data);
1919 C[3] = computeClipFlags(P[3], data);
// v1 and v2 start as copies of v0 and are then displaced by half the point size,
// expressed with a factor of 16 (compare Wx16 / Hx16)
1921 triangle.v1 = triangle.v0;
1922 triangle.v2 = triangle.v0;
1924 triangle.v1.X += iround(16 * 0.5f * pSize);
1925 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1927 Polygon polygon(P, 4);
// Continue only when the AND of all corner flags is exactly CLIP_FINITE
1929 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1931 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1933 if(clipFlagsOr != Clipper::CLIP_FINITE)
1935 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1941 return setupRoutine(&primitive, &triangle, &polygon, &data);
// Builds a clip-flag mask for position `v`: one bit per failed comparison against w,
// always OR-ed with Clipper::CLIP_FINITE. `data` is not read by the lines shown.
// NOTE(review): the terms below cover bits 0-4 only; confirm whether a further
// z test (bit 5) belongs between the last shifted term and CLIP_FINITE.
1947 unsigned int Renderer::computeClipFlags(const float4 &v, const DrawData &data)
1949 return ((v.x > v.w) << 0) |
1950 ((v.y > v.w) << 1) |
1951 ((v.z > v.w) << 2) |
1952 ((v.x < -v.w) << 3) |
1953 ((v.y < -v.w) << 4) |
1955 Clipper::CLIP_FINITE; // FIXME: xyz finite
1958 void Renderer::initializeThreads()
1960 unitCount = ceilPow2(threadCount);
1961 clusterCount = ceilPow2(threadCount);
1963 for(int i = 0; i < unitCount; i++)
1965 triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1966 primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1969 for(int i = 0; i < threadCount; i++)
1971 vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1972 vertexTask[i]->vertexCache.drawCall = -1;
1974 task[i].type = Task::SUSPEND;
1976 resume[i] = new Event();
1977 suspend[i] = new Event();
1979 Parameters parameters;
1980 parameters.threadIndex = i;
1981 parameters.renderer = this;
1983 exitThreads = false;
1984 worker[i] = new Thread(threadFunction, ¶meters);
1987 suspend[i]->signal();
// Shuts the worker threads down and releases what initializeThreads allocated:
// workers are resumed and joined, their Thread/Event objects deleted, and the
// vertex tasks and triangle/primitive batches deallocated.
1991 void Renderer::terminateThreads()
// Loop for as long as any thread is still counted as awake
1993 while(threadsAwake != 0)
1998 for(int thread = 0; thread < threadCount; thread++)
// Wake the worker, then block until it has finished
2003 resume[thread]->signal();
2004 worker[thread]->join();
2006 delete worker[thread];
2008 delete resume[thread];
2010 delete suspend[thread];
2011 suspend[thread] = 0;
2014 deallocate(vertexTask[thread]);
2015 vertexTask[thread] = 0;
// Fixed bound of 16 rather than unitCount — presumably the declared size of the
// batch arrays; TODO confirm against the class definition
2018 for(int i = 0; i < 16; i++)
2020 deallocate(triangleBatch[i]);
2021 triangleBatch[i] = 0;
2023 deallocate(primitiveBatch[i]);
2024 primitiveBatch[i] = 0;
2028 void Renderer::loadConstants(const VertexShader *vertexShader)
2030 if(!vertexShader) return;
2032 size_t count = vertexShader->getLength();
2034 for(size_t i = 0; i < count; i++)
2036 const Shader::Instruction *instruction = vertexShader->getInstruction(i);
2038 if(instruction->opcode == Shader::OPCODE_DEF)
2040 int index = instruction->dst.index;
2043 value[0] = instruction->src[0].value[0];
2044 value[1] = instruction->src[0].value[1];
2045 value[2] = instruction->src[0].value[2];
2046 value[3] = instruction->src[0].value[3];
2048 setVertexShaderConstantF(index, value);
2050 else if(instruction->opcode == Shader::OPCODE_DEFI)
2052 int index = instruction->dst.index;
2055 integer[0] = instruction->src[0].integer[0];
2056 integer[1] = instruction->src[0].integer[1];
2057 integer[2] = instruction->src[0].integer[2];
2058 integer[3] = instruction->src[0].integer[3];
2060 setVertexShaderConstantI(index, integer);
2062 else if(instruction->opcode == Shader::OPCODE_DEFB)
2064 int index = instruction->dst.index;
2065 int boolean = instruction->src[0].boolean[0];
2067 setVertexShaderConstantB(index, &boolean);
2072 void Renderer::loadConstants(const PixelShader *pixelShader)
2074 if(!pixelShader) return;
2076 size_t count = pixelShader->getLength();
2078 for(size_t i = 0; i < count; i++)
2080 const Shader::Instruction *instruction = pixelShader->getInstruction(i);
2082 if(instruction->opcode == Shader::OPCODE_DEF)
2084 int index = instruction->dst.index;
2087 value[0] = instruction->src[0].value[0];
2088 value[1] = instruction->src[0].value[1];
2089 value[2] = instruction->src[0].value[2];
2090 value[3] = instruction->src[0].value[3];
2092 setPixelShaderConstantF(index, value);
2094 else if(instruction->opcode == Shader::OPCODE_DEFI)
2096 int index = instruction->dst.index;
2099 integer[0] = instruction->src[0].integer[0];
2100 integer[1] = instruction->src[0].integer[1];
2101 integer[2] = instruction->src[0].integer[2];
2102 integer[3] = instruction->src[0].integer[3];
2104 setPixelShaderConstantI(index, integer);
2106 else if(instruction->opcode == Shader::OPCODE_DEFB)
2108 int index = instruction->dst.index;
2109 int boolean = instruction->src[0].boolean[0];
2111 setPixelShaderConstantB(index, &boolean);
2116 void Renderer::setIndexBuffer(Resource *indexBuffer)
2118 context->indexBuffer = indexBuffer;
2121 void Renderer::setMultiSampleMask(unsigned int mask)
2123 context->sampleMask = mask;
2126 void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2128 sw::transparencyAntialiasing = transparencyAntialiasing;
2131 bool Renderer::isReadWriteTexture(int sampler)
2133 for(int index = 0; index < RENDERTARGETS; index++)
2135 if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2141 if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
2149 void Renderer::updateClipper()
2151 if(updateClipPlanes)
2153 if(VertexProcessor::isFixedFunction()) // User plane in world space
2155 const Matrix &scissorWorld = getViewTransform();
2157 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2158 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2159 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2160 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2161 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2162 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2164 else // User plane in clip space
2166 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2167 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2168 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2169 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2170 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2171 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2174 updateClipPlanes = false;
2178 void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2180 ASSERT(sampler < TOTAL_IMAGE_UNITS);
2182 context->texture[sampler] = resource;
2185 void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2187 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2189 context->sampler[sampler].setTextureLevel(face, level, surface, type);
2192 void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2194 if(type == SAMPLER_PIXEL)
2196 PixelProcessor::setTextureFilter(sampler, textureFilter);
2200 VertexProcessor::setTextureFilter(sampler, textureFilter);
2204 void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2206 if(type == SAMPLER_PIXEL)
2208 PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2212 VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
2216 void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2218 if(type == SAMPLER_PIXEL)
2220 PixelProcessor::setGatherEnable(sampler, enable);
2224 VertexProcessor::setGatherEnable(sampler, enable);
2228 void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2230 if(type == SAMPLER_PIXEL)
2232 PixelProcessor::setAddressingModeU(sampler, addressMode);
2236 VertexProcessor::setAddressingModeU(sampler, addressMode);
2240 void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2242 if(type == SAMPLER_PIXEL)
2244 PixelProcessor::setAddressingModeV(sampler, addressMode);
2248 VertexProcessor::setAddressingModeV(sampler, addressMode);
2252 void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2254 if(type == SAMPLER_PIXEL)
2256 PixelProcessor::setAddressingModeW(sampler, addressMode);
2260 VertexProcessor::setAddressingModeW(sampler, addressMode);
2264 void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2266 if(type == SAMPLER_PIXEL)
2268 PixelProcessor::setReadSRGB(sampler, sRGB);
2272 VertexProcessor::setReadSRGB(sampler, sRGB);
2276 void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2278 if(type == SAMPLER_PIXEL)
2280 PixelProcessor::setMipmapLOD(sampler, bias);
2284 VertexProcessor::setMipmapLOD(sampler, bias);
2288 void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2290 if(type == SAMPLER_PIXEL)
2292 PixelProcessor::setBorderColor(sampler, borderColor);
2296 VertexProcessor::setBorderColor(sampler, borderColor);
2300 void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2302 if(type == SAMPLER_PIXEL)
2304 PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2308 VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2312 void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2314 if(type == SAMPLER_PIXEL)
2316 PixelProcessor::setSwizzleR(sampler, swizzleR);
2320 VertexProcessor::setSwizzleR(sampler, swizzleR);
2324 void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2326 if(type == SAMPLER_PIXEL)
2328 PixelProcessor::setSwizzleG(sampler, swizzleG);
2332 VertexProcessor::setSwizzleG(sampler, swizzleG);
2336 void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2338 if(type == SAMPLER_PIXEL)
2340 PixelProcessor::setSwizzleB(sampler, swizzleB);
2344 VertexProcessor::setSwizzleB(sampler, swizzleB);
2348 void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2350 if(type == SAMPLER_PIXEL)
2352 PixelProcessor::setSwizzleA(sampler, swizzleA);
2356 VertexProcessor::setSwizzleA(sampler, swizzleA);
2360 void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2362 context->setPointSpriteEnable(pointSpriteEnable);
2365 void Renderer::setPointScaleEnable(bool pointScaleEnable)
2367 context->setPointScaleEnable(pointScaleEnable);
2370 void Renderer::setLineWidth(float width)
2372 context->lineWidth = width;
// Sets the constant depth bias.
// NOTE(review): presumably stores `bias` the same way setSlopeDepthBias stores its
// argument — confirm which member receives it.
2375 void Renderer::setDepthBias(float bias)
2380 void Renderer::setSlopeDepthBias(float slopeBias)
2382 slopeDepthBias = slopeBias;
2385 void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
2387 context->rasterizerDiscard = rasterizerDiscard;
2390 void Renderer::setPixelShader(const PixelShader *shader)
2392 context->pixelShader = shader;
2394 loadConstants(shader);
2397 void Renderer::setVertexShader(const VertexShader *shader)
2399 context->vertexShader = shader;
2401 loadConstants(shader);
2404 void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2406 for(int i = 0; i < DRAW_COUNT; i++)
2408 if(drawCall[i]->psDirtyConstF < index + count)
2410 drawCall[i]->psDirtyConstF = index + count;
2414 for(int i = 0; i < count; i++)
2416 PixelProcessor::setFloatConstant(index + i, value);
2421 void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2423 for(int i = 0; i < DRAW_COUNT; i++)
2425 if(drawCall[i]->psDirtyConstI < index + count)
2427 drawCall[i]->psDirtyConstI = index + count;
2431 for(int i = 0; i < count; i++)
2433 PixelProcessor::setIntegerConstant(index + i, value);
2438 void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2440 for(int i = 0; i < DRAW_COUNT; i++)
2442 if(drawCall[i]->psDirtyConstB < index + count)
2444 drawCall[i]->psDirtyConstB = index + count;
2448 for(int i = 0; i < count; i++)
2450 PixelProcessor::setBooleanConstant(index + i, *boolean);
2455 void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2457 for(int i = 0; i < DRAW_COUNT; i++)
2459 if(drawCall[i]->vsDirtyConstF < index + count)
2461 drawCall[i]->vsDirtyConstF = index + count;
2465 for(int i = 0; i < count; i++)
2467 VertexProcessor::setFloatConstant(index + i, value);
2472 void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2474 for(int i = 0; i < DRAW_COUNT; i++)
2476 if(drawCall[i]->vsDirtyConstI < index + count)
2478 drawCall[i]->vsDirtyConstI = index + count;
2482 for(int i = 0; i < count; i++)
2484 VertexProcessor::setIntegerConstant(index + i, value);
2489 void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2491 for(int i = 0; i < DRAW_COUNT; i++)
2493 if(drawCall[i]->vsDirtyConstB < index + count)
2495 drawCall[i]->vsDirtyConstB = index + count;
2499 for(int i = 0; i < count; i++)
2501 VertexProcessor::setBooleanConstant(index + i, *boolean);
2506 void Renderer::setModelMatrix(const Matrix &M, int i)
2508 VertexProcessor::setModelMatrix(M, i);
2511 void Renderer::setViewMatrix(const Matrix &V)
2513 VertexProcessor::setViewMatrix(V);
2514 updateClipPlanes = true;
2517 void Renderer::setBaseMatrix(const Matrix &B)
2519 VertexProcessor::setBaseMatrix(B);
2520 updateClipPlanes = true;
2523 void Renderer::setProjectionMatrix(const Matrix &P)
2525 VertexProcessor::setProjectionMatrix(P);
2526 updateClipPlanes = true;
2529 void Renderer::addQuery(Query *query)
2531 queries.push_back(query);
2534 void Renderer::removeQuery(Query *query)
2536 queries.remove(query);
// Reports the renderer's thread count.
// NOTE(review): presumably returns the threadCount member used by resetTimers — confirm.
2540 int Renderer::getThreadCount()
2545 int64_t Renderer::getVertexTime(int thread)
2547 return vertexTime[thread];
2550 int64_t Renderer::getSetupTime(int thread)
2552 return setupTime[thread];
2555 int64_t Renderer::getPixelTime(int thread)
2557 return pixelTime[thread];
2560 void Renderer::resetTimers()
2562 for(int thread = 0; thread < threadCount; thread++)
2564 vertexTime[thread] = 0;
2565 setupTime[thread] = 0;
2566 pixelTime[thread] = 0;
2571 void Renderer::setViewport(const Viewport &viewport)
2573 this->viewport = viewport;
2576 void Renderer::setScissor(const Rect &scissor)
2578 this->scissor = scissor;
2581 void Renderer::setClipFlags(int flags)
2583 clipFlags = flags << 8; // Bottom 8 bits used by legacy frustum
// Stores user clip plane `index` and flags the clip planes for recomputation.
// The plane is written only when `index` is below MAX_CLIP_PLANES; updateClipPlanes
// is set regardless.
2586 void Renderer::setClipPlane(unsigned int index, const float plane[4])
2588 if(index < MAX_CLIP_PLANES)
2590 userPlane[index] = plane;
// Consumed by updateClipper, which rebuilds clipPlane[] from userPlane[]
2594 updateClipPlanes = true;
2597 void Renderer::updateConfiguration(bool initialUpdate)
2599 bool newConfiguration = swiftConfig->hasNewConfiguration();
2601 if(newConfiguration || initialUpdate)
2605 SwiftConfig::Configuration configuration = {};
2606 swiftConfig->getConfiguration(configuration);
2608 precacheVertex = !newConfiguration && configuration.precache;
2609 precacheSetup = !newConfiguration && configuration.precache;
2610 precachePixel = !newConfiguration && configuration.precache;
2612 VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2613 PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2614 SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2616 switch(configuration.textureSampleQuality)
2618 case 0: Sampler::setFilterQuality(FILTER_POINT); break;
2619 case 1: Sampler::setFilterQuality(FILTER_LINEAR); break;
2620 case 2: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2621 default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2624 switch(configuration.mipmapQuality)
2626 case 0: Sampler::setMipmapQuality(MIPMAP_POINT); break;
2627 case 1: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2628 default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2631 setPerspectiveCorrection(configuration.perspectiveCorrection);
2633 switch(configuration.transcendentalPrecision)
2636 logPrecision = APPROXIMATE;
2637 expPrecision = APPROXIMATE;
2638 rcpPrecision = APPROXIMATE;
2639 rsqPrecision = APPROXIMATE;
2642 logPrecision = PARTIAL;
2643 expPrecision = PARTIAL;
2644 rcpPrecision = PARTIAL;
2645 rsqPrecision = PARTIAL;
2648 logPrecision = ACCURATE;
2649 expPrecision = ACCURATE;
2650 rcpPrecision = ACCURATE;
2651 rsqPrecision = ACCURATE;
2654 logPrecision = WHQL;
2655 expPrecision = WHQL;
2656 rcpPrecision = WHQL;
2657 rsqPrecision = WHQL;
2660 logPrecision = IEEE;
2661 expPrecision = IEEE;
2662 rcpPrecision = IEEE;
2663 rsqPrecision = IEEE;
2666 logPrecision = ACCURATE;
2667 expPrecision = ACCURATE;
2668 rcpPrecision = ACCURATE;
2669 rsqPrecision = ACCURATE;
2673 switch(configuration.transparencyAntialiasing)
2675 case 0: transparencyAntialiasing = TRANSPARENCY_NONE; break;
2676 case 1: transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2677 default: transparencyAntialiasing = TRANSPARENCY_NONE; break;
2680 switch(configuration.threadCount)
2682 case -1: threadCount = CPUID::coreCount(); break;
2683 case 0: threadCount = CPUID::processAffinity(); break;
2684 default: threadCount = configuration.threadCount; break;
2687 CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2688 CPUID::setEnableSSSE3(configuration.enableSSSE3);
2689 CPUID::setEnableSSE3(configuration.enableSSE3);
2690 CPUID::setEnableSSE2(configuration.enableSSE2);
2691 CPUID::setEnableSSE(configuration.enableSSE);
2693 for(int pass = 0; pass < 10; pass++)
2695 optimization[pass] = configuration.optimization[pass];
2698 forceWindowed = configuration.forceWindowed;
2699 complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2700 postBlendSRGB = configuration.postBlendSRGB;
2701 exactColorRounding = configuration.exactColorRounding;
2702 forceClearRegisters = configuration.forceClearRegisters;
2705 minPrimitives = configuration.minPrimitives;
2706 maxPrimitives = configuration.maxPrimitives;
2710 if(!initialUpdate && !worker[0])
2712 initializeThreads();