1 // SwiftShader Software Renderer
3 // Copyright(c) 2005-2012 TransGaming Inc.
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
12 #include "Renderer.hpp"
14 #include "Clipper.hpp"
16 #include "FrameBuffer.hpp"
18 #include "Surface.hpp"
20 #include "Primitive.hpp"
21 #include "Polygon.hpp"
22 #include "SwiftConfig.hpp"
23 #include "MutexLock.hpp"
26 #include "Resource.hpp"
27 #include "Constants.hpp"
29 #include "Reactor/Reactor.hpp"
// File-level renderer settings.
// disableServer is handed to the SwiftConfig constructor when a Renderer is created.
35 bool disableServer = true;
// Renderer::draw() compares its primitive count against these two bounds
// (count < minPrimitives || count > maxPrimitives).
38 unsigned int minPrimitives = 1;
39 unsigned int maxPrimitives = 1 << 21;
// Settings declared extern here; their definitions are not visible in this view.
// The Renderer constructor overwrites halfIntegerCoordinates, symmetricNormalizedDepth,
// booleanFaceRegister, fullPixelPositionRegister and exactColorRounding from its arguments.
44 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates
45 extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1]
46 extern bool booleanFaceRegister;
47 extern bool fullPixelPositionRegister;
49 extern bool forceWindowed;
// Tested by Renderer::draw() while it sets up the per-draw depth data.
50 extern bool complementaryDepthBuffer;
51 extern bool postBlendSRGB;
52 extern bool exactColorRounding;
53 extern TransparencyAntialiasing transparencyAntialiasing;
54 extern bool forceClearRegisters;
56 extern bool precacheVertex;
57 extern bool precacheSetup;
58 extern bool precachePixel;
// Precision selectors for transcendental operations, all defaulting to ACCURATE.
// Renderer::threadFunction() turns on flush-to-zero and denormals-are-zero when
// logPrecision < IEEE.
65 TranscendentalPrecision logPrecision = ACCURATE;
66 TranscendentalPrecision expPrecision = ACCURATE;
67 TranscendentalPrecision rcpPrecision = ACCURATE;
68 TranscendentalPrecision rsqPrecision = ACCURATE;
69 bool perspectiveCorrection = true;
81 vsDirtyConstF = 256 + 1;
91 data = (DrawData*)allocate(sizeof(DrawData));
92 data->constants = &constants;
// Constructs the renderer.
// The five boolean arguments are copied into the sw:: globals of the same name.
// The constructor then resets render target 0, creates the clipper, flags the
// matrices and clip planes for recomputation, creates the pool of DrawCall
// objects, initializes the scheduling progress trackers, applies the
// SwiftConfig configuration and creates the `sync` resource.
102 Renderer::Renderer(Context *context, bool halfIntegerCoordinates, bool symmetricNormalizedDepth, bool booleanFaceRegister, bool fullPixelPositionRegister, bool exactColorRounding) : context(context), VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), viewport()
// Publish the constructor arguments through the sw:: globals.
104 sw::halfIntegerCoordinates = halfIntegerCoordinates;
105 sw::symmetricNormalizedDepth = symmetricNormalizedDepth;
106 sw::booleanFaceRegister = booleanFaceRegister;
107 sw::fullPixelPositionRegister = fullPixelPositionRegister;
108 sw::exactColorRounding = exactColorRounding;
// Render target 0 starts out unset (null).
110 setRenderTarget(0, 0);
111 clipper = new Clipper();
// Mark all derived transform state as needing an update.
113 updateViewMatrix = true;
114 updateBaseMatrix = true;
115 updateProjectionMatrix = true;
116 updateClipPlanes = true;
// NOTE(review): the body of this 16-iteration loop is not visible in this view.
122 for(int i = 0; i < 16; i++)
132 resumeApp = new Event();
// Per-unit triangle and primitive batches start out unallocated (null).
140 for(int i = 0; i < 16; i++)
142 triangleBatch[i] = 0;
143 primitiveBatch[i] = 0;
// Allocate the DRAW_COUNT DrawCall objects; drawList initially mirrors drawCall.
// The destructor deletes them again.
146 for(int draw = 0; draw < DRAW_COUNT; draw++)
148 drawCall[draw] = new DrawCall();
149 drawList[draw] = drawCall[draw];
// Reset the progress trackers of the 16 primitive units and 16 pixel clusters.
152 for(int unit = 0; unit < 16; unit++)
154 primitiveProgress[unit].init();
157 for(int cluster = 0; cluster < 16; cluster++)
159 pixelProgress[cluster].init();
// Create the configuration object and apply it immediately.
164 swiftConfig = new SwiftConfig(disableServer);
165 updateConfiguration(true);
// Resource locked by draw() (PRIVATE) and synchronize() (PUBLIC).
167 sync = new Resource(0);
// Destroys the renderer.
// Visible here: deletes the DRAW_COUNT DrawCall objects allocated by the constructor.
// NOTE(review): any other teardown steps are not visible in this view.
170 Renderer::~Renderer()
180 for(int draw = 0; draw < DRAW_COUNT; draw++)
182 delete drawCall[draw];
// Copies the sRect region of `source` into the dRect region of `dest`.
// All arguments, including `filter`, are forwarded unchanged to the blitter member.
188 void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter)
190 blitter.blit(source, sRect, dest, dRect, filter);
// Forwards a 3D surface copy from `source` to `dest` to the blitter member.
193 void Renderer::blit3D(Surface *source, Surface *dest)
195 blitter.blit3D(source, dest);
// Queues a draw of `count` primitives of type `drawType`.
//   drawType    - primitive topology / index format; stored in the context and the draw call.
//   indexOffset - byte offset added to the locked index buffer pointer.
//   count       - number of primitives; compared against minPrimitives/maxPrimitives.
//   update      - when true, processor states and routines are regenerated.
// For every super-sample pass this fills a free DrawCall and its DrawData with a
// snapshot of the current state (routines, streams, textures, shader constants,
// viewport, clip planes, render targets, scissor), locks the resources involved,
// and publishes the draw call for the worker tasks. finishRendering() releases
// the locks and routine bindings once the last batch is done.
198 void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
// NOTE(review): the action taken for an out-of-range count is not visible in this view.
201 if(count < minPrimitives || count > maxPrimitives)
207 context->drawType = drawType;
209 updateConfiguration();
212 int ss = context->getSuperSampleCount();
213 int ms = context->getMultiSampleCount();
// One pass per super-sample; q selects the sample offsets and the buffer slice (q * ms).
215 for(int q = 0; q < ss; q++)
217 int oldMultiSampleMask = context->multiSampleMask;
// Extract the ms bits of sampleMask that belong to pass q.
// NOTE(review): the shift by (32 - ms) requires ms >= 1 — confirm getMultiSampleCount() never returns 0.
218 context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
// NOTE(review): the statement executed for an empty mask is not visible in this view.
220 if(!context->multiSampleMask)
225 sync->lock(sw::PRIVATE);
227 Routine *vertexRoutine;
228 Routine *setupRoutine;
229 Routine *pixelRoutine;
// Regenerate states and routines when requested or when the sample mask changed.
231 if(update || oldMultiSampleMask != context->multiSampleMask)
233 vertexState = VertexProcessor::update();
234 setupState = SetupProcessor::update();
235 pixelState = PixelProcessor::update();
237 vertexRoutine = VertexProcessor::routine(vertexState);
238 setupRoutine = SetupProcessor::routine(setupState);
239 pixelRoutine = PixelProcessor::routine(pixelState);
// Primitives per batch shrink with the multisample count.
242 int batch = batchSize / ms;
// Pick the primitive setup function from the draw type and fill mode.
244 int (*setupPrimitives)(Renderer *renderer, int batch, int count);
246 if(context->isDrawTriangle())
248 switch(context->fillMode)
251 setupPrimitives = setupSolidTriangles;
254 setupPrimitives = setupWireframeTriangle;
258 setupPrimitives = setupVertexTriangle;
261 default: ASSERT(false);
264 else if(context->isDrawLine())
266 setupPrimitives = setupLines;
270 setupPrimitives = setupPoints;
// Look for a free DrawCall; finishRendering() marks a finished one with references == -1.
277 for(int i = 0; i < DRAW_COUNT; i++)
279 if(drawCall[i]->references == -1)
282 drawList[nextDraw % DRAW_COUNT] = draw;
295 DrawData *data = draw->data;
// Keep the active queries alive for this draw: bump each reference count and
// copy the list. finishRendering() decrements the counts again.
297 if(queries.size() != 0)
299 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
301 atomicIncrement(&(*query)->reference);
304 draw->queries = new std::list<Query*>(queries);
307 draw->drawType = drawType;
308 draw->batchSize = batch;
// Bind the routines for the lifetime of the draw; finishRendering() unbinds them.
310 vertexRoutine->bind();
311 setupRoutine->bind();
312 pixelRoutine->bind();
314 draw->vertexRoutine = vertexRoutine;
315 draw->setupRoutine = setupRoutine;
316 draw->pixelRoutine = pixelRoutine;
// NOTE(review): the doubled ';' below is a harmless empty statement.
317 draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();;
318 draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
319 draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
320 draw->setupPrimitives = setupPrimitives;
321 draw->setupState = setupState;
// Snapshot the 16 vertex input streams and lock the ones that have a resource.
323 for(int i = 0; i < 16; i++)
325 draw->vertexStream[i] = context->input[i].resource;
326 data->input[i] = context->input[i].buffer;
327 data->stride[i] = context->input[i].stride;
329 if(draw->vertexStream[i])
331 draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
// Lock the index buffer, if any, and apply the caller's offset to the pointer.
335 if(context->indexBuffer)
337 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
340 draw->indexBuffer = context->indexBuffer;
// Clear all texture slots, then lock the textures the pixel stage samples.
342 for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
344 draw->texture[sampler] = 0;
347 for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
349 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
351 draw->texture[sampler] = context->texture[sampler];
352 draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE); // If the texture is both read and written, use the same read/write lock as render targets
354 data->mipmap[sampler] = context->sampler[sampler].getTextureData();
// Pixel shader constants: copy as many entries as each dirty count says, then
// reset the count. Only the first 8 float constants are also copied in the
// word4 (cW) form.
358 if(context->pixelShader)
360 if(draw->psDirtyConstF)
362 memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
363 memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
364 draw->psDirtyConstF = 0;
367 if(draw->psDirtyConstI)
369 memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
370 draw->psDirtyConstI = 0;
373 if(draw->psDirtyConstB)
375 memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
376 draw->psDirtyConstB = 0;
// Shader versions up to 0x0104 also need the uniforms of the 8 texture stages.
380 if(context->pixelShaderVersion() <= 0x0104)
382 for(int stage = 0; stage < 8; stage++)
384 if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
386 data->textureStage[stage] = context->textureStage[stage].uniforms;
// Vertex shader: for version >= 0x0300, lock the vertex textures, which sit after
// the pixel texture units in the texture array; then copy dirty constants.
392 if(context->vertexShader)
394 if(context->vertexShader->getVersion() >= 0x0300)
396 for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
398 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
400 draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
401 draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
403 data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
408 if(draw->vsDirtyConstF)
410 memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
411 draw->vsDirtyConstF = 0;
414 if(draw->vsDirtyConstI)
416 memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
417 draw->vsDirtyConstI = 0;
420 if(draw->vsDirtyConstB)
422 memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
423 draw->vsDirtyConstB = 0;
// Mark every vertex shader constant dirty again.
// NOTE(review): the branch enclosing these three assignments is not visible here;
// presumably the no-vertex-shader path — confirm.
430 draw->vsDirtyConstF = 256 + 1;
431 draw->vsDirtyConstI = 16;
432 draw->vsDirtyConstB = 16;
// Two stencil states are stored: `stencil` in slot 0 and `stencilCCW` in slot 1.
435 if(pixelState.stencilActive)
437 data->stencil[0] = stencil;
438 data->stencil[1] = stencilCCW;
// NOTE(review): the bodies of the fog and point branches are not visible in this view.
441 if(pixelState.fogActive)
446 if(setupState.isDrawPoint)
451 data->lineWidth = context->lineWidth;
453 data->factor = factor;
// Alpha-to-coverage: thresholds are spread around the alpha reference, limited
// by its distance to the nearer end of [0, 1].
// NOTE(review): the conditions choosing the four-threshold or the two-threshold
// set are not visible in this view.
455 if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
457 float ref = (float)context->alphaReference * (1.0f / 255.0f);
458 float margin = sw::min(ref, 1.0f - ref);
462 data->a2c0 = replicate(ref - margin * 0.6f);
463 data->a2c1 = replicate(ref - margin * 0.2f);
464 data->a2c2 = replicate(ref + margin * 0.2f);
465 data->a2c3 = replicate(ref + margin * 0.6f);
469 data->a2c0 = replicate(ref - margin * 0.3f);
470 data->a2c1 = replicate(ref + margin * 0.3f);
// Reset the per-cluster occlusion counters that finishRendering() adds to the queries.
475 if(pixelState.occlusionEnabled)
477 for(int cluster = 0; cluster < clusterCount; cluster++)
479 data->occlusion[cluster] = 0;
// Reset the per-cluster cycle counters that finishRendering() adds to the profiler.
484 for(int cluster = 0; cluster < clusterCount; cluster++)
486 for(int i = 0; i < PERF_TIMERS; i++)
488 data->cycles[i][cluster] = 0;
// Viewport terms: half extents (W, H), center (X0, Y0) and depth range (N, F).
495 float W = 0.5f * viewport.width;
496 float H = 0.5f * viewport.height;
497 float X0 = viewport.x0 + W;
498 float Y0 = viewport.y0 + H;
499 float N = viewport.minZ;
500 float F = viewport.maxZ;
// NOTE(review): the bodies of the next two conditionals are not visible in this
// view; the Z stored in data->depthRange below is presumably computed there.
503 if(context->isDrawTriangle(false))
508 if(complementaryDepthBuffer)
// Sample offset tables: the row is log2(super-sample count), the column is pass q.
514 static const float X[5][16] = // Fragment offsets
516 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample
517 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples
518 {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples
519 {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples
520 {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f} // 16 samples
523 static const float Y[5][16] = // Fragment offsets
525 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample
526 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples
527 {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples
528 {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples
529 {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f} // 16 samples
532 int s = sw::log2(ss);
// Viewport scale and offset are stored multiplied by 16; the sample offset and
// half-pixel terms are divided by the half extents.
534 data->Wx16 = replicate(W * 16);
535 data->Hx16 = replicate(H * 16);
536 data->X0x16 = replicate(X0 * 16);
537 data->Y0x16 = replicate(Y0 * 16);
538 data->XXXX = replicate(X[s][q] / W);
539 data->YYYY = replicate(Y[s][q] / H);
540 data->halfPixelX = replicate(0.5f / W);
541 data->halfPixelY = replicate(0.5f / H);
542 data->viewportHeight = abs(viewport.height);
543 data->slopeDepthBias = slopeDepthBias;
544 data->depthRange = Z;
546 draw->clipFlags = clipFlags;
// Copy only the clip planes whose flag is set.
550 if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
551 if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
552 if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
553 if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
554 if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
555 if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
// Lock up to four color targets for read/write at slice q * ms; they are
// unlocked in finishRendering().
561 for(int index = 0; index < 4; index++)
563 draw->renderTarget[index] = context->renderTarget[index];
565 if(draw->renderTarget[index])
567 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
568 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
569 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
// Same for the depth and stencil planes of the depth-stencil surface.
573 draw->depthStencil = context->depthStencil;
575 if(draw->depthStencil)
577 data->depthBuffer = (float*)context->depthStencil->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
578 data->depthPitchB = context->depthStencil->getInternalPitchB();
579 data->depthSliceB = context->depthStencil->getInternalSliceB();
581 data->stencilBuffer = (unsigned char*)context->depthStencil->lockStencil(q * ms, MANAGED);
582 data->stencilPitchB = context->depthStencil->getStencilPitchB();
583 data->stencilSliceB = context->depthStencil->getStencilSliceB();
589 data->scissorX0 = scissor.x0;
590 data->scissorX1 = scissor.x1;
591 data->scissorY0 = scissor.y0;
592 data->scissorY1 = scissor.y1;
// Number of batches, i.e. count / batch rounded up; finishRendering() decrements it.
598 draw->references = (count + batch - 1) / batch;
// NOTE(review): the statements executed while schedulerMutex is held are not visible in this view.
600 schedulerMutex.lock();
602 schedulerMutex.unlock();
609 task[0].type = Task::RESUME;
// Worker thread entry point.
// `parameters` points to a Parameters object carrying the renderer and this
// thread's index. The function sets up floating-point behavior and then runs
// the renderer's thread loop.
616 void Renderer::threadFunction(void *parameters)
618 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
619 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
// Below IEEE precision, denormals are flushed to zero and treated as zero on input.
621 if(logPrecision < IEEE)
623 CPUID::setFlushToZero(true);
624 CPUID::setDenormalsAreZero(true);
627 renderer->threadLoop(threadIndex);
// Body of a worker thread: runs taskLoop(), which returns once this thread's
// task type is SUSPEND, then signals suspend[threadIndex] and waits on
// resume[threadIndex].
// NOTE(review): the enclosing loop construct and its exit condition are not visible in this view.
630 void Renderer::threadLoop(int threadIndex)
634 taskLoop(threadIndex);
636 suspend[threadIndex]->signal();
637 resume[threadIndex]->wait();
// Alternates between scheduling and executing tasks for the given thread until
// the task assigned to it has type SUSPEND.
641 void Renderer::taskLoop(int threadIndex)
643 while(task[threadIndex].type != Task::SUSPEND)
645 scheduleTask(threadIndex);
646 executeTask(threadIndex);
// Appends runnable tasks to the 32-entry task queue.
// Pixel tasks come first: an idle cluster is paired with a primitive unit whose
// processed batch is the next one that cluster has to render. Primitive tasks
// follow: units with no references are given the next batch of the current draw call.
// NOTE(review): queue-size updates and loop exits are not visible in this view.
// scheduleTask() calls this while holding schedulerMutex.
650 void Renderer::findAvailableTasks()
653 for(int cluster = 0; cluster < clusterCount; cluster++)
655 if(!pixelProgress[cluster].executing)
657 for(int unit = 0; unit < unitCount; unit++)
659 if(primitiveProgress[unit].references > 0) // Contains processed primitives
661 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
663 if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive) // Previous primitives have been rendered
665 Task &task = taskQueue[qHead];
666 task.type = Task::PIXELS;
667 task.primitiveUnit = unit;
668 task.pixelCluster = cluster;
// Claim the cluster; finishRendering() clears this flag.
670 pixelProgress[cluster].executing = true;
672 // Commit to the task queue
673 qHead = (qHead + 1) % 32;
684 // Find primitive tasks
685 if(currentDraw == nextDraw)
687 return; // No more primitives to process
690 for(int unit = 0; unit < unitCount; unit++)
692 DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
// All batches of this draw call have been handed out.
// NOTE(review): the step that advances currentDraw is not visible in this view.
694 if(draw->primitive >= draw->count)
698 if(currentDraw == nextDraw)
700 return; // No more primitives to process
703 draw = drawList[currentDraw % DRAW_COUNT];
706 if(!primitiveProgress[unit].references) // Task not already being executed and not still in use by a pixel unit
708 int primitive = draw->primitive;
709 int count = draw->count;
710 int batch = draw->batchSize;
712 primitiveProgress[unit].drawCall = currentDraw;
713 primitiveProgress[unit].firstPrimitive = primitive;
// The last batch of a draw call may be shorter than a full batch.
714 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
716 draw->primitive += batch;
718 Task &task = taskQueue[qHead];
719 task.type = Task::PRIMITIVES;
720 task.primitiveUnit = unit;
// -1 fails both the "== 0" (free) test above and the "> 0" (processed) test in
// the pixel-task search; executeTask() sets the count to clusterCount after setup.
722 primitiveProgress[unit].references = -1;
724 // Commit to the task queue
725 qHead = (qHead + 1) % 32;
// Picks the next task for the given thread while holding schedulerMutex.
// The queue is refilled when it holds fewer entries than
// (threadCount - threadsAwake + 1). The thread takes the entry at
// (qHead - qSize) % 32, and threads whose task is SUSPEND may be switched to
// RESUME. Otherwise the thread's own task becomes SUSPEND.
// NOTE(review): the conditions separating these paths, and the updates to qSize
// and threadsAwake, are not visible in this view.
731 void Renderer::scheduleTask(int threadIndex)
733 schedulerMutex.lock();
735 if((int)qSize < threadCount - threadsAwake + 1)
737 findAvailableTasks();
742 task[threadIndex] = taskQueue[(qHead - qSize) % 32];
745 if(threadsAwake != threadCount)
747 int wakeup = qSize - threadsAwake + 1;
749 for(int i = 0; i < threadCount && wakeup > 0; i++)
751 if(task[i].type == Task::SUSPEND)
754 task[i].type = Task::RESUME;
765 task[threadIndex].type = Task::SUSPEND;
770 schedulerMutex.unlock();
// Runs the task currently assigned to the given thread.
// PRIMITIVES: processes the vertices of the unit's batch, runs primitive setup,
// and records the visible primitive count.
// Pixel work: runs the draw call's pixel routine for one cluster on the unit's
// primitives, then calls finishRendering().
// Elapsed ticks are added to the per-thread vertex/setup/pixel timers.
// NOTE(review): remaining case labels and any profiling guards are not visible in this view.
773 void Renderer::executeTask(int threadIndex)
776 int64_t startTick = Timer::ticks();
779 switch(task[threadIndex].type)
781 case Task::PRIMITIVES:
783 int unit = task[threadIndex].primitiveUnit;
785 int input = primitiveProgress[unit].firstPrimitive;
786 int count = primitiveProgress[unit].primitiveCount;
787 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
788 int (*setupPrimitives)(Renderer *renderer, int batch, int count) = draw->setupPrimitives;
// draw->count is passed as the `loop` parameter of processPrimitiveVertices().
790 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
793 int64_t time = Timer::ticks();
794 vertexTime[threadIndex] += time - startTick;
798 int visible = setupPrimitives(this, unit, count);
800 primitiveProgress[unit].visible = visible;
// One reference per cluster; finishRendering() decrements it once per pixel task.
801 primitiveProgress[unit].references = clusterCount;
804 setupTime[threadIndex] += Timer::ticks() - startTick;
// Pixel processing for the unit/cluster pair chosen by findAvailableTasks().
810 int unit = task[threadIndex].primitiveUnit;
811 int visible = primitiveProgress[unit].visible;
815 int cluster = task[threadIndex].pixelCluster;
816 Primitive *primitive = primitiveBatch[unit];
817 DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
818 DrawData *data = draw->data;
819 PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
821 pixelRoutine(primitive, visible, cluster, data);
824 finishRendering(task[threadIndex]);
827 pixelTime[threadIndex] += Timer::ticks() - startTick;
// Takes a PUBLIC lock on the `sync` resource, the same resource draw() locks as PRIVATE.
// NOTE(review): the matching unlock is not visible in this view.
840 void Renderer::synchronize()
842 sync->lock(sw::PUBLIC);
// Bookkeeping after a pixel task has rendered one primitive batch on one cluster.
// It advances the cluster's progress and drops one reference on the primitive
// unit and on the draw call. On the release path it also adds the cycle and
// occlusion counters to the profiler and queries, unlocks every resource that
// draw() locked, unbinds the routines and marks the DrawCall free.
// NOTE(review): the conditions on the two atomicDecrement results that guard the
// release path are not visible in this view.
846 void Renderer::finishRendering(Task &pixelTask)
848 int unit = pixelTask.primitiveUnit;
849 int cluster = pixelTask.pixelCluster;
851 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
852 DrawData &data = *draw.data;
853 int primitive = primitiveProgress[unit].firstPrimitive;
854 int count = primitiveProgress[unit].primitiveCount;
856 pixelProgress[cluster].processedPrimitives = primitive + count;
// When the draw call is fully rendered on this cluster, move on to the next one.
858 if(pixelProgress[cluster].processedPrimitives >= draw.count)
860 pixelProgress[cluster].drawCall++;
861 pixelProgress[cluster].processedPrimitives = 0;
864 int ref = atomicDecrement(&primitiveProgress[unit].references);
868 ref = atomicDecrement(&draw.references);
// Add the per-cluster cycle counters to the profiler totals.
// The loop variable shadows the `cluster` local declared above.
873 for(int cluster = 0; cluster < clusterCount; cluster++)
875 for(int i = 0; i < PERF_TIMERS; i++)
877 profiler.cycles[i] += data.cycles[i][cluster];
// Add the occlusion counts to each query and release the reference taken in draw().
884 for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
888 for(int cluster = 0; cluster < clusterCount; cluster++)
890 atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
893 atomicDecrement(&query->reference);
// Unlock the color targets and the depth and stencil planes.
900 for(int i = 0; i < 4; i++)
902 if(draw.renderTarget[i])
904 draw.renderTarget[i]->unlockInternal();
908 if(draw.depthStencil)
910 draw.depthStencil->unlockInternal();
911 draw.depthStencil->unlockStencil();
914 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
918 draw.texture[i]->unlock();
// NOTE(review): draw() locks vertexStream[0..15], but this loop is bounded by
// TEXTURE_IMAGE_UNITS — confirm that constant is 16, otherwise streams stay
// locked or the array is read out of range.
922 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
924 if(draw.vertexStream[i])
926 draw.vertexStream[i]->unlock();
932 draw.indexBuffer->unlock();
935 draw.vertexRoutine->unbind();
936 draw.setupRoutine->unbind();
937 draw.pixelRoutine->unbind();
// -1 marks the DrawCall as free; draw() looks for this value.
941 draw.references = -1;
// The cluster can now be given another pixel task by findAvailableTasks().
946 pixelProgress[cluster].executing = false;
// Builds the vertex index triples for one batch of primitives and runs the
// vertex routine on them.
//   unit          - primitive unit whose triangle batch receives the output.
//   start         - index of the first primitive of the batch within the draw call.
//   triangleCount - number of primitives in the batch (also used for points and lines).
//   loop          - modulus used by the line-loop cases so the last segment wraps
//                   around to the first vertex; executeTask() passes draw->count.
//   thread        - index of the calling thread; selects the VertexTask and its cache.
// Every primitive gets three indices. Points and lines repeat an index to fill
// the unused slots.
// NOTE(review): several case labels, the index-advance statements inside the
// loops and the break statements are not visible in this view.
949 void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
951 Triangle *triangle = renderer->triangleBatch[unit];
952 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
953 DrawData *data = draw->data;
954 VertexTask *task = vertexTask[thread];
956 const void *indices = data->indices;
957 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
// The thread's vertex cache belongs to one draw call; clear it when the draw call changes.
959 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
961 task->vertexCache.clear();
962 task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
// NOTE(review): fixed capacity of 128 primitives; nothing visible here checks triangleCount against it.
965 unsigned int batch[128][3]; // FIXME: Adjust to dynamic batch size
967 switch(draw->drawType)
// Non-indexed case whose label and loop body are not visible in this view.
971 unsigned int index = start;
973 for(unsigned int i = 0; i < triangleCount; i++)
// Non-indexed, two vertices per primitive starting at 2 * start; presumably the line list case.
985 unsigned int index = 2 * start;
987 for(unsigned int i = 0; i < triangleCount; i++)
989 batch[i][0] = index + 0;
990 batch[i][1] = index + 1;
991 batch[i][2] = index + 1;
// Non-indexed, consecutive vertex pairs starting at `start`; presumably the line strip case.
999 unsigned int index = start;
1001 for(unsigned int i = 0; i < triangleCount; i++)
1003 batch[i][0] = index + 0;
1004 batch[i][1] = index + 1;
1005 batch[i][2] = index + 1;
// Non-indexed, like the previous case but wrapped modulo `loop`; presumably the line loop case.
1013 unsigned int index = start;
1015 for(unsigned int i = 0; i < triangleCount; i++)
1017 batch[i][0] = (index + 0) % loop;
1018 batch[i][1] = (index + 1) % loop;
1019 batch[i][2] = (index + 1) % loop;
// Three consecutive vertices per triangle.
1025 case DRAW_TRIANGLELIST:
1027 unsigned int index = 3 * start;
1029 for(unsigned int i = 0; i < triangleCount; i++)
1031 batch[i][0] = index + 0;
1032 batch[i][1] = index + 1;
1033 batch[i][2] = index + 2;
// The parity of `index` decides which of the next two vertices comes second and which third.
1039 case DRAW_TRIANGLESTRIP:
1041 unsigned int index = start;
1043 for(unsigned int i = 0; i < triangleCount; i++)
1045 batch[i][0] = index + 0;
1046 batch[i][1] = index + (index & 1) + 1;
1047 batch[i][2] = index + (~index & 1) + 1;
// NOTE(review): the assignment of batch[i][2] for the fan is not visible in this view.
1053 case DRAW_TRIANGLEFAN:
1055 unsigned int index = start;
1057 for(unsigned int i = 0; i < triangleCount; i++)
1059 batch[i][0] = index + 1;
1060 batch[i][1] = index + 2;
// Indexed point lists (8/16/32-bit indices): one index per point, written to all three slots.
1067 case DRAW_INDEXEDPOINTLIST8:
1069 const unsigned char *index = (const unsigned char*)indices + start;
1071 for(unsigned int i = 0; i < triangleCount; i++)
1073 batch[i][0] = *index;
1074 batch[i][1] = *index;
1075 batch[i][2] = *index;
1081 case DRAW_INDEXEDPOINTLIST16:
1083 const unsigned short *index = (const unsigned short*)indices + start;
1085 for(unsigned int i = 0; i < triangleCount; i++)
1087 batch[i][0] = *index;
1088 batch[i][1] = *index;
1089 batch[i][2] = *index;
1095 case DRAW_INDEXEDPOINTLIST32:
1097 const unsigned int *index = (const unsigned int*)indices + start;
1099 for(unsigned int i = 0; i < triangleCount; i++)
1101 batch[i][0] = *index;
1102 batch[i][1] = *index;
1103 batch[i][2] = *index;
// Indexed line lists: two indices per line starting at 2 * start; the second index fills the third slot.
1109 case DRAW_INDEXEDLINELIST8:
1111 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1113 for(unsigned int i = 0; i < triangleCount; i++)
1115 batch[i][0] = index[0];
1116 batch[i][1] = index[1];
1117 batch[i][2] = index[1];
1123 case DRAW_INDEXEDLINELIST16:
1125 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1127 for(unsigned int i = 0; i < triangleCount; i++)
1129 batch[i][0] = index[0];
1130 batch[i][1] = index[1];
1131 batch[i][2] = index[1];
1137 case DRAW_INDEXEDLINELIST32:
1139 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1141 for(unsigned int i = 0; i < triangleCount; i++)
1143 batch[i][0] = index[0];
1144 batch[i][1] = index[1];
1145 batch[i][2] = index[1];
// Indexed line strips: the index pointer starts at `start` and each segment reads index[0] and index[1].
1151 case DRAW_INDEXEDLINESTRIP8:
1153 const unsigned char *index = (const unsigned char*)indices + start;
1155 for(unsigned int i = 0; i < triangleCount; i++)
1157 batch[i][0] = index[0];
1158 batch[i][1] = index[1];
1159 batch[i][2] = index[1];
1165 case DRAW_INDEXEDLINESTRIP16:
1167 const unsigned short *index = (const unsigned short*)indices + start;
1169 for(unsigned int i = 0; i < triangleCount; i++)
1171 batch[i][0] = index[0];
1172 batch[i][1] = index[1];
1173 batch[i][2] = index[1];
1179 case DRAW_INDEXEDLINESTRIP32:
1181 const unsigned int *index = (const unsigned int*)indices + start;
1183 for(unsigned int i = 0; i < triangleCount; i++)
1185 batch[i][0] = index[0];
1186 batch[i][1] = index[1];
1187 batch[i][2] = index[1];
// Indexed line loops: positions are taken modulo `loop`, so the final segment ends at index[0].
1193 case DRAW_INDEXEDLINELOOP8:
1195 const unsigned char *index = (const unsigned char*)indices;
1197 for(unsigned int i = 0; i < triangleCount; i++)
1199 batch[i][0] = index[(start + i + 0) % loop];
1200 batch[i][1] = index[(start + i + 1) % loop];
1201 batch[i][2] = index[(start + i + 1) % loop];
1205 case DRAW_INDEXEDLINELOOP16:
1207 const unsigned short *index = (const unsigned short*)indices;
1209 for(unsigned int i = 0; i < triangleCount; i++)
1211 batch[i][0] = index[(start + i + 0) % loop];
1212 batch[i][1] = index[(start + i + 1) % loop];
1213 batch[i][2] = index[(start + i + 1) % loop];
1217 case DRAW_INDEXEDLINELOOP32:
1219 const unsigned int *index = (const unsigned int*)indices;
1221 for(unsigned int i = 0; i < triangleCount; i++)
1223 batch[i][0] = index[(start + i + 0) % loop];
1224 batch[i][1] = index[(start + i + 1) % loop];
1225 batch[i][2] = index[(start + i + 1) % loop];
// Indexed triangle lists: three indices per triangle starting at 3 * start.
1229 case DRAW_INDEXEDTRIANGLELIST8:
1231 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1233 for(unsigned int i = 0; i < triangleCount; i++)
1235 batch[i][0] = index[0];
1236 batch[i][1] = index[1];
1237 batch[i][2] = index[2];
1243 case DRAW_INDEXEDTRIANGLELIST16:
1245 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1247 for(unsigned int i = 0; i < triangleCount; i++)
1249 batch[i][0] = index[0];
1250 batch[i][1] = index[1];
1251 batch[i][2] = index[2];
1257 case DRAW_INDEXEDTRIANGLELIST32:
1259 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1261 for(unsigned int i = 0; i < triangleCount; i++)
1263 batch[i][0] = index[0];
1264 batch[i][1] = index[1];
1265 batch[i][2] = index[2];
// Indexed triangle strips: the parity of (start + i) decides which of index[1] and
// index[2] comes second and which third.
1271 case DRAW_INDEXEDTRIANGLESTRIP8:
1273 const unsigned char *index = (const unsigned char*)indices + start;
1275 for(unsigned int i = 0; i < triangleCount; i++)
1277 batch[i][0] = index[0];
1278 batch[i][1] = index[((start + i) & 1) + 1];
1279 batch[i][2] = index[(~(start + i) & 1) + 1];
1285 case DRAW_INDEXEDTRIANGLESTRIP16:
1287 const unsigned short *index = (const unsigned short*)indices + start;
1289 for(unsigned int i = 0; i < triangleCount; i++)
1291 batch[i][0] = index[0];
1292 batch[i][1] = index[((start + i) & 1) + 1];
1293 batch[i][2] = index[(~(start + i) & 1) + 1];
1299 case DRAW_INDEXEDTRIANGLESTRIP32:
1301 const unsigned int *index = (const unsigned int*)indices + start;
1303 for(unsigned int i = 0; i < triangleCount; i++)
1305 batch[i][0] = index[0];
1306 batch[i][1] = index[((start + i) & 1) + 1];
1307 batch[i][2] = index[(~(start + i) & 1) + 1];
// Indexed triangle fans: every triangle uses index[0] as its third vertex.
1313 case DRAW_INDEXEDTRIANGLEFAN8:
1315 const unsigned char *index = (const unsigned char*)indices;
1317 for(unsigned int i = 0; i < triangleCount; i++)
1319 batch[i][0] = index[start + i + 1];
1320 batch[i][1] = index[start + i + 2];
1321 batch[i][2] = index[0];
1325 case DRAW_INDEXEDTRIANGLEFAN16:
1327 const unsigned short *index = (const unsigned short*)indices;
1329 for(unsigned int i = 0; i < triangleCount; i++)
1331 batch[i][0] = index[start + i + 1];
1332 batch[i][1] = index[start + i + 2];
1333 batch[i][2] = index[0];
1337 case DRAW_INDEXEDTRIANGLEFAN32:
1339 const unsigned int *index = (const unsigned int*)indices;
1341 for(unsigned int i = 0; i < triangleCount; i++)
1343 batch[i][0] = index[start + i + 1];
1344 batch[i][1] = index[start + i + 2];
1345 batch[i][2] = index[0];
// Four vertices produce two triangles, (0, 1, 2) and (0, 2, 3); presumably the quad list case.
// NOTE(review): the loop writes batch[i+1], so an odd triangleCount writes one entry
// beyond triangleCount — confirm callers always pass an even count here.
1351 unsigned int index = 4 * start / 2;
1353 for(unsigned int i = 0; i < triangleCount; i += 2)
1355 batch[i+0][0] = index + 0;
1356 batch[i+0][1] = index + 1;
1357 batch[i+0][2] = index + 2;
1359 batch[i+1][0] = index + 0;
1360 batch[i+1][1] = index + 2;
1361 batch[i+1][2] = index + 3;
// Run the vertex routine over all three indices of every primitive, writing into the unit's triangle batch.
1371 task->vertexCount = triangleCount * 3;
1372 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
// Primitive setup for solid (filled) triangles of one batch.
//   renderer - renderer owning the batches, draw list and clipper.
//   unit     - primitive unit whose triangle batch is processed.
//   count    - number of triangles in the batch.
// Returns an int that executeTask() stores as the unit's visible primitive count.
// NOTE(review): the counting and return statements are not visible in this view.
1375 int Renderer::setupSolidTriangles(Renderer *renderer, int unit, int count)
1377 Triangle *triangle = renderer->triangleBatch[unit];
1378 Primitive *primitive = renderer->primitiveBatch[unit];
1380 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1381 SetupProcessor::State &state = draw.setupState;
1382 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1384 int ms = state.multiSample;
1385 int pos = state.positionRegister;
1386 const DrawData *data = draw.data;
1389 for(int i = 0; i < count; i++, triangle++)
1391 Vertex &v0 = triangle->v0;
1392 Vertex &v1 = triangle->v1;
1393 Vertex &v2 = triangle->v2;
// Process the triangle only when the AND of the three vertices' clip flags equals CLIP_FINITE.
1395 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1397 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
// Clipping is needed when any vertex flag or draw-level clip flag beyond CLIP_FINITE is set.
1399 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1401 if(clipFlagsOr != Clipper::CLIP_FINITE)
1403 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
// Run the generated setup routine on the polygon.
1409 if(setupRoutine(primitive, triangle, &polygon, data))
// Primitive setup for a triangle drawn as a wireframe: its edges are set up as lines.
// Returns 0 early when the triangle is culled; the final return value is produced
// by statements not visible in this view.
// triangle[1] and triangle[2] are used as scratch entries for the second and
// third edge, so the batch must hold at least three Triangle entries.
1420 int Renderer::setupWireframeTriangle(Renderer *renderer, int unit, int count)
1422 Triangle *triangle = renderer->triangleBatch[unit];
1423 Primitive *primitive = renderer->primitiveBatch[unit];
1426 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1427 SetupProcessor::State &state = draw.setupState;
1428 SetupProcessor::RoutinePointer setupRoutine = draw.setupPointer;
1430 const Vertex &v0 = triangle[0].v0;
1431 const Vertex &v1 = triangle[0].v1;
1432 const Vertex &v2 = triangle[0].v2;
// Signed quantity built from the x, y and w of the three vertices; its sign is
// tested for culling and half of it is stored as the primitive's area below.
1434 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1436 if(state.cullMode == CULL_CLOCKWISE)
1438 if(d >= 0) return 0;
1440 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1442 if(d <= 0) return 0;
// Build the remaining two edges: (v1, v2) and (v2, v0). triangle[0] already holds (v0, v1).
1446 triangle[1].v0 = v1;
1447 triangle[1].v1 = v2;
1448 triangle[2].v0 = v2;
1449 triangle[2].v1 = v0;
// With flat shading, both colors of every edge vertex are taken from triangle[0].v0.
1451 if(state.color[0][0].flat) // FIXME
1453 for(int i = 0; i < 2; i++)
1455 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1456 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1457 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1458 triangle[2].v1.C[i] = triangle[0].v0.C[i];
// Set up each of the three edges as a line.
// NOTE(review): the pointer advances between iterations are not visible in this view.
1462 for(int i = 0; i < 3; i++)
1464 if(setupLine(renderer, *primitive, *triangle, draw))
1466 primitive->area = 0.5f * d;
// Primitive setup for a triangle drawn as its three vertices: each is set up as a point.
// Returns 0 early when the triangle is culled; the final return value is produced
// by statements not visible in this view.
// triangle[1] and triangle[2] are used as scratch entries for the second and third vertex.
1478 int Renderer::setupVertexTriangle(Renderer *renderer, int unit, int count)
1480 Triangle *triangle = renderer->triangleBatch[unit];
1481 Primitive *primitive = renderer->primitiveBatch[unit];
1484 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1485 SetupProcessor::State &state = draw.setupState;
1487 const Vertex &v0 = triangle[0].v0;
1488 const Vertex &v1 = triangle[0].v1;
1489 const Vertex &v2 = triangle[0].v2;
// Same signed quantity as in setupWireframeTriangle(); its sign is tested for culling.
1491 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1493 if(state.cullMode == CULL_CLOCKWISE)
1495 if(d >= 0) return 0;
1497 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1499 if(d <= 0) return 0;
// Put v1 and v2 into the v0 slot of the next two entries so each can be set up as a point.
1503 triangle[1].v0 = v1;
1504 triangle[2].v0 = v2;
// NOTE(review): the pointer advances between iterations are not visible in this view.
1506 for(int i = 0; i < 3; i++)
1508 if(setupPoint(renderer, *primitive, *triangle, draw))
1510 primitive->area = 0.5f * d;
// Primitive setup for a batch of `count` lines: calls setupLine() once per loop iteration.
// NOTE(review): the pointer advances, the counting of successful setups and the
// return statement are not visible in this view.
1522 int Renderer::setupLines(Renderer *renderer, int unit, int count)
1524 Triangle *triangle = renderer->triangleBatch[unit];
1525 Primitive *primitive = renderer->primitiveBatch[unit];
1528 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1529 SetupProcessor::State &state = draw.setupState;
1531 int ms = state.multiSample;
1533 for(int i = 0; i < count; i++)
1535 if(setupLine(renderer, *primitive, *triangle, draw))
// Primitive setup for a batch of `count` points: calls setupPoint() once per loop iteration.
// NOTE(review): the pointer advances, the counting of successful setups and the
// return statement are not visible in this view.
1547 int Renderer::setupPoints(Renderer *renderer, int unit, int count)
1549 Triangle *triangle = renderer->triangleBatch[unit];
1550 Primitive *primitive = renderer->primitiveBatch[unit];
1553 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1554 SetupProcessor::State &state = draw.setupState;
1556 int ms = state.multiSample;
1558 for(int i = 0; i < count; i++)
1560 if(setupPoint(renderer, *primitive, *triangle, draw))
// Builds a polygon around the line from triangle.v0 to triangle.v1, clips it when
// needed and passes it to the draw's setup routine, whose result is returned.
// Two constructions are present: a 4-vertex rectangle, currently disabled by
// `if(false)`, and the "diamond test convention" path, which uses 8 corner points
// and a 6-vertex polygon.
// NOTE(review): many short source lines (early returns, array declarations, the
// diamond vertex offsets) are elided from this listing; comments below cover only
// what is visible.
1572 bool Renderer::setupLine(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1574 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1575 const SetupProcessor::State &state = draw.setupState;
1576 const DrawData &data = *draw.data;
1578 float lineWidth = data.lineWidth;
1580 Vertex &v0 = triangle.v0;
1581 Vertex &v1 = triangle.v1;
// Endpoint positions are read from the register selected by the setup state.
1583 int pos = state.positionRegister;
1585 const float4 &P0 = v0.v[pos];
1586 const float4 &P1 = v1.v[pos];
// Both endpoints have non-positive w (the action taken is on an elided line).
1588 if(P0.w <= 0 && P1.w <= 0)
// W and H are Wx16[0] and Hx16[0] scaled down by 16.
1593 const float W = data.Wx16[0] * (1.0f / 16.0f);
1594 const float H = data.Hx16[0] * (1.0f / 16.0f);
// Difference of the w-divided endpoint coordinates, scaled by W and H.
1596 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1597 float dy = H * (P1.y / P1.w - P0.y / P0.w);
// Zero-length line (the action taken is on an elided line).
1599 if(dx == 0 && dy == 0)
// Constant-false condition: the rectangle path below is never executed.
1604 if(false) // Rectangle
// Factor that turns (dx, dy) into a vector of length lineWidth / 2.
1614 float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
// Offsets multiplied by each endpoint's w and divided by W or H.
1619 float dx0w = dx * P0.w / W;
1620 float dy0h = dy * P0.w / H;
1621 float dx0h = dx * P0.w / H;
1622 float dy0w = dy * P0.w / W;
1624 float dx1w = dx * P1.w / W;
1625 float dy1h = dy * P1.w / H;
1626 float dx1h = dx * P1.w / H;
1627 float dy1w = dy * P1.w / W;
// Offset the four rectangle corners and compute their clip flags.
1629 P[0].x += -dy0w + -dx0w;
1630 P[0].y += -dx0h + +dy0h;
1631 C[0] = computeClipFlags(P[0], data);
1633 P[1].x += -dy1w + +dx1w;
1634 P[1].y += -dx1h + +dy1h;
1635 C[1] = computeClipFlags(P[1], data);
1637 P[2].x += +dy1w + +dx1w;
1638 P[2].y += +dx1h + -dy1h;
1639 C[2] = computeClipFlags(P[2], data);
1641 P[3].x += +dy0w + -dx0w;
1642 P[3].y += +dx0h + +dy0h;
1643 C[3] = computeClipFlags(P[3], data);
// Proceed only if the AND of all corner flags is exactly CLIP_FINITE,
// i.e. no other flag bit is set on every corner.
1645 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1647 Polygon polygon(P, 4);
// Union of the corner flags and the draw's own clip flags.
1649 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
// Clip only when some flag other than CLIP_FINITE is set.
1651 if(clipFlagsOr != Clipper::CLIP_FINITE)
1653 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1659 return setupRoutine(&primitive, &triangle, &polygon, &data);
1662 else // Diamond test convention
// Half the line width multiplied by each endpoint's w and divided by W or H.
1676 float dx0 = lineWidth * 0.5f * P0.w / W;
1677 float dy0 = lineWidth * 0.5f * P0.w / H;
1679 float dx1 = lineWidth * 0.5f * P1.w / W;
1680 float dy1 = lineWidth * 0.5f * P1.w / H;
// Clip flags for the eight points (their offsets are on elided lines).
1683 C[0] = computeClipFlags(P[0], data);
1686 C[1] = computeClipFlags(P[1], data);
1689 C[2] = computeClipFlags(P[2], data);
1692 C[3] = computeClipFlags(P[3], data);
1695 C[4] = computeClipFlags(P[4], data);
1698 C[5] = computeClipFlags(P[5], data);
1701 C[6] = computeClipFlags(P[6], data);
1704 C[7] = computeClipFlags(P[7], data);
// Same AND test as in the rectangle path, over all eight points.
1706 if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
// Direction-dependent selection of vertices (the selection itself is elided).
1712 if(dx > dy) // Right
1753 Polygon polygon(L, 6);
1755 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1757 if(clipFlagsOr != Clipper::CLIP_FINITE)
1759 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1765 return setupRoutine(&primitive, &triangle, &polygon, &data);
// Builds a 4-vertex polygon for the point at triangle.v0, clips it when needed and
// passes it to the draw's setup routine, whose result is returned.
// NOTE(review): the declarations of pSize, P and C and the corner offsets are on
// source lines elided from this listing.
1772 bool Renderer::setupPoint(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1774 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1775 const SetupProcessor::State &state = draw.setupState;
1776 const DrawData &data = *draw.data;
1778 Vertex &v = triangle.v0;
1782 int pts = state.pointSizeRegister;
// NOTE(review): 0xF presumably marks "no point size register"; the branch body is elided.
1784 if(state.pointSizeRegister != 0xF)
// Point size taken from the draw data.
1790 pSize = data.point.pointSize[0];
// Restrict the size to the draw's [pointSizeMin, pointSizeMax] range.
1793 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1798 int pos = state.positionRegister;
// Size multiplied by the vertex w and by the halfPixelX/halfPixelY factors.
1805 const float X = pSize * P[0].w * data.halfPixelX[0];
1806 const float Y = pSize * P[0].w * data.halfPixelY[0];
// Clip flags for the four corners (their offsets are on elided lines).
1810 C[0] = computeClipFlags(P[0], data);
1814 C[1] = computeClipFlags(P[1], data);
1818 C[2] = computeClipFlags(P[2], data);
1822 C[3] = computeClipFlags(P[3], data);
// v1 and v2 start as copies of v0 and are then moved by iround(16 * 0.5f * pSize)
// along X and Y respectively.
1824 triangle.v1 = triangle.v0;
1825 triangle.v2 = triangle.v0;
1827 triangle.v1.X += iround(16 * 0.5f * pSize);
1828 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1830 Polygon polygon(P, 4);
// Proceed only if the AND of all corner flags is exactly CLIP_FINITE.
1832 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1834 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
// Clip only when some flag other than CLIP_FINITE is set.
1836 if(clipFlagsOr != Clipper::CLIP_FINITE)
1838 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1844 return setupRoutine(&primitive, &triangle, &polygon, &data);
// Returns a bit mask describing on which sides the position v lies outside the
// range bounded by +/- v.w, OR-ed with Clipper::CLIP_FINITE.
// x and y are first shifted by halfPixelX/halfPixelY times w.
// NOTE(review): source line 1860 is elided from this listing, so one term of the
// expression (presumably bit 5) is not visible.
1850 unsigned int Renderer::computeClipFlags(const float4 &v, const DrawData &data)
1852 float clX = v.x + data.halfPixelX[0] * v.w;
1853 float clY = v.y + data.halfPixelY[0] * v.w;
// bit 0: x > w, bit 1: y > w, bit 2: z > w, bit 3: x < -w, bit 4: y < -w
1855 return ((clX > v.w) << 0) |
1856 ((clY > v.w) << 1) |
1857 ((v.z > v.w) << 2) |
1858 ((clX < -v.w) << 3) |
1859 ((clY < -v.w) << 4) |
1861 Clipper::CLIP_FINITE; // FIXME: xyz finite
// Allocates the per-unit triangle/primitive batches and the per-thread vertex
// tasks and events, then creates one worker Thread per thread index.
// NOTE(review): several short source lines are elided from this listing, so the
// exact nesting of the statements below is not visible here.
1864 void Renderer::initializeThreads()
// Unit and cluster counts are both ceilPow2(threadCount).
1866 unitCount = ceilPow2(threadCount);
1867 clusterCount = ceilPow2(threadCount);
// One batch of batchSize triangles and primitives per unit.
1869 for(int i = 0; i < unitCount; i++)
1871 triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1872 primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1875 for(int i = 0; i < threadCount; i++)
1877 vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
// -1 marks the vertex cache as not belonging to any draw call yet.
1878 vertexTask[i]->vertexCache.drawCall = -1;
1880 task[i].type = Task::SUSPEND;
1882 resume[i] = new Event();
1883 suspend[i] = new Event();
// NOTE(review): `parameters` is a local whose address is handed to the new
// thread; presumably Thread's constructor does not return before the thread
// has read it - confirm against Thread's implementation.
1885 Parameters parameters;
1886 parameters.threadIndex = i;
1887 parameters.renderer = this;
1889 exitThreads = false;
// Fixed: the argument had been corrupted to a pilcrow sign followed by "meters"
// ("&para" decoded as an HTML entity); it is the address of `parameters`.
1890 worker[i] = new Thread(threadFunction, &parameters);
1893 suspend[i]->signal();
// Stops the worker threads and releases what initializeThreads() allocated:
// signals and joins each worker, deletes the thread and its events, frees the
// vertex tasks and finally the triangle/primitive batches.
// NOTE(review): several short source lines are elided from this listing (e.g. the
// body of the wait loop and some pointer resets).
1897 void Renderer::terminateThreads()
// Loop while any thread is still awake (loop body is on elided lines).
1899 while(threadsAwake != 0)
1904 for(int thread = 0; thread < threadCount; thread++)
// Signal the worker's resume event, then wait for the thread to finish.
1909 resume[thread]->signal();
1910 worker[thread]->join();
1912 delete worker[thread];
1914 delete resume[thread];
1916 delete suspend[thread];
1917 suspend[thread] = 0;
1920 deallocate(vertexTask[thread]);
1921 vertexTask[thread] = 0;
// Frees all 16 batch slots, independent of unitCount.
1924 for(int i = 0; i < 16; i++)
1926 deallocate(triangleBatch[i]);
1927 triangleBatch[i] = 0;
1929 deallocate(primitiveBatch[i]);
1930 primitiveBatch[i] = 0;
// Scans every instruction of the vertex shader and applies the constants defined
// by OPCODE_DEF, OPCODE_DEFI and OPCODE_DEFB instructions through the
// setVertexShaderConstantF/I/B setters. Does nothing for a null shader.
// NOTE(review): the declarations of the local `value` and `integer` arrays are on
// source lines elided from this listing.
1934 void Renderer::loadConstants(const VertexShader *vertexShader)
1936 if(!vertexShader) return;
1938 size_t count = vertexShader->getLength();
1940 for(size_t i = 0; i < count; i++)
1942 const Shader::Instruction *instruction = vertexShader->getInstruction(i);
// Float constant: destination index plus four values from the first source.
1944 if(instruction->opcode == Shader::OPCODE_DEF)
1946 int index = instruction->dst.index;
1949 value[0] = instruction->src[0].value[0];
1950 value[1] = instruction->src[0].value[1];
1951 value[2] = instruction->src[0].value[2];
1952 value[3] = instruction->src[0].value[3];
1954 setVertexShaderConstantF(index, value);
// Integer constant: four integers from the first source.
1956 else if(instruction->opcode == Shader::OPCODE_DEFI)
1958 int index = instruction->dst.index;
1961 integer[0] = instruction->src[0].integer[0];
1962 integer[1] = instruction->src[0].integer[1];
1963 integer[2] = instruction->src[0].integer[2];
1964 integer[3] = instruction->src[0].integer[3];
1966 setVertexShaderConstantI(index, integer);
// Boolean constant: a single value from the first source.
1968 else if(instruction->opcode == Shader::OPCODE_DEFB)
1970 int index = instruction->dst.index;
1971 int boolean = instruction->src[0].boolean[0];
1973 setVertexShaderConstantB(index, &boolean);
// Scans every instruction of the pixel shader and applies the constants defined
// by OPCODE_DEF, OPCODE_DEFI and OPCODE_DEFB instructions through the
// setPixelShaderConstantF/I/B setters. Does nothing for a null shader.
// NOTE(review): the declarations of the local `value` and `integer` arrays are on
// source lines elided from this listing.
1978 void Renderer::loadConstants(const PixelShader *pixelShader)
1980 if(!pixelShader) return;
1982 size_t count = pixelShader->getLength();
1984 for(size_t i = 0; i < count; i++)
1986 const Shader::Instruction *instruction = pixelShader->getInstruction(i);
// Float constant: destination index plus four values from the first source.
1988 if(instruction->opcode == Shader::OPCODE_DEF)
1990 int index = instruction->dst.index;
1993 value[0] = instruction->src[0].value[0];
1994 value[1] = instruction->src[0].value[1];
1995 value[2] = instruction->src[0].value[2];
1996 value[3] = instruction->src[0].value[3];
1998 setPixelShaderConstantF(index, value);
// Integer constant: four integers from the first source.
2000 else if(instruction->opcode == Shader::OPCODE_DEFI)
2002 int index = instruction->dst.index;
2005 integer[0] = instruction->src[0].integer[0];
2006 integer[1] = instruction->src[0].integer[1];
2007 integer[2] = instruction->src[0].integer[2];
2008 integer[3] = instruction->src[0].integer[3];
2010 setPixelShaderConstantI(index, integer);
// Boolean constant: a single value from the first source.
2012 else if(instruction->opcode == Shader::OPCODE_DEFB)
2014 int index = instruction->dst.index;
2015 int boolean = instruction->src[0].boolean[0];
2017 setPixelShaderConstantB(index, &boolean);
// Stores the given index buffer resource in the context.
2022 void Renderer::setIndexBuffer(Resource *indexBuffer)
2024 context->indexBuffer = indexBuffer;
// Stores the given mask as the context's sample mask.
2027 void Renderer::setMultiSampleMask(unsigned int mask)
2029 context->sampleMask = mask;
// Assigns the namespace-level sw::transparencyAntialiasing variable; the qualified
// name is required because the parameter has the same name.
2032 void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2034 sw::transparencyAntialiasing = transparencyAntialiasing;
// Checks whether the texture bound to `sampler` is the same resource as one of the
// four render targets or the depth-stencil surface.
// NOTE(review): the return statements are on source lines elided from this listing.
2037 bool Renderer::isReadWriteTexture(int sampler)
2039 for(int index = 0; index < 4; index++)
// Null render targets are skipped before their resource is compared.
2041 if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2047 if(context->depthStencil && context->texture[sampler] == context->depthStencil->getResource())
// Recomputes the clip planes from the user planes when updateClipPlanes is set,
// then clears the flag. Only planes whose CLIP_PLANEn bit is set in clipFlags are
// updated.
2055 void Renderer::updateClipper()
2057 if(updateClipPlanes)
2059 if(VertexProcessor::isFixedFunction()) // User plane in world space
// Despite its name, scissorWorld holds the result of getViewTransform().
2061 const Matrix &scissorWorld = getViewTransform();
2063 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2064 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2065 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2066 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2067 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2068 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
// Planes are copied without transformation in this case.
2070 else // User plane in clip space
2072 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2073 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2074 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2075 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2076 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2077 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2080 updateClipPlanes = false;
// Binds a resource to the given sampler slot of the context.
// The sampler index is checked against TOTAL_IMAGE_UNITS by an ASSERT only.
2084 void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2086 ASSERT(sampler < TOTAL_IMAGE_UNITS);
2088 context->texture[sampler] = resource;
// Forwards one face/mipmap level surface to the context's sampler object.
// The ASSERT requires sampler < TOTAL_IMAGE_UNITS, face < 6 and level < MIPMAP_LEVELS.
2091 void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2093 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2095 context->sampler[sampler].setTextureLevel(face, level, surface, type);
// Sets the texture filter of a sampler: SAMPLER_PIXEL goes to PixelProcessor.
// NOTE(review): the VertexProcessor call is presumably the `else` branch; the
// line introducing it is elided from this listing.
2098 void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2100 if(type == SAMPLER_PIXEL)
2102 PixelProcessor::setTextureFilter(sampler, textureFilter);
2106 VertexProcessor::setTextureFilter(sampler, textureFilter);
// Sets the mipmap filter of a sampler: SAMPLER_PIXEL goes to PixelProcessor.
// NOTE(review): the VertexProcessor call is presumably the `else` branch; the
// line introducing it is elided from this listing.
2110 void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2112 if(type == SAMPLER_PIXEL)
2114 PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2118 VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
// Sets the gather-enable flag of a sampler: SAMPLER_PIXEL goes to PixelProcessor.
// NOTE(review): the VertexProcessor call is presumably the `else` branch; the
// line introducing it is elided from this listing.
2122 void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2124 if(type == SAMPLER_PIXEL)
2126 PixelProcessor::setGatherEnable(sampler, enable);
2130 VertexProcessor::setGatherEnable(sampler, enable);
// Sets the U addressing mode of a sampler: SAMPLER_PIXEL goes to PixelProcessor.
// NOTE(review): the VertexProcessor call is presumably the `else` branch; the
// line introducing it is elided from this listing.
2134 void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2136 if(type == SAMPLER_PIXEL)
2138 PixelProcessor::setAddressingModeU(sampler, addressMode);
2142 VertexProcessor::setAddressingModeU(sampler, addressMode);
// Sets the V addressing mode of a sampler: SAMPLER_PIXEL goes to PixelProcessor.
// NOTE(review): the VertexProcessor call is presumably the `else` branch; the
// line introducing it is elided from this listing.
2146 void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2148 if(type == SAMPLER_PIXEL)
2150 PixelProcessor::setAddressingModeV(sampler, addressMode);
2154 VertexProcessor::setAddressingModeV(sampler, addressMode);
// Sets the W addressing mode of a sampler: SAMPLER_PIXEL goes to PixelProcessor.
// NOTE(review): the VertexProcessor call is presumably the `else` branch; the
// line introducing it is elided from this listing.
2158 void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2160 if(type == SAMPLER_PIXEL)
2162 PixelProcessor::setAddressingModeW(sampler, addressMode);
2166 VertexProcessor::setAddressingModeW(sampler, addressMode);
// Sets the sRGB-read flag of a sampler: SAMPLER_PIXEL goes to PixelProcessor.
// NOTE(review): the VertexProcessor call is presumably the `else` branch; the
// line introducing it is elided from this listing.
2170 void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2172 if(type == SAMPLER_PIXEL)
2174 PixelProcessor::setReadSRGB(sampler, sRGB);
2178 VertexProcessor::setReadSRGB(sampler, sRGB);
// Sets the mipmap LOD bias of a sampler: SAMPLER_PIXEL goes to PixelProcessor.
// NOTE(review): the VertexProcessor call is presumably the `else` branch; the
// line introducing it is elided from this listing.
2182 void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2184 if(type == SAMPLER_PIXEL)
2186 PixelProcessor::setMipmapLOD(sampler, bias);
2190 VertexProcessor::setMipmapLOD(sampler, bias);
// Sets the border color of a sampler: SAMPLER_PIXEL goes to PixelProcessor.
// NOTE(review): the VertexProcessor call is presumably the `else` branch; the
// line introducing it is elided from this listing.
2194 void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2196 if(type == SAMPLER_PIXEL)
2198 PixelProcessor::setBorderColor(sampler, borderColor);
2202 VertexProcessor::setBorderColor(sampler, borderColor);
// Sets the maximum anisotropy of a sampler: SAMPLER_PIXEL goes to PixelProcessor.
// NOTE(review): the VertexProcessor call is presumably the `else` branch; the
// line introducing it is elided from this listing.
2206 void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2208 if(type == SAMPLER_PIXEL)
2210 PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2214 VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
// Forwards the point sprite enable flag to the context.
2218 void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2220 context->setPointSpriteEnable(pointSpriteEnable);
// Forwards the point scale enable flag to the context.
2223 void Renderer::setPointScaleEnable(bool pointScaleEnable)
2225 context->setPointScaleEnable(pointScaleEnable);
// Stores the line width in the context.
2228 void Renderer::setLineWidth(float width)
2230 context->lineWidth = width;
// Depth bias setter.
// NOTE(review): the body is on source lines elided from this listing; presumably
// it stores `bias` in a member - confirm against the full source.
2233 void Renderer::setDepthBias(float bias)
// Stores the slope depth bias in the slopeDepthBias member.
2238 void Renderer::setSlopeDepthBias(float slopeBias)
2240 slopeDepthBias = slopeBias;
// Makes `shader` the context's pixel shader and loads the constants it defines.
// loadConstants() returns immediately for a null shader.
2243 void Renderer::setPixelShader(const PixelShader *shader)
2245 context->pixelShader = shader;
2247 loadConstants(shader);
// Makes `shader` the context's vertex shader and loads the constants it defines.
// loadConstants() returns immediately for a null shader.
2250 void Renderer::setVertexShader(const VertexShader *shader)
2252 context->vertexShader = shader;
2254 loadConstants(shader);
// Sets `count` pixel shader float constants starting at `index`.
// First raises psDirtyConstF of every draw call slot to at least index + count,
// then passes the constants to PixelProcessor one at a time.
// NOTE(review): the advancement of `value` between iterations is presumably on a
// source line elided from this listing.
2257 void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2259 for(int i = 0; i < DRAW_COUNT; i++)
2261 if(drawCall[i]->psDirtyConstF < index + count)
2263 drawCall[i]->psDirtyConstF = index + count;
2267 for(int i = 0; i < count; i++)
2269 PixelProcessor::setFloatConstant(index + i, value);
// Sets `count` pixel shader integer constants starting at `index`.
// First raises psDirtyConstI of every draw call slot to at least index + count,
// then passes the constants to PixelProcessor one at a time.
// NOTE(review): the advancement of `value` between iterations is presumably on a
// source line elided from this listing.
2274 void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2276 for(int i = 0; i < DRAW_COUNT; i++)
2278 if(drawCall[i]->psDirtyConstI < index + count)
2280 drawCall[i]->psDirtyConstI = index + count;
2284 for(int i = 0; i < count; i++)
2286 PixelProcessor::setIntegerConstant(index + i, value);
// Sets `count` pixel shader boolean constants starting at `index`.
// First raises psDirtyConstB of every draw call slot to at least index + count,
// then passes the constants to PixelProcessor one at a time.
// NOTE(review): the advancement of `boolean` between iterations is presumably on
// a source line elided from this listing.
2291 void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2293 for(int i = 0; i < DRAW_COUNT; i++)
2295 if(drawCall[i]->psDirtyConstB < index + count)
2297 drawCall[i]->psDirtyConstB = index + count;
2301 for(int i = 0; i < count; i++)
2303 PixelProcessor::setBooleanConstant(index + i, *boolean);
// Sets `count` vertex shader float constants starting at `index`.
// First raises vsDirtyConstF of every draw call slot to at least index + count,
// then passes the constants to VertexProcessor one at a time.
// NOTE(review): the advancement of `value` between iterations is presumably on a
// source line elided from this listing.
2308 void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2310 for(int i = 0; i < DRAW_COUNT; i++)
2312 if(drawCall[i]->vsDirtyConstF < index + count)
2314 drawCall[i]->vsDirtyConstF = index + count;
2318 for(int i = 0; i < count; i++)
2320 VertexProcessor::setFloatConstant(index + i, value);
// Sets `count` vertex shader integer constants starting at `index`.
// First raises vsDirtyConstI of every draw call slot to at least index + count,
// then passes the constants to VertexProcessor one at a time.
// NOTE(review): the advancement of `value` between iterations is presumably on a
// source line elided from this listing.
2325 void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2327 for(int i = 0; i < DRAW_COUNT; i++)
2329 if(drawCall[i]->vsDirtyConstI < index + count)
2331 drawCall[i]->vsDirtyConstI = index + count;
2335 for(int i = 0; i < count; i++)
2337 VertexProcessor::setIntegerConstant(index + i, value);
// Sets `count` vertex shader boolean constants starting at `index`.
// First raises vsDirtyConstB of every draw call slot to at least index + count,
// then passes the constants to VertexProcessor one at a time.
// NOTE(review): the advancement of `boolean` between iterations is presumably on
// a source line elided from this listing.
2342 void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2344 for(int i = 0; i < DRAW_COUNT; i++)
2346 if(drawCall[i]->vsDirtyConstB < index + count)
2348 drawCall[i]->vsDirtyConstB = index + count;
2352 for(int i = 0; i < count; i++)
2354 VertexProcessor::setBooleanConstant(index + i, *boolean);
// Forwards model matrix M for index i to VertexProcessor.
// Unlike the view, base and projection setters, the visible line does not set
// updateClipPlanes.
2359 void Renderer::setModelMatrix(const Matrix &M, int i)
2361 VertexProcessor::setModelMatrix(M, i);
// Forwards the view matrix to VertexProcessor and sets updateClipPlanes so that
// updateClipper() recomputes the clip planes.
2364 void Renderer::setViewMatrix(const Matrix &V)
2366 VertexProcessor::setViewMatrix(V);
2367 updateClipPlanes = true;
// Forwards the base matrix to VertexProcessor and sets updateClipPlanes so that
// updateClipper() recomputes the clip planes.
2370 void Renderer::setBaseMatrix(const Matrix &B)
2372 VertexProcessor::setBaseMatrix(B);
2373 updateClipPlanes = true;
// Forwards the projection matrix to VertexProcessor and sets updateClipPlanes so
// that updateClipper() recomputes the clip planes.
2376 void Renderer::setProjectionMatrix(const Matrix &P)
2378 VertexProcessor::setProjectionMatrix(P);
2379 updateClipPlanes = true;
// Appends the query to the renderer's `queries` container.
2382 void Renderer::addQuery(Query *query)
2384 queries.push_back(query);
// Removes the query from the renderer's `queries` container.
2387 void Renderer::removeQuery(Query *query)
2389 queries.remove(query);
// Thread count getter.
// NOTE(review): the body is on source lines elided from this listing; presumably
// it returns threadCount - confirm against the full source.
2393 int Renderer::getThreadCount()
// Returns the vertexTime entry of the given thread; `thread` is not range-checked here.
2398 int64_t Renderer::getVertexTime(int thread)
2400 return vertexTime[thread];
// Returns the setupTime entry of the given thread; `thread` is not range-checked here.
2403 int64_t Renderer::getSetupTime(int thread)
2405 return setupTime[thread];
// Returns the pixelTime entry of the given thread; `thread` is not range-checked here.
2408 int64_t Renderer::getPixelTime(int thread)
2410 return pixelTime[thread];
// Zeroes the vertex, setup and pixel time entries of the first threadCount threads.
2413 void Renderer::resetTimers()
2415 for(int thread = 0; thread < threadCount; thread++)
2417 vertexTime[thread] = 0;
2418 setupTime[thread] = 0;
2419 pixelTime[thread] = 0;
// Copies the viewport into the member of the same name (hence `this->`).
2424 void Renderer::setViewport(const Viewport &viewport)
2426 this->viewport = viewport;
// Copies the scissor rectangle into the member of the same name (hence `this->`).
2429 void Renderer::setScissor(const Rect &scissor)
2431 this->scissor = scissor;
// Stores the clip flags shifted left by 8 bits, leaving the low 8 bits of
// clipFlags clear.
2434 void Renderer::setClipFlags(int flags)
2436 clipFlags = flags << 8; // Bottom 8 bits used by legacy frustum
// Stores a user clip plane and sets updateClipPlanes so that updateClipper()
// recomputes the clip planes.
// NOTE(review): source lines around the assignment are elided from this listing;
// any range check on `index` would be there - confirm against the full source.
2439 void Renderer::setClipPlane(unsigned int index, const float plane[4])
2443 userPlane[index] = plane;
2447 updateClipPlanes = true;
// Applies the SwiftConfig configuration to the renderer and to global settings
// when a new configuration is available or on the initial update.
// NOTE(review): short source lines (case labels, braces, breaks) are elided from
// this listing and the end of the function is not visible; comments below cover
// only the visible statements.
2450 void Renderer::updateConfiguration(bool initialUpdate)
2452 bool newConfiguration = swiftConfig->hasNewConfiguration();
2454 if(newConfiguration || initialUpdate)
2458 SwiftConfig::Configuration configuration = {0};
2459 swiftConfig->getConfiguration(configuration);
// Precaching is enabled only when configured and this is not a new configuration.
2461 precacheVertex = !newConfiguration && configuration.precache;
2462 precacheSetup = !newConfiguration && configuration.precache;
2463 precachePixel = !newConfiguration && configuration.precache;
2465 VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2466 PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2467 SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
// Values other than 0, 1 and 2 select FILTER_ANISOTROPIC.
2469 switch(configuration.textureSampleQuality)
2471 case 0: Sampler::setFilterQuality(FILTER_POINT); break;
2472 case 1: Sampler::setFilterQuality(FILTER_LINEAR); break;
2473 case 2: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2474 default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
// Values other than 0 and 1 select MIPMAP_LINEAR.
2477 switch(configuration.mipmapQuality)
2479 case 0: Sampler::setMipmapQuality(MIPMAP_POINT); break;
2480 case 1: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2481 default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2484 setPerspectiveCorrection(configuration.perspectiveCorrection);
// All four precision settings are always set to the same level. The case labels
// are elided; the groups appear in the order APPROXIMATE, PARTIAL, ACCURATE,
// WHQL, IEEE, and a final ACCURATE group (presumably the default case).
2486 switch(configuration.transcendentalPrecision)
2489 logPrecision = APPROXIMATE;
2490 expPrecision = APPROXIMATE;
2491 rcpPrecision = APPROXIMATE;
2492 rsqPrecision = APPROXIMATE;
2495 logPrecision = PARTIAL;
2496 expPrecision = PARTIAL;
2497 rcpPrecision = PARTIAL;
2498 rsqPrecision = PARTIAL;
2501 logPrecision = ACCURATE;
2502 expPrecision = ACCURATE;
2503 rcpPrecision = ACCURATE;
2504 rsqPrecision = ACCURATE;
2507 logPrecision = WHQL;
2508 expPrecision = WHQL;
2509 rcpPrecision = WHQL;
2510 rsqPrecision = WHQL;
2513 logPrecision = IEEE;
2514 expPrecision = IEEE;
2515 rcpPrecision = IEEE;
2516 rsqPrecision = IEEE;
2519 logPrecision = ACCURATE;
2520 expPrecision = ACCURATE;
2521 rcpPrecision = ACCURATE;
2522 rsqPrecision = ACCURATE;
// Values other than 0 and 1 select TRANSPARENCY_NONE.
2526 switch(configuration.transparencyAntialiasing)
2528 case 0: transparencyAntialiasing = TRANSPARENCY_NONE; break;
2529 case 1: transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2530 default: transparencyAntialiasing = TRANSPARENCY_NONE; break;
// -1 uses CPUID::coreCount(), 0 uses CPUID::processAffinity(), any other value
// is used directly as the thread count.
2533 switch(configuration.threadCount)
2535 case -1: threadCount = CPUID::coreCount(); break;
2536 case 0: threadCount = CPUID::processAffinity(); break;
2537 default: threadCount = configuration.threadCount; break;
2540 CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2541 CPUID::setEnableSSSE3(configuration.enableSSSE3);
2542 CPUID::setEnableSSE3(configuration.enableSSE3);
2543 CPUID::setEnableSSE2(configuration.enableSSE2);
2544 CPUID::setEnableSSE(configuration.enableSSE);
// Copies the ten optimization pass settings.
2546 for(int pass = 0; pass < 10; pass++)
2548 optimization[pass] = configuration.optimization[pass];
2551 forceWindowed = configuration.forceWindowed;
2552 complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2553 postBlendSRGB = configuration.postBlendSRGB;
2554 exactColorRounding = configuration.exactColorRounding;
2555 forceClearRegisters = configuration.forceClearRegisters;
2558 minPrimitives = configuration.minPrimitives;
2559 maxPrimitives = configuration.maxPrimitives;
// Starts the worker threads when this is not the initial update and worker[0] is null.
2563 if(!initialUpdate && !worker[0])
2565 initializeThreads();