1 // SwiftShader Software Renderer
3 // Copyright(c) 2005-2012 TransGaming Inc.
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
12 #include "Renderer.hpp"
14 #include "Clipper.hpp"
16 #include "FrameBuffer.hpp"
18 #include "Surface.hpp"
20 #include "Primitive.hpp"
21 #include "Polygon.hpp"
22 #include "SwiftConfig.hpp"
23 #include "MutexLock.hpp"
26 #include "Resource.hpp"
27 #include "Constants.hpp"
29 #include "Reactor/Reactor.hpp"
// File-scope tunables.
// disableServer is passed to the SwiftConfig constructor in Renderer::Renderer.
35 bool disableServer = true;
// Bounds that Renderer::draw compares its primitive count against
// (count < minPrimitives || count > maxPrimitives).
// NOTE(review): what draw() does with an out-of-range count is not visible here - presumably an early return; confirm.
38 unsigned int minPrimitives = 1;
39 unsigned int maxPrimitives = 1 << 21;
// Configuration flags that are only declared here; their definitions live in other translation units.
// The Renderer constructor assigns the first six (and exactColorRounding) from its
// Conventions / exactColorRounding arguments, using the sw:: qualifier.
44 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates
45 extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1]
46 extern bool booleanFaceRegister;
47 extern bool fullPixelPositionRegister;
48 extern bool leadingVertexFirst; // Flat shading uses first vertex, else last
49 extern bool secondaryColor; // Specular lighting is applied after texturing
// Flags below are not written by the code visible in this file section;
// complementaryDepthBuffer is read by Renderer::draw when setting up the depth range.
51 extern bool forceWindowed;
52 extern bool complementaryDepthBuffer;
53 extern bool postBlendSRGB;
54 extern bool exactColorRounding;
55 extern TransparencyAntialiasing transparencyAntialiasing;
56 extern bool forceClearRegisters;
// NOTE(review): presumably control ahead-of-time routine generation per pipeline stage - not used in the visible code; confirm.
58 extern bool precacheVertex;
59 extern bool precacheSetup;
60 extern bool precachePixel;
// Precision settings for transcendental operations, all defaulting to ACCURATE.
// Renderer::threadFunction enables flush-to-zero / denormals-are-zero when logPrecision < IEEE.
// NOTE(review): the other three settings are not read in the visible code - presumably consumed by the shader generators; confirm.
67 TranscendentalPrecision logPrecision = ACCURATE;
68 TranscendentalPrecision expPrecision = ACCURATE;
69 TranscendentalPrecision rcpPrecision = ACCURATE;
70 TranscendentalPrecision rsqPrecision = ACCURATE;
// NOTE(review): not read in the visible code; presumably toggles perspective-correct interpolation - confirm.
71 bool perspectiveCorrection = true;
// NOTE(review): the enclosing function header is not visible here. These members are accessed through
// DrawCall pointers elsewhere in the file (draw->vsDirtyConstF, draw->data), so this is presumably the
// DrawCall constructor body - confirm.
// Non-zero dirty counts make Renderer::draw copy that many shader constants on the first use of the slot.
83 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
87 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
// Storage for the per-draw data block, obtained from allocate().
// NOTE(review): the matching release is not visible in this section; verify it is freed with the same allocator.
93 data = (DrawData*)allocate(sizeof(DrawData));
// Point the data block at the 'constants' object in scope here.
94 data->constants = &constants;
// Constructs the renderer: publishes the API conventions to the sw:: globals, creates the clipper,
// the draw call pool and the scheduler bookkeeping, then loads the configuration.
//   context            - rendering context, also forwarded to the three processor constructors
//   conventions        - API conventions copied into the sw:: flags below
//   exactColorRounding - copied into sw::exactColorRounding
104 Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : context(context), VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), viewport()
// These are process-wide globals, so the values set here are shared by every Renderer instance.
106 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
107 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
108 sw::booleanFaceRegister = conventions.booleanFaceRegister;
109 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
110 sw::leadingVertexFirst = conventions.leadingVertexFirst;
111 sw::secondaryColor = conventions.secondaryColor;
112 sw::exactColorRounding = exactColorRounding;
// Start with render target 0 cleared (null).
114 setRenderTarget(0, 0);
115 clipper = new Clipper();
// Force every cached matrix / clip plane set to be recomputed on first use.
117 updateViewMatrix = true;
118 updateBaseMatrix = true;
119 updateProjectionMatrix = true;
120 updateClipPlanes = true;
// NOTE(review): the body of this 16-iteration loop is not visible in this listing.
126 for(int i = 0; i < 16; i++)
136 resumeApp = new Event();
// Per-unit triangle/primitive batch buffers start out unallocated.
144 for(int i = 0; i < 16; i++)
146 triangleBatch[i] = 0;
147 primitiveBatch[i] = 0;
// Allocate the pool of DRAW_COUNT draw calls; drawList initially aliases the pool in order.
150 for(int draw = 0; draw < DRAW_COUNT; draw++)
152 drawCall[draw] = new DrawCall();
153 drawList[draw] = drawCall[draw];
// Reset the progress trackers for all 16 primitive units and 16 pixel clusters.
156 for(int unit = 0; unit < 16; unit++)
158 primitiveProgress[unit].init();
161 for(int cluster = 0; cluster < 16; cluster++)
163 pixelProgress[cluster].init();
// Create the configuration object (disableServer is the file-scope flag) and apply it.
168 swiftConfig = new SwiftConfig(disableServer);
169 updateConfiguration(true);
// Resource used as a lock by draw() (PRIVATE) and synchronize() (PUBLIC).
171 sync = new Resource(0);
// Destructor. The visible part releases the DrawCall pool allocated by the constructor.
// NOTE(review): other cleanup statements, if any, are not visible in this listing.
174 Renderer::~Renderer()
184 for(int draw = 0; draw < DRAW_COUNT; draw++)
186 delete drawCall[draw];
// Copies the sRect region of 'source' to the dRect region of 'dest' by forwarding all arguments
// unchanged to the blitter member; 'filter' is passed through as-is.
192 void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter)
194 blitter.blit(source, sRect, dest, dRect, filter);
// Forwards a source/destination surface pair to the blitter's blit3D.
197 void Renderer::blit3D(Surface *source, Surface *dest)
199 blitter.blit3D(source, dest);
// Records one draw of 'count' primitives of kind 'drawType' into a free DrawCall slot and hands it to
// the scheduler. For every super-sample pass it snapshots the routines, shader constants, textures,
// vertex/index streams and render targets, locking each resource; finishRendering() releases them.
//   drawType    - primitive topology / index format
//   indexOffset - byte offset added to the locked index buffer pointer
//   count       - number of primitives
//   update      - when true, processor states and routines are re-derived
202 void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
// NOTE(review): the statement guarded by this range check is not visible - presumably an early return; confirm.
205 if(count < minPrimitives || count > maxPrimitives)
211 context->drawType = drawType;
213 updateConfiguration();
216 int ss = context->getSuperSampleCount();
217 int ms = context->getMultiSampleCount();
// One pass per super-sample; each pass renders 'ms' multi-samples.
219 for(int q = 0; q < ss; q++)
221 int oldMultiSampleMask = context->multiSampleMask;
// Select the ms-bit slice of the sample mask that belongs to pass q.
222 context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
// NOTE(review): the action for an all-zero mask is not visible - presumably the pass is skipped; confirm.
224 if(!context->multiSampleMask)
229 sync->lock(sw::PRIVATE);
231 Routine *vertexRoutine;
232 Routine *setupRoutine;
233 Routine *pixelRoutine;
// States and routines are re-derived only on request or when the sample mask changed for this pass.
235 if(update || oldMultiSampleMask != context->multiSampleMask)
237 vertexState = VertexProcessor::update();
238 setupState = SetupProcessor::update();
239 pixelState = PixelProcessor::update();
241 vertexRoutine = VertexProcessor::routine(vertexState);
242 setupRoutine = SetupProcessor::routine(setupState);
243 pixelRoutine = PixelProcessor::routine(pixelState);
// Primitives per batch shrink with the multi-sample count.
246 int batch = batchSize / ms;
// Choose the setup function from the primitive kind and, for triangles, the fill mode.
248 int (*setupPrimitives)(Renderer *renderer, int batch, int count);
250 if(context->isDrawTriangle())
252 switch(context->fillMode)
255 setupPrimitives = setupSolidTriangles;
258 setupPrimitives = setupWireframeTriangle;
262 setupPrimitives = setupVertexTriangle;
265 default: ASSERT(false);
268 else if(context->isDrawLine())
270 setupPrimitives = setupLines;
274 setupPrimitives = setupPoints;
// Look for a free DrawCall slot; finishRendering() marks a finished slot with references == -1.
// NOTE(review): the declaration of 'draw' and the wait-for-free-slot logic are not visible in this listing.
281 for(int i = 0; i < DRAW_COUNT; i++)
283 if(drawCall[i]->references == -1)
286 drawList[nextDraw % DRAW_COUNT] = draw;
299 DrawData *data = draw->data;
// Take a reference on every active query and give the draw its own copy of the list;
// finishRendering() drops the references again.
301 if(queries.size() != 0)
303 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
305 atomicIncrement(&(*query)->reference);
308 draw->queries = new std::list<Query*>(queries);
311 draw->drawType = drawType;
312 draw->batchSize = batch;
// bind() here is paired with unbind() in finishRendering().
314 vertexRoutine->bind();
315 setupRoutine->bind();
316 pixelRoutine->bind();
318 draw->vertexRoutine = vertexRoutine;
319 draw->setupRoutine = setupRoutine;
320 draw->pixelRoutine = pixelRoutine;
321 draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
322 draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
323 draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
324 draw->setupPrimitives = setupPrimitives;
325 draw->setupState = setupState;
// Snapshot and lock the vertex streams that have a backing resource.
327 for(int i = 0; i < VERTEX_ATTRIBUTES; i++)
329 draw->vertexStream[i] = context->input[i].resource;
330 data->input[i] = context->input[i].buffer;
331 data->stride[i] = context->input[i].stride;
333 if(draw->vertexStream[i])
335 draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
// Lock the index buffer and apply the caller's offset to the locked pointer.
339 if(context->indexBuffer)
341 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
344 draw->indexBuffer = context->indexBuffer;
// Clear all texture slots first so finishRendering() can tell which ones were actually locked.
346 for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
348 draw->texture[sampler] = 0;
// Pixel-stage textures: only samplers the pixel state actually uses are captured and locked.
351 for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
353 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
355 draw->texture[sampler] = context->texture[sampler];
356 draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE); // If the texture is both read and written, use the same read/write lock as render targets
358 data->mipmap[sampler] = context->sampler[sampler].getTextureData();
// Pixel shader constants: copy only as many entries as the dirty count says, then clear the count.
362 if(context->pixelShader)
364 if(draw->psDirtyConstF)
// The word4 copy of the float constants is capped at the first 8 entries.
366 memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
367 memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
368 draw->psDirtyConstF = 0;
371 if(draw->psDirtyConstI)
373 memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
374 draw->psDirtyConstI = 0;
377 if(draw->psDirtyConstB)
379 memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
380 draw->psDirtyConstB = 0;
// Texture stage uniforms are needed for pixel shader versions up to 1.4 (0x0104).
384 if(context->pixelShaderVersion() <= 0x0104)
386 for(int stage = 0; stage < 8; stage++)
388 if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
390 data->textureStage[stage] = context->textureStage[stage].uniforms;
396 if(context->vertexShader)
// Vertex textures are captured only for vertex shader version 3.0 (0x0300) and up;
// they occupy the slots after the pixel-stage image units.
398 if(context->vertexShader->getVersion() >= 0x0300)
400 for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
402 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
404 draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
405 draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
407 data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
// Vertex shader constants follow the same dirty-count scheme as the pixel shader constants above.
412 if(draw->vsDirtyConstF)
414 memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
415 draw->vsDirtyConstF = 0;
418 if(draw->vsDirtyConstI)
420 memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
421 draw->vsDirtyConstI = 0;
424 if(draw->vsDirtyConstB)
426 memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
427 draw->vsDirtyConstB = 0;
430 if(context->vertexShader->instanceIdDeclared)
432 data->instanceID = context->instanceID;
// Mark every vertex shader constant dirty again.
// NOTE(review): the condition guarding these three lines is not visible - presumably the 'else' (no vertex shader) path; confirm.
439 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
440 draw->vsDirtyConstI = 16;
441 draw->vsDirtyConstB = 16;
// Stencil state: index 0 takes 'stencil', index 1 takes 'stencilCCW'.
444 if(pixelState.stencilActive)
446 data->stencil[0] = stencil;
447 data->stencil[1] = stencilCCW;
// NOTE(review): the bodies of the fog and point-setup branches are not visible in this listing.
450 if(pixelState.fogActive)
455 if(setupState.isDrawPoint)
460 data->lineWidth = context->lineWidth;
462 data->factor = factor;
// Alpha-to-coverage thresholds are spread around the alpha reference, limited by its distance to 0 or 1.
// NOTE(review): the condition choosing between the four-threshold and two-threshold variants is not visible.
464 if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
466 float ref = context->alphaReference * (1.0f / 255.0f);
467 float margin = sw::min(ref, 1.0f - ref);
471 data->a2c0 = replicate(ref - margin * 0.6f);
472 data->a2c1 = replicate(ref - margin * 0.2f);
473 data->a2c2 = replicate(ref + margin * 0.2f);
474 data->a2c3 = replicate(ref + margin * 0.6f);
478 data->a2c0 = replicate(ref - margin * 0.3f);
479 data->a2c1 = replicate(ref + margin * 0.3f);
// Reset the per-cluster occlusion counters that finishRendering() later adds into the queries.
484 if(pixelState.occlusionEnabled)
486 for(int cluster = 0; cluster < clusterCount; cluster++)
488 data->occlusion[cluster] = 0;
// Reset the per-cluster performance counters.
493 for(int cluster = 0; cluster < clusterCount; cluster++)
495 for(int i = 0; i < PERF_TIMERS; i++)
497 data->cycles[i][cluster] = 0;
// Viewport half extents (W, H), center (X0, Y0) and depth range bounds (N, F).
504 float W = 0.5f * viewport.width;
505 float H = 0.5f * viewport.height;
506 float X0 = viewport.x0 + W;
507 float Y0 = viewport.y0 + H;
508 float N = viewport.minZ;
509 float F = viewport.maxZ;
// NOTE(review): the bodies of the next two conditionals and the definition of 'Z' used below are not visible in this listing.
512 if(context->isDrawTriangle(false))
517 if(complementaryDepthBuffer)
// Sub-pixel sample offsets: the row is log2(super-sample count), the column is the pass index q.
523 static const float X[5][16] = // Fragment offsets
525 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample
526 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples
527 {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples
528 {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples
529 {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f} // 16 samples
532 static const float Y[5][16] = // Fragment offsets
534 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample
535 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples
536 {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples
537 {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples
538 {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f} // 16 samples
541 int s = sw::log2(ss);
// Viewport scale and center in 1/16 pixel units; the "- 8" term is half a pixel in those units.
543 data->Wx16 = replicate(W * 16);
544 data->Hx16 = replicate(H * 16);
545 data->X0x16 = replicate(X0 * 16 - 8);
546 data->Y0x16 = replicate(Y0 * 16 - 8);
// Sample offset for this pass, divided by the viewport half extents.
547 data->XXXX = replicate(X[s][q] / W);
548 data->YYYY = replicate(Y[s][q] / H);
549 data->halfPixelX = replicate(0.5f / W);
550 data->halfPixelY = replicate(0.5f / H);
551 data->viewportHeight = abs(viewport.height);
552 data->slopeDepthBias = slopeDepthBias;
553 data->depthRange = Z;
555 draw->clipFlags = clipFlags;
// Copy only the user clip planes that are enabled in clipFlags.
559 if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
560 if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
561 if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
562 if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
563 if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
564 if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
// Lock up to four color targets at slice q * ms; finishRendering() calls unlockInternal() on them.
570 for(int index = 0; index < 4; index++)
572 draw->renderTarget[index] = context->renderTarget[index];
574 if(draw->renderTarget[index])
576 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
577 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
578 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
// Depth and stencil are locked separately and released separately in finishRendering().
582 draw->depthStencil = context->depthStencil;
584 if(draw->depthStencil)
586 data->depthBuffer = (float*)context->depthStencil->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
587 data->depthPitchB = context->depthStencil->getInternalPitchB();
588 data->depthSliceB = context->depthStencil->getInternalSliceB();
590 data->stencilBuffer = (unsigned char*)context->depthStencil->lockStencil(q * ms, MANAGED);
591 data->stencilPitchB = context->depthStencil->getStencilPitchB();
592 data->stencilSliceB = context->depthStencil->getStencilSliceB();
598 data->scissorX0 = scissor.x0;
599 data->scissorX1 = scissor.x1;
600 data->scissorY0 = scissor.y0;
601 data->scissorY1 = scissor.y1;
// One reference per batch (count / batch, rounded up); finishRendering() decrements this counter.
607 draw->references = (count + batch - 1) / batch;
// NOTE(review): the statement(s) executed while holding schedulerMutex are not visible in this listing.
609 schedulerMutex.lock();
611 schedulerMutex.unlock();
// Kick off execution by setting task[0] to RESUME, either for the worker threads or,
// per the comment below, on the main thread.
// NOTE(review): the condition selecting between the two paths and the surrounding calls are not visible here.
620 task[0].type = Task::RESUME;
625 else // Use main thread for draw execution
628 task[0].type = Task::RESUME;
// Worker thread entry point. 'parameters' must point to a Parameters object carrying the renderer
// and the thread's index; control then passes to that renderer's threadLoop().
635 void Renderer::threadFunction(void *parameters)
637 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
638 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
// Below IEEE precision, enable flush-to-zero and denormals-are-zero before doing any work on this thread.
640 if(logPrecision < IEEE)
642 CPUID::setFlushToZero(true);
643 CPUID::setDenormalsAreZero(true);
646 renderer->threadLoop(threadIndex);
// Body of a worker thread: runs tasks via taskLoop(), then signals this thread's 'suspend' event and
// waits on its 'resume' event.
// NOTE(review): the enclosing loop and its exit condition are not visible in this listing.
649 void Renderer::threadLoop(int threadIndex)
653 taskLoop(threadIndex);
655 suspend[threadIndex]->signal();
656 resume[threadIndex]->wait();
// Alternates between fetching a task and executing it until the scheduler assigns this thread
// a SUSPEND task.
660 void Renderer::taskLoop(int threadIndex)
662 while(task[threadIndex].type != Task::SUSPEND)
664 scheduleTask(threadIndex);
665 executeTask(threadIndex);
// Fills the task queue (a 32-entry ring indexed by qHead) with work that is ready to run:
// first pixel tasks for idle clusters, then primitive tasks for free primitive units.
// Called from scheduleTask() while schedulerMutex is held.
// NOTE(review): the queue size updates and the loop exits after a match are not visible in this listing.
669 void Renderer::findAvailableTasks()
// Pixel tasks: pair each idle cluster with a unit holding processed primitives of the cluster's
// current draw call, in submission order.
672 for(int cluster = 0; cluster < clusterCount; cluster++)
674 if(!pixelProgress[cluster].executing)
676 for(int unit = 0; unit < unitCount; unit++)
678 if(primitiveProgress[unit].references > 0) // Contains processed primitives
680 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
682 if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive) // Previous primitives have been rendered
684 Task &task = taskQueue[qHead];
685 task.type = Task::PIXELS;
686 task.primitiveUnit = unit;
687 task.pixelCluster = cluster;
689 pixelProgress[cluster].executing = true;
691 // Commit to the task queue
692 qHead = (qHead + 1) % 32;
703 // Find primitive tasks
704 if(currentDraw == nextDraw)
706 return; // No more primitives to process
709 for(int unit = 0; unit < unitCount; unit++)
711 DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
// All primitives of the current draw have been handed out.
// NOTE(review): the step that advances currentDraw is not visible - presumably an increment before the re-check below; confirm.
713 if(draw->primitive >= draw->count)
717 if(currentDraw == nextDraw)
719 return; // No more primitives to process
722 draw = drawList[currentDraw % DRAW_COUNT];
725 if(!primitiveProgress[unit].references) // Task not already being executed and not still in use by a pixel unit
727 int primitive = draw->primitive;
728 int count = draw->count;
729 int batch = draw->batchSize;
// Assign the next batch to this unit; the last batch of a draw may be shorter than 'batch'.
731 primitiveProgress[unit].drawCall = currentDraw;
732 primitiveProgress[unit].firstPrimitive = primitive;
733 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
735 draw->primitive += batch;
737 Task &task = taskQueue[qHead];
738 task.type = Task::PRIMITIVES;
739 task.primitiveUnit = unit;
// -1 marks the unit as busy; executeTask() sets it to clusterCount once the primitives are processed.
741 primitiveProgress[unit].references = -1;
743 // Commit to the task queue
744 qHead = (qHead + 1) % 32;
// Picks the next task for the calling thread under schedulerMutex. Refills the queue when it is
// running low, takes the oldest queued task, wakes suspended threads when there is more work than
// awake threads, and assigns SUSPEND to this thread when nothing is available.
// NOTE(review): the branch structure (queue non-empty vs. empty) and the qSize/threadsAwake updates are not visible here.
750 void Renderer::scheduleTask(int threadIndex)
752 schedulerMutex.lock();
// Refill when fewer tasks are queued than there are sleeping threads plus this one.
754 if((int)qSize < threadCount - threadsAwake + 1)
756 findAvailableTasks();
// Oldest entry of the ring: the head minus the number of queued tasks, modulo the queue length.
761 task[threadIndex] = taskQueue[(qHead - qSize) % 32];
764 if(threadsAwake != threadCount)
766 int wakeup = qSize - threadsAwake + 1;
768 for(int i = 0; i < threadCount && wakeup > 0; i++)
770 if(task[i].type == Task::SUSPEND)
773 task[i].type = Task::RESUME;
784 task[threadIndex].type = Task::SUSPEND;
789 schedulerMutex.unlock();
// Runs the task currently assigned to 'threadIndex'. PRIMITIVES tasks process vertices and run
// primitive setup for one unit; the pixel path runs the pixel routine for one cluster and then
// calls finishRendering(). Time spent is added to the per-thread timers.
// NOTE(review): the preprocessor/conditional guards around the timing code are not visible in this listing.
792 void Renderer::executeTask(int threadIndex)
795 int64_t startTick = Timer::ticks();
798 switch(task[threadIndex].type)
800 case Task::PRIMITIVES:
802 int unit = task[threadIndex].primitiveUnit;
804 int input = primitiveProgress[unit].firstPrimitive;
805 int count = primitiveProgress[unit].primitiveCount;
806 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
807 int (*setupPrimitives)(Renderer *renderer, int batch, int count) = draw->setupPrimitives;
// draw->count is passed as the wrap-around length used by the line-loop cases.
809 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
812 int64_t time = Timer::ticks();
813 vertexTime[threadIndex] += time - startTick;
817 int visible = setupPrimitives(this, unit, count);
// Publish the result: each pixel cluster consumes this batch once, so references starts at clusterCount.
819 primitiveProgress[unit].visible = visible;
820 primitiveProgress[unit].references = clusterCount;
823 setupTime[threadIndex] += Timer::ticks() - startTick;
// NOTE(review): the case label for the lines below is not visible - presumably Task::PIXELS; confirm.
829 int unit = task[threadIndex].primitiveUnit;
830 int visible = primitiveProgress[unit].visible;
834 int cluster = task[threadIndex].pixelCluster;
835 Primitive *primitive = primitiveBatch[unit];
836 DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
837 DrawData *data = draw->data;
838 PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
840 pixelRoutine(primitive, visible, cluster, data);
843 finishRendering(task[threadIndex]);
846 pixelTime[threadIndex] += Timer::ticks() - startTick;
// Acquires the 'sync' resource with PUBLIC access; draw() acquires the same resource with PRIVATE access.
// NOTE(review): the matching unlock calls are not visible in this listing.
859 void Renderer::synchronize()
861 sync->lock(sw::PUBLIC);
// Bookkeeping after a pixel task: advances the cluster's progress, drops one reference on the
// primitive unit and on the draw call, and releases everything draw() locked or bound before
// marking the DrawCall slot as free.
// NOTE(review): the conditions testing the decremented reference counts are not visible in this listing;
// presumably the draw reference is dropped only by the last cluster of a batch and the cleanup runs
// only when the draw's count reaches zero - confirm.
865 void Renderer::finishRendering(Task &pixelTask)
867 int unit = pixelTask.primitiveUnit;
868 int cluster = pixelTask.pixelCluster;
870 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
871 DrawData &data = *draw.data;
872 int primitive = primitiveProgress[unit].firstPrimitive;
873 int count = primitiveProgress[unit].primitiveCount;
875 pixelProgress[cluster].processedPrimitives = primitive + count;
// This cluster has rendered the whole draw: move it on to the next draw call.
877 if(pixelProgress[cluster].processedPrimitives >= draw.count)
879 pixelProgress[cluster].drawCall++;
880 pixelProgress[cluster].processedPrimitives = 0;
883 int ref = atomicDecrement(&primitiveProgress[unit].references);
887 ref = atomicDecrement(&draw.references);
// Fold the per-cluster performance counters into the profiler totals.
892 for(int cluster = 0; cluster < clusterCount; cluster++)
894 for(int i = 0; i < PERF_TIMERS; i++)
896 profiler.cycles[i] += data.cycles[i][cluster];
// Add every cluster's occlusion count to each query and drop the reference taken in draw().
// NOTE(review): the declaration of 'query' and the release of draw.queries are not visible in this listing.
903 for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
907 for(int cluster = 0; cluster < clusterCount; cluster++)
909 atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
912 atomicDecrement(&query->reference);
// Release the render targets, depth/stencil, textures, vertex streams and index buffer locked in draw().
919 for(int i = 0; i < 4; i++)
921 if(draw.renderTarget[i])
923 draw.renderTarget[i]->unlockInternal();
927 if(draw.depthStencil)
929 draw.depthStencil->unlockInternal();
930 draw.depthStencil->unlockStencil();
// NOTE(review): the null checks guarding the texture and index buffer unlocks are not visible in this listing.
933 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
937 draw.texture[i]->unlock();
941 for(int i = 0; i < VERTEX_ATTRIBUTES; i++)
943 if(draw.vertexStream[i])
945 draw.vertexStream[i]->unlock();
951 draw.indexBuffer->unlock();
// Pairs with the bind() calls in draw().
954 draw.vertexRoutine->unbind();
955 draw.setupRoutine->unbind();
956 draw.pixelRoutine->unbind();
// -1 is the "slot is free" marker that draw() searches for.
960 draw.references = -1;
965 pixelProgress[cluster].executing = false;
// Builds the vertex index triplets for one batch of primitives and runs the vertex routine on them.
//   unit          - primitive unit whose triangle batch receives the output
//   start         - index of the first primitive of the batch within the draw
//   triangleCount - number of primitives in the batch (used for points and lines as well)
//   loop          - wrap-around length for the line-loop cases (executeTask() passes draw->count)
//   thread        - index of the calling thread; selects its vertex task
// Every primitive gets three indices: points repeat one index three times, lines repeat the second.
// NOTE(review): per-iteration pointer/index increments and several case labels are not visible in this listing.
968 void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
970 Triangle *triangle = triangleBatch[unit];
971 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
972 DrawData *data = draw->data;
973 VertexTask *task = vertexTask[thread];
975 const void *indices = data->indices;
976 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
// The vertex cache is only valid within one draw call; clear it when this thread switches draws.
978 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
980 task->vertexCache.clear();
981 task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
// NOTE(review): fixed capacity of 128 primitives; nothing visible here checks triangleCount against it - verify the batch size never exceeds 128.
984 unsigned int batch[128][3]; // FIXME: Adjust to dynamic batch size
986 switch(draw->drawType)
// NOTE(review): case label not visible - presumably the non-indexed point list; the loop body is not visible either.
990 unsigned int index = start;
992 for(unsigned int i = 0; i < triangleCount; i++)
// NOTE(review): case label not visible - presumably the non-indexed line list (two vertices per line).
1004 unsigned int index = 2 * start;
1006 for(unsigned int i = 0; i < triangleCount; i++)
1008 batch[i][0] = index + 0;
1009 batch[i][1] = index + 1;
1010 batch[i][2] = index + 1;
1016 case DRAW_LINESTRIP:
1018 unsigned int index = start;
1020 for(unsigned int i = 0; i < triangleCount; i++)
1022 batch[i][0] = index + 0;
1023 batch[i][1] = index + 1;
1024 batch[i][2] = index + 1;
// NOTE(review): case label not visible - presumably the non-indexed line loop; indices wrap at 'loop'.
1032 unsigned int index = start;
1034 for(unsigned int i = 0; i < triangleCount; i++)
1036 batch[i][0] = (index + 0) % loop;
1037 batch[i][1] = (index + 1) % loop;
1038 batch[i][2] = (index + 1) % loop;
1044 case DRAW_TRIANGLELIST:
1046 unsigned int index = 3 * start;
1048 for(unsigned int i = 0; i < triangleCount; i++)
1050 batch[i][0] = index + 0;
1051 batch[i][1] = index + 1;
1052 batch[i][2] = index + 2;
1058 case DRAW_TRIANGLESTRIP:
1060 unsigned int index = start;
// The (index & 1) / (~index & 1) terms swap the last two vertices on odd triangles.
1062 for(unsigned int i = 0; i < triangleCount; i++)
1064 batch[i][0] = index + 0;
1065 batch[i][1] = index + (index & 1) + 1;
1066 batch[i][2] = index + (~index & 1) + 1;
1072 case DRAW_TRIANGLEFAN:
1074 unsigned int index = start;
// NOTE(review): the assignment of batch[i][2] is not visible - presumably the fan's hub vertex 0, as in the indexed fan cases; confirm.
1076 for(unsigned int i = 0; i < triangleCount; i++)
1078 batch[i][0] = index + 1;
1079 batch[i][1] = index + 2;
// Indexed variants: the same topologies, reading vertex indices of 8, 16 or 32 bits from 'indices'.
1086 case DRAW_INDEXEDPOINTLIST8:
1088 const unsigned char *index = (const unsigned char*)indices + start;
1090 for(unsigned int i = 0; i < triangleCount; i++)
1092 batch[i][0] = *index;
1093 batch[i][1] = *index;
1094 batch[i][2] = *index;
1100 case DRAW_INDEXEDPOINTLIST16:
1102 const unsigned short *index = (const unsigned short*)indices + start;
1104 for(unsigned int i = 0; i < triangleCount; i++)
1106 batch[i][0] = *index;
1107 batch[i][1] = *index;
1108 batch[i][2] = *index;
1114 case DRAW_INDEXEDPOINTLIST32:
1116 const unsigned int *index = (const unsigned int*)indices + start;
1118 for(unsigned int i = 0; i < triangleCount; i++)
1120 batch[i][0] = *index;
1121 batch[i][1] = *index;
1122 batch[i][2] = *index;
1128 case DRAW_INDEXEDLINELIST8:
1130 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1132 for(unsigned int i = 0; i < triangleCount; i++)
1134 batch[i][0] = index[0];
1135 batch[i][1] = index[1];
1136 batch[i][2] = index[1];
1142 case DRAW_INDEXEDLINELIST16:
1144 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1146 for(unsigned int i = 0; i < triangleCount; i++)
1148 batch[i][0] = index[0];
1149 batch[i][1] = index[1];
1150 batch[i][2] = index[1];
1156 case DRAW_INDEXEDLINELIST32:
1158 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1160 for(unsigned int i = 0; i < triangleCount; i++)
1162 batch[i][0] = index[0];
1163 batch[i][1] = index[1];
1164 batch[i][2] = index[1];
1170 case DRAW_INDEXEDLINESTRIP8:
1172 const unsigned char *index = (const unsigned char*)indices + start;
1174 for(unsigned int i = 0; i < triangleCount; i++)
1176 batch[i][0] = index[0];
1177 batch[i][1] = index[1];
1178 batch[i][2] = index[1];
1184 case DRAW_INDEXEDLINESTRIP16:
1186 const unsigned short *index = (const unsigned short*)indices + start;
1188 for(unsigned int i = 0; i < triangleCount; i++)
1190 batch[i][0] = index[0];
1191 batch[i][1] = index[1];
1192 batch[i][2] = index[1];
1198 case DRAW_INDEXEDLINESTRIP32:
1200 const unsigned int *index = (const unsigned int*)indices + start;
1202 for(unsigned int i = 0; i < triangleCount; i++)
1204 batch[i][0] = index[0];
1205 batch[i][1] = index[1];
1206 batch[i][2] = index[1];
// Indexed line loops address the index buffer absolutely and wrap at 'loop' to close the loop.
1212 case DRAW_INDEXEDLINELOOP8:
1214 const unsigned char *index = (const unsigned char*)indices;
1216 for(unsigned int i = 0; i < triangleCount; i++)
1218 batch[i][0] = index[(start + i + 0) % loop];
1219 batch[i][1] = index[(start + i + 1) % loop];
1220 batch[i][2] = index[(start + i + 1) % loop];
1224 case DRAW_INDEXEDLINELOOP16:
1226 const unsigned short *index = (const unsigned short*)indices;
1228 for(unsigned int i = 0; i < triangleCount; i++)
1230 batch[i][0] = index[(start + i + 0) % loop];
1231 batch[i][1] = index[(start + i + 1) % loop];
1232 batch[i][2] = index[(start + i + 1) % loop];
1236 case DRAW_INDEXEDLINELOOP32:
1238 const unsigned int *index = (const unsigned int*)indices;
1240 for(unsigned int i = 0; i < triangleCount; i++)
1242 batch[i][0] = index[(start + i + 0) % loop];
1243 batch[i][1] = index[(start + i + 1) % loop];
1244 batch[i][2] = index[(start + i + 1) % loop];
1248 case DRAW_INDEXEDTRIANGLELIST8:
1250 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1252 for(unsigned int i = 0; i < triangleCount; i++)
1254 batch[i][0] = index[0];
1255 batch[i][1] = index[1];
1256 batch[i][2] = index[2];
1262 case DRAW_INDEXEDTRIANGLELIST16:
1264 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1266 for(unsigned int i = 0; i < triangleCount; i++)
1268 batch[i][0] = index[0];
1269 batch[i][1] = index[1];
1270 batch[i][2] = index[2];
1276 case DRAW_INDEXEDTRIANGLELIST32:
1278 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1280 for(unsigned int i = 0; i < triangleCount; i++)
1282 batch[i][0] = index[0];
1283 batch[i][1] = index[1];
1284 batch[i][2] = index[2];
// Indexed strips use the parity of the absolute triangle number (start + i) to swap the last two indices.
1290 case DRAW_INDEXEDTRIANGLESTRIP8:
1292 const unsigned char *index = (const unsigned char*)indices + start;
1294 for(unsigned int i = 0; i < triangleCount; i++)
1296 batch[i][0] = index[0];
1297 batch[i][1] = index[((start + i) & 1) + 1];
1298 batch[i][2] = index[(~(start + i) & 1) + 1];
1304 case DRAW_INDEXEDTRIANGLESTRIP16:
1306 const unsigned short *index = (const unsigned short*)indices + start;
1308 for(unsigned int i = 0; i < triangleCount; i++)
1310 batch[i][0] = index[0];
1311 batch[i][1] = index[((start + i) & 1) + 1];
1312 batch[i][2] = index[(~(start + i) & 1) + 1];
1318 case DRAW_INDEXEDTRIANGLESTRIP32:
1320 const unsigned int *index = (const unsigned int*)indices + start;
1322 for(unsigned int i = 0; i < triangleCount; i++)
1324 batch[i][0] = index[0];
1325 batch[i][1] = index[((start + i) & 1) + 1];
1326 batch[i][2] = index[(~(start + i) & 1) + 1];
// Indexed fans: the hub (index[0]) is stored as the third vertex of every triangle.
1332 case DRAW_INDEXEDTRIANGLEFAN8:
1334 const unsigned char *index = (const unsigned char*)indices;
1336 for(unsigned int i = 0; i < triangleCount; i++)
1338 batch[i][0] = index[start + i + 1];
1339 batch[i][1] = index[start + i + 2];
1340 batch[i][2] = index[0];
1344 case DRAW_INDEXEDTRIANGLEFAN16:
1346 const unsigned short *index = (const unsigned short*)indices;
1348 for(unsigned int i = 0; i < triangleCount; i++)
1350 batch[i][0] = index[start + i + 1];
1351 batch[i][1] = index[start + i + 2];
1352 batch[i][2] = index[0];
1356 case DRAW_INDEXEDTRIANGLEFAN32:
1358 const unsigned int *index = (const unsigned int*)indices;
1360 for(unsigned int i = 0; i < triangleCount; i++)
1362 batch[i][0] = index[start + i + 1];
1363 batch[i][1] = index[start + i + 2];
1364 batch[i][2] = index[0];
// NOTE(review): case label not visible - presumably a quad list; each quad (4 vertices) yields two
// triangles (0,1,2) and (0,2,3), hence 4 * start / 2 and the i += 2 stride.
1370 unsigned int index = 4 * start / 2;
1372 for(unsigned int i = 0; i < triangleCount; i += 2)
1374 batch[i+0][0] = index + 0;
1375 batch[i+0][1] = index + 1;
1376 batch[i+0][2] = index + 2;
1378 batch[i+1][0] = index + 0;
1379 batch[i+1][1] = index + 2;
1380 batch[i+1][2] = index + 3;
// Three vertices per primitive; the routine writes its output starting at the first triangle's v0.
1390 task->vertexCount = triangleCount * 3;
1391 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
// Runs primitive setup for a batch of filled triangles held by primitive unit 'unit'.
// For each of the 'count' triangles it checks the vertices' clip flags, clips the polygon when
// needed, and calls the draw's setup routine.
// NOTE(review): the visible-primitive counter, the primitive pointer advance and the return statement
// are not visible in this listing; presumably the number of set-up primitives is returned
// (executeTask() stores the result as 'visible').
1394 int Renderer::setupSolidTriangles(Renderer *renderer, int unit, int count)
1396 Triangle *triangle = renderer->triangleBatch[unit];
1397 Primitive *primitive = renderer->primitiveBatch[unit];
1399 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1400 SetupProcessor::State &state = draw.setupState;
1401 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1403 int ms = state.multiSample;
1404 int pos = state.positionRegister;
1405 const DrawData *data = draw.data;
1408 for(int i = 0; i < count; i++, triangle++)
1410 Vertex &v0 = triangle->v0;
1411 Vertex &v1 = triangle->v1;
1412 Vertex &v2 = triangle->v2;
// Continue only when the flags common to all three vertices are exactly CLIP_FINITE.
1414 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1416 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
// Any flag beyond CLIP_FINITE on a vertex, or enabled on the draw, means the polygon must be clipped.
1418 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1420 if(clipFlagsOr != Clipper::CLIP_FINITE)
// NOTE(review): the action taken when clip() fails is not visible - presumably the triangle is skipped; confirm.
1422 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1428 if(setupRoutine(primitive, triangle, &polygon, data))
// Sets up a single triangle in wireframe fill mode by turning its three edges into lines.
// The triangle is culled first from the sign of 'd'; the batch slots triangle[1] and triangle[2]
// are then filled with the second and third edges, and setupLine() is called three times.
// NOTE(review): the triangle/primitive pointer advances inside the final loop, the visible counter and
// the final return are not visible in this listing.
1439 int Renderer::setupWireframeTriangle(Renderer *renderer, int unit, int count)
1441 Triangle *triangle = renderer->triangleBatch[unit];
1442 Primitive *primitive = renderer->primitiveBatch[unit];
1445 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1446 SetupProcessor::State &state = draw.setupState;
1447 SetupProcessor::RoutinePointer setupRoutine = draw.setupPointer;
1449 const Vertex &v0 = triangle[0].v0;
1450 const Vertex &v1 = triangle[0].v1;
1451 const Vertex &v2 = triangle[0].v2;
// Determinant over the vertices' x, y and w; its sign decides culling and 0.5 * d becomes the primitive's area below.
1453 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1455 if(state.cullMode == CULL_CLOCKWISE)
1457 if(d >= 0) return 0;
1459 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1461 if(d <= 0) return 0;
// triangle[0] already holds edge v0-v1; store v1-v2 and v2-v0 in the next two slots.
1465 triangle[1].v0 = v1;
1466 triangle[1].v1 = v2;
1467 triangle[2].v0 = v2;
1468 triangle[2].v1 = v0;
// With flat shading, give all edge vertices the two colors of the first triangle's v0.
1470 if(state.color[0][0].flat) // FIXME
1472 for(int i = 0; i < 2; i++)
1474 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1475 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1476 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1477 triangle[2].v1.C[i] = triangle[0].v0.C[i];
1481 for(int i = 0; i < 3; i++)
1483 if(setupLine(renderer, *primitive, *triangle, draw))
1485 primitive->area = 0.5f * d;
// Sets up a single triangle in point (vertex) fill mode by rendering its three corners as points.
// Culling uses the sign of 'd', exactly as in the wireframe variant; v1 and v2 are then copied to the
// v0 slot of the next two batch entries and setupPoint() is called three times.
// NOTE(review): the triangle/primitive pointer advances inside the final loop, the visible counter and
// the final return are not visible in this listing.
1497 int Renderer::setupVertexTriangle(Renderer *renderer, int unit, int count)
1499 Triangle *triangle = renderer->triangleBatch[unit];
1500 Primitive *primitive = renderer->primitiveBatch[unit];
1503 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1504 SetupProcessor::State &state = draw.setupState;
1506 const Vertex &v0 = triangle[0].v0;
1507 const Vertex &v1 = triangle[0].v1;
1508 const Vertex &v2 = triangle[0].v2;
// Determinant over the vertices' x, y and w; 0.5 * d becomes the primitive's area below.
1510 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1512 if(state.cullMode == CULL_CLOCKWISE)
1514 if(d >= 0) return 0;
1516 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1518 if(d <= 0) return 0;
// triangle[0].v0 is already the first corner; place the other two corners in the following slots.
1522 triangle[1].v0 = v1;
1523 triangle[2].v0 = v2;
1525 for(int i = 0; i < 3; i++)
1527 if(setupPoint(renderer, *primitive, *triangle, draw))
1529 primitive->area = 0.5f * d;
// Runs setupLine() on each of the 'count' entries of the unit's batch.
// NOTE(review): the pointer advances, the visible counter and the return statement are not visible in
// this listing; 'ms' is presumably the stride by which 'primitive' advances - confirm.
1541 int Renderer::setupLines(Renderer *renderer, int unit, int count)
1543 Triangle *triangle = renderer->triangleBatch[unit];
1544 Primitive *primitive = renderer->primitiveBatch[unit];
1547 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1548 SetupProcessor::State &state = draw.setupState;
1550 int ms = state.multiSample;
1552 for(int i = 0; i < count; i++)
1554 if(setupLine(renderer, *primitive, *triangle, draw))
// Sets up a unit's batch as points: fetches the unit's triangle/primitive batches and its
// draw call, then calls setupPoint() in a loop of `count` iterations.
// NOTE(review): the statements that advance `triangle`/`primitive` and the return value
// are not visible in this listing - confirm against the full file.
1566 int Renderer::setupPoints(Renderer *renderer, int unit, int count)
1568 Triangle *triangle = renderer->triangleBatch[unit];
1569 Primitive *primitive = renderer->primitiveBatch[unit];
// Draw call currently processed by this unit; drawList is indexed modulo DRAW_COUNT.
1572 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1573 SetupProcessor::State &state = draw.setupState;
// Multisample setting from the setup state (its use is not visible in this listing).
1575 int ms = state.multiSample;
1577 for(int i = 0; i < count; i++)
1579 if(setupPoint(renderer, *primitive, *triangle, draw))
// Builds a polygon around the line from triangle.v0 to triangle.v1, clips it when needed
// and hands it to the draw call's setup routine, whose result is returned.
// Two outline shapes appear below: a 4-vertex rectangle (currently disabled by `if(false)`)
// and a 6-vertex polygon used for the diamond test convention.
// NOTE(review): braces, the declarations of P/C/L and the early-out returns are absent
// from this listing; the nesting described here should be confirmed against the full file.
1591 bool Renderer::setupLine(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1593 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1594 const SetupProcessor::State &state = draw.setupState;
1595 const DrawData &data = *draw.data;
1597 float lineWidth = data.lineWidth;
1599 Vertex &v0 = triangle.v0;
1600 Vertex &v1 = triangle.v1;
// Index of the vertex output register that holds the position.
1602 int pos = state.positionRegister;
1604 const float4 &P0 = v0.v[pos];
1605 const float4 &P1 = v1.v[pos];
// Both endpoints have a non-positive w.
1607 if(P0.w <= 0 && P1.w <= 0)
// Wx16 / Hx16 scaled down by 16.
1612 const float W = data.Wx16[0] * (1.0f / 16.0f);
1613 const float H = data.Hx16[0] * (1.0f / 16.0f);
// Extent of the line after the perspective divide, scaled by W and H.
1615 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1616 float dy = H * (P1.y / P1.w - P0.y / P0.w);
// Degenerate (zero-length) line.
1618 if(dx == 0 && dy == 0)
1623 if(false) // Rectangle
// Half the line width divided by the line's length.
1633 float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
// Offsets multiplied by each endpoint's w and divided by W or H
// (presumably converting them back to clip-space units - TODO confirm).
1638 float dx0w = dx * P0.w / W;
1639 float dy0h = dy * P0.w / H;
1640 float dx0h = dx * P0.w / H;
1641 float dy0w = dy * P0.w / W;
1643 float dx1w = dx * P1.w / W;
1644 float dy1h = dy * P1.w / H;
1645 float dx1h = dx * P1.w / H;
1646 float dy1w = dy * P1.w / W;
// Offset the four corners and compute clip flags for each.
1648 P[0].x += -dy0w + -dx0w;
1649 P[0].y += -dx0h + +dy0h;
1650 C[0] = computeClipFlags(P[0], data);
1652 P[1].x += -dy1w + +dx1w;
1653 P[1].y += -dx1h + +dy1h;
1654 C[1] = computeClipFlags(P[1], data);
1656 P[2].x += +dy1w + +dx1w;
1657 P[2].y += +dx1h + -dy1h;
1658 C[2] = computeClipFlags(P[2], data);
1660 P[3].x += +dy0w + -dx0w;
1661 P[3].y += +dx0h + +dy0h;
1662 C[3] = computeClipFlags(P[3], data);
// Continue only when CLIP_FINITE is the sole flag common to all corners.
1664 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1666 Polygon polygon(P, 4);
1668 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
// Clip only when some flag other than CLIP_FINITE is set.
1670 if(clipFlagsOr != Clipper::CLIP_FINITE)
1672 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1678 return setupRoutine(&primitive, &triangle, &polygon, &data);
1681 else // Diamond test convention
// Half the line width multiplied by each endpoint's w and divided by W or H.
1695 float dx0 = lineWidth * 0.5f * P0.w / W;
1696 float dy0 = lineWidth * 0.5f * P0.w / H;
1698 float dx1 = lineWidth * 0.5f * P1.w / W;
1699 float dy1 = lineWidth * 0.5f * P1.w / H;
// Clip flags for eight candidate points (the code that positions P[0..7] is not
// visible in this listing).
1702 C[0] = computeClipFlags(P[0], data);
1705 C[1] = computeClipFlags(P[1], data);
1708 C[2] = computeClipFlags(P[2], data);
1711 C[3] = computeClipFlags(P[3], data);
1714 C[4] = computeClipFlags(P[4], data);
1717 C[5] = computeClipFlags(P[5], data);
1720 C[6] = computeClipFlags(P[6], data);
1723 C[7] = computeClipFlags(P[7], data);
// Continue only when CLIP_FINITE is the sole flag common to all eight points.
1725 if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1731 if(dx > dy) // Right
// Six-vertex outline L (its construction is not visible in this listing).
1772 Polygon polygon(L, 6);
1774 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
// Clip only when some flag other than CLIP_FINITE is set.
1776 if(clipFlagsOr != Clipper::CLIP_FINITE)
1778 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1784 return setupRoutine(&primitive, &triangle, &polygon, &data);
// Builds a 4-vertex polygon for the point at triangle.v0, clips it when needed and hands
// it to the draw call's setup routine, whose result is returned.
// The point size is clamped to [pointSizeMin, pointSizeMax] from the draw data.
// NOTE(review): braces, the declarations of pSize/P/C and the code that positions the
// four corners are absent from this listing.
1791 bool Renderer::setupPoint(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1793 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1794 const SetupProcessor::State &state = draw.setupState;
1795 const DrawData &data = *draw.data;
1797 Vertex &v = triangle.v0;
1801 int pts = state.pointSizeRegister;
// 0xF presumably means "no point size register"; the branch taken when a register
// is present is not visible in this listing - TODO confirm.
1803 if(state.pointSizeRegister != 0xF)
1809 pSize = data.point.pointSize[0];
1812 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
// Index of the vertex output register that holds the position.
1817 int pos = state.positionRegister;
// Point size multiplied by w and by the halfPixelX / halfPixelY factors.
1824 const float X = pSize * P[0].w * data.halfPixelX[0];
1825 const float Y = pSize * P[0].w * data.halfPixelY[0];
1829 C[0] = computeClipFlags(P[0], data);
1833 C[1] = computeClipFlags(P[1], data);
1837 C[2] = computeClipFlags(P[2], data);
1841 C[3] = computeClipFlags(P[3], data);
// v1 and v2 start as copies of v0 and are then offset by half the point size (times 16)
// along X and Y respectively; the Y direction depends on the sign of Hx16[0].
1843 triangle.v1 = triangle.v0;
1844 triangle.v2 = triangle.v0;
1846 triangle.v1.X += iround(16 * 0.5f * pSize);
1847 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1849 Polygon polygon(P, 4);
// Continue only when CLIP_FINITE is the sole flag common to all corners.
1851 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1853 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
// Clip only when some flag other than CLIP_FINITE is set.
1855 if(clipFlagsOr != Clipper::CLIP_FINITE)
1857 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1863 return setupRoutine(&primitive, &triangle, &polygon, &data);
// Returns a bit mask describing which clip tests the vertex `v` fails:
// bit 0: x > w, bit 1: y > w, bit 2: z > w, bit 3: x < -w, bit 4: y < -w,
// always combined with Clipper::CLIP_FINITE. `data` is not used in the visible lines.
// NOTE(review): one term between the y < -w test and CLIP_FINITE appears to be missing
// from this listing - confirm against the full file.
1869 unsigned int Renderer::computeClipFlags(const float4 &v, const DrawData &data)
1871 return ((v.x > v.w) << 0) |
1872 ((v.y > v.w) << 1) |
1873 ((v.z > v.w) << 2) |
1874 ((v.x < -v.w) << 3) |
1875 ((v.y < -v.w) << 4) |
1877 Clipper::CLIP_FINITE; // FIXME: xyz finite
// Allocates the per-unit batches and creates the worker threads.
// unitCount and clusterCount are both set to ceilPow2(threadCount). Every unit gets a
// triangle batch and a primitive batch of batchSize entries; every thread gets a
// VertexTask, a resume/suspend Event pair and a Thread running threadFunction.
1880 void Renderer::initializeThreads()
1882 unitCount = ceilPow2(threadCount);
1883 clusterCount = ceilPow2(threadCount);
1885 for(int i = 0; i < unitCount; i++)
1887 triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1888 primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1891 for(int i = 0; i < threadCount; i++)
1893 vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
// -1: presumably "cache belongs to no draw call yet" - TODO confirm.
1894 vertexTask[i]->vertexCache.drawCall = -1;
// Each worker starts out with a SUSPEND task.
1896 task[i].type = Task::SUSPEND;
1898 resume[i] = new Event();
1899 suspend[i] = new Event();
// Start-up arguments for the worker thread.
// NOTE(review): `parameters` is a local that appears to be passed to the thread by
// address, so the thread must have read it before this iteration ends; the
// synchronisation that guarantees this is not visible in this listing.
1901 Parameters parameters;
1902 parameters.threadIndex = i;
1903 parameters.renderer = this;
1905 exitThreads = false;
// NOTE(review): `¶meters` looks like a mis-encoded `&parameters` (HTML entity damage in
// this listing) - confirm against the original file.
1906 worker[i] = new Thread(threadFunction, ¶meters);
1909 suspend[i]->signal();
// Shuts down the worker threads and releases what initializeThreads() set up: joins and
// deletes each worker, deletes its events, and frees the vertex tasks and batches.
// NOTE(review): braces and some short statements (e.g. how the workers are told to exit)
// are absent from this listing.
1913 void Renderer::terminateThreads()
// Loop while any thread is still awake (loop body not visible in this listing).
1915 while(threadsAwake != 0)
1920 for(int thread = 0; thread < threadCount; thread++)
// Wake the worker, then wait for it to finish.
1925 resume[thread]->signal();
1926 worker[thread]->join();
1928 delete worker[thread];
1930 delete resume[thread];
1932 delete suspend[thread];
1933 suspend[thread] = 0;
1936 deallocate(vertexTask[thread]);
1937 vertexTask[thread] = 0;
// Walks a fixed 16 batch slots rather than unitCount.
// NOTE(review): relies on deallocate() tolerating slots that were never allocated - confirm.
1940 for(int i = 0; i < 16; i++)
1942 deallocate(triangleBatch[i]);
1943 triangleBatch[i] = 0;
1945 deallocate(primitiveBatch[i]);
1946 primitiveBatch[i] = 0;
// Applies the constants that a vertex shader defines inline.
// Walks every instruction; for DEF, DEFI and DEFB opcodes it takes the destination index
// and the first source operand's value and forwards them to setVertexShaderConstantF,
// setVertexShaderConstantI and setVertexShaderConstantB respectively.
// A null shader is ignored.
// NOTE(review): the declarations of `value` and `integer` are not visible in this listing.
1950 void Renderer::loadConstants(const VertexShader *vertexShader)
1952 if(!vertexShader) return;
1954 size_t count = vertexShader->getLength();
1956 for(size_t i = 0; i < count; i++)
1958 const Shader::Instruction *instruction = vertexShader->getInstruction(i);
// Float constant definition: copy the four components.
1960 if(instruction->opcode == Shader::OPCODE_DEF)
1962 int index = instruction->dst.index;
1965 value[0] = instruction->src[0].value[0];
1966 value[1] = instruction->src[0].value[1];
1967 value[2] = instruction->src[0].value[2];
1968 value[3] = instruction->src[0].value[3];
1970 setVertexShaderConstantF(index, value);
// Integer constant definition: copy the four components.
1972 else if(instruction->opcode == Shader::OPCODE_DEFI)
1974 int index = instruction->dst.index;
1977 integer[0] = instruction->src[0].integer[0];
1978 integer[1] = instruction->src[0].integer[1];
1979 integer[2] = instruction->src[0].integer[2];
1980 integer[3] = instruction->src[0].integer[3];
1982 setVertexShaderConstantI(index, integer);
// Boolean constant definition: only the first element is used.
1984 else if(instruction->opcode == Shader::OPCODE_DEFB)
1986 int index = instruction->dst.index;
1987 int boolean = instruction->src[0].boolean[0];
1989 setVertexShaderConstantB(index, &boolean);
// Applies the constants that a pixel shader defines inline.
// Walks every instruction; for DEF, DEFI and DEFB opcodes it takes the destination index
// and the first source operand's value and forwards them to setPixelShaderConstantF,
// setPixelShaderConstantI and setPixelShaderConstantB respectively.
// A null shader is ignored.
// NOTE(review): the declarations of `value` and `integer` are not visible in this listing.
1994 void Renderer::loadConstants(const PixelShader *pixelShader)
1996 if(!pixelShader) return;
1998 size_t count = pixelShader->getLength();
2000 for(size_t i = 0; i < count; i++)
2002 const Shader::Instruction *instruction = pixelShader->getInstruction(i);
// Float constant definition: copy the four components.
2004 if(instruction->opcode == Shader::OPCODE_DEF)
2006 int index = instruction->dst.index;
2009 value[0] = instruction->src[0].value[0];
2010 value[1] = instruction->src[0].value[1];
2011 value[2] = instruction->src[0].value[2];
2012 value[3] = instruction->src[0].value[3];
2014 setPixelShaderConstantF(index, value);
// Integer constant definition: copy the four components.
2016 else if(instruction->opcode == Shader::OPCODE_DEFI)
2018 int index = instruction->dst.index;
2021 integer[0] = instruction->src[0].integer[0];
2022 integer[1] = instruction->src[0].integer[1];
2023 integer[2] = instruction->src[0].integer[2];
2024 integer[3] = instruction->src[0].integer[3];
2026 setPixelShaderConstantI(index, integer);
// Boolean constant definition: only the first element is used.
2028 else if(instruction->opcode == Shader::OPCODE_DEFB)
2030 int index = instruction->dst.index;
2031 int boolean = instruction->src[0].boolean[0];
2033 setPixelShaderConstantB(index, &boolean);
// Stores the index buffer resource in the rendering context.
2038 void Renderer::setIndexBuffer(Resource *indexBuffer)
2040 context->indexBuffer = indexBuffer;
// Stores the multisample mask in the rendering context's sampleMask.
2043 void Renderer::setMultiSampleMask(unsigned int mask)
2045 context->sampleMask = mask;
// Sets the sw::transparencyAntialiasing global. The sw:: qualifier is needed because the
// parameter has the same name as the global.
2048 void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2050 sw::transparencyAntialiasing = transparencyAntialiasing;
// Checks whether the texture bound to `sampler` is also bound as an output: its resource
// is compared against the first four render targets and the depth-stencil surface.
// NOTE(review): the return statements are not visible in this listing; presumably a match
// yields true and no match yields false - confirm against the full file.
2053 bool Renderer::isReadWriteTexture(int sampler)
2055 for(int index = 0; index < 4; index++)
2057 if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2063 if(context->depthStencil && context->texture[sampler] == context->depthStencil->getResource())
// Recomputes clipPlane[] from userPlane[] when updateClipPlanes is set, then clears the
// flag. Only planes whose CLIP_PLANEn bit is set in clipFlags are updated.
// With the fixed-function vertex pipeline each user plane is multiplied by the view
// transform; otherwise the user plane is copied unchanged.
2071 void Renderer::updateClipper()
2073 if(updateClipPlanes)
2075 if(VertexProcessor::isFixedFunction()) // User plane in world space
// Despite its name, this holds the view transform.
2077 const Matrix &scissorWorld = getViewTransform();
2079 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2080 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2081 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2082 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2083 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2084 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2086 else // User plane in clip space
2088 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2089 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2090 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2091 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2092 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2093 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2096 updateClipPlanes = false;
// Binds `resource` as the texture of the given sampler slot in the context.
// Asserts that sampler is below TOTAL_IMAGE_UNITS.
2100 void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2102 ASSERT(sampler < TOTAL_IMAGE_UNITS);
2104 context->texture[sampler] = resource;
// Forwards one texture level (face, mipmap level, surface, type) to the context's sampler.
// Asserts sampler < TOTAL_IMAGE_UNITS, face < 6 and level < MIPMAP_LEVELS.
2107 void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2109 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2111 context->sampler[sampler].setTextureLevel(face, level, surface, type);
// Sets the texture filter of a sampler. SAMPLER_PIXEL routes to PixelProcessor; the
// VertexProcessor call is presumably the else branch (the `else` is not visible here).
2114 void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2116 if(type == SAMPLER_PIXEL)
2118 PixelProcessor::setTextureFilter(sampler, textureFilter);
2122 VertexProcessor::setTextureFilter(sampler, textureFilter);
// Sets the mipmap filter of a sampler. SAMPLER_PIXEL routes to PixelProcessor; the
// VertexProcessor call is presumably the else branch (the `else` is not visible here).
2126 void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2128 if(type == SAMPLER_PIXEL)
2130 PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2134 VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
// Enables or disables gather for a sampler. SAMPLER_PIXEL routes to PixelProcessor; the
// VertexProcessor call is presumably the else branch (the `else` is not visible here).
2138 void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2140 if(type == SAMPLER_PIXEL)
2142 PixelProcessor::setGatherEnable(sampler, enable);
2146 VertexProcessor::setGatherEnable(sampler, enable);
// Sets the U addressing mode of a sampler. SAMPLER_PIXEL routes to PixelProcessor; the
// VertexProcessor call is presumably the else branch (the `else` is not visible here).
2150 void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2152 if(type == SAMPLER_PIXEL)
2154 PixelProcessor::setAddressingModeU(sampler, addressMode);
2158 VertexProcessor::setAddressingModeU(sampler, addressMode);
// Sets the V addressing mode of a sampler. SAMPLER_PIXEL routes to PixelProcessor; the
// VertexProcessor call is presumably the else branch (the `else` is not visible here).
2162 void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2164 if(type == SAMPLER_PIXEL)
2166 PixelProcessor::setAddressingModeV(sampler, addressMode);
2170 VertexProcessor::setAddressingModeV(sampler, addressMode);
// Sets the W addressing mode of a sampler. SAMPLER_PIXEL routes to PixelProcessor; the
// VertexProcessor call is presumably the else branch (the `else` is not visible here).
2174 void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2176 if(type == SAMPLER_PIXEL)
2178 PixelProcessor::setAddressingModeW(sampler, addressMode);
2182 VertexProcessor::setAddressingModeW(sampler, addressMode);
// Sets the sRGB-read flag of a sampler. SAMPLER_PIXEL routes to PixelProcessor; the
// VertexProcessor call is presumably the else branch (the `else` is not visible here).
2186 void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2188 if(type == SAMPLER_PIXEL)
2190 PixelProcessor::setReadSRGB(sampler, sRGB);
2194 VertexProcessor::setReadSRGB(sampler, sRGB);
// Sets the mipmap LOD bias of a sampler. SAMPLER_PIXEL routes to PixelProcessor; the
// VertexProcessor call is presumably the else branch (the `else` is not visible here).
2198 void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2200 if(type == SAMPLER_PIXEL)
2202 PixelProcessor::setMipmapLOD(sampler, bias);
2206 VertexProcessor::setMipmapLOD(sampler, bias);
// Sets the border color of a sampler. SAMPLER_PIXEL routes to PixelProcessor; the
// VertexProcessor call is presumably the else branch (the `else` is not visible here).
2210 void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2212 if(type == SAMPLER_PIXEL)
2214 PixelProcessor::setBorderColor(sampler, borderColor);
2218 VertexProcessor::setBorderColor(sampler, borderColor);
// Sets the maximum anisotropy of a sampler. SAMPLER_PIXEL routes to PixelProcessor; the
// VertexProcessor call is presumably the else branch (the `else` is not visible here).
2222 void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2224 if(type == SAMPLER_PIXEL)
2226 PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2230 VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
// Forwards the point sprite enable flag to the rendering context.
2234 void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2236 context->setPointSpriteEnable(pointSpriteEnable);
// Forwards the point scale enable flag to the rendering context.
2239 void Renderer::setPointScaleEnable(bool pointScaleEnable)
2241 context->setPointScaleEnable(pointScaleEnable);
// Stores the line width in the rendering context.
2244 void Renderer::setLineWidth(float width)
2246 context->lineWidth = width;
// Sets the depth bias.
// NOTE(review): the body of this function is not visible in this listing.
2249 void Renderer::setDepthBias(float bias)
// Stores the slope-scaled depth bias in slopeDepthBias.
2254 void Renderer::setSlopeDepthBias(float slopeBias)
2256 slopeDepthBias = slopeBias;
// Makes `shader` the context's pixel shader and applies the constants it defines inline
// (loadConstants() ignores a null shader).
2259 void Renderer::setPixelShader(const PixelShader *shader)
2261 context->pixelShader = shader;
2263 loadConstants(shader);
// Makes `shader` the context's vertex shader and applies the constants it defines inline
// (loadConstants() ignores a null shader).
2266 void Renderer::setVertexShader(const VertexShader *shader)
2268 context->vertexShader = shader;
2270 loadConstants(shader);
// Sets `count` pixel shader float constants starting at register `index`.
// First raises psDirtyConstF of every draw call slot to at least index + count, then
// forwards each register to PixelProcessor::setFloatConstant.
// NOTE(review): no statement advancing `value` between iterations is visible in this
// listing - confirm the pointer is stepped per register in the full file.
2273 void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2275 for(int i = 0; i < DRAW_COUNT; i++)
2277 if(drawCall[i]->psDirtyConstF < index + count)
2279 drawCall[i]->psDirtyConstF = index + count;
2283 for(int i = 0; i < count; i++)
2285 PixelProcessor::setFloatConstant(index + i, value);
// Sets `count` pixel shader integer constants starting at register `index`.
// First raises psDirtyConstI of every draw call slot to at least index + count, then
// forwards each register to PixelProcessor::setIntegerConstant.
// NOTE(review): no statement advancing `value` between iterations is visible in this
// listing - confirm the pointer is stepped per register in the full file.
2290 void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2292 for(int i = 0; i < DRAW_COUNT; i++)
2294 if(drawCall[i]->psDirtyConstI < index + count)
2296 drawCall[i]->psDirtyConstI = index + count;
2300 for(int i = 0; i < count; i++)
2302 PixelProcessor::setIntegerConstant(index + i, value);
// Sets `count` pixel shader boolean constants starting at register `index`.
// First raises psDirtyConstB of every draw call slot to at least index + count, then
// forwards each register to PixelProcessor::setBooleanConstant.
// NOTE(review): no statement advancing `boolean` between iterations is visible in this
// listing - confirm the pointer is stepped per register in the full file.
2307 void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2309 for(int i = 0; i < DRAW_COUNT; i++)
2311 if(drawCall[i]->psDirtyConstB < index + count)
2313 drawCall[i]->psDirtyConstB = index + count;
2317 for(int i = 0; i < count; i++)
2319 PixelProcessor::setBooleanConstant(index + i, *boolean);
// Sets `count` vertex shader float constants starting at register `index`.
// First raises vsDirtyConstF of every draw call slot to at least index + count, then
// forwards each register to VertexProcessor::setFloatConstant.
// NOTE(review): no statement advancing `value` between iterations is visible in this
// listing - confirm the pointer is stepped per register in the full file.
2324 void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2326 for(int i = 0; i < DRAW_COUNT; i++)
2328 if(drawCall[i]->vsDirtyConstF < index + count)
2330 drawCall[i]->vsDirtyConstF = index + count;
2334 for(int i = 0; i < count; i++)
2336 VertexProcessor::setFloatConstant(index + i, value);
// Sets `count` vertex shader integer constants starting at register `index`.
// First raises vsDirtyConstI of every draw call slot to at least index + count, then
// forwards each register to VertexProcessor::setIntegerConstant.
// NOTE(review): no statement advancing `value` between iterations is visible in this
// listing - confirm the pointer is stepped per register in the full file.
2341 void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2343 for(int i = 0; i < DRAW_COUNT; i++)
2345 if(drawCall[i]->vsDirtyConstI < index + count)
2347 drawCall[i]->vsDirtyConstI = index + count;
2351 for(int i = 0; i < count; i++)
2353 VertexProcessor::setIntegerConstant(index + i, value);
// Sets `count` vertex shader boolean constants starting at register `index`.
// First raises vsDirtyConstB of every draw call slot to at least index + count, then
// forwards each register to VertexProcessor::setBooleanConstant.
// NOTE(review): no statement advancing `boolean` between iterations is visible in this
// listing - confirm the pointer is stepped per register in the full file.
2358 void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2360 for(int i = 0; i < DRAW_COUNT; i++)
2362 if(drawCall[i]->vsDirtyConstB < index + count)
2364 drawCall[i]->vsDirtyConstB = index + count;
2368 for(int i = 0; i < count; i++)
2370 VertexProcessor::setBooleanConstant(index + i, *boolean);
// Forwards model matrix `M` for slot `i` to the vertex processor. Unlike the view, base
// and projection setters, this one does not mark the clip planes for recomputation.
2375 void Renderer::setModelMatrix(const Matrix &M, int i)
2377 VertexProcessor::setModelMatrix(M, i);
// Forwards the view matrix to the vertex processor and marks the clip planes for
// recomputation (updateClipper() multiplies user planes by the view transform).
2380 void Renderer::setViewMatrix(const Matrix &V)
2382 VertexProcessor::setViewMatrix(V);
2383 updateClipPlanes = true;
// Forwards the base matrix to the vertex processor and marks the clip planes for
// recomputation.
2386 void Renderer::setBaseMatrix(const Matrix &B)
2388 VertexProcessor::setBaseMatrix(B);
2389 updateClipPlanes = true;
// Forwards the projection matrix to the vertex processor and marks the clip planes for
// recomputation.
2392 void Renderer::setProjectionMatrix(const Matrix &P)
2394 VertexProcessor::setProjectionMatrix(P);
2395 updateClipPlanes = true;
// Appends `query` to the renderer's list of queries.
2398 void Renderer::addQuery(Query *query)
2400 queries.push_back(query);
// Removes `query` from the renderer's list of queries.
2403 void Renderer::removeQuery(Query *query)
2405 queries.remove(query);
// Returns the renderer's thread count.
// NOTE(review): the body of this function is not visible in this listing.
2409 int Renderer::getThreadCount()
// Returns the accumulated vertex processing time counter of the given thread.
// `thread` is used as an index without a range check.
2414 int64_t Renderer::getVertexTime(int thread)
2416 return vertexTime[thread];
// Returns the accumulated setup time counter of the given thread.
// `thread` is used as an index without a range check.
2419 int64_t Renderer::getSetupTime(int thread)
2421 return setupTime[thread];
// Returns the accumulated pixel processing time counter of the given thread.
// `thread` is used as an index without a range check.
2424 int64_t Renderer::getPixelTime(int thread)
2426 return pixelTime[thread];
// Resets the vertex, setup and pixel time counters of every thread to zero.
2429 void Renderer::resetTimers()
2431 for(int thread = 0; thread < threadCount; thread++)
2433 vertexTime[thread] = 0;
2434 setupTime[thread] = 0;
2435 pixelTime[thread] = 0;
// Stores a copy of the viewport; `this->` is needed because the parameter shadows the member.
2440 void Renderer::setViewport(const Viewport &viewport)
2442 this->viewport = viewport;
// Stores a copy of the scissor rectangle; `this->` is needed because the parameter shadows the member.
2445 void Renderer::setScissor(const Rect &scissor)
2447 this->scissor = scissor;
// Stores the user clip plane enable flags, shifted up by 8 bits so they sit above the
// bits reserved for the frustum flags.
2450 void Renderer::setClipFlags(int flags)
2452 clipFlags = flags << 8; // Bottom 8 bits used by legacy frustum
// Stores a user clip plane when `index` is below MAX_CLIP_PLANES and marks the clip
// planes for recomputation by updateClipper().
// NOTE(review): how an out-of-range index is handled is not visible in this listing.
2455 void Renderer::setClipPlane(unsigned int index, const float plane[4])
2457 if(index < MAX_CLIP_PLANES)
2459 userPlane[index] = plane;
2463 updateClipPlanes = true;
// Applies the settings held by swiftConfig to the renderer and to the global tuning
// variables. The settings are read when swiftConfig reports a new configuration or when
// initialUpdate is true.
// NOTE(review): braces and switch case labels are absent from this listing, so the exact
// nesting of the statements below should be confirmed against the full file.
2466 void Renderer::updateConfiguration(bool initialUpdate)
2468 bool newConfiguration = swiftConfig->hasNewConfiguration();
2470 if(newConfiguration || initialUpdate)
2474 SwiftConfig::Configuration configuration = {0};
2475 swiftConfig->getConfiguration(configuration);
// Precaching is only enabled when the configuration has not just changed.
2477 precacheVertex = !newConfiguration && configuration.precache;
2478 precacheSetup = !newConfiguration && configuration.precache;
2479 precachePixel = !newConfiguration && configuration.precache;
2481 VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2482 PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2483 SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
// 0: point, 1: linear, 2 and any other value: anisotropic.
2485 switch(configuration.textureSampleQuality)
2487 case 0: Sampler::setFilterQuality(FILTER_POINT); break;
2488 case 1: Sampler::setFilterQuality(FILTER_LINEAR); break;
2489 case 2: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2490 default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
// 0: point, 1 and any other value: linear.
2493 switch(configuration.mipmapQuality)
2495 case 0: Sampler::setMipmapQuality(MIPMAP_POINT); break;
2496 case 1: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2497 default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2500 setPerspectiveCorrection(configuration.perspectiveCorrection);
// Each group below sets all four transcendental precisions to the same level; the case
// labels selecting the groups are not visible in this listing.
2502 switch(configuration.transcendentalPrecision)
2505 logPrecision = APPROXIMATE;
2506 expPrecision = APPROXIMATE;
2507 rcpPrecision = APPROXIMATE;
2508 rsqPrecision = APPROXIMATE;
2511 logPrecision = PARTIAL;
2512 expPrecision = PARTIAL;
2513 rcpPrecision = PARTIAL;
2514 rsqPrecision = PARTIAL;
2517 logPrecision = ACCURATE;
2518 expPrecision = ACCURATE;
2519 rcpPrecision = ACCURATE;
2520 rsqPrecision = ACCURATE;
2523 logPrecision = WHQL;
2524 expPrecision = WHQL;
2525 rcpPrecision = WHQL;
2526 rsqPrecision = WHQL;
2529 logPrecision = IEEE;
2530 expPrecision = IEEE;
2531 rcpPrecision = IEEE;
2532 rsqPrecision = IEEE;
2535 logPrecision = ACCURATE;
2536 expPrecision = ACCURATE;
2537 rcpPrecision = ACCURATE;
2538 rsqPrecision = ACCURATE;
// 1: alpha-to-coverage, 0 and any other value: none.
2542 switch(configuration.transparencyAntialiasing)
2544 case 0: transparencyAntialiasing = TRANSPARENCY_NONE; break;
2545 case 1: transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2546 default: transparencyAntialiasing = TRANSPARENCY_NONE; break;
// -1: CPU core count, 0: process affinity, otherwise the configured value.
2549 switch(configuration.threadCount)
2551 case -1: threadCount = CPUID::coreCount(); break;
2552 case 0: threadCount = CPUID::processAffinity(); break;
2553 default: threadCount = configuration.threadCount; break;
// Instruction set extensions the configuration allows.
2556 CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2557 CPUID::setEnableSSSE3(configuration.enableSSSE3);
2558 CPUID::setEnableSSE3(configuration.enableSSE3);
2559 CPUID::setEnableSSE2(configuration.enableSSE2);
2560 CPUID::setEnableSSE(configuration.enableSSE);
// Copy the ten optimization pass settings.
2562 for(int pass = 0; pass < 10; pass++)
2564 optimization[pass] = configuration.optimization[pass];
2567 forceWindowed = configuration.forceWindowed;
2568 complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2569 postBlendSRGB = configuration.postBlendSRGB;
2570 exactColorRounding = configuration.exactColorRounding;
2571 forceClearRegisters = configuration.forceClearRegisters;
2574 minPrimitives = configuration.minPrimitives;
2575 maxPrimitives = configuration.maxPrimitives;
// Outside the initial update, create the workers if the first worker slot is empty.
2579 if(!initialUpdate && !worker[0])
2581 initializeThreads();