1 // SwiftShader Software Renderer
3 // Copyright(c) 2005-2012 TransGaming Inc.
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
12 #include "Renderer.hpp"
14 #include "Clipper.hpp"
16 #include "FrameBuffer.hpp"
18 #include "Surface.hpp"
20 #include "Primitive.hpp"
21 #include "Polygon.hpp"
22 #include "SwiftConfig.hpp"
23 #include "MutexLock.hpp"
26 #include "Resource.hpp"
27 #include "Constants.hpp"
29 #include "Reactor/Reactor.hpp"
// Module-level configuration state. The plain definitions below carry their
// default values; the `extern` declarations refer to flags defined elsewhere.
// NOTE(review): the enclosing namespace is not visible in this excerpt; the
// Renderer constructor assigns several of these flags through `sw::`.

// Passed to the SwiftConfig constructor by Renderer::Renderer().
36 bool disableServer = true;
// Renderer::draw() compares its primitive count against these two bounds.
39 unsigned int minPrimitives = 1;
40 unsigned int maxPrimitives = 1 << 21;
// Rendering-convention and behavior flags defined in another translation unit.
45 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates
46 extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1]
47 extern bool booleanFaceRegister;
48 extern bool fullPixelPositionRegister;
50 extern bool forceWindowed;
51 extern bool complementaryDepthBuffer;
52 extern bool postBlendSRGB;
53 extern bool exactColorRounding;
54 extern TransparencyAntialiasing transparencyAntialiasing;
55 extern bool forceClearRegisters;
57 extern bool precacheVertex;
58 extern bool precacheSetup;
59 extern bool precachePixel;
// Precision settings for transcendental operations. Renderer::threadFunction()
// enables flush-to-zero / denormals-are-zero when logPrecision is below IEEE.
66 TranscendentalPrecision logPrecision = ACCURATE;
67 TranscendentalPrecision expPrecision = ACCURATE;
68 TranscendentalPrecision rcpPrecision = ACCURATE;
69 TranscendentalPrecision rsqPrecision = ACCURATE;
70 bool perspectiveCorrection = true;
// NOTE(review): the header of the definition these statements belong to is not
// visible in this excerpt. Presumably this is DrawCall's constructor, since
// Renderer::draw() reads draw->vsDirtyConstF and draw->data — confirm.
// Initial dirty count for the vertex shader float constants; draw() copies
// that many float4 entries whenever the count is non-zero.
82 vsDirtyConstF = 256 + 1;
// Allocate the DrawData block and point it at `constants`.
92 data = (DrawData*)allocate(sizeof(DrawData));
93 data->constants = &constants;
// Constructs the renderer.
//
// context - stored in the `context` member and forwarded to the VertexProcessor,
//           PixelProcessor and SetupProcessor initializers.
// halfIntegerCoordinates, symmetricNormalizedDepth, booleanFaceRegister,
// fullPixelPositionRegister, exactColorRounding - copied into the sw:: globals
//           of the same name.
//
// NOTE(review): this excerpt omits lines of the body (braces and the bodies of
// some loops), so only the visible statements are described.
103 Renderer::Renderer(Context *context, bool halfIntegerCoordinates, bool symmetricNormalizedDepth, bool booleanFaceRegister, bool fullPixelPositionRegister, bool exactColorRounding) : context(context), VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), viewport()
// Publish the constructor arguments through the global flags.
105 sw::halfIntegerCoordinates = halfIntegerCoordinates;
106 sw::symmetricNormalizedDepth = symmetricNormalizedDepth;
107 sw::booleanFaceRegister = booleanFaceRegister;
108 sw::fullPixelPositionRegister = fullPixelPositionRegister;
109 sw::exactColorRounding = exactColorRounding;
111 setRenderTarget(0, 0);
112 clipper = new Clipper();
// Flag the matrices and clip planes as needing an update.
114 updateViewMatrix = true;
115 updateBaseMatrix = true;
116 updateProjectionMatrix = true;
117 updateClipPlanes = true;
// NOTE(review): the body of this 16-iteration loop is not visible in the excerpt.
123 for(int i = 0; i < 16; i++)
133 resumeApp = new Event();
// Clear the 16 per-unit triangle and primitive batch pointers.
141 for(int i = 0; i < 16; i++)
143 triangleBatch[i] = 0;
144 primitiveBatch[i] = 0;
// Allocate the pool of DRAW_COUNT draw calls; drawList initially refers to
// them in pool order. The destructor deletes these objects.
147 for(int draw = 0; draw < DRAW_COUNT; draw++)
149 drawCall[draw] = new DrawCall();
150 drawList[draw] = drawCall[draw];
// Initialize the progress trackers for 16 primitive units and 16 pixel clusters.
153 for(int unit = 0; unit < 16; unit++)
155 primitiveProgress[unit].init();
158 for(int cluster = 0; cluster < 16; cluster++)
160 pixelProgress[cluster].init();
// Create the configuration object and apply the configuration immediately.
165 swiftConfig = new SwiftConfig(disableServer);
166 updateConfiguration(true);
// Resource locked by draw() (PRIVATE) and by synchronize() (PUBLIC).
168 sync = new Resource(0);
// Destroys the renderer.
// NOTE(review): only the draw-call cleanup is visible in this excerpt; other
// teardown statements appear to have been omitted.
171 Renderer::~Renderer()
// Delete the DrawCall objects allocated by the constructor.
181 for(int draw = 0; draw < DRAW_COUNT; draw++)
183 delete drawCall[draw];
// Copies the sRect region of `source` to the dRect region of `dest` by
// forwarding all arguments, including `filter`, unchanged to blitter.blit().
189 void Renderer::blit(Surface *source, const Rect &sRect, Surface *dest, const Rect &dRect, bool filter)
191 blitter.blit(source, sRect, dest, dRect, filter);
// Records one draw call per super-sample pass: refreshes processor state,
// picks the setup function, snapshots constants and viewport data into the
// draw call's DrawData, and locks the resources the draw call uses.
//
// drawType    - primitive type; stored on the context and on the draw call.
// indexOffset - byte offset added to the locked index buffer pointer.
// count       - primitive count; compared against minPrimitives/maxPrimitives.
// update      - when true, processor states and routines are refreshed even
//               if the multisample mask did not change.
//
// NOTE(review): this excerpt omits many lines of the body (braces, `else`
// branches, `return`/`continue` statements, some declarations), so the
// comments below describe only the statements that are visible.
194 void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
// Range check on the primitive count (the action taken is not visible here).
197 if(count < minPrimitives || count > maxPrimitives)
203 context->drawType = drawType;
205 updateConfiguration();
208 int ss = context->getSuperSampleCount();
209 int ms = context->getMultiSampleCount();
// One pass per super-sample.
211 for(int q = 0; q < ss; q++)
213 int oldMultiSampleMask = context->multiSampleMask;
// Select the `ms` sample-mask bits that belong to pass q.
// NOTE(review): if ms were 0 the right-hand shift count would be 32, which is
// undefined for a 32-bit operand — presumably ms >= 1 always; confirm.
214 context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
// No samples enabled for this pass (the action taken is not visible here).
216 if(!context->multiSampleMask)
221 sync->lock(sw::PRIVATE);
// Refresh the processor states and routines when requested by the caller or
// when the multisample mask changed.
223 if(update || oldMultiSampleMask != context->multiSampleMask)
225 vertexState = VertexProcessor::update();
226 setupState = SetupProcessor::update();
227 pixelState = PixelProcessor::update();
229 vertexRoutine = VertexProcessor::routine(vertexState);
230 setupRoutine = SetupProcessor::routine(setupState);
231 pixelRoutine = PixelProcessor::routine(pixelState);
// Primitives per batch for this draw call.
234 int batch = batchSize / ms;
// Choose the setup function from the primitive kind and, for triangles, the
// fill mode (the individual case labels are not visible in this excerpt).
236 if(context->isDrawTriangle())
238 switch(context->fillMode)
241 setupPrimitives = setupSolidTriangles;
244 setupPrimitives = setupWireframeTriangle;
248 setupPrimitives = setupVertexTriangle;
251 default: ASSERT(false);
254 else if(context->isDrawLine())
256 setupPrimitives = setupLines;
260 setupPrimitives = setupPoints;
// Look for a draw call whose reference count is -1, the value
// finishRendering() stores once a draw call has been released.
// NOTE(review): the declaration of `draw` and the surrounding wait/retry logic
// are not visible in this excerpt.
267 for(int i = 0; i < DRAW_COUNT; i++)
269 if(drawCall[i]->references == -1)
272 drawList[nextDraw % DRAW_COUNT] = draw;
285 DrawData *data = draw->data;
// Take a reference on every active query and give the draw call its own copy
// of the list; finishRendering() drops the references again.
287 if(queries.size() != 0)
289 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
291 atomicIncrement(&(*query)->reference);
294 draw->queries = new std::list<Query*>(queries);
297 draw->drawType = drawType;
298 draw->batchSize = batch;
// Bind the routines for the duration of the draw call; finishRendering()
// unbinds them.
300 vertexRoutine->bind();
301 setupRoutine->bind();
302 pixelRoutine->bind();
304 draw->vertexRoutine = vertexRoutine;
305 draw->setupRoutine = setupRoutine;
306 draw->pixelRoutine = pixelRoutine;
// Cache the routine entry points as typed function pointers.
// NOTE(review): the second ';' on the next line is a stray empty statement.
307 draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();;
308 draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
309 draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
310 draw->setupPrimitives = setupPrimitives;
311 draw->setupState = setupState;
// Capture the 16 vertex input streams and lock those that have a resource.
313 for(int i = 0; i < 16; i++)
315 draw->vertexStream[i] = context->input[i].resource;
316 data->input[i] = context->input[i].buffer;
317 data->stride[i] = context->input[i].stride;
319 if(draw->vertexStream[i])
321 draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
// Lock the index buffer and apply the caller's byte offset.
325 if(context->indexBuffer)
327 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
330 draw->indexBuffer = context->indexBuffer;
// Clear all 20 texture slots (finishRendering() iterates them as 16 + 4).
332 for(int sampler = 0; sampler < 20; sampler++)
334 draw->texture[sampler] = 0;
// Slots 0..15: textures used by the pixel state.
337 for(int sampler = 0; sampler < 16; sampler++)
339 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
341 draw->texture[sampler] = context->texture[sampler];
342 draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE); // If the texture is both read and written, use the same read/write lock as render targets
344 data->mipmap[sampler] = context->sampler[sampler].getTextureData();
// Copy the pixel shader constants flagged dirty on the draw call, then clear
// each dirty count.
348 if(context->pixelShader)
350 if(draw->psDirtyConstF)
// The cW copy is capped at 8 entries of 4 word4 each.
352 memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
353 memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
354 draw->psDirtyConstF = 0;
357 if(draw->psDirtyConstI)
359 memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
360 draw->psDirtyConstI = 0;
363 if(draw->psDirtyConstB)
365 memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
366 draw->psDirtyConstB = 0;
// For pixel shader versions up to 0x0104 (presumably version 1.4 — confirm the
// encoding), copy the uniforms of each of the 8 texture stages that is enabled
// or used with a pixel shader.
370 if(context->pixelShaderVersion() <= 0x0104)
372 for(int stage = 0; stage < 8; stage++)
374 if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
376 data->textureStage[stage] = context->textureStage[stage].uniforms;
382 if(context->vertexShader)
// Vertex shaders of version 0x0300 and above: capture the 4 vertex textures,
// which live in slots 16..19.
384 if(context->vertexShader->getVersion() >= 0x0300)
386 for(int sampler = 0; sampler < 4; sampler++)
388 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
390 draw->texture[16 + sampler] = context->texture[16 + sampler];
391 draw->texture[16 + sampler]->lock(PUBLIC, PRIVATE);
393 data->mipmap[16 + sampler] = context->sampler[16 + sampler].getTextureData();
// Copy the vertex shader constants flagged dirty, then clear each dirty count.
398 if(draw->vsDirtyConstF)
400 memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
401 draw->vsDirtyConstF = 0;
404 if(draw->vsDirtyConstI)
406 memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
407 draw->vsDirtyConstI = 0;
410 if(draw->vsDirtyConstB)
412 memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
413 draw->vsDirtyConstB = 0;
// Mark every vertex shader constant dirty again.
// NOTE(review): presumably the branch taken when no vertex shader is set — the
// `else` line is not visible in this excerpt; confirm.
420 draw->vsDirtyConstF = 256 + 1;
421 draw->vsDirtyConstI = 16;
422 draw->vsDirtyConstB = 16;
// Copy both stencil states when stenciling is active.
425 if(pixelState.stencilActive)
427 data->stencil[0] = stencil;
428 data->stencil[1] = stencilCCW;
// NOTE(review): the bodies of the fog and point-draw conditions are not
// visible in this excerpt.
431 if(pixelState.fogActive)
436 if(setupState.isDrawPoint)
441 data->factor = factor;
// Alpha-to-coverage: derive thresholds spaced around the alpha reference,
// with `margin` limited by the distance to the nearer end of [0, 1].
443 if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
445 float ref = (float)context->alphaReference * (1.0f / 255.0f);
446 float margin = sw::min(ref, 1.0f - ref);
// Four-threshold variant (the selecting condition is not visible here).
450 data->a2c0 = replicate(ref - margin * 0.6f);
451 data->a2c1 = replicate(ref - margin * 0.2f);
452 data->a2c2 = replicate(ref + margin * 0.2f);
453 data->a2c3 = replicate(ref + margin * 0.6f);
// Two-threshold variant (the selecting condition is not visible here).
457 data->a2c0 = replicate(ref - margin * 0.3f);
458 data->a2c1 = replicate(ref + margin * 0.3f);
// Reset the per-cluster occlusion counters read back by finishRendering().
463 if(pixelState.occlusionEnabled)
465 for(int cluster = 0; cluster < clusterCount; cluster++)
467 data->occlusion[cluster] = 0;
// Reset the per-cluster performance counters accumulated by finishRendering().
472 for(int cluster = 0; cluster < clusterCount; cluster++)
474 for(int i = 0; i < PERF_TIMERS; i++)
476 data->cycles[i][cluster] = 0;
// Viewport half-extents (W, H), center (X0, Y0) and depth range (N, F).
483 float W = 0.5f * viewport.width;
484 float H = 0.5f * viewport.height;
485 float X0 = viewport.x0 + W;
486 float Y0 = viewport.y0 + H;
487 float N = viewport.minZ;
488 float F = viewport.maxZ;
// NOTE(review): the bodies of the next two conditions are not visible here.
491 if(context->isDrawTriangle(false))
496 if(complementaryDepthBuffer)
// Per-pass sample offsets; the row is log2(super-sample count), the column is
// the pass index q.
502 static const float X[5][16] = // Fragment offsets
504 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample
505 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples
506 {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples
507 {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples
508 {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f} // 16 samples
511 static const float Y[5][16] = // Fragment offsets
513 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample
514 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples
515 {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples
516 {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples
517 {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f} // 16 samples
520 int s = sw::log2(ss);
// Viewport scale and center pre-multiplied by 16 (setupLine() divides by 16 to
// recover W and H), plus the sample offset and half-pixel size expressed
// relative to the viewport half-extents.
522 data->Wx16 = replicate(W * 16);
523 data->Hx16 = replicate(H * 16);
524 data->X0x16 = replicate(X0 * 16);
525 data->Y0x16 = replicate(Y0 * 16);
526 data->XXXX = replicate(X[s][q] / W);
527 data->YYYY = replicate(Y[s][q] / H);
528 data->halfPixelX = replicate(0.5f / W);
529 data->halfPixelY = replicate(0.5f / H);
530 data->viewportHeight = abs(viewport.height);
531 data->slopeDepthBias = slopeDepthBias;
// NOTE(review): `Z` is defined on a line that is not visible in this excerpt.
532 data->depthRange = Z;
534 draw->clipFlags = clipFlags;
// Copy only the user clip planes whose flag is set.
538 if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
539 if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
540 if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
541 if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
542 if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
543 if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
// Lock up to 4 render targets at slice q * ms for read/write access and record
// their pitches; finishRendering() unlocks them.
549 for(int index = 0; index < 4; index++)
551 draw->renderTarget[index] = context->renderTarget[index];
553 if(draw->renderTarget[index])
555 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
556 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
557 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
// Same for the depth and stencil planes of the depth/stencil surface.
561 draw->depthStencil = context->depthStencil;
563 if(draw->depthStencil)
565 data->depthBuffer = (float*)context->depthStencil->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
566 data->depthPitchB = context->depthStencil->getInternalPitchB();
567 data->depthSliceB = context->depthStencil->getInternalSliceB();
569 data->stencilBuffer = (unsigned char*)context->depthStencil->lockStencil(q * ms, MANAGED);
570 data->stencilPitchB = context->depthStencil->getStencilPitchB();
571 data->stencilSliceB = context->depthStencil->getStencilSliceB();
577 data->scissorX0 = scissor.x0;
578 data->scissorX1 = scissor.x1;
579 data->scissorY0 = scissor.y0;
580 data->scissorY1 = scissor.y1;
// One reference per batch: count / batch, rounded up.
586 draw->references = (count + batch - 1) / batch;
// Mark task slot 0 as RESUME (the surrounding wake-up logic is not visible).
597 task[0].type = Task::RESUME;
// Worker thread entry point.
//
// parameters - pointer to a Parameters object supplying the Renderer and the
//              index of this worker thread.
604 void Renderer::threadFunction(void *parameters)
606 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
607 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
// Below IEEE precision, enable flush-to-zero and denormals-are-zero.
609 if(logPrecision < IEEE)
611 CPUID::setFlushToZero(true);
612 CPUID::setDenormalsAreZero(true);
615 renderer->threadLoop(threadIndex);
// Per-thread loop: runs tasks via taskLoop(), then signals this thread's
// `suspend` event and waits on its `resume` event.
// NOTE(review): the loop construct and its exit condition are not visible in
// this excerpt.
618 void Renderer::threadLoop(int threadIndex)
622 taskLoop(threadIndex);
624 suspend[threadIndex]->signal();
625 resume[threadIndex]->wait();
// Alternates between scheduling and executing a task for thread `threadIndex`
// until that thread's task type becomes Task::SUSPEND.
629 void Renderer::taskLoop(int threadIndex)
631 while(task[threadIndex].type != Task::SUSPEND)
633 scheduleTask(threadIndex);
634 executeTask(threadIndex);
// Appends runnable tasks to the task queue: first pixel tasks for idle
// clusters, then primitive tasks for free primitive units.
// NOTE(review): this excerpt omits lines of the body (braces, counter updates,
// `break` statements), so only the visible statements are described.
638 void Renderer::findAvailableTasks()
// Pixel tasks: pair each idle cluster with a primitive unit holding processed
// primitives for the draw call and primitive offset the cluster expects next.
641 for(int cluster = 0; cluster < clusterCount; cluster++)
643 if(!pixelProgress[cluster].executing)
645 for(int unit = 0; unit < unitCount; unit++)
647 if(primitiveProgress[unit].references > 0) // Contains processed primitives
649 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
651 if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive) // Previous primitives have been rendered
653 Task &task = taskQueue[qHead];
654 task.type = Task::PIXELS;
655 task.primitiveUnit = unit;
656 task.pixelCluster = cluster;
658 pixelProgress[cluster].executing = true;
660 // Commit to the task queue
661 qHead = (qHead + 1) % 32;
672 // Find primitive tasks
673 if(currentDraw == nextDraw)
675 return; // No more primitives to process
678 for(int unit = 0; unit < unitCount; unit++)
680 DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
// The current draw call has no primitives left to hand out.
// NOTE(review): the statement advancing currentDraw is not visible here.
682 if(draw->primitive >= draw->count)
686 if(currentDraw == nextDraw)
688 return; // No more primitives to process
691 draw = drawList[currentDraw % DRAW_COUNT];
694 if(!primitiveProgress[unit].references) // Task not already being executed and not still in use by a pixel unit
696 int primitive = draw->primitive;
697 int count = draw->count;
698 int batch = draw->batchSize;
// Assign the next batch to this unit; the final batch may be shorter.
700 primitiveProgress[unit].drawCall = currentDraw;
701 primitiveProgress[unit].firstPrimitive = primitive;
702 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
704 draw->primitive += batch;
706 Task &task = taskQueue[qHead];
707 task.type = Task::PRIMITIVES;
708 task.primitiveUnit = unit;
// -1 marks the unit as busy: it matches neither the `> 0` test used for pixel
// tasks nor the `!references` test used to find a free unit.
710 primitiveProgress[unit].references = -1;
712 // Commit to the task queue
713 qHead = (qHead + 1) % 32;
// Selects the next task for thread `threadIndex`, refilling the queue and
// waking suspended threads as needed.
// NOTE(review): this excerpt omits lines of the body (locking, queue-size
// updates, the branch structure), so only visible statements are described.
719 void Renderer::scheduleTask(int threadIndex)
// Refill the queue when it holds fewer tasks than the number of sleeping
// threads plus one.
723 if((int)qSize < threadCount - threadsAwake + 1)
725 findAvailableTasks();
// Take the entry qSize slots behind qHead in the 32-entry ring.
// NOTE(review): this relies on the wrap-around of (qHead - qSize) % 32 being
// non-negative; the (int) cast above suggests qSize is unsigned — confirm.
730 task[threadIndex] = taskQueue[(qHead - qSize) % 32];
// Wake suspended threads while more queued work remains than awake threads.
733 if(threadsAwake != threadCount)
735 int wakeup = qSize - threadsAwake + 1;
737 for(int i = 0; i < threadCount && wakeup > 0; i++)
739 if(task[i].type == Task::SUSPEND)
742 task[i].type = Task::RESUME;
// Nothing to run: mark this thread's task as SUSPEND, which ends taskLoop().
753 task[threadIndex].type = Task::SUSPEND;
// Runs the task currently assigned to thread `threadIndex` and accumulates
// the elapsed ticks into the per-thread vertex, setup and pixel timers.
// NOTE(review): this excerpt omits lines of the body (braces, some `case`
// labels, conditional-compilation guards), so only visible statements are
// described.
761 void Renderer::executeTask(int threadIndex)
764 int64_t startTick = Timer::ticks();
767 switch(task[threadIndex].type)
// Primitive task: run the vertex routine for the unit's batch, then primitive
// setup.
769 case Task::PRIMITIVES:
771 int unit = task[threadIndex].primitiveUnit;
773 int input = primitiveProgress[unit].firstPrimitive;
774 int count = primitiveProgress[unit].primitiveCount;
775 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
776 int (*setupPrimitives)(Renderer *renderer, int batch, int count) = draw->setupPrimitives;
778 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
781 int64_t time = Timer::ticks();
782 vertexTime[threadIndex] += time - startTick;
786 int visible = setupPrimitives(this, unit, count);
// Publish the result; setting references to clusterCount makes the batch
// eligible for one pixel task per cluster (see findAvailableTasks()).
788 primitiveProgress[unit].visible = visible;
789 primitiveProgress[unit].references = clusterCount;
792 setupTime[threadIndex] += Timer::ticks() - startTick;
// NOTE(review): presumably the Task::PIXELS case starts here — its label is not
// visible in this excerpt.
798 int unit = task[threadIndex].primitiveUnit;
799 int visible = primitiveProgress[unit].visible;
803 int cluster = task[threadIndex].pixelCluster;
804 Primitive *primitive = primitiveBatch[unit];
805 DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
806 DrawData *data = draw->data;
807 PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
// Rasterize the unit's `visible` primitives for this cluster.
809 pixelRoutine(primitive, visible, cluster, data);
812 finishRendering(task[threadIndex]);
815 pixelTime[threadIndex] += Timer::ticks() - startTick;
// Acquires the `sync` resource with PUBLIC access; draw() locks the same
// resource with PRIVATE access.
// NOTE(review): the matching unlock is not visible in this excerpt.
828 void Renderer::synchronize()
830 sync->lock(sw::PUBLIC);
// Bookkeeping after a pixel task: advances the cluster's progress, drops the
// references held on the primitive unit and the draw call, and releases the
// draw call's resources.
//
// pixelTask - the completed task; supplies the primitive unit and the pixel
//             cluster.
//
// NOTE(review): this excerpt omits lines of the body, including the conditions
// that test the decremented reference counts. The resource release below
// presumably runs only when the last reference is dropped — confirm.
834 void Renderer::finishRendering(Task &pixelTask)
836 int unit = pixelTask.primitiveUnit;
837 int cluster = pixelTask.pixelCluster;
839 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
840 DrawData &data = *draw.data;
841 int primitive = primitiveProgress[unit].firstPrimitive;
842 int count = primitiveProgress[unit].primitiveCount;
// This cluster has now rendered everything up to the end of the batch.
844 pixelProgress[cluster].processedPrimitives = primitive + count;
// Draw call complete for this cluster: move on to the next draw call.
846 if(pixelProgress[cluster].processedPrimitives >= draw.count)
848 pixelProgress[cluster].drawCall++;
849 pixelProgress[cluster].processedPrimitives = 0;
852 int ref = atomicDecrement(&primitiveProgress[unit].references);
856 ref = atomicDecrement(&draw.references);
// Fold the per-cluster performance counters into the profiler.
861 for(int cluster = 0; cluster < clusterCount; cluster++)
863 for(int i = 0; i < PERF_TIMERS; i++)
865 profiler.cycles[i] += data.cycles[i][cluster];
// Add each cluster's occlusion count to every query attached to the draw call
// and drop the reference draw() took on it.
// NOTE(review): the declaration of `query` is not visible in this excerpt.
872 for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
876 for(int cluster = 0; cluster < clusterCount; cluster++)
878 atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
881 atomicDecrement(&query->reference);
// Unlock everything draw() locked: render targets, depth/stencil, textures,
// vertex streams and the index buffer.
888 for(int i = 0; i < 4; i++)
890 if(draw.renderTarget[i])
892 draw.renderTarget[i]->unlockInternal();
896 if(draw.depthStencil)
898 draw.depthStencil->unlockInternal();
899 draw.depthStencil->unlockStencil();
// NOTE(review): the guards around the texture and index buffer unlocks are not
// visible in this excerpt.
902 for(int i = 0; i < 16 + 4; i++)
906 draw.texture[i]->unlock();
910 for(int i = 0; i < 16; i++)
912 if(draw.vertexStream[i])
914 draw.vertexStream[i]->unlock();
920 draw.indexBuffer->unlock();
// Release the routines bound in draw().
923 draw.vertexRoutine->unbind();
924 draw.setupRoutine->unbind();
925 draw.pixelRoutine->unbind();
// -1 is the value draw() looks for when searching for a reusable draw call.
929 draw.references = -1;
934 pixelProgress[cluster].executing = false;
// Builds three vertex indices per primitive into a local `batch` table
// according to the draw type, then runs the vertex routine over them.
//
// unit   - primitive unit whose triangle batch receives the output.
// start  - index of the first primitive of the batch within the draw call.
// count  - number of primitives in the batch.
// loop   - modulus used for the line-loop types so the last segment wraps
//          around; executeTask() passes the draw call's primitive count.
// thread - index of the calling thread; selects the vertex task and its cache.
//
// Point types write the same index three times; line types repeat the second
// index in the third slot.
//
// NOTE(review): this excerpt omits lines of the body (braces, several `case`
// labels, pointer/index increments, `break` statements), so only visible
// statements are described.
937 void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int count, unsigned int loop, int thread)
939 Triangle *triangle = triangleBatch[unit];
940 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
941 DrawData *data = draw->data;
942 VertexTask *task = vertexTask[thread];
944 const void *indices = data->indices;
945 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
// The vertex cache is only valid within a single draw call.
947 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
949 task->vertexCache.clear();
950 task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
// NOTE(review): room for 128 primitives only; a `count` above 128 would write
// past the end. Presumably the batch size never exceeds 128 — confirm.
953 unsigned int batch[128][3]; // FIXME: Adjust to dynamic batch size
955 switch(draw->drawType)
// Non-indexed types. NOTE(review): the case labels of the next four sections
// and the body of the first loop are not visible in this excerpt.
959 unsigned int index = start;
961 for(unsigned int i = 0; i < count; i++)
// Two vertices per primitive.
973 unsigned int index = 2 * start;
975 for(unsigned int i = 0; i < count; i++)
977 batch[i][0] = index + 0;
978 batch[i][1] = index + 1;
979 batch[i][2] = index + 1;
// Consecutive primitives start one vertex apart.
987 unsigned int index = start;
989 for(unsigned int i = 0; i < count; i++)
991 batch[i][0] = index + 0;
992 batch[i][1] = index + 1;
993 batch[i][2] = index + 1;
// As above, with indices wrapped modulo `loop`.
1001 unsigned int index = start;
1003 for(unsigned int i = 0; i < count; i++)
1005 batch[i][0] = (index + 0) % loop;
1006 batch[i][1] = (index + 1) % loop;
1007 batch[i][2] = (index + 1) % loop;
1013 case DRAW_TRIANGLELIST:
1015 unsigned int index = 3 * start;
1017 for(unsigned int i = 0; i < count; i++)
1019 batch[i][0] = index + 0;
1020 batch[i][1] = index + 1;
1021 batch[i][2] = index + 2;
// Strip: when `index` is odd the second and third indices are swapped.
1027 case DRAW_TRIANGLESTRIP:
1029 unsigned int index = start;
1031 for(unsigned int i = 0; i < count; i++)
1033 batch[i][0] = index + 0;
1034 batch[i][1] = index + (index & 1) + 1;
1035 batch[i][2] = index + (~index & 1) + 1;
// NOTE(review): the assignment of batch[i][2] is not visible in this excerpt.
1041 case DRAW_TRIANGLEFAN:
1043 unsigned int index = start;
1045 for(unsigned int i = 0; i < count; i++)
1047 batch[i][0] = index + 1;
1048 batch[i][1] = index + 2;
// Indexed types: the same patterns, read through the index buffer as 8-, 16-
// or 32-bit indices.
1055 case DRAW_INDEXEDPOINTLIST8:
1057 const unsigned char *index = (const unsigned char*)indices + start;
1059 for(unsigned int i = 0; i < count; i++)
1061 batch[i][0] = *index;
1062 batch[i][1] = *index;
1063 batch[i][2] = *index;
1069 case DRAW_INDEXEDPOINTLIST16:
1071 const unsigned short *index = (const unsigned short*)indices + start;
1073 for(unsigned int i = 0; i < count; i++)
1075 batch[i][0] = *index;
1076 batch[i][1] = *index;
1077 batch[i][2] = *index;
1083 case DRAW_INDEXEDPOINTLIST32:
1085 const unsigned int *index = (const unsigned int*)indices + start;
1087 for(unsigned int i = 0; i < count; i++)
1089 batch[i][0] = *index;
1090 batch[i][1] = *index;
1091 batch[i][2] = *index;
1097 case DRAW_INDEXEDLINELIST8:
1099 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1101 for(unsigned int i = 0; i < count; i++)
1103 batch[i][0] = index[0];
1104 batch[i][1] = index[1];
1105 batch[i][2] = index[1];
1111 case DRAW_INDEXEDLINELIST16:
1113 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1115 for(unsigned int i = 0; i < count; i++)
1117 batch[i][0] = index[0];
1118 batch[i][1] = index[1];
1119 batch[i][2] = index[1];
1125 case DRAW_INDEXEDLINELIST32:
1127 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1129 for(unsigned int i = 0; i < count; i++)
1131 batch[i][0] = index[0];
1132 batch[i][1] = index[1];
1133 batch[i][2] = index[1];
1139 case DRAW_INDEXEDLINESTRIP8:
1141 const unsigned char *index = (const unsigned char*)indices + start;
1143 for(unsigned int i = 0; i < count; i++)
1145 batch[i][0] = index[0];
1146 batch[i][1] = index[1];
1147 batch[i][2] = index[1];
1153 case DRAW_INDEXEDLINESTRIP16:
1155 const unsigned short *index = (const unsigned short*)indices + start;
1157 for(unsigned int i = 0; i < count; i++)
1159 batch[i][0] = index[0];
1160 batch[i][1] = index[1];
1161 batch[i][2] = index[1];
1167 case DRAW_INDEXEDLINESTRIP32:
1169 const unsigned int *index = (const unsigned int*)indices + start;
1171 for(unsigned int i = 0; i < count; i++)
1173 batch[i][0] = index[0];
1174 batch[i][1] = index[1];
1175 batch[i][2] = index[1];
// Line loops index from the start of the buffer and wrap modulo `loop`.
1181 case DRAW_INDEXEDLINELOOP8:
1183 const unsigned char *index = (const unsigned char*)indices;
1185 for(unsigned int i = 0; i < count; i++)
1187 batch[i][0] = index[(start + i + 0) % loop];
1188 batch[i][1] = index[(start + i + 1) % loop];
1189 batch[i][2] = index[(start + i + 1) % loop];
1193 case DRAW_INDEXEDLINELOOP16:
1195 const unsigned short *index = (const unsigned short*)indices;
1197 for(unsigned int i = 0; i < count; i++)
1199 batch[i][0] = index[(start + i + 0) % loop];
1200 batch[i][1] = index[(start + i + 1) % loop];
1201 batch[i][2] = index[(start + i + 1) % loop];
1205 case DRAW_INDEXEDLINELOOP32:
1207 const unsigned int *index = (const unsigned int*)indices;
1209 for(unsigned int i = 0; i < count; i++)
1211 batch[i][0] = index[(start + i + 0) % loop];
1212 batch[i][1] = index[(start + i + 1) % loop];
1213 batch[i][2] = index[(start + i + 1) % loop];
1217 case DRAW_INDEXEDTRIANGLELIST8:
1219 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1221 for(unsigned int i = 0; i < count; i++)
1223 batch[i][0] = index[0];
1224 batch[i][1] = index[1];
1225 batch[i][2] = index[2];
1231 case DRAW_INDEXEDTRIANGLELIST16:
1233 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1235 for(unsigned int i = 0; i < count; i++)
1237 batch[i][0] = index[0];
1238 batch[i][1] = index[1];
1239 batch[i][2] = index[2];
1245 case DRAW_INDEXEDTRIANGLELIST32:
1247 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1249 for(unsigned int i = 0; i < count; i++)
1251 batch[i][0] = index[0];
1252 batch[i][1] = index[1];
1253 batch[i][2] = index[2];
// Indexed strips: when (start + i) is odd the second and third indices are
// swapped.
1259 case DRAW_INDEXEDTRIANGLESTRIP8:
1261 const unsigned char *index = (const unsigned char*)indices + start;
1263 for(unsigned int i = 0; i < count; i++)
1265 batch[i][0] = index[0];
1266 batch[i][1] = index[((start + i) & 1) + 1];
1267 batch[i][2] = index[(~(start + i) & 1) + 1];
1273 case DRAW_INDEXEDTRIANGLESTRIP16:
1275 const unsigned short *index = (const unsigned short*)indices + start;
1277 for(unsigned int i = 0; i < count; i++)
1279 batch[i][0] = index[0];
1280 batch[i][1] = index[((start + i) & 1) + 1];
1281 batch[i][2] = index[(~(start + i) & 1) + 1];
1287 case DRAW_INDEXEDTRIANGLESTRIP32:
1289 const unsigned int *index = (const unsigned int*)indices + start;
1291 for(unsigned int i = 0; i < count; i++)
1293 batch[i][0] = index[0];
1294 batch[i][1] = index[((start + i) & 1) + 1];
1295 batch[i][2] = index[(~(start + i) & 1) + 1];
// Indexed fans: every triangle uses the first index of the buffer as its
// third vertex.
1301 case DRAW_INDEXEDTRIANGLEFAN8:
1303 const unsigned char *index = (const unsigned char*)indices;
1305 for(unsigned int i = 0; i < count; i++)
1307 batch[i][0] = index[start + i + 1];
1308 batch[i][1] = index[start + i + 2];
1309 batch[i][2] = index[0];
1313 case DRAW_INDEXEDTRIANGLEFAN16:
1315 const unsigned short *index = (const unsigned short*)indices;
1317 for(unsigned int i = 0; i < count; i++)
1319 batch[i][0] = index[start + i + 1];
1320 batch[i][1] = index[start + i + 2];
1321 batch[i][2] = index[0];
1325 case DRAW_INDEXEDTRIANGLEFAN32:
1327 const unsigned int *index = (const unsigned int*)indices;
1329 for(unsigned int i = 0; i < count; i++)
1331 batch[i][0] = index[start + i + 1];
1332 batch[i][1] = index[start + i + 2];
1333 batch[i][2] = index[0];
// Three vertices per primitive; the routine is given the address of the first
// output vertex of the unit's triangle batch.
1341 task->count = count * 3;
1342 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
// Setup function for solid-filled triangles: clips each triangle of the unit's
// batch where needed and runs the setup routine on it.
//
// renderer - renderer owning the batches, draw list and clipper.
// unit     - primitive unit whose triangle/primitive batches are processed.
// count    - number of triangles in the batch.
//
// NOTE(review): this excerpt omits lines of the body (the visible-primitive
// counter, `continue` statements and the return); presumably the function
// returns the number of primitives that passed setup — confirm.
1345 int Renderer::setupSolidTriangles(Renderer *renderer, int unit, int count)
1347 Triangle *triangle = renderer->triangleBatch[unit];
1348 Primitive *primitive = renderer->primitiveBatch[unit];
1350 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1351 SetupProcessor::State &state = draw.setupState;
1352 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1354 int ms = state.multiSample;
1355 int pos = state.positionRegister;
1356 const DrawData *data = draw.data;
1359 for(int i = 0; i < count; i++, triangle++)
1361 Vertex &v0 = triangle->v0;
1362 Vertex &v1 = triangle->v1;
1363 Vertex &v2 = triangle->v2;
// Process the triangle only if the flags common to all three vertices equal
// CLIP_FINITE.
1365 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1367 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
// Clip only when a vertex or the draw call sets a flag beyond CLIP_FINITE.
1369 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1371 if(clipFlagsOr != Clipper::CLIP_FINITE)
1373 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1379 if(setupRoutine(primitive, triangle, &polygon, data))
// Setup function for wireframe fill mode: culls the triangle in batch slot 0,
// then sets up its three edges as lines.
//
// renderer - renderer owning the batches and draw list.
// unit     - primitive unit whose triangle/primitive batches are processed.
// count    - not referenced by the visible statements.
//
// NOTE(review): this excerpt omits lines of the body (the visible-primitive
// counter, pointer advances and the final return); presumably the function
// returns the number of edges that passed setup — confirm.
1390 int Renderer::setupWireframeTriangle(Renderer *renderer, int unit, int count)
1392 Triangle *triangle = renderer->triangleBatch[unit];
1393 Primitive *primitive = renderer->primitiveBatch[unit];
1396 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1397 SetupProcessor::State &state = draw.setupState;
1398 SetupProcessor::RoutinePointer setupRoutine = draw.setupPointer;
1400 const Vertex &v0 = triangle[0].v0;
1401 const Vertex &v1 = triangle[0].v1;
1402 const Vertex &v2 = triangle[0].v2;
// Determinant of the (x, y, w) coordinates of the three vertices; its sign
// drives culling and half its value is stored as the primitive area below.
1404 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
// Culled triangles produce no primitives.
1406 if(state.cullMode == CULL_CLOCKWISE)
1408 if(d >= 0) return 0;
1410 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1412 if(d <= 0) return 0;
// Slot 0 already holds edge v0-v1; slots 1 and 2 receive edges v1-v2 and v2-v0.
1416 triangle[1].v0 = v1;
1417 triangle[1].v1 = v2;
1418 triangle[2].v0 = v2;
1419 triangle[2].v1 = v0;
// Flat shading: both endpoints of the two new edges take the colors of the
// triangle's first vertex.
1421 if(state.color[0][0].flat) // FIXME
1423 for(int i = 0; i < 2; i++)
1425 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1426 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1427 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1428 triangle[2].v1.C[i] = triangle[0].v0.C[i];
// Set up each edge as a line; an edge that passes records half the
// determinant as its area.
1432 for(int i = 0; i < 3; i++)
1434 if(setupLine(renderer, *primitive, *triangle, draw))
1436 primitive->area = 0.5f * d;
// Setup function for the fill mode that draws only a triangle's vertices:
// culls the triangle in batch slot 0, then sets up its three vertices as
// points.
//
// renderer - renderer owning the batches and draw list.
// unit     - primitive unit whose triangle/primitive batches are processed.
// count    - not referenced by the visible statements.
//
// NOTE(review): this excerpt omits lines of the body (the visible-primitive
// counter, pointer advances and the final return); presumably the function
// returns the number of points that passed setup — confirm.
1448 int Renderer::setupVertexTriangle(Renderer *renderer, int unit, int count)
1450 Triangle *triangle = renderer->triangleBatch[unit];
1451 Primitive *primitive = renderer->primitiveBatch[unit];
1454 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1455 SetupProcessor::State &state = draw.setupState;
1457 const Vertex &v0 = triangle[0].v0;
1458 const Vertex &v1 = triangle[0].v1;
1459 const Vertex &v2 = triangle[0].v2;
// Determinant of the (x, y, w) coordinates of the three vertices; its sign
// drives culling and half its value is stored as the primitive area below.
1461 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
// Culled triangles produce no primitives.
1463 if(state.cullMode == CULL_CLOCKWISE)
1465 if(d >= 0) return 0;
1467 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1469 if(d <= 0) return 0;
// Slot 0 already starts at v0; slots 1 and 2 receive v1 and v2.
1473 triangle[1].v0 = v1;
1474 triangle[2].v0 = v2;
// Set up each vertex as a point; a point that passes records half the
// determinant as its area.
1476 for(int i = 0; i < 3; i++)
1478 if(setupPoint(renderer, *primitive, *triangle, draw))
1480 primitive->area = 0.5f * d;
// Setup function for line primitives: calls setupLine() once per primitive of
// the unit's batch.
//
// renderer - renderer owning the batches and draw list.
// unit     - primitive unit whose triangle/primitive batches are processed.
// count    - number of lines in the batch.
//
// NOTE(review): this excerpt omits lines of the body (the visible-primitive
// counter, pointer advances and the return); presumably the function returns
// the number of lines that passed setup — confirm.
1492 int Renderer::setupLines(Renderer *renderer, int unit, int count)
1494 Triangle *triangle = renderer->triangleBatch[unit];
1495 Primitive *primitive = renderer->primitiveBatch[unit];
1498 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1499 SetupProcessor::State &state = draw.setupState;
1501 int ms = state.multiSample;
1503 for(int i = 0; i < count; i++)
1505 if(setupLine(renderer, *primitive, *triangle, draw))
// Setup function for point primitives: calls setupPoint() once per primitive
// of the unit's batch.
//
// renderer - renderer owning the batches and draw list.
// unit     - primitive unit whose triangle/primitive batches are processed.
// count    - number of points in the batch.
//
// NOTE(review): this excerpt omits lines of the body (the visible-primitive
// counter, pointer advances and the return); presumably the function returns
// the number of points that passed setup — confirm.
1517 int Renderer::setupPoints(Renderer *renderer, int unit, int count)
1519 Triangle *triangle = renderer->triangleBatch[unit];
1520 Primitive *primitive = renderer->primitiveBatch[unit];
1523 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1524 SetupProcessor::State &state = draw.setupState;
1526 int ms = state.multiSample;
1528 for(int i = 0; i < count; i++)
1530 if(setupPoint(renderer, *primitive, *triangle, draw))
// Converts the line from triangle.v0 to triangle.v1 into a polygon, clips it
// where needed, and runs the setup routine on it.
//
// renderer  - supplies the clipper.
// primitive - output primitive passed to the setup routine.
// triangle  - holds the two line endpoints in v0 and v1.
// draw      - draw call supplying the setup routine, setup state, clip flags
//             and DrawData.
//
// Returns the setup routine's result on the visible return paths.
//
// NOTE(review): this excerpt omits many lines of the body (braces, early
// returns, the declarations and initialization of the P, C and L arrays), so
// only the visible statements are described.
1542 bool Renderer::setupLine(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1544 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1545 const SetupProcessor::State &state = draw.setupState;
1546 const DrawData &data = *draw.data;
1548 Vertex &v0 = triangle.v0;
1549 Vertex &v1 = triangle.v1;
1551 int pos = state.positionRegister;
1553 const float4 &P0 = v0.v[pos];
1554 const float4 &P1 = v1.v[pos];
// Both endpoints have non-positive w (the action taken is not visible here).
1556 if(P0.w <= 0 && P1.w <= 0)
// Recover the viewport half-extents that draw() stored multiplied by 16.
1561 const float W = data.Wx16[0] * (1.0f / 16.0f);
1562 const float H = data.Hx16[0] * (1.0f / 16.0f);
// Line direction after the perspective divide, scaled by W and H.
1564 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1565 float dy = H * (P1.y / P1.w - P0.y / P0.w);
// Zero-length line (the action taken is not visible here).
1567 if(dx == 0 && dy == 0)
// The rectangle path is disabled by the constant condition; the diamond path
// in the else branch is the one that runs.
1572 if(false) // Rectangle
1582 float scale = 0.5f / sqrt(dx*dx + dy*dy);
// Offsets along and across the line, multiplied by each endpoint's w and
// divided by W or H.
1587 float dx0w = dx * P0.w / W;
1588 float dy0h = dy * P0.w / H;
1589 float dx0h = dx * P0.w / H;
1590 float dy0w = dy * P0.w / W;
1592 float dx1w = dx * P1.w / W;
1593 float dy1h = dy * P1.w / H;
1594 float dx1h = dx * P1.w / H;
1595 float dy1w = dy * P1.w / W;
// Offset the four corners of the quad and compute clip flags for each.
1597 P[0].x += -dy0w + -dx0w;
1598 P[0].y += -dx0h + +dy0h;
1599 C[0] = computeClipFlags(P[0], data);
1601 P[1].x += -dy1w + +dx1w;
1602 P[1].y += -dx1h + +dy1h;
1603 C[1] = computeClipFlags(P[1], data);
1605 P[2].x += +dy1w + +dx1w;
1606 P[2].y += +dx1h + -dy1h;
1607 C[2] = computeClipFlags(P[2], data);
1609 P[3].x += +dy0w + -dx0w;
1610 P[3].y += +dx0h + +dy0h;
1611 C[3] = computeClipFlags(P[3], data);
// Proceed only if the flags common to all corners equal CLIP_FINITE.
1613 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1615 Polygon polygon(P, 4);
// Clip only when a corner or the draw call sets a flag beyond CLIP_FINITE.
1617 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1619 if(clipFlagsOr != Clipper::CLIP_FINITE)
1621 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1627 return setupRoutine(&primitive, &triangle, &polygon, &data);
1630 else // Diamond test convention
// Half-unit offsets at each endpoint, multiplied by w and divided by W or H.
1644 float dx0 = 0.5f * P0.w / W;
1645 float dy0 = 0.5f * P0.w / H;
1647 float dx1 = 0.5f * P1.w / W;
1648 float dy1 = 0.5f * P1.w / H;
// Clip flags for the eight candidate points (their construction is not visible
// in this excerpt).
1651 C[0] = computeClipFlags(P[0], data);
1654 C[1] = computeClipFlags(P[1], data);
1657 C[2] = computeClipFlags(P[2], data);
1660 C[3] = computeClipFlags(P[3], data);
1663 C[4] = computeClipFlags(P[4], data);
1666 C[5] = computeClipFlags(P[5], data);
1669 C[6] = computeClipFlags(P[6], data);
1672 C[7] = computeClipFlags(P[7], data);
1674 if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
// NOTE(review): the lines that fill the 6-point array `L` depending on the
// line direction are not visible in this excerpt.
1680 if(dx > dy) // Right
1721 Polygon polygon(L, 6);
1723 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1725 if(clipFlagsOr != Clipper::CLIP_FINITE)
1727 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1733 return setupRoutine(&primitive, &triangle, &polygon, &data);
// Sets up a single point taken from triangle.v0.
// A 4-vertex polygon is built around the point, clipped when any clip flag is raised, and passed to
// the draw call's setup routine. triangle.v1 and triangle.v2 are overwritten with offset copies of v0.
1740 bool Renderer::setupPoint(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1742 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1743 const SetupProcessor::State &state = draw.setupState;
1744 const DrawData &data = *draw.data;
1746 Vertex &v = triangle.v0;
1750 int pts = state.pointSizeRegister;
// NOTE(review): 0xF looks like the "no point size register" sentinel - confirm.
1752 if(state.pointSizeRegister != 0xF)
// Point size taken from the draw data.
1758 pSize = data.point.pointSize[0];
// Keep the size inside the draw call's [pointSizeMin, pointSizeMax] range.
1761 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1766 int pos = state.positionRegister;
// Extents along x and y: point size times w times the half-pixel factor.
1773 const float X = pSize * P[0].w * data.halfPixelX[0];
1774 const float Y = pSize * P[0].w * data.halfPixelY[0];
// Clip flags for the four corners.
1778 C[0] = computeClipFlags(P[0], data);
1782 C[1] = computeClipFlags(P[1], data);
1786 C[2] = computeClipFlags(P[2], data);
1790 C[3] = computeClipFlags(P[3], data);
// v1 and v2 start as copies of v0, then are moved by half the point size times 16 along X and Y.
1792 triangle.v1 = triangle.v0;
1793 triangle.v2 = triangle.v0;
1795 triangle.v1.X += iround(16 * 0.5f * pSize);
1796 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1798 Polygon polygon(P, 4);
// Proceed only when the flags shared by all four corners are exactly CLIP_FINITE.
1800 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1802 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
// Clipping is only invoked when some flag other than CLIP_FINITE is raised.
1804 if(clipFlagsOr != Clipper::CLIP_FINITE)
1806 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1812 return setupRoutine(&primitive, &triangle, &polygon, &data);
// Builds the clip-flag bitmask for one position `v`.
// Each comparison against +/-w contributes one bit; the result is always ORed with Clipper::CLIP_FINITE.
1818 unsigned int Renderer::computeClipFlags(const float4 &v, const DrawData &data)
// x and y are shifted by the half-pixel factor times w before being compared.
1820 float clX = v.x + data.halfPixelX[0] * v.w;
1821 float clY = v.y + data.halfPixelY[0] * v.w;
// bit 0: x > w, bit 1: y > w, bit 2: z > w, bit 3: x < -w, bit 4: y < -w
1823 return ((clX > v.w) << 0) |
1824 ((clY > v.w) << 1) |
1825 ((v.z > v.w) << 2) |
1826 ((clX < -v.w) << 3) |
1827 ((clY < -v.w) << 4) |
1829 Clipper::CLIP_FINITE; // FIXME: xyz finite
// Allocates the per-unit batches and starts the worker threads.
// Unit and cluster counts are threadCount rounded by ceilPow2(); one worker is created per thread.
1832 void Renderer::initializeThreads()
1834 unitCount = ceilPow2(threadCount);
1835 clusterCount = ceilPow2(threadCount);
// One triangle batch and one primitive batch per unit, each sized for batchSize entries.
1837 for(int i = 0; i < unitCount; i++)
1839 triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1840 primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1843 for(int i = 0; i < threadCount; i++)
1845 vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
// -1 marks the vertex cache as not belonging to any draw call yet.
1846 vertexTask[i]->vertexCache.drawCall = -1;
// Every worker starts with a SUSPEND task.
1848 task[i].type = Task::SUSPEND;
1850 resume[i] = new Event();
1851 suspend[i] = new Event();
// Start-up arguments for the worker: its index and the owning renderer.
1853 Parameters parameters;
1854 parameters.threadIndex = i;
1855 parameters.renderer = this;
1857 exitThreads = false;
// The thread receives the ADDRESS of `parameters`.
// NOTE(review): `parameters` is a local, so the worker must finish reading it before it goes out of scope - confirm the hand-shake on suspend[i].
1858 worker[i] = new Thread(threadFunction, &parameters);
1861 suspend[i]->signal();
// Stops the worker threads and releases what initializeThreads() allocated.
1865 void Renderer::terminateThreads()
// Loops while any worker thread is still counted as awake.
1867 while(threadsAwake != 0)
1872 for(int thread = 0; thread < threadCount; thread++)
// Wake the worker, then block until it has finished.
1877 resume[thread]->signal();
1878 worker[thread]->join();
1880 delete worker[thread];
1882 delete resume[thread];
1884 delete suspend[thread];
1885 suspend[thread] = 0;
1888 deallocate(vertexTask[thread]);
1889 vertexTask[thread] = 0;
// NOTE(review): frees 16 batch slots regardless of unitCount; presumably 16 is the capacity of the
// batch arrays and deallocate() accepts null entries - confirm.
1892 for(int i = 0; i < 16; i++)
1894 deallocate(triangleBatch[i]);
1895 triangleBatch[i] = 0;
1897 deallocate(primitiveBatch[i]);
1898 primitiveBatch[i] = 0;
// Applies the constants defined inside a vertex shader (DEF / DEFI / DEFB instructions)
// through the setVertexShaderConstant{F,I,B} setters. Does nothing for a null shader.
1902 void Renderer::loadConstants(const VertexShader *vertexShader)
1904 if(!vertexShader) return;
1906 size_t count = vertexShader->getLength();
// Scan every instruction of the shader.
1908 for(size_t i = 0; i < count; i++)
1910 const Shader::Instruction *instruction = vertexShader->getInstruction(i);
// DEF: four float components from the first source operand, stored at the destination index.
1912 if(instruction->opcode == Shader::OPCODE_DEF)
1914 int index = instruction->dst.index;
1917 value[0] = instruction->src[0].value[0];
1918 value[1] = instruction->src[0].value[1];
1919 value[2] = instruction->src[0].value[2];
1920 value[3] = instruction->src[0].value[3];
1922 setVertexShaderConstantF(index, value);
// DEFI: four integer components.
1924 else if(instruction->opcode == Shader::OPCODE_DEFI)
1926 int index = instruction->dst.index;
1929 integer[0] = instruction->src[0].integer[0];
1930 integer[1] = instruction->src[0].integer[1];
1931 integer[2] = instruction->src[0].integer[2];
1932 integer[3] = instruction->src[0].integer[3];
1934 setVertexShaderConstantI(index, integer);
// DEFB: a single boolean, taken from component 0.
1936 else if(instruction->opcode == Shader::OPCODE_DEFB)
1938 int index = instruction->dst.index;
1939 int boolean = instruction->src[0].boolean[0];
1941 setVertexShaderConstantB(index, &boolean);
// Applies the constants defined inside a pixel shader (DEF / DEFI / DEFB instructions)
// through the setPixelShaderConstant{F,I,B} setters. Does nothing for a null shader.
1946 void Renderer::loadConstants(const PixelShader *pixelShader)
1948 if(!pixelShader) return;
1950 size_t count = pixelShader->getLength();
// Scan every instruction of the shader.
1952 for(size_t i = 0; i < count; i++)
1954 const Shader::Instruction *instruction = pixelShader->getInstruction(i);
// DEF: four float components from the first source operand, stored at the destination index.
1956 if(instruction->opcode == Shader::OPCODE_DEF)
1958 int index = instruction->dst.index;
1961 value[0] = instruction->src[0].value[0];
1962 value[1] = instruction->src[0].value[1];
1963 value[2] = instruction->src[0].value[2];
1964 value[3] = instruction->src[0].value[3];
1966 setPixelShaderConstantF(index, value);
// DEFI: four integer components.
1968 else if(instruction->opcode == Shader::OPCODE_DEFI)
1970 int index = instruction->dst.index;
1973 integer[0] = instruction->src[0].integer[0];
1974 integer[1] = instruction->src[0].integer[1];
1975 integer[2] = instruction->src[0].integer[2];
1976 integer[3] = instruction->src[0].integer[3];
1978 setPixelShaderConstantI(index, integer);
// DEFB: a single boolean, taken from component 0.
1980 else if(instruction->opcode == Shader::OPCODE_DEFB)
1982 int index = instruction->dst.index;
1983 int boolean = instruction->src[0].boolean[0];
1985 setPixelShaderConstantB(index, &boolean);
// Stores the index buffer resource in the context.
1990 void Renderer::setIndexBuffer(Resource *indexBuffer)
1992 context->indexBuffer = indexBuffer;
// Stores the multisample mask in the context's sampleMask field.
1995 void Renderer::setMultiSampleMask(unsigned int mask)
1997 context->sampleMask = mask;
// Sets the sw::transparencyAntialiasing global (not per-renderer state).
// The sw:: qualifier is needed because the parameter shadows the global's name.
2000 void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2002 sw::transparencyAntialiasing = transparencyAntialiasing;
// Checks whether the texture bound to `sampler` shares its resource with a bound output surface.
// NOTE(review): presumably returns true on the first match and false otherwise - confirm.
2005 bool Renderer::isReadWriteTexture(int sampler)
// Compare against each of the four render target slots that is set.
2007 for(int index = 0; index < 4; index++)
2009 if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
// Compare against the depth-stencil surface, if one is set.
2015 if(context->depthStencil && context->texture[sampler] == context->depthStencil->getResource())
// Recomputes clipPlane[] from userPlane[] when the updateClipPlanes dirty flag is set, then clears the flag.
// Only planes whose CLIP_PLANEn bit is set in clipFlags are written.
2023 void Renderer::updateClipper()
2025 if(updateClipPlanes)
2027 if(VertexProcessor::isFixedFunction()) // User plane in world space
// Fixed-function path: each plane is multiplied by the matrix returned by getViewTransform().
2029 const Matrix &scissorWorld = getViewTransform();
2031 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2032 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2033 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2034 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2035 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2036 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2038 else // User plane in clip space
// Otherwise the planes are copied unchanged.
2040 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2041 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2042 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2043 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2044 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2045 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2048 updateClipPlanes = false;
// Binds `resource` as the texture resource of `sampler` in the context.
2052 void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
// Valid sampler indices are 0..19 (written as 16 + 4).
2054 ASSERT(sampler < (16 + 4));
2056 context->texture[sampler] = resource;
// Forwards one face/mip-level surface to the context's sampler object for `sampler`.
2059 void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
// Sampler index below 20, face below 6, level below MIPMAP_LEVELS.
2061 ASSERT(sampler < (16 + 4) && face < 6 && level < MIPMAP_LEVELS);
2063 context->sampler[sampler].setTextureLevel(face, level, surface, type);
// Sets the texture filter of `sampler`, dispatching on the sampler type:
// SAMPLER_PIXEL goes to PixelProcessor; the other call shown goes to VertexProcessor.
2066 void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2068 if(type == SAMPLER_PIXEL)
2070 PixelProcessor::setTextureFilter(sampler, textureFilter);
2074 VertexProcessor::setTextureFilter(sampler, textureFilter);
// Sets the mipmap filter of `sampler`, dispatching on the sampler type:
// SAMPLER_PIXEL goes to PixelProcessor; the other call shown goes to VertexProcessor.
2078 void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2080 if(type == SAMPLER_PIXEL)
2082 PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2086 VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
// Sets the gather-enable flag of `sampler`, dispatching on the sampler type:
// SAMPLER_PIXEL goes to PixelProcessor; the other call shown goes to VertexProcessor.
2090 void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2092 if(type == SAMPLER_PIXEL)
2094 PixelProcessor::setGatherEnable(sampler, enable);
2098 VertexProcessor::setGatherEnable(sampler, enable);
// Sets the U addressing mode of `sampler`, dispatching on the sampler type:
// SAMPLER_PIXEL goes to PixelProcessor; the other call shown goes to VertexProcessor.
2102 void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2104 if(type == SAMPLER_PIXEL)
2106 PixelProcessor::setAddressingModeU(sampler, addressMode);
2110 VertexProcessor::setAddressingModeU(sampler, addressMode);
// Sets the V addressing mode of `sampler`, dispatching on the sampler type:
// SAMPLER_PIXEL goes to PixelProcessor; the other call shown goes to VertexProcessor.
2114 void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2116 if(type == SAMPLER_PIXEL)
2118 PixelProcessor::setAddressingModeV(sampler, addressMode);
2122 VertexProcessor::setAddressingModeV(sampler, addressMode);
// Sets the W addressing mode of `sampler`, dispatching on the sampler type:
// SAMPLER_PIXEL goes to PixelProcessor; the other call shown goes to VertexProcessor.
2126 void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2128 if(type == SAMPLER_PIXEL)
2130 PixelProcessor::setAddressingModeW(sampler, addressMode);
2134 VertexProcessor::setAddressingModeW(sampler, addressMode);
// Sets the sRGB-read flag of `sampler`, dispatching on the sampler type:
// SAMPLER_PIXEL goes to PixelProcessor; the other call shown goes to VertexProcessor.
2138 void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2140 if(type == SAMPLER_PIXEL)
2142 PixelProcessor::setReadSRGB(sampler, sRGB);
2146 VertexProcessor::setReadSRGB(sampler, sRGB);
// Sets the mipmap LOD bias of `sampler`, dispatching on the sampler type:
// SAMPLER_PIXEL goes to PixelProcessor; the other call shown goes to VertexProcessor.
2150 void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2152 if(type == SAMPLER_PIXEL)
2154 PixelProcessor::setMipmapLOD(sampler, bias);
2158 VertexProcessor::setMipmapLOD(sampler, bias);
// Sets the border color of `sampler`, dispatching on the sampler type:
// SAMPLER_PIXEL goes to PixelProcessor; the other call shown goes to VertexProcessor.
2162 void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2164 if(type == SAMPLER_PIXEL)
2166 PixelProcessor::setBorderColor(sampler, borderColor);
2170 VertexProcessor::setBorderColor(sampler, borderColor);
// Sets the maximum anisotropy of `sampler`, dispatching on the sampler type:
// SAMPLER_PIXEL goes to PixelProcessor; the other call shown goes to VertexProcessor.
2174 void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2176 if(type == SAMPLER_PIXEL)
2178 PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2182 VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
// Forwards the point-sprite enable flag to the context.
2186 void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2188 context->setPointSpriteEnable(pointSpriteEnable);
// Forwards the point-scale enable flag to the context.
2191 void Renderer::setPointScaleEnable(bool pointScaleEnable)
2193 context->setPointScaleEnable(pointScaleEnable);
// Setter taking the constant depth bias.
// NOTE(review): presumably stores `bias` in a renderer member, like setSlopeDepthBias() - confirm.
2196 void Renderer::setDepthBias(float bias)
// Stores the slope-scaled depth bias in the renderer's slopeDepthBias member.
2201 void Renderer::setSlopeDepthBias(float slopeBias)
2203 slopeDepthBias = slopeBias;
// Binds the pixel shader in the context and loads the constants the shader defines itself.
// loadConstants() ignores a null shader.
2206 void Renderer::setPixelShader(const PixelShader *shader)
2208 context->pixelShader = shader;
2210 loadConstants(shader);
// Binds the vertex shader in the context and loads the constants the shader defines itself.
// loadConstants() ignores a null shader.
2213 void Renderer::setVertexShader(const VertexShader *shader)
2215 context->vertexShader = shader;
2217 loadConstants(shader);
// Sets `count` pixel shader float constants starting at register `index`.
2220 void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
// Raise every draw call slot's dirty watermark so it covers registers up to index + count.
2222 for(int i = 0; i < DRAW_COUNT; i++)
2224 if(drawCall[i]->psDirtyConstF < index + count)
2226 drawCall[i]->psDirtyConstF = index + count;
// Write the constants one register at a time.
2230 for(int i = 0; i < count; i++)
2232 PixelProcessor::setFloatConstant(index + i, value);
// Sets `count` pixel shader integer constants starting at register `index`.
2237 void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
// Raise every draw call slot's dirty watermark so it covers registers up to index + count.
2239 for(int i = 0; i < DRAW_COUNT; i++)
2241 if(drawCall[i]->psDirtyConstI < index + count)
2243 drawCall[i]->psDirtyConstI = index + count;
// Write the constants one register at a time.
2247 for(int i = 0; i < count; i++)
2249 PixelProcessor::setIntegerConstant(index + i, value);
// Sets `count` pixel shader boolean constants starting at register `index`.
2254 void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
// Raise every draw call slot's dirty watermark so it covers registers up to index + count.
2256 for(int i = 0; i < DRAW_COUNT; i++)
2258 if(drawCall[i]->psDirtyConstB < index + count)
2260 drawCall[i]->psDirtyConstB = index + count;
// Write the constants one register at a time; each call passes the int that `boolean` points to.
2264 for(int i = 0; i < count; i++)
2266 PixelProcessor::setBooleanConstant(index + i, *boolean);
// Sets `count` vertex shader float constants starting at register `index`.
2271 void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
// Raise every draw call slot's dirty watermark so it covers registers up to index + count.
2273 for(int i = 0; i < DRAW_COUNT; i++)
2275 if(drawCall[i]->vsDirtyConstF < index + count)
2277 drawCall[i]->vsDirtyConstF = index + count;
// Write the constants one register at a time.
2281 for(int i = 0; i < count; i++)
2283 VertexProcessor::setFloatConstant(index + i, value);
// Sets `count` vertex shader integer constants starting at register `index`.
2288 void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
// Raise every draw call slot's dirty watermark so it covers registers up to index + count.
2290 for(int i = 0; i < DRAW_COUNT; i++)
2292 if(drawCall[i]->vsDirtyConstI < index + count)
2294 drawCall[i]->vsDirtyConstI = index + count;
// Write the constants one register at a time.
2298 for(int i = 0; i < count; i++)
2300 VertexProcessor::setIntegerConstant(index + i, value);
// Sets `count` vertex shader boolean constants starting at register `index`.
2305 void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
// Raise every draw call slot's dirty watermark so it covers registers up to index + count.
2307 for(int i = 0; i < DRAW_COUNT; i++)
2309 if(drawCall[i]->vsDirtyConstB < index + count)
2311 drawCall[i]->vsDirtyConstB = index + count;
// Write the constants one register at a time; each call passes the int that `boolean` points to.
2315 for(int i = 0; i < count; i++)
2317 VertexProcessor::setBooleanConstant(index + i, *boolean);
// Forwards model matrix `M` for slot `i` to the vertex processor.
// Unlike the view, base and projection setters, this does not mark the clip planes dirty.
2322 void Renderer::setModelMatrix(const Matrix &M, int i)
2324 VertexProcessor::setModelMatrix(M, i);
// Forwards the view matrix to the vertex processor and marks the clip planes for recomputation.
2327 void Renderer::setViewMatrix(const Matrix &V)
2329 VertexProcessor::setViewMatrix(V);
2330 updateClipPlanes = true;
// Forwards the base matrix to the vertex processor and marks the clip planes for recomputation.
2333 void Renderer::setBaseMatrix(const Matrix &B)
2335 VertexProcessor::setBaseMatrix(B);
2336 updateClipPlanes = true;
// Forwards the projection matrix to the vertex processor and marks the clip planes for recomputation.
2339 void Renderer::setProjectionMatrix(const Matrix &P)
2341 VertexProcessor::setProjectionMatrix(P);
2342 updateClipPlanes = true;
// Appends `query` to the renderer's query list.
2345 void Renderer::addQuery(Query *query)
2347 queries.push_back(query);
// Removes `query` from the renderer's query list.
2350 void Renderer::removeQuery(Query *query)
2352 queries.remove(query);
// Accessor returning an int.
// NOTE(review): presumably returns the threadCount member - confirm.
2356 int Renderer::getThreadCount()
// Returns the vertexTime entry for `thread`. The index is not range-checked here.
2361 int64_t Renderer::getVertexTime(int thread)
2363 return vertexTime[thread];
// Returns the setupTime entry for `thread`. The index is not range-checked here.
2366 int64_t Renderer::getSetupTime(int thread)
2368 return setupTime[thread];
// Returns the pixelTime entry for `thread`. The index is not range-checked here.
2371 int64_t Renderer::getPixelTime(int thread)
2373 return pixelTime[thread];
// Zeroes the vertex, setup and pixel timers of every thread in [0, threadCount).
2376 void Renderer::resetTimers()
2378 for(int thread = 0; thread < threadCount; thread++)
2380 vertexTime[thread] = 0;
2381 setupTime[thread] = 0;
2382 pixelTime[thread] = 0;
// Copies `viewport` into the renderer's viewport member.
2387 void Renderer::setViewport(const Viewport &viewport)
2389 this->viewport = viewport;
// Copies `scissor` into the renderer's scissor member.
2392 void Renderer::setScissor(const Rect &scissor)
2394 this->scissor = scissor;
// Stores the clip flags shifted left by 8 bits.
2397 void Renderer::setClipFlags(int flags)
2399 clipFlags = flags << 8; // Bottom 8 bits used by legacy frustum
// Stores user clip plane `index` and marks the clip planes for recomputation by updateClipper().
// NOTE(review): updateClipper() only reads userPlane[0..5]; confirm `index` is range-checked before the store.
2402 void Renderer::setClipPlane(unsigned int index, const float plane[4])
2406 userPlane[index] = plane;
2410 updateClipPlanes = true;
// Applies the SwiftConfig configuration to the renderer and to several globals.
//   initialUpdate - forces the configuration to be read even when swiftConfig reports nothing new
2413 void Renderer::updateConfiguration(bool initialUpdate)
2415 bool newConfiguration = swiftConfig->hasNewConfiguration();
2417 if(newConfiguration || initialUpdate)
2421 SwiftConfig::Configuration configuration = {0};
2422 swiftConfig->getConfiguration(configuration);
// Precaching is only enabled when this is not a newly changed configuration.
2424 precacheVertex = !newConfiguration && configuration.precache;
2425 precacheSetup = !newConfiguration && configuration.precache;
2426 precachePixel = !newConfiguration && configuration.precache;
// Routine cache sizes for the three processors.
2428 VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2429 PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2430 SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
// Texture filter quality: 0 = point, 1 = linear, 2 and any other value = anisotropic.
2432 switch(configuration.textureSampleQuality)
2434 case 0: Sampler::setFilterQuality(FILTER_POINT); break;
2435 case 1: Sampler::setFilterQuality(FILTER_LINEAR); break;
2436 case 2: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2437 default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
// Mipmap quality: 0 = point, 1 and any other value = linear.
2440 switch(configuration.mipmapQuality)
2442 case 0: Sampler::setMipmapQuality(MIPMAP_POINT); break;
2443 case 1: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2444 default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2447 setPerspectiveCorrection(configuration.perspectiveCorrection);
// The four transcendental precision globals are always set together to a single level.
2449 switch(configuration.transcendentalPrecision)
2452 logPrecision = APPROXIMATE;
2453 expPrecision = APPROXIMATE;
2454 rcpPrecision = APPROXIMATE;
2455 rsqPrecision = APPROXIMATE;
2458 logPrecision = PARTIAL;
2459 expPrecision = PARTIAL;
2460 rcpPrecision = PARTIAL;
2461 rsqPrecision = PARTIAL;
2464 logPrecision = ACCURATE;
2465 expPrecision = ACCURATE;
2466 rcpPrecision = ACCURATE;
2467 rsqPrecision = ACCURATE;
2470 logPrecision = WHQL;
2471 expPrecision = WHQL;
2472 rcpPrecision = WHQL;
2473 rsqPrecision = WHQL;
2476 logPrecision = IEEE;
2477 expPrecision = IEEE;
2478 rcpPrecision = IEEE;
2479 rsqPrecision = IEEE;
2482 logPrecision = ACCURATE;
2483 expPrecision = ACCURATE;
2484 rcpPrecision = ACCURATE;
2485 rsqPrecision = ACCURATE;
// Transparency antialiasing: 1 = alpha-to-coverage, 0 and any other value = none.
2489 switch(configuration.transparencyAntialiasing)
2491 case 0: transparencyAntialiasing = TRANSPARENCY_NONE; break;
2492 case 1: transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2493 default: transparencyAntialiasing = TRANSPARENCY_NONE; break;
// Thread count: -1 = CPUID::coreCount(), 0 = CPUID::processAffinity(), otherwise the configured number.
2496 switch(configuration.threadCount)
2498 case -1: threadCount = CPUID::coreCount(); break;
2499 case 0: threadCount = CPUID::processAffinity(); break;
2500 default: threadCount = configuration.threadCount; break;
// Instruction set toggles.
2503 CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2504 CPUID::setEnableSSSE3(configuration.enableSSSE3);
2505 CPUID::setEnableSSE3(configuration.enableSSE3);
2506 CPUID::setEnableSSE2(configuration.enableSSE2);
2507 CPUID::setEnableSSE(configuration.enableSSE);
// Copy the ten optimization pass slots.
2509 for(int pass = 0; pass < 10; pass++)
2511 optimization[pass] = configuration.optimization[pass];
// Remaining settings are copied straight into their variables.
2514 forceWindowed = configuration.forceWindowed;
2515 complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2516 postBlendSRGB = configuration.postBlendSRGB;
2517 exactColorRounding = configuration.exactColorRounding;
2518 forceClearRegisters = configuration.forceClearRegisters;
2521 minPrimitives = configuration.minPrimitives;
2522 maxPrimitives = configuration.maxPrimitives;
// Outside the initial update, start the workers when worker[0] is null.
2526 if(!initialUpdate && !worker[0])
2528 initializeThreads();