OSDN Git Service

Defer worker thread creation until the first draw call.
[android-x86/external-swiftshader.git] / src / Renderer / Renderer.cpp
1 // SwiftShader Software Renderer
2 //
3 // Copyright(c) 2005-2012 TransGaming Inc.
4 //
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
10 //
11
12 #include "Renderer.hpp"
13
14 #include "Clipper.hpp"
15 #include "Math.hpp"
16 #include "FrameBuffer.hpp"
17 #include "Timer.hpp"
18 #include "Surface.hpp"
19 #include "Half.hpp"
20 #include "Primitive.hpp"
21 #include "Polygon.hpp"
22 #include "SwiftConfig.hpp"
23 #include "MutexLock.hpp"
24 #include "CPUID.hpp"
25 #include "Memory.hpp"
26 #include "Resource.hpp"
27 #include "Constants.hpp"
28 #include "Debug.hpp"
29 #include "Reactor/Reactor.hpp"
30
31 #include <malloc.h>
32 #include <assert.h>
33
// Remove any macro named max that a previously included header may have defined.
#undef max

// Handed to the SwiftConfig constructor whenever a Renderer is created.
bool disableServer = true;

#ifndef NDEBUG
// Debug builds only: Renderer::draw() returns immediately when its primitive
// count lies outside the inclusive range [minPrimitives, maxPrimitives].
unsigned int minPrimitives = 1;
unsigned int maxPrimitives = 1u << 21;
#endif
42
43 namespace sw
44 {
45         extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
46         extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
47         extern bool booleanFaceRegister;
48         extern bool fullPixelPositionRegister;
49
50         extern bool forceWindowed;
51         extern bool complementaryDepthBuffer;
52         extern bool postBlendSRGB;
53         extern bool exactColorRounding;
54         extern Context::TransparencyAntialiasing transparencyAntialiasing;
55         extern bool forceClearRegisters;
56
57         extern bool precacheVertex;
58         extern bool precacheSetup;
59         extern bool precachePixel;
60
61         int batchSize = 128;
62         int threadCount = 1;
63         int unitCount = 1;
64         int clusterCount = 1;
65
66         TranscendentalPrecision logPrecision = ACCURATE;
67         TranscendentalPrecision expPrecision = ACCURATE;
68         TranscendentalPrecision rcpPrecision = ACCURATE;
69         TranscendentalPrecision rsqPrecision = ACCURATE;
70         bool perspectiveCorrection = true;
71
72         struct Parameters
73         {
74                 Renderer *renderer;
75                 int threadIndex;
76         };
77
78         DrawCall::DrawCall()
79         {
80                 queries = 0;
81
82                 vsDirtyConstF = 256 + 1;
83                 vsDirtyConstI = 16;
84                 vsDirtyConstB = 16;
85
86                 psDirtyConstF = 224;
87                 psDirtyConstI = 16;
88                 psDirtyConstB = 16;
89
90                 references = -1;
91
92                 data = (DrawData*)allocate(sizeof(DrawData));
93                 data->constants = &constants;
94         }
95
96         DrawCall::~DrawCall()
97         {
98                 delete queries;
99
100                 deallocate(data);
101         }
102
103         Renderer::Renderer(Context *context, bool halfIntegerCoordinates, bool symmetricNormalizedDepth, bool booleanFaceRegister, bool fullPixelPositionRegister, bool exactColorRounding) : context(context), VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), viewport()
104         {
105                 sw::halfIntegerCoordinates = halfIntegerCoordinates;
106                 sw::symmetricNormalizedDepth = symmetricNormalizedDepth;
107                 sw::booleanFaceRegister = booleanFaceRegister;
108                 sw::fullPixelPositionRegister = fullPixelPositionRegister;
109                 sw::exactColorRounding = exactColorRounding;
110
111                 setRenderTarget(0, 0);
112                 clipper = new Clipper();
113
114                 updateViewMatrix = true;
115                 updateBaseMatrix = true;
116                 updateProjectionMatrix = true;
117                 updateClipPlanes = true;
118
119                 #if PERF_HUD
120                         resetTimers();
121                 #endif
122
123                 for(int i = 0; i < 16; i++)
124                 {
125                         vertexTask[i] = 0;
126
127                         worker[i] = 0;
128                         resume[i] = 0;
129                         suspend[i] = 0;
130                 }
131
132                 threadsAwake = 0;
133                 resumeApp = new Event();
134
135                 currentDraw = 0;
136                 nextDraw = 0;
137
138                 qHead = 0;
139                 qSize = 0;
140
141                 for(int i = 0; i < 16; i++)
142                 {
143                         triangleBatch[i] = 0;
144                         primitiveBatch[i] = 0;
145                 }
146
147                 for(int draw = 0; draw < DRAW_COUNT; draw++)
148                 {
149                         drawCall[draw] = new DrawCall();
150                         drawList[draw] = drawCall[draw];
151                 }
152
153                 for(int unit = 0; unit < 16; unit++)
154                 {
155                         primitiveProgress[unit].init();
156                 }
157
158                 for(int cluster = 0; cluster < 16; cluster++)
159                 {
160                         pixelProgress[cluster].init();
161                 }
162
163                 clipFlags = 0;
164
165                 swiftConfig = new SwiftConfig(disableServer);
166                 updateConfiguration(true);
167
168                 sync = new Resource(0);
169         }
170
171         Renderer::~Renderer()
172         {
173                 sync->destruct();
174
175                 delete clipper;
176                 clipper = 0;
177
178                 terminateThreads();
179                 delete resumeApp;
180
181                 for(int draw = 0; draw < DRAW_COUNT; draw++)
182                 {
183                         delete drawCall[draw];
184                 }
185
186                 delete swiftConfig;
187         }
188
189         void Renderer::blit(Surface *source, const Rect &sRect, Surface *dest, const Rect &dRect, bool filter)
190         {
191                 blitter.blit(source, sRect, dest, dRect, filter);
192         }
193
194         void Renderer::draw(Context::DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
195         {
196                 #ifndef NDEBUG
197                         if(count < minPrimitives || count > maxPrimitives)
198                         {
199                                 return;
200                         }
201                 #endif
202
203                 context->drawType = drawType;
204
205                 updateConfiguration();
206                 updateClipper();
207
208                 int ss = context->getSuperSampleCount();
209                 int ms = context->getMultiSampleCount();
210
211                 for(int q = 0; q < ss; q++)
212                 {
213                         int oldMultiSampleMask = context->multiSampleMask;
214                         context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
215
216                         if(!context->multiSampleMask)
217                         {
218                                 continue;
219                         }
220
221                         sync->lock(sw::PRIVATE);
222
223                         if(update || oldMultiSampleMask != context->multiSampleMask)
224                         {
225                                 vertexState = VertexProcessor::update();
226                                 setupState = SetupProcessor::update();
227                                 pixelState = PixelProcessor::update();
228
229                                 vertexRoutine = VertexProcessor::routine(vertexState);
230                                 setupRoutine = SetupProcessor::routine(setupState);
231                                 pixelRoutine = PixelProcessor::routine(pixelState);
232                         }
233
234                         int batch = batchSize / ms;
235
236                         if(context->isDrawTriangle())
237                         {
238                                 switch(context->fillMode)
239                                 {
240                                 case Context::FILL_SOLID:
241                                         setupPrimitives = setupSolidTriangles;
242                                         break;
243                                 case Context::FILL_WIREFRAME:
244                                         setupPrimitives = setupWireframeTriangle;
245                                         batch = 1;
246                                         break;
247                                 case Context::FILL_VERTEX:
248                                         setupPrimitives = setupVertexTriangle;
249                                         batch = 1;
250                                         break;
251                                 default: ASSERT(false);
252                                 }
253                         }
254                         else if(context->isDrawLine())
255                         {
256                                 setupPrimitives = setupLines;
257                         }
258                         else   // Point draw
259                         {
260                                 setupPrimitives = setupPoints;
261                         }
262
263                         DrawCall *draw = 0;
264
265                         do
266                         {
267                                 for(int i = 0; i < DRAW_COUNT; i++)
268                                 {
269                                         if(drawCall[i]->references == -1)
270                                         {
271                                                 draw = drawCall[i];
272                                                 drawList[nextDraw % DRAW_COUNT] = draw;
273
274                                                 break;
275                                         }
276                                 }
277
278                                 if(!draw)
279                                 {
280                                         resumeApp->wait();
281                                 }
282                         }
283                         while(!draw);
284
285                         DrawData *data = draw->data;
286
287                         if(queries.size() != 0)
288                         {
289                                 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
290                                 {
291                                         atomicIncrement(&(*query)->reference);
292                                 }
293
294                                 draw->queries = new std::list<Query*>(queries);
295                         }
296
297                         draw->drawType = drawType;
298                         draw->batchSize = batch;
299
300                         vertexRoutine->bind();
301                         setupRoutine->bind();
302                         pixelRoutine->bind();
303
304                         draw->vertexRoutine = vertexRoutine;
305                         draw->setupRoutine = setupRoutine;
306                         draw->pixelRoutine = pixelRoutine;
307                         draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();;
308                         draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
309                         draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
310                         draw->setupPrimitives = setupPrimitives;
311                         draw->setupState = setupState;
312
313                         for(int i = 0; i < 16; i++)
314                         {
315                                 draw->vertexStream[i] = context->input[i].resource;
316                                 data->input[i] = context->input[i].buffer;
317                                 data->stride[i] = context->input[i].stride;
318
319                                 if(draw->vertexStream[i])
320                                 {
321                                         draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
322                                 }
323                         }
324
325                         if(context->indexBuffer)
326                         {
327                                 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
328                         }
329
330                         draw->indexBuffer = context->indexBuffer;
331
332                         for(int sampler = 0; sampler < 20; sampler++)
333                         {
334                                 draw->texture[sampler] = 0;
335                         }
336
337                         for(int sampler = 0; sampler < 16; sampler++)
338                         {
339                                 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
340                                 {
341                                         draw->texture[sampler] = context->texture[sampler];
342                                         draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texure is both read and written, use the same read/write lock as render targets
343
344                                         data->mipmap[sampler] = context->sampler[sampler].getTextureData();
345                                 }
346                         }
347
348                         if(context->pixelShader)
349                         {
350                                 if(draw->psDirtyConstF)
351                                 {
352                                         memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
353                                         memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
354                                         draw->psDirtyConstF = 0;
355                                 }
356
357                                 if(draw->psDirtyConstI)
358                                 {
359                                         memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
360                                         draw->psDirtyConstI = 0;
361                                 }
362
363                                 if(draw->psDirtyConstB)
364                                 {
365                                         memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
366                                         draw->psDirtyConstB = 0;
367                                 }
368                         }
369                         
370                         if(context->pixelShaderVersion() <= 0x0104)
371                         {
372                                 for(int stage = 0; stage < 8; stage++)
373                                 {
374                                         if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
375                                         {
376                                                 data->textureStage[stage] = context->textureStage[stage].uniforms;
377                                         }
378                                         else break;
379                                 }
380                         }
381
382                         if(context->vertexShader)
383                         {
384                                 if(context->vertexShader->getVersion() >= 0x0300)
385                                 {
386                                         for(int sampler = 0; sampler < 4; sampler++)
387                                         {
388                                                 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
389                                                 {
390                                                         draw->texture[16 + sampler] = context->texture[16 + sampler];
391                                                         draw->texture[16 + sampler]->lock(PUBLIC, PRIVATE);
392
393                                                         data->mipmap[16 + sampler] = context->sampler[16 + sampler].getTextureData();
394                                                 }
395                                         }
396                                 }
397
398                                 if(draw->vsDirtyConstF)
399                                 {
400                                         memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
401                                         draw->vsDirtyConstF = 0;
402                                 }
403
404                                 if(draw->vsDirtyConstI)
405                                 {
406                                         memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
407                                         draw->vsDirtyConstI = 0;
408                                 }
409
410                                 if(draw->vsDirtyConstB)
411                                 {
412                                         memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
413                                         draw->vsDirtyConstB = 0;
414                                 }
415                         }
416                         else
417                         {
418                                 data->ff = ff;
419
420                                 draw->vsDirtyConstF = 256 + 1;
421                                 draw->vsDirtyConstI = 16;
422                                 draw->vsDirtyConstB = 16;
423                         }
424
425                         if(pixelState.stencilActive)
426                         {
427                                 data->stencil[0] = stencil;
428                                 data->stencil[1] = stencilCCW;
429                         }
430
431                         if(pixelState.fogActive)
432                         {
433                                 data->fog = fog;
434                         }
435
436                         if(setupState.isDrawPoint)
437                         {
438                                 data->point = point;
439                         }
440
441                         data->factor = factor;
442
443                         if(pixelState.transparencyAntialiasing == Context::TRANSPARENCY_ALPHA_TO_COVERAGE)
444                         {
445                                 float ref = (float)context->alphaReference * (1.0f / 255.0f);
446                                 float margin = sw::min(ref, 1.0f - ref);
447
448                                 if(ms == 4)
449                                 {
450                                         data->a2c0 = replicate(ref - margin * 0.6f);
451                                         data->a2c1 = replicate(ref - margin * 0.2f);
452                                         data->a2c2 = replicate(ref + margin * 0.2f);
453                                         data->a2c3 = replicate(ref + margin * 0.6f);
454                                 }
455                                 else if(ms == 2)
456                                 {
457                                         data->a2c0 = replicate(ref - margin * 0.3f);
458                                         data->a2c1 = replicate(ref + margin * 0.3f);
459                                 }
460                                 else ASSERT(false);
461                         }
462
463                         if(pixelState.occlusionEnabled)
464                         {
465                                 for(int cluster = 0; cluster < clusterCount; cluster++)
466                                 {
467                                         data->occlusion[cluster] = 0;
468                                 }
469                         }
470
471                         #if PERF_PROFILE
472                                 for(int cluster = 0; cluster < clusterCount; cluster++)
473                                 {
474                                         for(int i = 0; i < PERF_TIMERS; i++)
475                                         {
476                                                 data->cycles[i][cluster] = 0;
477                                         }
478                                 }
479                         #endif
480
481                         // Viewport
482                         {
483                                 float W = 0.5f * viewport.width;
484                                 float H = 0.5f * viewport.height;
485                                 float X0 = viewport.x0 + W;
486                                 float Y0 = viewport.y0 + H;
487                                 float N = viewport.minZ;
488                                 float F = viewport.maxZ;
489                                 float Z = F - N;
490
491                                 if(context->isDrawTriangle(false))
492                                 {
493                                         N += depthBias;
494                                 }
495
496                                 if(complementaryDepthBuffer)
497                                 {
498                                         Z = -Z;
499                                         N = 1 - N;
500                                 }
501
502                                 static const float X[5][16] =   // Fragment offsets
503                                 {
504                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
505                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
506                                         {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
507                                         {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
508                                         {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
509                                 };
510
511                                 static const float Y[5][16] =   // Fragment offsets
512                                 {
513                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
514                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
515                                         {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
516                                         {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
517                                         {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
518                                 };
519
520                                 int s = sw::log2(ss);
521
522                                 data->Wx16 = replicate(W * 16);
523                                 data->Hx16 = replicate(H * 16);
524                                 data->X0x16 = replicate(X0 * 16);
525                                 data->Y0x16 = replicate(Y0 * 16);
526                                 data->XXXX = replicate(X[s][q] / W);
527                                 data->YYYY = replicate(Y[s][q] / H);
528                                 data->halfPixelX = replicate(0.5f / W);
529                                 data->halfPixelY = replicate(0.5f / H);
530                                 data->viewportHeight = abs(viewport.height);
531                                 data->slopeDepthBias = slopeDepthBias;
532                                 data->depthRange = Z;
533                                 data->depthNear = N;
534                                 draw->clipFlags = clipFlags;
535
536                                 if(clipFlags)
537                                 {
538                                         if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
539                                         if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
540                                         if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
541                                         if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
542                                         if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
543                                         if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
544                                 }
545                         }
546
547                         // Target
548                         {
549                                 for(int index = 0; index < 4; index++)
550                                 {
551                                         draw->renderTarget[index] = context->renderTarget[index];
552
553                                         if(draw->renderTarget[index])
554                                         {
555                                                 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
556                                                 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
557                                                 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
558                                         }
559                                 }
560
561                                 draw->depthStencil = context->depthStencil;
562
563                                 if(draw->depthStencil)
564                                 {
565                                         data->depthBuffer = (float*)context->depthStencil->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
566                                         data->depthPitchB = context->depthStencil->getInternalPitchB();
567                                         data->depthSliceB = context->depthStencil->getInternalSliceB();
568
569                                         data->stencilBuffer = (unsigned char*)context->depthStencil->lockStencil(q * ms, MANAGED);
570                                         data->stencilPitchB = context->depthStencil->getStencilPitchB();
571                                         data->stencilSliceB = context->depthStencil->getStencilSliceB();
572                                 }
573                         }
574
575                         // Scissor
576                         {
577                                 data->scissorX0 = scissor.x0;
578                                 data->scissorX1 = scissor.x1;
579                                 data->scissorY0 = scissor.y0;
580                                 data->scissorY1 = scissor.y1;
581                         }
582
583                         draw->primitive = 0;
584                         draw->count = count;
585
586                         draw->references = (count + batch - 1) / batch;
587
588                         mutex.lock();
589                         nextDraw++;
590                         mutex.unlock();
591
592                         if(!threadsAwake)
593                         {
594                                 suspend[0]->wait();
595
596                                 threadsAwake = 1;
597                                 task[0].type = Task::RESUME;
598
599                                 resume[0]->signal();
600                         }
601                 }
602         }
603
604         void Renderer::threadFunction(void *parameters)
605         {
606                 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
607                 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
608
609                 if(logPrecision < IEEE)
610                 {
611                         CPUID::setFlushToZero(true);
612                         CPUID::setDenormalsAreZero(true);
613                 }
614
615                 renderer->threadLoop(threadIndex);
616         }
617
618         void Renderer::threadLoop(int threadIndex)
619         {
620                 while(!exitThreads)
621                 {
622                         taskLoop(threadIndex);
623
624                         suspend[threadIndex]->signal();
625                         resume[threadIndex]->wait();
626                 }
627         }
628
629         void Renderer::taskLoop(int threadIndex)
630         {
631                 while(task[threadIndex].type != Task::SUSPEND)
632                 {
633                         scheduleTask(threadIndex);
634                         executeTask(threadIndex);
635                 }
636         }
637
638         void Renderer::findAvailableTasks()
639         {
640                 // Find pixel tasks
641                 for(int cluster = 0; cluster < clusterCount; cluster++)
642                 {
643                         if(!pixelProgress[cluster].executing)
644                         {
645                                 for(int unit = 0; unit < unitCount; unit++)
646                                 {
647                                         if(primitiveProgress[unit].references > 0)   // Contains processed primitives
648                                         {
649                                                 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
650                                                 {
651                                                         if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
652                                                         {
653                                                                 Task &task = taskQueue[qHead];
654                                                                 task.type = Task::PIXELS;
655                                                                 task.primitiveUnit = unit;
656                                                                 task.pixelCluster = cluster;
657
658                                                                 pixelProgress[cluster].executing = true;
659
660                                                                 // Commit to the task queue
661                                                                 qHead = (qHead + 1) % 32;
662                                                                 qSize++;
663
664                                                                 break;
665                                                         }
666                                                 }
667                                         }
668                                 }
669                         }
670                 }
671         
672                 // Find primitive tasks
673                 if(currentDraw == nextDraw)
674                 {
675                         return;   // No more primitives to process
676                 }
677
678                 for(int unit = 0; unit < unitCount; unit++)
679                 {
680                         DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
681
682                         if(draw->primitive >= draw->count)
683                         {
684                                 currentDraw++;
685
686                                 if(currentDraw == nextDraw)
687                                 {
688                                         return;   // No more primitives to process
689                                 }
690
691                                 draw = drawList[currentDraw % DRAW_COUNT];
692                         }
693
694                         if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
695                         {
696                                 int primitive = draw->primitive;
697                                 int count = draw->count;
698                                 int batch = draw->batchSize;
699
700                                 primitiveProgress[unit].drawCall = currentDraw;
701                                 primitiveProgress[unit].firstPrimitive = primitive;
702                                 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
703
704                                 draw->primitive += batch;
705
706                                 Task &task = taskQueue[qHead];
707                                 task.type = Task::PRIMITIVES;
708                                 task.primitiveUnit = unit;
709
710                                 primitiveProgress[unit].references = -1;
711
712                                 // Commit to the task queue
713                                 qHead = (qHead + 1) % 32;
714                                 qSize++;
715                         }
716                 }
717         }
718
719         void Renderer::scheduleTask(int threadIndex)
720         {
721                 mutex.lock();
722
723                 if((int)qSize < threadCount - threadsAwake + 1)
724                 {
725                         findAvailableTasks();
726                 }
727
728                 if(qSize != 0)
729                 {
730                         task[threadIndex] = taskQueue[(qHead - qSize) % 32];
731                         qSize--;
732
733                         if(threadsAwake != threadCount)
734                         {
735                                 int wakeup = qSize - threadsAwake + 1;
736
737                                 for(int i = 0; i < threadCount && wakeup > 0; i++)
738                                 {
739                                         if(task[i].type == Task::SUSPEND)
740                                         {
741                                                 suspend[i]->wait();
742                                                 task[i].type = Task::RESUME;
743                                                 resume[i]->signal();
744
745                                                 threadsAwake++;
746                                                 wakeup--;
747                                         }
748                                 }
749                         }
750                 }
751                 else
752                 {
753                         task[threadIndex].type = Task::SUSPEND;
754
755                         threadsAwake--;
756                 }
757
758                 mutex.unlock();
759         }
760
761         void Renderer::executeTask(int threadIndex)
762         {
763                 #if PERF_HUD
764                         int64_t startTick = Timer::ticks();
765                 #endif
766
767                 switch(task[threadIndex].type)
768                 {
769                 case Task::PRIMITIVES:
770                         {
771                                 int unit = task[threadIndex].primitiveUnit;
772                                 
773                                 int input = primitiveProgress[unit].firstPrimitive;
774                                 int count = primitiveProgress[unit].primitiveCount;
775                                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
776                                 int (*setupPrimitives)(Renderer *renderer, int batch, int count) = draw->setupPrimitives;
777
778                                 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
779
780                                 #if PERF_HUD
781                                         int64_t time = Timer::ticks();
782                                         vertexTime[threadIndex] += time - startTick;
783                                         startTick = time;
784                                 #endif
785
786                                 int visible = setupPrimitives(this, unit, count);
787
788                                 primitiveProgress[unit].visible = visible;
789                                 primitiveProgress[unit].references = clusterCount;
790
791                                 #if PERF_HUD
792                                         setupTime[threadIndex] += Timer::ticks() - startTick;
793                                 #endif
794                         }
795                         break;
796                 case Task::PIXELS:
797                         {
798                                 int unit = task[threadIndex].primitiveUnit;
799                                 int visible = primitiveProgress[unit].visible;
800
801                                 if(visible > 0)
802                                 {
803                                         int cluster = task[threadIndex].pixelCluster;
804                                         Primitive *primitive = primitiveBatch[unit];
805                                         DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
806                                         DrawData *data = draw->data;
807                                         PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
808
809                                         pixelRoutine(primitive, visible, cluster, data);
810                                 }
811
812                                 finishRendering(task[threadIndex]);
813
814                                 #if PERF_HUD
815                                         pixelTime[threadIndex] += Timer::ticks() - startTick;
816                                 #endif
817                         }
818                         break;
819                 case Task::RESUME:
820                         break;
821                 case Task::SUSPEND:
822                         break;
823                 default:
824                         ASSERT(false);
825                 }
826         }
827
828         void Renderer::synchronize()
829         {
830                 sync->lock(sw::PUBLIC);
831                 sync->unlock();
832         }
833
834         void Renderer::finishRendering(Task &pixelTask)
835         {
836                 int unit = pixelTask.primitiveUnit;
837                 int cluster = pixelTask.pixelCluster;
838
839                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
840                 DrawData &data = *draw.data;
841                 int primitive = primitiveProgress[unit].firstPrimitive;
842                 int count = primitiveProgress[unit].primitiveCount;
843
844                 pixelProgress[cluster].processedPrimitives = primitive + count;
845
846                 if(pixelProgress[cluster].processedPrimitives >= draw.count)
847                 {
848                         pixelProgress[cluster].drawCall++;
849                         pixelProgress[cluster].processedPrimitives = 0;
850                 }
851
852                 int ref = atomicDecrement(&primitiveProgress[unit].references);
853
854                 if(ref == 0)
855                 {
856                         ref = atomicDecrement(&draw.references);
857
858                         if(ref == 0)
859                         {
860                                 #if PERF_PROFILE
861                                         for(int cluster = 0; cluster < clusterCount; cluster++)
862                                         {
863                                                 for(int i = 0; i < PERF_TIMERS; i++)
864                                                 {
865                                                         profiler.cycles[i] += data.cycles[i][cluster];
866                                                 }
867                                         }
868                                 #endif
869
870                                 if(draw.queries)
871                                 {
872                                         for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
873                                         {
874                                                 Query *query = *q;
875
876                                                 for(int cluster = 0; cluster < clusterCount; cluster++)
877                                                 {
878                                                         atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
879                                                 }
880
881                                                 atomicDecrement(&query->reference);
882                                         }
883
884                                         delete draw.queries;
885                                         draw.queries = 0;
886                                 }
887
888                                 for(int i = 0; i < 4; i++)
889                                 {
890                                         if(draw.renderTarget[i])
891                                         {
892                                                 draw.renderTarget[i]->unlockInternal();
893                                         }
894                                 }
895
896                                 if(draw.depthStencil)
897                                 {
898                                         draw.depthStencil->unlockInternal();
899                                         draw.depthStencil->unlockStencil();
900                                 }
901
902                                 for(int i = 0; i < 16 + 4; i++)
903                                 {
904                                         if(draw.texture[i])
905                                         {
906                                                 draw.texture[i]->unlock();
907                                         }
908                                 }
909
910                                 for(int i = 0; i < 16; i++)
911                                 {
912                                         if(draw.vertexStream[i])
913                                         {
914                                                 draw.vertexStream[i]->unlock();
915                                         }
916                                 }
917
918                                 if(draw.indexBuffer)
919                                 {
920                                         draw.indexBuffer->unlock();
921                                 }
922
923                                 draw.vertexRoutine->unbind();
924                                 draw.setupRoutine->unbind();
925                                 draw.pixelRoutine->unbind();
926
927                                 sync->unlock();
928
929                                 draw.references = -1;
930                                 resumeApp->signal();
931                         }
932                 }
933
934                 pixelProgress[cluster].executing = false;
935         }
936
937         void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int count, unsigned int loop, int thread)
938         {
939                 Triangle *triangle = triangleBatch[unit];
940                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
941                 DrawData *data = draw->data;
942                 VertexTask *task = vertexTask[thread];
943
944                 const void *indices = data->indices;
945                 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
946
947                 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
948                 {
949                         task->vertexCache.clear();
950                         task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
951                 }
952
953                 unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
954
955                 switch(draw->drawType)
956                 {
957                 case Context::DRAW_POINTLIST:
958                         {
959                                 unsigned int index = start;
960
961                                 for(unsigned int i = 0; i < count; i++)
962                                 {
963                                         batch[i][0] = index;
964                                         batch[i][1] = index;
965                                         batch[i][2] = index;
966
967                                         index += 1;
968                                 }
969                         }
970                         break;
971                 case Context::DRAW_LINELIST:
972                         {
973                                 unsigned int index = 2 * start;
974
975                                 for(unsigned int i = 0; i < count; i++)
976                                 {
977                                         batch[i][0] = index + 0;
978                                         batch[i][1] = index + 1;
979                                         batch[i][2] = index + 1;
980
981                                         index += 2;
982                                 }
983                         }
984                         break;
985                 case Context::DRAW_LINESTRIP:
986                         {
987                                 unsigned int index = start;
988
989                                 for(unsigned int i = 0; i < count; i++)
990                                 {
991                                         batch[i][0] = index + 0;
992                                         batch[i][1] = index + 1;
993                                         batch[i][2] = index + 1;
994
995                                         index += 1;
996                                 }
997                         }
998                         break;
999                 case Context::DRAW_LINELOOP:
1000                         {
1001                                 unsigned int index = start;
1002
1003                                 for(unsigned int i = 0; i < count; i++)
1004                                 {
1005                                         batch[i][0] = (index + 0) % loop;
1006                                         batch[i][1] = (index + 1) % loop;
1007                                         batch[i][2] = (index + 1) % loop;
1008
1009                                         index += 1;
1010                                 }
1011                         }
1012                         break;
1013                 case Context::DRAW_TRIANGLELIST:
1014                         {
1015                                 unsigned int index = 3 * start;
1016
1017                                 for(unsigned int i = 0; i < count; i++)
1018                                 {
1019                                         batch[i][0] = index + 0;
1020                                         batch[i][1] = index + 1;
1021                                         batch[i][2] = index + 2;
1022
1023                                         index += 3;
1024                                 }
1025                         }
1026                         break;
1027                 case Context::DRAW_TRIANGLESTRIP:
1028                         {
1029                                 unsigned int index = start;
1030
1031                                 for(unsigned int i = 0; i < count; i++)
1032                                 {
1033                                         batch[i][0] = index + 0;
1034                                         batch[i][1] = index + (index & 1) + 1;
1035                                         batch[i][2] = index + (~index & 1) + 1;
1036
1037                                         index += 1;
1038                                 }
1039                         }
1040                         break;
1041                 case Context::DRAW_TRIANGLEFAN:
1042                         {
1043                                 unsigned int index = start;
1044
1045                                 for(unsigned int i = 0; i < count; i++)
1046                                 {
1047                                         batch[i][0] = index + 1;
1048                                         batch[i][1] = index + 2;
1049                                         batch[i][2] = 0;
1050
1051                                         index += 1;
1052                                 }
1053                         }
1054                         break;
1055                 case Context::DRAW_INDEXEDPOINTLIST8:
1056                         {
1057                                 const unsigned char *index = (const unsigned char*)indices + start;
1058
1059                                 for(unsigned int i = 0; i < count; i++)
1060                                 {
1061                                         batch[i][0] = *index;
1062                                         batch[i][1] = *index;
1063                                         batch[i][2] = *index;
1064
1065                                         index += 1;
1066                                 }
1067                         }
1068                         break;
1069                 case Context::DRAW_INDEXEDPOINTLIST16:
1070                         {
1071                                 const unsigned short *index = (const unsigned short*)indices + start;
1072
1073                                 for(unsigned int i = 0; i < count; i++)
1074                                 {
1075                                         batch[i][0] = *index;
1076                                         batch[i][1] = *index;
1077                                         batch[i][2] = *index;
1078
1079                                         index += 1;
1080                                 }
1081                         }
1082                         break;
1083                 case Context::DRAW_INDEXEDPOINTLIST32:
1084                         {
1085                                 const unsigned int *index = (const unsigned int*)indices + start;
1086
1087                                 for(unsigned int i = 0; i < count; i++)
1088                                 {
1089                                         batch[i][0] = *index;
1090                                         batch[i][1] = *index;
1091                                         batch[i][2] = *index;
1092
1093                                         index += 1;
1094                                 }
1095                         }
1096                         break;
1097                 case Context::DRAW_INDEXEDLINELIST8:
1098                         {
1099                                 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1100
1101                                 for(unsigned int i = 0; i < count; i++)
1102                                 {
1103                                         batch[i][0] = index[0];
1104                                         batch[i][1] = index[1];
1105                                         batch[i][2] = index[1];
1106
1107                                         index += 2;
1108                                 }
1109                         }
1110                         break;
1111                 case Context::DRAW_INDEXEDLINELIST16:
1112                         {
1113                                 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1114
1115                                 for(unsigned int i = 0; i < count; i++)
1116                                 {
1117                                         batch[i][0] = index[0];
1118                                         batch[i][1] = index[1];
1119                                         batch[i][2] = index[1];
1120
1121                                         index += 2;
1122                                 }
1123                         }
1124                         break;
1125                 case Context::DRAW_INDEXEDLINELIST32:
1126                         {
1127                                 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1128
1129                                 for(unsigned int i = 0; i < count; i++)
1130                                 {
1131                                         batch[i][0] = index[0];
1132                                         batch[i][1] = index[1];
1133                                         batch[i][2] = index[1];
1134
1135                                         index += 2;
1136                                 }
1137                         }
1138                         break;
1139                 case Context::DRAW_INDEXEDLINESTRIP8:
1140                         {
1141                                 const unsigned char *index = (const unsigned char*)indices + start;
1142
1143                                 for(unsigned int i = 0; i < count; i++)
1144                                 {
1145                                         batch[i][0] = index[0];
1146                                         batch[i][1] = index[1];
1147                                         batch[i][2] = index[1];
1148
1149                                         index += 1;
1150                                 }
1151                         }
1152                         break;
1153                 case Context::DRAW_INDEXEDLINESTRIP16:
1154                         {
1155                                 const unsigned short *index = (const unsigned short*)indices + start;
1156
1157                                 for(unsigned int i = 0; i < count; i++)
1158                                 {
1159                                         batch[i][0] = index[0];
1160                                         batch[i][1] = index[1];
1161                                         batch[i][2] = index[1];
1162
1163                                         index += 1;
1164                                 }
1165                         }
1166                         break;
1167                 case Context::DRAW_INDEXEDLINESTRIP32:
1168                         {
1169                                 const unsigned int *index = (const unsigned int*)indices + start;
1170
1171                                 for(unsigned int i = 0; i < count; i++)
1172                                 {
1173                                         batch[i][0] = index[0];
1174                                         batch[i][1] = index[1];
1175                                         batch[i][2] = index[1];
1176
1177                                         index += 1;
1178                                 }
1179                         }
1180                         break;
1181                 case Context::DRAW_INDEXEDLINELOOP8:
1182                         {
1183                                 const unsigned char *index = (const unsigned char*)indices;
1184
1185                                 for(unsigned int i = 0; i < count; i++)
1186                                 {
1187                                         batch[i][0] = index[(start + i + 0) % loop];
1188                                         batch[i][1] = index[(start + i + 1) % loop];
1189                                         batch[i][2] = index[(start + i + 1) % loop];
1190                                 }
1191                         }
1192                         break;
1193                 case Context::DRAW_INDEXEDLINELOOP16:
1194                         {
1195                                 const unsigned short *index = (const unsigned short*)indices;
1196
1197                                 for(unsigned int i = 0; i < count; i++)
1198                                 {
1199                                         batch[i][0] = index[(start + i + 0) % loop];
1200                                         batch[i][1] = index[(start + i + 1) % loop];
1201                                         batch[i][2] = index[(start + i + 1) % loop];
1202                                 }
1203                         }
1204                         break;
1205                 case Context::DRAW_INDEXEDLINELOOP32:
1206                         {
1207                                 const unsigned int *index = (const unsigned int*)indices;
1208
1209                                 for(unsigned int i = 0; i < count; i++)
1210                                 {
1211                                         batch[i][0] = index[(start + i + 0) % loop];
1212                                         batch[i][1] = index[(start + i + 1) % loop];
1213                                         batch[i][2] = index[(start + i + 1) % loop];
1214                                 }
1215                         }
1216                         break;
1217                 case Context::DRAW_INDEXEDTRIANGLELIST8:
1218                         {
1219                                 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1220
1221                                 for(unsigned int i = 0; i < count; i++)
1222                                 {
1223                                         batch[i][0] = index[0];
1224                                         batch[i][1] = index[1];
1225                                         batch[i][2] = index[2];
1226
1227                                         index += 3;
1228                                 }
1229                         }
1230                         break;
1231                 case Context::DRAW_INDEXEDTRIANGLELIST16:
1232                         {
1233                                 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1234
1235                                 for(unsigned int i = 0; i < count; i++)
1236                                 {
1237                                         batch[i][0] = index[0];
1238                                         batch[i][1] = index[1];
1239                                         batch[i][2] = index[2];
1240
1241                                         index += 3;
1242                                 }
1243                         }
1244                         break;
1245                 case Context::DRAW_INDEXEDTRIANGLELIST32:
1246                         {
1247                                 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1248
1249                                 for(unsigned int i = 0; i < count; i++)
1250                                 {
1251                                         batch[i][0] = index[0];
1252                                         batch[i][1] = index[1];
1253                                         batch[i][2] = index[2];
1254
1255                                         index += 3;
1256                                 }
1257                         }
1258                         break;
1259                 case Context::DRAW_INDEXEDTRIANGLESTRIP8:
1260                         {
1261                                 const unsigned char *index = (const unsigned char*)indices + start;
1262
1263                                 for(unsigned int i = 0; i < count; i++)
1264                                 {
1265                                         batch[i][0] = index[0];
1266                                         batch[i][1] = index[((start + i) & 1) + 1];
1267                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1268
1269                                         index += 1;
1270                                 }
1271                         }
1272                         break;
1273                 case Context::DRAW_INDEXEDTRIANGLESTRIP16:
1274                         {
1275                                 const unsigned short *index = (const unsigned short*)indices + start;
1276
1277                                 for(unsigned int i = 0; i < count; i++)
1278                                 {
1279                                         batch[i][0] = index[0];
1280                                         batch[i][1] = index[((start + i) & 1) + 1];
1281                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1282
1283                                         index += 1;
1284                                 }
1285                         }
1286                         break;
1287                 case Context::DRAW_INDEXEDTRIANGLESTRIP32:
1288                         {
1289                                 const unsigned int *index = (const unsigned int*)indices + start;
1290
1291                                 for(unsigned int i = 0; i < count; i++)
1292                                 {
1293                                         batch[i][0] = index[0];
1294                                         batch[i][1] = index[((start + i) & 1) + 1];
1295                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1296
1297                                         index += 1;
1298                                 }
1299                         }
1300                         break;
1301                 case Context::DRAW_INDEXEDTRIANGLEFAN8:
1302                         {
1303                                 const unsigned char *index = (const unsigned char*)indices;
1304
1305                                 for(unsigned int i = 0; i < count; i++)
1306                                 {
1307                                         batch[i][0] = index[start + i + 1];
1308                                         batch[i][1] = index[start + i + 2];
1309                                         batch[i][2] = index[0];
1310                                 }
1311                         }
1312                         break;
1313                 case Context::DRAW_INDEXEDTRIANGLEFAN16:
1314                         {
1315                                 const unsigned short *index = (const unsigned short*)indices;
1316
1317                                 for(unsigned int i = 0; i < count; i++)
1318                                 {
1319                                         batch[i][0] = index[start + i + 1];
1320                                         batch[i][1] = index[start + i + 2];
1321                                         batch[i][2] = index[0];
1322                                 }
1323                         }
1324                         break;
1325                 case Context::DRAW_INDEXEDTRIANGLEFAN32:
1326                         {
1327                                 const unsigned int *index = (const unsigned int*)indices;
1328
1329                                 for(unsigned int i = 0; i < count; i++)
1330                                 {
1331                                         batch[i][0] = index[start + i + 1];
1332                                         batch[i][1] = index[start + i + 2];
1333                                         batch[i][2] = index[0];
1334                                 }
1335                         }
1336                         break;
1337                 default:
1338                         ASSERT(false);
1339                 }
1340
1341                 task->count = count * 3;
1342                 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1343         }
1344
1345         int Renderer::setupSolidTriangles(Renderer *renderer, int unit, int count)
1346         {
1347                 Triangle *triangle = renderer->triangleBatch[unit];
1348                 Primitive *primitive = renderer->primitiveBatch[unit];
1349
1350                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1351                 SetupProcessor::State &state = draw.setupState;
1352                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1353
1354                 int ms = state.multiSample;
1355                 int pos = state.positionRegister;
1356                 const DrawData *data = draw.data;
1357                 int visible = 0;
1358
1359                 for(int i = 0; i < count; i++, triangle++)
1360                 {
1361                         Vertex &v0 = triangle->v0;
1362                         Vertex &v1 = triangle->v1;
1363                         Vertex &v2 = triangle->v2;
1364
1365                         if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1366                         {
1367                                 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1368
1369                                 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1370
1371                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1372                                 {
1373                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1374                                         {
1375                                                 continue;
1376                                         }
1377                                 }
1378
1379                                 if(setupRoutine(primitive, triangle, &polygon, data))
1380                                 {
1381                                         primitive += ms;
1382                                         visible++;
1383                                 }
1384                         }
1385                 }
1386
1387                 return visible;
1388         }
1389
1390         int Renderer::setupWireframeTriangle(Renderer *renderer, int unit, int count)
1391         {
1392                 Triangle *triangle = renderer->triangleBatch[unit];
1393                 Primitive *primitive = renderer->primitiveBatch[unit];
1394                 int visible = 0;
1395
1396                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1397                 SetupProcessor::State &state = draw.setupState;
1398                 SetupProcessor::RoutinePointer setupRoutine = draw.setupPointer;
1399
1400                 const Vertex &v0 = triangle[0].v0;
1401                 const Vertex &v1 = triangle[0].v1;
1402                 const Vertex &v2 = triangle[0].v2;
1403
1404                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1405
1406                 if(state.cullMode == Context::CULL_CLOCKWISE)
1407                 {
1408                         if(d >= 0) return 0;
1409                 }
1410                 else if(state.cullMode == Context::CULL_COUNTERCLOCKWISE)
1411                 {
1412                         if(d <= 0) return 0;
1413                 }
1414
1415                 // Copy attributes
1416                 triangle[1].v0 = v1;
1417                 triangle[1].v1 = v2;
1418                 triangle[2].v0 = v2;
1419                 triangle[2].v1 = v0;
1420
1421                 if(state.color[0][0].flat)   // FIXME
1422                 {
1423                         for(int i = 0; i < 2; i++)
1424                         {
1425                                 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1426                                 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1427                                 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1428                                 triangle[2].v1.C[i] = triangle[0].v0.C[i];
1429                         }
1430                 }
1431
1432                 for(int i = 0; i < 3; i++)
1433                 {
1434                         if(setupLine(renderer, *primitive, *triangle, draw))
1435                         {
1436                                 primitive->area = 0.5f * d;
1437
1438                                 primitive++;
1439                                 visible++;
1440                         }
1441
1442                         triangle++;
1443                 }
1444
1445                 return visible;
1446         }
1447         
1448         int Renderer::setupVertexTriangle(Renderer *renderer, int unit, int count)
1449         {
1450                 Triangle *triangle = renderer->triangleBatch[unit];
1451                 Primitive *primitive = renderer->primitiveBatch[unit];
1452                 int visible = 0;
1453
1454                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1455                 SetupProcessor::State &state = draw.setupState;
1456
1457                 const Vertex &v0 = triangle[0].v0;
1458                 const Vertex &v1 = triangle[0].v1;
1459                 const Vertex &v2 = triangle[0].v2;
1460
1461                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1462
1463                 if(state.cullMode == Context::CULL_CLOCKWISE)
1464                 {
1465                         if(d >= 0) return 0;
1466                 }
1467                 else if(state.cullMode == Context::CULL_COUNTERCLOCKWISE)
1468                 {
1469                         if(d <= 0) return 0;
1470                 }
1471
1472                 // Copy attributes
1473                 triangle[1].v0 = v1;
1474                 triangle[2].v0 = v2;
1475
1476                 for(int i = 0; i < 3; i++)
1477                 {
1478                         if(setupPoint(renderer, *primitive, *triangle, draw))
1479                         {
1480                                 primitive->area = 0.5f * d;
1481
1482                                 primitive++;
1483                                 visible++;
1484                         }
1485
1486                         triangle++;
1487                 }
1488
1489                 return visible;
1490         }
1491
1492         int Renderer::setupLines(Renderer *renderer, int unit, int count)
1493         {
1494                 Triangle *triangle = renderer->triangleBatch[unit];
1495                 Primitive *primitive = renderer->primitiveBatch[unit];
1496                 int visible = 0;
1497
1498                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1499                 SetupProcessor::State &state = draw.setupState;
1500
1501                 int ms = state.multiSample;
1502
1503                 for(int i = 0; i < count; i++)
1504                 {
1505                         if(setupLine(renderer, *primitive, *triangle, draw))
1506                         {
1507                                 primitive += ms;
1508                                 visible++;
1509                         }
1510
1511                         triangle++;
1512                 }
1513
1514                 return visible;
1515         }
1516
1517         int Renderer::setupPoints(Renderer *renderer, int unit, int count)
1518         {
1519                 Triangle *triangle = renderer->triangleBatch[unit];
1520                 Primitive *primitive = renderer->primitiveBatch[unit];
1521                 int visible = 0;
1522
1523                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1524                 SetupProcessor::State &state = draw.setupState;
1525
1526                 int ms = state.multiSample;
1527
1528                 for(int i = 0; i < count; i++)
1529                 {
1530                         if(setupPoint(renderer, *primitive, *triangle, draw))
1531                         {
1532                                 primitive += ms;
1533                                 visible++;
1534                         }
1535
1536                         triangle++;
1537                 }
1538
1539                 return visible;
1540         }
1541
1542         bool Renderer::setupLine(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1543         {
1544                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1545                 const SetupProcessor::State &state = draw.setupState;
1546                 const DrawData &data = *draw.data;
1547
1548                 Vertex &v0 = triangle.v0;
1549                 Vertex &v1 = triangle.v1;
1550
1551                 int pos = state.positionRegister;
1552
1553                 const float4 &P0 = v0.v[pos];
1554                 const float4 &P1 = v1.v[pos];
1555
1556                 if(P0.w <= 0 && P1.w <= 0)
1557                 {
1558                         return false;
1559                 }
1560
1561                 const float W = data.Wx16[0] * (1.0f / 16.0f);
1562                 const float H = data.Hx16[0] * (1.0f / 16.0f);
1563
1564                 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1565                 float dy = H * (P1.y / P1.w - P0.y / P0.w);
1566
1567                 if(dx == 0 && dy == 0)
1568                 {
1569                         return false;
1570                 }
1571
1572                 if(false)   // Rectangle
1573                 {
1574                         float4 P[4];
1575                         int C[4];
1576
1577                         P[0] = P0;
1578                         P[1] = P1;
1579                         P[2] = P1;
1580                         P[3] = P0;
1581
1582                         float scale = 0.5f / sqrt(dx*dx + dy*dy);
1583
1584                         dx *= scale;
1585                         dy *= scale;
1586
1587                         float dx0w = dx * P0.w / W;
1588                         float dy0h = dy * P0.w / H;
1589                         float dx0h = dx * P0.w / H;
1590                         float dy0w = dy * P0.w / W;
1591
1592                         float dx1w = dx * P1.w / W;
1593                         float dy1h = dy * P1.w / H;
1594                         float dx1h = dx * P1.w / H;
1595                         float dy1w = dy * P1.w / W;
1596
1597                         P[0].x += -dy0w + -dx0w;
1598                         P[0].y += -dx0h + +dy0h;
1599                         C[0] = computeClipFlags(P[0], data);
1600
1601                         P[1].x += -dy1w + +dx1w;
1602                         P[1].y += -dx1h + +dy1h;
1603                         C[1] = computeClipFlags(P[1], data);
1604
1605                         P[2].x += +dy1w + +dx1w;
1606                         P[2].y += +dx1h + -dy1h;
1607                         C[2] = computeClipFlags(P[2], data);
1608
1609                         P[3].x += +dy0w + -dx0w;
1610                         P[3].y += +dx0h + +dy0h;
1611                         C[3] = computeClipFlags(P[3], data);
1612
1613                         if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1614                         {
1615                                 Polygon polygon(P, 4);
1616
1617                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1618
1619                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1620                                 {
1621                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1622                                         {
1623                                                 return false;
1624                                         }
1625                                 }
1626
1627                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1628                         }
1629                 }
1630                 else   // Diamond test convention
1631                 {
1632                         float4 P[8];
1633                         int C[8];
1634
1635                         P[0] = P0;
1636                         P[1] = P0;
1637                         P[2] = P0;
1638                         P[3] = P0;
1639                         P[4] = P1;
1640                         P[5] = P1;
1641                         P[6] = P1;
1642                         P[7] = P1;
1643
1644                         float dx0 = 0.5f * P0.w / W;
1645                         float dy0 = 0.5f * P0.w / H;
1646
1647                         float dx1 = 0.5f * P1.w / W;
1648                         float dy1 = 0.5f * P1.w / H;
1649
1650                         P[0].x += -dx0;
1651                         C[0] = computeClipFlags(P[0], data);
1652
1653                         P[1].y += +dy0;
1654                         C[1] = computeClipFlags(P[1], data);
1655
1656                         P[2].x += +dx0;
1657                         C[2] = computeClipFlags(P[2], data);
1658
1659                         P[3].y += -dy0;
1660                         C[3] = computeClipFlags(P[3], data);
1661
1662                         P[4].x += -dx1;
1663                         C[4] = computeClipFlags(P[4], data);
1664
1665                         P[5].y += +dy1;
1666                         C[5] = computeClipFlags(P[5], data);
1667
1668                         P[6].x += +dx1;
1669                         C[6] = computeClipFlags(P[6], data);
1670
1671                         P[7].y += -dy1;
1672                         C[7] = computeClipFlags(P[7], data);
1673
1674                         if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1675                         {
1676                                 float4 L[6];
1677
1678                                 if(dx > -dy)
1679                                 {
1680                                         if(dx > dy)   // Right
1681                                         {
1682                                                 L[0] = P[0];
1683                                                 L[1] = P[1];
1684                                                 L[2] = P[5];
1685                                                 L[3] = P[6];
1686                                                 L[4] = P[7];
1687                                                 L[5] = P[3];
1688                                         }
1689                                         else   // Down
1690                                         {
1691                                                 L[0] = P[0];
1692                                                 L[1] = P[4];
1693                                                 L[2] = P[5];
1694                                                 L[3] = P[6];
1695                                                 L[4] = P[2];
1696                                                 L[5] = P[3];
1697                                         }
1698                                 }
1699                                 else
1700                                 {
1701                                         if(dx > dy)   // Up
1702                                         {
1703                                                 L[0] = P[0];
1704                                                 L[1] = P[1];
1705                                                 L[2] = P[2];
1706                                                 L[3] = P[6];
1707                                                 L[4] = P[7];
1708                                                 L[5] = P[4];
1709                                         }
1710                                         else   // Left
1711                                         {
1712                                                 L[0] = P[1];
1713                                                 L[1] = P[2];
1714                                                 L[2] = P[3];
1715                                                 L[3] = P[7];
1716                                                 L[4] = P[4];
1717                                                 L[5] = P[5];
1718                                         }
1719                                 }
1720
1721                                 Polygon polygon(L, 6);
1722
1723                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1724
1725                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1726                                 {
1727                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1728                                         {
1729                                                 return false;
1730                                         }
1731                                 }
1732
1733                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1734                         }
1735                 }
1736
1737                 return false;
1738         }
1739
1740         bool Renderer::setupPoint(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1741         {
1742                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1743                 const SetupProcessor::State &state = draw.setupState;
1744                 const DrawData &data = *draw.data;
1745
1746                 Vertex &v = triangle.v0;
1747
1748                 float pSize;
1749
1750                 int pts = state.pointSizeRegister;
1751
1752                 if(state.pointSizeRegister != 0xF)
1753                 {
1754                         pSize = v.v[pts].y;
1755                 }
1756                 else
1757                 {
1758                         pSize = data.point.pointSize[0];
1759                 }
1760
1761                 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1762
1763                 float4 P[4];
1764                 int C[4];
1765
1766                 int pos = state.positionRegister;
1767
1768                 P[0] = v.v[pos];
1769                 P[1] = v.v[pos];
1770                 P[2] = v.v[pos];
1771                 P[3] = v.v[pos];
1772
1773                 const float X = pSize * P[0].w * data.halfPixelX[0];
1774                 const float Y = pSize * P[0].w * data.halfPixelY[0];
1775
1776                 P[0].x -= X;
1777                 P[0].y += Y;
1778                 C[0] = computeClipFlags(P[0], data);
1779
1780                 P[1].x += X;
1781                 P[1].y += Y;
1782                 C[1] = computeClipFlags(P[1], data);
1783
1784                 P[2].x += X;
1785                 P[2].y -= Y;
1786                 C[2] = computeClipFlags(P[2], data);
1787
1788                 P[3].x -= X;
1789                 P[3].y -= Y;
1790                 C[3] = computeClipFlags(P[3], data);
1791
1792                 triangle.v1 = triangle.v0;
1793                 triangle.v2 = triangle.v0;
1794
1795                 triangle.v1.X += iround(16 * 0.5f * pSize);
1796                 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1797
1798                 Polygon polygon(P, 4);
1799
1800                 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1801                 {
1802                         int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1803
1804                         if(clipFlagsOr != Clipper::CLIP_FINITE)
1805                         {
1806                                 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1807                                 {
1808                                         return false;
1809                                 }
1810                         }
1811                         
1812                         return setupRoutine(&primitive, &triangle, &polygon, &data);
1813                 }
1814
1815                 return false;
1816         }
1817
1818         unsigned int Renderer::computeClipFlags(const float4 &v, const DrawData &data)
1819         {
1820                 float clX = v.x + data.halfPixelX[0] * v.w;
1821                 float clY = v.y + data.halfPixelY[0] * v.w;
1822
1823                 return ((clX > v.w)  << 0) |
1824                            ((clY > v.w)  << 1) |
1825                            ((v.z > v.w)  << 2) |
1826                            ((clX < -v.w) << 3) |
1827                        ((clY < -v.w) << 4) |
1828                            ((v.z < 0)    << 5) |
1829                            Clipper::CLIP_FINITE;   // FIXME: xyz finite
1830         }
1831
1832         void Renderer::initializeThreads()
1833         {
1834                 unitCount = ceilPow2(threadCount);
1835                 clusterCount = ceilPow2(threadCount);
1836
1837                 for(int i = 0; i < unitCount; i++)
1838                 {
1839                         triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1840                         primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1841                 }
1842
1843                 for(int i = 0; i < threadCount; i++)
1844                 {
1845                         vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1846                         vertexTask[i]->vertexCache.drawCall = -1;
1847
1848                         task[i].type = Task::SUSPEND;
1849
1850                         resume[i] = new Event();
1851                         suspend[i] = new Event();
1852
1853                         Parameters parameters;
1854                         parameters.threadIndex = i;
1855                         parameters.renderer = this;
1856
1857                         exitThreads = false;
1858                         worker[i] = new Thread(threadFunction, &parameters);
1859
1860                         suspend[i]->wait();
1861                         suspend[i]->signal();
1862                 }
1863         }
1864
1865         void Renderer::terminateThreads()
1866         {
1867                 while(threadsAwake != 0)
1868                 {
1869                         Thread::sleep(1);
1870                 }
1871
1872                 for(int thread = 0; thread < threadCount; thread++)
1873                 {
1874                         if(worker[thread])
1875                         {
1876                                 exitThreads = true;
1877                                 resume[thread]->signal();
1878                                 worker[thread]->join();
1879                                 
1880                                 delete worker[thread];
1881                                 worker[thread] = 0;
1882                                 delete resume[thread];
1883                                 resume[thread] = 0;
1884                                 delete suspend[thread];
1885                                 suspend[thread] = 0;
1886                         }
1887                 
1888                         deallocate(vertexTask[thread]);
1889                         vertexTask[thread] = 0;
1890                 }
1891
1892                 for(int i = 0; i < 16; i++)
1893                 {
1894                         deallocate(triangleBatch[i]);
1895                         triangleBatch[i] = 0;
1896
1897                         deallocate(primitiveBatch[i]);
1898                         primitiveBatch[i] = 0;
1899                 }
1900         }
1901
1902         void Renderer::loadConstants(const VertexShader *vertexShader)
1903         {
1904                 if(!vertexShader) return;
1905
1906                 int count = vertexShader->getLength();
1907
1908                 for(int i = 0; i < count; i++)
1909                 {
1910                         const Shader::Instruction *instruction = vertexShader->getInstruction(i);
1911
1912                         if(instruction->opcode == Shader::OPCODE_DEF)
1913                         {
1914                                 int index = instruction->dst.index;
1915                                 float value[4];
1916
1917                                 value[0] = instruction->src[0].value[0];
1918                                 value[1] = instruction->src[0].value[1];
1919                                 value[2] = instruction->src[0].value[2];
1920                                 value[3] = instruction->src[0].value[3];
1921
1922                                 setVertexShaderConstantF(index, value);
1923                         }
1924                         else if(instruction->opcode == Shader::OPCODE_DEFI)
1925                         {
1926                                 int index = instruction->dst.index;
1927                                 int integer[4];
1928
1929                                 integer[0] = instruction->src[0].integer[0];
1930                                 integer[1] = instruction->src[0].integer[1];
1931                                 integer[2] = instruction->src[0].integer[2];
1932                                 integer[3] = instruction->src[0].integer[3];
1933
1934                                 setVertexShaderConstantI(index, integer);
1935                         }
1936                         else if(instruction->opcode == Shader::OPCODE_DEFB)
1937                         {
1938                                 int index = instruction->dst.index;
1939                                 int boolean = instruction->src[0].boolean[0];
1940
1941                                 setVertexShaderConstantB(index, &boolean);
1942                         }
1943                 }
1944         }
1945
1946         void Renderer::loadConstants(const PixelShader *pixelShader)
1947         {
1948                 if(!pixelShader) return;
1949
1950                 int count = pixelShader->getLength();
1951
1952                 for(int i = 0; i < count; i++)
1953                 {
1954                         const Shader::Instruction *instruction = pixelShader->getInstruction(i);
1955
1956                         if(instruction->opcode == Shader::OPCODE_DEF)
1957                         {
1958                                 int index = instruction->dst.index;
1959                                 float value[4];
1960
1961                                 value[0] = instruction->src[0].value[0];
1962                                 value[1] = instruction->src[0].value[1];
1963                                 value[2] = instruction->src[0].value[2];
1964                                 value[3] = instruction->src[0].value[3];
1965
1966                                 setPixelShaderConstantF(index, value);
1967                         }
1968                         else if(instruction->opcode == Shader::OPCODE_DEFI)
1969                         {
1970                                 int index = instruction->dst.index;
1971                                 int integer[4];
1972
1973                                 integer[0] = instruction->src[0].integer[0];
1974                                 integer[1] = instruction->src[0].integer[1];
1975                                 integer[2] = instruction->src[0].integer[2];
1976                                 integer[3] = instruction->src[0].integer[3];
1977
1978                                 setPixelShaderConstantI(index, integer);
1979                         }
1980                         else if(instruction->opcode == Shader::OPCODE_DEFB)
1981                         {
1982                                 int index = instruction->dst.index;
1983                                 int boolean = instruction->src[0].boolean[0];
1984
1985                                 setPixelShaderConstantB(index, &boolean);
1986                         }
1987                 }
1988         }
1989
1990         void Renderer::setIndexBuffer(Resource *indexBuffer)
1991         {
1992                 context->indexBuffer = indexBuffer;
1993         }
1994
1995         void Renderer::setMultiSampleMask(unsigned int mask)
1996         {
1997                 context->sampleMask = mask;
1998         }
1999
2000         void Renderer::setTransparencyAntialiasing(Context::TransparencyAntialiasing transparencyAntialiasing)
2001         {
2002                 sw::transparencyAntialiasing = transparencyAntialiasing;
2003         }
2004
2005         bool Renderer::isReadWriteTexture(int sampler)
2006         {
2007                 for(int index = 0; index < 4; index++)
2008                 {
2009                         if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2010                         {
2011                                 return true;
2012                         }
2013                 }
2014         
2015                 if(context->depthStencil && context->texture[sampler] == context->depthStencil->getResource())
2016                 {
2017                         return true;
2018                 }
2019
2020                 return false;
2021         }
2022         
2023         void Renderer::updateClipper()
2024         {
2025                 if(updateClipPlanes)
2026                 {
2027                         if(VertexProcessor::isFixedFunction())   // User plane in world space
2028                         {
2029                                 const Matrix &scissorWorld = getViewTransform();
2030
2031                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2032                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2033                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2034                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2035                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2036                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2037                         }
2038                         else   // User plane in clip space
2039                         {
2040                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2041                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2042                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2043                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2044                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2045                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2046                         }
2047
2048                         updateClipPlanes = false;
2049                 }
2050         }
2051
2052         void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2053         {
2054                 ASSERT(sampler < (16 + 4));
2055
2056                 context->texture[sampler] = resource;
2057         }
2058
2059         void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2060         {
2061                 ASSERT(sampler < (16 + 4) && face < 6 && level < MIPMAP_LEVELS);
2062                 
2063                 context->sampler[sampler].setTextureLevel(face, level, surface, type);
2064         }
2065
2066         void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2067         {
2068                 if(type == SAMPLER_PIXEL)
2069                 {
2070                         PixelProcessor::setTextureFilter(sampler, textureFilter);
2071                 }
2072                 else
2073                 {
2074                         VertexProcessor::setTextureFilter(sampler, textureFilter);
2075                 }
2076         }
2077
2078         void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2079         {
2080                 if(type == SAMPLER_PIXEL)
2081                 {
2082                         PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2083                 }
2084                 else
2085                 {
2086                         VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
2087                 }
2088         }
2089
2090         void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2091         {
2092                 if(type == SAMPLER_PIXEL)
2093                 {
2094                         PixelProcessor::setGatherEnable(sampler, enable);
2095                 }
2096                 else
2097                 {
2098                         VertexProcessor::setGatherEnable(sampler, enable);
2099                 }
2100         }
2101
2102         void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2103         {
2104                 if(type == SAMPLER_PIXEL)
2105                 {
2106                         PixelProcessor::setAddressingModeU(sampler, addressMode);
2107                 }
2108                 else
2109                 {
2110                         VertexProcessor::setAddressingModeU(sampler, addressMode);
2111                 }
2112         }
2113
2114         void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2115         {
2116                 if(type == SAMPLER_PIXEL)
2117                 {
2118                         PixelProcessor::setAddressingModeV(sampler, addressMode);
2119                 }
2120                 else
2121                 {
2122                         VertexProcessor::setAddressingModeV(sampler, addressMode);
2123                 }
2124         }
2125
2126         void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2127         {
2128                 if(type == SAMPLER_PIXEL)
2129                 {
2130                         PixelProcessor::setAddressingModeW(sampler, addressMode);
2131                 }
2132                 else
2133                 {
2134                         VertexProcessor::setAddressingModeW(sampler, addressMode);
2135                 }
2136         }
2137
2138         void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2139         {
2140                 if(type == SAMPLER_PIXEL)
2141                 {
2142                         PixelProcessor::setReadSRGB(sampler, sRGB);
2143                 }
2144                 else
2145                 {
2146                         VertexProcessor::setReadSRGB(sampler, sRGB);
2147                 }
2148         }
2149
2150         void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2151         {
2152                 if(type == SAMPLER_PIXEL)
2153                 {
2154                         PixelProcessor::setMipmapLOD(sampler, bias);
2155                 }
2156                 else
2157                 {
2158                         VertexProcessor::setMipmapLOD(sampler, bias);
2159                 }
2160         }
2161
2162         void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2163         {
2164                 if(type == SAMPLER_PIXEL)
2165                 {
2166                         PixelProcessor::setBorderColor(sampler, borderColor);
2167                 }
2168                 else
2169                 {
2170                         VertexProcessor::setBorderColor(sampler, borderColor);
2171                 }
2172         }
2173
2174         void Renderer::setMaxAnisotropy(SamplerType type, int sampler, unsigned int maxAnisotropy)
2175         {
2176                 if(type == SAMPLER_PIXEL)
2177                 {
2178                         PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2179                 }
2180                 else
2181                 {
2182                         VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2183                 }
2184         }
2185
2186         void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2187         {
2188                 context->setPointSpriteEnable(pointSpriteEnable);
2189         }
2190
2191         void Renderer::setPointScaleEnable(bool pointScaleEnable)
2192         {
2193                 context->setPointScaleEnable(pointScaleEnable);
2194         }
2195
2196         void Renderer::setDepthBias(float bias)
2197         {
2198                 depthBias = bias;
2199         }
2200
2201         void Renderer::setSlopeDepthBias(float slopeBias)
2202         {
2203                 slopeDepthBias = slopeBias;
2204         }
2205
2206         void Renderer::setPixelShader(const PixelShader *shader)
2207         {
2208                 context->pixelShader = shader;
2209
2210                 loadConstants(shader);
2211         }
2212
2213         void Renderer::setVertexShader(const VertexShader *shader)
2214         {
2215                 context->vertexShader = shader;
2216
2217                 loadConstants(shader);
2218         }
2219
2220         void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2221         {
2222                 for(int i = 0; i < DRAW_COUNT; i++)
2223                 {
2224                         if(drawCall[i]->psDirtyConstF < index + count)
2225                         {
2226                                 drawCall[i]->psDirtyConstF = index + count;
2227                         }
2228                 }
2229
2230                 for(int i = 0; i < count; i++)
2231                 {
2232                         PixelProcessor::setFloatConstant(index + i, value);
2233                         value += 4;
2234                 }
2235         }
2236
2237         void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2238         {
2239                 for(int i = 0; i < DRAW_COUNT; i++)
2240                 {
2241                         if(drawCall[i]->psDirtyConstI < index + count)
2242                         {
2243                                 drawCall[i]->psDirtyConstI = index + count;
2244                         }
2245                 }
2246
2247                 for(int i = 0; i < count; i++)
2248                 {
2249                         PixelProcessor::setIntegerConstant(index + i, value);
2250                         value += 4;
2251                 }
2252         }
2253
2254         void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2255         {
2256                 for(int i = 0; i < DRAW_COUNT; i++)
2257                 {
2258                         if(drawCall[i]->psDirtyConstB < index + count)
2259                         {
2260                                 drawCall[i]->psDirtyConstB = index + count;
2261                         }
2262                 }
2263
2264                 for(int i = 0; i < count; i++)
2265                 {
2266                         PixelProcessor::setBooleanConstant(index + i, *boolean);
2267                         boolean++;
2268                 }
2269         }
2270
2271         void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2272         {
2273                 for(int i = 0; i < DRAW_COUNT; i++)
2274                 {
2275                         if(drawCall[i]->vsDirtyConstF < index + count)
2276                         {
2277                                 drawCall[i]->vsDirtyConstF = index + count;
2278                         }
2279                 }
2280
2281                 for(int i = 0; i < count; i++)
2282                 {
2283                         VertexProcessor::setFloatConstant(index + i, value);
2284                         value += 4;
2285                 }
2286         }
2287
2288         void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2289         {
2290                 for(int i = 0; i < DRAW_COUNT; i++)
2291                 {
2292                         if(drawCall[i]->vsDirtyConstI < index + count)
2293                         {
2294                                 drawCall[i]->vsDirtyConstI = index + count;
2295                         }
2296                 }
2297
2298                 for(int i = 0; i < count; i++)
2299                 {
2300                         VertexProcessor::setIntegerConstant(index + i, value);
2301                         value += 4;
2302                 }
2303         }
2304
2305         void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2306         {
2307                 for(int i = 0; i < DRAW_COUNT; i++)
2308                 {
2309                         if(drawCall[i]->vsDirtyConstB < index + count)
2310                         {
2311                                 drawCall[i]->vsDirtyConstB = index + count;
2312                         }
2313                 }
2314
2315                 for(int i = 0; i < count; i++)
2316                 {
2317                         VertexProcessor::setBooleanConstant(index + i, *boolean);
2318                         boolean++;
2319                 }
2320         }
2321
2322         void Renderer::setModelMatrix(const Matrix &M, int i)
2323         {
2324                 VertexProcessor::setModelMatrix(M, i);
2325         }
2326
2327         void Renderer::setViewMatrix(const Matrix &V)
2328         {
2329                 VertexProcessor::setViewMatrix(V);
2330                 updateClipPlanes = true;
2331         }
2332
2333         void Renderer::setBaseMatrix(const Matrix &B)
2334         {
2335                 VertexProcessor::setBaseMatrix(B);
2336                 updateClipPlanes = true;
2337         }
2338
2339         void Renderer::setProjectionMatrix(const Matrix &P)
2340         {
2341                 VertexProcessor::setProjectionMatrix(P);
2342                 updateClipPlanes = true;
2343         }
2344
2345         void Renderer::addQuery(Query *query)
2346         {
2347                 queries.push_back(query);
2348         }
2349         
2350         void Renderer::removeQuery(Query *query)
2351         {
2352                 queries.remove(query);
2353         }
2354
2355         #if PERF_HUD
2356                 int Renderer::getThreadCount()
2357                 {
2358                         return threadCount;
2359                 }
2360                 
2361                 int64_t Renderer::getVertexTime(int thread)
2362                 {
2363                         return vertexTime[thread];
2364                 }
2365
2366                 int64_t Renderer::getSetupTime(int thread)
2367                 {
2368                         return setupTime[thread];
2369                 }
2370                         
2371                 int64_t Renderer::getPixelTime(int thread)
2372                 {
2373                         return pixelTime[thread];
2374                 }
2375
2376                 void Renderer::resetTimers()
2377                 {
2378                         for(int thread = 0; thread < threadCount; thread++)
2379                         {
2380                                 vertexTime[thread] = 0;
2381                                 setupTime[thread] = 0;
2382                                 pixelTime[thread] = 0;
2383                         }
2384                 }
2385         #endif
2386
2387         void Renderer::setViewport(const Viewport &viewport)
2388         {
2389                 this->viewport = viewport;
2390         }
2391
2392         void Renderer::setScissor(const Rect &scissor)
2393         {
2394                 this->scissor = scissor;
2395         }
2396
2397         void Renderer::setClipFlags(int flags)
2398         {
2399                 clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
2400         }
2401
2402         void Renderer::setClipPlane(unsigned int index, const float plane[4])
2403         {
2404                 if(index < 6)
2405                 {
2406                         userPlane[index] = plane;
2407                 }
2408                 else ASSERT(false);
2409
2410                 updateClipPlanes = true;
2411         }
2412
2413         void Renderer::updateConfiguration(bool initialUpdate)
2414         {
2415                 bool newConfiguration = swiftConfig->hasNewConfiguration();
2416
2417                 if(newConfiguration || initialUpdate)
2418                 {
2419                         terminateThreads();
2420
2421                         SwiftConfig::Configuration configuration = {0};
2422                         swiftConfig->getConfiguration(configuration);
2423
2424                         precacheVertex = !newConfiguration && configuration.precache;
2425                         precacheSetup = !newConfiguration && configuration.precache;
2426                         precachePixel = !newConfiguration && configuration.precache;
2427
2428                         VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2429                         PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2430                         SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2431
2432                         switch(configuration.textureSampleQuality)
2433                         {
2434                         case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
2435                         case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
2436                         case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2437                         default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2438                         }
2439
2440                         switch(configuration.mipmapQuality)
2441                         {
2442                         case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
2443                         case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2444                         default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2445                         }
2446
2447                         setPerspectiveCorrection(configuration.perspectiveCorrection);
2448
2449                         switch(configuration.transcendentalPrecision)
2450                         {
2451                         case 0:
2452                                 logPrecision = APPROXIMATE;
2453                                 expPrecision = APPROXIMATE;
2454                                 rcpPrecision = APPROXIMATE;
2455                                 rsqPrecision = APPROXIMATE;
2456                                 break;
2457                         case 1:
2458                                 logPrecision = PARTIAL;
2459                                 expPrecision = PARTIAL;
2460                                 rcpPrecision = PARTIAL;
2461                                 rsqPrecision = PARTIAL;
2462                                 break;
2463                         case 2:
2464                                 logPrecision = ACCURATE;
2465                                 expPrecision = ACCURATE;
2466                                 rcpPrecision = ACCURATE;
2467                                 rsqPrecision = ACCURATE;
2468                                 break;
2469                         case 3:
2470                                 logPrecision = WHQL;
2471                                 expPrecision = WHQL;
2472                                 rcpPrecision = WHQL;
2473                                 rsqPrecision = WHQL;
2474                                 break;
2475                         case 4:
2476                                 logPrecision = IEEE;
2477                                 expPrecision = IEEE;
2478                                 rcpPrecision = IEEE;
2479                                 rsqPrecision = IEEE;
2480                                 break;
2481                         default:
2482                                 logPrecision = ACCURATE;
2483                                 expPrecision = ACCURATE;
2484                                 rcpPrecision = ACCURATE;
2485                                 rsqPrecision = ACCURATE;
2486                                 break;
2487                         }
2488
2489                         switch(configuration.transparencyAntialiasing)
2490                         {
2491                         case 0:  transparencyAntialiasing = Context::TRANSPARENCY_NONE;              break;
2492                         case 1:  transparencyAntialiasing = Context::TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2493                         default: transparencyAntialiasing = Context::TRANSPARENCY_NONE;              break;
2494                         }
2495
2496                         switch(configuration.threadCount)
2497                         {
2498                         case -1: threadCount = CPUID::coreCount();        break;
2499                         case 0:  threadCount = CPUID::processAffinity();  break;
2500                         default: threadCount = configuration.threadCount; break;
2501                         }
2502
2503                         CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2504                         CPUID::setEnableSSSE3(configuration.enableSSSE3);
2505                         CPUID::setEnableSSE3(configuration.enableSSE3);
2506                         CPUID::setEnableSSE2(configuration.enableSSE2);
2507                         CPUID::setEnableSSE(configuration.enableSSE);
2508
2509                         for(int pass = 0; pass < 10; pass++)
2510                         {
2511                                 optimization[pass] = configuration.optimization[pass];
2512                         }
2513
2514                         forceWindowed = configuration.forceWindowed;
2515                         complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2516                         postBlendSRGB = configuration.postBlendSRGB;
2517                         exactColorRounding = configuration.exactColorRounding;
2518                         forceClearRegisters = configuration.forceClearRegisters;
2519
2520                 #ifndef NDEBUG
2521                         minPrimitives = configuration.minPrimitives;
2522                         maxPrimitives = configuration.maxPrimitives;
2523                 #endif
2524                 }
2525
2526                 if(!initialUpdate && !worker[0])
2527                 {
2528                         initializeThreads();
2529                 }
2530         }
2531 }