OSDN Git Service

Add a critical section for dynamic code generation.
[android-x86/external-swiftshader.git] / src / Renderer / Renderer.cpp
1 // SwiftShader Software Renderer
2 //
3 // Copyright(c) 2005-2012 TransGaming Inc.
4 //
5 // All rights reserved. No part of this software may be copied, distributed, transmitted,
6 // transcribed, stored in a retrieval system, translated into any human or computer
7 // language by any means, or disclosed to third parties without the explicit written
8 // agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9 // or implied, including but not limited to any patent rights, are granted to you.
10 //
11
12 #include "Renderer.hpp"
13
14 #include "Clipper.hpp"
15 #include "Math.hpp"
16 #include "FrameBuffer.hpp"
17 #include "Timer.hpp"
18 #include "Surface.hpp"
19 #include "Half.hpp"
20 #include "Primitive.hpp"
21 #include "Polygon.hpp"
22 #include "SwiftConfig.hpp"
23 #include "MutexLock.hpp"
24 #include "CPUID.hpp"
25 #include "Memory.hpp"
26 #include "Resource.hpp"
27 #include "Constants.hpp"
28 #include "Debug.hpp"
29 #include "Reactor/Reactor.hpp"
30
31 #include <malloc.h>
32 #include <assert.h>
33
34 #undef max
35
36 bool disableServer = true;   // Forwarded to the SwiftConfig constructor in Renderer::Renderer()
37
38 #ifndef NDEBUG
39 unsigned int minPrimitives = 1;   // Debug builds only: Renderer::draw() returns early when count is below this
40 unsigned int maxPrimitives = 1 << 21;   // Debug builds only: Renderer::draw() returns early when count is above this
41 #endif
42
43 namespace sw
44 {
45         extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
46         extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
47         extern bool booleanFaceRegister;   // Overwritten by the Renderer constructor argument of the same name
48         extern bool fullPixelPositionRegister;   // Overwritten by the Renderer constructor argument of the same name
49
50         extern bool forceWindowed;
51         extern bool complementaryDepthBuffer;   // When set, Renderer::draw() negates the depth range and uses 1 - near
52         extern bool postBlendSRGB;
53         extern bool exactColorRounding;   // Overwritten by the Renderer constructor argument of the same name
54         extern TransparencyAntialiasing transparencyAntialiasing;
55         extern bool forceClearRegisters;
56
57         extern bool precacheVertex;
58         extern bool precacheSetup;
59         extern bool precachePixel;
60
61         int batchSize = 128;   // Renderer::draw() divides this by the multisample count to get the per-draw batch size
62         int threadCount = 1;   // NOTE(review): presumably the number of worker threads - not used in this part of the file, confirm
63         int unitCount = 1;   // Number of primitive units scanned by Renderer::findAvailableTasks()
64         int clusterCount = 1;   // Number of pixel clusters; bounds the per-cluster loops in draw() and findAvailableTasks()
65
66         TranscendentalPrecision logPrecision = ACCURATE;   // Renderer::threadFunction() enables flush-to-zero and denormals-are-zero when this is below IEEE
67         TranscendentalPrecision expPrecision = ACCURATE;
68         TranscendentalPrecision rcpPrecision = ACCURATE;
69         TranscendentalPrecision rsqPrecision = ACCURATE;
70         bool perspectiveCorrection = true;
71
72         BackoffLock Renderer::codegenMutex;   // Held around the VertexProcessor/SetupProcessor/PixelProcessor routine() calls in draw()
73
74         struct Parameters   // Start-up arguments that Renderer::threadFunction() receives through its void* parameter
75         {
76                 Renderer *renderer;   // Renderer whose threadLoop() the thread runs
77                 int threadIndex;   // Index handed on to threadLoop()
78         };
79
80         DrawCall::DrawCall()
81         {
82                 queries = 0;
83
84                 vsDirtyConstF = 256 + 1;
85                 vsDirtyConstI = 16;
86                 vsDirtyConstB = 16;
87
88                 psDirtyConstF = 224;
89                 psDirtyConstI = 16;
90                 psDirtyConstB = 16;
91
92                 references = -1;
93
94                 data = (DrawData*)allocate(sizeof(DrawData));
95                 data->constants = &constants;
96         }
97
98         DrawCall::~DrawCall()
99         {
100                 delete queries;
101
102                 deallocate(data);
103         }
104
105         Renderer::Renderer(Context *context, bool halfIntegerCoordinates, bool symmetricNormalizedDepth, bool booleanFaceRegister, bool fullPixelPositionRegister, bool exactColorRounding) : context(context), VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), viewport()
106         {
107                 sw::halfIntegerCoordinates = halfIntegerCoordinates;
108                 sw::symmetricNormalizedDepth = symmetricNormalizedDepth;
109                 sw::booleanFaceRegister = booleanFaceRegister;
110                 sw::fullPixelPositionRegister = fullPixelPositionRegister;
111                 sw::exactColorRounding = exactColorRounding;
112
113                 setRenderTarget(0, 0);
114                 clipper = new Clipper();
115
116                 updateViewMatrix = true;
117                 updateBaseMatrix = true;
118                 updateProjectionMatrix = true;
119                 updateClipPlanes = true;
120
121                 #if PERF_HUD
122                         resetTimers();
123                 #endif
124
125                 for(int i = 0; i < 16; i++)
126                 {
127                         vertexTask[i] = 0;
128
129                         worker[i] = 0;
130                         resume[i] = 0;
131                         suspend[i] = 0;
132                 }
133
134                 threadsAwake = 0;
135                 resumeApp = new Event();
136
137                 currentDraw = 0;
138                 nextDraw = 0;
139
140                 qHead = 0;
141                 qSize = 0;
142
143                 for(int i = 0; i < 16; i++)
144                 {
145                         triangleBatch[i] = 0;
146                         primitiveBatch[i] = 0;
147                 }
148
149                 for(int draw = 0; draw < DRAW_COUNT; draw++)
150                 {
151                         drawCall[draw] = new DrawCall();
152                         drawList[draw] = drawCall[draw];
153                 }
154
155                 for(int unit = 0; unit < 16; unit++)
156                 {
157                         primitiveProgress[unit].init();
158                 }
159
160                 for(int cluster = 0; cluster < 16; cluster++)
161                 {
162                         pixelProgress[cluster].init();
163                 }
164
165                 clipFlags = 0;
166
167                 swiftConfig = new SwiftConfig(disableServer);
168                 updateConfiguration(true);
169
170                 sync = new Resource(0);
171         }
172
173         Renderer::~Renderer()
174         {
175                 sync->destruct();
176
177                 delete clipper;
178                 clipper = 0;
179
180                 terminateThreads();
181                 delete resumeApp;
182
183                 for(int draw = 0; draw < DRAW_COUNT; draw++)
184                 {
185                         delete drawCall[draw];
186                 }
187
188                 delete swiftConfig;
189         }
190
191         void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter)
192         {
193                 blitter.blit(source, sRect, dest, dRect, filter);
194         }
195
196         void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
197         {
198                 #ifndef NDEBUG
199                         if(count < minPrimitives || count > maxPrimitives)
200                         {
201                                 return;
202                         }
203                 #endif
204
205                 context->drawType = drawType;
206
207                 updateConfiguration();
208                 updateClipper();
209
210                 int ss = context->getSuperSampleCount();
211                 int ms = context->getMultiSampleCount();
212
213                 for(int q = 0; q < ss; q++)
214                 {
215                         int oldMultiSampleMask = context->multiSampleMask;
216                         context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
217
218                         if(!context->multiSampleMask)
219                         {
220                                 continue;
221                         }
222
223                         sync->lock(sw::PRIVATE);
224
225                         Routine *vertexRoutine;
226                         Routine *setupRoutine;
227                         Routine *pixelRoutine;
228
229                         if(update || oldMultiSampleMask != context->multiSampleMask)
230                         {
231                                 vertexState = VertexProcessor::update();
232                                 setupState = SetupProcessor::update();
233                                 pixelState = PixelProcessor::update();
234
235                                 codegenMutex.lock();
236
237                                 vertexRoutine = VertexProcessor::routine(vertexState);
238                                 setupRoutine = SetupProcessor::routine(setupState);
239                                 pixelRoutine = PixelProcessor::routine(pixelState);
240
241                                 codegenMutex.unlock();
242                         }
243
244                         int batch = batchSize / ms;
245
246                         int (*setupPrimitives)(Renderer *renderer, int batch, int count);
247
248                         if(context->isDrawTriangle())
249                         {
250                                 switch(context->fillMode)
251                                 {
252                                 case FILL_SOLID:
253                                         setupPrimitives = setupSolidTriangles;
254                                         break;
255                                 case FILL_WIREFRAME:
256                                         setupPrimitives = setupWireframeTriangle;
257                                         batch = 1;
258                                         break;
259                                 case FILL_VERTEX:
260                                         setupPrimitives = setupVertexTriangle;
261                                         batch = 1;
262                                         break;
263                                 default: ASSERT(false);
264                                 }
265                         }
266                         else if(context->isDrawLine())
267                         {
268                                 setupPrimitives = setupLines;
269                         }
270                         else   // Point draw
271                         {
272                                 setupPrimitives = setupPoints;
273                         }
274
275                         DrawCall *draw = 0;
276
277                         do
278                         {
279                                 for(int i = 0; i < DRAW_COUNT; i++)
280                                 {
281                                         if(drawCall[i]->references == -1)
282                                         {
283                                                 draw = drawCall[i];
284                                                 drawList[nextDraw % DRAW_COUNT] = draw;
285
286                                                 break;
287                                         }
288                                 }
289
290                                 if(!draw)
291                                 {
292                                         resumeApp->wait();
293                                 }
294                         }
295                         while(!draw);
296
297                         DrawData *data = draw->data;
298
299                         if(queries.size() != 0)
300                         {
301                                 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
302                                 {
303                                         atomicIncrement(&(*query)->reference);
304                                 }
305
306                                 draw->queries = new std::list<Query*>(queries);
307                         }
308
309                         draw->drawType = drawType;
310                         draw->batchSize = batch;
311
312                         vertexRoutine->bind();
313                         setupRoutine->bind();
314                         pixelRoutine->bind();
315
316                         draw->vertexRoutine = vertexRoutine;
317                         draw->setupRoutine = setupRoutine;
318                         draw->pixelRoutine = pixelRoutine;
319                         draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();;
320                         draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
321                         draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
322                         draw->setupPrimitives = setupPrimitives;
323                         draw->setupState = setupState;
324
325                         for(int i = 0; i < 16; i++)
326                         {
327                                 draw->vertexStream[i] = context->input[i].resource;
328                                 data->input[i] = context->input[i].buffer;
329                                 data->stride[i] = context->input[i].stride;
330
331                                 if(draw->vertexStream[i])
332                                 {
333                                         draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
334                                 }
335                         }
336
337                         if(context->indexBuffer)
338                         {
339                                 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
340                         }
341
342                         draw->indexBuffer = context->indexBuffer;
343
344                         for(int sampler = 0; sampler < 20; sampler++)
345                         {
346                                 draw->texture[sampler] = 0;
347                         }
348
349                         for(int sampler = 0; sampler < 16; sampler++)
350                         {
351                                 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
352                                 {
353                                         draw->texture[sampler] = context->texture[sampler];
354                                         draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texure is both read and written, use the same read/write lock as render targets
355
356                                         data->mipmap[sampler] = context->sampler[sampler].getTextureData();
357                                 }
358                         }
359
360                         if(context->pixelShader)
361                         {
362                                 if(draw->psDirtyConstF)
363                                 {
364                                         memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
365                                         memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
366                                         draw->psDirtyConstF = 0;
367                                 }
368
369                                 if(draw->psDirtyConstI)
370                                 {
371                                         memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
372                                         draw->psDirtyConstI = 0;
373                                 }
374
375                                 if(draw->psDirtyConstB)
376                                 {
377                                         memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
378                                         draw->psDirtyConstB = 0;
379                                 }
380                         }
381                         
382                         if(context->pixelShaderVersion() <= 0x0104)
383                         {
384                                 for(int stage = 0; stage < 8; stage++)
385                                 {
386                                         if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
387                                         {
388                                                 data->textureStage[stage] = context->textureStage[stage].uniforms;
389                                         }
390                                         else break;
391                                 }
392                         }
393
394                         if(context->vertexShader)
395                         {
396                                 if(context->vertexShader->getVersion() >= 0x0300)
397                                 {
398                                         for(int sampler = 0; sampler < 4; sampler++)
399                                         {
400                                                 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
401                                                 {
402                                                         draw->texture[16 + sampler] = context->texture[16 + sampler];
403                                                         draw->texture[16 + sampler]->lock(PUBLIC, PRIVATE);
404
405                                                         data->mipmap[16 + sampler] = context->sampler[16 + sampler].getTextureData();
406                                                 }
407                                         }
408                                 }
409
410                                 if(draw->vsDirtyConstF)
411                                 {
412                                         memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
413                                         draw->vsDirtyConstF = 0;
414                                 }
415
416                                 if(draw->vsDirtyConstI)
417                                 {
418                                         memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
419                                         draw->vsDirtyConstI = 0;
420                                 }
421
422                                 if(draw->vsDirtyConstB)
423                                 {
424                                         memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
425                                         draw->vsDirtyConstB = 0;
426                                 }
427                         }
428                         else
429                         {
430                                 data->ff = ff;
431
432                                 draw->vsDirtyConstF = 256 + 1;
433                                 draw->vsDirtyConstI = 16;
434                                 draw->vsDirtyConstB = 16;
435                         }
436
437                         if(pixelState.stencilActive)
438                         {
439                                 data->stencil[0] = stencil;
440                                 data->stencil[1] = stencilCCW;
441                         }
442
443                         if(pixelState.fogActive)
444                         {
445                                 data->fog = fog;
446                         }
447
448                         if(setupState.isDrawPoint)
449                         {
450                                 data->point = point;
451                         }
452
453                         data->lineWidth = context->lineWidth;
454
455                         data->factor = factor;
456
457                         if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
458                         {
459                                 float ref = (float)context->alphaReference * (1.0f / 255.0f);
460                                 float margin = sw::min(ref, 1.0f - ref);
461
462                                 if(ms == 4)
463                                 {
464                                         data->a2c0 = replicate(ref - margin * 0.6f);
465                                         data->a2c1 = replicate(ref - margin * 0.2f);
466                                         data->a2c2 = replicate(ref + margin * 0.2f);
467                                         data->a2c3 = replicate(ref + margin * 0.6f);
468                                 }
469                                 else if(ms == 2)
470                                 {
471                                         data->a2c0 = replicate(ref - margin * 0.3f);
472                                         data->a2c1 = replicate(ref + margin * 0.3f);
473                                 }
474                                 else ASSERT(false);
475                         }
476
477                         if(pixelState.occlusionEnabled)
478                         {
479                                 for(int cluster = 0; cluster < clusterCount; cluster++)
480                                 {
481                                         data->occlusion[cluster] = 0;
482                                 }
483                         }
484
485                         #if PERF_PROFILE
486                                 for(int cluster = 0; cluster < clusterCount; cluster++)
487                                 {
488                                         for(int i = 0; i < PERF_TIMERS; i++)
489                                         {
490                                                 data->cycles[i][cluster] = 0;
491                                         }
492                                 }
493                         #endif
494
495                         // Viewport
496                         {
497                                 float W = 0.5f * viewport.width;
498                                 float H = 0.5f * viewport.height;
499                                 float X0 = viewport.x0 + W;
500                                 float Y0 = viewport.y0 + H;
501                                 float N = viewport.minZ;
502                                 float F = viewport.maxZ;
503                                 float Z = F - N;
504
505                                 if(context->isDrawTriangle(false))
506                                 {
507                                         N += depthBias;
508                                 }
509
510                                 if(complementaryDepthBuffer)
511                                 {
512                                         Z = -Z;
513                                         N = 1 - N;
514                                 }
515
516                                 static const float X[5][16] =   // Fragment offsets
517                                 {
518                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
519                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
520                                         {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
521                                         {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
522                                         {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
523                                 };
524
525                                 static const float Y[5][16] =   // Fragment offsets
526                                 {
527                                         {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
528                                         {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
529                                         {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
530                                         {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
531                                         {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
532                                 };
533
534                                 int s = sw::log2(ss);
535
536                                 data->Wx16 = replicate(W * 16);
537                                 data->Hx16 = replicate(H * 16);
538                                 data->X0x16 = replicate(X0 * 16);
539                                 data->Y0x16 = replicate(Y0 * 16);
540                                 data->XXXX = replicate(X[s][q] / W);
541                                 data->YYYY = replicate(Y[s][q] / H);
542                                 data->halfPixelX = replicate(0.5f / W);
543                                 data->halfPixelY = replicate(0.5f / H);
544                                 data->viewportHeight = abs(viewport.height);
545                                 data->slopeDepthBias = slopeDepthBias;
546                                 data->depthRange = Z;
547                                 data->depthNear = N;
548                                 draw->clipFlags = clipFlags;
549
550                                 if(clipFlags)
551                                 {
552                                         if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
553                                         if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
554                                         if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
555                                         if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
556                                         if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
557                                         if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
558                                 }
559                         }
560
561                         // Target
562                         {
563                                 for(int index = 0; index < 4; index++)
564                                 {
565                                         draw->renderTarget[index] = context->renderTarget[index];
566
567                                         if(draw->renderTarget[index])
568                                         {
569                                                 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
570                                                 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
571                                                 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
572                                         }
573                                 }
574
575                                 draw->depthStencil = context->depthStencil;
576
577                                 if(draw->depthStencil)
578                                 {
579                                         data->depthBuffer = (float*)context->depthStencil->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
580                                         data->depthPitchB = context->depthStencil->getInternalPitchB();
581                                         data->depthSliceB = context->depthStencil->getInternalSliceB();
582
583                                         data->stencilBuffer = (unsigned char*)context->depthStencil->lockStencil(q * ms, MANAGED);
584                                         data->stencilPitchB = context->depthStencil->getStencilPitchB();
585                                         data->stencilSliceB = context->depthStencil->getStencilSliceB();
586                                 }
587                         }
588
589                         // Scissor
590                         {
591                                 data->scissorX0 = scissor.x0;
592                                 data->scissorX1 = scissor.x1;
593                                 data->scissorY0 = scissor.y0;
594                                 data->scissorY1 = scissor.y1;
595                         }
596
597                         draw->primitive = 0;
598                         draw->count = count;
599
600                         draw->references = (count + batch - 1) / batch;
601
602                         schedulerMutex.lock();
603                         nextDraw++;
604                         schedulerMutex.unlock();
605
606                         if(!threadsAwake)
607                         {
608                                 suspend[0]->wait();
609
610                                 threadsAwake = 1;
611                                 task[0].type = Task::RESUME;
612
613                                 resume[0]->signal();
614                         }
615                 }
616         }
617
618         void Renderer::threadFunction(void *parameters)
619         {
620                 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
621                 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
622
623                 if(logPrecision < IEEE)
624                 {
625                         CPUID::setFlushToZero(true);
626                         CPUID::setDenormalsAreZero(true);
627                 }
628
629                 renderer->threadLoop(threadIndex);
630         }
631
632         void Renderer::threadLoop(int threadIndex)
633         {
634                 while(!exitThreads)
635                 {
636                         taskLoop(threadIndex);
637
638                         suspend[threadIndex]->signal();
639                         resume[threadIndex]->wait();
640                 }
641         }
642
643         void Renderer::taskLoop(int threadIndex)
644         {
645                 while(task[threadIndex].type != Task::SUSPEND)
646                 {
647                         scheduleTask(threadIndex);
648                         executeTask(threadIndex);
649                 }
650         }
651
652         void Renderer::findAvailableTasks()
653         {
                    // Appends newly runnable work to the taskQueue ring: first PIXELS tasks for idle
                    // clusters, then PRIMITIVES tasks for free primitive units. scheduleTask() calls
                    // this while holding schedulerMutex. Entries are written at qHead, which advances
                    // modulo 32, and qSize counts the entries waiting to be dequeued.
                    // NOTE(review): nothing in this function checks qSize against the 32-slot wrap
                    // before writing -- presumably the unit/cluster counts keep it in range; confirm.
654                 // Find pixel tasks
655                 for(int cluster = 0; cluster < clusterCount; cluster++)
656                 {
657                         if(!pixelProgress[cluster].executing)
658                         {
                                    // Look for the one batch this cluster has to render next: same draw
                                    // call, and starting exactly where the cluster left off.
659                                 for(int unit = 0; unit < unitCount; unit++)
660                                 {
661                                         if(primitiveProgress[unit].references > 0)   // Contains processed primitives
662                                         {
663                                                 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
664                                                 {
665                                                         if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
666                                                         {
667                                                                 Task &task = taskQueue[qHead];
668                                                                 task.type = Task::PIXELS;
669                                                                 task.primitiveUnit = unit;
670                                                                 task.pixelCluster = cluster;
671
                                                                    // Cleared again at the end of finishRendering().
672                                                                 pixelProgress[cluster].executing = true;
673
674                                                                 // Commit to the task queue
675                                                                 qHead = (qHead + 1) % 32;
676                                                                 qSize++;
677
                                                                    // At most one pixel task per cluster per call.
678                                                                 break;
679                                                         }
680                                                 }
681                                         }
682                                 }
683                         }
684                 }
685         
686                 // Find primitive tasks
687                 if(currentDraw == nextDraw)
688                 {
689                         return;   // No more primitives to process
690                 }
691
692                 for(int unit = 0; unit < unitCount; unit++)
693                 {
                            // Re-read on every iteration because currentDraw may have advanced.
694                         DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
695
                            // All primitives of this draw call have been handed out: move to the next one.
696                         if(draw->primitive >= draw->count)
697                         {
698                                 currentDraw++;
699
700                                 if(currentDraw == nextDraw)
701                                 {
702                                         return;   // No more primitives to process
703                                 }
704
705                                 draw = drawList[currentDraw % DRAW_COUNT];
706                         }
707
708                         if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
709                         {
710                                 int primitive = draw->primitive;
711                                 int count = draw->count;
712                                 int batch = draw->batchSize;
713
714                                 primitiveProgress[unit].drawCall = currentDraw;
715                                 primitiveProgress[unit].firstPrimitive = primitive;
                                    // A full batch, or whatever remains of the draw call if that is less.
716                                 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
717
                                    // May step past draw->count on the last batch; the >= test at the top of
                                    // the loop treats that as "draw call exhausted".
718                                 draw->primitive += batch;
719
720                                 Task &task = taskQueue[qHead];
721                                 task.type = Task::PRIMITIVES;
722                                 task.primitiveUnit = unit;
723
                                    // -1 fails both the "references > 0" test in the pixel pass and the
                                    // "!references" test above, so the unit is left alone until
                                    // executeTask() sets references to clusterCount.
724                                 primitiveProgress[unit].references = -1;
725
726                                 // Commit to the task queue
727                                 qHead = (qHead + 1) % 32;
728                                 qSize++;
729                         }
730                 }
731         }
732
733         void Renderer::scheduleTask(int threadIndex)
734         {
                    // Assigns the next task to the calling worker (task[threadIndex]), or marks it
                    // Task::SUSPEND when no work is queued. Everything between lock() and unlock()
                    // runs under schedulerMutex.
735                 schedulerMutex.lock();
736
                    // Refill when the queue holds no more entries than there are threads that are
                    // not currently awake.
737                 if((int)qSize < threadCount - threadsAwake + 1)
738                 {
739                         findAvailableTasks();
740                 }
741
742                 if(qSize != 0)
743                 {
                            // Take the oldest entry: it sits qSize slots behind the write position.
                            // NOTE(review): relies on qHead/qSize being unsigned so the subtraction
                            // wraps before % 32 -- their declarations are not visible here; confirm.
744                         task[threadIndex] = taskQueue[(qHead - qSize) % 32];
745                         qSize--;
746
747                         if(threadsAwake != threadCount)
748                         {
                                    // Computed from the queue size after the dequeue above.
749                                 int wakeup = qSize - threadsAwake + 1;
750
751                                 for(int i = 0; i < threadCount && wakeup > 0; i++)
752                                 {
753                                         if(task[i].type == Task::SUSPEND)
754                                         {
                                                    // Pairs with suspend[i]->signal() / resume[i]->wait() in
                                                    // threadLoop(). The type is switched to RESUME before the
                                                    // thread is released, so its taskLoop() condition no longer
                                                    // sees SUSPEND.
755                                                 suspend[i]->wait();
756                                                 task[i].type = Task::RESUME;
757                                                 resume[i]->signal();
758
759                                                 threadsAwake++;
760                                                 wakeup--;
761                                         }
762                                 }
763                         }
764                 }
765                 else
766                 {
                            // Nothing to do: taskLoop() exits on this type and threadLoop() parks the thread.
767                         task[threadIndex].type = Task::SUSPEND;
768
769                         threadsAwake--;
770                 }
771
772                 schedulerMutex.unlock();
773         }
774
775         void Renderer::executeTask(int threadIndex)
776         {
                    // Runs the task currently stored in task[threadIndex]. PRIMITIVES tasks do vertex
                    // processing and primitive setup for one batch; PIXELS tasks render one batch for
                    // one cluster; RESUME and SUSPEND do nothing here. With PERF_HUD enabled, elapsed
                    // ticks are added to the per-thread vertexTime / setupTime / pixelTime counters.
777                 #if PERF_HUD
778                         int64_t startTick = Timer::ticks();
779                 #endif
780
781                 switch(task[threadIndex].type)
782                 {
783                 case Task::PRIMITIVES:
784                         {
785                                 int unit = task[threadIndex].primitiveUnit;
786                                 
                                    // Batch range and draw call that findAvailableTasks() recorded for this unit.
787                                 int input = primitiveProgress[unit].firstPrimitive;
788                                 int count = primitiveProgress[unit].primitiveCount;
789                                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
790                                 int (*setupPrimitives)(Renderer *renderer, int batch, int count) = draw->setupPrimitives;
791
792                                 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
793
794                                 #if PERF_HUD
795                                         int64_t time = Timer::ticks();
796                                         vertexTime[threadIndex] += time - startTick;
797                                         startTick = time;
798                                 #endif
799
                                    // The unit index is what gets passed as the routine's 'batch' argument.
800                                 int visible = setupPrimitives(this, unit, count);
801
                                    // references is written after visible: findAvailableTasks() only queues
                                    // pixel tasks for units whose references is > 0, and finishRendering()
                                    // decrements it once per pixel task.
802                                 primitiveProgress[unit].visible = visible;
803                                 primitiveProgress[unit].references = clusterCount;
804
805                                 #if PERF_HUD
806                                         setupTime[threadIndex] += Timer::ticks() - startTick;
807                                 #endif
808                         }
809                         break;
810                 case Task::PIXELS:
811                         {
812                                 int unit = task[threadIndex].primitiveUnit;
813                                 int visible = primitiveProgress[unit].visible;
814
                                    // Skip the pixel routine when setup reported nothing visible.
815                                 if(visible > 0)
816                                 {
817                                         int cluster = task[threadIndex].pixelCluster;
818                                         Primitive *primitive = primitiveBatch[unit];
                                            // Looked up through the cluster's draw call; findAvailableTasks() only
                                            // queues a pixel task when it equals the unit's draw call.
819                                         DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
820                                         DrawData *data = draw->data;
821                                         PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
822
823                                         pixelRoutine(primitive, visible, cluster, data);
824                                 }
825
                                    // Bookkeeping runs whether or not anything was drawn.
826                                 finishRendering(task[threadIndex]);
827
828                                 #if PERF_HUD
829                                         pixelTime[threadIndex] += Timer::ticks() - startTick;
830                                 #endif
831                         }
832                         break;
833                 case Task::RESUME:
834                         break;
835                 case Task::SUSPEND:
836                         break;
837                 default:
                            // Any other task type is unexpected.
838                         ASSERT(false);
839                 }
840         }
841
842         void Renderer::synchronize()
843         {
                    // Acquires sync as sw::PUBLIC and releases it again straight away.
                    // NOTE(review): presumably this blocks until in-flight draw calls have released
                    // sync (finishRendering() calls sync->unlock() when a draw call completes) --
                    // the lock's semantics are defined elsewhere; confirm.
844                 sync->lock(sw::PUBLIC);
845                 sync->unlock();
846         }
847
848         void Renderer::finishRendering(Task &pixelTask)
849         {
                    // Bookkeeping after a pixel task: advances the cluster's progress, drops one
                    // reference on the primitive unit, and -- when both the unit's and the draw
                    // call's reference counts reach zero -- releases everything the draw call held
                    // and signals resumeApp. executeTask() calls this for every PIXELS task.
850                 int unit = pixelTask.primitiveUnit;
851                 int cluster = pixelTask.pixelCluster;
852
853                 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
854                 DrawData &data = *draw.data;
855                 int primitive = primitiveProgress[unit].firstPrimitive;
856                 int count = primitiveProgress[unit].primitiveCount;
857
                    // This cluster has now rendered everything up to the end of the batch.
858                 pixelProgress[cluster].processedPrimitives = primitive + count;
859
                    // Whole draw call rendered by this cluster: move it on to the next draw call.
860                 if(pixelProgress[cluster].processedPrimitives >= draw.count)
861                 {
862                         pixelProgress[cluster].drawCall++;
863                         pixelProgress[cluster].processedPrimitives = 0;
864                 }
865
                    // NOTE(review): the == 0 tests below assume atomicDecrement returns the value
                    // after the decrement -- its definition is not visible here; confirm.
866                 int ref = atomicDecrement(&primitiveProgress[unit].references);
867
                    // Zero references also makes the unit eligible for a new PRIMITIVES task in
                    // findAvailableTasks().
868                 if(ref == 0)
869                 {
870                         ref = atomicDecrement(&draw.references);
871
872                         if(ref == 0)
873                         {
                                    // The 'cluster' loop variables below shadow the local 'cluster' above.
874                                 #if PERF_PROFILE
875                                         for(int cluster = 0; cluster < clusterCount; cluster++)
876                                         {
877                                                 for(int i = 0; i < PERF_TIMERS; i++)
878                                                 {
879                                                         profiler.cycles[i] += data.cycles[i][cluster];
880                                                 }
881                                         }
882                                 #endif
883
884                                 if(draw.queries)
885                                 {
                                            // Add every cluster's occlusion count to each query, then drop the
                                            // query reference.
886                                         for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
887                                         {
888                                                 Query *query = *q;
889
890                                                 for(int cluster = 0; cluster < clusterCount; cluster++)
891                                                 {
892                                                         atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
893                                                 }
894
895                                                 atomicDecrement(&query->reference);
896                                         }
897
                                            // The list object itself is deleted here; the Query objects are not.
898                                         delete draw.queries;
899                                         draw.queries = 0;
900                                 }
901
                                    // Unlock every resource slot of the draw call that is non-null.
902                                 for(int i = 0; i < 4; i++)
903                                 {
904                                         if(draw.renderTarget[i])
905                                         {
906                                                 draw.renderTarget[i]->unlockInternal();
907                                         }
908                                 }
909
910                                 if(draw.depthStencil)
911                                 {
912                                         draw.depthStencil->unlockInternal();
913                                         draw.depthStencil->unlockStencil();
914                                 }
915
                                    // NOTE(review): 16 + 4 presumably means 16 pixel-stage plus 4 vertex-stage
                                    // texture slots -- confirm against the DrawCall declaration.
916                                 for(int i = 0; i < 16 + 4; i++)
917                                 {
918                                         if(draw.texture[i])
919                                         {
920                                                 draw.texture[i]->unlock();
921                                         }
922                                 }
923
924                                 for(int i = 0; i < 16; i++)
925                                 {
926                                         if(draw.vertexStream[i])
927                                         {
928                                                 draw.vertexStream[i]->unlock();
929                                         }
930                                 }
931
932                                 if(draw.indexBuffer)
933                                 {
934                                         draw.indexBuffer->unlock();
935                                 }
936
937                                 draw.vertexRoutine->unbind();
938                                 draw.setupRoutine->unbind();
939                                 draw.pixelRoutine->unbind();
940
941                                 sync->unlock();
942
                                    // references is reset to -1 only after all resources have been released,
                                    // and then resumeApp is signalled.
943                                 draw.references = -1;
944                                 resumeApp->signal();
945                         }
946                 }
947
                    // Cleared last: findAvailableTasks() skips clusters whose executing flag is set.
948                 pixelProgress[cluster].executing = false;
949         }
950
951         void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
952         {
953                 Triangle *triangle = triangleBatch[unit];
954                 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
955                 DrawData *data = draw->data;
956                 VertexTask *task = vertexTask[thread];
957
958                 const void *indices = data->indices;
959                 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
960
961                 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
962                 {
963                         task->vertexCache.clear();
964                         task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
965                 }
966
967                 unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
968
969                 switch(draw->drawType)
970                 {
971                 case DRAW_POINTLIST:
972                         {
973                                 unsigned int index = start;
974
975                                 for(unsigned int i = 0; i < triangleCount; i++)
976                                 {
977                                         batch[i][0] = index;
978                                         batch[i][1] = index;
979                                         batch[i][2] = index;
980
981                                         index += 1;
982                                 }
983                         }
984                         break;
985                 case DRAW_LINELIST:
986                         {
987                                 unsigned int index = 2 * start;
988
989                                 for(unsigned int i = 0; i < triangleCount; i++)
990                                 {
991                                         batch[i][0] = index + 0;
992                                         batch[i][1] = index + 1;
993                                         batch[i][2] = index + 1;
994
995                                         index += 2;
996                                 }
997                         }
998                         break;
999                 case DRAW_LINESTRIP:
1000                         {
1001                                 unsigned int index = start;
1002
1003                                 for(unsigned int i = 0; i < triangleCount; i++)
1004                                 {
1005                                         batch[i][0] = index + 0;
1006                                         batch[i][1] = index + 1;
1007                                         batch[i][2] = index + 1;
1008
1009                                         index += 1;
1010                                 }
1011                         }
1012                         break;
1013                 case DRAW_LINELOOP:
1014                         {
1015                                 unsigned int index = start;
1016
1017                                 for(unsigned int i = 0; i < triangleCount; i++)
1018                                 {
1019                                         batch[i][0] = (index + 0) % loop;
1020                                         batch[i][1] = (index + 1) % loop;
1021                                         batch[i][2] = (index + 1) % loop;
1022
1023                                         index += 1;
1024                                 }
1025                         }
1026                         break;
1027                 case DRAW_TRIANGLELIST:
1028                         {
1029                                 unsigned int index = 3 * start;
1030
1031                                 for(unsigned int i = 0; i < triangleCount; i++)
1032                                 {
1033                                         batch[i][0] = index + 0;
1034                                         batch[i][1] = index + 1;
1035                                         batch[i][2] = index + 2;
1036
1037                                         index += 3;
1038                                 }
1039                         }
1040                         break;
1041                 case DRAW_TRIANGLESTRIP:
1042                         {
1043                                 unsigned int index = start;
1044
1045                                 for(unsigned int i = 0; i < triangleCount; i++)
1046                                 {
1047                                         batch[i][0] = index + 0;
1048                                         batch[i][1] = index + (index & 1) + 1;
1049                                         batch[i][2] = index + (~index & 1) + 1;
1050
1051                                         index += 1;
1052                                 }
1053                         }
1054                         break;
1055                 case DRAW_TRIANGLEFAN:
1056                         {
1057                                 unsigned int index = start;
1058
1059                                 for(unsigned int i = 0; i < triangleCount; i++)
1060                                 {
1061                                         batch[i][0] = index + 1;
1062                                         batch[i][1] = index + 2;
1063                                         batch[i][2] = 0;
1064
1065                                         index += 1;
1066                                 }
1067                         }
1068                         break;
1069                 case DRAW_INDEXEDPOINTLIST8:
1070                         {
1071                                 const unsigned char *index = (const unsigned char*)indices + start;
1072
1073                                 for(unsigned int i = 0; i < triangleCount; i++)
1074                                 {
1075                                         batch[i][0] = *index;
1076                                         batch[i][1] = *index;
1077                                         batch[i][2] = *index;
1078
1079                                         index += 1;
1080                                 }
1081                         }
1082                         break;
1083                 case DRAW_INDEXEDPOINTLIST16:
1084                         {
1085                                 const unsigned short *index = (const unsigned short*)indices + start;
1086
1087                                 for(unsigned int i = 0; i < triangleCount; i++)
1088                                 {
1089                                         batch[i][0] = *index;
1090                                         batch[i][1] = *index;
1091                                         batch[i][2] = *index;
1092
1093                                         index += 1;
1094                                 }
1095                         }
1096                         break;
1097                 case DRAW_INDEXEDPOINTLIST32:
1098                         {
1099                                 const unsigned int *index = (const unsigned int*)indices + start;
1100
1101                                 for(unsigned int i = 0; i < triangleCount; i++)
1102                                 {
1103                                         batch[i][0] = *index;
1104                                         batch[i][1] = *index;
1105                                         batch[i][2] = *index;
1106
1107                                         index += 1;
1108                                 }
1109                         }
1110                         break;
1111                 case DRAW_INDEXEDLINELIST8:
1112                         {
1113                                 const unsigned char *index = (const unsigned char*)indices + 2 * start;
1114
1115                                 for(unsigned int i = 0; i < triangleCount; i++)
1116                                 {
1117                                         batch[i][0] = index[0];
1118                                         batch[i][1] = index[1];
1119                                         batch[i][2] = index[1];
1120
1121                                         index += 2;
1122                                 }
1123                         }
1124                         break;
1125                 case DRAW_INDEXEDLINELIST16:
1126                         {
1127                                 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1128
1129                                 for(unsigned int i = 0; i < triangleCount; i++)
1130                                 {
1131                                         batch[i][0] = index[0];
1132                                         batch[i][1] = index[1];
1133                                         batch[i][2] = index[1];
1134
1135                                         index += 2;
1136                                 }
1137                         }
1138                         break;
1139                 case DRAW_INDEXEDLINELIST32:
1140                         {
1141                                 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1142
1143                                 for(unsigned int i = 0; i < triangleCount; i++)
1144                                 {
1145                                         batch[i][0] = index[0];
1146                                         batch[i][1] = index[1];
1147                                         batch[i][2] = index[1];
1148
1149                                         index += 2;
1150                                 }
1151                         }
1152                         break;
1153                 case DRAW_INDEXEDLINESTRIP8:
1154                         {
1155                                 const unsigned char *index = (const unsigned char*)indices + start;
1156
1157                                 for(unsigned int i = 0; i < triangleCount; i++)
1158                                 {
1159                                         batch[i][0] = index[0];
1160                                         batch[i][1] = index[1];
1161                                         batch[i][2] = index[1];
1162
1163                                         index += 1;
1164                                 }
1165                         }
1166                         break;
1167                 case DRAW_INDEXEDLINESTRIP16:
1168                         {
1169                                 const unsigned short *index = (const unsigned short*)indices + start;
1170
1171                                 for(unsigned int i = 0; i < triangleCount; i++)
1172                                 {
1173                                         batch[i][0] = index[0];
1174                                         batch[i][1] = index[1];
1175                                         batch[i][2] = index[1];
1176
1177                                         index += 1;
1178                                 }
1179                         }
1180                         break;
1181                 case DRAW_INDEXEDLINESTRIP32:
1182                         {
1183                                 const unsigned int *index = (const unsigned int*)indices + start;
1184
1185                                 for(unsigned int i = 0; i < triangleCount; i++)
1186                                 {
1187                                         batch[i][0] = index[0];
1188                                         batch[i][1] = index[1];
1189                                         batch[i][2] = index[1];
1190
1191                                         index += 1;
1192                                 }
1193                         }
1194                         break;
1195                 case DRAW_INDEXEDLINELOOP8:
1196                         {
1197                                 const unsigned char *index = (const unsigned char*)indices;
1198
1199                                 for(unsigned int i = 0; i < triangleCount; i++)
1200                                 {
1201                                         batch[i][0] = index[(start + i + 0) % loop];
1202                                         batch[i][1] = index[(start + i + 1) % loop];
1203                                         batch[i][2] = index[(start + i + 1) % loop];
1204                                 }
1205                         }
1206                         break;
1207                 case DRAW_INDEXEDLINELOOP16:
1208                         {
1209                                 const unsigned short *index = (const unsigned short*)indices;
1210
1211                                 for(unsigned int i = 0; i < triangleCount; i++)
1212                                 {
1213                                         batch[i][0] = index[(start + i + 0) % loop];
1214                                         batch[i][1] = index[(start + i + 1) % loop];
1215                                         batch[i][2] = index[(start + i + 1) % loop];
1216                                 }
1217                         }
1218                         break;
1219                 case DRAW_INDEXEDLINELOOP32:
1220                         {
1221                                 const unsigned int *index = (const unsigned int*)indices;
1222
1223                                 for(unsigned int i = 0; i < triangleCount; i++)
1224                                 {
1225                                         batch[i][0] = index[(start + i + 0) % loop];
1226                                         batch[i][1] = index[(start + i + 1) % loop];
1227                                         batch[i][2] = index[(start + i + 1) % loop];
1228                                 }
1229                         }
1230                         break;
1231                 case DRAW_INDEXEDTRIANGLELIST8:
1232                         {
1233                                 const unsigned char *index = (const unsigned char*)indices + 3 * start;
1234
1235                                 for(unsigned int i = 0; i < triangleCount; i++)
1236                                 {
1237                                         batch[i][0] = index[0];
1238                                         batch[i][1] = index[1];
1239                                         batch[i][2] = index[2];
1240
1241                                         index += 3;
1242                                 }
1243                         }
1244                         break;
1245                 case DRAW_INDEXEDTRIANGLELIST16:
1246                         {
1247                                 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1248
1249                                 for(unsigned int i = 0; i < triangleCount; i++)
1250                                 {
1251                                         batch[i][0] = index[0];
1252                                         batch[i][1] = index[1];
1253                                         batch[i][2] = index[2];
1254
1255                                         index += 3;
1256                                 }
1257                         }
1258                         break;
1259                 case DRAW_INDEXEDTRIANGLELIST32:
1260                         {
1261                                 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1262
1263                                 for(unsigned int i = 0; i < triangleCount; i++)
1264                                 {
1265                                         batch[i][0] = index[0];
1266                                         batch[i][1] = index[1];
1267                                         batch[i][2] = index[2];
1268
1269                                         index += 3;
1270                                 }
1271                         }
1272                         break;
1273                 case DRAW_INDEXEDTRIANGLESTRIP8:
1274                         {
1275                                 const unsigned char *index = (const unsigned char*)indices + start;
1276
1277                                 for(unsigned int i = 0; i < triangleCount; i++)
1278                                 {
1279                                         batch[i][0] = index[0];
1280                                         batch[i][1] = index[((start + i) & 1) + 1];
1281                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1282
1283                                         index += 1;
1284                                 }
1285                         }
1286                         break;
1287                 case DRAW_INDEXEDTRIANGLESTRIP16:
1288                         {
1289                                 const unsigned short *index = (const unsigned short*)indices + start;
1290
1291                                 for(unsigned int i = 0; i < triangleCount; i++)
1292                                 {
1293                                         batch[i][0] = index[0];
1294                                         batch[i][1] = index[((start + i) & 1) + 1];
1295                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1296
1297                                         index += 1;
1298                                 }
1299                         }
1300                         break;
1301                 case DRAW_INDEXEDTRIANGLESTRIP32:
1302                         {
1303                                 const unsigned int *index = (const unsigned int*)indices + start;
1304
1305                                 for(unsigned int i = 0; i < triangleCount; i++)
1306                                 {
1307                                         batch[i][0] = index[0];
1308                                         batch[i][1] = index[((start + i) & 1) + 1];
1309                                         batch[i][2] = index[(~(start + i) & 1) + 1];
1310
1311                                         index += 1;
1312                                 }
1313                         }
1314                         break;
1315                 case DRAW_INDEXEDTRIANGLEFAN8:
1316                         {
1317                                 const unsigned char *index = (const unsigned char*)indices;
1318
1319                                 for(unsigned int i = 0; i < triangleCount; i++)
1320                                 {
1321                                         batch[i][0] = index[start + i + 1];
1322                                         batch[i][1] = index[start + i + 2];
1323                                         batch[i][2] = index[0];
1324                                 }
1325                         }
1326                         break;
1327                 case DRAW_INDEXEDTRIANGLEFAN16:
1328                         {
1329                                 const unsigned short *index = (const unsigned short*)indices;
1330
1331                                 for(unsigned int i = 0; i < triangleCount; i++)
1332                                 {
1333                                         batch[i][0] = index[start + i + 1];
1334                                         batch[i][1] = index[start + i + 2];
1335                                         batch[i][2] = index[0];
1336                                 }
1337                         }
1338                         break;
1339                 case DRAW_INDEXEDTRIANGLEFAN32:
1340                         {
1341                                 const unsigned int *index = (const unsigned int*)indices;
1342
1343                                 for(unsigned int i = 0; i < triangleCount; i++)
1344                                 {
1345                                         batch[i][0] = index[start + i + 1];
1346                                         batch[i][1] = index[start + i + 2];
1347                                         batch[i][2] = index[0];
1348                                 }
1349                         }
1350                         break;
1351         case DRAW_QUADLIST:
1352                         {
1353                                 unsigned int index = 4 * start / 2;
1354
1355                                 for(unsigned int i = 0; i < triangleCount; i += 2)
1356                                 {
1357                                         batch[i+0][0] = index + 0;
1358                                         batch[i+0][1] = index + 1;
1359                                         batch[i+0][2] = index + 2;
1360
1361                     batch[i+1][0] = index + 0;
1362                                         batch[i+1][1] = index + 2;
1363                                         batch[i+1][2] = index + 3;
1364
1365                                         index += 4;
1366                                 }
1367                         }
1368                         break;
1369                 default:
1370                         ASSERT(false);
1371                 }
1372
1373                 task->vertexCount = triangleCount * 3;
1374                 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1375         }
1376
1377         int Renderer::setupSolidTriangles(Renderer *renderer, int unit, int count)
1378         {
                     // Runs the draw call's setup routine on the first 'count' triangles of
                     // triangleBatch[unit], writing accepted results to primitiveBatch[unit].
                     // Returns the number of triangles for which the setup routine returned true.
1379                 Triangle *triangle = renderer->triangleBatch[unit];
1380                 Primitive *primitive = renderer->primitiveBatch[unit];
1381
                     // Draw call named by this unit's primitiveProgress entry; drawList is indexed modulo DRAW_COUNT
1382                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1383                 SetupProcessor::State &state = draw.setupState;
1384                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1385
1386                 int ms = state.multiSample;        // Output pointer advance per accepted triangle
1387                 int pos = state.positionRegister;  // Index into Vertex::v of the position used for the polygon
1388                 const DrawData *data = draw.data;
1389                 int visible = 0;
1390
1391                 for(int i = 0; i < count; i++, triangle++)
1392                 {
1393                         Vertex &v0 = triangle->v0;
1394                         Vertex &v1 = triangle->v1;
1395                         Vertex &v2 = triangle->v2;
1396
                             // Only process the triangle when the AND of the three vertices' clip flags is exactly CLIP_FINITE
1397                         if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1398                         {
1399                                 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1400
1401                                 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1402
                                     // Any extra flag on a vertex or on the draw call sends the polygon through the clipper
1403                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1404                                 {
1405                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1406                                         {
                                                     // clip() returned false: skip this triangle without consuming an output slot
1407                                                 continue;
1408                                         }
1409                                 }
1410
                                     // The output slot is only consumed when the setup routine accepts the triangle
1411                                 if(setupRoutine(primitive, triangle, &polygon, data))
1412                                 {
1413                                         primitive += ms;
1414                                         visible++;
1415                                 }
1416                         }
1417                 }
1418
1419                 return visible;
1420         }
1421
1422         int Renderer::setupWireframeTriangle(Renderer *renderer, int unit, int count)
1423         {
                     // Sets up the three edges of the triangle at triangleBatch[unit][0] as lines.
                     // Overwrites triangle[1] and triangle[2] of the batch with the remaining two edges.
                     // Returns the number of edges for which setupLine() returned true (0 to 3).
                     // 'count' is not used by this function.
1424                 Triangle *triangle = renderer->triangleBatch[unit];
1425                 Primitive *primitive = renderer->primitiveBatch[unit];
1426                 int visible = 0;
1427
1428                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1429                 SetupProcessor::State &state = draw.setupState;
                     // NOTE(review): setupRoutine is not referenced again in this function
1430                 SetupProcessor::RoutinePointer setupRoutine = draw.setupPointer;
1431
1432                 const Vertex &v0 = triangle[0].v0;
1433                 const Vertex &v1 = triangle[0].v1;
1434                 const Vertex &v2 = triangle[0].v2;
1435
                     // Expression in x, y and w of the three vertices; its sign drives culling and 0.5 * d is stored as the primitive area
1436                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1437
                     // Reject the whole triangle based on the sign of d; d == 0 is rejected in both culling modes
1438                 if(state.cullMode == CULL_CLOCKWISE)
1439                 {
1440                         if(d >= 0) return 0;
1441                 }
1442                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1443                 {
1444                         if(d <= 0) return 0;
1445                 }
1446
                     // triangle[0] already holds edge v0-v1 in its v0/v1 slots;
                     // fill triangle[1] with edge v1-v2 and triangle[2] with edge v2-v0
1447                 // Copy attributes
1448                 triangle[1].v0 = v1;
1449                 triangle[1].v1 = v2;
1450                 triangle[2].v0 = v2;
1451                 triangle[2].v1 = v0;
1452
1453                 if(state.color[0][0].flat)   // FIXME
1454                 {
                             // Overwrite C[0] and C[1] of the copied vertices with those of the first vertex
1455                         for(int i = 0; i < 2; i++)
1456                         {
1457                                 triangle[1].v0.C[i] = triangle[0].v0.C[i];
1458                                 triangle[1].v1.C[i] = triangle[0].v0.C[i];
1459                                 triangle[2].v0.C[i] = triangle[0].v0.C[i];
1460                                 triangle[2].v1.C[i] = triangle[0].v0.C[i];
1461                         }
1462                 }
1463
                     // One setupLine() call per edge
1464                 for(int i = 0; i < 3; i++)
1465                 {
1466                         if(setupLine(renderer, *primitive, *triangle, draw))
1467                         {
1468                                 primitive->area = 0.5f * d;
1469
                                     // The output pointer advances by one per accepted edge (state.multiSample is not used here)
1470                                 primitive++;
1471                                 visible++;
1472                         }
1473
1474                         triangle++;
1475                 }
1476
1477                 return visible;
1478         }
1479         
1480         int Renderer::setupVertexTriangle(Renderer *renderer, int unit, int count)
1481         {
                     // Sets up the three vertices of the triangle at triangleBatch[unit][0] as points.
                     // Overwrites the v0 slot of triangle[1] and triangle[2] of the batch with the second and third vertex.
                     // Returns the number of vertices for which setupPoint() returned true (0 to 3).
                     // 'count' is not used by this function.
1482                 Triangle *triangle = renderer->triangleBatch[unit];
1483                 Primitive *primitive = renderer->primitiveBatch[unit];
1484                 int visible = 0;
1485
1486                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1487                 SetupProcessor::State &state = draw.setupState;
1488
1489                 const Vertex &v0 = triangle[0].v0;
1490                 const Vertex &v1 = triangle[0].v1;
1491                 const Vertex &v2 = triangle[0].v2;
1492
                     // Expression in x, y and w of the three vertices; its sign drives culling and 0.5 * d is stored as the primitive area
1493                 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1494
                     // Reject the whole triangle based on the sign of d; d == 0 is rejected in both culling modes
1495                 if(state.cullMode == CULL_CLOCKWISE)
1496                 {
1497                         if(d >= 0) return 0;
1498                 }
1499                 else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1500                 {
1501                         if(d <= 0) return 0;
1502                 }
1503
                     // The copies are made before the loop: v1 and v2 refer to triangle[0], and
                     // setupPoint() overwrites the v1 and v2 slots of the triangle it is given
1504                 // Copy attributes
1505                 triangle[1].v0 = v1;
1506                 triangle[2].v0 = v2;
1507
                     // One setupPoint() call per vertex; each call reads the v0 slot of its triangle
1508                 for(int i = 0; i < 3; i++)
1509                 {
1510                         if(setupPoint(renderer, *primitive, *triangle, draw))
1511                         {
1512                                 primitive->area = 0.5f * d;
1513
                                     // The output pointer advances by one per accepted point (state.multiSample is not used here)
1514                                 primitive++;
1515                                 visible++;
1516                         }
1517
1518                         triangle++;
1519                 }
1520
1521                 return visible;
1522         }
1523
1524         int Renderer::setupLines(Renderer *renderer, int unit, int count)
1525         {
                     // Calls setupLine() on each of the first 'count' entries of triangleBatch[unit],
                     // writing accepted results to primitiveBatch[unit].
                     // Returns the number of entries for which setupLine() returned true.
1526                 Triangle *triangle = renderer->triangleBatch[unit];
1527                 Primitive *primitive = renderer->primitiveBatch[unit];
1528                 int visible = 0;
1529
1530                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1531                 SetupProcessor::State &state = draw.setupState;
1532
1533                 int ms = state.multiSample;   // Output pointer advance per accepted line
1534
1535                 for(int i = 0; i < count; i++)
1536                 {
1537                         if(setupLine(renderer, *primitive, *triangle, draw))
1538                         {
1539                                 primitive += ms;
1540                                 visible++;
1541                         }
1542
                             // The input pointer advances whether or not the line was accepted
1543                         triangle++;
1544                 }
1545
1546                 return visible;
1547         }
1548
1549         int Renderer::setupPoints(Renderer *renderer, int unit, int count)
1550         {
                     // Calls setupPoint() on each of the first 'count' entries of triangleBatch[unit],
                     // writing accepted results to primitiveBatch[unit].
                     // Returns the number of entries for which setupPoint() returned true.
1551                 Triangle *triangle = renderer->triangleBatch[unit];
1552                 Primitive *primitive = renderer->primitiveBatch[unit];
1553                 int visible = 0;
1554
1555                 DrawCall &draw = *renderer->drawList[renderer->primitiveProgress[unit].drawCall % DRAW_COUNT];
1556                 SetupProcessor::State &state = draw.setupState;
1557
1558                 int ms = state.multiSample;   // Output pointer advance per accepted point
1559
1560                 for(int i = 0; i < count; i++)
1561                 {
1562                         if(setupPoint(renderer, *primitive, *triangle, draw))
1563                         {
1564                                 primitive += ms;
1565                                 visible++;
1566                         }
1567
                             // The input pointer advances whether or not the point was accepted
1568                         triangle++;
1569                 }
1570
1571                 return visible;
1572         }
1573
1574         bool Renderer::setupLine(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1575         {
                     // Builds a polygon around the line from triangle.v0 to triangle.v1, clips it when
                     // required and hands it to the draw call's setup routine.
                     // Returns the setup routine's result, or false when the line is rejected beforehand.
1576                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1577                 const SetupProcessor::State &state = draw.setupState;
1578                 const DrawData &data = *draw.data;
1579
1580                 float lineWidth = data.lineWidth;
1581
1582                 Vertex &v0 = triangle.v0;
1583                 Vertex &v1 = triangle.v1;
1584
1585                 int pos = state.positionRegister;
1586
1587                 const float4 &P0 = v0.v[pos];
1588                 const float4 &P1 = v1.v[pos];
1589
                     // Reject the line when neither endpoint has a positive w
1590                 if(P0.w <= 0 && P1.w <= 0)
1591                 {
1592                         return false;
1593                 }
1594
                     // presumably the viewport half-extents in pixels (Wx16 / Hx16 divided by 16) - TODO confirm
1595                 const float W = data.Wx16[0] * (1.0f / 16.0f);
1596                 const float H = data.Hx16[0] * (1.0f / 16.0f);
1597
                     // Difference between the endpoints' x/w and y/w, scaled by W and H
1598                 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1599                 float dy = H * (P1.y / P1.w - P0.y / P0.w);
1600
                     // Reject lines whose endpoints coincide after the divide and scale
1601                 if(dx == 0 && dy == 0)
1602                 {
1603                         return false;
1604                 }
1605
                     // The condition is the constant false, so this branch never executes; only the else branch runs
1606                 if(false)   // Rectangle
1607                 {
1608                         float4 P[4];
1609                         int C[4];
1610
1611                         P[0] = P0;
1612                         P[1] = P1;
1613                         P[2] = P1;
1614                         P[3] = P0;
1615
                             // Rescale (dx, dy) to a length of half the line width
1616                         float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
1617
1618                         dx *= scale;
1619                         dy *= scale;
1620
                             // Offsets multiplied by each endpoint's w and divided by W or H
1621                         float dx0w = dx * P0.w / W;
1622                         float dy0h = dy * P0.w / H;
1623                         float dx0h = dx * P0.w / H;
1624                         float dy0w = dy * P0.w / W;
1625
1626                         float dx1w = dx * P1.w / W;
1627                         float dy1h = dy * P1.w / H;
1628                         float dx1h = dx * P1.w / H;
1629                         float dy1w = dy * P1.w / W;
1630
1631                         P[0].x += -dy0w + -dx0w;
1632                         P[0].y += -dx0h + +dy0h;
1633                         C[0] = computeClipFlags(P[0], data);
1634
1635                         P[1].x += -dy1w + +dx1w;
1636                         P[1].y += -dx1h + +dy1h;
1637                         C[1] = computeClipFlags(P[1], data);
1638
1639                         P[2].x += +dy1w + +dx1w;
1640                         P[2].y += +dx1h + -dy1h;
1641                         C[2] = computeClipFlags(P[2], data);
1642
1643                         P[3].x += +dy0w + -dx0w;
1644                         P[3].y += +dx0h + +dy0h;
1645                         C[3] = computeClipFlags(P[3], data);
1646
1647                         if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1648                         {
1649                                 Polygon polygon(P, 4);
1650
1651                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1652
1653                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1654                                 {
1655                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1656                                         {
1657                                                 return false;
1658                                         }
1659                                 }
1660
1661                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1662                         }
1663                 }
1664                 else   // Diamond test convention
1665                 {
                             // Four points are placed around each endpoint: P[0..3] around P0, P[4..7] around P1
1666                         float4 P[8];
1667                         int C[8];
1668
1669                         P[0] = P0;
1670                         P[1] = P0;
1671                         P[2] = P0;
1672                         P[3] = P0;
1673                         P[4] = P1;
1674                         P[5] = P1;
1675                         P[6] = P1;
1676                         P[7] = P1;
1677
                             // Half the line width, multiplied by the endpoint's w and divided by W or H
1678                         float dx0 = lineWidth * 0.5f * P0.w / W;
1679                         float dy0 = lineWidth * 0.5f * P0.w / H;
1680
1681                         float dx1 = lineWidth * 0.5f * P1.w / W;
1682                         float dy1 = lineWidth * 0.5f * P1.w / H;
1683
                             // Each point is moved along a single axis (-x, +y, +x, -y) and gets its own clip flags
1684                         P[0].x += -dx0;
1685                         C[0] = computeClipFlags(P[0], data);
1686
1687                         P[1].y += +dy0;
1688                         C[1] = computeClipFlags(P[1], data);
1689
1690                         P[2].x += +dx0;
1691                         C[2] = computeClipFlags(P[2], data);
1692
1693                         P[3].y += -dy0;
1694                         C[3] = computeClipFlags(P[3], data);
1695
1696                         P[4].x += -dx1;
1697                         C[4] = computeClipFlags(P[4], data);
1698
1699                         P[5].y += +dy1;
1700                         C[5] = computeClipFlags(P[5], data);
1701
1702                         P[6].x += +dx1;
1703                         C[6] = computeClipFlags(P[6], data);
1704
1705                         P[7].y += -dy1;
1706                         C[7] = computeClipFlags(P[7], data);
1707
                             // Only continue when the AND of all eight points' clip flags is exactly CLIP_FINITE
1708                         if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1709                         {
                                     // Six of the eight points form the polygon; which six depends on how dx compares with dy and -dy
1710                                 float4 L[6];
1711
1712                                 if(dx > -dy)
1713                                 {
1714                                         if(dx > dy)   // Right
1715                                         {
1716                                                 L[0] = P[0];
1717                                                 L[1] = P[1];
1718                                                 L[2] = P[5];
1719                                                 L[3] = P[6];
1720                                                 L[4] = P[7];
1721                                                 L[5] = P[3];
1722                                         }
1723                                         else   // Down
1724                                         {
1725                                                 L[0] = P[0];
1726                                                 L[1] = P[4];
1727                                                 L[2] = P[5];
1728                                                 L[3] = P[6];
1729                                                 L[4] = P[2];
1730                                                 L[5] = P[3];
1731                                         }
1732                                 }
1733                                 else
1734                                 {
1735                                         if(dx > dy)   // Up
1736                                         {
1737                                                 L[0] = P[0];
1738                                                 L[1] = P[1];
1739                                                 L[2] = P[2];
1740                                                 L[3] = P[6];
1741                                                 L[4] = P[7];
1742                                                 L[5] = P[4];
1743                                         }
1744                                         else   // Left
1745                                         {
1746                                                 L[0] = P[1];
1747                                                 L[1] = P[2];
1748                                                 L[2] = P[3];
1749                                                 L[3] = P[7];
1750                                                 L[4] = P[4];
1751                                                 L[5] = P[5];
1752                                         }
1753                                 }
1754
1755                                 Polygon polygon(L, 6);
1756
1757                                 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1758
                                     // Any extra flag on a point or on the draw call sends the polygon through the clipper
1759                                 if(clipFlagsOr != Clipper::CLIP_FINITE)
1760                                 {
1761                                         if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1762                                         {
1763                                                 return false;
1764                                         }
1765                                 }
1766
1767                                 return setupRoutine(&primitive, &triangle, &polygon, &data);
1768                         }
1769                 }
1770
                     // Reached when the AND test on the clip flags failed
1771                 return false;
1772         }
1773
1774         bool Renderer::setupPoint(Renderer *renderer, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1775         {
                     // Builds a four-corner polygon around triangle.v0, clips it when required and hands
                     // it to the draw call's setup routine. Overwrites triangle.v1 and triangle.v2.
                     // Returns the setup routine's result, or false when the point is rejected beforehand.
1776                 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1777                 const SetupProcessor::State &state = draw.setupState;
1778                 const DrawData &data = *draw.data;
1779
1780                 Vertex &v = triangle.v0;
1781
1782                 float pSize;
1783
1784                 int pts = state.pointSizeRegister;
1785
                     // Register value 0xF selects the draw call's point size; any other value reads
                     // the y component of that vertex register
1786                 if(state.pointSizeRegister != 0xF)
1787                 {
1788                         pSize = v.v[pts].y;
1789                 }
1790                 else
1791                 {
1792                         pSize = data.point.pointSize[0];
1793                 }
1794
1795                 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1796
1797                 float4 P[4];
1798                 int C[4];
1799
1800                 int pos = state.positionRegister;
1801
                     // All four corners start at the vertex position
1802                 P[0] = v.v[pos];
1803                 P[1] = v.v[pos];
1804                 P[2] = v.v[pos];
1805                 P[3] = v.v[pos];
1806
                     // Corner offsets: point size times w times halfPixelX / halfPixelY
1807                 const float X = pSize * P[0].w * data.halfPixelX[0];
1808                 const float Y = pSize * P[0].w * data.halfPixelY[0];
1809
                     // Corners in order (-X, +Y), (+X, +Y), (+X, -Y), (-X, -Y), each with its own clip flags
1810                 P[0].x -= X;
1811                 P[0].y += Y;
1812                 C[0] = computeClipFlags(P[0], data);
1813
1814                 P[1].x += X;
1815                 P[1].y += Y;
1816                 C[1] = computeClipFlags(P[1], data);
1817
1818                 P[2].x += X;
1819                 P[2].y -= Y;
1820                 C[2] = computeClipFlags(P[2], data);
1821
1822                 P[3].x -= X;
1823                 P[3].y -= Y;
1824                 C[3] = computeClipFlags(P[3], data);
1825
                     // v1 and v2 become copies of v0, displaced in X and Y respectively by
                     // iround(8 * pSize); presumably X/Y are in 1/16 pixel units - TODO confirm
1826                 triangle.v1 = triangle.v0;
1827                 triangle.v2 = triangle.v0;
1828
1829                 triangle.v1.X += iround(16 * 0.5f * pSize);
1830                 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1831
1832                 Polygon polygon(P, 4);
1833
                     // Only continue when the AND of the four corners' clip flags is exactly CLIP_FINITE
1834                 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1835                 {
1836                         int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1837
                             // Any extra flag on a corner or on the draw call sends the polygon through the clipper
1838                         if(clipFlagsOr != Clipper::CLIP_FINITE)
1839                         {
1840                                 if(!renderer->clipper->clip(polygon, clipFlagsOr, draw))
1841                                 {
1842                                         return false;
1843                                 }
1844                         }
1845                         
1846                         return setupRoutine(&primitive, &triangle, &polygon, &data);
1847                 }
1848
1849                 return false;
1850         }
1851
1852         unsigned int Renderer::computeClipFlags(const float4 &v, const DrawData &data)
1853         {
                     // Returns a bit mask for position v: one bit per failed comparison against w (or 0 for
                     // the near z test), OR-ed with Clipper::CLIP_FINITE, which is always set.
                     // x and y are first offset by halfPixelX / halfPixelY times w.
1854                 float clX = v.x + data.halfPixelX[0] * v.w;
1855                 float clY = v.y + data.halfPixelY[0] * v.w;
1856
                     // Bit 0: x > w, bit 1: y > w, bit 2: z > w, bit 3: x < -w, bit 4: y < -w, bit 5: z < 0
1857                 return ((clX > v.w)  << 0) |
1858                            ((clY > v.w)  << 1) |
1859                            ((v.z > v.w)  << 2) |
1860                            ((clX < -v.w) << 3) |
1861                        ((clY < -v.w) << 4) |
1862                            ((v.z < 0)    << 5) |
1863                            Clipper::CLIP_FINITE;   // FIXME: xyz finite
1864         }
1865
1866         void Renderer::initializeThreads()
1867         {
                     // Allocates the per-unit triangle and primitive batches and, for each of the
                     // threadCount threads, its vertex task, events and worker Thread.
                     // Both unitCount and clusterCount are set to ceilPow2(threadCount).
1868                 unitCount = ceilPow2(threadCount);
1869                 clusterCount = ceilPow2(threadCount);
1870
1871                 for(int i = 0; i < unitCount; i++)
1872                 {
1873                         triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1874                         primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1875                 }
1876
1877                 for(int i = 0; i < threadCount; i++)
1878                 {
1879                         vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1880                         vertexTask[i]->vertexCache.drawCall = -1;
1881
1882                         task[i].type = Task::SUSPEND;
1883
1884                         resume[i] = new Event();
1885                         suspend[i] = new Event();
1886
                             // NOTE(review): 'parameters' is local to this iteration and passed by address;
                             // presumably the wait on suspend[i] below keeps it alive until the worker
                             // has read it - confirm against threadFunction
1887                         Parameters parameters;
1888                         parameters.threadIndex = i;
1889                         parameters.renderer = this;
1890
1891                         exitThreads = false;
1892                         worker[i] = new Thread(threadFunction, &parameters);
1893
                             // Wait for suspend[i], then signal it again
1894                         suspend[i]->wait();
1895                         suspend[i]->signal();
1896                 }
1897         }
1898
1899         void Renderer::terminateThreads()
1900         {
                     // Waits until no threads are awake, then stops and deletes each worker with its
                     // events, and frees the vertex tasks and the triangle/primitive batches.
                     // Every released pointer is reset to 0.
1901                 while(threadsAwake != 0)
1902                 {
1903                         Thread::sleep(1);
1904                 }
1905
1906                 for(int thread = 0; thread < threadCount; thread++)
1907                 {
1908                         if(worker[thread])
1909                         {
                                     // Set the exit flag, signal the worker's resume event, then wait for it to finish
1910                                 exitThreads = true;
1911                                 resume[thread]->signal();
1912                                 worker[thread]->join();
1913                                 
1914                                 delete worker[thread];
1915                                 worker[thread] = 0;
1916                                 delete resume[thread];
1917                                 resume[thread] = 0;
1918                                 delete suspend[thread];
1919                                 suspend[thread] = 0;
1920                         }
1921                 
                             // The vertex task is released even when no worker exists for this index
1922                         deallocate(vertexTask[thread]);
1923                         vertexTask[thread] = 0;
1924                 }
1925
                     // NOTE(review): the bound is a literal 16, whereas initializeThreads() allocates
                     // unitCount entries; presumably 16 is the size of both arrays - confirm
1926                 for(int i = 0; i < 16; i++)
1927                 {
1928                         deallocate(triangleBatch[i]);
1929                         triangleBatch[i] = 0;
1930
1931                         deallocate(primitiveBatch[i]);
1932                         primitiveBatch[i] = 0;
1933                 }
1934         }
1935
1936         void Renderer::loadConstants(const VertexShader *vertexShader)
1937         {
                     // Scans the vertex shader's instructions and applies every DEF, DEFI and DEFB
                     // instruction as a vertex shader constant. Does nothing for a null shader.
1938                 if(!vertexShader) return;
1939
1940                 size_t count = vertexShader->getLength();
1941
1942                 for(size_t i = 0; i < count; i++)
1943                 {
1944                         const Shader::Instruction *instruction = vertexShader->getInstruction(i);
1945
                             // In all three cases the destination index selects the constant and the first source holds its value
1946                         if(instruction->opcode == Shader::OPCODE_DEF)
1947                         {
                                     // Float constant: four components
1948                                 int index = instruction->dst.index;
1949                                 float value[4];
1950
1951                                 value[0] = instruction->src[0].value[0];
1952                                 value[1] = instruction->src[0].value[1];
1953                                 value[2] = instruction->src[0].value[2];
1954                                 value[3] = instruction->src[0].value[3];
1955
1956                                 setVertexShaderConstantF(index, value);
1957                         }
1958                         else if(instruction->opcode == Shader::OPCODE_DEFI)
1959                         {
                                     // Integer constant: four components
1960                                 int index = instruction->dst.index;
1961                                 int integer[4];
1962
1963                                 integer[0] = instruction->src[0].integer[0];
1964                                 integer[1] = instruction->src[0].integer[1];
1965                                 integer[2] = instruction->src[0].integer[2];
1966                                 integer[3] = instruction->src[0].integer[3];
1967
1968                                 setVertexShaderConstantI(index, integer);
1969                         }
1970                         else if(instruction->opcode == Shader::OPCODE_DEFB)
1971                         {
                                     // Boolean constant: a single value, passed by address
1972                                 int index = instruction->dst.index;
1973                                 int boolean = instruction->src[0].boolean[0];
1974
1975                                 setVertexShaderConstantB(index, &boolean);
1976                         }
1977                 }
1978         }
1979
1980         void Renderer::loadConstants(const PixelShader *pixelShader)
1981         {
                     // Scans the pixel shader's instructions and applies every DEF, DEFI and DEFB
                     // instruction as a pixel shader constant. Does nothing for a null shader.
1982                 if(!pixelShader) return;
1983
1984                 size_t count = pixelShader->getLength();
1985
1986                 for(size_t i = 0; i < count; i++)
1987                 {
1988                         const Shader::Instruction *instruction = pixelShader->getInstruction(i);
1989
                             // In all three cases the destination index selects the constant and the first source holds its value
1990                         if(instruction->opcode == Shader::OPCODE_DEF)
1991                         {
                                     // Float constant: four components
1992                                 int index = instruction->dst.index;
1993                                 float value[4];
1994
1995                                 value[0] = instruction->src[0].value[0];
1996                                 value[1] = instruction->src[0].value[1];
1997                                 value[2] = instruction->src[0].value[2];
1998                                 value[3] = instruction->src[0].value[3];
1999
2000                                 setPixelShaderConstantF(index, value);
2001                         }
2002                         else if(instruction->opcode == Shader::OPCODE_DEFI)
2003                         {
                                     // Integer constant: four components
2004                                 int index = instruction->dst.index;
2005                                 int integer[4];
2006
2007                                 integer[0] = instruction->src[0].integer[0];
2008                                 integer[1] = instruction->src[0].integer[1];
2009                                 integer[2] = instruction->src[0].integer[2];
2010                                 integer[3] = instruction->src[0].integer[3];
2011
2012                                 setPixelShaderConstantI(index, integer);
2013                         }
2014                         else if(instruction->opcode == Shader::OPCODE_DEFB)
2015                         {
                                     // Boolean constant: a single value, passed by address
2016                                 int index = instruction->dst.index;
2017                                 int boolean = instruction->src[0].boolean[0];
2018
2019                                 setPixelShaderConstantB(index, &boolean);
2020                         }
2021                 }
2022         }
2023
2024         void Renderer::setIndexBuffer(Resource *indexBuffer)
2025         {
                     // Stores the index buffer resource pointer in the context
2026                 context->indexBuffer = indexBuffer;
2027         }
2028
2029         void Renderer::setMultiSampleMask(unsigned int mask)
2030         {
                     // Stores the mask as the context's sampleMask
2031                 context->sampleMask = mask;
2032         }
2033
2034         void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2035         {
                     // Writes the namespace-level sw::transparencyAntialiasing variable, not a member of
                     // this Renderer; the qualification is needed because the parameter has the same name
2036                 sw::transparencyAntialiasing = transparencyAntialiasing;
2037         }
2038
2039         bool Renderer::isReadWriteTexture(int sampler)
2040         {
                     // Returns true when the texture bound to 'sampler' is the resource of one of the
                     // first four render targets or of the depth-stencil surface; false otherwise.
2041                 for(int index = 0; index < 4; index++)
2042                 {
                             // Unset render targets are skipped
2043                         if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2044                         {
2045                                 return true;
2046                         }
2047                 }
2048         
2049                 if(context->depthStencil && context->texture[sampler] == context->depthStencil->getResource())
2050                 {
2051                         return true;
2052                 }
2053
2054                 return false;
2055         }
2056         
2057         void Renderer::updateClipper()
2058         {
2059                 if(updateClipPlanes)
2060                 {
2061                         if(VertexProcessor::isFixedFunction())   // User plane in world space
2062                         {
2063                                 const Matrix &scissorWorld = getViewTransform();
2064
2065                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2066                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2067                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2068                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2069                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2070                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2071                         }
2072                         else   // User plane in clip space
2073                         {
2074                                 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2075                                 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2076                                 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2077                                 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2078                                 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2079                                 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2080                         }
2081
2082                         updateClipPlanes = false;
2083                 }
2084         }
2085
2086         void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2087         {
2088                 ASSERT(sampler < (16 + 4));
2089
2090                 context->texture[sampler] = resource;
2091         }
2092
2093         void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2094         {
2095                 ASSERT(sampler < (16 + 4) && face < 6 && level < MIPMAP_LEVELS);
2096                 
2097                 context->sampler[sampler].setTextureLevel(face, level, surface, type);
2098         }
2099
2100         void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2101         {
2102                 if(type == SAMPLER_PIXEL)
2103                 {
2104                         PixelProcessor::setTextureFilter(sampler, textureFilter);
2105                 }
2106                 else
2107                 {
2108                         VertexProcessor::setTextureFilter(sampler, textureFilter);
2109                 }
2110         }
2111
2112         void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2113         {
2114                 if(type == SAMPLER_PIXEL)
2115                 {
2116                         PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2117                 }
2118                 else
2119                 {
2120                         VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
2121                 }
2122         }
2123
2124         void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2125         {
2126                 if(type == SAMPLER_PIXEL)
2127                 {
2128                         PixelProcessor::setGatherEnable(sampler, enable);
2129                 }
2130                 else
2131                 {
2132                         VertexProcessor::setGatherEnable(sampler, enable);
2133                 }
2134         }
2135
2136         void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2137         {
2138                 if(type == SAMPLER_PIXEL)
2139                 {
2140                         PixelProcessor::setAddressingModeU(sampler, addressMode);
2141                 }
2142                 else
2143                 {
2144                         VertexProcessor::setAddressingModeU(sampler, addressMode);
2145                 }
2146         }
2147
2148         void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2149         {
2150                 if(type == SAMPLER_PIXEL)
2151                 {
2152                         PixelProcessor::setAddressingModeV(sampler, addressMode);
2153                 }
2154                 else
2155                 {
2156                         VertexProcessor::setAddressingModeV(sampler, addressMode);
2157                 }
2158         }
2159
2160         void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2161         {
2162                 if(type == SAMPLER_PIXEL)
2163                 {
2164                         PixelProcessor::setAddressingModeW(sampler, addressMode);
2165                 }
2166                 else
2167                 {
2168                         VertexProcessor::setAddressingModeW(sampler, addressMode);
2169                 }
2170         }
2171
2172         void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2173         {
2174                 if(type == SAMPLER_PIXEL)
2175                 {
2176                         PixelProcessor::setReadSRGB(sampler, sRGB);
2177                 }
2178                 else
2179                 {
2180                         VertexProcessor::setReadSRGB(sampler, sRGB);
2181                 }
2182         }
2183
2184         void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2185         {
2186                 if(type == SAMPLER_PIXEL)
2187                 {
2188                         PixelProcessor::setMipmapLOD(sampler, bias);
2189                 }
2190                 else
2191                 {
2192                         VertexProcessor::setMipmapLOD(sampler, bias);
2193                 }
2194         }
2195
2196         void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2197         {
2198                 if(type == SAMPLER_PIXEL)
2199                 {
2200                         PixelProcessor::setBorderColor(sampler, borderColor);
2201                 }
2202                 else
2203                 {
2204                         VertexProcessor::setBorderColor(sampler, borderColor);
2205                 }
2206         }
2207
2208         void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2209         {
2210                 if(type == SAMPLER_PIXEL)
2211                 {
2212                         PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2213                 }
2214                 else
2215                 {
2216                         VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2217                 }
2218         }
2219
2220         void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2221         {
2222                 context->setPointSpriteEnable(pointSpriteEnable);
2223         }
2224
2225         void Renderer::setPointScaleEnable(bool pointScaleEnable)
2226         {
2227                 context->setPointScaleEnable(pointScaleEnable);
2228         }
2229
2230         void Renderer::setLineWidth(float width)
2231         {
2232                 context->lineWidth = width;
2233         }
2234
2235         void Renderer::setDepthBias(float bias)
2236         {
2237                 depthBias = bias;
2238         }
2239
2240         void Renderer::setSlopeDepthBias(float slopeBias)
2241         {
2242                 slopeDepthBias = slopeBias;
2243         }
2244
2245         void Renderer::setPixelShader(const PixelShader *shader)
2246         {
2247                 context->pixelShader = shader;
2248
2249                 loadConstants(shader);
2250         }
2251
2252         void Renderer::setVertexShader(const VertexShader *shader)
2253         {
2254                 context->vertexShader = shader;
2255
2256                 loadConstants(shader);
2257         }
2258
2259         void Renderer::setPixelShaderConstantF(int index, const float value[4], int count)
2260         {
2261                 for(int i = 0; i < DRAW_COUNT; i++)
2262                 {
2263                         if(drawCall[i]->psDirtyConstF < index + count)
2264                         {
2265                                 drawCall[i]->psDirtyConstF = index + count;
2266                         }
2267                 }
2268
2269                 for(int i = 0; i < count; i++)
2270                 {
2271                         PixelProcessor::setFloatConstant(index + i, value);
2272                         value += 4;
2273                 }
2274         }
2275
2276         void Renderer::setPixelShaderConstantI(int index, const int value[4], int count)
2277         {
2278                 for(int i = 0; i < DRAW_COUNT; i++)
2279                 {
2280                         if(drawCall[i]->psDirtyConstI < index + count)
2281                         {
2282                                 drawCall[i]->psDirtyConstI = index + count;
2283                         }
2284                 }
2285
2286                 for(int i = 0; i < count; i++)
2287                 {
2288                         PixelProcessor::setIntegerConstant(index + i, value);
2289                         value += 4;
2290                 }
2291         }
2292
2293         void Renderer::setPixelShaderConstantB(int index, const int *boolean, int count)
2294         {
2295                 for(int i = 0; i < DRAW_COUNT; i++)
2296                 {
2297                         if(drawCall[i]->psDirtyConstB < index + count)
2298                         {
2299                                 drawCall[i]->psDirtyConstB = index + count;
2300                         }
2301                 }
2302
2303                 for(int i = 0; i < count; i++)
2304                 {
2305                         PixelProcessor::setBooleanConstant(index + i, *boolean);
2306                         boolean++;
2307                 }
2308         }
2309
2310         void Renderer::setVertexShaderConstantF(int index, const float value[4], int count)
2311         {
2312                 for(int i = 0; i < DRAW_COUNT; i++)
2313                 {
2314                         if(drawCall[i]->vsDirtyConstF < index + count)
2315                         {
2316                                 drawCall[i]->vsDirtyConstF = index + count;
2317                         }
2318                 }
2319
2320                 for(int i = 0; i < count; i++)
2321                 {
2322                         VertexProcessor::setFloatConstant(index + i, value);
2323                         value += 4;
2324                 }
2325         }
2326
2327         void Renderer::setVertexShaderConstantI(int index, const int value[4], int count)
2328         {
2329                 for(int i = 0; i < DRAW_COUNT; i++)
2330                 {
2331                         if(drawCall[i]->vsDirtyConstI < index + count)
2332                         {
2333                                 drawCall[i]->vsDirtyConstI = index + count;
2334                         }
2335                 }
2336
2337                 for(int i = 0; i < count; i++)
2338                 {
2339                         VertexProcessor::setIntegerConstant(index + i, value);
2340                         value += 4;
2341                 }
2342         }
2343
2344         void Renderer::setVertexShaderConstantB(int index, const int *boolean, int count)
2345         {
2346                 for(int i = 0; i < DRAW_COUNT; i++)
2347                 {
2348                         if(drawCall[i]->vsDirtyConstB < index + count)
2349                         {
2350                                 drawCall[i]->vsDirtyConstB = index + count;
2351                         }
2352                 }
2353
2354                 for(int i = 0; i < count; i++)
2355                 {
2356                         VertexProcessor::setBooleanConstant(index + i, *boolean);
2357                         boolean++;
2358                 }
2359         }
2360
2361         void Renderer::setModelMatrix(const Matrix &M, int i)
2362         {
2363                 VertexProcessor::setModelMatrix(M, i);
2364         }
2365
2366         void Renderer::setViewMatrix(const Matrix &V)
2367         {
2368                 VertexProcessor::setViewMatrix(V);
2369                 updateClipPlanes = true;
2370         }
2371
2372         void Renderer::setBaseMatrix(const Matrix &B)
2373         {
2374                 VertexProcessor::setBaseMatrix(B);
2375                 updateClipPlanes = true;
2376         }
2377
2378         void Renderer::setProjectionMatrix(const Matrix &P)
2379         {
2380                 VertexProcessor::setProjectionMatrix(P);
2381                 updateClipPlanes = true;
2382         }
2383
2384         void Renderer::addQuery(Query *query)
2385         {
2386                 queries.push_back(query);
2387         }
2388         
2389         void Renderer::removeQuery(Query *query)
2390         {
2391                 queries.remove(query);
2392         }
2393
2394         #if PERF_HUD
2395                 int Renderer::getThreadCount()
2396                 {
2397                         return threadCount;
2398                 }
2399                 
2400                 int64_t Renderer::getVertexTime(int thread)
2401                 {
2402                         return vertexTime[thread];
2403                 }
2404
2405                 int64_t Renderer::getSetupTime(int thread)
2406                 {
2407                         return setupTime[thread];
2408                 }
2409                         
2410                 int64_t Renderer::getPixelTime(int thread)
2411                 {
2412                         return pixelTime[thread];
2413                 }
2414
2415                 void Renderer::resetTimers()
2416                 {
2417                         for(int thread = 0; thread < threadCount; thread++)
2418                         {
2419                                 vertexTime[thread] = 0;
2420                                 setupTime[thread] = 0;
2421                                 pixelTime[thread] = 0;
2422                         }
2423                 }
2424         #endif
2425
2426         void Renderer::setViewport(const Viewport &viewport)
2427         {
2428                 this->viewport = viewport;
2429         }
2430
2431         void Renderer::setScissor(const Rect &scissor)
2432         {
2433                 this->scissor = scissor;
2434         }
2435
2436         void Renderer::setClipFlags(int flags)
2437         {
2438                 clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
2439         }
2440
2441         void Renderer::setClipPlane(unsigned int index, const float plane[4])
2442         {
2443                 if(index < 6)
2444                 {
2445                         userPlane[index] = plane;
2446                 }
2447                 else ASSERT(false);
2448
2449                 updateClipPlanes = true;
2450         }
2451
2452         void Renderer::updateConfiguration(bool initialUpdate)
2453         {
2454                 bool newConfiguration = swiftConfig->hasNewConfiguration();
2455
2456                 if(newConfiguration || initialUpdate)
2457                 {
2458                         terminateThreads();
2459
2460                         SwiftConfig::Configuration configuration = {0};
2461                         swiftConfig->getConfiguration(configuration);
2462
2463                         precacheVertex = !newConfiguration && configuration.precache;
2464                         precacheSetup = !newConfiguration && configuration.precache;
2465                         precachePixel = !newConfiguration && configuration.precache;
2466
2467                         VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2468                         PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2469                         SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2470
2471                         switch(configuration.textureSampleQuality)
2472                         {
2473                         case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
2474                         case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
2475                         case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2476                         default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2477                         }
2478
2479                         switch(configuration.mipmapQuality)
2480                         {
2481                         case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
2482                         case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2483                         default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2484                         }
2485
2486                         setPerspectiveCorrection(configuration.perspectiveCorrection);
2487
2488                         switch(configuration.transcendentalPrecision)
2489                         {
2490                         case 0:
2491                                 logPrecision = APPROXIMATE;
2492                                 expPrecision = APPROXIMATE;
2493                                 rcpPrecision = APPROXIMATE;
2494                                 rsqPrecision = APPROXIMATE;
2495                                 break;
2496                         case 1:
2497                                 logPrecision = PARTIAL;
2498                                 expPrecision = PARTIAL;
2499                                 rcpPrecision = PARTIAL;
2500                                 rsqPrecision = PARTIAL;
2501                                 break;
2502                         case 2:
2503                                 logPrecision = ACCURATE;
2504                                 expPrecision = ACCURATE;
2505                                 rcpPrecision = ACCURATE;
2506                                 rsqPrecision = ACCURATE;
2507                                 break;
2508                         case 3:
2509                                 logPrecision = WHQL;
2510                                 expPrecision = WHQL;
2511                                 rcpPrecision = WHQL;
2512                                 rsqPrecision = WHQL;
2513                                 break;
2514                         case 4:
2515                                 logPrecision = IEEE;
2516                                 expPrecision = IEEE;
2517                                 rcpPrecision = IEEE;
2518                                 rsqPrecision = IEEE;
2519                                 break;
2520                         default:
2521                                 logPrecision = ACCURATE;
2522                                 expPrecision = ACCURATE;
2523                                 rcpPrecision = ACCURATE;
2524                                 rsqPrecision = ACCURATE;
2525                                 break;
2526                         }
2527
2528                         switch(configuration.transparencyAntialiasing)
2529                         {
2530                         case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2531                         case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2532                         default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2533                         }
2534
2535                         switch(configuration.threadCount)
2536                         {
2537                         case -1: threadCount = CPUID::coreCount();        break;
2538                         case 0:  threadCount = CPUID::processAffinity();  break;
2539                         default: threadCount = configuration.threadCount; break;
2540                         }
2541
2542                         CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2543                         CPUID::setEnableSSSE3(configuration.enableSSSE3);
2544                         CPUID::setEnableSSE3(configuration.enableSSE3);
2545                         CPUID::setEnableSSE2(configuration.enableSSE2);
2546                         CPUID::setEnableSSE(configuration.enableSSE);
2547
2548                         for(int pass = 0; pass < 10; pass++)
2549                         {
2550                                 optimization[pass] = configuration.optimization[pass];
2551                         }
2552
2553                         forceWindowed = configuration.forceWindowed;
2554                         complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2555                         postBlendSRGB = configuration.postBlendSRGB;
2556                         exactColorRounding = configuration.exactColorRounding;
2557                         forceClearRegisters = configuration.forceClearRegisters;
2558
2559                 #ifndef NDEBUG
2560                         minPrimitives = configuration.minPrimitives;
2561                         maxPrimitives = configuration.maxPrimitives;
2562                 #endif
2563                 }
2564
2565                 if(!initialUpdate && !worker[0])
2566                 {
2567                         initializeThreads();
2568                 }
2569         }
2570 }